In [2]:
import json
import os
import glob
import re
import pandas as pd
import plotly.express as px
from typing import List, Dict, Tuple

# Global configuration
# Input directories scanned when main() is called without `directories`.
DEFAULT_DIRECTORIES = ["/home/sbstndbs/sbstndbs/regression/results/test/compilers"]
DEFAULT_FILE_PATTERNS = ["*blas1*.json", "*blas2*.json"]  # Example glob patterns to analyse
OUTPUT_BASE_DIR = "./reports/"  # Base directory under which reports are written

def get_sorted_files(directory: str, file_pattern: str) -> List[str]:
    """Return the files matching *file_pattern* in *directory*, ordered by version.

    Versions are compared numerically, component by component, so that
    "10.0" sorts after "9.0" (a plain string comparison would put it first).
    Files for which ``extract_version`` does not yield a dotted number are
    placed after all versioned files and ordered by the string it returned.

    Args:
        directory: Directory to search.
        file_pattern: Glob pattern, joined to *directory* before matching.

    Returns:
        The matching paths, sorted as described above (empty if none match).
    """
    def version_key(path: str) -> Tuple[int, Tuple[int, ...], str]:
        version = extract_version(path)
        if re.fullmatch(r"[0-9]+(?:\.[0-9]+)*", version):
            # Numeric versions: compare as tuples of ints, e.g. (9, 2) < (10, 1).
            return (0, tuple(int(part) for part in version.split(".")), "")
        # Fallback value (not a clean dotted number): keep a string ordering,
        # but in a separate bucket so ints and strings are never compared.
        return (1, (), version)

    search_path = os.path.join(directory, file_pattern)
    return sorted(glob.glob(search_path), key=version_key)

def extract_version(filename: str) -> str:
    """Return the dotted numeric version embedded in *filename*.

    The version is the run of digits and dots that follows ``output_`` and is
    immediately followed by the literal ``.json`` extension at the end of the
    name, e.g. ``"blas1_output_12.1.json"`` -> ``"12.1"``.

    Args:
        filename: File name or path to inspect.

    Returns:
        The version string, or the base name of *filename* when the name does
        not follow the ``output_<version>.json`` convention.
    """
    # The dot before "json" is escaped so that only a real ".json" extension
    # matches; an unescaped "." would accept any character there.
    match = re.search(r'output_([0-9.]+)(?=\.json$)', filename)
    if match is None:
        return os.path.basename(filename)
    return match.group(1)

def process_benchmark_data(files: List[str]) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load benchmark JSON files and split their rows by presence of a size.

    Each file must be a JSON document with a top-level ``"benchmarks"`` entry;
    its records are flattened into a DataFrame, passed through
    ``process_size_column`` and tagged with the file's base name in a
    ``"filename"`` column.

    Args:
        files: Paths of the JSON files to load, in the order they should
            appear in the result.

    Returns:
        A pair ``(with_size, without_size)``: the rows whose ``"size"`` is set
        and the rows whose ``"size"`` is missing, each concatenated over all
        files with a fresh 0..n-1 index.

    Raises:
        KeyError: If a file has no ``"benchmarks"`` entry.
        ValueError: If *files* is empty (``pd.concat`` has nothing to join).
    """
    sized_frames = []
    unsized_frames = []

    for file_path in files:
        # Read as UTF-8 explicitly instead of relying on the platform's
        # default text encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        df = pd.json_normalize(data["benchmarks"])
        df = process_size_column(df)
        df["filename"] = os.path.basename(file_path)

        # Split rows into those with and without a parsed size.
        has_size = df["size"].notna()
        sized_frames.append(df[has_size])
        unsized_frames.append(df[~has_size])

    # ignore_index avoids repeated row labels: every per-file frame starts
    # its own index at 0.
    return (
        pd.concat(sized_frames, ignore_index=True),
        pd.concat(unsized_frames, ignore_index=True),
    )

def process_size_column(df: pd.DataFrame) -> pd.DataFrame:
    """Derive ``size`` and ``test_case`` columns from the ``name`` column.

    A trailing ``/<digits>``, ``<<digits>`` or ``<<digits>>`` suffix on a name
    is read as the problem size; ``test_case`` is the name with that suffix
    removed. Names without such a suffix get no size and keep their full name
    as ``test_case``. The frame is modified in place and also returned.
    """
    size_regex = re.compile(r'[/<](\d+)(?:>)?$')
    suffix_regex = re.compile(r'[/<]\d+>?$')

    def parse_size(name):
        # Integer size when the suffix is present, otherwise None.
        found = size_regex.search(name)
        if found is None:
            return None
        return int(found.group(1))

    def strip_size(name):
        # Drop the size suffix, leaving the bare test-case name.
        return suffix_regex.sub('', name)

    names = df["name"]
    df["size"] = names.apply(parse_size)
    df["test_case"] = names.apply(strip_size)
    return df

def generate_size_analysis_plots(df: pd.DataFrame, pattern_name: str, output_dir: str):
    """Write the two HTML line charts for benchmarks that have a size.

    Both charts plot ``cpu_time`` against ``size`` on log-log axes:

    * ``animated_size_<pattern_name>.html`` - one line per test case, with an
      animation frame per input file;
    * ``multiview_size_<pattern_name>.html`` - one facet per test case
      (four per row), with one line per input file.

    The files are written into *output_dir*.
    """
    # Axis settings shared by both figures.
    shared_axes = dict(x="size", y="cpu_time", log_x=True, log_y=True)

    # Animated chart: one frame per file.
    animated_fig = px.line(
        df,
        color="test_case",
        animation_frame="filename",
        title=f"Benchmarks avec taille - {pattern_name}",
        height=700,
        **shared_axes,
    )
    animated_path = os.path.join(output_dir, f"animated_size_{pattern_name}.html")
    animated_fig.write_html(animated_path)

    # Multi-view chart: one facet per test case.
    multiview_fig = px.line(
        df,
        color="filename",
        facet_col="test_case",
        facet_col_wrap=4,
        title=f"Comparaison multivue - {pattern_name}",
        height=1800,
        **shared_axes,
    )
    multiview_path = os.path.join(output_dir, f"multiview_size_{pattern_name}.html")
    multiview_fig.write_html(multiview_path)

def generate_static_analysis_plots(df: pd.DataFrame, pattern_name: str, output_dir: str):
    """Write the relative-performance chart for benchmarks without a size.

    For every benchmark ``name``, each ``cpu_time`` is divided by the
    ``cpu_time`` of the first row of that benchmark in *df*, so the first
    file acts as the baseline (value 1.0). The result is plotted per file,
    one facet per test case (four per row), and written to
    ``relative_perf_<pattern_name>.html`` in *output_dir*.

    The caller's DataFrame is left untouched: the normalised column is added
    to a new frame rather than to *df* itself.
    """
    # Normalise each benchmark against its first measurement.
    relative_time = df.groupby('name')['cpu_time'].transform(lambda x: x / x.iloc[0])
    plot_df = df.assign(normalised_cpu_time=relative_time)

    # Comparison chart.
    fig = px.line(
        plot_df,
        x="filename",
        y="normalised_cpu_time",
        facet_col="test_case",
        facet_col_wrap=4,
        title=f"Performance relative - {pattern_name}",
        height=700
    )
    # Fixed y-range so that all facets are directly comparable.
    fig.update_yaxes(range=[0.1, 5])
    fig.write_html(os.path.join(output_dir, f"relative_perf_{pattern_name}.html"))

def analyze_file_pattern(directory: str, file_pattern: str, output_dir: str):
    """Run the whole analysis for one glob pattern in one directory.

    Creates *output_dir* if needed, collects the matching files, loads them,
    and writes the size-based and size-less reports. Prints a message and
    returns early when no file matches.
    """
    print(f"\nAnalyzing pattern: {file_pattern} in directory: {directory}")

    # Make sure the report directory exists.
    os.makedirs(output_dir, exist_ok=True)

    # Collect the input files.
    matched_files = get_sorted_files(directory, file_pattern)
    if not matched_files:
        print(f"Aucun fichier trouvé pour {file_pattern} dans {directory}")
        return

    # Load and split the benchmark data.
    sized_df, unsized_df = process_benchmark_data(matched_files)

    # Report name: the pattern stripped of wildcards and of the extension.
    pattern_name = file_pattern
    for token in ("*", ".json"):
        pattern_name = pattern_name.replace(token, "")

    # Write the reports.
    generate_size_analysis_plots(sized_df, pattern_name, output_dir)
    generate_static_analysis_plots(unsized_df, pattern_name, output_dir)

def main(file_patterns: List[str] = DEFAULT_FILE_PATTERNS, directories: List[str] = DEFAULT_DIRECTORIES):
    """Main entry point: analyse every pattern in every input directory.

    Reports for an input directory are written under ``OUTPUT_BASE_DIR`` in a
    sub-directory named after that directory's last path component.
    """
    for source_dir in directories:
        # One report directory per input directory.
        dir_label = os.path.basename(os.path.normpath(source_dir))
        report_dir = os.path.join(OUTPUT_BASE_DIR, dir_label)
        for file_pattern in file_patterns:
            analyze_file_pattern(source_dir, file_pattern, report_dir)

if __name__ == "__main__":
    # Example run with custom glob patterns; these are passed to main() in
    # place of DEFAULT_FILE_PATTERNS, while the default directories are kept.
    custom_patterns = [
        "allocation_*.json",
        "find_equal_*.json",
        "find_gt_*.json",
        "op_*.json",
    ]
    
    main(file_patterns=custom_patterns)


Analyzing pattern: allocation_*.json in directory: /home/sbstndbs/sbstndbs/regression/results/test/compilers

Analyzing pattern: find_equal_*.json in directory: /home/sbstndbs/sbstndbs/regression/results/test/compilers

Analyzing pattern: find_gt_*.json in directory: /home/sbstndbs/sbstndbs/regression/results/test/compilers

Analyzing pattern: op_*.json in directory: /home/sbstndbs/sbstndbs/regression/results/test/compilers
