# Visualisation des données de runs (CPU/GPU)
Ce notebook charge les résultats produits par les scripts Slurm (CSV dans `../results/`) et fournit des vues d’analyse et de visualisation.

In [None]:
# Section 1 — Installer et importer les bibliothèques
# (Les installations sont idempotentes; exécutez au besoin.)
# %pip install -q pandas numpy seaborn matplotlib plotly ipywidgets pyarrow mlflow || true

import os
import glob
import json
from datetime import timedelta

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

import ipywidgets as widgets
from IPython.display import display, HTML

# Styles
%matplotlib inline
sns.set_theme(style="whitegrid")
plt.rcParams["figure.figsize"] = (10, 5)

# Dossiers par défaut (adaptés à ce repo)
ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
RESULTS_DIR = os.path.join(ROOT, "results")
FIG_DIR = os.path.join(ROOT, "figures")
OUT_DIR = os.path.join(ROOT, "output")
os.makedirs(FIG_DIR, exist_ok=True)
os.makedirs(OUT_DIR, exist_ok=True)

print("RESULTS_DIR:", RESULTS_DIR)

In [None]:
# Section 2 — Charger les données de runs (CSV/JSON/Parquet ou MLflow)
# 2.1 Lectures CSV CPU/GPU du repo (results/cpu_*.csv, results/gpu_*.csv)

def read_many_csv(pattern: str, kind: str) -> pd.DataFrame:
    """Load every CSV matching ``pattern`` and stack them into one DataFrame.

    Each loaded frame gets two extra columns: ``source_file`` (the base name
    of the file it came from) and ``kind`` (the label passed in). A file that
    cannot be loaded is reported with a warning on stdout and skipped.

    Args:
        pattern: Glob pattern selecting the CSV files; matches are processed
            in sorted order.
        kind: Label written to the ``kind`` column of every row.

    Returns:
        The concatenation of all readable files with a fresh 0..n-1 index,
        or an empty DataFrame when no file could be read.
    """
    frames = []
    for path in sorted(glob.glob(pattern)):
        try:
            frame = pd.read_csv(path)
            frame["source_file"] = os.path.basename(path)
            frame["kind"] = kind
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole load.
            print(f"[WARN] Lecture échouée {path}: {e}")
            continue
        frames.append(frame)
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# 2.1 (continued) One DataFrame per run kind, read from results/<kind>_*.csv.
cpu_df, gpu_df = (
    read_many_csv(os.path.join(RESULTS_DIR, f"{kind}_*.csv"), kind=kind)
    for kind in ("cpu", "gpu")
)

# 2.2 Optional: a single generic file, loaded only when it exists.
unique_path = os.path.join(RESULTS_DIR, "runs.csv")
if os.path.exists(unique_path):
    unique_df = pd.read_csv(unique_path)
else:
    unique_df = pd.DataFrame()

# 2.3 Optional: MLflow (when a tracking server is used). Any failure while
# importing it simply marks the integration as unavailable.
try:
    import mlflow
except Exception:
    MLFLOW_AVAILABLE = False
else:
    MLFLOW_AVAILABLE = True

mlflow_df = pd.DataFrame()
# Example (adapt the experiment ID):
# if MLFLOW_AVAILABLE:
#     runs = mlflow.search_runs(experiment_ids=["0"], output_format="pandas")
#     mlflow_df = runs

# Shapes of the four candidate sources, in loading order.
print(*(frame.shape for frame in (cpu_df, gpu_df, unique_df, mlflow_df)))

In [None]:
# Section 3 — Nettoyer et normaliser les colonnes

from typing import Dict

def snake(name: str) -> str:
    """Normalise a column label to a lower-case, underscore-separated name.

    The label is converted to ``str`` (so non-string labels such as integers
    are accepted), stripped of surrounding whitespace, lower-cased, and every
    remaining space is replaced by an underscore.

    Stripping matters for headers written as ``"node, mode"``: without it the
    second column would become ``"_mode"`` and later lookups by the expected
    name would miss it.

    Args:
        name: Raw column label.

    Returns:
        The normalised label.
    """
    return str(name).strip().lower().replace(" ", "_")

# Normalise CPU runs: snake_case headers, UTC timestamps, numeric metrics.
cpu_norm = cpu_df.copy()
if not cpu_norm.empty:
    cpu_norm.columns = [snake(c) for c in cpu_norm.columns]
    # Expected columns: node, mode, threads, runs, duration_s,
    # avg_events_per_s, stddev_events_per_s, timestamp
    # A missing "timestamp" column yields an all-NaT column rather than an error.
    cpu_norm["timestamp"] = pd.to_datetime(cpu_norm.get("timestamp"), errors="coerce", utc=True)
    num_cols = ["threads", "runs", "duration_s", "avg_events_per_s", "stddev_events_per_s"]
    for c in num_cols:
        if c in cpu_norm:
            # Unparseable cells become NaN instead of leaving an object column.
            cpu_norm[c] = pd.to_numeric(cpu_norm[c], errors="coerce")

# Normalise GPU runs: metric columns vary per backend, so keep both the wide
# table (gpu_norm) and a long one (gpu_long) with one row per (run, metric).
gpu_norm = gpu_df.copy()
gpu_long = pd.DataFrame()
if not gpu_norm.empty:
    gpu_norm.columns = [snake(c) for c in gpu_norm.columns]
    gpu_norm["timestamp"] = pd.to_datetime(gpu_norm.get("timestamp"), errors="coerce", utc=True)

    # Metric columns are recognised by the "_mono_" / "_multi_" infix
    # (e.g. <backend>_mono_avg, <backend>_multi_std).
    metric_cols = [c for c in gpu_norm.columns if any(x in c for x in ("_mono_", "_multi_"))]
    if metric_cols:
        gpu_long = gpu_norm.melt(
            id_vars=[c for c in gpu_norm.columns if c not in metric_cols],
            value_vars=metric_cols,
            var_name="metric",
            value_name="value",
        )
        # Same coercion as the CPU metrics: a single non-numeric cell would
        # otherwise make "value" an object column and break sorting/plotting.
        gpu_long["value"] = pd.to_numeric(gpu_long["value"], errors="coerce")

# Minimal merge into one optional unified DataFrame (columns are the union of both).
all_runs = pd.concat([cpu_norm.assign(kind="cpu"), gpu_norm.assign(kind="gpu")], ignore_index=True, sort=False)
print("cpu_norm:", cpu_norm.shape, "gpu_norm:", gpu_norm.shape, "all_runs:", all_runs.shape)

In [None]:
# Section 4 — Aperçu et validation des données

def _preview(label, frame):
    """Display a heading, the first rows and summary statistics of ``frame``.

    Nothing is displayed when ``frame`` is empty.
    """
    if frame.empty:
        return
    display(HTML(f"<h3>{label} — Aperçu</h3>"))
    display(frame.head())
    display(frame.describe(include="all"))


_preview("CPU", cpu_norm)
_preview("GPU", gpu_norm)

# Simple sanity check: row count per node, 20 most frequent nodes.
if "node" in all_runs:
    display(
        all_runs["node"]
        .value_counts()
        .to_frame("count")
        .head(20)
    )

In [None]:
# Section 5 — Calcul de métriques dérivées par run

# CPU: best row (highest avg_events_per_s) for each "node::mode" key.
best_cpu = pd.DataFrame()
if not cpu_norm.empty:
    best_cpu = cpu_norm.assign(
        key=cpu_norm["node"].astype(str) + "::" + cpu_norm["mode"].astype(str)
    )
    best_cpu = best_cpu.sort_values("avg_events_per_s", ascending=False)
    # After the descending sort, the first row of each group is its maximum.
    best_cpu = best_cpu.groupby("key", as_index=False).head(1)

# GPU: keep only the *_avg metrics of the long table, then take the highest
# value for each (node, metric) pair.
best_gpu = pd.DataFrame()
if not gpu_long.empty:
    gpu_avg = gpu_long.loc[gpu_long["metric"].str.endswith("_avg", na=False)].copy()
    best_gpu = gpu_avg.assign(key=gpu_avg["node"].astype(str))
    best_gpu = best_gpu.sort_values("value", ascending=False)
    best_gpu = best_gpu.groupby(["key", "metric"], as_index=False).head(1)

print(f"best_cpu: {best_cpu.shape} best_gpu: {best_gpu.shape}")

In [None]:
# Section 6 — Visualisations (lignes/évolution si step, sinon barres)

# CPU: horizontal bar chart of the best "multi" run per node.
if not cpu_norm.empty:
    display(HTML("<h3>CPU — Meilleures perfs par nœud (mode multi)</h3>"))
    # Check the columns explicitly: filtering with cpu_norm.get("mode") == "multi"
    # raises KeyError when "mode" is absent, because the comparison collapses
    # to the scalar False.
    if not {"node", "mode", "avg_events_per_s"}.issubset(cpu_norm.columns):
        print("Colonnes CPU manquantes (node, mode, avg_events_per_s) : graphique ignoré.")
    else:
        cpu_multi = cpu_norm[cpu_norm["mode"] == "multi"].copy()
        # Sort descending within each node, then keep the first (best) row.
        top_cpu = (cpu_multi
                   .sort_values(["node", "avg_events_per_s"], ascending=[True, False])
                   .groupby("node", as_index=False).head(1))
        if top_cpu.empty:
            print("Aucun run CPU en mode 'multi' à afficher.")
        else:
            ax = sns.barplot(data=top_cpu.sort_values("avg_events_per_s", ascending=False),
                             x="avg_events_per_s", y="node", color="#4C78A8")
            ax.set_xlabel("events/s (avg)")
            ax.set_ylabel("node")
            plt.show()

# GPU: density histogram of every *_avg metric, one step curve per metric.
if not gpu_long.empty:
    display(HTML("<h3>GPU — Distribution des meilleures métriques (monos et multi)</h3>"))
    # na=False keeps the mask boolean even if a metric label is missing.
    gpu_avg_rows = gpu_long[gpu_long["metric"].str.endswith("_avg", na=False)]
    if gpu_avg_rows.empty:
        print("Aucune métrique GPU *_avg à afficher.")
    else:
        # common_norm=False normalises each metric's density independently.
        g = sns.histplot(data=gpu_avg_rows, x="value", hue="metric",
                         element="step", stat="density", common_norm=False)
        plt.show()

In [None]:
# Section 7 — Distributions et corrélations

# CPU: histogram (with KDE overlay) of the non-missing average throughput.
if not cpu_norm.empty:
    display(HTML("<h4>CPU — Histogramme avg_events_per_s</h4>"))
    sns.histplot(data=cpu_norm["avg_events_per_s"].dropna(), kde=True)
    plt.show()

# GPU: correlation matrix between the *_avg metrics. The long table is first
# pivoted to one row per node and one column per metric (mean per cell).
if not gpu_long.empty:
    gpu_avg_wide = pd.pivot_table(
        gpu_long.loc[gpu_long["metric"].str.endswith("_avg")],
        index="node",
        columns="metric",
        values="value",
        aggfunc="mean",
    )
    if not gpu_avg_wide.empty:
        corr = gpu_avg_wide.corr(numeric_only=True)
        # Diverging palette centred on zero.
        sns.heatmap(corr, cmap="coolwarm", center=0)
        plt.title("GPU — Corrélations des métriques")
        plt.show()

In [None]:
# Section 8 — Comparaison multi-runs (métriques et paramètres)

# CPU: table and bar chart of the best "multi" run for each node.
comp_cpu = pd.DataFrame()
if not cpu_norm.empty:
    # Check the columns explicitly: filtering with cpu_norm.get("mode") == "multi"
    # raises KeyError when "mode" is absent, because the comparison collapses
    # to the scalar False.
    if not {"node", "mode", "avg_events_per_s"}.issubset(cpu_norm.columns):
        print("Colonnes CPU manquantes (node, mode, avg_events_per_s) : comparaison ignorée.")
    else:
        cpu_multi = cpu_norm[cpu_norm["mode"] == "multi"].copy()
        # Sort descending within each node, then keep the first (best) row.
        comp_cpu = (cpu_multi
                    .sort_values(["node", "avg_events_per_s"], ascending=[True, False])
                    .groupby("node", as_index=False).head(1))
        if comp_cpu.empty:
            print("Aucun run CPU en mode 'multi' à comparer.")
        else:
            display(comp_cpu.sort_values("avg_events_per_s", ascending=False).head(20))

            plt.figure(figsize=(10,6))
            sns.barplot(data=comp_cpu.sort_values("avg_events_per_s", ascending=False),
                        x="avg_events_per_s", y="node", color="#72B7B2")
            plt.title("CPU — Meilleures perfs multi par nœud")
            plt.xlabel("events/s (avg)")
            plt.show()

# GPU: highest *_avg value for each (node, metric) pair.
comp_gpu = pd.DataFrame()
if not gpu_long.empty:
    if "node" not in gpu_long.columns:
        print("Colonne 'node' absente des données GPU : comparaison ignorée.")
    else:
        # na=False keeps the mask boolean even if a metric label is missing.
        comp_gpu = (gpu_long[gpu_long["metric"].str.endswith("_avg", na=False)]
                    .sort_values(["node", "value"], ascending=[True, False])
                    .groupby(["node", "metric"], as_index=False).head(1))
        if comp_gpu.empty:
            print("Aucune métrique GPU *_avg à comparer.")
        else:
            display(comp_gpu.head(20))

            # Only the 50 largest values are plotted to keep the chart readable.
            fig = px.bar(comp_gpu.sort_values("value", ascending=False).head(50),
                         x="value", y="node", color="metric",
                         title="GPU — Meilleures métriques par nœud (top 50)")
            fig.show()

In [None]:
# Section 9 — Filtres interactifs (ipywidgets)

# Interactive widgets are optional: fall back to a hint when ipywidgets
# cannot be imported.
have_widgets = False
try:
    import ipywidgets as widgets
    from ipywidgets import interact, interactive, HBox, VBox
    have_widgets = True
except Exception as e:
    print("ipywidgets non disponible:", e)

if have_widgets:
    # GPU: dropdown over the *_avg metrics found in the long table.
    gpu_metrics = sorted({m for m in gpu_long.get("metric", []) if str(m).endswith("_avg")}) if not gpu_long.empty else []
    if gpu_metrics:
        @widgets.interact(metric=gpu_metrics)
        def _gpu_metric_hist(metric=gpu_metrics[0]):
            """Plot the distribution of the selected GPU metric."""
            df = gpu_long[gpu_long["metric"] == metric]
            # Nothing in this notebook creates a "backend" column, so colour
            # by it only when the source CSVs provide one; Plotly raises on
            # an unknown column name.
            color_col = "backend" if "backend" in df.columns else None
            fig = px.histogram(df, x="value", color=color_col, marginal="box",
                               title=f"Distribution {metric}")
            fig.show()
    else:
        print("Pas de métriques GPU *_avg trouvées.")

    # CPU: choose a mode and how many of the best rows to show.
    if not cpu_norm.empty and "mode" in cpu_norm.columns:
        modes = sorted(cpu_norm["mode"].dropna().unique())
        if modes:
            @widgets.interact(mode=modes, topN=widgets.IntSlider(min=5, max=50, step=5, value=20))
            def _cpu_top(mode=modes[0], topN=20):
                """Bar chart of the ``topN`` best rows for the selected mode."""
                # Use bracket access: ``cpu_norm.mode`` is the DataFrame.mode
                # method, not the "mode" column, so comparing it to a string
                # yields False and the indexing raises KeyError.
                df = (cpu_norm[cpu_norm["mode"] == mode]
                      .sort_values("avg_events_per_s", ascending=False)
                      .head(topN))
                if df.empty:
                    print(f"Aucun run CPU pour le mode {mode}.")
                    return
                # Height grows with the row count, clamped to [2, 8] inches so
                # that a short selection never gives a degenerate figure.
                plt.figure(figsize=(10, max(2, min(8, 0.4*len(df)))))
                sns.barplot(data=df, x="avg_events_per_s", y="node", color="#4C78A8")
                plt.title(f"CPU — Top {topN} ({mode})")
                plt.xlabel("events/s (avg)")
                plt.tight_layout()
                plt.show()
        else:
            print("Aucun mode CPU renseigné.")
else:
    print("Activer ipywidgets pour utiliser les filtres interactifs (voir Section 1).")

In [None]:
# Section 10 — Export des figures et des sous-ensembles
from pathlib import Path

# Resolve the export folder. Section 1 names the figures folder FIG_DIR;
# FIGURES_DIR is still honoured first for sessions that define it. The
# relative "figures" folder is only a last resort.
if 'FIGURES_DIR' in globals():
    EXPORT_DIR = Path(FIGURES_DIR)
elif 'FIG_DIR' in globals():
    EXPORT_DIR = Path(FIG_DIR)
else:
    EXPORT_DIR = Path('figures')
EXPORT_DIR.mkdir(parents=True, exist_ok=True)

# Example CSV/Parquet exports of the unified table. Each format is attempted
# independently so that a failure of one does not prevent the other.
if not all_runs.empty:
    out_csv = EXPORT_DIR / 'runs_all_clean.csv'
    out_parquet = EXPORT_DIR / 'runs_all_clean.parquet'
    try:
        all_runs.to_csv(out_csv, index=False)
        print("CSV exporté:", out_csv)
    except Exception as e:
        print("CSV non exporté:", e)
    try:
        all_runs.to_parquet(out_parquet)
        print("Parquet exporté:", out_parquet)
    except Exception as e:
        print("Parquet non exporté:", e)

# Export the current Matplotlib figure, if one is still open. Checking
# get_fignums() first avoids plt.gcf() silently creating (and saving) a blank
# figure when every earlier plot has already been shown and closed.
if plt.get_fignums():
    try:
        plt.savefig(EXPORT_DIR / 'dernier_plot_matplotlib.png', dpi=150, bbox_inches='tight')
        print("Figure Matplotlib exportée ->", EXPORT_DIR / 'dernier_plot_matplotlib.png')
    except Exception as e:
        print("Figure Matplotlib non exportée:", e)
else:
    print("Aucune figure Matplotlib ouverte à exporter.")

# Plotly: save the last figure as HTML when a global `fig` exists and really
# is a Plotly figure (i.e. it offers write_html).
if 'fig' in globals() and hasattr(fig, 'write_html'):
    try:
        fig.write_html(str(EXPORT_DIR / 'dernier_plot_plotly.html'))
        print("Figure Plotly exportée ->", EXPORT_DIR / 'dernier_plot_plotly.html')
    except Exception as e:
        print("Figure Plotly non exportée:", e)

print("Exports terminés.")