In [1]:
"""
Análisis de tracking para Tracking1, Tracking2 y Average(Tracking1,Tracking2).
- Detecta columnas relevantes del CSV.
- Convierte las columnas métricas a numérico (coerce).
- Agrupa por modelo y por (seq, model).
- Calcula composite_score basado en métricas de asociación/identidad.
- Genera CSVs y PDFs con gráficas, forzando orden de modelos:
    Salmones2024, yolov8m, yolov8l, yolov9c, yolo11m, yolo11l
- Extiende el análisis para ejecutar versiones específicas por formato de exportación (Pytorch, FP16, INT8).

Simplificado: se elimina toda la lógica de análisis por tracker y combinaciones export+tracker.
"""

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# -----------------------
# Config
# -----------------------
IN_PATH = "combined_results.csv"           # Cambia si tu CSV está en otra ruta
BASE_OUT_DIR = "results"                   # Carpeta de salida raíz
SEQ_LIST = ["Tracking1", "Tracking2"]      # Secuencias objetivo
DESIRED_ORDER = ['Salmones2024', 'yolov8m', 'yolov8l', 'yolov9c', 'yolo11m', 'yolo11l']
EXPORT_VARIANTS = ["Pytorch", "FP16", "INT8"]  # Formatos de exportación a analizar individualmente

os.makedirs(BASE_OUT_DIR, exist_ok=True)

METRIC_CANDIDATES = {
    'IDF1'      : ['IDF1','IDF_1','ID_F1'],
    'AssA_AUC'  : ['AssA___AUC','AssA_AUC','AssA AUC','AssA'],
    'HOTA_AUC'  : ['HOTA___AUC','HOTA_AUC','HOTA'],
    'MT'        : ['MT','MTR','Mostly-Tracked','MostlyTracked'],
    'PT'        : ['PT','Partially-Tracked','PartiallyTracked'],
    'ML'        : ['ML','Mostly-Lost','MostlyLost'],
    'IDSW'      : ['IDSW','ID Sw','ID_Switches','id switches'],
    'Frag'      : ['Frag','FRAG','Fragmentation','Frags'],
    'GT_IDs'    : ['GT_IDs','GT_ID','GT_IDS','GT_IDs','GT_IDs '],
    'Dets'      : ['Dets','Detections','GT_Dets','Dets '],
    'MT_percent': ['MT_percent','MT(%)','MT %']
}

METRIC_DISPLAY_ORDER = ['IDF1','AssA_AUC','HOTA_AUC','MT','PT','ML','IDSW','Frag']

def detect_model_column(df):
    # prefer 'model', luego 'export', luego alguna columna object con >1 valor, si no última
    if 'model' in df.columns:
        return 'model'
    if 'export' in df.columns:
        return 'export'
    for c in df.columns:
        if df[c].dtype == object and df[c].nunique() > 1:
            return c
    return df.columns[-1]

def ensure_seq_string(df):
    if 'seq' in df.columns:
        df = df.copy()
        df['seq'] = df['seq'].astype(str)
    return df

# Aesthetic matplotlib
plt.style.use('default')
import matplotlib as mpl
mpl.rcParams.update({
    'figure.facecolor': 'white',
    'figure.edgecolor': 'white',
    'axes.facecolor': 'white',
    'axes.edgecolor': 'black',
    'axes.labelcolor': 'black',
    'xtick.color': 'black',
    'ytick.color': 'black',
    'text.color': 'black',
    'legend.facecolor': 'white',
    'legend.edgecolor': 'black',
    'savefig.facecolor': 'white',
    'savefig.edgecolor': 'white',
    'savefig.transparent': False,
})

# -----------------------
# Funciones utilitarias generales
# -----------------------

def slugify_tag(value):
    """Normaliza strings para usarlos en nombres de carpetas."""
    text = str(value).strip()
    for ch in (" ", "/", "\\", ":", "*", "?", "\"", "<", ">", "|"):
        text = text.replace(ch, "_")
    while "__" in text:
        text = text.replace("__", "_")
    return text or "unknown"


def choose_col(df, preferred_list):
    """Elige la columna exacta o la primera que contiene el substring."""
    for p in preferred_list:
        if p in df.columns:
            return p
    for p in preferred_list:
        matches = [c for c in df.columns if p.lower() in c.lower()]
        if matches:
            return matches[0]
    return None


def agg_map_from_metrics(metrics_map, df):
    cols = [v for v in metrics_map.values() if v in df.columns and pd.api.types.is_numeric_dtype(df[v])]
    return {col: 'mean' for col in cols}


def compute_grouped_model(df_sub, model_col, agg_map, metrics_map):
    """Agrupa df_sub por modelo, usando agg_map seguro, y renombra a short names."""
    if df_sub.empty:
        return pd.DataFrame()
    agg_map_sub = {k: v for k, v in agg_map.items() if k in df_sub.columns}
    if not agg_map_sub:
        return pd.DataFrame()
    grp = df_sub.groupby(model_col).agg(agg_map_sub)
    rename_map = {v: k for k, v in metrics_map.items() if v in grp.columns}
    grp = grp.rename(columns=rename_map)
    return grp


def add_composite_score(grouped_model):
    """Anexa columnas normalizadas y composite_score (0..1)."""
    gm = grouped_model.copy()
    if gm.empty:
        return gm
    gm = gm.fillna(0)
    norm_cols = [c for c in ['AssA_AUC', 'IDF1', 'HOTA_AUC', 'MT_percent', 'IDSW', 'Frag'] if c in gm.columns]
    if norm_cols:
        scaler = MinMaxScaler()
        scaled = scaler.fit_transform(gm[norm_cols])
        scaled_df = pd.DataFrame(scaled, index=gm.index, columns=[c + "_norm" for c in norm_cols])
        gm = pd.concat([gm, scaled_df], axis=1)
    weights = {
        'AssA_AUC_norm': 0.35,
        'IDF1_norm': 0.25,
        'HOTA_AUC_norm': 0.15,
        'MT_percent_norm': 0.10,
        'IDSW_norm': -0.10,
        'Frag_norm': -0.10
    }
    used_weights = {k: w for k, w in weights.items() if k in gm.columns}
    score = np.zeros(len(gm))
    for k, w in used_weights.items():
        score += w * gm[k].values
    if score.max() > score.min():
        score_norm = (score - score.min()) / (score.max() - score.min())
    else:
        score_norm = np.zeros_like(score)
    gm['composite_score_raw'] = score
    gm['composite_score'] = score_norm
    return gm


def reorder_df_index(df_in, order):
    if df_in.empty:
        return df_in
    present = [m for m in order if m in df_in.index]
    others = [m for m in df_in.index if m not in present]
    return df_in.reindex(present + others)


def safe_get_vals(df_g, metric, model_order):
    vals = []
    for m in model_order:
        if m in df_g.index and metric in df_g.columns:
            vals.append(df_g.loc[m, metric])
        else:
            vals.append(np.nan)
    return vals


def compute_seq_composites_table(df_seq_model_table, model_col, metrics_map):
    rows = []
    metrics_for_score = [c for c in ['AssA_AUC', 'IDF1', 'HOTA_AUC', 'MT_percent', 'IDSW', 'Frag'] if c in df_seq_model_table.columns]
    if not metrics_for_score or 'seq' not in df_seq_model_table.columns:
        return pd.DataFrame()
    for seq, g in df_seq_model_table.groupby('seq'):
        g2 = g.copy()
        norm_vals = {}
        for c in metrics_for_score:
            vals = g2[c].astype(float).values.reshape(-1, 1)
            if np.isnan(vals).all() or np.nanmax(vals) == np.nanmin(vals):
                norm_vals[c + '_norm'] = np.full(vals.shape, 0.5).flatten()
            else:
                norm_vals[c + '_norm'] = MinMaxScaler().fit_transform(vals).flatten()
        for idx, row in g2.iterrows():
            model_name = row[model_col]
            row_out = {'seq': seq, model_col: model_name}
            for c in metrics_for_score:
                row_out[c] = row[c]
                row_out[c + '_norm'] = float(norm_vals[c + '_norm'][list(g2.index).index(idx)])
            rows.append(row_out)
    df_out = pd.DataFrame(rows)
    weights = {'AssA_AUC_norm': 0.35, 'IDF1_norm': 0.25, 'HOTA_AUC_norm': 0.15, 'MT_percent_norm': 0.10, 'IDSW_norm': -0.10, 'Frag_norm': -0.10}
    df_out['composite_score_raw'] = 0.0
    for k, w in weights.items():
        if k in df_out.columns:
            df_out['composite_score_raw'] += w * df_out[k]
    df_out = df_out.groupby('seq').apply(
        lambda g2: g2.assign(
            composite_score=(g2['composite_score_raw'] - g2['composite_score_raw'].min()) /
            (g2['composite_score_raw'].max() - g2['composite_score_raw'].min())
            if g2['composite_score_raw'].max() > g2['composite_score_raw'].min() else 0.0
        )
    ).reset_index(drop=True)
    return df_out

# -----------------------
# Núcleo del análisis parametrizable por subconjunto
# -----------------------

def run_analysis_for_subset(df_input, out_dir, label, seq_list=SEQ_LIST, desired_order=DESIRED_ORDER):
    print(f"\n=== Análisis para {label} ===")
    if df_input is None or df_input.empty:
        print("Sin datos para analizar. Se omite.")
        return

    os.makedirs(out_dir, exist_ok=True)

    df = df_input.copy()
    print(f"Filas consideradas: {len(df)}")

    seq_present, seq_labels_for_outputs = detect_sequences(df, seq_list)
    model_col = detect_model_column(df)
    metrics_map, detected_raw = detect_metrics(df, METRIC_CANDIDATES)
    df = ensure_numeric_metrics(df, metrics_map)
    df, metrics_map = ensure_mt_percent(df, metrics_map)
    detected_raw['MT_percent'] = metrics_map.get('MT_percent')
    print("Columnas detectadas:")
    print(" model_col:", model_col)
    for key in METRIC_DISPLAY_ORDER + ['MT_percent']:
        print(f" {key}:", detected_raw.get(key))
    print("Métricas mapeadas:")
    for k, v in metrics_map.items():
        print(" -", k, "->", v)
    agg_map = agg_map_from_metrics(metrics_map, df)
    print("Agg map (columnas numéricas que se usarán para mean):")
    print(agg_map)

    grouped_by_seq, df_seq_model, seq_for_agg = build_grouped_views(
        df, seq_labels_for_outputs, seq_present, model_col, agg_map, metrics_map, desired_order
    )

    # Guardar CSV resumen por modelo para cada vista
    for key, gm in grouped_by_seq.items():
        if gm is None or gm.empty:
            continue
        out_path = os.path.join(out_dir, f"model_summary_{key}.csv")
        gm.sort_values('composite_score', ascending=False).to_csv(out_path)
        print("Guardado:", out_path)

    # Plots: métricas clave por modelo
    plot_metrics = [
        c for c in ['IDF1', 'AssA_AUC', 'HOTA_AUC', 'MT_percent']
        if any(c in grouped_by_seq[k].columns for k in grouped_by_seq)
    ]
    for key in list(grouped_by_seq.keys()):
        gm = grouped_by_seq[key]
        if gm is None or gm.empty:
            continue
        x = np.arange(len(desired_order))
        plt.figure(figsize=(10, 6))
        width = 0.15
        n = len(plot_metrics)
        for i, metric in enumerate(plot_metrics):
            vals = safe_get_vals(gm, metric, desired_order)
            plt.bar(x + (i - (n - 1) / 2) * width, vals, width, label=metric)
        plt.xticks(x, desired_order, rotation=45, ha='right')
        plt.ylabel("Valor medio (por seq)")
        plt.title(f"Comparación métricas clave por modelo - {key}")
        plt.legend()
        plt.tight_layout()
        p = os.path.join(out_dir, f"metricas_clave_por_modelo_{key}.pdf")
        plt.savefig(p)
        plt.close()
        print("Plot guardado:", p)

    # Curvas AssA & DetA por IoU (por seq y Average)
    ass_cols = [c for c in df.columns if c.startswith("AssA___") and c.split("___")[-1].isdigit()]
    det_cols = [c for c in df.columns if c.startswith("DetA___") and c.split("___")[-1].isdigit()]

    thresholds_ass = [int(c.split("___")[-1]) for c in ass_cols] if ass_cols else []
    thresholds_det = [int(c.split("___")[-1]) for c in det_cols] if det_cols else []

    if 'seq' in df.columns and seq_labels_for_outputs:
        for seq_name in seq_labels_for_outputs:
            df_sub = df[df['seq'] == seq_name]
            if df_sub.empty:
                continue
            if ass_cols:
                mean_by_model_ass = df_sub.groupby(model_col)[ass_cols].mean()
                plt.figure(figsize=(8, 5))
                for m in desired_order:
                    if m in mean_by_model_ass.index:
                        plt.plot(thresholds_ass, mean_by_model_ass.loc[m].values, marker='o', label=m)
                plt.xlabel("IoU threshold (%)")
                plt.ylabel("AssA")
                plt.title(f"AssA vs IoU - {seq_name}")
                plt.legend()
                plt.tight_layout()
                p = os.path.join(out_dir, f"assa_vs_iou_{seq_name}.pdf")
                plt.savefig(p)
                plt.close()
                print("Saved:", p)
            if det_cols:
                mean_by_model_det = df_sub.groupby(model_col)[det_cols].mean()
                plt.figure(figsize=(8, 5))
                for m in desired_order:
                    if m in mean_by_model_det.index:
                        plt.plot(thresholds_det, mean_by_model_det.loc[m].values, marker='o', label=m)
                plt.xlabel("IoU threshold (%)")
                plt.ylabel("DetA")
                plt.title(f"DetA vs IoU - {seq_name}")
                plt.legend()
                plt.tight_layout()
                p = os.path.join(out_dir, f"deta_vs_iou_{seq_name}.pdf")
                plt.savefig(p)
                plt.close()
                print("Saved:", p)

        if ass_cols and seq_for_agg:
            mean_seq_model_ass = df[df['seq'].isin(seq_for_agg)].groupby(['seq', model_col])[ass_cols].mean()
        else:
            mean_seq_model_ass = None
        if det_cols and seq_for_agg:
            mean_seq_model_det = df[df['seq'].isin(seq_for_agg)].groupby(['seq', model_col])[det_cols].mean()
        else:
            mean_seq_model_det = None

        if ass_cols and mean_seq_model_ass is not None and not mean_seq_model_ass.empty:
            avg_ass = mean_seq_model_ass.groupby(model_col).mean()
            plt.figure(figsize=(8, 5))
            for m in desired_order:
                if m in avg_ass.index:
                    plt.plot(thresholds_ass, avg_ass.loc[m].values, marker='o', label=m)
            plt.xlabel("IoU threshold (%)")
            plt.ylabel("AssA")
            plt.title("AssA vs IoU - Average")
            plt.legend()
            plt.tight_layout()
            p = os.path.join(out_dir, f"assa_vs_iou_Average.pdf")
            plt.savefig(p)
            plt.close()
            print("Saved:", p)
        if det_cols and mean_seq_model_det is not None and not mean_seq_model_det.empty:
            avg_det = mean_seq_model_det.groupby(model_col).mean()
            plt.figure(figsize=(8, 5))
            for m in desired_order:
                if m in avg_det.index:
                    plt.plot(thresholds_det, avg_det.loc[m].values, marker='o', label=m)
            plt.xlabel("IoU threshold (%)")
            plt.ylabel("DetA")
            plt.title("DetA vs IoU - Average")
            plt.legend()
            plt.tight_layout()
            p = os.path.join(out_dir, f"deta_vs_iou_Average.pdf")
            plt.savefig(p)
            plt.close()
            print("Saved:", p)

    # IDSW & Frag por modelo (por seq y Average)
    for key in list(grouped_by_seq.keys()):
        gm = grouped_by_seq.get(key)
        if gm is None or gm.empty:
            continue
        x = np.arange(len(desired_order))
        plt.figure(figsize=(10, 4))
        width = 0.35
        idsw_vals = safe_get_vals(gm, 'IDSW', desired_order)
        frag_vals = safe_get_vals(gm, 'Frag', desired_order)
        plt.bar(x - width / 2, idsw_vals, width, label='IDSW')
        plt.bar(x + width / 2, frag_vals, width, label='Frag')
        plt.xticks(x, desired_order, rotation=45, ha='right')
        plt.ylabel("Valor medio")
        plt.title(f"IDSW y Frag por modelo - {key}")
        plt.legend()
        plt.tight_layout()
        p = os.path.join(out_dir, f"idsw_frag_por_modelo_{key}.pdf")
        plt.savefig(p)
        plt.close()
        print("Saved:", p)

    # Matriz / heatmap composite: Tracking1, Tracking2, Average
    df_seq_model_short = df_seq_model.copy()
    rename_map_long_to_short = {v: k for k, v in metrics_map.items() if v in df_seq_model_short.columns}
    if rename_map_long_to_short:
        df_seq_model_short = df_seq_model_short.rename(columns=rename_map_long_to_short)

    if 'seq' in df_seq_model_short.columns and not df_seq_model_short.empty:
        df_seq_composite = compute_seq_composites_table(df_seq_model_short, model_col, metrics_map)
    else:
        df_seq_composite = pd.DataFrame()

    seq_for_matrix = seq_for_agg
    if not df_seq_composite.empty and seq_for_matrix:
        pivot = df_seq_composite[df_seq_composite['seq'].isin(seq_for_matrix)].pivot(index='seq', columns=model_col, values='composite_score')
    else:
        fallback_index = seq_for_matrix + ['Average'] if seq_for_matrix else ['Average']
        pivot = pd.DataFrame(index=fallback_index)

    if not pivot.empty:
        avg_row = pivot.mean(axis=0).to_frame().T
        avg_row.index = ['Average']
        pivot = pd.concat([pivot, avg_row])
        cols_present = [c for c in desired_order if c in pivot.columns]
        other_cols = [c for c in pivot.columns if c not in cols_present]
        pivot = pivot[cols_present + other_cols]
    pivot_path = os.path.join(out_dir, "seq_model_composite_matrix_tracking12.csv")
    pivot.to_csv(pivot_path)
    print("Guardado pivot:", pivot_path)

    if not pivot.empty:
        plt.figure(figsize=(8, 2 + 0.8 * len(pivot.index)))
        plt.imshow(pivot.values, aspect='auto', interpolation='nearest')
        plt.yticks(range(len(pivot.index)), pivot.index)
        plt.xticks(range(len(pivot.columns)), pivot.columns, rotation=45, ha='right')
        plt.colorbar(label='composite_score (0-1)')
        plt.title("Heatmap: composite_score (Tracking1, Tracking2, Average)")
        plt.tight_layout()
        heatmap_path = os.path.join(out_dir, "seq_model_composite_heatmap_tracking12.pdf")
        plt.savefig(heatmap_path)
        plt.close()
        print("Saved heatmap:", heatmap_path)
    else:
        print("No se pudo generar heatmap para", label)

    if not df_seq_composite.empty and seq_for_matrix:
        seq_rankings = df_seq_composite[df_seq_composite['seq'].isin(seq_for_matrix)].copy()
    else:
        seq_rankings = pd.DataFrame()
    if not seq_rankings.empty:
        avg_composite = seq_rankings.groupby(model_col)['composite_score_raw'].mean().reset_index()
        if avg_composite['composite_score_raw'].max() > avg_composite['composite_score_raw'].min():
            avg_composite['composite_score'] = (
                (avg_composite['composite_score_raw'] - avg_composite['composite_score_raw'].min()) /
                (avg_composite['composite_score_raw'].max() - avg_composite['composite_score_raw'].min())
            )
        else:
            avg_composite['composite_score'] = 0.0
        avg_composite['seq'] = 'Average'
        seq_rankings_combined = pd.concat([
            seq_rankings,
            avg_composite.rename(columns={model_col: model_col})
        ], sort=False, ignore_index=True, axis=0)
        seq_rankings_path = os.path.join(out_dir, "seq_model_rankings_tracking12.csv")
        seq_rankings_combined.to_csv(seq_rankings_path, index=False)
        print("Guardado seq rankings:", seq_rankings_path)
    else:
        print("No hay df_seq_composite para guardar rankings.")

    print(f"Análisis completado para {label}. Resultados en: {out_dir}")

# -----------------------
# Lectura de datos y ejecución por subconjuntos (global + export)
# -----------------------

df_all = pd.read_csv(IN_PATH)

analysis_targets = [("global", df_all, BASE_OUT_DIR)]
registered_labels = {"global"}

def add_subset(label, subset_df):
    if subset_df is None or subset_df.empty:
        print(f"Aviso: subset '{label}' sin datos. Se omite.")
        return
    if label in registered_labels:
        return
    out_dir = os.path.join(BASE_OUT_DIR, label)
    analysis_targets.append((label, subset_df, out_dir))
    registered_labels.add(label)

export_values_detected = []
target_exports = []
if 'export' in df_all.columns:
    export_values_detected = sorted(
        [v for v in df_all['export'].dropna().unique() if str(v).strip()],
        key=lambda v: str(v)
    )
    if export_values_detected:
        print("Formatos de export detectados:", ", ".join(str(v) for v in export_values_detected))
    else:
        print("No se encontraron valores en la columna 'export'.")
    target_exports = [val for val in EXPORT_VARIANTS if val in export_values_detected]
    missing_exports = [val for val in EXPORT_VARIANTS if val not in export_values_detected]
    if target_exports:
        print("Se generarán análisis específicos para:", ", ".join(str(v) for v in target_exports))
    else:
        print("No se encontraron variantes de export objetivo en los datos.")
    if missing_exports:
        print("Nota: sin datos para:", ", ".join(str(v) for v in missing_exports))
    additional_exports = [val for val in export_values_detected if val not in EXPORT_VARIANTS]
    if additional_exports:
        print("Exportaciones adicionales detectadas:", ", ".join(str(v) for v in additional_exports))
    for val in target_exports:
        subset = df_all[df_all['export'] == val]
        add_subset(f"export_{slugify_tag(val)}", subset)
else:
    print("Aviso: columna 'export' no encontrada. Sólo se generará el análisis global.")

# REMOVED: análisis por tracker y combinaciones export+tracker
# (Se eliminó bloque que definía tracker_values_detected, target_trackers y bucle de combinaciones)
for label, df_subset, out_dir in analysis_targets:
    run_analysis_for_subset(df_subset, out_dir, label)

Formatos de export detectados: FP16, INT8, Pytorch
Se generarán análisis específicos para: Pytorch, FP16, INT8

=== Análisis para global ===
Filas consideradas: 108


NameError: name 'detect_sequences' is not defined