In [2]:
#!/usr/bin/env python3
"""
Análisis de tracking para Tracking1, Tracking2 y Average(Tracking1,Tracking2).
- Detecta columnas relevantes del CSV.
- Convierte las columnas métricas a numérico (coerce).
- Agrupa por modelo y por (seq, model).
- Calcula composite_score basado en métricas de asociación/identidad.
- Genera CSVs y PDFs con gráficas, forzando orden de modelos:
    Salmones2024, yolov8m, yolov8l, yolov9c, yolo11m, yolo11l
"""

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# -----------------------
# Config
# -----------------------
IN_PATH = "combined_results.csv"           # Change this if your CSV lives somewhere else
OUT_DIR = "results"             # Output folder for every CSV/PDF written below
SEQ_LIST = ["Tracking1", "Tracking2"]      # Sequences included in the analysis
# Fixed model order used on plot axes and when reordering summary tables.
DESIRED_ORDER = ['Salmones2024','yolov8m','yolov8l','yolov9c','yolo11m','yolo11l']

# Create the output folder up front; exist_ok makes re-runs harmless.
os.makedirs(OUT_DIR, exist_ok=True)

# Matplotlib look: reset to the default style first, then force white
# backgrounds and black axes/text so saved figures are never transparent.
plt.style.use('default')
import matplotlib as mpl
mpl.rcParams.update({
    'figure.facecolor': 'white',
    'figure.edgecolor': 'white',
    'axes.facecolor': 'white',
    'axes.edgecolor': 'black',
    'axes.labelcolor': 'black',
    'xtick.color': 'black',
    'ytick.color': 'black',
    'text.color': 'black',
    'legend.facecolor': 'white',
    'legend.edgecolor': 'black',
    'savefig.facecolor': 'white',
    'savefig.edgecolor': 'white',
    'savefig.transparent': False,
})

# -----------------------
# Load the CSV (column detection follows further down)
# -----------------------
df = pd.read_csv(IN_PATH)

# Cast 'seq' to string, when present, so it compares cleanly against SEQ_LIST.
# NOTE(review): later sections index df['seq'] unconditionally, so a CSV
# without a 'seq' column still fails further down — confirm whether this
# guard should instead raise a clear error here.
if 'seq' in df.columns:
    df['seq'] = df['seq'].astype(str)

def choose_col(df, preferred_list):
    """Pick the column of ``df`` that best matches a list of candidate names.

    An exact column name wins, with candidates tried in the order given.
    If no candidate is an exact column name, the candidates are tried again,
    in order, as case-insensitive substrings; the first column (in column
    order) containing the candidate is returned.

    Returns ``None`` when nothing matches.
    """
    exact_hits = [name for name in preferred_list if name in df.columns]
    if exact_hits:
        return exact_hits[0]

    for name in preferred_list:
        needle = name.lower()
        partial_hits = [col for col in df.columns if needle in col.lower()]
        if partial_hits:
            return partial_hits[0]
    return None

# Model/export column: prefer 'model', then 'export', otherwise fall back to
# the last column of the CSV.
if 'model' in df.columns:
    model_col = 'model'
elif 'export' in df.columns:
    model_col = 'export'
else:
    model_col = df.columns[-1]

# Locate the key metric columns, trying several spellings for each one.
assauc_col   = choose_col(df, ['AssA___AUC','AssA_AUC','AssA___AUC','AssA AUC','AssA'])
idf1_col     = choose_col(df, ['IDF1','IDF_1','ID_F1'])
hota_auc_col = choose_col(df, ['HOTA___AUC','HOTA_AUC','HOTA___AUC','HOTA'])
mt_col       = choose_col(df, ['MT','MTR','Mostly-Tracked','MostlyTracked'])
gt_ids_col   = choose_col(df, ['GT_IDs','GT_IDS','GT_ID','GT_IDs','GT_ID'])
idsw_col     = choose_col(df, ['IDSW','ID Sw','ID_Switches','id switches'])
frag_col     = choose_col(df, ['Frag','Fragmentation','FRAG'])
dets_col     = choose_col(df, ['Dets','Detections','GT_Dets','Dets '])

print("Columnas detectadas:")
for label, detected in (
    ("model_col", model_col),
    ("AssA_AUC", assauc_col),
    ("IDF1", idf1_col),
    ("HOTA_AUC", hota_auc_col),
    ("MT", mt_col),
    ("GT_IDs", gt_ids_col),
    ("IDSW", idsw_col),
    ("Frag", frag_col),
    ("Dets", dets_col),
):
    print(f" {label}:", detected)

# -----------------------
# Derive MT_percent (MT / GT_IDs) when both inputs were found
# -----------------------
if not (mt_col and gt_ids_col and mt_col in df.columns and gt_ids_col in df.columns):
    df['MT_percent'] = np.nan
else:
    # Coerce both inputs to numeric first so the division cannot hit strings.
    for ratio_input in (mt_col, gt_ids_col):
        df[ratio_input] = pd.to_numeric(df[ratio_input], errors='coerce')
    # A zero denominator becomes NaN instead of producing inf.
    nonzero_gt_ids = df[gt_ids_col].replace({0: np.nan})
    df['MT_percent'] = df[mt_col] / nonzero_gt_ids

# -----------------------
# Map available metrics (short name -> actual column)
# -----------------------
candidates = [
    ('AssA_AUC', assauc_col),
    ('IDF1', idf1_col),
    ('HOTA_AUC', hota_auc_col),
    ('MT', mt_col),
    ('GT_IDs', gt_ids_col),
    ('IDSW', idsw_col),
    ('Frag', frag_col),
    ('Dets', dets_col),
    ('MT_percent', 'MT_percent')
]
metrics_map = {
    short_name: column
    for short_name, column in candidates
    if column and (column in df.columns or column == 'MT_percent')
}

print("Métricas mapeadas:")
for short_name, column in metrics_map.items():
    print(f" - {short_name} -> {column}")

# -----------------------
# Coerce every detected metric column to numeric
# -----------------------
numeric_cols_to_try = [column for column in metrics_map.values() if column in df.columns]
for metric_column in numeric_cols_to_try:
    df[metric_column] = pd.to_numeric(df[metric_column], errors='coerce')

# -----------------------
# Construir agg_map seguro (solo columnas numéricas)
# -----------------------
def agg_map_from_metrics(metrics_map, df):
    """Build the ``{column: 'mean'}`` aggregation spec for the mapped metrics.

    Only columns that exist in ``df`` and have a numeric dtype are included,
    so the result can be passed to ``groupby(...).agg`` directly.
    """
    aggregations = {}
    for column in metrics_map.values():
        if column not in df.columns:
            continue
        if pd.api.types.is_numeric_dtype(df[column]):
            aggregations[column] = 'mean'
    return aggregations

# Aggregation spec shared by every groupby below; printed for inspection.
agg_map = agg_map_from_metrics(metrics_map, df)
print("Agg map (columnas numéricas que se usarán para mean):", agg_map, sep="\n")

# -----------------------
# Funciones utilitarias
# -----------------------
def compute_grouped_model(df_sub, model_col, agg_map, metrics_map):
    """Aggregate ``df_sub`` per model and expose the metrics under short names.

    Only the entries of ``agg_map`` whose column exists in ``df_sub`` are
    applied. The aggregated columns are then renamed from their real CSV
    names to the short names in ``metrics_map``. Returns an empty DataFrame
    when ``df_sub`` is empty or none of the ``agg_map`` columns are present.
    """
    if df_sub.empty:
        return pd.DataFrame()

    usable_aggs = {col: how for col, how in agg_map.items() if col in df_sub.columns}
    if not usable_aggs:
        return pd.DataFrame()

    per_model = df_sub.groupby(model_col).agg(usable_aggs)

    # Translate real column names back to their short aliases.
    long_to_short = {}
    for short_name, long_name in metrics_map.items():
        if long_name in per_model.columns:
            long_to_short[long_name] = short_name
    return per_model.rename(columns=long_to_short)

def add_composite_score(grouped_model):
    """Return a copy of ``grouped_model`` with normalised metrics and a composite score.

    Missing values are replaced by 0. Each scoring metric that is present is
    min-max scaled across the rows into a ``<metric>_norm`` column. The
    ``*_norm`` columns are combined with fixed weights (negative for IDSW and
    Frag) into ``composite_score_raw``, which is then rescaled to 0..1 as
    ``composite_score``. If every row has the same raw score, all composite
    scores are 0. An empty input is returned as an (empty) copy without any
    new columns.
    """
    gm = grouped_model.copy()
    if gm.empty:
        return gm
    gm = gm.fillna(0)

    # (metric, weight) pairs; the order also fixes the order of *_norm columns.
    weighted_metrics = (
        ('AssA_AUC', 0.35),
        ('IDF1', 0.25),
        ('HOTA_AUC', 0.15),
        ('MT_percent', 0.10),
        ('IDSW', -0.10),
        ('Frag', -0.10),
    )

    present = [name for name, _ in weighted_metrics if name in gm.columns]
    if present:
        scaled = MinMaxScaler().fit_transform(gm[present])
        norm_frame = pd.DataFrame(
            scaled,
            index=gm.index,
            columns=[name + "_norm" for name in present],
        )
        gm = pd.concat([gm, norm_frame], axis=1)

    raw = np.zeros(len(gm))
    for name, weight in weighted_metrics:
        norm_name = name + "_norm"
        if norm_name in gm.columns:
            raw += weight * gm[norm_name].values

    lowest, highest = raw.min(), raw.max()
    if highest > lowest:
        rescaled = (raw - lowest) / (highest - lowest)
    else:
        rescaled = np.zeros_like(raw)

    gm['composite_score_raw'] = raw
    gm['composite_score'] = rescaled
    return gm

def reorder_df_index(df_in, order):
    """Reorder the rows of ``df_in`` so labels listed in ``order`` come first.

    Labels from ``order`` that exist in the index are placed first, in that
    order; all remaining index labels follow in their current order. An empty
    frame is returned unchanged.
    """
    if df_in.empty:
        return df_in
    leading = list(filter(lambda label: label in df_in.index, order))
    trailing = [label for label in df_in.index if label not in leading]
    return df_in.reindex([*leading, *trailing])

def safe_get_vals(df_g, metric, model_order):
    """Return the ``metric`` value for each model in ``model_order``.

    A model missing from the index of ``df_g``, or a ``metric`` missing from
    its columns, yields NaN in that position, so the result always lines up
    one-to-one with ``model_order``.
    """
    return [
        df_g.loc[model, metric]
        if (model in df_g.index and metric in df_g.columns)
        else np.nan
        for model in model_order
    ]

# -----------------------
# 1) Per-model summary for each sequence (Tracking1, Tracking2)
# -----------------------
grouped_by_seq = {}
for seq in SEQ_LIST:
    df_sub = df[df['seq'] == seq]
    grouped_by_seq[seq] = compute_grouped_model(df_sub, model_col, agg_map, metrics_map)

# -----------------------
# 2) Build df_seq_model (numeric columns only) and the per-model Average
# -----------------------
if agg_map:
    # Real (CSV) names of the numeric metric columns.
    numeric_agg_cols = list(agg_map.keys())
    # Keep only 'seq', the model column and the numeric metrics.
    df_seq_model = df[df['seq'].isin(SEQ_LIST)][['seq', model_col] + numeric_agg_cols].copy()
    # One row per (seq, model) holding the mean of every metric.
    df_seq_model = df_seq_model.groupby(['seq', model_col]).mean().reset_index()
    # Switch to the short metric names where a mapping exists.
    rename_map = {v: k for k, v in metrics_map.items() if v in df_seq_model.columns}
    if rename_map:
        df_seq_model = df_seq_model.rename(columns=rename_map)
else:
    df_seq_model = pd.DataFrame(columns=['seq', model_col])

# Average view: per model, the mean across sequences of the per-sequence means,
# so every sequence weighs the same regardless of how many rows it has.
if not df_seq_model.empty:
    numeric_short_cols = [k for k in metrics_map.keys() if k in df_seq_model.columns]
    grouped_by_seq['Average'] = df_seq_model.groupby(model_col)[numeric_short_cols].mean()
else:
    grouped_by_seq['Average'] = pd.DataFrame()

# Attach the composite score and apply the preferred model order.
for key in list(grouped_by_seq.keys()):
    grouped_by_seq[key] = add_composite_score(grouped_by_seq[key])
    grouped_by_seq[key] = reorder_df_index(grouped_by_seq[key], DESIRED_ORDER)

# Write one summary CSV per view, best composite score first.
for key, gm in grouped_by_seq.items():
    # An empty view (e.g. a sequence with no rows in the CSV) carries no
    # 'composite_score' column, so sorting by it would raise KeyError.
    if gm.empty or 'composite_score' not in gm.columns:
        print("Sin datos para", key, "- no se guarda resumen.")
        continue
    out_path = os.path.join(OUT_DIR, f"model_summary_{key}.csv")
    gm.sort_values('composite_score', ascending=False).to_csv(out_path)
    print("Guardado:", out_path)

# -----------------------
# Plots: key metrics per model (IDF1, AssA_AUC, HOTA_AUC, MT_percent)
# -----------------------
# Keep only the metrics that exist in at least one of the views.
plot_metrics = [c for c in ['IDF1','AssA_AUC','HOTA_AUC','MT_percent'] if any(c in grouped_by_seq[k].columns for k in grouped_by_seq)]
for key in list(grouped_by_seq.keys()):
    gm = grouped_by_seq[key]
    # Views without data produce no figure.
    if gm is None or gm.empty:
        continue
    # One x slot per model in DESIRED_ORDER; models outside that list are not drawn.
    x = np.arange(len(DESIRED_ORDER))
    plt.figure(figsize=(10,6))
    width = 0.15
    n = len(plot_metrics)
    for i, metric in enumerate(plot_metrics):
        # safe_get_vals yields NaN for models/metrics missing from this view.
        vals = safe_get_vals(gm, metric, DESIRED_ORDER)
        # Offset each metric's bars so the group is centred on its x slot.
        plt.bar(x + (i-(n-1)/2)*width, vals, width, label=metric)
    plt.xticks(x, DESIRED_ORDER, rotation=45, ha='right')
    plt.ylabel("Valor medio (por seq)")
    plt.title(f"Comparación métricas clave por modelo - {key}")
    plt.legend()
    plt.tight_layout()
    p = os.path.join(OUT_DIR, f"metricas_clave_por_modelo_{key}.pdf")
    plt.savefig(p)
    # Close the figure so the next iteration starts from a clean one.
    plt.close()
    print("Plot guardado:", p)

# -----------------------
# AssA & DetA curves vs IoU threshold (per sequence and Average)
# -----------------------
def _iou_curve_columns(columns, prefix):
    """Return ``(column_names, thresholds)`` for columns named ``<prefix><int>``.

    Both lists are sorted by ascending threshold so a curve is drawn left to
    right even if the CSV header lists the thresholds out of order.
    """
    tagged = []
    for c in columns:
        suffix = c.split("___")[-1]
        if c.startswith(prefix) and suffix.isdigit():
            tagged.append((int(suffix), c))
    tagged.sort(key=lambda pair: pair[0])  # stable: equal thresholds keep column order
    return [c for _, c in tagged], [t for t, _ in tagged]


def _save_iou_curves(mean_by_model, thresholds, metric_label, view_name, file_name):
    """Plot one curve per model of DESIRED_ORDER and save the figure under OUT_DIR.

    ``mean_by_model`` is indexed by model, with one column per threshold in the
    same order as ``thresholds``. Models absent from the index are skipped.
    """
    plt.figure(figsize=(8, 5))
    drew_any = False
    for m in DESIRED_ORDER:
        if m in mean_by_model.index:
            plt.plot(thresholds, mean_by_model.loc[m].values, marker='o', label=m)
            drew_any = True
    plt.xlabel("IoU threshold (%)")
    plt.ylabel(metric_label)
    plt.title(f"{metric_label} vs IoU - {view_name}")
    # Only build a legend when at least one labelled curve exists.
    if drew_any:
        plt.legend()
    plt.tight_layout()
    p = os.path.join(OUT_DIR, file_name)
    plt.savefig(p)
    plt.close()
    print("Saved:", p)


ass_cols, thresholds_ass = _iou_curve_columns(df.columns, "AssA___")
det_cols, thresholds_det = _iou_curve_columns(df.columns, "DetA___")

# One pair of figures per sequence.
for seq in SEQ_LIST:
    df_sub = df[df['seq'] == seq]
    if df_sub.empty:
        continue
    if ass_cols:
        mean_by_model_ass = df_sub.groupby(model_col)[ass_cols].mean()
        _save_iou_curves(mean_by_model_ass, thresholds_ass, "AssA", seq, f"assa_vs_iou_{seq}.pdf")
    if det_cols:
        mean_by_model_det = df_sub.groupby(model_col)[det_cols].mean()
        _save_iou_curves(mean_by_model_det, thresholds_det, "DetA", seq, f"deta_vs_iou_{seq}.pdf")

# Average curves: mean per (seq, model) first, then mean across sequences,
# so each sequence contributes equally.
df_seq_subset = df[df['seq'].isin(SEQ_LIST)]
mean_seq_model_ass = df_seq_subset.groupby(['seq', model_col])[ass_cols].mean() if ass_cols else None
mean_seq_model_det = df_seq_subset.groupby(['seq', model_col])[det_cols].mean() if det_cols else None
if mean_seq_model_ass is not None:
    avg_ass = mean_seq_model_ass.groupby(model_col).mean()
    _save_iou_curves(avg_ass, thresholds_ass, "AssA", "Average", "assa_vs_iou_Average.pdf")
if mean_seq_model_det is not None:
    avg_det = mean_seq_model_det.groupby(model_col).mean()
    _save_iou_curves(avg_det, thresholds_det, "DetA", "Average", "deta_vs_iou_Average.pdf")

# -----------------------
# IDSW & Frag per model (per sequence and Average)
# -----------------------
for key in list(grouped_by_seq.keys()):
    gm = grouped_by_seq.get(key)
    # Views without data produce no figure.
    if gm is None or gm.empty:
        continue
    # One x slot per model in DESIRED_ORDER; models outside that list are not drawn.
    x = np.arange(len(DESIRED_ORDER))
    plt.figure(figsize=(10,4))
    width = 0.35
    # safe_get_vals yields NaN for models missing from this view (or if the metric is absent).
    idsw_vals = safe_get_vals(gm, 'IDSW', DESIRED_ORDER)
    frag_vals = safe_get_vals(gm, 'Frag', DESIRED_ORDER)
    # Two side-by-side bars per model: IDSW on the left, Frag on the right.
    plt.bar(x - width/2, idsw_vals, width, label='IDSW')
    plt.bar(x + width/2, frag_vals, width, label='Frag')
    plt.xticks(x, DESIRED_ORDER, rotation=45, ha='right')
    plt.ylabel("Valor medio")
    plt.title(f"IDSW y Frag por modelo - {key}")
    plt.legend(); plt.tight_layout()
    # Save, then close the figure so the next iteration starts clean.
    p = os.path.join(OUT_DIR, f"idsw_frag_por_modelo_{key}.pdf"); plt.savefig(p); plt.close(); print("Saved:", p)

# -----------------------
# Matriz / heatmap composite: Tracking1, Tracking2, Average
# -----------------------
# Primero construimos df_seq_composite (por-seq normalizando métricas dentro de cada seq)
def compute_seq_composites_table(df_seq_model_table, model_col, metrics_map):
    """Score every (seq, model) row, normalising within each sequence.

    Parameters
    ----------
    df_seq_model_table : DataFrame
        One row per (seq, model). Must contain a 'seq' column, ``model_col``
        and any subset of the scoring metrics under their short names
        (AssA_AUC, IDF1, HOTA_AUC, MT_percent, IDSW, Frag).
    model_col : str
        Name of the column holding the model identifier.
    metrics_map : dict
        Unused; kept so existing callers keep working.

    Returns
    -------
    DataFrame
        Rows ordered by 'seq' (original order kept within a sequence) with
        columns: 'seq', ``model_col``, then ``<metric>`` / ``<metric>_norm``
        for each scoring metric present, 'composite_score_raw' and
        'composite_score'. An empty DataFrame is returned when no scoring
        metric is present or when there are no rows to score.

    Notes
    -----
    * ``<metric>_norm`` is the min-max of the metric inside its sequence
      (NaNs ignored when finding min/max); a metric that is constant within
      a sequence gets 0.5 on every row of that sequence.
    * 'composite_score_raw' is the weighted sum of the ``*_norm`` columns
      (IDSW and Frag carry negative weights).
    * 'composite_score' is the raw score min-max rescaled inside each
      sequence; it is 0.0 for a whole sequence when its raw scores do not
      span a positive range.
    """
    score_metrics = ['AssA_AUC', 'IDF1', 'HOTA_AUC', 'MT_percent', 'IDSW', 'Frag']
    metrics_for_score = [c for c in score_metrics if c in df_seq_model_table.columns]
    if not metrics_for_score:
        return pd.DataFrame()

    # Rows without a sequence cannot be grouped. A stable sort groups the
    # sequences together while keeping the original order inside each one.
    table = (
        df_seq_model_table.dropna(subset=['seq'])
        .sort_values('seq', kind='mergesort')
        .reset_index(drop=True)
    )
    if table.empty:
        return pd.DataFrame()

    seq_key = table['seq']
    df_out = table[['seq', model_col]].copy()

    # Per-sequence min-max of each metric, computed column-wise with
    # groupby().transform instead of iterating row by row.
    for c in metrics_for_score:
        values = table[c].astype(float)
        per_seq = values.groupby(seq_key)
        lo = per_seq.transform('min')
        hi = per_seq.transform('max')
        df_out[c] = table[c]
        # Where the metric is constant in a sequence the ratio is 0/0; use 0.5.
        df_out[c + '_norm'] = ((values - lo) / (hi - lo)).mask(hi == lo, 0.5)

    # Weighted sum of the normalised metrics that are present.
    weights = {
        'AssA_AUC_norm': 0.35,
        'IDF1_norm': 0.25,
        'HOTA_AUC_norm': 0.15,
        'MT_percent_norm': 0.10,
        'IDSW_norm': -0.10,
        'Frag_norm': -0.10,
    }
    raw = pd.Series(0.0, index=df_out.index)
    for k, w in weights.items():
        if k in df_out.columns:
            raw = raw + w * df_out[k]
    df_out['composite_score_raw'] = raw

    # Rescale the raw score to 0..1 inside each sequence. transform keeps the
    # 'seq' column intact, unlike groupby().apply on the whole frame.
    per_seq_raw = raw.groupby(seq_key)
    raw_lo = per_seq_raw.transform('min')
    raw_hi = per_seq_raw.transform('max')
    df_out['composite_score'] = ((raw - raw_lo) / (raw_hi - raw_lo)).where(raw_hi > raw_lo, 0.0)
    return df_out

# Build df_seq_model_short: the per-(seq, model) table under short metric names.
df_seq_model_short = df_seq_model.copy() if 'df_seq_model' in locals() else pd.DataFrame()
# If any column still carries its long CSV name, rename it to the short one.
rename_map_long_to_short = {v: k for k, v in metrics_map.items() if v in df_seq_model_short.columns}
if rename_map_long_to_short:
    df_seq_model_short = df_seq_model_short.rename(columns=rename_map_long_to_short)

df_seq_composite = compute_seq_composites_table(df_seq_model_short, model_col, metrics_map)

# Pivot to a seq x model matrix of composite scores and append an 'Average' row.
# An empty composite table has no 'seq' column, so it must not be indexed.
if df_seq_composite.empty:
    pivot = pd.DataFrame()
else:
    pivot = df_seq_composite[df_seq_composite['seq'].isin(SEQ_LIST)].pivot(index='seq', columns=model_col, values='composite_score')
if not pivot.empty:
    # NOTE: this Average is the mean of the per-sequence normalised scores;
    # the rankings CSV below instead re-normalises the mean raw score.
    avg_row = pivot.mean(axis=0).to_frame().T
    avg_row.index = ['Average']
    pivot = pd.concat([pivot, avg_row])
    cols_present = [c for c in DESIRED_ORDER if c in pivot.columns]
    other_cols = [c for c in pivot.columns if c not in cols_present]
    pivot = pivot[cols_present + other_cols]
else:
    pivot = pd.DataFrame(index=SEQ_LIST + ['Average'])  # no data: rows only

pivot_path = os.path.join(OUT_DIR, "seq_model_composite_matrix_tracking12.csv")
pivot.to_csv(pivot_path)
print("Guardado pivot:", pivot_path)

heatmap_path = os.path.join(OUT_DIR, "seq_model_composite_heatmap_tracking12.pdf")
if pivot.shape[1] > 0:
    plt.figure(figsize=(8, 2 + 0.8*len(pivot.index)))
    plt.imshow(pivot.values, aspect='auto', interpolation='nearest')
    plt.yticks(range(len(pivot.index)), pivot.index)
    plt.xticks(range(len(pivot.columns)), pivot.columns, rotation=45, ha='right')
    plt.colorbar(label='composite_score (0-1)')
    plt.title("Heatmap: composite_score (Tracking1, Tracking2, Average)")
    plt.tight_layout()
    plt.savefig(heatmap_path); plt.close()
    print("Saved heatmap:", heatmap_path)
else:
    # A matrix without model columns has nothing to draw.
    print("Pivot sin columnas; no se genera heatmap.")

# -----------------------
# Rankings per sequence (CSV)
# -----------------------
seq_rankings = df_seq_composite[df_seq_composite['seq'].isin(SEQ_LIST)].copy() if not df_seq_composite.empty else pd.DataFrame()
if not seq_rankings.empty:
    # Average entry per model: mean raw score across sequences, min-max rescaled.
    avg_composite = seq_rankings.groupby(model_col)['composite_score_raw'].mean().reset_index()
    raw_min = avg_composite['composite_score_raw'].min()
    raw_max = avg_composite['composite_score_raw'].max()
    if raw_max > raw_min:
        avg_composite['composite_score'] = (avg_composite['composite_score_raw'] - raw_min) / (raw_max - raw_min)
    else:
        avg_composite['composite_score'] = 0.0
    avg_composite['seq'] = 'Average'
    seq_rankings_combined = pd.concat([seq_rankings, avg_composite], sort=False, ignore_index=True, axis=0)
    seq_rankings_path = os.path.join(OUT_DIR, "seq_model_rankings_tracking12.csv")
    seq_rankings_combined.to_csv(seq_rankings_path, index=False)
    print("Guardado seq rankings:", seq_rankings_path)
else:
    print("No hay df_seq_composite para guardar rankings.")

print("Análisis completado. Resultados en:", OUT_DIR)


Columnas detectadas:
 model_col: model
 AssA_AUC: AssA___AUC
 IDF1: IDF1
 HOTA_AUC: HOTA___AUC
 MT: MT
 GT_IDs: GT_IDs
 IDSW: IDSW
 Frag: Frag
 Dets: Dets
Métricas mapeadas:
 - AssA_AUC -> AssA___AUC
 - IDF1 -> IDF1
 - HOTA_AUC -> HOTA___AUC
 - MT -> MT
 - GT_IDs -> GT_IDs
 - IDSW -> IDSW
 - Frag -> Frag
 - Dets -> Dets
 - MT_percent -> MT_percent
Agg map (columnas numéricas que se usarán para mean):
{'AssA___AUC': 'mean', 'IDF1': 'mean', 'HOTA___AUC': 'mean', 'MT': 'mean', 'GT_IDs': 'mean', 'IDSW': 'mean', 'Frag': 'mean', 'Dets': 'mean', 'MT_percent': 'mean'}
Guardado: results/model_summary_Tracking1.csv
Guardado: results/model_summary_Tracking2.csv
Guardado: results/model_summary_Average.csv
Plot guardado: results/metricas_clave_por_modelo_Tracking1.pdf
Plot guardado: results/metricas_clave_por_modelo_Tracking2.pdf
Plot guardado: results/metricas_clave_por_modelo_Average.pdf
Saved: results/assa_vs_iou_Tracking1.pdf
Saved: results/deta_vs_iou_Tracking1.pdf
Saved: results/assa_vs_iou_T

  df_out = df_out.groupby('seq').apply(
