# Imports y definiciones

In [1]:
"""
Análisis de tracking extendido:
- Agregación por secuencia, modelo, formato y promedios.
- Detección flexible de columnas de métricas y curvas IoU.
- Normalización y composite_score con pesos configurables.
- Gráficas de barras para métricas clave, IDSW/Frag, curvas IoU y heatmaps.
- Comparaciones entre formatos (solo FACTOR DE MEJORA = Métrica_target / Métrica_baseline).
- Gráficas de factor de mejora como CURVAS (no barras) por modelo y por secuencia.
- Gráficas opcionales para combinaciones específicas (modelo + formato).
"""

import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Reset matplotlib to its default style before any figure is created.
plt.style.use('default')

# =========================================================
# CONFIGURATION
# =========================================================
# Input CSV with the combined tracking results and the output directory.
IN_PATH: str = "combined_results.csv"
OUT_DIR: str = "results"
# Sequences to analyse, and the preferred display order for models / formats.
SEQ_LIST: list[str] = ["Tracking1", "Tracking2"]
DESIRED_ORDER_MODELS: list[str] = ['Salmones2024','yolov8m','yolov8l','yolov9c','yolo11m','yolo11l']
DESIRED_ORDER_FORMATS: list[str] = ["Pytorch", "FP16", "INT8"]

# Weights applied to the min-max normalised metrics ("<metric>_norm" columns)
# when building the composite score. IDSW and Frag carry negative weights,
# so larger values of those metrics lower the score.
COMPOSITE_WEIGHTS: dict[str, float] = {
    'AssA_AUC_norm': 0.35,
    'IDF1_norm': 0.25,
    'HOTA_AUC_norm': 0.15,
    'MT_percent_norm': 0.10,
    'IDSW_norm': -0.10,
    'Frag_norm': -0.10
}

# (short name, candidate column names) pairs used to locate each metric in the CSV.
# Candidates are tried in order by choose_col: exact match first, then a
# case-insensitive substring match.
METRIC_PATTERNS = [
    ('AssA_AUC', ['AssA___AUC','AssA_AUC','AssA AUC','AssA']),
    ('IDF1',     ['IDF1','IDF_1','ID_F1']),
    ('HOTA_AUC', ['HOTA___AUC','HOTA_AUC','HOTA']),
    ('MT',       ['MT','MTR','Mostly-Tracked','MostlyTracked']),
    ('GT_IDs',   ['GT_IDs','GT_IDS','GT_ID','GT_IDs','GT_ID']),
    ('IDSW',     ['IDSW','ID Sw','ID_Switches','id switches']),
    ('Frag',     ['Frag','Fragmentation','FRAG']),
    ('Dets',     ['Dets','Detections','GT_Dets','Dets ']),
]

# Column-name prefixes of the per-IoU-threshold curves ("<prefix><integer threshold>").
ASSA_PREFIX = "AssA___"
DETA_PREFIX = "DetA___"

# =========================================================
# COMPARISONS (IMPROVEMENT FACTOR ONLY)
# =========================================================
# Improvement factor = target-format metric / baseline-format metric.
BASELINE_FORMAT: str = "Pytorch"
COMPARISON_TARGET_FORMATS: list[str] = ["FP16", "INT8"]
COMPARISON_METRICS_CANDIDATES = ['AssA_AUC','IDF1','HOTA_AUC','MT_percent','IDSW','Frag']
# Metrics that are allowed to appear in exported CSV tables.
ALLOWED_EXPORT_METRICS = ['IDF1','AssA_AUC','HOTA_AUC','Det_AUC','MT_percent','IDSW','Frag']


# Specific (model, format) combinations whose metrics are plotted per sequence
SPECIFIC_MODEL_FORMATS: list[tuple[str, str]] = [
    # ('yolov8m','FP16'),
    # ('yolov8m','INT8'),
]

# Utilidades

In [2]:
# =========================================================
# UTILIDADES
# =========================================================
def log(msg: str) -> None:
    """Write a progress message to standard output."""
    print(msg)

def ensure_dir(path: str) -> None:
    """Create ``path`` together with any missing parent directories.

    Calling it on a directory that already exists is not an error.
    """
    os.makedirs(path, exist_ok=True)

def choose_col(df: pd.DataFrame, patterns: list[str]) -> str | None:
    for p in patterns:
        if p in df.columns:
            return p
    for p in patterns:
        matches = [c for c in df.columns if p.lower() in c.lower()]
        if matches:
            return matches[0]
    return None

def reorder_index(df_in: pd.DataFrame, order: list[str]) -> pd.DataFrame:
    """Reindex ``df_in`` so labels listed in ``order`` come first.

    Labels of ``order`` that are absent from the index are ignored; index
    labels not mentioned in ``order`` keep their relative order and are
    appended at the end. An empty frame or an empty ``order`` is returned
    untouched.
    """
    if not order or df_in.empty:
        return df_in
    leading = [label for label in order if label in df_in.index]
    trailing = [label for label in df_in.index if label not in leading]
    return df_in.reindex(leading + trailing)

def to_numeric_safe(df: pd.DataFrame, cols: list[str]):
    """Convert the listed columns of ``df`` to numeric dtype, in place.

    Columns that are not present are skipped; values that cannot be parsed
    become NaN (``errors='coerce'``).
    """
    for name in (c for c in cols if c in df.columns):
        df[name] = pd.to_numeric(df[name], errors='coerce')

def minmax_cols(df: pd.DataFrame, cols: list[str]) -> pd.DataFrame:
    """Return a copy of ``df`` with min-max scaled versions of ``cols``.

    Each requested column that exists in ``df`` is scaled with
    ``MinMaxScaler`` and stored in a new ``"<column>_norm"`` column; the
    source columns are left as they are. When none of ``cols`` is present,
    the plain copy is returned.
    """
    out = df.copy()
    selected = [name for name in cols if name in out.columns]
    if selected:
        scaled = MinMaxScaler().fit_transform(out[selected])
        for pos, name in enumerate(selected):
            out[name + "_norm"] = scaled[:, pos]
    return out

def compute_composite(df_in: pd.DataFrame, weights: dict[str,float]) -> pd.DataFrame:
    """Add a weighted composite score to a copy of ``df_in``.

    Only weights whose key is a column of the frame are used.
    ``composite_score_raw`` is the weighted sum of those columns and
    ``composite_score`` is that sum rescaled to [0, 1] across rows. Both
    columns are 0.0 when no weight is usable; ``composite_score`` is 0.0
    when all raw scores are equal. An empty frame is returned as an
    unmodified copy.
    """
    result = df_in.copy()
    if result.empty:
        return result

    active = {col: w for col, w in weights.items() if col in result.columns}
    if not active:
        result['composite_score_raw'] = 0.0
        result['composite_score'] = 0.0
        return result

    total = pd.Series(0.0, index=result.index, dtype=float)
    for col, w in active.items():
        total = total + w * result[col]
    result['composite_score_raw'] = total

    lowest, highest = total.min(), total.max()
    if highest > lowest:
        result['composite_score'] = (total - lowest)/(highest-lowest)
    else:
        result['composite_score'] = 0.0
    return result

def safe_vals(df_g: pd.DataFrame, metric: str, ordered: list[str]) -> list[float]:
    """Return the values of ``metric`` for the index labels in ``ordered``.

    The result has one float per entry of ``ordered``. NaN is used when the
    label is not in the index, when ``metric`` is not a column, or when the
    cell is missing, complex, or cannot be converted to ``float``.
    """
    nan = float('nan')
    # The column check does not depend on the label, so do it once.
    if metric not in df_g.columns:
        return [nan for _ in ordered]

    def to_real(val) -> float:
        if pd.isna(val) or isinstance(val, complex):
            return nan
        try:
            return float(val)
        # Only conversion failures are expected here; a bare ``except`` would
        # also swallow KeyboardInterrupt / SystemExit.
        except (TypeError, ValueError, OverflowError):
            return nan

    return [to_real(df_g.loc[m, metric]) if m in df_g.index else nan for m in ordered]

def detect_metric_columns(df: pd.DataFrame) -> dict[str,str]:
    """Map each metric short name to the column of ``df`` that holds it.

    Every entry of ``METRIC_PATTERNS`` is resolved with ``choose_col``;
    metrics for which no column is found are left out of the result.
    """
    return {
        short: col
        for short, patterns in METRIC_PATTERNS
        if (col := choose_col(df, patterns))
    }

def build_agg_map(df: pd.DataFrame, metrics_map: dict[str,str]) -> dict[str,str]:
    """Build a ``{column: 'mean'}`` aggregation spec for the detected metrics.

    Only columns that exist in ``df`` and have a numeric dtype are included.
    """
    agg: dict[str, str] = {}
    for source_col in metrics_map.values():
        if source_col in df.columns and pd.api.types.is_numeric_dtype(df[source_col]):
            agg[source_col] = 'mean'
    return agg

def add_mt_percent(df: pd.DataFrame, metrics_map: dict[str,str]) -> None:
    """Add an ``MT_percent`` column to ``df`` in place.

    ``MT_percent`` is the MT column divided by the GT_IDs column, both
    coerced to numeric first (the coerced values are written back). A zero
    denominator yields NaN. When either column is unknown or absent, the
    whole ``MT_percent`` column is NaN.
    """
    mt_col = metrics_map.get('MT')
    gt_col = metrics_map.get('GT_IDs')

    if not (mt_col and gt_col) or mt_col not in df.columns or gt_col not in df.columns:
        df['MT_percent'] = np.nan
        return

    df[mt_col] = pd.to_numeric(df[mt_col], errors='coerce')
    df[gt_col] = pd.to_numeric(df[gt_col], errors='coerce')
    # Zero ground-truth IDs would divide by zero; treat them as missing.
    denominator = df[gt_col].replace({0: np.nan})
    df['MT_percent'] = df[mt_col] / denominator

def group_and_rename(df_sub: pd.DataFrame, group_col: str, agg_map: dict[str,str], metrics_map: dict[str,str]) -> pd.DataFrame:
    """Aggregate ``df_sub`` by ``group_col`` and rename metrics to short names.

    ``agg_map`` is restricted to the columns present in ``df_sub``. The
    aggregated columns are renamed from original column name to short name
    using ``metrics_map`` (short -> original). If ``df_sub`` has an
    ``MT_percent`` column, its per-group mean is added as well. Returns an
    empty frame when there is no data or nothing to aggregate.
    """
    if df_sub.empty:
        return pd.DataFrame()

    aggregations = {col: how for col, how in agg_map.items() if col in df_sub.columns}
    if not aggregations:
        return pd.DataFrame()

    by_group = df_sub.groupby(group_col)
    summary = by_group.agg(aggregations)
    to_short = {orig: short for short, orig in metrics_map.items() if orig in summary.columns}
    summary = summary.rename(columns=to_short)

    if 'MT_percent' in df_sub.columns:
        summary['MT_percent'] = by_group['MT_percent'].mean()
    return summary

def normalize_and_composite(df_metrics: pd.DataFrame, weights: dict[str,float]) -> pd.DataFrame:
    """Normalise the key metrics and attach the composite score.

    Missing values are replaced by 0 on a copy, the available metrics among
    AssA_AUC, IDF1, HOTA_AUC, MT_percent, IDSW and Frag are min-max scaled
    via ``minmax_cols``, and ``compute_composite`` is applied with
    ``weights``. An empty input is returned unchanged.
    """
    if df_metrics.empty:
        return df_metrics
    filled = df_metrics.copy().fillna(0)
    to_scale = [
        metric
        for metric in ('AssA_AUC', 'IDF1', 'HOTA_AUC', 'MT_percent', 'IDSW', 'Frag')
        if metric in filled.columns
    ]
    return compute_composite(minmax_cols(filled, to_scale), weights)

def compute_average(grouped_per_seq: dict[str,pd.DataFrame]) -> pd.DataFrame:
    seq_dfs = [df for name, df in grouped_per_seq.items() if name in SEQ_LIST and not df.empty]
    if not seq_dfs:
        return pd.DataFrame()
    common_cols = set(seq_dfs[0].columns)
    for d in seq_dfs[1:]:
        common_cols &= set(d.columns)
    common_cols = [c for c in common_cols if pd.api.types.is_numeric_dtype(seq_dfs[0][c])]
    if not common_cols:
        return pd.DataFrame()
    all_index = sorted(set().union(*[d.index for d in seq_dfs]))
    stacked = [d.reindex(all_index) for d in seq_dfs]
    concat = pd.concat(stacked, keys=range(len(seq_dfs)))
    return concat.groupby(level=1)[common_cols].mean()

def plot_group_bars(group_df: pd.DataFrame, name: str, group_type: str, ordered: list[str], metrics: list[str], out_dir: str, figsize=(7,4)):
    """Save a grouped bar chart with one bar series per metric.

    The x axis shows the labels in ``ordered`` (or the frame index when
    ``ordered`` is empty); values come from ``safe_vals``. The figure is
    written to ``metricas_clave_<group_type>_<name>.pdf`` inside ``out_dir``.
    Nothing is drawn for an empty frame.
    """
    if group_df.empty:
        return

    x_labels = ordered or list(group_df.index)
    positions = np.arange(len(x_labels))
    bar_width = 0.15
    n_series = len(metrics)

    plt.figure(figsize=figsize)
    for series_idx, metric in enumerate(metrics):
        # Centre the group of bars around each tick position.
        shift = (series_idx-(n_series-1)/2)*bar_width
        heights = safe_vals(group_df, metric, x_labels)
        plt.bar(positions + shift, heights, bar_width, label=metric)

    plt.xticks(positions, x_labels)
    plt.ylabel("Valor medio")
    plt.legend(fontsize=8)
    plt.tight_layout()

    path = os.path.join(out_dir, f"metricas_clave_{group_type}_{name}.pdf")
    plt.savefig(path)
    plt.close()
    log(f"Plot guardado: {path}")

def plot_idsw_frag(group_df: pd.DataFrame, name: str, group_type: str, ordered: list[str], out_dir: str, figsize=(7,4)):
    """Save a side-by-side bar chart of IDSW and Frag.

    Skipped when the frame is empty or has neither an ``IDSW`` nor a
    ``Frag`` column. The figure is written to
    ``idsw_frag_<group_type>_<name>.pdf`` inside ``out_dir``.
    """
    has_metric = 'IDSW' in group_df.columns or 'Frag' in group_df.columns
    if group_df.empty or not has_metric:
        return

    x_labels = ordered or list(group_df.index)
    positions = np.arange(len(x_labels))
    bar_width = 0.35
    series = {
        'IDSW': safe_vals(group_df, 'IDSW', x_labels),
        'Frag': safe_vals(group_df, 'Frag', x_labels),
    }

    plt.figure(figsize=figsize)
    plt.bar(positions - bar_width/2, series['IDSW'], bar_width, label='IDSW')
    plt.bar(positions + bar_width/2, series['Frag'], bar_width, label='Frag')
    plt.xticks(positions, x_labels)
    plt.ylabel("Valor medio")
    plt.legend(fontsize=8)
    plt.tight_layout()

    path = os.path.join(out_dir, f"idsw_frag_{group_type}_{name}.pdf")
    plt.savefig(path)
    plt.close()
    log(f"Plot guardado: {path}")

def plot_curves_by_iou(df: pd.DataFrame, group_col: str, prefix: str, thresholds: list[int], cols: list[str], ordered: list[str], title_metric: str, tag: str, name: str, out_dir: str, figsize=(7,4)):
    """Save one curve per group showing a metric against the IoU threshold.

    ``cols`` are averaged per ``group_col`` value and plotted against
    ``thresholds`` (same length and order as ``cols``). Groups are drawn in
    the order given by ``ordered`` (or the order of the grouped table when
    ``ordered`` is empty); groups without data are skipped. The figure is
    written to ``<prefix lowercased>_vs_iou_<tag>_<name>.pdf`` inside
    ``out_dir``. Nothing happens when ``cols`` is empty or ``group_col`` is
    not a column.
    """
    if not cols or group_col not in df.columns:
        return

    per_group = df.groupby(group_col)[cols].mean()
    x_values = np.array(thresholds)
    groups = ordered if ordered else per_group.index

    plt.figure(figsize=figsize)
    for group in groups:
        if group not in per_group.index:
            continue
        plt.plot(x_values, per_group.loc[group].to_numpy(), marker='o', label=group)

    plt.ylabel(title_metric)
    plt.title(f"{title_metric} vs IoU - {name} ({tag})")
    plt.legend(fontsize=8)
    plt.tight_layout()

    path = os.path.join(out_dir, f"{prefix.lower()}_vs_iou_{tag}_{name}.pdf")
    plt.savefig(path)
    plt.close()
    log(f"Plot guardado: {path}")

def heatmap(matrix: pd.DataFrame, title: str, path: str):
    """Render ``matrix`` as a heatmap and save it to ``path``.

    Rows and columns are labelled with the frame's index and column names;
    the figure height grows with the number of rows. An empty matrix is only
    logged, no file is written.
    """
    if matrix.empty:
        log(f"Heatmap vacío: {path}")
        return

    row_labels = matrix.index.tolist()
    col_labels = matrix.columns.tolist()
    fig_height = 2 + 0.6*len(matrix.index)

    plt.figure(figsize=(8, fig_height))
    plt.imshow(matrix.values, aspect='auto', interpolation='nearest')
    plt.yticks(range(len(matrix.index)), row_labels)
    plt.xticks(range(len(matrix.columns)), col_labels)
    plt.colorbar(label='composite_score (0-1)')
    plt.title(title)
    plt.tight_layout()
    plt.savefig(path)
    plt.close()
    log(f"Heatmap guardado: {path}")

# =========================================================
# COMPARACIONES FACTOR DE MEJORA
# =========================================================
def aggregate_seq_model_format(df: pd.DataFrame, seq_list: list[str], model_col: str, format_col: str,
                               metrics_map: dict[str,str]) -> pd.DataFrame:
    """Average the metrics per (sequence, model, format) triplet.

    Args:
        df: raw results; must contain ``'seq'``, ``model_col`` and
            ``format_col``.
        seq_list: sequences to keep.
        model_col: name of the model column.
        format_col: name of the format column.
        metrics_map: short metric name -> original column name.

    Returns:
        One row per triplet with the mean of each metric, metric columns
        renamed to their short names (plus ``MT_percent`` when present).
        An empty frame is returned if any grouping column is missing.
    """
    group_keys = ['seq', model_col, format_col]
    if any(col not in df.columns for col in group_keys):
        return pd.DataFrame()

    subset = df[df['seq'].isin(seq_list)].copy()
    candidates = list(metrics_map.values())
    if 'MT_percent' in subset.columns:
        candidates.append('MT_percent')

    # Keep only numeric columns, matching build_agg_map: the mean of a text
    # column raises on current pandas. Also drop duplicates and grouping keys
    # so each value column is selected exactly once.
    numeric_cols: list[str] = []
    for col in candidates:
        if col in numeric_cols or col in group_keys or col not in subset.columns:
            continue
        if pd.api.types.is_numeric_dtype(subset[col]):
            numeric_cols.append(col)

    grp = subset.groupby(group_keys)[numeric_cols].mean().reset_index()
    rename_map = {v:k for k,v in metrics_map.items() if v in grp.columns}
    grp = grp.rename(columns=rename_map)
    return grp

def compute_format_factor(df_triplet: pd.DataFrame,
                          model_col: str,
                          format_col: str,
                          baseline_format: str,
                          target_formats: list[str],
                          metrics: list[str]) -> dict[str, pd.DataFrame]:
    """Compute improvement factors of each target format over the baseline.

    Improvement factor = target value / baseline value, evaluated for every
    (sequence, model) pair present in both formats and for every metric that
    is a column of ``df_triplet``. The factor is NaN when either value is
    missing or the baseline is zero.

    Returns a dict keyed by target format; each value is a long-format table
    with one row per (seq, model, metric). Target formats sharing no
    (seq, model) pair with the baseline are omitted.
    """
    factors: dict[str, pd.DataFrame] = {}
    if df_triplet.empty:
        return factors

    usable_metrics = [m for m in metrics if m in df_triplet.columns]
    key_cols = ['seq', model_col]

    # The baseline rows do not depend on the target, so select them once.
    baseline_rows = df_triplet[df_triplet[format_col] == baseline_format]
    baseline_keys = {tuple(k) for k in baseline_rows[key_cols].values}

    for target in target_formats:
        target_rows = df_triplet[df_triplet[format_col] == target]
        target_keys = {tuple(k) for k in target_rows[key_cols].values}
        shared = sorted(baseline_keys & target_keys)
        if not shared:
            continue

        records = []
        for seq_name, model_name in shared:
            base_match = baseline_rows[(baseline_rows['seq'] == seq_name) & (baseline_rows[model_col] == model_name)]
            tgt_match = target_rows[(target_rows['seq'] == seq_name) & (target_rows[model_col] == model_name)]
            if base_match.empty or tgt_match.empty:
                continue
            base_row = base_match.iloc[0]
            tgt_row = tgt_match.iloc[0]
            for metric in usable_metrics:
                base_val = base_row[metric]
                tgt_val = tgt_row[metric]
                undefined = pd.isna(base_val) or pd.isna(tgt_val) or base_val == 0
                records.append({
                    'seq': seq_name,
                    model_col: model_name,
                    'baseline_format': baseline_format,
                    'target_format': target,
                    'metric': metric,
                    'baseline_value': base_val,
                    'target_value': tgt_val,
                    'improvement_factor': np.nan if undefined else tgt_val / base_val,
                })
        factors[target] = pd.DataFrame(records)
    return factors

def save_factor_tables(factor_results: dict[str,pd.DataFrame], out_dir: str, model_col: str):
    """Write the improvement-factor tables of every target format to CSV.

    For each non-empty table, rows are restricted to
    ``ALLOWED_EXPORT_METRICS`` and three files are written into ``out_dir``:
    the detailed rows (``format_factor_seq_model_<fmt>.csv``), the mean
    factor per model (``format_factor_by_model_<fmt>.csv``) and the mean
    factor per sequence (``format_factor_by_seq_<fmt>.csv``), the latter two
    pivoted to one column per metric.
    """
    for fmt, table in factor_results.items():
        if table.empty:
            continue
        # Keep only the metrics that may be exported.
        allowed = table[table['metric'].isin(ALLOWED_EXPORT_METRICS)]
        if allowed.empty:
            continue

        detail_path = os.path.join(out_dir, f"format_factor_seq_model_{fmt}.csv")
        allowed.to_csv(detail_path, index=False)
        log(f"Guardado factores detalle: {detail_path}")

        for key, stem in ((model_col, "format_factor_by_model"), ('seq', "format_factor_by_seq")):
            means = allowed.groupby([key, 'metric'])['improvement_factor'].mean().reset_index()
            means = means[means['metric'].isin(ALLOWED_EXPORT_METRICS)]
            wide = means.pivot(index=key, columns='metric', values='improvement_factor')
            wide.to_csv(os.path.join(out_dir, f"{stem}_{fmt}.csv"))

def plot_factor_lines_by_model(factor_results: dict[str,pd.DataFrame],
                               out_dir: str,
                               model_col: str,
                               desired_order_models: list[str],
                               figsize=(9,5)):
    """Save, per target format, line plots of the mean improvement factor by model.

    One line is drawn per metric, with models on the x axis ordered by
    ``desired_order_models`` (unlisted models follow). A dashed horizontal
    line marks factor 1.0. Each figure is written to
    ``factor_lines_<fmt>_by_model.pdf`` inside ``out_dir``; empty tables are
    skipped.
    """
    for fmt, table in factor_results.items():
        if table.empty:
            continue

        mean_factor = table.groupby([model_col, 'metric'])['improvement_factor'].mean()
        per_model = mean_factor.unstack('metric')
        if desired_order_models:
            listed = [m for m in desired_order_models if m in per_model.index]
            unlisted = [m for m in per_model.index if m not in listed]
            per_model = per_model.reindex(listed + unlisted)

        plt.figure(figsize=figsize)
        ticks = np.arange(len(per_model.index))
        for metric in per_model.columns:
            plt.plot(ticks, per_model[metric].values, marker='o', label=metric)
        plt.axhline(1.0, color='gray', linestyle='--', linewidth=1)
        plt.xticks(ticks, per_model.index)
        plt.ylabel("Factor de mejora (target / baseline)")
        plt.legend(fontsize=8)
        plt.tight_layout()

        fname = os.path.join(out_dir, f"factor_lines_{fmt}_by_model.pdf")
        plt.savefig(fname)
        plt.close()
        log(f"Plot guardado: {fname}")

def plot_factor_lines_by_seq(factor_results: dict[str,pd.DataFrame], out_dir: str, figsize=(9,5)):
    """Save, per target format, line plots of the mean improvement factor by sequence.

    One line is drawn per metric with sequences on the x axis, plus a dashed
    horizontal line at factor 1.0. Each figure is written to
    ``factor_lines_<fmt>_by_seq.pdf`` inside ``out_dir``; empty tables are
    skipped.
    """
    for fmt, table in factor_results.items():
        if table.empty:
            continue

        mean_factor = table.groupby(['seq', 'metric'])['improvement_factor'].mean()
        per_seq = mean_factor.unstack('metric')

        plt.figure(figsize=figsize)
        ticks = np.arange(len(per_seq.index))
        for metric in per_seq.columns:
            plt.plot(ticks, per_seq[metric].values, marker='o', label=metric)
        plt.axhline(1.0, color='gray', linestyle='--', linewidth=1)
        plt.xticks(ticks, per_seq.index)
        plt.ylabel("Factor de mejora (target / baseline)")
        plt.legend(fontsize=8)
        plt.tight_layout()

        fname = os.path.join(out_dir, f"factor_lines_{fmt}_by_seq.pdf")
        plt.savefig(fname)
        plt.close()
        log(f"Plot guardado: {fname}")

def plot_specific_model_format_combos(df_triplet: pd.DataFrame,
                                      combos: list[tuple[str,str]],
                                      model_col: str,
                                      format_col: str,
                                      metrics: list[str],
                                      out_dir: str):
    """Save, for each (model, format) pair, a per-sequence line plot of the metrics.

    Sequences listed in ``SEQ_LIST`` come first on the x axis, followed by
    any other sequence found for the pair. Pairs without rows are logged and
    skipped; pairs without any of ``metrics`` are skipped silently. Each
    figure is written to ``specific_combo_<model>_<fmt>.pdf`` inside
    ``out_dir`` (created if needed).
    """
    if df_triplet.empty or not combos:
        return
    ensure_dir(out_dir)

    for model, fmt in combos:
        selected = (df_triplet[model_col] == model) & (df_triplet[format_col] == fmt)
        subset = df_triplet[selected]
        if subset.empty:
            log(f"Combo sin datos: ({model}, {fmt})")
            continue

        found = subset['seq'].unique()
        seq_order = [s for s in SEQ_LIST if s in found] + [s for s in found if s not in SEQ_LIST]
        metrics_present = [m for m in metrics if m in subset.columns]
        if not metrics_present:
            continue

        plt.figure(figsize=(11,5))
        ticks = np.arange(len(seq_order))
        for metric in metrics_present:
            vals = []
            for seq_name in seq_order:
                seq_rows = subset[subset['seq'] == seq_name]
                vals.append(np.nan if seq_rows.empty else seq_rows[metric].mean())
            plt.plot(ticks, vals, marker='o', label=metric)
        plt.xticks(ticks, seq_order)
        plt.ylabel("Valor")
        plt.title(f"Métricas por secuencia ({model}, {fmt})")
        plt.legend(fontsize=8)
        plt.tight_layout()

        fname = os.path.join(out_dir, f"specific_combo_{model}_{fmt}.pdf")
        plt.savefig(fname)
        plt.close()
        log(f"Plot guardado: {fname}")

# Cargar tablas

In [3]:
# Make sure the output directory exists and abort early if the input CSV is missing.
ensure_dir(OUT_DIR)
if not os.path.isfile(IN_PATH):
    log(f"ERROR: No existe el archivo: {IN_PATH}")
    sys.exit(1)

df = pd.read_csv(IN_PATH)

# Sequence names are later compared against the strings in SEQ_LIST, so force str.
if 'seq' in df.columns:
    df['seq'] = df['seq'].astype(str)

# Detect the model column; fall back to the last CSV column when nothing matches.
# NOTE(review): 'export' is a candidate for both the model and the format column,
# so both can resolve to the same column when the CSV has no 'model'-like column.
model_col = choose_col(df, ['model','export','modelo','Model'])
if not model_col:
    model_col = df.columns[-1]
    log(f"Advertencia: No se detectó columna 'model'. Usando: {model_col}")

# Detect the format column; every per-format analysis below is guarded by `if format_col`.
format_col = choose_col(df, ['format','Formato','formato','fmt','format_name', 'export'])
if format_col:
    df[format_col] = df[format_col].astype(str)
    log(f"Columna de formato detectada: {format_col}")
else:
    log("No se detectó columna de formato. El análisis por formato será omitido.")

# Map metric short names to the actual CSV columns and report the mapping.
metrics_map = detect_metric_columns(df)
log("Métricas detectadas (short -> original):")
for k,v in metrics_map.items():
    log(f" - {k} -> {v}")

# Derived metric: MT / GT_IDs (NaN when either column is unavailable).
add_mt_percent(df, metrics_map)

# Coerce all metric columns to numeric so build_agg_map can pick them up.
numeric_candidates = list(metrics_map.values()) + ['MT_percent']
to_numeric_safe(df, [c for c in numeric_candidates if c in df.columns])

# Aggregation spec: mean of every numeric metric column.
agg_map = build_agg_map(df, metrics_map)
log(f"Agg map (mean): {agg_map}")

# Per-sequence summaries grouped by model
# NOTE(review): df['seq'] is accessed without the `'seq' in df.columns` guard
# used elsewhere in this cell; a CSV without that column raises KeyError here.
grouped_models_by_seq = {}
for seq in SEQ_LIST:
    df_seq = df[df['seq'] == seq]
    gm = group_and_rename(df_seq, model_col, agg_map, metrics_map)
    gm = normalize_and_composite(gm, COMPOSITE_WEIGHTS)
    gm = reorder_index(gm, DESIRED_ORDER_MODELS)
    grouped_models_by_seq[seq] = gm

# Cross-sequence average, re-normalised and re-scored, stored under the key 'Average'.
avg_models = compute_average(grouped_models_by_seq)
if not avg_models.empty:
    avg_models = normalize_and_composite(avg_models, COMPOSITE_WEIGHTS)
    avg_models = reorder_index(avg_models, DESIRED_ORDER_MODELS)
    grouped_models_by_seq['Average'] = avg_models
else:
    grouped_models_by_seq['Average'] = pd.DataFrame()

# Export one CSV per sequence (and 'Average'), restricted to the allowed metrics.
for seq_name, gdf in grouped_models_by_seq.items():
    out_csv = os.path.join(OUT_DIR, f"model_summary_{seq_name}.csv")
    if not gdf.empty:
        export_cols = [c for c in ALLOWED_EXPORT_METRICS if c in gdf.columns]
        gdf_export = gdf[export_cols]
        gdf_export.to_csv(out_csv)
        log(f"Guardado: {out_csv}")

# Per-sequence summaries grouped by format (only when a format column was detected)
grouped_formats_by_seq = {}
if format_col:
    for seq in SEQ_LIST:
        df_seq = df[df['seq'] == seq]
        gf = group_and_rename(df_seq, format_col, agg_map, metrics_map)
        gf = normalize_and_composite(gf, COMPOSITE_WEIGHTS)
        gf = reorder_index(gf, DESIRED_ORDER_FORMATS)
        grouped_formats_by_seq[seq] = gf
    # Cross-sequence average, re-normalised and re-scored, stored under 'Average'.
    avg_formats = compute_average(grouped_formats_by_seq)
    if not avg_formats.empty:
        avg_formats = normalize_and_composite(avg_formats, COMPOSITE_WEIGHTS)
        avg_formats = reorder_index(avg_formats, DESIRED_ORDER_FORMATS)
        grouped_formats_by_seq['Average'] = avg_formats
    else:
        grouped_formats_by_seq['Average'] = pd.DataFrame()
    # Export one CSV per sequence (and 'Average'), restricted to the allowed metrics.
    for seq_name, gdf in grouped_formats_by_seq.items():
        out_csv = os.path.join(OUT_DIR, f"format_summary_{seq_name}.csv")
        if not gdf.empty:
            export_cols = [c for c in ALLOWED_EXPORT_METRICS if c in gdf.columns]
            gdf_export = gdf[export_cols]
            gdf_export.to_csv(out_csv)
            log(f"Guardado: {out_csv}")

# Long tables keyed by (seq, model) and (seq, format): mean of each numeric
# metric, with metric columns renamed to their short names.
df_seq_model = pd.DataFrame()
if 'seq' in df.columns and model_col in df.columns:
    numeric_cols = list(agg_map.keys()) + ['MT_percent']
    numeric_cols = [c for c in numeric_cols if c in df.columns]
    df_seq_model = df[df['seq'].isin(SEQ_LIST)][['seq', model_col] + numeric_cols].copy()
    df_seq_model = df_seq_model.groupby(['seq', model_col]).mean().reset_index()
    rename_map = {v:k for k,v in metrics_map.items() if v in df_seq_model.columns}
    df_seq_model = df_seq_model.rename(columns=rename_map)

# Same table keyed by format; left empty when no format column was detected.
df_seq_format = pd.DataFrame()
if format_col and 'seq' in df.columns:
    numeric_cols_f = list(agg_map.keys()) + ['MT_percent']
    numeric_cols_f = [c for c in numeric_cols_f if c in df.columns]
    df_seq_format = df[df['seq'].isin(SEQ_LIST)][['seq', format_col] + numeric_cols_f].copy()
    df_seq_format = df_seq_format.groupby(['seq', format_col]).mean().reset_index()
    rename_map_f = {v:k for k,v in metrics_map.items() if v in df_seq_format.columns}
    df_seq_format = df_seq_format.rename(columns=rename_map_f)

Columna de formato detectada: export
Métricas detectadas (short -> original):
 - AssA_AUC -> AssA___AUC
 - IDF1 -> IDF1
 - HOTA_AUC -> HOTA___AUC
 - MT -> MT
 - GT_IDs -> GT_IDs
 - IDSW -> IDSW
 - Frag -> Frag
 - Dets -> Dets
Agg map (mean): {'AssA___AUC': 'mean', 'IDF1': 'mean', 'HOTA___AUC': 'mean', 'MT': 'mean', 'GT_IDs': 'mean', 'IDSW': 'mean', 'Frag': 'mean', 'Dets': 'mean'}
Guardado: results/model_summary_Tracking1.csv
Guardado: results/model_summary_Tracking2.csv
Guardado: results/model_summary_Average.csv
Guardado: results/format_summary_Tracking1.csv
Guardado: results/format_summary_Tracking2.csv
Guardado: results/format_summary_Average.csv


# Gráficas de Barras

In [4]:
# Bar charts per model and per format
figsize = (8, 3)
# Plot only the key metrics that exist in at least one of the summary tables.
plot_metrics = [m for m in ['IDF1','AssA_AUC','HOTA_AUC','MT_percent'] if any((m in g.columns) for g in grouped_models_by_seq.values())]
for seq_name, gdf in grouped_models_by_seq.items():
    if gdf.empty: continue
    plot_group_bars(gdf, seq_name, "modelo", DESIRED_ORDER_MODELS, plot_metrics, OUT_DIR, figsize)
    plot_idsw_frag(gdf, seq_name, "modelo", DESIRED_ORDER_MODELS, OUT_DIR, figsize)

# Same charts grouped by format, when a format column was detected.
if format_col:
    plot_metrics_fmt = [m for m in ['IDF1','AssA_AUC','HOTA_AUC','MT_percent'] if any((m in g.columns) for g in grouped_formats_by_seq.values())]
    for seq_name, gdf in grouped_formats_by_seq.items():
        if gdf.empty: continue
        plot_group_bars(gdf, seq_name, "formato", DESIRED_ORDER_FORMATS, plot_metrics_fmt, OUT_DIR, figsize)
        plot_idsw_frag(gdf, seq_name, "formato", DESIRED_ORDER_FORMATS, OUT_DIR, figsize)

Plot guardado: results/metricas_clave_modelo_Tracking1.pdf
Plot guardado: results/idsw_frag_modelo_Tracking1.pdf
Plot guardado: results/metricas_clave_modelo_Tracking2.pdf
Plot guardado: results/idsw_frag_modelo_Tracking2.pdf
Plot guardado: results/metricas_clave_modelo_Average.pdf
Plot guardado: results/idsw_frag_modelo_Average.pdf
Plot guardado: results/metricas_clave_formato_Tracking1.pdf
Plot guardado: results/idsw_frag_formato_Tracking1.pdf
Plot guardado: results/metricas_clave_formato_Tracking2.pdf
Plot guardado: results/idsw_frag_formato_Tracking2.pdf
Plot guardado: results/metricas_clave_formato_Average.pdf
Plot guardado: results/idsw_frag_formato_Average.pdf


# Curvas IOU

In [5]:
# IoU curves
# Curve columns are "<prefix><integer>"; the integer suffix is used as the x value.
# NOTE(review): thresholds follow the CSV column order and are not sorted here.
ass_cols = [c for c in df.columns if c.startswith(ASSA_PREFIX) and c.split("___")[-1].isdigit()]
det_cols = [c for c in df.columns if c.startswith(DETA_PREFIX) and c.split("___")[-1].isdigit()]
thresholds_ass = [int(c.split("___")[-1]) for c in ass_cols]
thresholds_det = [int(c.split("___")[-1]) for c in det_cols]
figsize = (5, 3)

# Per-sequence curves, grouped by model and (when available) by format.
for seq in SEQ_LIST:
    df_seq = df[df['seq'] == seq]
    if df_seq.empty: continue
    if ass_cols:
        plot_curves_by_iou(df_seq, model_col, "AssA", thresholds_ass, ass_cols, DESIRED_ORDER_MODELS, "AssA", "model", seq, OUT_DIR, figsize)
    if det_cols:
        plot_curves_by_iou(df_seq, model_col, "DetA", thresholds_det, det_cols, DESIRED_ORDER_MODELS, "DetA", "model", seq, OUT_DIR, figsize)
    if format_col:
        if ass_cols:
            plot_curves_by_iou(df_seq, format_col, "AssA", thresholds_ass, ass_cols, DESIRED_ORDER_FORMATS, "AssA", "format", seq, OUT_DIR, figsize)
        if det_cols:
            plot_curves_by_iou(df_seq, format_col, "DetA", thresholds_det, det_cols, DESIRED_ORDER_FORMATS, "DetA", "format", seq, OUT_DIR, figsize)

# "Average" curves per model: mean within each sequence first, then across
# sequences, so every sequence weighs the same regardless of its row count.
if ass_cols:
    m_ass = df[df['seq'].isin(SEQ_LIST)].groupby(['seq', model_col])[ass_cols].mean()
    avg_ass = m_ass.groupby(model_col).mean().reset_index()
    plot_curves_by_iou(avg_ass, model_col, "AssA", thresholds_ass, ass_cols, DESIRED_ORDER_MODELS, "AssA", "model", "Average", OUT_DIR, figsize)
if det_cols:
    m_det = df[df['seq'].isin(SEQ_LIST)].groupby(['seq', model_col])[det_cols].mean()
    avg_det = m_det.groupby(model_col).mean().reset_index()
    plot_curves_by_iou(avg_det, model_col, "DetA", thresholds_det, det_cols, DESIRED_ORDER_MODELS, "DetA", "model", "Average", OUT_DIR, figsize)

# "Average" curves per format, computed the same way.
if format_col:
    if ass_cols:
        f_ass = df[df['seq'].isin(SEQ_LIST)].groupby(['seq', format_col])[ass_cols].mean()
        avg_f_ass = f_ass.groupby(format_col).mean().reset_index()
        plot_curves_by_iou(avg_f_ass, format_col, "AssA", thresholds_ass, ass_cols, DESIRED_ORDER_FORMATS, "AssA", "format", "Average", OUT_DIR, figsize)
    if det_cols:
        f_det = df[df['seq'].isin(SEQ_LIST)].groupby(['seq', format_col])[det_cols].mean()
        avg_f_det = f_det.groupby(format_col).mean().reset_index()
        plot_curves_by_iou(avg_f_det, format_col, "DetA", thresholds_det, det_cols, DESIRED_ORDER_FORMATS, "DetA", "format", "Average", OUT_DIR, figsize)

Plot guardado: results/assa_vs_iou_model_Tracking1.pdf
Plot guardado: results/deta_vs_iou_model_Tracking1.pdf
Plot guardado: results/assa_vs_iou_format_Tracking1.pdf
Plot guardado: results/deta_vs_iou_format_Tracking1.pdf
Plot guardado: results/assa_vs_iou_model_Tracking2.pdf
Plot guardado: results/deta_vs_iou_model_Tracking2.pdf
Plot guardado: results/assa_vs_iou_format_Tracking2.pdf
Plot guardado: results/deta_vs_iou_format_Tracking2.pdf
Plot guardado: results/assa_vs_iou_model_Average.pdf
Plot guardado: results/deta_vs_iou_model_Average.pdf
Plot guardado: results/assa_vs_iou_format_Average.pdf
Plot guardado: results/deta_vs_iou_format_Average.pdf


# Mapas de Calor

In [6]:
# Composite-score heatmaps
# Flatten the per-sequence model summaries into (seq, model, composite_score) rows.
model_composite_rows = []
for seq_name, gdf in grouped_models_by_seq.items():
    if gdf.empty: continue
    for idx, row in gdf.iterrows():
        model_composite_rows.append({'seq': seq_name, 'model': idx, 'composite_score': row.get('composite_score', np.nan)})
model_comp_df = pd.DataFrame(model_composite_rows)
# pivot_models (sequences x models) is also read by the rankings cell below.
pivot_models = pd.DataFrame()
if not model_comp_df.empty:
    pivot_models = model_comp_df.pivot(index='seq', columns='model', values='composite_score')
    # Preferred model order first, any other model afterwards.
    cols_order = [c for c in DESIRED_ORDER_MODELS if c in pivot_models.columns]
    other_cols = [c for c in pivot_models.columns if c not in cols_order]
    pivot_models = pivot_models[cols_order + other_cols]
    pivot_path = os.path.join(OUT_DIR, "seq_model_composite_matrix.csv")
    pivot_models.to_csv(pivot_path)
    heatmap_path = os.path.join(OUT_DIR, "seq_model_composite_heatmap.pdf")
    heatmap(pivot_models, "Heatmap composite modelos", heatmap_path)

# Same matrix and heatmap for formats.
if format_col and grouped_formats_by_seq:
    format_composite_rows = []
    for seq_name, gdf in grouped_formats_by_seq.items():
        if gdf.empty: continue
        for idx, row in gdf.iterrows():
            format_composite_rows.append({'seq': seq_name, 'format': idx, 'composite_score': row.get('composite_score', np.nan)})
    format_comp_df = pd.DataFrame(format_composite_rows)
    if not format_comp_df.empty:
        pivot_formats = format_comp_df.pivot(index='seq', columns='format', values='composite_score')
        cols_order_f = [c for c in DESIRED_ORDER_FORMATS if c in pivot_formats.columns]
        other_cols_f = [c for c in pivot_formats.columns if c not in cols_order_f]
        pivot_formats = pivot_formats[cols_order_f + other_cols_f]
        pivot_f_path = os.path.join(OUT_DIR, "seq_format_composite_matrix.csv")
        pivot_formats.to_csv(pivot_f_path)
        heatmap_f_path = os.path.join(OUT_DIR, "seq_format_composite_heatmap.pdf")
        heatmap(pivot_formats, "Heatmap composite formatos", heatmap_f_path)

Heatmap guardado: results/seq_model_composite_heatmap.pdf
Heatmap guardado: results/seq_format_composite_heatmap.pdf


In [7]:
# Rankings
# Model ranking: mean composite score over the real sequences (the 'Average'
# row is excluded), sorted from best to worst.
if not pivot_models.empty:
    avg_model_scores = pivot_models.loc[[r for r in pivot_models.index if r != 'Average']].mean(axis=0).sort_values(ascending=False)
    avg_model_scores.to_csv(os.path.join(OUT_DIR, "ranking_modelos_promedio.csv"), header=['mean_composite'])
# Format ranking: composite score of the 'Average' format table, best first.
if format_col and 'Average' in grouped_formats_by_seq and not grouped_formats_by_seq['Average'].empty:
    fmt_avg = grouped_formats_by_seq['Average']
    fmt_avg[['composite_score']].sort_values('composite_score', ascending=False).to_csv(
        os.path.join(OUT_DIR, "ranking_formatos_average.csv"))

# Factor de Mejora entre formatos

In [8]:
# =====================================================
# IMPROVEMENT FACTOR BETWEEN FORMATS
# =====================================================
if format_col:
    # One row per (seq, model, format) with the mean of each metric.
    df_triplet = aggregate_seq_model_format(df, SEQ_LIST, model_col, format_col, metrics_map)
    if not df_triplet.empty:
        comparison_metrics = [m for m in COMPARISON_METRICS_CANDIDATES if m in df_triplet.columns]
        # Only target formats that actually occur in the data are compared.
        factor_results = compute_format_factor(
            df_triplet=df_triplet,
            model_col=model_col,
            format_col=format_col,
            baseline_format=BASELINE_FORMAT,
            target_formats=[f for f in COMPARISON_TARGET_FORMATS if f in df_triplet[format_col].unique()],
            metrics=comparison_metrics
        )
        if factor_results:
            save_factor_tables(factor_results, OUT_DIR, model_col)
            plot_factor_lines_by_model(factor_results, OUT_DIR, model_col, DESIRED_ORDER_MODELS)
            plot_factor_lines_by_seq(factor_results, OUT_DIR)

        # Plots for the specific (model, format) combinations
        if SPECIFIC_MODEL_FORMATS:
            plot_specific_model_format_combos(
                df_triplet=df_triplet,
                combos=SPECIFIC_MODEL_FORMATS,
                model_col=model_col,
                format_col=format_col,
                metrics=comparison_metrics,
                out_dir=OUT_DIR
            )
    else:
        log("No se pudo crear df_triplet para factores de formato.")

log(f"Análisis completado. Resultados en: {OUT_DIR}")

Guardado factores detalle: results/format_factor_seq_model_FP16.csv
Guardado factores detalle: results/format_factor_seq_model_INT8.csv
Plot guardado: results/factor_lines_FP16_by_model.pdf
Plot guardado: results/factor_lines_INT8_by_model.pdf
Plot guardado: results/factor_lines_FP16_by_seq.pdf
Plot guardado: results/factor_lines_INT8_by_seq.pdf
Análisis completado. Resultados en: results


# LaTeX Tables

In [9]:
import pandas as pd

def csv_a_latex(csv_path: str,
                dec: int = 2,
                cols: list[str] | None = None,
                index: bool = False,
                na: str = '',
                max_w: int | None = None,
                out_path: str | None = None) -> str:
    """
    Convierte un CSV en tabla LaTeX.
    - Redondeo a 'dec' decimales.
    - Selección de columnas.
    - Truncado opcional de texto.
    - Renombra columnas:
        * Elimina sufijo '_AUC'
        * MT_percent -> MT\\%
    - Renombra valores de formatos:
        * FP16 -> TensorRT-FP16
        * INT8 -> TensorRT-INT8
    """
    if not os.path.isfile(csv_path):
        log(f"No existe: {csv_path}")
        return ""
    df = pd.read_csv(csv_path)

    # Si viene índice exportado como primera columna tipo 'Unnamed: 0', intentar usarlo
    if 'Unnamed: 0' in df.columns and 'format' not in df.columns and 'model' not in df.columns:
        df = df.rename(columns={'Unnamed: 0': 'index_label'})

    # Filtrar columnas deseadas
    if cols:
        keep = [c for c in cols if c in df.columns]
        if keep:
            df = df[keep]

    # Renombrar columnas (_AUC fuera y MT_percent -> MT\%)
    rename_cols = {}
    for c in df.columns:
        new_c = c
        if new_c.endswith('_AUC'):
            new_c = new_c[:-4]  # quitar _AUC
        if new_c == 'MT_percent':
            new_c = 'MT\\%'
        rename_cols[c] = new_c
    df = df.rename(columns=rename_cols)

    # Map de formatos
    format_map = {
        'FP16': 'TensorRT-FP16',
        'INT8': 'TensorRT-INT8',
        'Pytorch': 'Pytorch'
    }

    # Aplicar renombre si existe columna de formato
    for possible in ['format', 'Formato', 'formato']:
        if possible in df.columns:
            df[possible] = df[possible].replace(format_map)
    # También intentar sobre el índice si procede
    try:
        df.index = df.index.to_series().replace(format_map).values
    except Exception:
        pass

    # Redondeo numérico
    num_cols = df.select_dtypes(include=[np.number]).columns
    if len(num_cols):
        df[num_cols] = df[num_cols].round(dec)

    # Truncar texto
    if max_w:
        for c in df.columns:
            if c not in num_cols:
                df[c] = df[c].astype(str).map(lambda v: v if len(v) <= max_w else v[:max_w-1] + '…')

    fmt = lambda x: f"{x:.{dec}f}"
    latex = df.to_latex(index=index, float_format=fmt, na_rep=na, escape=False)

    if out_path:
        with open(out_path, 'w', encoding='utf-8') as f:
            f.write(latex)
        log(f"Tabla LaTeX guardada: {out_path}")
    return latex


In [10]:
# Render the Tracking2 per-model summary as a LaTeX table and print it.
# The path is built from OUT_DIR instead of a hard-coded "results/" prefix so
# that it follows the configured output directory.
# NOTE(review): assumes model_summary_Tracking2.csv is written under OUT_DIR
# like the other result files — confirm against the cell that saves it.
csv_path = os.path.join(OUT_DIR, "model_summary_Tracking2.csv")
latex_text = csv_a_latex(csv_path=csv_path, dec=6, index=False, na='-')
print(latex_text)

\begin{tabular}{lrrrrrr}
\toprule
model & IDF1 & AssA & HOTA & MT\% & IDSW & Frag \\
\midrule
Salmones2024 & 0.642824 & 0.534584 & 0.488566 & 0.274775 & 11.333333 & 29.000000 \\
yolov8m & 0.377577 & 0.363339 & 0.287797 & 0.009009 & 5.500000 & 29.666667 \\
yolov8l & 0.389989 & 0.391296 & 0.304429 & 0.049550 & 5.833333 & 26.000000 \\
yolov9c & 0.313651 & 0.367838 & 0.257206 & 0.018018 & 5.666667 & 15.166667 \\
yolo11m & 0.411761 & 0.438106 & 0.341444 & 0.063063 & 4.833333 & 16.833333 \\
yolo11l & 0.373115 & 0.397294 & 0.299152 & 0.072072 & 6.666667 & 20.000000 \\
\bottomrule
\end{tabular}

