In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
def read_csv(csv_file):
    """Load a results CSV into a pandas DataFrame.

    Thin wrapper around ``pd.read_csv`` called with its default options.

    Args:
        csv_file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    # NOTE(review): the script that produced these files appears to write the
    # header ["iter", "video_name", "category", "miou", "F_score"] — confirm.
    return pd.read_csv(csv_file)

# Result CSVs for the two runs that the rest of the notebook compares.
baseline_path = "/home/sophie/repos/AVSegFormer/output_files/eval_2209_s4_baseline.csv"
model_path = "/home/sophie/repos/AVSegFormer/output_files/eval_2909_epochs5_sav-pretrained_s4_output.csv"

# Load both files in order (baseline first, then model).
baseline_df, model_df = (read_csv(path) for path in (baseline_path, model_path))

In [None]:
def calculate_avg_metrics_by_category(df):
    """Average the ``miou`` and ``F_score`` columns within each category.

    Args:
        df: DataFrame containing ``category``, ``miou`` and ``F_score`` columns.

    Returns:
        DataFrame indexed by ``category`` whose two columns hold the mean
        ``miou`` and mean ``F_score`` of each group.
    """
    metric_columns = ['miou', 'F_score']
    per_category = df.groupby('category')
    return per_category[metric_columns].mean()

def find_top_n(df, n=10, metric='miou'):
    """Return the ``n`` rows of ``df`` with the largest values of ``metric``.

    Args:
        df: DataFrame that contains the ``metric`` column.
        n: Number of rows to return. Defaults to 10.
        metric: Name of the column to rank by. Defaults to ``'miou'``, so
            existing two-argument calls behave as before; pass e.g.
            ``'F_score'`` to rank by that column instead.

    Returns:
        DataFrame with at most ``n`` rows, ordered from the highest to the
        lowest ``metric`` value.
    """
    return df.nlargest(n, metric)


def find_bottom_n(df, n=10, metric='miou'):
    """Return the ``n`` rows of ``df`` with the smallest values of ``metric``.

    Args:
        df: DataFrame that contains the ``metric`` column.
        n: Number of rows to return. Defaults to 10.
        metric: Name of the column to rank by. Defaults to ``'miou'``, so
            existing two-argument calls behave as before; pass e.g.
            ``'F_score'`` to rank by that column instead.

    Returns:
        DataFrame with at most ``n`` rows, ordered from the lowest to the
        highest ``metric`` value.
    """
    return df.nsmallest(n, metric)

In [None]:
# Mean miou and F_score per category for the baseline run; kept in a named
# variable so it can be inspected again after printing.
baseline_met_by_cat = calculate_avg_metrics_by_category(baseline_df)
print(baseline_met_by_cat)

In [None]:
# Print the highest- and then the lowest-miou rows of the baseline run
# (each helper returns 10 rows by default).
for select_rows in (find_top_n, find_bottom_n):
    print(select_rows(baseline_df))

In [None]:
# Mean miou and F_score per category for the model run.
model_met_by_cat = calculate_avg_metrics_by_category(model_df)
print(model_met_by_cat)

In [None]:
# Print the highest- and then the lowest-miou rows of the model run
# (each helper returns 10 rows by default).
for select_rows in (find_top_n, find_bottom_n):
    print(select_rows(model_df))

In [None]:
# Mean miou / F_score per category for each run (rows indexed by category).
baseline_metrics = calculate_avg_metrics_by_category(baseline_df)
model_metrics = calculate_avg_metrics_by_category(model_df)

# plotting
# Bars are placed by position, so both series must share one category axis.
# Take the union of the two runs' categories and reindex each series onto it:
# a category missing from one run becomes NaN (leaving a gap) instead of
# shifting later bars under the wrong tick label or making ax.bar fail on a
# length mismatch. When both runs cover the same categories this is a no-op.
categories = baseline_metrics.index.union(model_metrics.index)
baseline_iou = baseline_metrics['miou'].reindex(categories)
model_iou = model_metrics['miou'].reindex(categories)

fig, ax = plt.subplots(figsize=(14, 8))

x = np.arange(len(categories))
width = 0.35  # each series sits half a bar-width to either side of its tick
bars1 = ax.bar(x - width/2, baseline_iou, width, label='Baseline (2209)', alpha=0.8, color='skyblue')
bars2 = ax.bar(x + width/2, model_iou, width, label='Model (2909)', alpha=0.8, color='orange')

ax.set_xlabel('Category')
ax.set_ylabel('Mean IoU')
ax.set_title('IoU Comparison by Category: Baseline vs Model')
ax.set_xticks(x)
ax.set_xticklabels(categories, rotation=45, ha='right')
ax.legend()
ax.grid(True, alpha=0.3)

# Write each bar's value 3 points above its top, baseline bars first and then
# model bars. NaN heights (category absent from that run) are skipped so no
# "nan" label is drawn at an undefined position.
for container in (bars1, bars2):
    for bar in container:
        height = bar.get_height()
        if np.isnan(height):
            continue
        ax.annotate(f'{height:.3f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=8)
plt.tight_layout()
plt.show()


# numerical
# Per-category table of mean miou for both runs plus absolute and relative
# change. pandas aligns the two series on their category index, so a category
# present in only one run shows NaN. A baseline mean of exactly 0 gives
# inf/NaN in 'Difference %' rather than raising.
comparison_df = (
    pd.DataFrame({
        'Baseline': baseline_metrics['miou'],
        'Model': model_metrics['miou'],
    })
    .assign(**{
        'Difference': lambda table: table['Model'] - table['Baseline'],
        'Difference %': lambda table: table['Difference'] / table['Baseline'] * 100,
    })
    .sort_values('Difference %', ascending=False)
)

print("IoU Comparison by Category (Sorted by Difference %):")
print("=" * 60)
print(comparison_df.round(4))