In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
def read_csv(csv_file):
    """Load an evaluation results CSV into a DataFrame.

    Parameters
    ----------
    csv_file : str, path-like or file-like
        Passed straight to ``pandas.read_csv`` with default options.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    # Header presumably written by the evaluation script (taken from a
    # commented-out writer call; confirm against the actual files):
    #   ["iter", "video_name", "category", "miou", "F_score"]
    return pd.read_csv(csv_file)

# Evaluation result files to compare.
# NOTE(review): these are absolute paths on one machine; they must be edited
# (or made configurable) before the notebook can run anywhere else.
baseline_path = "/home/sophie/repos/AVSegFormer/output_files/eval_2209_s4_baseline.csv"
#model_path = "/home/sophie/repos/AVSegFormer/output_files/eval_2909_epochs5_sav-pretrained_s4_output.csv"
# Active model file: the "eval_1809_duration5s..." results (the "eval_2909..."
# alternative above is commented out).
model_path = "/home/sophie/repos/AVSegFormer/output_files/eval_1809_duration5s_epochs5_sav-pretrained_s4.csv"

# Load both result tables; every later cell reads from these two DataFrames.
baseline_df = read_csv(baseline_path)
model_df = read_csv(model_path)

In [None]:
def calculate_avg_metrics_by_category(df):
    """Average the 'miou' and 'F_score' columns within each category.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'category', 'miou' and 'F_score' columns.

    Returns
    -------
    pandas.DataFrame
        Indexed by category, with the mean 'miou' and 'F_score' as columns.
    """
    metric_columns = ['miou', 'F_score']
    per_category = df.groupby('category')
    return per_category[metric_columns].mean()

def find_top_n(df, n=10):
    """Return the `n` rows of `df` with the highest 'miou', best first.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'miou' column.
    n : int, default 10
        Number of rows to keep.

    Returns
    -------
    pandas.DataFrame
        The selected rows with their original index labels and all columns.
    """
    ranking_column = 'miou'
    return df.nlargest(n=n, columns=ranking_column)


def find_bottom_n(df, n=10):
    """Return the `n` rows of `df` with the lowest 'miou', worst first.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'miou' column.
    n : int, default 10
        Number of rows to keep.

    Returns
    -------
    pandas.DataFrame
        The selected rows with their original index labels and all columns.
    """
    ranking_column = 'miou'
    return df.nsmallest(n=n, columns=ranking_column)

In [None]:
# Per-category mean 'miou' and 'F_score' for the baseline results.
baseline_met_by_cat = calculate_avg_metrics_by_category(baseline_df)
print(baseline_met_by_cat)

In [None]:
# Best and worst baseline videos by 'miou'; each selector uses its default n=10.
for heading, select in (
    ("Top-10 IoU Videos (baseline)", find_top_n),
    ("\nBottom-10 IoU Videos (baseline)", find_bottom_n),
):
    print(heading)
    print(select(baseline_df))

In [None]:
# Per-category mean 'miou' and 'F_score' for the model results.
model_met_by_cat = calculate_avg_metrics_by_category(model_df)
print(model_met_by_cat)

In [None]:
# Best and worst model videos by 'miou'; each selector uses its default n=10.
for heading, select in (
    ("Top-10 IoU Videos (5s 000-025 model)", find_top_n),
    ("\nBottom-10 IoU Videos (5s 000-025 model)", find_bottom_n),
):
    print(heading)
    print(select(model_df))

In [None]:
# Per-category means for both runs.
baseline_metrics = calculate_avg_metrics_by_category(baseline_df)
model_metrics = calculate_avg_metrics_by_category(model_df)

# plotting
# Bars are drawn by position, so both series are first aligned on the union of
# categories. Without this, a category present in only one run would either
# shift every following bar onto the wrong label or raise a shape error.
# A category missing from one run becomes NaN and simply has no bar.
categories = baseline_metrics.index.union(model_metrics.index)
baseline_iou = baseline_metrics['miou'].reindex(categories)
model_iou = model_metrics['miou'].reindex(categories)

fig, ax = plt.subplots(figsize=(14, 8))

x = np.arange(len(categories))
width = 0.35
bars1 = ax.bar(x - width/2, baseline_iou, width, label='Baseline (2209)', alpha=0.8, color='skyblue')
# The legend names the results file actually loaded into model_df
# (eval_1809_duration5s...), not the commented-out 2909 run.
bars2 = ax.bar(x + width/2, model_iou, width, label='Model (1809)', alpha=0.8, color='orange')

ax.set_xlabel('Category')
ax.set_ylabel('Mean IoU')
ax.set_title('IoU Comparison by Category: Baseline vs Model')
ax.set_xticks(x)
ax.set_xticklabels(categories, rotation=45, ha='right')
ax.legend()
ax.grid(True, alpha=0.3)

# Write each bar's value just above it (3 points of vertical offset).
for bar_group in (bars1, bars2):
    for bar in bar_group:
        height = bar.get_height()
        if np.isnan(height):
            # Category absent from this run: nothing to label.
            continue
        ax.annotate(f'{height:.3f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 3),
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=8)
plt.tight_layout()
plt.show()


# numerical
# Same aligned series as the plot. A baseline mean of exactly 0 makes
# 'Difference %' inf/NaN rather than raising.
iou_delta = model_iou - baseline_iou
comparison_df = pd.DataFrame({
    'Baseline': baseline_iou,
    'Model': model_iou,
    'Difference': iou_delta,
    'Difference %': iou_delta / baseline_iou * 100
}).sort_values('Difference %', ascending=False)

print("IoU Comparison by Category (Sorted by Difference %):")
print("=" * 60)
print(comparison_df.round(4))

In [None]:
def find_biggest_iou_differences(baseline_df, model_df, n=10):
    """Find the samples whose 'miou' changed most between two result tables.

    Rows are paired with an inner merge on ('video_name', 'category'), so only
    samples present in both tables are considered.

    Parameters
    ----------
    baseline_df, model_df : pandas.DataFrame
        Result tables containing 'video_name', 'category' and 'miou'.
    n : int, default 10
        Number of samples to return.

    Returns
    -------
    pandas.DataFrame
        The `n` rows with the largest absolute change (model minus baseline),
        largest first, whether improvement or degradation. Columns:
        video_name, category, miou_baseline, miou_model, iou_difference,
        iou_difference_pct (change relative to the baseline value, in percent).
    """
    report_columns = [
        'video_name', 'category', 'miou_baseline', 'miou_model',
        'iou_difference', 'iou_difference_pct',
    ]

    paired = baseline_df.merge(
        model_df,
        on=['video_name', 'category'],
        suffixes=('_baseline', '_model'),
    )

    # Signed change: positive means the model scored higher than the baseline.
    delta = paired['miou_model'] - paired['miou_baseline']
    paired = paired.assign(
        iou_difference=delta,
        iou_difference_pct=(delta / paired['miou_baseline']) * 100,
        abs_iou_difference=delta.abs(),
    )

    return paired.nlargest(n, 'abs_iou_difference')[report_columns]

# Ten samples whose 'miou' moved the most between the two runs, either direction.
biggest_differences = find_biggest_iou_differences(baseline_df, model_df, n=10)

report_lines = [
    "Top 10 Samples with Biggest IoU Differences:",
    "=" * 80,
    biggest_differences.round(4).to_string(index=False),
]
print("\n".join(report_lines))

In [None]:
# Calculate improvements vs degradations from ALL samples.
# Inner merge: only (video_name, category) pairs present in both tables count.
merged_all = pd.merge(baseline_df, model_df, on=['video_name', 'category'], suffixes=('_baseline', '_model'))
merged_all['iou_difference'] = merged_all['miou_model'] - merged_all['miou_baseline']

# Rows whose difference is NaN match none of the three comparisons below.
all_improvements = merged_all[merged_all['iou_difference'] > 0]
all_degradations = merged_all[merged_all['iou_difference'] < 0]
no_change = merged_all[merged_all['iou_difference'] == 0]

total_samples = len(merged_all)


def _share_of_total(subset):
    """Percentage of paired samples in `subset`; 0.0 when nothing was paired."""
    # Guard: an empty merge would otherwise raise ZeroDivisionError here.
    return len(subset) / total_samples * 100 if total_samples else 0.0


print(f"\nTotal samples: {total_samples}")
print(f"Improvements: {len(all_improvements)} samples ({_share_of_total(all_improvements):.1f}%)")
print(f"Degradations: {len(all_degradations)} samples ({_share_of_total(all_degradations):.1f}%)")
print(f"No change: {len(no_change)} samples ({_share_of_total(no_change):.1f}%)")
if not total_samples:
    print("Warning: the two result files share no (video_name, category) pairs.")

In [None]:

# Table of the ten largest positive 'miou' changes (model minus baseline).
table_header = f"{'Video Name':<20} {'Category':<35} {'Baseline':<10} {'Model':<10} {'Difference':>12}"
print("\nTop 10 Improvements (from all samples):")
print("=" * 110)
print(table_header)
print("-" * 110)

shown_columns = ['video_name', 'category', 'miou_baseline', 'miou_model', 'iou_difference']
top_improvements = all_improvements.nlargest(10, 'iou_difference')[shown_columns]

for row_label, sample in top_improvements.iterrows():
    cells = (
        f"{sample['video_name']:<20}",
        f"{sample['category']:<35}",
        f"{sample['miou_baseline']:<10.4f}",
        f"{sample['miou_model']:<10.4f}",
        f"{sample['iou_difference']:>+12.4f}",  # explicit sign on the gain
    )
    print(" ".join(cells))

In [None]:
# Table of the ten largest negative 'miou' changes (model minus baseline).
table_header = f"{'Video Name':<20} {'Category':<35} {'Baseline':<10} {'Model':<10} {'Difference':>12}"
print("\nTop 10 Degradations (from all samples):")
print("=" * 110)
print(table_header)
print("-" * 110)

shown_columns = ['video_name', 'category', 'miou_baseline', 'miou_model', 'iou_difference']
top_degradations = all_degradations.nsmallest(10, 'iou_difference')[shown_columns]

for row_label, sample in top_degradations.iterrows():
    cells = (
        f"{sample['video_name']:<20}",
        f"{sample['category']:<35}",
        f"{sample['miou_baseline']:<10.4f}",
        f"{sample['miou_model']:<10.4f}",
        f"{sample['iou_difference']:>12.4f}",
    )
    print(" ".join(cells))

print("=" * 110)