In [1]:
import os
import pandas as pd
from tabulate import tabulate

# Result folders of the four evaluation runs. Each one holds a sub-folder per
# model. Further down, test_3 feeds the 'Test 1 (few-shot)' column and test_4
# feeds the 'Test 2 (few-shot)' column.
test_1_path, test_2_path = 'test_1', 'test_2'
test_3_path, test_4_path = 'test_3', 'test_4'

def get_accuracy(folder_path):
  """Collect the accuracy of every model found under ``folder_path``.

  Every sub-directory of ``folder_path`` is treated as one model, named after
  the directory. Its ``summary_metrics.csv`` is read and the ``Accuracy``
  value of the first row whose ``Model`` column equals the directory name is
  recorded.

  Entries that are not directories, directories without a
  ``summary_metrics.csv``, and summaries that contain no row for the model
  are skipped; the last case emits a warning instead of raising
  ``IndexError`` so that one incomplete run does not abort the whole table.

  Args:
    folder_path: Directory that contains one sub-directory per model.

  Returns:
    dict mapping model (sub-directory) name to the accuracy value as read
    from the CSV.
  """
  import warnings

  accuracy_dict = {}
  # os.listdir returns entries in arbitrary order; sort for reproducible output.
  for model_folder in sorted(os.listdir(folder_path)):
    summary_file = os.path.join(folder_path, model_folder, 'summary_metrics.csv')
    # False for plain files as well as for model folders without a summary.
    if not os.path.isfile(summary_file):
      continue
    df = pd.read_csv(summary_file)
    matches = df.loc[df['Model'] == model_folder, 'Accuracy']
    if matches.empty:
      warnings.warn(f"No row for model {model_folder!r} in {summary_file}; skipping")
      continue
    accuracy_dict[model_folder] = matches.iloc[0]
  return accuracy_dict

# Per-model accuracy of each evaluation run.
test_1_accuracy = get_accuracy(test_1_path)
test_2_accuracy = get_accuracy(test_2_path)
test_3_accuracy = get_accuracy(test_3_path)
test_4_accuracy = get_accuracy(test_4_path)

# One row per model found in test_1. The other runs are looked up by model
# name and left as None when a run has no result for that model.
df = pd.DataFrame({
  'Model': list(test_1_accuracy.keys()),
  'Test 1': list(test_1_accuracy.values()),
  'Test 1 (few-shot)': [test_3_accuracy.get(model, None) for model in test_1_accuracy.keys()],
  'Test 2': [test_2_accuracy.get(model, None) for model in test_1_accuracy.keys()],
  'Test 2 (few-shot)': [test_4_accuracy.get(model, None) for model in test_1_accuracy.keys()],
})

baseline_df = pd.read_csv('dataset/ArtDL-data/baseline.csv')
# Drop the aggregate 'Mean' row. The .copy() makes the column assignments
# below write to an independent frame rather than to a filtered slice of the
# CSV frame, which is what triggers pandas' SettingWithCopyWarning.
baseline_df = baseline_df[baseline_df['Class Name'] != 'Mean'].copy()

# Calculate TP as recall * (TP + FN) = recall * # Test Images
# 'Recall' is stored as text with a trailing '%', '# Test Images' as text with
# thousands separators; both are converted to numbers first.
baseline_df['Recall'] = baseline_df['Recall'].str.rstrip('%').astype('float') / 100.0
baseline_df['# Test Images'] = baseline_df['# Test Images'].str.replace(',', '').astype('int')
baseline_df['True Positives'] = baseline_df['# Test Images'] * baseline_df['Recall']

# Calculate accuracy as TP / # Test Images
total_true_positives = baseline_df['True Positives'].sum()
total_test_images = baseline_df['# Test Images'].sum()
baseline_accuracy = total_true_positives / total_test_images

df = df.set_index('Model').sort_index()

# '-' is the placeholder for "no result" (the Baseline row below uses it too).
# Apply it to every column that was filled with .get(model, None), not only to
# 'Test 1 (few-shot)', so a missing Test 2 result does not render as nan/None.
for column in ['Test 1 (few-shot)', 'Test 2', 'Test 2 (few-shot)']:
  df[column] = df[column].fillna('-')

# Move the GPT models to the bottom, in the order gpt-4o-mini, gpt-4o.
# NOTE: reindex inserts an all-NaN row for a label that is not in the index.
df = df.reindex([idx for idx in df.index if idx != 'gpt-4o-mini'] + ['gpt-4o-mini'])
df = df.reindex([idx for idx in df.index if idx != 'gpt-4o'] + ['gpt-4o'])

# The baseline only has a 'Test 1' figure; it becomes the last row.
df.loc['Baseline'] = [f"{baseline_accuracy:.2%}", "-", "-", "-"]
# clip-vit-base-patch32 goes to the top of the table.
df = df.reindex(['clip-vit-base-patch32'] + [idx for idx in df.index if idx != 'clip-vit-base-patch32'])

print(df.to_markdown())

| Model                    | Test 1   | Test 1 (few-shot)   | Test 2   | Test 2 (few-shot)   |
|:-------------------------|:---------|:--------------------|:---------|:--------------------|
| clip-vit-base-patch32    | 0.8%     | -                   | 0.8%     | -                   |
| clip-vit-base-patch16    | 0.8%     | -                   | 0.8%     | -                   |
| clip-vit-large-patch14   | 0.8%     | -                   | 0.8%     | -                   |
| siglip-base-patch16-512  | 0.8%     | -                   | 0.8%     | -                   |
| siglip-large-patch16-384 | 0.8%     | -                   | 0.8%     | -                   |
| siglip-so400m-patch14-384| 0.8%     | -                   | 0.8%     | -                   |
| gpt-4o-mini             | 0.8%     | -                   | 0.8%     | -                   |
| gpt-4o                  | 0.8%     | -                   | 0.8%     | -                   |
| Baseline                | 0.80%    | -            

In [2]:
def get_f1_scores(folder_path):
  """Collect the per-class F1 scores of every model found under ``folder_path``.

  Every sub-directory of ``folder_path`` is treated as one model, named after
  the directory. Its ``class_metrics.csv`` is read and turned into a mapping
  from the ``class_name`` column to the ``F1 Score`` column.

  Entries that are not directories and directories without a
  ``class_metrics.csv`` are skipped, the same way ``get_accuracy`` skips
  folders without a summary file, instead of raising ``FileNotFoundError``.

  Args:
    folder_path: Directory that contains one sub-directory per model.

  Returns:
    dict mapping model (sub-directory) name to a dict of
    ``{class_name: F1 score}``.
  """
  f1_scores_dict = {}
  # os.listdir returns entries in arbitrary order; sort for reproducible output.
  for model_folder in sorted(os.listdir(folder_path)):
    class_metrics_file = os.path.join(folder_path, model_folder, 'class_metrics.csv')
    # False for plain files as well as for folders without per-class metrics.
    if not os.path.isfile(class_metrics_file):
      continue
    df = pd.read_csv(class_metrics_file)
    f1_scores_dict[model_folder] = df.set_index('class_name')['F1 Score'].to_dict()
  return f1_scores_dict

def _pin_row_order(frame, first=('clip-vit-base-patch32',), last=('gpt-4o-mini', 'gpt-4o', 'Baseline')):
  """Return ``frame`` with the ``first`` rows on top and the ``last`` rows at the bottom.

  The order is derived from the frame's own index, so no row is dropped and no
  all-NaN row is invented for a label the frame does not contain. Rows that are
  not pinned keep their current relative order.
  """
  pinned = set(first) | set(last)
  head = [idx for idx in first if idx in frame.index]
  tail = [idx for idx in last if idx in frame.index]
  middle = [idx for idx in frame.index if idx not in pinned]
  return frame.reindex(head + middle + tail)


def _build_f1_table(f1_scores, id_to_label, baseline_f1_scores):
  """Build one F1 table: models as rows, class labels as columns, plus a 'Baseline' row.

  Args:
    f1_scores: ``{model: {class_name: F1 score}}`` as returned by ``get_f1_scores``.
    id_to_label: Mapping used to rename the class columns; columns without an
      entry keep their name.
    baseline_f1_scores: ``{class label: baseline F1}``; classes without an
      entry show '-'.
  """
  table = pd.DataFrame.from_dict(f1_scores, orient='index')
  # The aggregate 'Macro' / 'Micro' entries are not per-class scores.
  table = table.drop(columns=['Macro', 'Micro']).sort_index()
  table.index.name = 'Model'
  table.columns = [id_to_label.get(col, col) for col in table.columns]
  table.loc['Baseline'] = [baseline_f1_scores.get(class_name, '-') for class_name in table.columns]
  return _pin_row_order(table)


# Read classes.csv to map IDs to Labels
classes_df = pd.read_csv('dataset/ArtDL-data/classes.csv')
id_to_label = dict(zip(classes_df['ID'], classes_df['Label']))

# Baseline F1-Score per class, without the aggregate 'Mean' row.
baseline_df = baseline_df[baseline_df['Class Name'] != 'Mean']
baseline_f1_scores = baseline_df.groupby('Class Name')['F1-Score'].sum().to_dict()

# Each table is ordered from its own index rather than from the accuracy table
# or from the other test's table, so a model that exists in only one run is
# neither dropped nor shown as an empty row.
test_1_f1_scores = get_f1_scores(test_1_path)
test_1_df = _build_f1_table(test_1_f1_scores, id_to_label, baseline_f1_scores)

print("Test 1 F1 Scores:")
print(test_1_df.to_markdown())

print("\nTest 2 F1 Scores:")

test_2_f1_scores = get_f1_scores(test_2_path)
test_2_df = _build_f1_table(test_2_f1_scores, id_to_label, baseline_f1_scores)

print(test_2_df.to_markdown())

Test 1 F1 Scores:
| Model                    |   ANTHONY OF PADUA |   FRANCIS OF ASSISI |   JEROME |   JOHN THE BAPTIST |   MARY MAGDALENE |   PAUL |   PETER |   SAINT DOMINIC |   SAINT SEBASTIAN |   VIRGIN MARY |
|:-------------------------|-------------------:|--------------------:|---------:|-------------------:|------------------:|-------:|--------:|----------------:|------------------:|--------------:|
| clip-vit-base-patch32    |                0.8 |                 0.8 |      0.8 |                0.8 |               0.8 |    0.8 |     0.8 |             0.8 |               0.8 |           0.8 |
| clip-vit-base-patch16    |                0.8 |                 0.8 |      0.8 |                0.8 |               0.8 |    0.8 |     0.8 |             0.8 |               0.8 |           0.8 |
| clip-vit-large-patch14   |                0.8 |                 0.8 |      0.8 |                0.8 |               0.8 |    0.8 |     0.8 |             0.8 |               0.8 |           0.8 