In [1]:
import os
import json

# Load models
models = [name for name in os.listdir() if os.path.isdir(name)]
print('Models')
print(models)

# Load classes
with open(os.path.join(os.pardir, 'classes.txt'), 'r') as f:
  classes = [tuple(line.strip().split(',')) for line in f]
class_names = [cls[1] for cls in classes]
print('\nClasses')
print(class_names)

# Create ground truth dictionary
with open(os.path.join(os.pardir, os.pardir, '2_ground_truth.json'), 'r') as json_file:
  ground_truth_data = json.load(json_file)
ground_truth_dict = {item['item']: item['class'] for item in ground_truth_data}
print('\nGround truth loaded')

# Test images
with open(os.path.join(os.pardir, os.pardir, '2_test.txt'), 'r') as file:
  images = file.read().splitlines()
print('\nTest images loaded')

Models
['clip-vit-base-patch16', 'siglip-base-patch16-512', 'clip-vit-large-patch14', 'clip-vit-base-patch32', 'siglip-large-patch16-384', 'siglip-so400m-patch14-384']

Classes
['ANTHONY OF PADUA', 'JOHN THE BAPTIST', 'PAUL', 'FRANCIS OF ASSISI', 'MARY MAGDALENE', 'JEROME', 'SAINT DOMINIC', 'VIRGIN MARY', 'PETER', 'SAINT SEBASTIAN']

Ground truth loaded

Test images loaded


In [2]:
import pandas as pd
import torch
import os
import tabulate
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

y_trues = []
y_preds = []
all_probs = []

for model_name in models:
  output_dir = os.path.join(os.pardir,'evaluations',model_name)
  os.makedirs(output_dir, exist_ok=True)
  print(f"Model: {model_name}")

  # Load model data
  probs = torch.load(os.path.join(model_name, 'probs.pt'), weights_only=True)
  all_probs.append(probs)

  # Create confusion matrix using ground truth and predicted classes
  y_true = [ground_truth_dict.get(item) for item in images]
  y_pred = [classes[probs[i].argmax().item()][1] for i in range(len(images))]
  y_true_indices = [class_names.index(cls) for cls in y_true]
  y_pred_indices = [class_names.index(cls) for cls in y_pred]
  
  y_trues.append(y_true_indices)
  y_preds.append(y_pred_indices)
  
  cm = confusion_matrix(y_true_indices, y_pred_indices, labels=range(len(class_names)))

  confusion_matrix_df = pd.DataFrame(cm, index=class_names, columns=class_names)
  confusion_matrix_df.to_csv(os.path.join(output_dir, 'confusion_matrix.csv'))

  plt.figure(figsize=(10, 8))
  sns.heatmap(confusion_matrix_df, annot=True, fmt='d', cmap='Oranges', xticklabels=class_names, yticklabels=class_names)
  plt.xlabel('Predicted classes')
  plt.ylabel('Actual classes')
  plt.title(f'Confusion Matrix for {model_name}')
  plt.savefig(os.path.join(output_dir, 'confusion_matrix.png'), bbox_inches='tight')
  plt.close()

Model: clip-vit-base-patch16
Model: siglip-base-patch16-512
Model: clip-vit-large-patch14
Model: clip-vit-base-patch32
Model: siglip-large-patch16-384
Model: siglip-so400m-patch14-384


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, average_precision_score
from sklearn.preprocessing import label_binarize

KEY_CLASS_NAME = 'class_name'
KEY_NUM_IMAGES = '# of Images'
KEY_PRECISION = 'Precision'
KEY_RECALL = 'Recall'
KEY_F1_SCORE = 'F1 Score'
KEY_AVG_PRECISION = 'Average Precision'

class_image_counts = {cls: 0 for cls in class_names}
for item in y_true:
  class_image_counts[item] += 1

for i in range(0, len(models)):

  y_true_indices = y_trues[i]
  y_pred_indices = y_preds[i]
  probs = all_probs[i]
  
  # one vs all encoding
  y_true_one_hot = label_binarize(y_true_indices, classes=range(len(class_names)))

  # Calculate metrics
  class_precisions = precision_score(y_true_indices, y_pred_indices, average=None, labels=range(len(class_names)), zero_division=0) * 100
  class_recalls = recall_score(y_true_indices, y_pred_indices, average=None, labels=range(len(class_names)), zero_division=0) * 100
  class_f1_scores = f1_score(y_true_indices, y_pred_indices, average=None, labels=range(len(class_names)), zero_division=0) * 100
  class_avg_precisions = average_precision_score(y_true_one_hot, probs, average=None) * 100

  # Store precision, recall, and f1 score for each class into a dataframe
  metrics_df = pd.DataFrame({
      KEY_CLASS_NAME: class_names,
      KEY_NUM_IMAGES: [count for count in class_image_counts.values()],
      KEY_PRECISION: class_precisions,  
      KEY_RECALL: class_recalls,        
      KEY_F1_SCORE: class_f1_scores,    
      KEY_AVG_PRECISION: class_avg_precisions 
  })

  # Reorder the dataframe based on the specified class order
  class_order = ["ANTHONY OF PADUA", "FRANCIS OF ASSISI", "JEROME", "JOHN THE BAPTIST", "MARY MAGDALENE", "PAUL", "PETER", "SAINT DOMINIC", "SAINT SEBASTIAN", "VIRGIN MARY"]
  metrics_df[KEY_CLASS_NAME] = pd.Categorical(metrics_df[KEY_CLASS_NAME], categories=class_order + ["MEAN"], ordered=True)
  metrics_df = metrics_df.sort_values(KEY_CLASS_NAME).reset_index(drop=True)

  # Add mean values to the dataframe
  mean_precision = precision_score(y_true_indices, y_pred_indices, average='macro', zero_division=0) * 100
  mean_recall = recall_score(y_true_indices, y_pred_indices, average='macro', zero_division=0) * 100
  mean_f1_score = f1_score(y_true_indices, y_pred_indices, average='macro', zero_division=0) * 100
  mean_avg_precision = average_precision_score(y_true_indices, probs, average='macro') * 100
  mean_values = {
      KEY_CLASS_NAME: 'Mean (macro)',
      KEY_NUM_IMAGES: '-',
      KEY_PRECISION: mean_precision,
      KEY_RECALL: mean_recall,
      KEY_F1_SCORE: mean_f1_score,
      KEY_AVG_PRECISION: mean_avg_precision
  }
  metrics_df = pd.concat([metrics_df, pd.DataFrame([mean_values])], ignore_index=True)

  metrics_df[[KEY_PRECISION, KEY_RECALL, KEY_F1_SCORE, KEY_AVG_PRECISION]] = metrics_df[
      [KEY_PRECISION, KEY_RECALL, KEY_F1_SCORE, KEY_AVG_PRECISION]
  ].map(lambda x: f"{x:.2f}%")

  metrics_path = os.path.join(os.pardir, 'evaluations', models[i], 'metrics.csv')
  metrics_df.to_csv(metrics_path)
  print(f"Metrics stored in {metrics_path}")

Executed clip-vit-base-patch16
Executed siglip-base-patch16-512
Executed clip-vit-large-patch14
Executed clip-vit-base-patch32
Executed siglip-large-patch16-384
Executed siglip-so400m-patch14-384
