Load images with Pillow

In [3]:
import os
import json
from PIL import Image

# Read the test split: the file holds one item identifier per line.
with open('2_test.txt', 'r') as file:
  test_items = file.read().splitlines()

# Every image sits in the same folder, so build that part of the path once.
jpeg_dir = os.path.join(os.pardir, 'dataset', 'ArtDL', 'JPEGImages')

# Collect one opened PIL image per item; items that cannot be opened are
# reported and skipped, so `images` may end up shorter than `test_items`.
images = []
for item in test_items:
  image_path = os.path.join(jpeg_dir, f"{item}.jpg")
  try:
    image = Image.open(image_path)
  except Exception as e:
    print(f"Error loading image {image_path}: {e}")
  else:
    images.append(image)

print(f"Loaded {len(images)} images")


Loaded 1864 images


Test CLIP with these models:

* openai/clip-vit-base-patch32
* openai/clip-vit-base-patch16
* openai/clip-vit-large-patch14


Process the images and compute, for each image, the probability of every class.
Use small batches (16 images per batch).

In [6]:
from transformers import AutoProcessor, AutoModelForZeroShotImageClassification
from tqdm import tqdm
import torch
import os

# Environment switches are set before the processor and model are created.
os.environ.update({
    "TOKENIZERS_PARALLELISM": "false",
    "PYDEVD_DISABLE_FILE_VALIDATION": "true",
})

# Short model name; it is also reused later to name the evaluation output folder.
model_name = "clip-vit-base-patch32"
checkpoint = f'openai/{model_name}'

print(f"Number of images: {len(images)}")

# Fetch the preprocessing pipeline and the zero-shot classifier for the checkpoint
processor = AutoProcessor.from_pretrained(checkpoint)
model = AutoModelForZeroShotImageClassification.from_pretrained(checkpoint)

# Target classes as (identifier, label) pairs.
# - The first element is a coded identifier; it is not read anywhere in the
#   visible cells (presumably an Iconclass-style notation -- TODO confirm).
# - The second element is the text handed to the processor as the candidate
#   label for zero-shot classification, and it is also the class name used
#   when computing the confusion matrix and the metrics.
# The order of this list defines the column order of the probability tensor.
classes = [
    ("11H(ANTHONY OF PADUA)", "ANTHONY OF PADUA"),
    ("11H(JOHN THE BAPTIST)", "JOHN THE BAPTIST"),
    ("11H(PAUL)", "PAUL"),
    ("11H(FRANCIS)", "FRANCIS OF ASSISI"),
    ("11HH(MARY MAGDALENE)", "MARY MAGDALENE"),
    ("11H(JEROME)", "JEROME"),
    ("11H(DOMINIC)", "SAINT DOMINIC"),
    ("11F(MARY)", "VIRGIN MARY"),
    ("11H(PETER)", "PETER"),
    ("11H(SEBASTIAN)", "SAINT SEBASTIAN")
]


# Break images into smaller batches
batch_size = 16
images_batches = [images[i:i + batch_size] for i in range(0, len(images), batch_size)]

# The candidate labels are the same for every batch, so build the list once.
prompts = [cls[1] for cls in classes]

all_probs = []
failed_batches = []  # 1-based numbers of the batches that raised
with tqdm(total=len(images), desc="Processing Images", unit="image") as pbar:
    for batch_index, batch in enumerate(images_batches):
        try:
            # Process the batch
            inputs = processor(text=prompts, images=batch, return_tensors="pt", padding=True)

            # Inference only: disabling gradient tracking avoids building an
            # autograd graph for every batch and keeps memory usage down.
            with torch.no_grad():
                outputs = model(**inputs)

                # Get probabilities for the batch (one row per image,
                # one column per entry of `classes`)
                logits_per_image = outputs.logits_per_image
                batch_probs = logits_per_image.softmax(dim=1)
            all_probs.append(batch_probs.detach())
        except Exception as e:
            print(f"Error processing batch {batch_index + 1}: {e}")
            failed_batches.append(batch_index + 1)
        finally:
            # Advance the bar whether or not the batch succeeded
            pbar.update(len(batch))

# Later cells pair row i of `all_probs` with images[i]. A skipped batch would
# shift every following row and silently corrupt the evaluation, so stop here
# with an explicit error instead of concatenating an incomplete result.
if failed_batches:
    raise RuntimeError(
        f"{len(failed_batches)} batch(es) failed (batch numbers: {failed_batches}); "
        "probabilities would not line up with `images`."
    )

# Get one tensor with all the probabilities
all_probs = torch.cat(all_probs, dim=0)
print(f"Probabilities shape: {all_probs.shape}")

Number of images: 1864


Processing Images: 100%|██████████| 1864/1864 [00:36<00:00, 51.11image/s]

Probabilities shape: torch.Size([1864, 10])





In [8]:
import pandas as pd
import tabulate
from sklearn.metrics import confusion_matrix

# Folder that receives the evaluation artefacts for the current model
output_dir = f'../evaluations/{model_name}'
os.makedirs(output_dir, exist_ok=True)

probs = all_probs

# Ground truth: a JSON list of records with an 'item' id and its 'class'
with open(os.path.join(os.pardir, '2_ground_truth.json'), 'r') as json_file:
  ground_truth_data = json.load(json_file)
ground_truth_dict = {item['item']: item['class'] for item in ground_truth_data}

class_names = [cls[1] for cls in classes]

# Row i of the probability tensor must describe images[i]
if len(all_probs) != len(images):
  raise ValueError(
    f"Got {len(all_probs)} probability rows for {len(images)} images; "
    "predictions cannot be matched to images."
  )

# Recover each item id from the image file name. splitext strips only the
# final extension, so ids that themselves contain a dot stay intact.
item_ids = [os.path.splitext(os.path.basename(item.filename))[0] for item in images]

# Create confusion matrix using ground truth and predicted classes
y_true = [ground_truth_dict.get(item_id) for item_id in item_ids]
unlabeled = [item_id for item_id, true_cls in zip(item_ids, y_true) if true_cls not in class_names]
if unlabeled:
  # Fail with the offending ids rather than an opaque "None is not in list"
  raise ValueError(
    f"No usable ground-truth class for {len(unlabeled)} image(s), e.g. {unlabeled[:5]}"
  )
# One argmax over the class dimension instead of one call per image
y_pred = [class_names[idx] for idx in all_probs.argmax(dim=1).tolist()]
y_true_indices = [class_names.index(cls) for cls in y_true]
y_pred_indices = [class_names.index(cls) for cls in y_pred]
cm = confusion_matrix(y_true_indices, y_pred_indices, labels=range(len(class_names)))

# Populate confusion matrix dictionary
# For class i: the diagonal cell is TP, the rest of column i is FP and the
# rest of row i is FN.
confusion_matrices = {cls: {'TP': 0, 'FP': 0, 'FN': 0} for cls in class_names}
for i, cls in enumerate(class_names):
  confusion_matrices[cls]['TP'] = cm[i, i]
  confusion_matrices[cls]['FP'] = cm[:, i].sum() - cm[i, i]
  confusion_matrices[cls]['FN'] = cm[i, :].sum() - cm[i, i]

# Store dataframe (one row per class, columns in a fixed TP/FP/FN order)
confusion_matrix_df = pd.DataFrame(confusion_matrices).T
confusion_matrix_df = confusion_matrix_df[['TP', 'FP', 'FN']]

print("Confusion Matrix:")
print(tabulate.tabulate(confusion_matrix_df, headers='keys', tablefmt='pretty'))
confusion_matrix_df.to_csv(os.path.join(output_dir,'confusion_matrix.csv'))

Confusion Matrix:
+-------------------+----+-----+------+
|                   | TP | FP  |  FN  |
+-------------------+----+-----+------+
| ANTHONY OF PADUA  | 4  | 111 |  10  |
| JOHN THE BAPTIST  | 22 | 107 |  77  |
|       PAUL        | 0  |  3  |  52  |
| FRANCIS OF ASSISI | 74 | 304 |  24  |
|  MARY MAGDALENE   | 76 | 485 |  14  |
|      JEROME       | 0  |  4  | 118  |
|   SAINT DOMINIC   | 19 | 319 |  10  |
|    VIRGIN MARY    | 72 |  0  | 1117 |
|       PETER       | 0  |  0  | 119  |
|  SAINT SEBASTIAN  | 50 | 214 |  6   |
+-------------------+----+-----+------+


In [9]:
from sklearn.metrics import precision_score, recall_score, f1_score, average_precision_score

# Count how many ground-truth images belong to each class
class_image_counts = {cls: 0 for cls in class_names}
for item in y_true:
  class_image_counts[item] += 1

# Use the same explicit label set for every score so that per-class and macro
# values always cover all classes, even one that never occurs.
label_ids = list(range(len(class_names)))

# One-hot ground truth (n_images x n_classes). Passing the indicator matrix
# keeps column k tied to class k; relying on sklearn to binarize integer
# labels only covers the labels actually present in y_true.
y_true_one_hot = torch.nn.functional.one_hot(
  torch.tensor(y_true_indices), num_classes=len(class_names)
).numpy()

# Calculate precision, recall, F1 and average precision for each class (in percent)
class_precisions = precision_score(y_true_indices, y_pred_indices, average=None, labels=label_ids, zero_division=0) * 100
class_recalls = recall_score(y_true_indices, y_pred_indices, average=None, labels=label_ids, zero_division=0) * 100
class_f1_scores = f1_score(y_true_indices, y_pred_indices, average=None, labels=label_ids, zero_division=0) * 100
class_avg_precisions = average_precision_score(y_true_one_hot, all_probs, average=None) * 100

# Store precision, recall, and f1 score for each class into a dataframe
metrics_df = pd.DataFrame({
  'Class Name': class_names,
  '# of Images': list(class_image_counts.values()),
  'Precision': [f"{p:.2f}%" for p in class_precisions],
  'Recall': [f"{r:.2f}%" for r in class_recalls],
  'F1 Score': [f"{f1:.2f}%" for f1 in class_f1_scores],
  'Average Precision': [f"{ap:.2f}%" for ap in class_avg_precisions]
})

# Reorder the dataframe based on the specified class order
class_order = ["ANTHONY OF PADUA", "FRANCIS OF ASSISI", "JEROME", "JOHN THE BAPTIST", "MARY MAGDALENE", "PAUL", "PETER", "SAINT DOMINIC", "SAINT SEBASTIAN", "VIRGIN MARY"]
metrics_df['Class Name'] = pd.Categorical(metrics_df['Class Name'], categories=class_order + ["MEAN"], ordered=True)
metrics_df = metrics_df.sort_values('Class Name').reset_index(drop=True)

# Add mean values to the dataframe (macro average = unweighted mean over classes)
mean_precision = precision_score(y_true_indices, y_pred_indices, average='macro', labels=label_ids, zero_division=0) * 100
mean_recall = recall_score(y_true_indices, y_pred_indices, average='macro', labels=label_ids, zero_division=0) * 100
mean_f1_score = f1_score(y_true_indices, y_pred_indices, average='macro', labels=label_ids, zero_division=0) * 100
mean_avg_precision = average_precision_score(y_true_one_hot, all_probs, average='macro') * 100
# NOTE(review): the label 'Mean' does not match the "MEAN" category declared
# above, and the row key '10' is a string while the other row labels are
# integers -- left unchanged to keep the written CSV stable; confirm intent.
metrics_df.loc['10'] = ['Mean', '-', f"{mean_precision:.2f}%", f"{mean_recall:.2f}%", f"{mean_f1_score:.2f}%", f"{mean_avg_precision:.2f}%"]

print("Metrics:")
print(tabulate.tabulate(metrics_df, headers='keys', tablefmt='pretty'))
metrics_df.to_csv(os.path.join(output_dir,'metrics.csv'))

Metrics:
+----+-------------------+-------------+-----------+--------+----------+-------------------+
|    |    Class Name     | # of Images | Precision | Recall | F1 Score | Average Precision |
+----+-------------------+-------------+-----------+--------+----------+-------------------+
| 0  | ANTHONY OF PADUA  |     14      |   3.48%   | 28.57% |  6.20%   |       3.73%       |
| 1  | FRANCIS OF ASSISI |     98      |  19.58%   | 75.51% |  31.09%  |      38.10%       |
| 2  |      JEROME       |     118     |   0.00%   | 0.00%  |  0.00%   |      15.78%       |
| 3  | JOHN THE BAPTIST  |     99      |  17.05%   | 22.22% |  19.30%  |      20.15%       |
| 4  |  MARY MAGDALENE   |     90      |  13.55%   | 84.44% |  23.35%  |      60.57%       |
| 5  |       PAUL        |     52      |   0.00%   | 0.00%  |  0.00%   |       5.24%       |
| 6  |       PETER       |     119     |   0.00%   | 0.00%  |  0.00%   |      14.73%       |
| 7  |   SAINT DOMINIC   |     29      |   5.62%   | 65.52% |