In [1]:
import torch
import clip
from PIL import Image
import pandas as pd
import os

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# clip.load hands back the model and the image transform used on every image below.
model, preprocess = clip.load("ViT-B/32", device=device)

# Paths are assembled with os.path.join rather than hard-coded backslashes:
# "\l" and "\i" are invalid escape sequences inside a normal string literal,
# and a backslash-separated path only resolves on Windows.
csv_path = os.path.join("data", "labels", "label_final_test.tsv")
image_root = os.path.join("data", "images")
num_samples = 200  # number of rows sampled from the label file for evaluation

# Prompts: maps each damage_severity value (as a string) to the text prompt
# that is scored against the image.
class_map = {
    "0" : "This is irrelevant",
    "1" : "This is no damage",
    "2" : "This is mild damage",
    "3" : "This is severe damage"
}

# class_names[k] is the label whose prompt sits at row k of text_tokens,
# so an argmax over prompt similarities can be mapped back to a label.
class_names = list(class_map.keys())
text_prompts = [class_map[c] for c in class_names]
text_tokens = clip.tokenize(text_prompts).to(device)

In [2]:
# Read the tab-separated label file in full.
labels_df = pd.read_csv(csv_path, sep="\t")

# Draw the evaluation subset.
# - random_state pins the draw so Restart & Run All evaluates the same rows
#   (and therefore reports the same accuracy) every time.
# - n is capped at the file length because DataFrame.sample raises ValueError
#   when asked for more rows than exist (sampling without replacement).
df = (
    labels_df
    .sample(n=min(num_samples, len(labels_df)), random_state=42)
    .reset_index(drop=True)
)
df

Unnamed: 0,image_path,damage_severity
0,nepal_eq_none_im_16361.jpg,0
1,nepal_eq_severe_im_37945.jpg,3
2,ecuador_eq_none_im_1348.jpg,0
3,nepal_eq_none_im_11633.jpg,3
4,nepal_eq_severe_im_18835.jpg,3
...,...,...
195,ecuador_eq_none_im_301.jpg,1
196,nepal_eq_none_im_8864.jpg,1
197,910563475901919232_0.jpg,2
198,nepal_eq_mild_im_13754.jpg,2


In [3]:
# Zero-shot evaluation: for every sampled row, pick the prompt whose embedding
# is most similar to the image embedding and count exact label matches.
correct = 0

# The prompt set is the same for every image, so encode and L2-normalise the
# text features once instead of once per loop iteration.
with torch.no_grad():
    text_features = model.encode_text(text_tokens)
    text_features /= text_features.norm(dim=-1, keepdim=True)

for i, row in df.iterrows():
    img_path = os.path.join(image_root, row['image_path'])
    # class_map is keyed by strings, so the label is compared in string form.
    true_label = str(row['damage_severity'])

    # The context manager closes the underlying file handle as soon as the
    # image has been converted to a tensor, instead of leaking one per row.
    with Image.open(img_path) as pil_image:
        image = preprocess(pil_image).unsqueeze(0).to(device)

    with torch.no_grad():
        image_features = model.encode_image(image)
        image_features /= image_features.norm(dim=-1, keepdim=True)

        # With both sides normalised, the dot product is the cosine similarity
        # between the image and each prompt; the best-scoring prompt wins.
        similarity = (image_features @ text_features.T).squeeze(0)
        predicted_index = similarity.argmax().item()
        predicted_label = class_names[predicted_index]

    print(f"[{i+1:02d}] True: {class_map[true_label]:30} | Pred: {class_map[predicted_label]}")
    if predicted_label == true_label:
        correct += 1

[01] True: This is irrelevant             | Pred: This is severe damage
[02] True: This is severe damage          | Pred: This is mild damage
[03] True: This is irrelevant             | Pred: This is severe damage
[04] True: This is severe damage          | Pred: This is no damage
[05] True: This is severe damage          | Pred: This is no damage
[06] True: This is severe damage          | Pred: This is mild damage
[07] True: This is no damage              | Pred: This is no damage
[08] True: This is severe damage          | Pred: This is no damage
[09] True: This is severe damage          | Pred: This is severe damage
[10] True: This is severe damage          | Pred: This is mild damage
[11] True: This is severe damage          | Pred: This is severe damage
[12] True: This is severe damage          | Pred: This is severe damage
[13] True: This is irrelevant             | Pred: This is irrelevant
[14] True: This is no damage              | Pred: This is severe damage
[15] True: This i

In [4]:
# Number of evaluated images whose predicted label matched the true label.
correct

74