# Goal: Compare the performance of one-stage classification (YOLO only) vs. two-stage classification (YOLO + CLIP) on the 4-class test set, no GBIF data

### Closed set data

In [1]:
# get test set
import numpy as np
import cv2
import pandas as pd
import torch as th
from src.experiments import ExperimentMosquitoClassifier
from src.classification import MosquitoClassifier
# from openmax.openmax import OpenMaxYOLOCLIP
import src.data_loader as dl

from openmax.clf_utils import calculate_iou

# Evaluation settings. (299, 299) was tried previously as an alternative IMG_SIZE.
IMG_SIZE = (224, 224)
USE_CHANNEL_LAST = False
PRESERVE_ASPECT_RATIO = False
SHIFT = 0
DATASET = "laion"
DEVICE = "cuda:0"

# Original note: "clip train has GBIF data." -- TODO confirm against the notebook goal ("no GBIF data").
clip_model_path = './checkpoints/CLIP_anno2/epoch=6-val_loss=0.5844640731811523-val_f1_score=0.9127286076545715-val_multiclass_accuracy=0.9220854043960571.ckpt'

# Species names in label-index order (index == position in this tuple).
_SPECIES = ("albopictus", "culex", "japonicus/koreicus", "culiseta")

# Four-species label map (original note: "for openset").
CLASS_DICT = {
    name: th.tensor(index, dtype=th.float)
    for index, name in enumerate(_SPECIES)
}

# Same map plus the generic "mosquito" label at index 4.
class_dict = {
    name: th.tensor(index, dtype=th.float)
    for index, name in enumerate(_SPECIES + ("mosquito",))
}

INFO:albumentations.check_version:A new version of Albumentations is available: 1.4.16 (you have 1.4.11). Upgrade using: pip install --upgrade albumentations


In [2]:
# Run settings for this notebook.
FOLD = 0
dataset = 'datacomp_xl_s13b_b90k'
aug = 'hca'
bs = 16
# img_size = (299, 299)
img_size = (224, 224)
shift_box = False

# Prefix prepended to image file names; empty means the CSV paths are used as-is.
img_dir = ""

val_annotations_csv = "../data_round_2/mosAlert_new_annotation_2/val_annotation_2.csv"
train_annotations_csv = "../data_round_2/mosAlert_new_annotation_2/train_annotation_2.csv"
test_annotations_csv = "../data_round_2/mosAlert_new_annotation_2/test_annotation_2.csv"


train_df = pd.read_csv(train_annotations_csv)

# Only the train table gets the prefix here; the test dataset receives img_dir separately.
train_df["img_fName"] = img_dir + train_df["img_fName"]

val_df = pd.read_csv(val_annotations_csv)
test_df = pd.read_csv(test_annotations_csv)
# Shuffle with a fixed seed so the evaluation order is identical on every Restart & Run All.
test_df = test_df.sample(frac=1, random_state=0).reset_index(drop=True)

In [3]:
# Shape of the validation annotation table as (rows, columns).
val_df.shape

(2412, 8)

In [4]:
# Closed-set evaluation: keep only rows whose label is not the generic "mosquito" class.
closed_test = test_df[test_df["class_label"] != "mosquito"]

# Show the class distribution. A bare expression in the middle of a cell is not
# displayed, so print it explicitly.
print(closed_test["class_label"].value_counts())

# No transform / augmentation arguments are passed to the dataset.
closed_test_dl = dl.TestYOLOCLIPDataset(
    annotations_df=closed_test,
    class_dict=class_dict,
    img_dir=img_dir,
)

### Helper functions

In [5]:
def get_bbox(img, yolo_model):
    """Return the highest-confidence YOLO box for ``img`` together with its confidence.

    Args:
        img: Image array; only ``img.shape[:2]`` is read here and it is
            interpreted as (height, width), the numpy/OpenCV layout
            (assumed -- TODO confirm against the dataset class).
        yolo_model: Detector called as
            ``yolo_model(img, verbose=False, device=DEVICE, max_det=1)``; each
            returned result must expose ``boxes.xyxy`` and ``boxes.conf``.

    Returns:
        tuple: ``(bbox, conf)``. ``bbox`` is ``[x1, y1, x2, y2]`` with every
        coordinate truncated to ``int``; ``conf`` is the confidence of that box.
        When no box has a confidence above 0, the full-image box
        ``[0, 0, width, height]`` and ``0.0`` are returned.
    """
    results = yolo_model(img, verbose=False, device=DEVICE, max_det=1)

    # Array images are (rows, cols, ...) == (height, width, ...). The fallback
    # box is in xyxy order, so it must end with (width, height).
    img_h, img_w = img.shape[:2]
    bbox = [0, 0, img_w, img_h]
    conf = 0.0

    # Keep the first box with the strictly highest confidence across all results.
    for result in results:
        candidates = zip(result.boxes.xyxy.tolist(), result.boxes.conf.tolist())
        for bbox_tmp, conf_tmp in candidates:
            if conf_tmp > conf:
                bbox, conf = bbox_tmp, conf_tmp

    bbox = [int(float(coord)) for coord in bbox]

    return bbox, conf

### Models

In [6]:
#fetch yolo
from ultralytics import YOLO

# Initialize the one-stage detector/classifier.
# NOTE(review): absolute, machine-specific path -- make it relative or configurable before sharing.
yolo_path = "/home/pc2/Downloads/AI-Crowd/Mosquito-Classifiction/experiments/yolo/runs/detect/train_4class_noGbif/weights/best.pt"
# 'detect' is the task name ('predict' is an inference mode, not a task); this
# also matches how YOLOCLip loads the same weights.
yolo_predictor = YOLO(yolo_path, task='detect')

In [7]:
from openmax.clf_utils import *

class YOLOCLip:
    """Two-stage classifier: a YOLO detector followed by a CLIP-based classifier.

    Attributes are loaded once in ``__init__``:
        yolo: ``YOLO`` model built from ``yolo_path`` with ``task='detect'``.
        clip: ``MosquitoClassifier`` restored from ``clip_path`` and put in eval mode.
    """

    def __init__(self, yolo_path, clip_path):
        """Load both stages.

        Args:
            yolo_path: Path to the YOLO weights file.
            clip_path: Path to the ``MosquitoClassifier`` checkpoint.
        """
        self.yolo = YOLO(yolo_path, task='detect')
        self.clip = MosquitoClassifier.load_from_checkpoint(
            clip_path,
            head_version=7,
            map_location=th.device(DEVICE),
        ).eval()

    def predict(self, img):
        """Return the predicted class index for a single image.

        Args:
            img: Image handed to ``prepCLIP2`` together with the YOLO model.
                ``prepCLIP2`` comes from ``openmax.clf_utils``; presumably it
                crops to the detected box and preprocesses for CLIP -- TODO confirm.

        Returns:
            int: Arg-max over dim 1 of the classifier logits. ``.item()``
            requires exactly one prediction, i.e. a batch of one.
        """
        x = prepCLIP2(img, self.yolo).to(DEVICE)
        with th.no_grad():
            logits = self.clip(x)
        # Use the `th` alias this notebook imports; the bare name `torch` is only
        # defined if it happens to leak through a star import.
        return th.argmax(logits, dim=1).item()

In [8]:
# Build the two-stage pipeline from the YOLO weights and the CLIP checkpoint paths defined earlier.
yolo_clip = YOLOCLip(yolo_path, clip_model_path)

INFO:root:Loaded ViT-L-14 model config.
INFO:root:Loading pretrained ViT-L-14 weights (datacomp_xl_s13b_b90k).


### Test: one-stage classification (YOLO only)

In [9]:
def process_yolo(img, yolo_model):
    """Return the class label of the most confident YOLO detection, or -1.

    Args:
        img: Image passed straight to ``yolo_model``.
        yolo_model: Detector called as
            ``yolo_model(img, verbose=False, device=DEVICE, max_det=1)``; each
            returned result must expose ``boxes.cls`` and ``boxes.conf``.

    Returns:
        The ``boxes.cls`` entry of the detection with the highest confidence
        across all results, or ``-1`` when no detection has a confidence above 0.
    """
    detections = yolo_model(img, verbose=False, device=DEVICE, max_det=1)
    best_label, best_conf = -1, 0.0

    for detection in detections:
        class_ids = detection.boxes.cls.tolist()
        scores = detection.boxes.conf.tolist()

        # Default to the first detected class (or -1 if there is none); it is
        # only kept if some detection beats a confidence of 0.
        top_label = class_ids[0] if class_ids else -1
        top_conf = 0.0
        for class_id, score in zip(class_ids, scores):
            if score > top_conf:
                top_label, top_conf = class_id, score

        # Carry the winner of this result forward only if it beats earlier results.
        if top_conf > best_conf:
            best_label, best_conf = top_label, top_conf

    return best_label


In [10]:
# One-stage evaluation: classify every closed-set test image with YOLO alone.
# A seeded generator makes the fallback labels (and so the reported metrics)
# identical on every run.
rng = np.random.default_rng(0)

trues = []
preds = []
empty_count = 0  # images for which YOLO produced no usable detection
labels = list(range(4))
for batch in closed_test_dl:
    img = batch[0]
    y_true = batch[1]
    pred = process_yolo(img, yolo_predictor)

    if pred == -1:
        # No detection: score it as a miss by drawing a label that differs from y_true.
        pred = rng.choice([i for i in labels if i != y_true])
        empty_count += 1

    preds.append(pred)
    trues.append(y_true)

In [11]:
# Number of test images for which process_yolo returned -1 (no detection) in the loop above.
empty_count

42

In [12]:
# Text report for the YOLO-only predictions: per-class precision, recall and F1,
# plus overall accuracy and the macro / weighted averages.
from sklearn.metrics import classification_report, accuracy_score

print(classification_report(trues, preds))


              precision    recall  f1-score   support

         0.0       0.56      0.92      0.69       516
         1.0       0.92      0.39      0.54       634
         2.0       0.22      0.52      0.31        54
         3.0       0.39      0.23      0.29        96

    accuracy                           0.59      1300
   macro avg       0.52      0.51      0.46      1300
weighted avg       0.71      0.59      0.58      1300



In [13]:
# `trues` holds the ground-truth labels and `preds` the predictions collected above.
# Ask for the report as a dict so individual F1 scores can be read out of it.
report = classification_report(trues, preds, output_dict=True)
accuracy = accuracy_score(trues, preds)

print(f"Accuracy: {accuracy * 100:.2f}%")

# Every key except 'accuracy' maps to a metrics dict; that includes the
# 'macro avg' and 'weighted avg' rows as well as the per-class rows.
for class_label in report:
    if class_label == 'accuracy':
        continue
    metrics = report[class_label]
    f1_score_percentage = metrics['f1-score'] * 100
    print(f"F1 Score for class '{class_label}': {f1_score_percentage:.2f}%")

Accuracy: 59.23%
F1 Score for class '0.0': 69.44%
F1 Score for class '1.0': 54.38%
F1 Score for class '2.0': 31.28%
F1 Score for class '3.0': 28.95%
F1 Score for class 'macro avg': 46.02%
F1 Score for class 'weighted avg': 57.52%


In [14]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Headline numbers for the YOLO-only run; F1, precision and recall are macro-averaged.
print("Accuracy: ", accuracy_score(trues, preds))
for caption, metric_fn in (
    ("F1: ", f1_score),
    ("Precision: ", precision_score),
    ("Recall: ", recall_score),
):
    print(caption, metric_fn(trues, preds, average='macro'))

Accuracy:  0.5923076923076923
F1:  0.4601518670616525
Precision:  0.5229929690502566
Recall:  0.5136657880185959


Next, evaluate the two-stage pipeline (YOLO + CLIP).

In [None]:
# Two-stage evaluation: YOLO supplies the box (scored by IoU against the
# annotation) and the YOLO + CLIP pipeline supplies the class prediction.
trues, preds, ious = [], [], []
for batch in closed_test_dl:
    img, y_true, bbox_true = batch[0], batch[1], batch[2]

    # The detector box is only used for the IoU metric; `conf` is not used further.
    bbox, conf = get_bbox(img, yolo_clip.yolo)
    pred = yolo_clip.predict(img)

    trues.append(y_true)
    preds.append(pred)
    ious.append(calculate_iou(bbox, bbox_true))

In [None]:
# Same text report as for the YOLO-only run, now for the YOLO + CLIP predictions.
print(classification_report(trues, preds))

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Headline numbers for the YOLO + CLIP run; F1, precision and recall are macro-averaged.
print("Accuracy: ", accuracy_score(trues, preds))
for caption, metric_fn in (
    ("F1: ", f1_score),
    ("Precision: ", precision_score),
    ("Recall: ", recall_score),
):
    print(caption, metric_fn(trues, preds, average='macro'))

# Mean IoU between the YOLO boxes and the annotated boxes.
print("mIoU: ", np.mean(ious))