## Full pipeline

In [None]:
import numpy as np
import pandas as pd
import torch as th
from src.experiments import ExperimentMosquitoClassifier
from openmax.openmax import OpenMaxYOLOCLIP
import src.data_loader as dl
from ultralytics import YOLO
from openmax.clf_utils import calculate_iou, prep_x, get_bbox

# --- Image / runtime settings ---
IMG_SIZE = (224, 224)
USE_CHANNEL_LAST = False
PRESERVE_ASPECT_RATIO = False
SHIFT = 0
DATASET = "laion"
DEVICE = "cuda:0"

# --- Model weights ---
yolo_path = "./yolo/runs/detect/train_4class_noGbif/weights/best.pt"
clip_model_path = './checkpoints/CLIP_anno2/epoch=6-val_loss=0.5844640731811523-val_f1_score=0.9127286076545715-val_multiclass_accuracy=0.9220854043960571.ckpt'

FOLD = "anno2_FINAL"


def _label_tensors(names):
    """Map each name to a 0-dim float tensor holding its position in *names*."""
    return {
        name: th.tensor(position, dtype=th.float)
        for position, name in enumerate(names)
    }


# Species shared by both label maps, in index order.
_KNOWN_SPECIES = ("albopictus", "culex", "japonicus/koreicus", "culiseta")

# for openset
CLASS_DICT = _label_tensors(_KNOWN_SPECIES)

# Same four species plus an extra "mosquito" label at index 4.
class_dict = _label_tensors(_KNOWN_SPECIES + ("mosquito",))

# CLASS_DICT = {
#     "albopictus":           th.tensor(0, dtype=th.float),
#     "culex":                th.tensor(1, dtype=th.float),
#     "japonicus/koreicus":   th.tensor(2, dtype=th.float),
#     "culiseta":             th.tensor(3, dtype=th.float),
#     "aegypti":              th.tensor(4, dtype=th.float),
#     "anopheles":            th.tensor(5, dtype=th.float),
# }

# class_dict = {
#     "albopictus":           th.tensor(0, dtype=th.float),
#     "culex":                th.tensor(1, dtype=th.float),
#     "japonicus/koreicus":   th.tensor(2, dtype=th.float),
#     "culiseta":             th.tensor(3, dtype=th.float),
#     "aegypti":              th.tensor(4, dtype=th.float),
#     "anopheles":            th.tensor(5, dtype=th.float),
#     # "mosquito":             th.tensor(4, dtype=th.float)
# } 

In [None]:
# Settings forwarded unchanged to get_dataloaders() below.
dataset = 'datacomp_xl_s13b_b90k'
aug = 'hca'
bs = 16
# img_size = (299, 299)
img_size = (224, 224)
shift_box = False

# Prefix prepended to every training image file name (empty: names are used as-is).
img_dir = ""

# Fixed seed so the shuffled test-set order is identical across notebook runs.
shuffle_seed = 0

# # New annotation for new mos alert partition
# val_annotations_csv = "../data_round_2/closedSet/val.csv"
# train_annotations_csv = "../data_round_2/closedSet/train.csv"
# test_annotations_csv = "../data_round_2/closedSet/test.csv"

val_annotations_csv = "../data_round_2/mosAlert_new_annotation_2/val_annotation_2.csv"
train_annotations_csv = "../data_round_2/mosAlert_new_annotation_2/train_annotation_2.csv"
test_annotations_csv = "../data_round_2/mosAlert_new_annotation_2/test_annotation_2.csv"

train_df = pd.read_csv(train_annotations_csv)
train_df["img_fName"] = img_dir + train_df["img_fName"]

val_df = pd.read_csv(val_annotations_csv)

# Shuffle all test rows. Without random_state the permutation differed on
# every run, which made the sample shown by the sanity-check cell (and any
# order-dependent debugging) non-reproducible.
test_df = (
    pd.read_csv(test_annotations_csv)
    .sample(frac=1, random_state=shuffle_seed)
    .reset_index(drop=True)
)


def _make_loaders(root):
    """Return the three-item result of get_dataloaders() for an experiment
    whose first constructor argument is *root*."""
    experiment = ExperimentMosquitoClassifier(
        root,
        "",
        class_dict=CLASS_DICT,
        class_dict_test=class_dict,
    )
    return experiment.get_dataloaders(
        train_df,
        val_df,
        test_df,
        dataset,
        aug,
        bs,
        img_size,
        shift_box,
    )


# NOTE(review): the train loader is built with "." and the validation loader
# with img_dir as the first constructor argument. This is kept exactly as it
# was; confirm whether the difference is intentional.
train_dataloader, _, _ = _make_loaders(".")
_, val_dataloader, _ = _make_loaders(img_dir)

In [4]:
# Constructor arguments common to both test datasets.
_test_ds_kwargs = dict(class_dict=class_dict, img_dir=img_dir)

# Closed-set subset: every test row whose label is not "mosquito".
closed_test = test_df[test_df["class_label"].ne("mosquito")]
closed_test_dl = dl.TestYOLOCLIPDataset(annotations_df=closed_test, **_test_ds_kwargs)

# create test set from dl (all test rows, "mosquito" included)
test_set = dl.TestYOLOCLIPDataset(annotations_df=test_df, **_test_ds_kwargs)

In [None]:
# sanity check test set: print element 2 of the first sample
# (the evaluation loop reads that element as the ground-truth bbox).
first_sample = next(iter(test_set), None)
if first_sample is not None:
    print(first_sample[2])

In [None]:
# Detector handle passed to get_bbox() in the evaluation loop.
yolo_model = YOLO(yolo_path, task="detect")

# Numeric OpenMax settings, grouped so they are easy to tweak in one place.
openmax_settings = dict(
    alpha_rank=4,
    tailsize=20,
    n_class=4,
    # openmax=False,
)

# Classifier wrapper built from the YOLO and CLIP weight paths and the
# training dataloader.
open_model = OpenMaxYOLOCLIP(
    yolo_path=yolo_path,
    clip_path=clip_model_path,
    train_dl=train_dataloader,
    fold=FOLD,
    **openmax_settings,
)


### TODO: refactor the code below later; it is messy and inconsistent. See test_yolo for a proper pipeline.

In [None]:
# Accumulators read by the reporting cells further down.
ious, confs, trues, preds = [], [], [], []
count = 0      # samples skipped because prep_x() flagged them as filtered
all_count = 0  # samples iterated so far
# test_loader = closed_test_dl
test_loader = test_set

for all_count, batch in enumerate(test_loader, start=1):
    img, y_true, bbox_true = batch[0], batch[1], batch[2]

    # Detect a box, score it against the annotated one, then prepare the
    # classifier input.
    bbox, conf = get_bbox(img, yolo_model)
    iou = calculate_iou(bbox, bbox_true)
    x, filtered = prep_x(bbox, img, iou)

    # IoU is recorded for every sample, including ones skipped below;
    # confs / preds / trues only cover the samples that get classified.
    ious.append(iou)

    if filtered:
        count += 1
        continue

    pred = open_model.predict_single(x)

    confs.append(conf)
    preds.append(pred)
    trues.append(y_true)

In [None]:
# Distinct values among the predictions collected by the evaluation loop
# (bare expression: its value is the cell output).
np.unique(preds)

In [None]:
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    f1_score,
)

# Turn the accumulated Python lists into arrays; later cells call
# .tolist() on them.
preds, trues = np.array(preds), np.array(trues)

# Text report comparing true labels against predictions.
print(classification_report(y_true=trues, y_pred=preds))

In [None]:
# 'trues' holds the true labels and 'preds' the predicted labels.
# With output_dict=True the report is a dict keyed by label / summary name.
report = classification_report(trues, preds, output_dict=True)

# Overall accuracy, printed as a percentage.
accuracy = accuracy_score(trues, preds)
print(f"Accuracy: {accuracy * 100:.2f}%")

for class_label, metrics in report.items():
    # The 'accuracy' entry is skipped; every other entry is indexed by
    # metric name below.
    if class_label == 'accuracy':
        continue

    f1_score_percentage = metrics['f1-score'] * 100
    recall_percentage = metrics['recall'] * 100
    # print(f"F1 Score for class '{class_label}': {f1_score_percentage:.2f}%")
    print(f"Recall for class '{class_label}': {recall_percentage:.2f}%")

In [None]:
# Headline numbers: overall accuracy, then macro-averaged F1.
overall_accuracy = accuracy_score(trues, preds)
print("accuracy:", overall_accuracy)

macro_f1 = f1_score(trues, preds, average="macro")
print("macro f1:", macro_f1)

In [None]:
# (skipped, total): samples the evaluation loop skipped as filtered vs.
# samples it iterated over.
skipped_and_total = (count, all_count)
print(skipped_and_total)

# Share of skipped samples in percent (bare expression: shown as the cell output).
count / all_count * 100

In [None]:
# avg iou and conf
# ious has one entry per iterated sample; confs only has entries for the
# samples that were not filtered out.
for collected in (ious, confs):
    print(np.mean(collected))

In [None]:
from openmax.confusion_matrix_jhu import *
import os

# Axis labels for the confusion matrix, one per class.
species_list = [
    "albopictus",
    "culex",
    "japonicus/koreicus",
    "culiseta",
    'mosquito',
]

# Arguments for the plot: labels as plain lists, normalisation switched on,
# and the save path for the figure.
confusion_kwargs = dict(
    classes=species_list,
    y_true=trues.tolist(),
    y_pred=preds.tolist(),
    normalize=True,
    savepath='confusion_matrix_jhu',
    figsize=(12, 12),
)
plot_pretty_blue_confusion_matrix(**confusion_kwargs)