In [None]:
!pip install ultralytics ttach

In [None]:
import numpy as np
import pandas as pd
import regex as re
import os
import shutil

from PIL import Image

from ultralytics import YOLO
from yolo_cam.eigen_cam import EigenCAM
from yolo_cam.utils.image import show_cam_on_image, scale_cam_image

import yaml
from zipfile import ZipFile

### Data Setup

In [None]:
# Dataset / experiment configuration
data_folder = "ISVC_Localization"  # dataset folder name (also the stem of its .zip archive)
SHOT = 1  # few-shot setting; selects the "<SHOT>-shot" output folder
YOLO_DATASET_ROOT = "datasets/"  # directory the dataset .yaml is written to
FEW_SHOT_ROOT = (
    "/home/datascience/ISVC_2023_Tutorial_XAI_Few_Shot_on_the_Cloud"
    f"/Code/Localization/Few_Shot_Sets/{SHOT}-shot/"
)
DATA = "/home/datascience/ISVC_2023_Tutorial_XAI_Few_Shot_on_the_Cloud/SampleDataset(s)/"

In [None]:
# Extract the dataset archive only when the extracted folder is not already present
extracted_dir = os.path.join(DATA, data_folder)
if not os.path.exists(extracted_dir):
    with ZipFile(f"{DATA}/{data_folder}.zip", 'r') as archive:
        archive.extractall(f"{DATA}")

In [None]:
from sklearn.model_selection import train_test_split
def generate_datasets(root_dir, image_dir, annot_dir, task="localization"):
    """Split the images under ``root_dir/image_dir`` into train/valid/test by patient ID.

    The patient ID (PID) of a file is the last 7-digit number starting with
    ``9`` found in its name. The split is made over unique PIDs, so every
    image of one patient lands in the same subset: half of the PIDs go to
    train, and the remaining half is split evenly into valid and test
    (both splits seeded with ``random_state=42``).

    Args:
        root_dir: Dataset root directory.
        image_dir: Sub-directory of ``root_dir`` holding the images.
        annot_dir: Sub-directory of ``root_dir`` holding the annotations.
            Kept for interface compatibility; the annotation name is derived
            from the image name and this directory is not read.
        task: When ``"localization"`` the annotation name is the image name
            with ``.jpg`` replaced by ``.xml``; otherwise it equals the image name.

    Returns:
        Tuple ``(train, valid, test)`` of DataFrames with columns
        ``pid``, ``images`` and ``masks``.
    """
    img_dir = os.path.join(root_dir, image_dir)

    # PID regex
    pid_reg = re.compile("9[0-9]{6}")

    records = []
    # Sort the listing: os.listdir returns entries in arbitrary order, and the
    # seeded split below is only reproducible when PIDs arrive in a fixed order.
    for img in sorted(os.listdir(img_dir)):
        if ".DS_Store" in img or ".ipynb_checkpoints" in img:
            continue
        matches = pid_reg.findall(img)
        if not matches:
            # No patient ID in the name: the file cannot be assigned to a split.
            continue
        # Image names + annotation names match
        annot = img.replace(".jpg", ".xml") if task == "localization" else img
        records.append([matches[-1], img, annot])

    data_records = pd.DataFrame(records, columns=["pid", "images", "masks"])

    train_pids, held_out_pids = train_test_split(
        data_records.pid.unique(), test_size=0.5, random_state=42
    )
    valid_pids, test_pids = train_test_split(held_out_pids, test_size=0.5, random_state=42)

    train = data_records[data_records.pid.isin(train_pids)].reset_index(drop=True)
    valid = data_records[data_records.pid.isin(valid_pids)].reset_index(drop=True)
    test = data_records[data_records.pid.isin(test_pids)].reset_index(drop=True)

    return train, valid, test

In [None]:
# Patient-level train/valid/test splits of the extracted dataset
train, valid, test = generate_datasets(
    root_dir=f"{DATA}/{data_folder}",
    image_dir="images",
    annot_dir="labels",
)

In [None]:
def get_few_shot_sample(dataset, k=1, random_state=42):
    """Return a seeded random sample of ``k`` rows from ``dataset``.

    Args:
        dataset: DataFrame to sample from.
        k: Number of rows to draw.
        random_state: Seed passed to ``DataFrame.sample``.

    Returns:
        ``dataset`` itself (unchanged, index untouched) when it has fewer than
        ``k`` rows; otherwise a new DataFrame of ``k`` sampled rows with a
        fresh 0..k-1 index.
    """
    if len(dataset) < k:
        return dataset

    picked = dataset.sample(n=k, random_state=random_state)
    return picked.reset_index(drop=True)

In [None]:
# Draw SHOT samples per split (rather than a literal 10) so the sample size
# agrees with the "{SHOT}-shot" folder the files are copied into.
train_few = get_few_shot_sample(train, k=SHOT)
valid_few = get_few_shot_sample(valid, k=SHOT)

In [None]:
# YOLOv8 requires a "dataset" directory for all .yaml files to be located in
# exist_ok=True makes the cell safe to re-run when the directory already exists.
os.makedirs(YOLO_DATASET_ROOT, exist_ok=True)

In [None]:
# Create the images/labels folders of every split.
# exist_ok=True keeps the cell re-runnable once the folders exist.
for split in ("train", "valid", "test"):
    for kind in ("images", "labels"):
        os.makedirs(f"{FEW_SHOT_ROOT}/{split}/{kind}", exist_ok=True)


In [None]:
# Copy every sampled training image together with its label file
src_root = f"{DATA}/{data_folder}"
dst_root = f"{FEW_SHOT_ROOT}/train"
for pid in train_few.pid:
    shutil.copy(f"{src_root}/images/{pid}.jpg", f"{dst_root}/images/{pid}.jpg")
    shutil.copy(f"{src_root}/labels/{pid}.txt", f"{dst_root}/labels/{pid}.txt")

In [None]:
# Copy every sampled validation image together with its label file
src_root = f"{DATA}/{data_folder}"
dst_root = f"{FEW_SHOT_ROOT}/valid"
for pid in valid_few.pid:
    shutil.copy(f"{src_root}/images/{pid}.jpg", f"{dst_root}/images/{pid}.jpg")
    shutil.copy(f"{src_root}/labels/{pid}.txt", f"{dst_root}/labels/{pid}.txt")

In [None]:
# Copy the full test split (not a few-shot sample): images plus label files
src_root = f"{DATA}/{data_folder}"
dst_root = f"{FEW_SHOT_ROOT}/test"
for pid in test.pid:
    shutil.copy(f"{src_root}/images/{pid}.jpg", f"{dst_root}/images/{pid}.jpg")
    shutil.copy(f"{src_root}/labels/{pid}.txt", f"{dst_root}/labels/{pid}.txt")

In [None]:
# Setup YAML file with direct path to train, val, test directories
# (YAML key -> split folder name; note that "val" maps to the "valid" folder)
yaml_info = {
    yaml_key: f"{FEW_SHOT_ROOT}/{split_dir}/images"
    for yaml_key, split_dir in (("train", "train"), ("val", "valid"), ("test", "test"))
}
# Single-class dataset description
yaml_info["nc"] = 1
yaml_info["names"] = ['KneeAPView']

In [None]:
# Write the dataset description to the path the training cell reads
yaml_path = f'{YOLO_DATASET_ROOT}/{SHOT}_shot_localization.yaml'
with open(yaml_path, 'w') as yaml_file:
    yaml.dump(yaml_info, yaml_file)

### Few-Shot YOLOv8 Training

In [None]:
# Import YOLO from Ultralytics library
from ultralytics import YOLO
import os

In [None]:
# Load a pretrained model (recommended for training)
pretrained_weights = "yolov8n.pt"
model = YOLO(pretrained_weights)

In [None]:
# Train on few-shot dataset, using the dataset YAML written during data setup
dataset_yaml = f'{YOLO_DATASET_ROOT}/{SHOT}_shot_localization.yaml'
metrics = model.train(data=dataset_yaml, epochs=100)

In [None]:
# NOTE: this rebinds `metrics`, replacing the value returned by model.train() in the training cell.
metrics = model.val()  # evaluate model performance on the validation set

In [None]:
# Evaluate on test set
# `split="test"` presumably selects the "test" entry of the dataset YAML used for training — confirm.
results = model.val(split="test")

In [None]:
# Single image prediction
# Take the image from the current few-shot training folder instead of a
# hard-coded "10-shot" path, so the cell works for any SHOT value.
sample_pid = train_few.pid.iloc[0]
result = model.predict(f"{FEW_SHOT_ROOT}/train/images/{sample_pid}.jpg", save=True)

In [None]:
# Multiple image prediction
# Same run directory as the "Save Model" cell. It will need to be modified to
# match the output directory of your trained model if training is run more than once.
trained_model_pth = "runs/detect/train/weights/best.pt"
model = YOLO(trained_model_pth)

# Use the absolute few-shot root (not a cwd-relative path) and keep only the
# copied .jpg images, skipping stray entries such as .ipynb_checkpoints.
test_root = f"{FEW_SHOT_ROOT}/test/images"
test_imgs = [
    os.path.join(test_root, img)
    for img in sorted(os.listdir(test_root))
    if img.endswith(".jpg")
]
result = model.predict(test_imgs, save=True)

In [None]:
## Save Model

In [None]:
# Create checkpoints/ and checkpoints/{SHOT}-shot in one call; exist_ok=True keeps
# the cell re-runnable (a bare `mkdir` errors once the folder exists).
os.makedirs(f"checkpoints/{SHOT}-shot", exist_ok=True)

# will need to modify path to match output directory of your trained model if run more than once
shutil.copy("runs/detect/train/weights/best.pt", f"checkpoints/{SHOT}_shot_local.pt")

### Explainability: EigenCAM

In [None]:
# Load the checkpoint saved for the current SHOT setting (matches the name
# used when the model was copied into checkpoints/).
trained_model_pth = f"checkpoints/{SHOT}_shot_local.pt"
model = YOLO(trained_model_pth)

In [None]:
# Layer handed to EigenCAM: the fourth module from the end of the network
cam_layer = model.model.model[-4]
target_layers = [cam_layer]

In [None]:
import cv2

# Read the test image from the current few-shot folder rather than a hard-coded "10-shot" path.
img_path = f"{FEW_SHOT_ROOT}/test/images/9361281.jpg"
img = cv2.imread(img_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError(f"Could not read image: {img_path}")

In [None]:
# Resize the loaded image to 640x640 before handing it to EigenCAM.
img = cv2.resize(img, (640, 640))
# NOTE(review): despite the name this is a plain copy of `img`; cv2.imread loads
# images in BGR order and no channel conversion happens here — confirm this is intended.
rgb_img = img.copy()

# Build the EigenCAM explainer for the chosen target layers (task='od').
cam = EigenCAM(model, target_layers,task='od')
# Keep the first map of the CAM output.
grayscale_cam = cam(rgb_img)[0, :, :]
# Overlay the CAM on the image (pixel values divided by 255).
cam_image = show_cam_on_image(np.float32(img) / 255, grayscale_cam, use_rgb=True)
# 3-channel stack of the CAM resized to 224x224; not used by any later visible cell.
g_scale = cv2.resize(np.stack([grayscale_cam] * 3, axis=2), (224,224))

In [None]:
# Show the input image and its CAM overlay side by side
side_by_side = np.concatenate((rgb_img, cam_image), axis=1)
Image.fromarray(side_by_side)