## Knee Joint Localization Dataset Setup

In [None]:
import numpy as np
import pandas as pd
import regex as re
import os
import shutil

import yaml
from zipfile import ZipFile

In [None]:
# Dataset location: DATA is the directory that holds the sample datasets and
# data_folder is the name of this dataset inside it. The unzip step further
# down looks for the archive at DATA/<data_folder>.zip.
data_folder = "ISVC_Localization"
DATA = "/home/datascience/ISVC_2023_Tutorial_XAI_Few_Shot_on_the_Cloud/SampleDataset(s)/"

In [None]:
# Extract the dataset archive into DATA, but only when the extracted folder
# is not already there (so re-running the cell does not unzip again).
extracted_dir = os.path.join(DATA, data_folder)
if not os.path.exists(extracted_dir):
    archive_path = f"{DATA}/{data_folder}.zip"
    with ZipFile(archive_path, 'r') as archive:
        archive.extractall(f"{DATA}")

In [None]:
from sklearn.model_selection import train_test_split
def generate_datasets(root_dir, image_dir, annot_dir,  task="localization"):
    """Build train/valid/test tables of images, split by PID.

    Every file in ``root_dir/image_dir`` (except names containing
    ``.DS_Store`` or ``.ipynb_checkpoints``) becomes one record. The PID of a
    record is the last match of ``9[0-9]{6}`` in the file name. The unique
    PIDs are split 50% train / 25% valid / 25% test with a fixed seed, and
    all records of a PID land in the same split.

    Args:
        root_dir: Dataset root directory.
        image_dir: Name of the image sub-directory inside ``root_dir``.
        annot_dir: Name of the annotation sub-directory. Accepted for
            interface compatibility; the annotation folder is not read here.
        task: When ``"localization"`` the ``masks`` column holds the image
            name with ``.jpg`` replaced by ``.xml``; for any other value it
            holds the image name unchanged.

    Returns:
        Tuple ``(train, valid, test)`` of DataFrames with columns
        ``pid``, ``images`` and ``masks``, each with a fresh 0..n-1 index.

    Raises:
        IndexError: If a listed file name contains no PID match.
    """
    img_dir = os.path.join(root_dir, image_dir)

    # PID regex: a 9 followed by six digits.
    pid_reg = re.compile(r"9[0-9]{6}")

    # os.listdir returns entries in arbitrary order. Sorting makes the order
    # of unique PIDs -- and therefore the seeded split below -- identical on
    # every machine and filesystem.
    image_names = sorted(
        img for img in os.listdir(img_dir)
        if ".DS_Store" not in img and ".ipynb_checkpoints" not in img
    )

    # Image names + annotation names match
    records = [
        [
            pid_reg.findall(img)[-1],
            img,
            img if task != "localization" else img.replace(".jpg", ".xml"),
        ]
        for img in image_names
    ]

    data_records = pd.DataFrame(records, columns=["pid", "images", "masks"])

    # Split on unique PIDs so one PID never appears in two splits:
    # half go to train, the remaining half is divided evenly into valid/test.
    train_pids, held_out_pids = train_test_split(
        data_records.pid.unique(), test_size=0.5, random_state=42
    )
    valid_pids, test_pids = train_test_split(
        held_out_pids, test_size=0.5, random_state=42
    )

    train = data_records[data_records.pid.isin(train_pids)].reset_index(drop=True)
    valid = data_records[data_records.pid.isin(valid_pids)].reset_index(drop=True)
    test = data_records[data_records.pid.isin(test_pids)].reset_index(drop=True)

    return train, valid, test

In [None]:
# Build the PID-grouped train/valid/test tables from the extracted dataset.
dataset_root = f"{DATA}/{data_folder}"
train, valid, test = generate_datasets(dataset_root, image_dir="images", annot_dir="labels")

In [None]:
# Show the training split table as the cell output for a quick inspection.
train

In [None]:
def get_few_shot_sample(dataset, k=1, random_state=42):
    """Draw ``k`` random rows from ``dataset`` for a few-shot subset.

    When ``k`` is larger than the number of rows, ``dataset`` itself is
    returned unchanged. Otherwise the result is a seeded random sample of
    ``k`` rows whose index is renumbered from 0.
    """
    too_few_rows = k > len(dataset)
    if too_few_rows:
        return dataset

    picked = dataset.sample(n=k, random_state=random_state)
    return picked.reset_index(drop=True)

In [None]:
# Sample 10 rows from each of the train and valid tables for the 10-shot run.
train_few, valid_few = (get_few_shot_sample(split, k=10) for split in (train, valid))

In [None]:
# Root folder of the 10-shot dataset. exist_ok=True creates it when missing
# and is a no-op when it already exists, so the cell is safe to re-run and
# avoids the gap between a separate existence check and the creation call.
few_shot_dir = "yolov7-main/datasets/localization-10-shot"
os.makedirs(few_shot_dir, exist_ok=True)

In [None]:
# Create the images/ and labels/ folders for every split. exist_ok=True keeps
# the cell re-runnable: without it a second run raises FileExistsError.
for split_name in ("train", "valid", "test"):
    for subfolder in ("images", "labels"):
        os.makedirs(f"{few_shot_dir}/{split_name}/{subfolder}", exist_ok=True)


In [None]:
# Copy the image and the label file of every sampled training PID into the
# few-shot train split (image first, then label).
for pid in train_few.pid:
    for subfolder, ext in (("images", "jpg"), ("labels", "txt")):
        shutil.copy(
            f"{DATA}/{data_folder}/{subfolder}/{pid}.{ext}",
            f"{few_shot_dir}/train/{subfolder}/{pid}.{ext}",
        )

In [None]:
# Copy the image and the label file of every sampled validation PID into the
# few-shot valid split (image first, then label).
for pid in valid_few.pid:
    for subfolder, ext in (("images", "jpg"), ("labels", "txt")):
        shutil.copy(
            f"{DATA}/{data_folder}/{subfolder}/{pid}.{ext}",
            f"{few_shot_dir}/valid/{subfolder}/{pid}.{ext}",
        )

In [None]:
# Copy the image and the label file of every PID in the full test table into
# the test split (the test set is not down-sampled).
for pid in test.pid:
    for subfolder, ext in (("images", "jpg"), ("labels", "txt")):
        shutil.copy(
            f"{DATA}/{data_folder}/{subfolder}/{pid}.{ext}",
            f"{few_shot_dir}/test/{subfolder}/{pid}.{ext}",
        )

In [None]:
# Dataset description that is dumped to data.yaml further down: the image
# folder of each split plus a single class named 'KneeAPView'
# (nc is presumably the number of classes -- confirm against the trainer).
yaml_info = dict(
    train="../train/images",
    val="../valid/images",
    test="../test/images",
    nc=1,
    names=["KneeAPView"],
)

In [None]:
# Serialize the dataset description into data.yaml inside the few-shot folder.
config_path = f'{few_shot_dir}/data.yaml'
with open(config_path, 'w') as config_file:
    yaml.dump(yaml_info, stream=config_file)