<a href="https://colab.research.google.com/github/real-rookie/novelty-detection-algorithms-evaluation/blob/main/driver.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# env
!pip install lightning
!pip install anomalib
!pip install OpenVINO
!pip install wandb

In [None]:
# unzip code and datasets
# Extracts the project archive from Google Drive into /home/ (-o overwrites
# existing files without prompting) and then makes /home the working directory.
# NOTE(review): the archives are read from /content/drive/MyDrive, but no visible
# cell mounts Drive — confirm it is mounted before this cell runs.
!unzip -o /content/drive/MyDrive/novelty-detection-algorithms-evaluation.zip -d /home/
# uncomment one of the following two lines to get the correct MVTec-AD dataset
# !unzip -o /content/drive/MyDrive/MVTec-AD_old.zip -d /home/datasets/
# !unzip -o /content/drive/MyDrive/MVTec-AD_new.zip -d /home/datasets/
%cd /home

In [None]:
import os
import random
import shutil

import numpy as np
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchvision.utils import save_image

In [None]:
# set parameters

# Static description of every supported dataset.
# Each value is a two-item list: [image root path, number of classes].
DATASET_INFO = {
    "MNIST": ["datasets/MNIST/images", 10],
    "FashionMNIST": ["datasets/FashionMNIST/images", 10],
    "CIFAR10": ["datasets/CIFAR10/images", 10],
    "MVTec-AD": ["datasets/MVTec-AD/images", 15],
}

# Which dataset this run works on (must be a key of DATASET_INFO).
dataset = "MNIST"
# "new" or "old"; only consulted when dataset is MVTec-AD.
MVTecAD_set_type = "new"
# Derived from the selection above.
dataset_path, num_total_classes = DATASET_INFO[dataset]

# Split options.
small_datasets = True  # not used yet
normal_weight = 0.5  # proportion of normal samples in the test and cv sets
cv_weight = 0.20  # proportion of samples in original training sets used to form cv sets

In [None]:
# make datasets
%cd /home
if dataset in ["MNIST", "FashionMNIST", "CIFAR10"]:
    os.system(f"rm -rf {dataset_path}")
    for i in range(num_total_classes):
        os.system(f"mkdir -p {dataset_path}/train/{i}")
        os.system(f"mkdir -p {dataset_path}/cv/{i}/normal")
        os.system(f"mkdir -p {dataset_path}/cv/{i}/novel")
        os.system(f"mkdir -p {dataset_path}/categorized_test_cases/{i}")
        os.system(f"mkdir -p {dataset_path}/test/{i}/normal")
        os.system(f"mkdir -p {dataset_path}/test/{i}/novel")
elif dataset == "MVTec-AD":
    pass

In [None]:
# Load the torchvision train/test splits for the selected dataset.
# Maps each dataset name to (torchvision dataset class, download root).
_TORCHVISION_SOURCES = {
    "MNIST": (datasets.MNIST, "datasets"),
    "FashionMNIST": (datasets.FashionMNIST, "datasets"),
    "CIFAR10": (datasets.CIFAR10, "datasets/CIFAR10"),
}

train_data = None
test_data = None
if dataset in _TORCHVISION_SOURCES:
    _dataset_cls, _download_root = _TORCHVISION_SOURCES[dataset]
    train_data = _dataset_cls(root=_download_root, train=True, download=True, transform=ToTensor())
    test_data = _dataset_cls(root=_download_root, train=False, download=True, transform=ToTensor())
elif dataset != "MVTec-AD":
    # MVTec-AD is not loaded through torchvision, so both handles stay None for it.
    # Anything else is a configuration error: raise instead of calling os.abort(),
    # which would kill the kernel process rather than report the problem in the cell.
    raise ValueError(f"Wrong dataset specified: {dataset!r}")

In [None]:
# Per-class image counters, indexed by class label. While exporting they also
# supply the running index used in each file name.
train_counter = np.zeros(num_total_classes, dtype=int)
test_counter = np.zeros(num_total_classes, dtype=int)
if dataset == "MVTec-AD":
    # Nothing is exported for MVTec-AD; hard-coded per-class counts are used instead.
    train_counter = np.array([280, 264, 245, 230, 247, 209, 224, 219, 391, 220, 267, 320, 60, 213, 240])
    test_counter = np.array([28, 21, 32, 33, 19, 20, 58, 23, 40, 22, 26, 41, 12, 60, 32])
elif dataset in ["MNIST", "FashionMNIST", "CIFAR10"]:
    # Training samples go to train/<label>/<label>_<n>.png ...
    for img, label in train_data:
        save_image(img, f"{dataset_path}/train/{label}/{label}_{train_counter[label]}.png")
        train_counter[label] += 1
    # ... and test samples to categorized_test_cases/<label>/<label>_<n>.png.
    for img, label in test_data:
        save_image(img, f"{dataset_path}/categorized_test_cases/{label}/{label}_{test_counter[label]}.png")
        test_counter[label] += 1

# Report how many images each class has in each source pool.
print(f"train + cv: {train_counter}")
print(f"test: {test_counter}")

In [None]:
def fill_eval_sets(population, num_samples, src_cls, dest_cls, set_type):
    """Randomly move or copy images of one class into an evaluation split.

    Source files are looked up as
    ``{dataset_path}/{src_set}/{src_cls}/{src_cls}_{index}.png`` for ``index`` in
    ``range(population)`` and placed in
    ``{dataset_path}/{dest_set}/{dest_cls}/normal`` or ``.../novel``.

    Args:
        population: Number of images originally written for ``src_cls``; indices
            ``0 .. population - 1`` are the candidate files.
        num_samples: How many images to draw.
        src_cls: Class the images are drawn from.
        dest_cls: Class whose evaluation set is being filled. When it equals
            ``src_cls`` the images are *moved* into the ``normal`` folder;
            otherwise they are *copied* into the ``novel`` folder.
        set_type: ``"cv"`` (draw from ``train``) or ``"test"`` (draw from
            ``categorized_test_cases``).

    Raises:
        ValueError: If ``set_type`` is neither ``"cv"`` nor ``"test"``.
    """
    split_dirs = {"cv": ("train", "cv"), "test": ("categorized_test_cases", "test")}
    if set_type not in split_dirs:
        # Fail before touching the file system instead of building "None/..." paths.
        raise ValueError(f"Wrong set type: {set_type!r} (expected 'cv' or 'test')")
    src_set, dest_set = split_dirs[set_type]

    is_normal = src_cls == dest_cls
    folder_type = "normal" if is_normal else "novel"
    src_dir = os.path.join(dataset_path, src_set, str(src_cls))
    dest_dir = os.path.join(dataset_path, dest_set, str(dest_cls), folder_type)
    os.makedirs(dest_dir, exist_ok=True)

    # "Normal" calls move files out of the source folder, so a later call for the
    # same source class must only sample indices whose file is still present;
    # otherwise the transfer fails and the evaluation set ends up short.
    present = set(os.listdir(src_dir)) if os.path.isdir(src_dir) else set()
    available = [i for i in range(population) if f"{src_cls}_{i}.png" in present]

    wanted = int(num_samples)
    if wanted > len(available):
        print(
            f"Warning: requested {wanted} samples of class {src_cls} from "
            f"{src_dir} but only {len(available)} are available"
        )
        wanted = len(available)

    # Passing the full destination file path lets an existing file be overwritten,
    # matching `mv -f` / `cp` semantics.
    transfer = shutil.move if is_normal else shutil.copy
    for index in random.sample(available, wanted):
        file_name = f"{src_cls}_{index}.png"
        transfer(os.path.join(src_dir, file_name), os.path.join(dest_dir, file_name))

# Build the cv/test folders once for every choice of "normal" class.
use_prebuilt_mvtec = dataset == "MVTec-AD" and MVTecAD_set_type == "new"
if use_prebuilt_mvtec:
    # The "new" MVTec-AD layout already has categorized cv/test folders: copy every
    # file of the normal class into .../normal and every other class into .../novel.
    for normal in range(num_total_classes):
        os.system(f"cp {dataset_path}/categorized_cv_cases/{normal}/* {dataset_path}/cv/{normal}/normal")
        os.system(f"cp {dataset_path}/categorized_test_cases/{normal}/* {dataset_path}/test/{normal}/normal")
        for novel in range(num_total_classes):
            if novel != normal:
                os.system(f"cp {dataset_path}/categorized_cv_cases/{novel}/* {dataset_path}/cv/{normal}/novel")
                os.system(f"cp {dataset_path}/categorized_test_cases/{novel}/* {dataset_path}/test/{normal}/novel")
else:
    for normal in range(num_total_classes):
        # One seed per normal class keeps the sampling reproducible.
        random.seed(normal)

        # cv normal: a cv_weight share of the training pool forms the cv set,
        # and a normal_weight share of that comes from the normal class itself.
        num_cv_samples = np.floor(train_counter[normal] * cv_weight).astype(int)
        num_normal_cv_samples = np.floor(num_cv_samples * normal_weight).astype(int)
        fill_eval_sets(train_counter[normal], num_normal_cv_samples, normal, normal, "cv")

        # test normal: a normal_weight share of the class's test pool.
        num_normal_test_samples = np.floor(test_counter[normal] * normal_weight).astype(int)
        fill_eval_sets(test_counter[normal], num_normal_test_samples, normal, normal, "test")

        # The remaining slots are divided evenly among all other classes.
        num_novel_cv_from_each_class = np.floor((num_cv_samples - num_normal_cv_samples) / (num_total_classes - 1)).astype(int)
        num_novel_test_from_each_class = np.floor((test_counter[normal] - num_normal_test_samples) / (num_total_classes - 1)).astype(int)
        for novel in range(num_total_classes):
            if novel != normal:
                # cv novel, then test novel (this order fixes the random draws).
                fill_eval_sets(train_counter[novel], num_novel_cv_from_each_class, novel, normal, "cv")
                fill_eval_sets(test_counter[novel], num_novel_test_from_each_class, novel, normal, "test")

In [None]:
# train and testing
%cd /home/novelty-detection-algorithms-evaluation
!python main.py --mode train --data MNIST --model RD4AD

In [None]:
!python main.py --mode test --data MNIST --model RD4AD