# Models Evaluator
Evaluate all the models on all the test datasets

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/luca-martinelli-09/orco-gan/blob/main/modelEvaluator.ipynb)

In [32]:
# @markdown ## Setup project
# @markdown This section will download the datasets from GitHub to use for the training phase

if not os.path.exists("./datasets"):
    !git clone "https://github.com/luca-martinelli-09/orco-gan.git"

    %cd orco-gan/

In [33]:
import os
import torch
import pandas as pd
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np

# Report the library versions in use for this run
print(f"PyTorch Version: {torch.__version__}")
print(f"Torchvision Version: {torchvision.__version__}")

PyTorch Version: 1.10.1+cu113
Torchvision Version: 0.11.2+cu113


In [34]:
# Use the first CUDA device when CUDA is available, otherwise run on the CPU
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

CUDA available: True


### Set a manual seed

In [35]:
# Seed shared by every call to setSeed in this notebook
SEED = 151836


def setSeed(seed):
    """Seed NumPy's global RNG and PyTorch's CPU and CUDA RNGs with `seed`."""
    np.random.seed(seed)

    torchSeeders = (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for applySeed in torchSeeders:
        applySeed(seed)


setSeed(SEED)

## Utils

In [36]:
def printGPUStats():
    """Print the active `device` and, when it is a CUDA device, the name and
    the allocated / reserved memory (in GB) of CUDA device 0."""
    print('Using device:', device)
    print()

    # Nothing more to report unless we are running on CUDA
    if device.type != 'cuda':
        return

    bytesPerGB = 1024 ** 3

    print(torch.cuda.get_device_name(0))
    print('[💻 MEMORY USAGE]')

    allocatedGB = round(torch.cuda.memory_allocated(0) / bytesPerGB, 1)
    print('[📌 ALLOCATED]', allocatedGB, 'GB')

    reservedGB = round(torch.cuda.memory_reserved(0) / bytesPerGB, 1)
    print('[🧮 CACHED]', reservedGB, 'GB')


In [37]:
def getSubDirs(dir):
    """Return the names (not full paths) of the directories directly inside `dir`."""
    subDirNames = []
    for entryName in os.listdir(dir):
        entryPath = os.path.join(dir, entryName)
        if os.path.isdir(entryPath):
            subDirNames.append(entryName)
    return subDirNames

In [38]:
def getClassPercents(sizes):
    """Return the share of each entry of `sizes` as an integer percentage.

    Each percentage is truncated towards zero (not rounded), so the returned
    values may add up to less than 100.

    Args:
        sizes: sequence of per-class element counts.

    Returns:
        A list of ints, one per entry of `sizes`. When the total is zero
        (including an empty `sizes`) every percentage is 0.
    """
    totalSize = np.sum(np.array(sizes))

    # Without this guard a zero total yields NaN and int(NaN) raises ValueError
    if totalSize == 0:
        return [0 for _ in sizes]

    return [int((size / totalSize) * 100) for size in sizes]

In [39]:
def getBestScores(hist, key, min=False):
    """Return the entry of `hist` with the best value under `key`, and its index.

    "Best" is the largest value by default, or the smallest when `min` is true.
    The result is the tuple (entry, index).
    """
    values = np.array([entry[key] for entry in hist])
    selectIndex = np.argmin if min else np.argmax
    bestIndex = selectIndex(values)

    return hist[bestIndex], bestIndex

## Settings

In [40]:
# @markdown ### Datasets
# Root folder of the datasets: each sub-directory is handled as one dataset and
# its name is used as a key into `normalizationVals`.
datasetsDir = "./datasets" # @param {type: "string"}

# @markdown ### Models
# Root folder that is scanned for saved model checkpoints.
modelsDir = "./models" # @param {type: "string"}

# Size handed to `transforms.Resize` when the test transform is built
inputSize = 224 # Specified for alexnet, resnet, vgg

# Normalization values
# Layout: dataset name -> split ("train" / "val" / "test") -> two lists of three
# values. The lists are passed, in this order, as the first and second argument
# of `transforms.Normalize` (i.e. mean, then std).
# The visible cells of this notebook only read the "test" entries.
normalizationVals = {
    "bing": {
        "train": [[0.5407, 0.5059, 0.4523], [0.2830, 0.2794, 0.2898]],
        "val": [[0.5341, 0.5012, 0.4385], [0.2809, 0.2752, 0.2863]],
        "test": [[0.5257, 0.4953, 0.4290], [0.2799, 0.2730, 0.2844]]
    },
    "ddg": {
        "train": [[0.5366, 0.5061, 0.4544], [0.2860, 0.2820, 0.2917]],
        "val": [[0.5364, 0.5036, 0.4522], [0.2868, 0.2817, 0.2917]],
        "test": [[0.5323, 0.5006, 0.4465], [0.2825, 0.2784, 0.2881]]
    },
    "google": {
        "train": [[0.5635, 0.5371, 0.4781], [0.2899, 0.2861, 0.3035]],
        "val": [[0.5653, 0.5397, 0.4751], [0.2872, 0.2835, 0.3018]],
        "test": [[0.5736, 0.5468, 0.4893], [0.2954, 0.2914, 0.3083]]
    }
}

## Models evaluations

In [41]:
def _safeRatio(numerator, denominator):
    """Return numerator / denominator, with 0 in place of a non-finite result."""
    ratio = numerator / denominator
    # A zero denominator produces NaN (0/0) or inf (x/0); report 0 instead so
    # the value stays usable in tables and comparisons.
    return torch.where(torch.isfinite(ratio), ratio, torch.zeros_like(ratio))


def getScores(labels, predicted):
    """Compute accuracy, precision, recall and F1 for binary predictions.

    The confusion counts are obtained from element-wise products, so both
    tensors are assumed to contain only the values 0 and 1, with 1 being the
    positive class.

    Args:
        labels: 1-D tensor with the ground-truth classes.
        predicted: 1-D tensor with the predicted classes, same length as `labels`.

    Returns:
        Tuple (acc, precision, recall, f1) of scalar tensors. A metric whose
        denominator is zero (empty input, no predicted positives, no actual
        positives) is returned as 0 rather than NaN.
    """
    acc = _safeRatio(torch.sum(predicted == labels), len(predicted))

    tp = (labels * predicted).sum()
    tn = ((1 - labels) * (1 - predicted)).sum()
    fp = ((1 - labels) * predicted).sum()
    fn = (labels * (1 - predicted)).sum()

    precision = _safeRatio(tp, tp + fp)
    recall = _safeRatio(tp, tp + fn)

    f1 = _safeRatio(2 * (precision * recall), precision + recall)

    return acc, precision, recall, f1

In [42]:
def evaluateModel(model, dataloader):
    """Run `model` over every batch of `dataloader` and score the predictions.

    The model is put in eval mode, each batch is moved to the global `device`,
    and the index of the largest output along dimension 1 is taken as the
    predicted class. Predictions and targets of all batches are concatenated
    and handed to `getScores`.

    Returns:
        Dict with the keys "acc", "precision", "recall" and "f1"; every value
        is moved to the CPU and converted to NumPy.
    """
    model.eval()
    allPredictions = torch.tensor([]).to(device, non_blocking=True)
    allTargets = torch.tensor([]).to(device, non_blocking=True)

    for batchInputs, batchTargets in dataloader:
        batchInputs = batchInputs.to(device, non_blocking=True)
        batchTargets = batchTargets.to(device, non_blocking=True)

        # Inference only: gradients are switched off for the forward pass
        with torch.set_grad_enabled(False):
            batchOutputs = model(batchInputs)
            batchPredictions = torch.max(batchOutputs, 1)[1]

        allPredictions = torch.cat([allPredictions, batchPredictions], dim=0)
        allTargets = torch.cat([allTargets, batchTargets], dim=0)

    metricNames = ("acc", "precision", "recall", "f1")
    metricValues = getScores(allTargets, allPredictions)

    return {
        name: value.cpu().numpy()
        for name, value in zip(metricNames, metricValues)
    }


### Information about models

In [43]:
print("[🧠 MODELS INFORMATION]")

# One summary dict per checkpoint found on disk
modelsInformation = []

# Directory layout walked here: <modelsDir>/<dataset>/<model type>/<checkpoint>
for dataset in getSubDirs(modelsDir):
    print("\n" + "-" * 15)
    print("[🗃️ DATASET] {}".format(dataset))

    datasetDir = os.path.join(modelsDir, dataset)

    for modelType in getSubDirs(datasetDir):
        print("\n[🧮 MODEL TYPE] {}".format(modelType))

        modelsTypeDir = os.path.join(datasetDir, modelType)

        for model in os.listdir(modelsTypeDir):
            print("\n\t[🧠 MODEL] {}".format(model))

            path = os.path.join(modelsTypeDir, model)

            # NOTE: torch.load unpickles the file; only open checkpoints that
            # come from a trusted source.
            # Only metadata is read in this cell, so everything is mapped to
            # the CPU: the GPU stays free and checkpoints saved on a CUDA
            # machine can still be opened on a CPU-only one.
            checkpoint = torch.load(path, map_location="cpu")

            # Best epoch = highest F1 in the stored per-epoch history
            bestScore, i = getBestScores(checkpoint["scores_history"], "f1")
            classBalancing = getClassPercents(checkpoint["dataset_sizes"])
            balancingStr = "/".join([str(x) for x in classBalancing])

            modelsInformation.append({
                "dataset": dataset,
                "model": checkpoint["model_name"],
                "epochs": len(checkpoint["scores_history"]),
                "balancing": balancingStr,
                "f-score": bestScore["f1"],
            })

            print("\tModel:", checkpoint["model_name"])
            print("\tEpochs:", len(checkpoint["scores_history"]))
            print("\tBalancing:", classBalancing)
            print("\tBest epoch:", i)
            print("\tBest F-Score:", bestScore["f1"])
            print("\tHistory:", [float(x["f1"]) for x in checkpoint["scores_history"]])

            torch.cuda.empty_cache()

modelsInformationDF = pd.DataFrame(modelsInformation)

printGPUStats()

[🧠 MODELS INFORMATION]

---------------
[🗃️ DATASET] bing

[🧮 MODEL TYPE] alexnet

[🧮 MODEL TYPE] resnet

[🧮 MODEL TYPE] vgg

---------------
[🗃️ DATASET] ddg

[🧮 MODEL TYPE] alexnet

[🧮 MODEL TYPE] resnet

	[🧠 MODEL] resnet_50_50_2_all.pt
	Model: resnet
	Epochs: 24
	Balancing: [50, 50]
	Best epoch: 14
	Best F-Score: 0.9771144
	History: [0.9276437759399414, 0.9706180095672607, 0.966360867023468, 0.9770230054855347, 0.9722222089767456, 0.9760478138923645, 0.9749247431755066, 0.9767910838127136, 0.9738430976867676, 0.9758065342903137, 0.9747729301452637, 0.9747219085693359, 0.9736308455467224, 0.9706774353981018, 0.9771143794059753, 0.9768844246864319, 0.9738430976867676, 0.9748237133026123, 0.9716024398803711, 0.9730269312858582, 0.9686552286148071, 0.9676768183708191, 0.9694581031799316, 0.9574247598648071]

[🧮 MODEL TYPE] vgg

---------------
[🗃️ DATASET] google

[🧮 MODEL TYPE] alexnet

[🧮 MODEL TYPE] resnet

[🧮 MODEL TYPE] vgg
Using device: cuda:0

NVIDIA GeForce GTX 1050
[💻 MEMORY U

In [None]:
# Show the per-checkpoint summary table as this cell's output
modelsInformationDF

### Evaluations

In [None]:
from imageLimitedDataset import ImageLimitedDataset

print("[🧠 MODELS EVALUATION]")

# One result dict per (test dataset, checkpoint) pair
modelsEvals = []

# Every sub-directory of datasetsDir is treated as one test dataset
for dataset in getSubDirs(datasetsDir):
    print("\n" + "-" * 15)
    print("[🗃️ TEST DATASET] {}".format(dataset))

    datasetDir = os.path.join(datasetsDir, dataset)
    testDir = os.path.join(datasetDir, "test")

    # Normalize with the values stored for this dataset's test split
    normalizationParams = normalizationVals[dataset]
    # NOTE(review): Resize with a single int presumably fixes only the shorter
    # side; confirm the images are square (or that the dataset crops them),
    # otherwise a batch of differently shaped images cannot be stacked.
    dataTransform = transforms.Compose([
        transforms.Resize(inputSize),
        transforms.ToTensor(),
        transforms.Normalize(
            normalizationParams["test"][0],
            normalizationParams["test"][1]
        )
    ])

    testDataset = ImageLimitedDataset(testDir, transform=dataTransform, use_cache=True, check_images=False)

    # Report how many test elements each class has
    for cls in testDataset.classes:
        cls_index = testDataset.class_to_idx[cls]
        num_cls = np.count_nonzero(
            np.array(testDataset.targets) == cls_index)
        print("\t[🧮 # ELEMENTS] {}: {}".format(cls, num_cls))

    # Re-seed before building the shuffling loader for each test dataset
    setSeed(SEED)
    testDataLoader = DataLoader(testDataset, batch_size=64, shuffle=True, num_workers=0, pin_memory=True)

    # Evaluate every file found below modelsDir on this test dataset
    for root, _, fnames in sorted(os.walk(modelsDir, followlinks=True)):
        for fname in sorted(fnames):
            path = os.path.join(root, fname)
            # NOTE: torch.load unpickles the file; only open checkpoints that
            # come from a trusted source.
            # map_location lets checkpoints saved on a CUDA machine load when
            # the notebook has fallen back to the CPU.
            model = torch.load(path, map_location=device)

            # Dataset the model was trained on: taken from the checkpoint when
            # stored there, otherwise from the first directory level below
            # modelsDir. The path is made relative to modelsDir first, because
            # indexing the raw `root` picks a different component depending on
            # the path separator ("./models/ddg/resnet" -> "models" on POSIX).
            if "dataset" in model:
                modelDataset = model["dataset"]
            else:
                modelDataset = os.path.relpath(root, modelsDir).split(os.sep)[0]

            modelPercents = "/".join([str(x) for x in getClassPercents(model["dataset_sizes"])])

            print()
            print("[🧮 EVALUATING] {} - {} {}".format(
                modelDataset,
                model["model_name"],
                modelPercents
            ))

            modelToTest = model["model"]
            modelToTest = modelToTest.to(device, non_blocking=True)

            scores = evaluateModel(modelToTest, testDataLoader)

            modelsEvals.append({
                "dataset": dataset,
                "model": model["model_name"],
                "model_dataset": modelDataset,
                "balancing": modelPercents,
                "acc": scores["acc"],
                "precision": scores["precision"],
                "recall": scores["recall"],
                "f1": scores["f1"],
            })

            print("\tAcc: {:.4f}".format(scores["acc"]))
            print("\tPre: {:.4f}".format(scores["precision"]))
            print("\tRec: {:.4f}".format(scores["recall"]))
            print("\tF-Score: {:.4f}".format(scores["f1"]))

            torch.cuda.empty_cache()
            printGPUStats()


In [None]:
# Collect the per-(test dataset, model) result dicts into a table, one row each
modelsEvalsDF = pd.DataFrame(modelsEvals)

In [None]:
# Show the evaluation table as this cell's output
modelsEvalsDF

### Save evaluations

In [None]:
# Write the evaluation table to the working directory. `index` is left at its
# default, so the row index is saved as the first (unnamed) CSV column.
modelsEvalsDF.to_csv("modelsEvaluations.csv")
