In [1]:
import os
import numpy as np
import torch
from PIL import Image
from torchvision.models import detection
import torchvision
from torchvision import datasets, models, transforms
import cv2
import matplotlib.pyplot as plt
import torchvision
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import time
from tempfile import TemporaryDirectory

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Some code is from here: https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
# maybe we need to change the transformations. Maybe the model weights were calculated using data with different transformations. Or maybe we need to finetune each model to our transformations

if 'David' in os.getcwd():  # checking in whose computer we are
    train_dir = "C:/Users/David/PycharmProjects/reliableML/data/imagenet/train"
    val_dir = 'C:/Users/David/PycharmProjects/reliableML/data/imagenet/val'
else:
    print("The path of the files is in Amitay's computer")

batch_size = 4

# imagenet_data = torchvision.datasets.ImageNet('path/to/imagenet_root/')

# they used ImageNet as the base dataset
# they used ImageNet-V2, ImageNet-VidRobust, ImageNet-Rendition and ImageNet-Sketch for ImageNet-Adversarial for natural OOD shift. They also used ImageNet-Adversarial as a natural OOD shift. This dataset was examined separately
# they used ImageNet-C as a synthetic OOD shift
# They also somehow used ImageNet-Validation

trainset = torchvision.datasets.ImageNet(root=train_dir, train=True,
                                        download=True, transform=data_transforms)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

valset = torchvision.datasets.ImageNet(root=val_dir, train=False,
                                       download=True, transform=data_transforms)
valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

# not really our classes so should be changed or deleted
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

image_datasets = {'train': trainset,
                  'test': valset}
dataloaders = {'train': trainloader,
                  'test': valloader}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

# the models that were used in the paper: resnet18, resnet34, resnet50, resnet101, resnet152, vgg19, alexnet, resnext101_32x8d or resnext101_64x4d (they didn't specify which resnet101 they used. Also used wide_resnet101_2 and AugMix, DeepAugment, AM-DeepAugment and Deep Ensembles
resnet18 = models.resnet18(weights='IMAGENET1K_V1')

RuntimeError: The archive ILSVRC2012_devkit_t12.tar.gz is not present in the root directory or is corrupted. You need to download it externally and place it in C:/Users/David/PycharmProjects/reliableML/data/imagenet/train.

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    # The function does training including validation
    # it's taken from here:
    # https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html#training-the-model

    since = time.time()

    # Create a temporary directory to save training checkpoints
    with TemporaryDirectory() as tempdir:
        best_model_params_path = os.path.join(tempdir, 'best_model_params.pt')

        torch.save(model.state_dict(), best_model_params_path)
        best_acc = 0.0

        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0

                # Iterate over data.
                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)
                if phase == 'train':
                    scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

                # deep copy the model
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    torch.save(model.state_dict(), best_model_params_path)

            print()

        time_elapsed = time.time() - since
        print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
        print(f'Best val Acc: {best_acc:4f}')

        # load best model weights
        model.load_state_dict(torch.load(best_model_params_path))
    return model


def AC(model, dataset):
    # the function does inference and then calculates the average confidence of the model on the dataset

    was_training = model.training
    model.eval()

    loader = torch.utils.data.DataLoader(trainset, batch_size=len(dataset),
                                          shuffle=True, num_workers=2)  # a dataloader that loads all the samples at once
    inputs, _ = next(iter(loader))  # all samples without labels

    with torch.no_grad():
        outputs = model(inputs)
        probmax = torch.max(outputs, axis = 0)  # the score(s) of the class with the highest score(s)
        average_confidence = torch.mean(probmax)  # calculating average confidence

        model.train(mode=was_training)

    return average_confidence


def DOC(model, in_distribution_dataset, out_distribution_dataset):
    # the function calculates the average confidence of the model on two datasets and then calculates the DoC (difference of confidences) over the two datasets

    DoC = AC(model, in_distribution_dataset) - AC(model, out_distribution_dataset)
    return DoC

In [None]:
model_ft = resnet18
num_ftrs = model_ft.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to ``nn.Linear(num_ftrs, len(class_names))``.
model_ft.fc = nn.Linear(num_ftrs, 2)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [None]:
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=25)

In [5]:
# I found an implementation of DoC but they did a bit different from what I did. They subtracted the accuracy from the average confidence and said that the result is the DoC. I don't know why: https://github.com/ZerojumpLine/ModelEvaluationUnderClassImbalance/blob/main/Prostate.ipynb