<a href="https://colab.research.google.com/github/linzhe001/tutorial_notebooks/blob/Notes/CIFAR_10N_CNN_withNotes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cloning the CIFAR-10N / CIFAR-100N annotation repository
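Dataset paper (CIFAR-10N): Learning with Noisy Labels Revisited: A Study Using Real-World Human Annotations (https://arxiv.org/abs/2110.12088) <br>
Related paper (CIFAR-10H): Human uncertainty makes classification more robust (https://arxiv.org/pdf/1908.07086.pdf)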

Label:  CIFAR10 [0: airplane, 1: automobile, 2: bird, 3: cat, 4: deer, 5: dog, 6: frog, 7: horse, 8: ship, 9: truck] <br>

<img src="https://miro.medium.com/max/1010/1*r8S5tF_6naagKOnlIcGXoQ.png" alt="alternatetext">




In [None]:
# Fetch the annotation repository and make it the working directory; later cells
# read './data/CIFAR-10_human.pt' relative to the current directory.
# NOTE(review): presumably that file ships with this repository - confirm.
# Re-running this cell after a successful clone prints git's
# "destination path ... already exists" message; the %cd below still runs.
!git clone https://github.com/UCSC-REAL/cifar-10-100n.git
%cd cifar-10-100n

fatal: destination path 'cifar-10-100n' already exists and is not an empty directory.
/content/cifar-10-100n


![image](https://github.com/linzhe001/tutorial_notebooks/blob/Notes/media/CIFAR_10N_CNN%20code%20flow%20chart.png?raw=1)


# main script

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
from torchvision import models
import torchvision.transforms as transforms
import os
import argparse
import copy
import random
import numpy as np
# Use the GPU when PyTorch reports one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
def seed_everything(seed=12):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    stores the seed in the ``PYTHONHASHSEED`` environment variable, and puts
    cuDNN into deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed shared by every generator (default 12).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The seeders do not depend on each other, so one loop covers them all.
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
    for apply_seed in seeders:
        apply_seed(seed)

    # Turn off cuDNN autotuning and request deterministic kernels.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True
# Seeding restricts randomness so that runs are reproducible.

# Hyper-parameters are declared through argparse so they live in one place and
# are read back as attributes of ``args`` (e.g. ``args.lr``).
parser = argparse.ArgumentParser(description='CIFAR-10N Training')
parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
# NOTE(review): --lr_schedule is parsed but not read by any code visible in this notebook.
parser.add_argument('--lr_schedule', default=0, type=int, help='lr scheduler')
parser.add_argument('--batch_size', default=1024, type=int, help='training batch size')
parser.add_argument('--test_batch_size', default=2048, type=int, help='evaluation batch size')
parser.add_argument('--num_epoch', default=100, type=int, help='epoch number')
parser.add_argument('--num_classes', type=int, default=10, help='number classes')
# ``args=[]`` makes argparse ignore the notebook kernel's own command line, so
# every option takes its default. The values exist only on the parsed result:
# ``args.lr`` works, ``parser.lr`` does not.
args = parser.parse_args(args=[])

def train(model, trainloader, criterion, optimizer):
    """Train ``model`` for one pass over ``trainloader``.

    Each batch goes through: forward pass -> loss -> backward pass -> optimizer step.

    Args:
        model: ``torch.nn.Module`` to optimise; it is switched to training mode.
        trainloader: Iterable yielding ``(inputs, targets, ad)`` triples. The
            third element is unpacked but not used by the loss here.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        float: Mean of the per-batch losses, or ``0.0`` if the loader yielded no batches.
    """
    model.train()

    # Take the device from the model itself rather than a notebook-level global,
    # so inputs always land where the weights are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    loss_sum = 0.0
    num_batches = 0
    for inputs, targets, _ad in trainloader:
        if target_device is not None:
            inputs, targets = inputs.to(target_device), targets.to(target_device)

        # Gradients accumulate by default in PyTorch, so clear them for every batch.
        optimizer.zero_grad()
        outputs = model(inputs)             # forward pass
        loss = criterion(outputs, targets)  # mismatch between predictions and targets
        loss.backward()                     # fills ``.grad`` on every parameter
        optimizer.step()                    # updates parameters from those gradients

        # Accumulate as a detached tensor so the value is only read once, after the loop.
        loss_sum = loss_sum + loss.detach()
        num_batches += 1

    if num_batches == 0:
        return 0.0
    return float(loss_sum) / num_batches

def test(model, testloader):
    model.eval() #change model to evaluation module
    correct = 0
    total = 0 #intial correct test number and total test number for calculating accuarcy
    with torch.no_grad(): #ban gradient calculation to boost computing
        for batch_idx, (inputs, targets) in enumerate(testloader): #similar with train function
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs) #forward
            _, predicted = outputs.max(1) #the output prediction, max possibility for classficiation
            total += targets.size(0) #calculate the total test number
# ```targets.size(0)``` is the length of this tensor, namely batch size eg.32 tests each batch.
            correct += predicted.eq(targets).sum().item() #calculate the correct test number
# ```predicted.eq(targets)``` compare predicted with targets and return True or False
## ```.sum()``` used to sum Boolean value, True: 1; False: 0
## ```.item()``` return tensor with only single value to integer
    return correct / total #accuarcy
#summary 1.forward pass to get output 2.compare output with targert 3.calculate accuracy

# CIFAR-10N dataloader

In [None]:
from PIL import Image
import numpy as np
import torchvision

class CIFAR10N(torchvision.datasets.CIFAR10):
    """CIFAR-10 dataset that also exposes per-image human annotation counts.

    Images and the standard labels come from the parent ``CIFAR10`` class. On
    top of that, three per-annotator label arrays are read from an annotation
    file and turned into ``self.ad``: one row per annotated image, one column
    per class, each cell holding how many of the three annotators chose that class.
    """

    # Keys of the three per-annotator label arrays inside the annotation file.
    RATER_KEYS = ('random_label1', 'random_label2', 'random_label3')
    # Number of columns in the vote-count table.
    NUM_CLASSES = 10

    def __init__(self, root,  rand_number=0, train=False, transform=None, target_transform=None,
                 download=False, istrain=False, ann_path='./data/CIFAR-10_human.pt'):
        """Load the images via the parent class and build the vote-count table.

        Args:
            root, train, transform, target_transform, download: Forwarded to
                the parent ``CIFAR10`` constructor.
            rand_number: Accepted for interface compatibility; not used.
            istrain: When True, ``__getitem__`` also returns the vote counts.
            ann_path: Path of the annotation file (a ``torch.load``-able
                mapping containing the keys in ``RATER_KEYS``).

        Raises:
            ValueError: If ``istrain`` is True but the annotation table does
                not have exactly one row per image of the selected split.
        """
        # Initialise the parent CIFAR10 dataset (this populates .data and .targets).
        super(CIFAR10N, self).__init__(root, train, transform, target_transform, download)
        self.istrain = istrain
        self.transform = transform
        self.target_transform = target_transform

        # NOTE(review): torch.load unpickles the file - only point ann_path at a
        # trusted source. Newer PyTorch releases may need an explicit
        # ``weights_only=False`` for this kind of file - TODO confirm.
        ann_all = torch.load(ann_path)
        # Stack the three annotators' labels and transpose to (num_images, 3).
        ann_ = np.array([ann_all[key] for key in self.RATER_KEYS]).transpose(1, 0).astype(np.int64)

        num_images = ann_.shape[0]
        if istrain and num_images != len(self.data):
            # Row i of the table would not describe image i, so fail loudly
            # instead of returning misaligned votes.
            raise ValueError(
                'annotation file has {} rows but the selected split has {} images'.format(
                    num_images, len(self.data)))

        # self.ad[i, c] = number of annotators who assigned class c to image i.
        # np.add.at is unbuffered, so repeated (i, c) pairs are each counted.
        self.ad = np.zeros((num_images, self.NUM_CLASSES))
        rows = np.arange(num_images)[:, None]
        np.add.at(self.ad, (rows, ann_), 1)

    def __getitem__(self, index: int):
        """Return the sample at ``index``.

        Returns:
            ``(img, target, ad)`` when ``istrain`` is True, otherwise
            ``(img, target)``. ``target`` is the label stored by the parent
            dataset; ``ad`` is the row of annotator vote counts for this image.
        """
        # .data and .targets are provided by the parent class.
        img, target = self.data[index], self.targets[index]
        img = Image.fromarray(img)  # convert the stored array into a PIL image

        if self.transform is not None:
            img = self.transform(img)  # augmentation / preprocessing pipeline
        if self.target_transform is not None:
            target = self.target_transform(target)
        if self.istrain:
            # The human vote counts are only attached in training mode.
            return img, target, self.ad[index]
        return img, target


# Run script

In [None]:
seed_everything()

# Per-channel statistics used to normalise the images.
# NOTE(review): these values resemble the commonly quoted CIFAR-100 statistics
# rather than CIFAR-10's - confirm before relying on them.
mean_cifar10, std_cifar10 = (0.5071, 0.4866, 0.4409), (0.2009, 0.1984, 0.2023)

# Training: augmentation (random crop + horizontal flip) followed by preprocessing.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean_cifar10, std_cifar10),
])
# Evaluation: preprocessing only.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean_cifar10, std_cifar10),
])

# The training split also yields human vote counts (istrain=True); the test split does not.
train_dataset = CIFAR10N(root='./data', train=True, download=True, transform=transform_train, istrain=True)
test_dataset = CIFAR10N(root='./data', train=False, download=True, transform=transform_test, istrain=False)
print('train samples:',len(train_dataset), 'test samples:',len(test_dataset))

# DataLoaders serve the datasets in batches, shuffling the training data and
# loading with two worker processes.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.test_batch_size, shuffle=False, num_workers=2)

# Pretrained ResNet-34 with its final layer replaced so the number of outputs
# matches the number of classes; moved to the device once, after the swap.
model = models.resnet34(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, args.num_classes)
model = model.to(device)

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, nesterov=False, weight_decay=0.0001)
criterion = nn.CrossEntropyLoss()

# Train for num_epoch epochs and keep the weights of the best-scoring epoch.
# NOTE(review): the checkpoint is selected on test_loader accuracy, so "best acc"
# is measured on the same data used for model selection.
best_epoch, best_acc = 0, 0.0
for epoch in range(args.num_epoch):
    train(model, train_loader, criterion, optimizer)
    accuracy = test(model, test_loader)
    if accuracy > best_acc:
        best_acc = accuracy
        best_epoch = epoch
        best_model = copy.deepcopy(model)
        torch.save(best_model.state_dict(), 'best_model_cifar10h.pth.tar')
    # The learning rate was passed to format() before but had no placeholder, so it never printed.
    print('epoch: {}  acc: {:.4f}  best epoch: {}  best acc: {:.4f}  lr: {:.4f}'.format(
            epoch, accuracy, best_epoch, best_acc, optimizer.param_groups[0]['lr']))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:13<00:00, 12956801.73it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


  ann_all = torch.load('./data/CIFAR-10_human.pt')


Files already downloaded and verified
train samples: 50000 test samples: 10000


Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 145MB/s]


epoch: 0  acc: 0.6230  best epoch: 0  best acc: 0.6230
epoch: 1  acc: 0.7623  best epoch: 1  best acc: 0.7623
epoch: 2  acc: 0.7848  best epoch: 2  best acc: 0.7848
epoch: 3  acc: 0.8146  best epoch: 3  best acc: 0.8146
epoch: 4  acc: 0.8140  best epoch: 3  best acc: 0.8146
epoch: 5  acc: 0.7897  best epoch: 3  best acc: 0.8146


In [None]:
import tensorflow_probability as tfp

def evaluation_all(model, testloader):
    """Evaluate ``model`` and also return the raw logits and labels.

    Args:
        model: ``torch.nn.Module`` to evaluate; it is switched to evaluation mode.
        testloader: Iterable yielding ``(inputs, targets)`` pairs. It must
            yield at least one batch (concatenating zero batches fails).

    Returns:
        tuple: ``(accuracy, logits, labels)`` where ``accuracy`` is the
        fraction of correct top-1 predictions, and ``logits`` / ``labels`` are
        NumPy arrays holding the model outputs and targets of every batch,
        concatenated in loader order.
    """
    model.eval()

    # Take the device from the model itself rather than a notebook-level global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    logits_list = []
    labels_list = []
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in testloader:
            if target_device is not None:
                inputs, targets = inputs.to(target_device), targets.to(target_device)
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            # Move each batch to host memory straight away so the collected
            # results do not pile up on the accelerator.
            logits_list.append(outputs.cpu())
            labels_list.append(targets.cpu())

    logits = torch.cat(logits_list).numpy()
    labels = torch.cat(labels_list).numpy()
    return correct / total, logits, labels

# Number of confidence bins for the expected calibration error. This is the
# first argument of ``expected_calibration_error``; it was previously filled
# with ``args.num_classes``, which only worked because both values are 10.
NUM_ECE_BINS = 10

# Reload the best checkpoint written by the training loop. ``map_location``
# lets a checkpoint saved on a GPU be restored on whichever device is in use.
model.load_state_dict(torch.load('best_model_cifar10h.pth.tar', map_location=device))
acc, logits_tf, labels_tf = evaluation_all(model, test_loader)
ece = tfp.stats.expected_calibration_error(NUM_ECE_BINS, logits=logits_tf, labels_true=labels_tf, labels_predicted=np.argmax(logits_tf,1))
print("Acc:{:.4f}, ECE:{:.4f}".format(acc, np.array(ece)))

Acc:0.8076, ECE:0.0799
