<a href="https://colab.research.google.com/github/mobarakol/tutorial_notebooks/blob/main/CIFAR_10H_Swine_Transformer_V2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cloning the CIFAR-10H annotation repository
Paper: Human uncertainty makes classification more robust (https://arxiv.org/pdf/1908.07086.pdf)

Labels (CIFAR-10): [0: airplane, 1: automobile, 2: bird, 3: cat, 4: deer, 5: dog, 6: frog, 7: horse, 8: ship, 9: truck] <br>

<img src="https://miro.medium.com/max/1010/1*r8S5tF_6naagKOnlIcGXoQ.png" alt="alternatetext">




In [1]:
# Fetch the CIFAR-10H repository and make it the working directory.
# Later cells use paths relative to it (root='./data', which is also where
# 'cifar10h-probs.npy' is loaded from - presumably shipped in the repo's data/ folder).
!git clone https://github.com/jcpeterson/cifar-10h
%cd cifar-10h

Cloning into 'cifar-10h'...
remote: Enumerating objects: 49, done.[K
remote: Counting objects: 100% (1/1), done.[K
remote: Total 49 (delta 0), reused 0 (delta 0), pack-reused 48[K
Unpacking objects: 100% (49/49), done.
/content/cifar-10h


## Installing Hugging Face Transformers

In [2]:
! pip -q install transformers

[K     |████████████████████████████████| 3.8 MB 4.8 MB/s 
[K     |████████████████████████████████| 6.5 MB 9.5 MB/s 
[K     |████████████████████████████████| 67 kB 3.1 MB/s 
[K     |████████████████████████████████| 895 kB 33.7 MB/s 
[K     |████████████████████████████████| 596 kB 34.7 MB/s 
[?25h

In [None]:
import os

# Enter the cloned repository only if we are not already inside it, so this cell
# is safe both after a runtime restart and during a top-to-bottom "Run All"
# (where the clone cell has already changed into 'cifar-10h').
if os.path.basename(os.getcwd()) != 'cifar-10h':
    os.chdir('cifar-10h')
print(os.getcwd())

/content/cifar-10h


# main script

In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
from torchvision import models
import torchvision.transforms as transforms
import os
import argparse
import copy
import random
import numpy as np
# Notebook-wide device string: use the GPU when CUDA is available, otherwise the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
def seed_everything(seed=12):
    """Seed every random number generator used in this notebook.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices),
    exports the seed as PYTHONHASHSEED, and switches cuDNN to deterministic
    mode with benchmarking turned off.

    Args:
        seed: integer seed applied to all generators (default 12).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The individual seeders are independent of each other, so one loop covers them.
    for apply_seed in (random.seed, np.random.seed,
                       torch.manual_seed, torch.cuda.manual_seed_all):
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True
# Hyper-parameters are declared through argparse so the cell can be lifted into a
# script unchanged. Each option has its own, unambiguous help text.
parser = argparse.ArgumentParser(description='CIFAR-10H Training')
parser.add_argument('--lr', default=3e-2, type=float, help='learning rate')
parser.add_argument('--lr_schedule', default=0, type=int, help='lr scheduler')
parser.add_argument('--batch_size', default=32, type=int, help='training batch size')
parser.add_argument('--test_batch_size', default=64, type=int, help='test batch size')
parser.add_argument('--num_epoch', default=10, type=int, help='number of training epochs')
parser.add_argument('--num_classes', type=int, default=10, help='number of classes')
# Parse an explicit empty argv: inside a notebook sys.argv belongs to the kernel
# launcher, so every option simply takes its default.
args = parser.parse_args(args=[])

def train(model, trainloader, criterion, optimizer):
    """Run one training epoch over `trainloader`.

    Args:
        model: module whose forward output exposes a `.logits` attribute.
        trainloader: sized iterable of batches. The first two items of each
            batch are the inputs and the targets; any further items (such as
            the extra array returned by CIFAR10H) are ignored, so both
            (inputs, targets) and (inputs, targets, extra) batches work.
        criterion: callable mapping (logits, targets) to a scalar loss.
        optimizer: optimizer stepping the model's parameters.

    Prints the loss of every 100th batch.
    """
    model.train()

    # Follow the device the model actually lives on instead of relying on the
    # notebook-level `device`; that global is only the fallback for a model
    # without parameters.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else device

    num_batches = len(trainloader)
    for batch_idx, batch in enumerate(trainloader):
        # Index rather than unpack so extra per-sample fields are tolerated.
        inputs, targets = batch[0].to(run_device), batch[1].to(run_device)
        optimizer.zero_grad()
        outputs = model(inputs).logits
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(batch_idx, '/', num_batches, 'loss:', loss.item())

def test(model, testloader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs).logits
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    return correct / total

# CIFAR-10H dataloader

In [22]:
from PIL import Image
import numpy as np
import torchvision
from transformers import AutoFeatureExtractor, SwinForImageClassification
#from transformers import ViTFeatureExtractor, ViTForImageClassification, BatchFeature
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor, Normalize, Resize, Compose

import torch

class AutoFeatureExtractorTransforms:
    """Torchvision preprocessing pipeline mirroring a Hugging Face feature extractor.

    The extractor for `model_name_or_path` is loaded once and translated into a
    `Compose` of optional `Resize`, `ToTensor` and optional `Normalize`, so the
    pipeline can be passed as the `transform` of a torchvision dataset.

    Args:
        model_name_or_path: checkpoint name or path understood by
            `AutoFeatureExtractor.from_pretrained`.
    """

    def __init__(self, model_name_or_path):
        feature_extractor = AutoFeatureExtractor.from_pretrained(model_name_or_path)
        steps = []

        if feature_extractor.do_resize:
            steps.append(Resize(self._resize_target(feature_extractor.size)))

        steps.append(ToTensor())

        if feature_extractor.do_normalize:
            steps.append(Normalize(feature_extractor.image_mean, feature_extractor.image_std))

        self.transform = Compose(steps)

    @staticmethod
    def _resize_target(size):
        """Convert an extractor `size` setting into an argument `Resize` accepts.

        Older transformers releases expose `size` as an int or (h, w) sequence,
        which is passed through untouched. Newer releases expose a dict such as
        {"height": 224, "width": 224} or {"shortest_edge": 256}, which
        torchvision's `Resize` cannot consume directly.

        Raises:
            ValueError: if `size` is a dict with none of the recognised keys.
        """
        if isinstance(size, dict):
            if 'height' in size and 'width' in size:
                return (size['height'], size['width'])
            if 'shortest_edge' in size:
                # An int makes Resize scale the shorter image side to this value.
                return size['shortest_edge']
            raise ValueError('Unsupported feature extractor size: {!r}'.format(size))
        return size

    def __call__(self, x):
        """Apply the composed pipeline to one input `x`."""
        return self.transform(x)

class CIFAR10H(torchvision.datasets.CIFAR10):
    """CIFAR-10 dataset that also returns a row of 'cifar10h-probs.npy' per sample.

    The extra array is loaded from `<root>/cifar10h-probs.npy` and indexed with
    the same index as the image. Presumably it holds the CIFAR-10H human label
    distribution for each image - TODO confirm against the CIFAR-10H repo.

    Args:
        root: dataset directory; must also contain 'cifar10h-probs.npy'.
        rand_number: accepted but not used by this class.
        train: forwarded to `torchvision.datasets.CIFAR10` (default False).
        transform: optional callable applied to the PIL image.
        target_transform: optional callable applied to the integer target.
        download: forwarded to `torchvision.datasets.CIFAR10`.
    """

    def __init__(self, root,  rand_number=0, train=False, transform=None, target_transform=None,
                 download=False):
        super().__init__(root, train=train, transform=transform,
                         target_transform=target_transform, download=download)
        self.transform, self.target_transform = transform, target_transform
        probs_path = os.path.join(root, 'cifar10h-probs.npy')
        self.ad = np.load(probs_path)

    def __getitem__(self, index: int):
        """Return `(image, target, ad_row)` for the sample at `index`."""
        raw_image, label = self.data[index], self.targets[index]
        image = Image.fromarray(raw_image)
        annotation = self.ad[index]

        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label, annotation

# Run script

In [None]:

def main():
    """Fine-tune a pretrained Swin-Tiny on CIFAR-10H and evaluate after every epoch.

    Training uses the CIFAR10H dataset built on the CIFAR-10 *test* split
    (`train=False`), and evaluation uses the CIFAR-10 *train* split
    (`train=True`). The swap is intentional: CIFAR10H is constructed on the
    test split here and reads its extra array from 'cifar10h-probs.npy'.

    Whenever the evaluation accuracy improves, the model weights are written to
    'best_model_cifar10h_vit.pth.tar' in the current directory.
    """
    seed_everything()
    model_name_or_path = 'microsoft/swin-tiny-patch4-window7-224'

    # Build the preprocessing once and share it between both datasets.
    transform = AutoFeatureExtractorTransforms(model_name_or_path)
    train_dataset = CIFAR10H(root='./data', train=False, download=True, transform=transform)
    test_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    print('train samples:',len(train_dataset), 'test samples:',len(test_dataset))
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.test_batch_size, shuffle=False, num_workers=2)

    model = SwinForImageClassification.from_pretrained(model_name_or_path)
    # Swap the pretrained classification head for a fresh one with num_classes outputs.
    model.classifier = nn.Linear(model.classifier.in_features, args.num_classes)
    model = model.to(device)

    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, nesterov=False, weight_decay=0.0001)
    criterion = nn.CrossEntropyLoss()

    best_epoch, best_acc = 0, 0.0
    for epoch in range(args.num_epoch):
        train(model, train_loader, criterion, optimizer)
        accuracy = test(model, test_loader)
        if accuracy > best_acc:
            best_acc = accuracy
            best_epoch = epoch
            # Saving the state_dict directly avoids holding a second full copy
            # of the model in memory just to checkpoint it.
            torch.save(model.state_dict(), 'best_model_cifar10h_vit.pth.tar')
        # The learning rate was passed to format() before but had no placeholder,
        # so it was silently dropped; it is now part of the epoch summary.
        print('epoch: {}  acc: {:.4f}  best epoch: {}  best acc: {:.4f}  lr: {:.4g}'.format(
                epoch, accuracy, best_epoch, best_acc, optimizer.param_groups[0]['lr']))

main()

Files already downloaded and verified
Files already downloaded and verified
train samples: 10000 test samples: 50000
0 / 313 loss: 2.414297580718994
100 / 313 loss: 2.076033353805542
200 / 313 loss: 2.1324853897094727
300 / 313 loss: 1.9993892908096313
epoch: 0  acc: 0.2242  best epoch: 0  best acc: 0.2242
0 / 313 loss: 1.8297532796859741
100 / 313 loss: 1.886186122894287
200 / 313 loss: 1.9788832664489746
300 / 313 loss: 1.372273564338684
epoch: 1  acc: 0.3924  best epoch: 1  best acc: 0.3924
0 / 313 loss: 1.5487279891967773
100 / 313 loss: 1.8245866298675537
200 / 313 loss: 1.564370036125183
300 / 313 loss: 1.503949522972107
epoch: 2  acc: 0.4190  best epoch: 2  best acc: 0.4190
0 / 313 loss: 1.834380030632019
100 / 313 loss: 1.3016908168792725
200 / 313 loss: 1.2501261234283447
300 / 313 loss: 1.158250093460083
epoch: 3  acc: 0.6294  best epoch: 3  best acc: 0.6294
0 / 313 loss: 1.0765539407730103
100 / 313 loss: 1.3277177810668945
200 / 313 loss: 1.3941457271575928
300 / 313 loss: 

## Changing configuration in Swin Transformer

In [None]:
from transformers import AutoFeatureExtractor, SwinForImageClassification, SwinConfig
import torch
from datasets import load_dataset


checkpoint = "microsoft/swin-tiny-patch4-window7-224"
# CIFAR-10 class names in label-index order (0: airplane ... 9: truck).
cifar10_classes = ["airplane", "automobile", "bird", "cat", "deer",
                   "dog", "frog", "horse", "ship", "truck"]

dataset = load_dataset("huggingface/cats-image")
image = dataset["test"]["image"][0]

feature_extractor = AutoFeatureExtractor.from_pretrained(checkpoint)

# Start from the checkpoint's own configuration and change only the label space.
configuration = SwinConfig.from_pretrained(checkpoint)
configuration.num_labels = len(cifar10_classes)
configuration.id2label = dict(enumerate(cifar10_classes))
configuration.label2id = {name: idx for idx, name in enumerate(cifar10_classes)}

# `from_pretrained` is a classmethod that RETURNS the loaded model, so its result
# must be assigned; calling it on an existing instance and discarding the result
# leaves that instance randomly initialised. `ignore_mismatched_sizes=True` lets
# the pretrained backbone load while the classification head, whose shape no
# longer matches the checkpoint, is freshly initialised.
model = SwinForImageClassification.from_pretrained(
    checkpoint, config=configuration, ignore_mismatched_sizes=True)

inputs = feature_extractor(image, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

# The model now predicts one of the 10 configured classes. The new head has not
# been trained yet, so the predicted label is arbitrary until the model is fine-tuned.
predicted_label = logits.argmax(-1).item()
print(model.config.id2label[predicted_label])