<a href="https://colab.research.google.com/github/sourcecode369/Kaggle-Notebooks/blob/master/Tutorials/Cifar_100_with_Optimizers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Show which GPU this runtime was given, along with driver/CUDA versions and current memory use.
!nvidia-smi

Sun Jul 19 14:32:17 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.51.05    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P8    26W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
!pip install torch_optimizer
!pip install pretrainedmodels

Collecting torch_optimizer
[?25l  Downloading https://files.pythonhosted.org/packages/10/1a/a3f86e67c4f650916cb7d16849331bda302bf0b155f7c8240607cc97664b/torch_optimizer-0.0.1a14-py3-none-any.whl (40kB)
[K     |████████▏                       | 10kB 19.1MB/s eta 0:00:01[K     |████████████████▎               | 20kB 1.8MB/s eta 0:00:01[K     |████████████████████████▌       | 30kB 2.3MB/s eta 0:00:01[K     |████████████████████████████████| 40kB 1.8MB/s 
[?25hCollecting pytorch-ranger>=0.1.1
  Downloading https://files.pythonhosted.org/packages/0d/70/12256257d861bbc3e176130d25be1de085ce7a9e60594064888a950f2154/pytorch_ranger-0.1.1-py3-none-any.whl
Installing collected packages: pytorch-ranger, torch-optimizer
Successfully installed pytorch-ranger-0.1.1 torch-optimizer-0.0.1a14
Collecting pretrainedmodels
[?25l  Downloading https://files.pythonhosted.org/packages/84/0e/be6a0e58447ac16c938799d49bfb5fb7a80ac35e137547fc6cee2c08c4cf/pretrainedmodels-0.7.4.tar.gz (58kB)
[K     |███

In [3]:
import os
import numpy as np
import torch
import pretrainedmodels
import torch.nn as nn
import torch.optim as to
import torch.nn.functional as F
import torch_optimizer as optim
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader

In [4]:
# Normalisation statistics (three values per dataset), keyed by dataset name.
# Only the 'cifar100' entries are used by the training transform in this notebook.
mean = dict(
    cifar10=(0.4914, 0.4822, 0.4465),
    cifar100=(0.5071, 0.4867, 0.4408),
)

std = dict(
    cifar10=(0.2023, 0.1994, 0.2010),
    cifar100=(0.2675, 0.2565, 0.2761),
)

In [5]:
# Augment each training image (padded random crop, random horizontal flip,
# random rotation with a 15-degree setting), then convert it to a tensor and
# normalise it with the CIFAR-100 statistics.
transform_train = transforms.Compose(
    [
        transforms.RandomCrop(size=32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=15),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean['cifar100'], std=std['cifar100']),
    ]
)

# CIFAR-100 training split, downloaded into ./data when it is not already there,
# served as shuffled mini-batches of 64 by four loader workers.
dataset_train = datasets.CIFAR100(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
train_loader = DataLoader(
    dataset=dataset_train,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-100-python.tar.gz to ./data


In [15]:
class ResNet(nn.Module):
    """ResNet-50 backbone from ``pretrainedmodels`` with a small classification head.

    The backbone's feature map is average-pooled to one value per channel,
    layer-normalised, passed through dropout (p=0.2) and mapped to ``classes``
    logits by a single linear layer.

    Args:
        pretrained: forwarded unchanged to the ``pretrainedmodels`` resnet50
            constructor (this notebook passes ``'imagenet'``). Defaults to None.
        classes: number of logits produced per image. Defaults to 100.
    """

    def __init__(self, pretrained=None, classes=100):
        super().__init__()
        self.classes = classes
        self.pretrained = pretrained

        # Backbone: only its ``features`` method is used in ``forward``.
        backbone_factory = pretrainedmodels.__dict__['resnet50']
        self.resnet = backbone_factory(pretrained=self.pretrained)

        # Head: pool -> LayerNorm -> Dropout -> Linear over 2048 channels.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.ln = nn.LayerNorm(2048)
        self.dropout = nn.Dropout(p=0.2)
        self.classifier = nn.Linear(2048, self.classes)

    def forward(self, images):
        """Return one row of ``classes`` logits per input image."""
        feature_map = self.resnet.features(images)
        # Pool each channel to 1x1, then drop the two trailing singleton dims.
        pooled = self.avg_pool(feature_map).flatten(1)
        normalised = self.ln(pooled)
        return self.classifier(self.dropout(normalised))

In [7]:
# Build the classifier with the 'imagenet' pretrained backbone weights (fetched on first use).
model = ResNet(pretrained='imagenet')

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/checkpoints/resnet50-19c8e357.pth


HBox(children=(FloatProgress(value=0.0, max=102502400.0), HTML(value='')))




In [8]:
# Smoke test: a random batch of five 3x512x512 inputs should give five rows of 100 logits.
model(torch.rand(5,3,512,512)).shape

torch.Size([5, 100])

In [16]:
class FocalLoss(nn.Module):
    """Binary focal loss built on element-wise binary cross-entropy.

    For every element the BCE term is scaled by ``alpha * (1 - p_t) ** gamma``,
    where ``p_t = exp(-BCE)``, so well-classified elements contribute less.

    Args:
        alpha: constant multiplier applied to every element's loss.
        gamma: focusing exponent applied to ``(1 - p_t)``.
        logits: if True, ``inputs`` are raw scores and BCE-with-logits is used;
            otherwise ``inputs`` must already be probabilities in [0, 1].
        reduce: if True, return the mean over all elements; if False, return
            the per-element loss tensor (same shape as ``inputs``).
    """

    def __init__(self, alpha=0.5, gamma=2, logits=False, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduce = reduce

    def forward(self, inputs, targets):
        """Compute the focal loss of ``inputs`` against float ``targets`` of the same shape."""
        # reduction='none' is essential: the focal weight must be computed per
        # element. With the default mean reduction the modulation would be
        # applied to a single batch-averaged scalar instead.
        if self.logits:
            bce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        else:
            bce_loss = F.binary_cross_entropy(inputs, targets, reduction='none')

        # exp(-BCE) recovers the probability assigned to the true label.
        pt = torch.exp(-bce_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * bce_loss

        if self.reduce:
            return torch.mean(focal_loss)
        return focal_loss

In [17]:
class RocAucMeter(object):
    """Running ROC-AUC over every batch passed to ``update`` since the last reset.

    NOTE(review): ``update`` calls ``metrics.roc_auc_score``, but no ``metrics``
    name is imported in the visible part of this notebook. It is presumably
    ``sklearn.metrics``; confirm and import it before using this meter.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Restart the history with one placeholder sample per label and a zero score."""
        # Presumably seeded so that both labels are always present — TODO confirm.
        self.y_true = np.array([0, 1])
        self.y_pred = np.array([0.5, 0.5])
        self.score = 0

    def update(self, y_true, y_pred):
        """Append one batch and recompute the score over the whole history.

        ``y_true`` is reduced with an argmax over axis 1 and ``y_pred`` is
        softmaxed over dim 1 with only column 1 kept, which looks like it
        expects one-hot two-class targets — TODO confirm against the caller.
        """
        batch_labels = y_true.cpu().numpy().argmax(axis=1)
        batch_probs = nn.functional.softmax(y_pred, dim=1).data.cpu().numpy()
        positive_probs = batch_probs[:, 1]

        self.y_true = np.concatenate((self.y_true, batch_labels))
        self.y_pred = np.concatenate((self.y_pred, positive_probs))
        self.score = metrics.roc_auc_score(
            self.y_true,
            self.y_pred,
            labels=np.array([0, 1]),
        )

    @property
    def avg(self):
        """Most recently computed score (0 until the first ``update``)."""
        return self.score

class AverageMeter(object):
    """Computes and stores the average and current value.

    Attributes (all reset to 0 by ``reset``):
        val:   the value passed to the most recent ``update``.
        sum:   weighted sum of all values seen, i.e. sum of ``val * n``.
        count: total weight seen, i.e. sum of ``n``.
        avg:   ``sum / count``, or 0 while ``count`` is still 0.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget everything recorded so far."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` (e.g. a batch-mean loss and the batch size)."""
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard the division so a zero-weight update on an empty meter (for
        # example an empty batch) leaves avg at 0 instead of raising.
        self.avg = self.sum / self.count if self.count else 0

In [46]:
# Fresh ImageNet-pretrained model on the GPU, wrapped for data-parallel execution.
model = ResNet(pretrained='imagenet').to("cuda")
model = nn.DataParallel(model)

# Multi-class classification over integer class labels.
#criterion = FocalLoss(alpha=0.5, gamma=2).to("cuda")
criterion = nn.CrossEntropyLoss().to("cuda")

# Optimizer under test. The commented-out blocks are the alternatives compared
# in this notebook; enable exactly one of them at a time.

# optimizer = optim.QHAdam(model.parameters(), 
                        #  lr=1e-3, 
                        #  betas=(0.995, 0.999), 
                        #  nus=(0.7, 1.0), 
                        #  weight_decay=0.0, 
                        #  eps=1e-8)

optimizer = optim.Ranger(model.parameters(), 
                         lr=1e-3, 
                         alpha=0.5, 
                         k=6, 
                         N_sma_threshhold=5, 
                         betas=(.95, 0.999), 
                         eps=1e-5,
                         weight_decay=0)

# optimizer = optim.Lamb(model.parameters(), 
#                        lr=1e-3, 
#                        betas=(0.9, 0.999), 
#                        eps=1e-6, 
#                        weight_decay=0, 
#                        adam=False)

# The training loop steps this scheduler with the epoch's average training
# loss, a quantity that should go DOWN, so the plateau detector must run in
# 'min' mode. In 'max' mode a falling loss counts as "no improvement" and the
# learning rate is cut while training is actually progressing.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.8,             # multiply the LR by 0.8 on each reduction
    patience=2,             # epochs without improvement tolerated before reducing
    threshold=0.0001,
    threshold_mode='abs',
    cooldown=0,
    min_lr=1e-8,
    eps=1e-08,
)

In [47]:
from tqdm import tqdm
import time

# Train for 10 epochs, logging the running mean loss every 10 steps and
# stepping the plateau scheduler once per epoch.
model.train()
for e in range(10):
    # Running, sample-weighted mean of the loss over the current epoch.
    total_loss = AverageMeter()

    for step, (images, labels) in enumerate(train_loader):
        batch_size = images.size(0)
        images = images.cuda()
        targets = labels.cuda()

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Keep only the Python float from here on.
        loss = loss.item()
        total_loss.update(loss, batch_size)

        if step % 10 == 0:
            # total_loss.count is the exact number of samples seen this epoch,
            # which stays correct even when a batch is shorter than the rest.
            print('Training Epoch: {epoch} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.10f}'.format(
                total_loss.avg,
                optimizer.param_groups[0]['lr'],
                epoch=e,
                trained_samples=total_loss.count,
                total_samples=len(train_loader.dataset)
                ))

    # NOTE(review): the value passed here is a loss (lower is better), so the
    # scheduler needs to be constructed with mode='min' for plateau detection
    # to behave as intended.
    if scheduler is not None:
        scheduler.step(metrics=total_loss.avg)

Training Epoch: 0 [64/50000]	Loss: 4.7445	LR: 0.0010000000
Training Epoch: 0 [704/50000]	Loss: 4.7721	LR: 0.0010000000
Training Epoch: 0 [1344/50000]	Loss: 4.7800	LR: 0.0010000000
Training Epoch: 0 [1984/50000]	Loss: 4.7629	LR: 0.0010000000
Training Epoch: 0 [2624/50000]	Loss: 4.7534	LR: 0.0010000000
Training Epoch: 0 [3264/50000]	Loss: 4.7242	LR: 0.0010000000
Training Epoch: 0 [3904/50000]	Loss: 4.7013	LR: 0.0010000000
Training Epoch: 0 [4544/50000]	Loss: 4.6801	LR: 0.0010000000
Training Epoch: 0 [5184/50000]	Loss: 4.6518	LR: 0.0010000000
Training Epoch: 0 [5824/50000]	Loss: 4.6179	LR: 0.0010000000
Training Epoch: 0 [6464/50000]	Loss: 4.5859	LR: 0.0010000000
Training Epoch: 0 [7104/50000]	Loss: 4.5527	LR: 0.0010000000
Training Epoch: 0 [7744/50000]	Loss: 4.5127	LR: 0.0010000000
Training Epoch: 0 [8384/50000]	Loss: 4.4728	LR: 0.0010000000
Training Epoch: 0 [9024/50000]	Loss: 4.4361	LR: 0.0010000000
Training Epoch: 0 [9664/50000]	Loss: 4.3994	LR: 0.0010000000
Training Epoch: 0 [10304/50

# Lamb

Training Epoch: 0 [64/50000]	Loss: 4.6812	LR: 0.0010000000
Training Epoch: 0 [704/50000]	Loss: 4.7955	LR: 0.0010000000
Training Epoch: 0 [1344/50000]	Loss: 4.7391	LR: 0.0010000000
Training Epoch: 0 [1984/50000]	Loss: 4.7414	LR: 0.0010000000
Training Epoch: 0 [2624/50000]	Loss: 4.7256	LR: 0.0010000000
Training Epoch: 0 [3264/50000]	Loss: 4.7075	LR: 0.0010000000
Training Epoch: 0 [3904/50000]	Loss: 4.6920	LR: 0.0010000000
Training Epoch: 0 [4544/50000]	Loss: 4.6770	LR: 0.0010000000
Training Epoch: 0 [5184/50000]	Loss: 4.6490	LR: 0.0010000000
Training Epoch: 0 [5824/50000]	Loss: 4.6201	LR: 0.0010000000
Training Epoch: 0 [6464/50000]	Loss: 4.5994	LR: 0.0010000000
Training Epoch: 0 [7104/50000]	Loss: 4.5767	LR: 0.0010000000
Training Epoch: 0 [7744/50000]	Loss: 4.5514	LR: 0.0010000000
Training Epoch: 0 [8384/50000]	Loss: 4.5291	LR: 0.0010000000
Training Epoch: 0 [9024/50000]	Loss: 4.5068	LR: 0.0010000000
Training Epoch: 0 [9664/50000]	Loss: 4.4816	LR: 0.0010000000
Training Epoch: 0 [10304/50000]	Loss: 4.4519	LR: 0.0010000000
Training Epoch: 0 [10944/50000]	Loss: 4.4259	LR: 0.0010000000
Training Epoch: 0 [11584/50000]	Loss: 4.4006	LR: 0.0010000000
Training Epoch: 0 [12224/50000]	Loss: 4.3780	LR: 0.0010000000
Training Epoch: 0 [12864/50000]	Loss: 4.3509	LR: 0.0010000000
Training Epoch: 0 [13504/50000]	Loss: 4.3255	LR: 0.0010000000
Training Epoch: 0 [14144/50000]	Loss: 4.3014	LR: 0.0010000000
Training Epoch: 0 [14784/50000]	Loss: 4.2764	LR: 0.0010000000
Training Epoch: 0 [15424/50000]	Loss: 4.2493	LR: 0.0010000000
Training Epoch: 0 [16064/50000]	Loss: 4.2287	LR: 0.0010000000
Training Epoch: 0 [16704/50000]	Loss: 4.2010	LR: 0.0010000000
Training Epoch: 0 [17344/50000]	Loss: 4.1778	LR: 0.0010000000
Training Epoch: 0 [17984/50000]	Loss: 4.1552	LR: 0.0010000000
Training Epoch: 0 [18624/50000]	Loss: 4.1326	LR: 0.0010000000
Training Epoch: 0 [19264/50000]	Loss: 4.1070	LR: 0.0010000000
Training Epoch: 0 [19904/50000]	Loss: 4.0861	LR: 0.0010000000
Training Epoch: 0 [20544/50000]	Loss: 4.0647	LR: 0.0010000000
Training Epoch: 0 [21184/50000]	Loss: 4.0414	LR: 0.0010000000
Training Epoch: 0 [21824/50000]	Loss: 4.0211	LR: 0.0010000000
Training Epoch: 0 [22464/50000]	Loss: 4.0006	LR: 0.0010000000
Training Epoch: 0 [23104/50000]	Loss: 3.9794	LR: 0.0010000000
Training Epoch: 0 [23744/50000]	Loss: 3.9577	LR: 0.0010000000
Training Epoch: 0 [24384/50000]	Loss: 3.9362	LR: 0.0010000000