### All 4 Pretrained Models sweep

In [1]:
!ls '/kaggle/input/inaturalist/inaturalist_12K'

train_val  val


In [3]:
!ls '/kaggle/input/xception/'

xception.py


In [6]:
!ls '/kaggle/input/incepresv2/'

inceptionresnetv2.py


In [7]:
import torch
import torch.nn as nn
from torchvision.transforms import RandomResizedCrop, RandomHorizontalFlip, Resize, CenterCrop, ToTensor, Normalize, Compose
import torchvision
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import shutil
import sys
sys.path.append('/kaggle/input/xception/')
sys.path.append('/kaggle/input/incepresv2/')
from xception import*
from inceptionresnetv2 import*
## Dataset info
iNaturalist = {
    'Normalize': {
        'mean': (0.485, 0.456, 0.406),
        'std':  (0.229, 0.224, 0.225)
    }
}

## Dataloaders
def data_loader(train_data, val_data, test_data, batchSize):
    train_dataLoader = torch.utils.data.DataLoader(train_data, batch_size=batchSize, shuffle=True)
    val_dataLoader = torch.utils.data.DataLoader(val_data, batch_size=batchSize, shuffle=True)
    test_dataLoader = torch.utils.data.DataLoader(test_data, batch_size=batchSize, shuffle=False)
    loaders = {
        'train' : train_dataLoader,
        'valid' : val_dataLoader,
        'test'  : test_dataLoader
    }
    return loaders

## getModdel fn
def getModel(modelName):
    import torchvision as tv 
    model = None
    
    if modelName == 'InceptionV3':
        model = tv.models.inception_v3(pretrained=True)
    elif modelName == 'InceptionResNetV2':
        model = inceptionresnetv2(pretrained='imagenet')
    elif modelName == 'ResNet50':
        model = tv.models.resnet50(pretrained=True)
    elif modelName == 'Xception':
        model = xception(pretrained=True)
    elif modelName == 'Vgg16':
        model = tv.models.vgg16(pretrained=True)
        
    return model

## transforms to match model input dims
def transform(modelName):
    if modelName == 'Xception' or modelName == 'InceptionV3' or modelName == 'InceptionResNetV2':
        resize = 299
        val_resize = 333
        val_center_crop = resize
        
    
    else:
        resize = 224
        val_resize = 256
        val_center_crop = resize
    
    
    train_t = Compose([RandomResizedCrop(resize),
                       RandomHorizontalFlip(),
                       ToTensor(),
                       Normalize(**iNaturalist['Normalize'])])
    valid_t = Compose([Resize(val_resize),
                       CenterCrop(resize),
                       ToTensor(),
                       Normalize(**iNaturalist['Normalize'])])
    test_t = Compose([Resize((resize,resize)), 
                      ToTensor(), 
                      Normalize(**iNaturalist['Normalize'])])
    
    transforms = {
        'training':   train_t,
        'validation': valid_t,
        'test': test_t
    }
    
    return transforms

## Load dataset fn
def load_datasets(modelName):
    transforms=transform(modelName)
    trainset  = torchvision.datasets.ImageFolder('/kaggle/input/inaturalist/inaturalist_12K/train_val/train', transforms['training'])
    valset    = torchvision.datasets.ImageFolder('/kaggle/input/inaturalist/inaturalist_12K/train_val/val', transforms['validation'])
    testset   = torchvision.datasets.ImageFolder('/kaggle/input/inaturalist/inaturalist_12K/val', transforms['test'])
    
    return trainset, valset, testset


In [8]:
import os
!pip install wandb
!pip install wandb --upgrade
import wandb

Collecting wandb
  Downloading wandb-0.10.25-py2.py3-none-any.whl (2.1 MB)
[K     |████████████████████████████████| 2.1 MB 2.7 MB/s eta 0:00:01
Installing collected packages: wandb
  Attempting uninstall: wandb
    Found existing installation: wandb 0.10.23
    Uninstalling wandb-0.10.23:
      Successfully uninstalled wandb-0.10.23
Successfully installed wandb-0.10.25


In [9]:
wandb.login()

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


[34m[1mwandb[0m: Paste an API key from your profile and hit enter:  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [10]:
# check if CUDA is available
use_cuda = torch.cuda.is_available()
if use_cuda == True:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [25]:
def model_change_classifier(model):
    modelName = model.name
    if modelName == 'InceptionV3':
        model.fc = nn.Sequential(nn.Linear(model.fc.in_features,500),
                                 nn.ReLU(),
                                 nn.Dropout(),
                                 nn.Linear(500,10))


    if modelName == 'Vgg16':
        model.classifier[6] = nn.Sequential(nn.Linear(model.classifier[6].in_features,500),
                                 nn.ReLU(),
                                 nn.Dropout(),
                                 nn.Linear(500,10))


    if modelName == 'ResNet50':
        model.fc = nn.Sequential(nn.Linear(model.fc.in_features,500),
                             nn.ReLU(),
                             nn.Dropout(),
                             nn.Linear(500,10))

    if modelName == 'Xception':
        model.fc = nn.Sequential(nn.Linear(model.fc.in_features,500),
                             nn.ReLU(),
                             nn.Dropout(),
                             nn.Linear(500,10))
        
    if modelName == 'InceptionResNetV2':
        model.last_linear = nn.Sequential(nn.Linear(model.last_linear.in_features,500),
                             nn.ReLU(),
                             nn.Dropout(),
                             nn.Linear(500,10))
    return

In [33]:
def train(start_epochs, n_epochs, loaders, model, optimizer, criterion, use_cuda):
    
    
    for epoch in range(start_epochs, start_epochs+n_epochs):
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        tnum_correct = 0
        tnum_examples = 0
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
                
            optimizer.zero_grad()
            if model.name == 'InceptionV3':
                output, aux_output = model(data)
                loss1 = criterion(output, target)
                loss2 = criterion(aux_output, target)
                loss = loss1 + 0.4*loss2
                
            else:
                output = model(data)
                loss = criterion(output, target)
            
            correct = torch.eq(torch.max(F.softmax(output, dim=1), dim=1)[1],target).view(-1)
            tnum_correct += torch.sum(correct).item()
            tnum_examples += correct.shape[0]
            loss.backward()
            optimizer.step()
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
        train_acc = tnum_correct / tnum_examples
        
        ######################    
        # validate the model #
        ######################
        model.eval()
        num_correct = 0
        num_examples = 0
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            
            output = model(data)
            loss = criterion(output, target)
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
            
            correct = torch.eq(torch.max(F.softmax(output, dim=1), dim=1)[1],target).view(-1)
            num_correct += torch.sum(correct).item()
            num_examples += correct.shape[0]
            
        train_loss = train_loss/len(loaders['train'].dataset)
        valid_loss = valid_loss/len(loaders['valid'].dataset)
        valid_acc = num_correct / num_examples
        
        print('Epoch: {} \tTraining Loss: {:.6f} \tTrain Accuracy: {:.2f} \tValidation Loss: {:.6f} \tvalidation Accuracy: {:.2f}'.format(
            epoch, 
            train_loss,
            train_acc,
            valid_loss,
            valid_acc
            ))
        
        wandb.log({'epoch': epoch,'train loss': train_loss,'train accuracy': train_acc,
                   'val loss': valid_loss, 'val accuracy': valid_acc})
            
    # return trained model
    return model

In [34]:
def sp_train():
    config_defaults = {
        'model_name':'ResNet50',
        'preTrain_epochs': 3,
        'fineTune_epochs': 5,
        'learning_rate_1': 1e-4,
        'learning_rate_2': 1e-4,
        'batchnorm_pretrain':'YES',
        'optimizer': 'sgd'
    }
    # Initialize a new wandb run
    wandb.init(config=config_defaults)
    # Config is a variable that holds and saves hyperparameters and inputs
    config = wandb.config
    run_name="Model:["+config.model_name+"] pT_ep:["+str(config.preTrain_epochs)+"] fT_ep:["+str(config.fineTune_epochs)+"] lr1:["+str(config.learning_rate_1)+"] lr2:["+str(config.learning_rate_2)+"] op:["+config.optimizer+"] BNpT:["+config.batchnorm_pretrain+"]"
    wandb.run.name=run_name
    modelName = config.model_name
    model = getModel(modelName)
    model.name = modelName
    if modelName =='Xception' or modelName == 'InceptionResNetV2':
        batch_size = 4
    else: 
        batch_size = 8
    
    datasetTrain, datasetVal, datasetTest = load_datasets(modelName)
    
    loaders = data_loader(datasetTrain, datasetVal, datasetTest, batch_size)
    
    # Get Batchnorm Layers
    msBN = list(filter(lambda m: type(m) == torch.nn.modules.BatchNorm2d, model.modules()))
    
    # Freezing layer
    for param in model.parameters():
        if param.requires_grad:
            param.requires_grad=False
    
    model_change_classifier(model)
    
    # Batchnorm layers unfreeze
    if config.batchnorm_pretrain=='YES':
        for i, m in enumerate(msBN):
            for param in m.parameters():
                param.requires_grad= True
                
    model = model.to(device)
    
    if config.optimizer=='sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate_1, momentum = 0.9)
    elif config.optimizer=='adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate_1, betas=(0.9, 0.999))
    criterion = nn.CrossEntropyLoss()
    
    trained_model = train(start_epochs = 1,
                      n_epochs = config.preTrain_epochs,
                      loaders = loaders,
                      model = model,
                      optimizer = optimizer,
                      criterion = criterion,
                      use_cuda = use_cuda
                     )
    
    #model, optimizer, start_epoch, valid_loss, valid_acc, valid_loss_min = load_ckp(ckp_path, model, optimizer)
    
    for name, param in trained_model.named_parameters():
        param.requires_grad = True
        
    for g in optimizer.param_groups:
        g['lr'] = config.learning_rate_2
        
    trained_model = train(start_epochs = config.preTrain_epochs+1,
                      n_epochs = config.fineTune_epochs,
                      loaders = loaders,
                      model = trained_model,
                      optimizer = optimizer,
                      criterion = criterion,
                      use_cuda = use_cuda
                     )

In [35]:
sweep_config = {
    'method': 'grid', 
    'metric': {
      'name': 'val accuracy',
      'goal': 'maximize'   
    },
    'parameters': {
        'model_name': {
            #'values':[modelName]
            'values':['InceptionV3', 'ResNet50','Xception','InceptionResNetV2']
        },
        'preTrain_epochs': {
            'values':[3]
        },
        'fineTune_epochs': {
            'values': [5]
        },
        'learning_rate_1': {
            'values':[0.001] 
        },
        'learning_rate_2':{
            'values':[0.0001]
        },
        'batchnorm_pretrain':{
            'values': ['YES']
        },
        'optimizer': {
            'values':['sgd']
        }
    }
}

In [36]:
sweep_id = wandb.sweep(sweep_config, project="All_Models_Sweep_1")

Create sweep with ID: f6ktmkwp
Sweep URL: https://wandb.ai/rayanz/All_Models_Sweep_1/sweeps/f6ktmkwp


In [37]:
wandb.agent(sweep_id, sp_train)

[34m[1mwandb[0m: Agent Starting Run: tpyzlg5g with config:
[34m[1mwandb[0m: 	batchnorm_pretrain: YES
[34m[1mwandb[0m: 	fineTune_epochs: 5
[34m[1mwandb[0m: 	learning_rate_1: 0.001
[34m[1mwandb[0m: 	learning_rate_2: 0.0001
[34m[1mwandb[0m: 	model_name: InceptionV3
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	preTrain_epochs: 3


Epoch: 1 	Training Loss: 0.000595 	Train Accuracy: 0.40 	Validation Loss: 0.000473 	validation Accuracy: 0.77
Epoch: 2 	Training Loss: 0.000457 	Train Accuracy: 0.60 	Validation Loss: 0.000346 	validation Accuracy: 0.80
Epoch: 3 	Training Loss: 0.000383 	Train Accuracy: 0.65 	Validation Loss: 0.000336 	validation Accuracy: 0.80
Epoch: 4 	Training Loss: 0.000235 	Train Accuracy: 0.67 	Validation Loss: 0.000270 	validation Accuracy: 0.83
Epoch: 5 	Training Loss: 0.000179 	Train Accuracy: 0.71 	Validation Loss: 0.000250 	validation Accuracy: 0.86
Epoch: 6 	Training Loss: 0.000162 	Train Accuracy: 0.73 	Validation Loss: 0.000235 	validation Accuracy: 0.85
Epoch: 7 	Training Loss: 0.000150 	Train Accuracy: 0.75 	Validation Loss: 0.000230 	validation Accuracy: 0.86
Epoch: 8 	Training Loss: 0.000138 	Train Accuracy: 0.76 	Validation Loss: 0.000226 	validation Accuracy: 0.86


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,8.0
train loss,0.00014
train accuracy,0.7641
val loss,0.00023
val accuracy,0.8645
_runtime,2474.0
_timestamp,1618141549.0
_step,7.0


0,1
epoch,▁▂▃▄▅▆▇█
train loss,█▆▅▂▂▁▁▁
train accuracy,▁▅▆▆▇▇██
val loss,█▄▄▂▂▁▁▁
val accuracy,▁▄▃▅▇▇██
_runtime,▁▂▃▄▅▆▇█
_timestamp,▁▂▃▄▅▆▇█
_step,▁▂▃▄▅▆▇█


[34m[1mwandb[0m: Agent Starting Run: jozsbvng with config:
[34m[1mwandb[0m: 	batchnorm_pretrain: YES
[34m[1mwandb[0m: 	fineTune_epochs: 5
[34m[1mwandb[0m: 	learning_rate_1: 0.001
[34m[1mwandb[0m: 	learning_rate_2: 0.0001
[34m[1mwandb[0m: 	model_name: ResNet50
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	preTrain_epochs: 3


Epoch: 1 	Training Loss: 0.000177 	Train Accuracy: 0.52 	Validation Loss: 0.000317 	validation Accuracy: 0.79
Epoch: 2 	Training Loss: 0.000128 	Train Accuracy: 0.66 	Validation Loss: 0.000285 	validation Accuracy: 0.81
Epoch: 3 	Training Loss: 0.000122 	Train Accuracy: 0.68 	Validation Loss: 0.000267 	validation Accuracy: 0.83
Epoch: 4 	Training Loss: 0.000110 	Train Accuracy: 0.70 	Validation Loss: 0.000261 	validation Accuracy: 0.83
Epoch: 5 	Training Loss: 0.000105 	Train Accuracy: 0.72 	Validation Loss: 0.000249 	validation Accuracy: 0.84
Epoch: 6 	Training Loss: 0.000098 	Train Accuracy: 0.74 	Validation Loss: 0.000243 	validation Accuracy: 0.84
Epoch: 7 	Training Loss: 0.000097 	Train Accuracy: 0.74 	Validation Loss: 0.000244 	validation Accuracy: 0.84
Epoch: 8 	Training Loss: 0.000092 	Train Accuracy: 0.77 	Validation Loss: 0.000238 	validation Accuracy: 0.85


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,8.0
train loss,9e-05
train accuracy,0.76535
val loss,0.00024
val accuracy,0.85
_runtime,1932.0
_timestamp,1618143484.0
_step,7.0


0,1
epoch,▁▂▃▄▅▆▇█
train loss,█▄▃▂▂▁▁▁
train accuracy,▁▅▅▆▇▇▇█
val loss,█▅▄▃▂▁▁▁
val accuracy,▁▃▅▆▆▇▇█
_runtime,▁▂▃▄▅▆▇█
_timestamp,▁▂▃▄▅▆▇█
_step,▁▂▃▄▅▆▇█


[34m[1mwandb[0m: Agent Starting Run: nlwcylj8 with config:
[34m[1mwandb[0m: 	batchnorm_pretrain: YES
[34m[1mwandb[0m: 	fineTune_epochs: 5
[34m[1mwandb[0m: 	learning_rate_1: 0.001
[34m[1mwandb[0m: 	learning_rate_2: 0.0001
[34m[1mwandb[0m: 	model_name: Xception
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	preTrain_epochs: 3


Epoch: 1 	Training Loss: 0.000195 	Train Accuracy: 0.52 	Validation Loss: 0.000351 	validation Accuracy: 0.80
Epoch: 2 	Training Loss: 0.000129 	Train Accuracy: 0.68 	Validation Loss: 0.000288 	validation Accuracy: 0.82
Epoch: 3 	Training Loss: 0.000114 	Train Accuracy: 0.71 	Validation Loss: 0.000256 	validation Accuracy: 0.84
Epoch: 4 	Training Loss: 0.000103 	Train Accuracy: 0.74 	Validation Loss: 0.000241 	validation Accuracy: 0.85
Epoch: 5 	Training Loss: 0.000099 	Train Accuracy: 0.75 	Validation Loss: 0.000233 	validation Accuracy: 0.85
Epoch: 6 	Training Loss: 0.000094 	Train Accuracy: 0.76 	Validation Loss: 0.000228 	validation Accuracy: 0.86
Epoch: 7 	Training Loss: 0.000090 	Train Accuracy: 0.77 	Validation Loss: 0.000222 	validation Accuracy: 0.85
Epoch: 8 	Training Loss: 0.000088 	Train Accuracy: 0.78 	Validation Loss: 0.000220 	validation Accuracy: 0.86


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,8.0
train loss,9e-05
train accuracy,0.77672
val loss,0.00022
val accuracy,0.8595
_runtime,2503.0
_timestamp,1618145991.0
_step,7.0


0,1
epoch,▁▂▃▄▅▆▇█
train loss,█▄▃▂▂▁▁▁
train accuracy,▁▅▆▇▇███
val loss,█▅▃▂▂▁▁▁
val accuracy,▁▄▆▇▇█▇█
_runtime,▁▂▃▄▅▆▇█
_timestamp,▁▂▃▄▅▆▇█
_step,▁▂▃▄▅▆▇█


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gtcxwbaw with config:
[34m[1mwandb[0m: 	batchnorm_pretrain: YES
[34m[1mwandb[0m: 	fineTune_epochs: 5
[34m[1mwandb[0m: 	learning_rate_1: 0.001
[34m[1mwandb[0m: 	learning_rate_2: 0.0001
[34m[1mwandb[0m: 	model_name: InceptionResNetV2
[34m[1mwandb[0m: 	optimizer: sgd
[34m[1mwandb[0m: 	preTrain_epochs: 3


Downloading: "http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth" to /root/.cache/torch/hub/checkpoints/inceptionresnetv2-520b38e4.pth


  0%|          | 0.00/213M [00:00<?, ?B/s]

Epoch: 1 	Training Loss: 0.000193 	Train Accuracy: 0.50 	Validation Loss: 0.000686 	validation Accuracy: 0.74
Epoch: 2 	Training Loss: 0.000136 	Train Accuracy: 0.66 	Validation Loss: 0.001513 	validation Accuracy: 0.74
Epoch: 3 	Training Loss: 0.000125 	Train Accuracy: 0.69 	Validation Loss: 0.001092 	validation Accuracy: 0.78
Epoch: 4 	Training Loss: 0.000109 	Train Accuracy: 0.73 	Validation Loss: 0.005183 	validation Accuracy: 0.73
Epoch: 5 	Training Loss: 0.000101 	Train Accuracy: 0.74 	Validation Loss: 0.009558 	validation Accuracy: 0.75
Epoch: 6 	Training Loss: 0.000095 	Train Accuracy: 0.76 	Validation Loss: 0.006939 	validation Accuracy: 0.75
Epoch: 7 	Training Loss: 0.000090 	Train Accuracy: 0.77 	Validation Loss: 0.003650 	validation Accuracy: 0.79
Epoch: 8 	Training Loss: 0.000084 	Train Accuracy: 0.79 	Validation Loss: 0.006844 	validation Accuracy: 0.75


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,8.0
train loss,8e-05
train accuracy,0.7901
val loss,0.00684
val accuracy,0.748
_runtime,5656.0
_timestamp,1618151681.0
_step,7.0


0,1
epoch,▁▂▃▄▅▆▇█
train loss,█▄▄▃▂▂▁▁
train accuracy,▁▅▅▆▇▇▇█
val loss,▁▂▁▅█▆▃▆
val accuracy,▃▂▇▁▄▄█▃
_runtime,▁▂▃▄▅▆▇█
_timestamp,▁▂▃▄▅▆▇█
_step,▁▂▃▄▅▆▇█


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.
