In [None]:
!pip install pycm livelossplot
!pip install wandb

# make sure you have the utils.py and models.py locally
from utils import * 
from models import *

import pprint
import wandb
import copy
import matplotlib.pyplot as plt
import numpy as np
import os
from pycm import *

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, Subset
import torchvision.transforms as transforms
from torchvision import models, datasets, transforms

from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedShuffleSplit

Collecting livelossplot
  Downloading https://files.pythonhosted.org/packages/57/26/840be243088ce142d61c60273408ec09fa1de4534056a56d6e91b73f0cae/livelossplot-0.5.4-py3-none-any.whl
Installing collected packages: livelossplot
Successfully installed livelossplot-0.5.4


In [None]:
def set_seed(seed):
    """
    Use this to set ALL the random seeds to a fixed value and take out any
    randomness from cuda kernels.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and every CUDA device), then switches cuDNN to a
    reproducible configuration.

    Args:
        seed (int) : value used for every random number generator
    Returns:
        bool : always True
    """
    # Local import: the notebook's import cell does not import `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Disable the cudnn auto-tuner: with benchmark enabled it may pick a
    # different convolution algorithm from run to run.
    torch.backends.cudnn.benchmark = False
    # Only allow deterministic cudnn algorithms, should cudnn be re-enabled later.
    torch.backends.cudnn.deterministic = True
    # Turn cudnn off altogether, as the original helper did.
    torch.backends.cudnn.enabled = False

    return True

# Choose the compute device once for the whole notebook: the GPU when
# PyTorch can see at least one usable CUDA device, otherwise the CPU.
if torch.cuda.device_count() > 0 and torch.cuda.is_available():
    device = 'cuda'
    print("Cuda installed! Running on GPU!")
else:
    device = 'cpu'
    print("No GPU available!")

Cuda installed! Running on GPU!


In [None]:
# Log in to Weights & Biases through its command-line tool before creating a sweep.
!wandb login

In [None]:
# Colab-only cell: mount Google Drive at /content/gdrive/.
from google.colab import drive
drive.mount('/content/gdrive/')

In [None]:
# Colab-only cell: upload your kaggle.json file here.
# NOTE(review): the download cell that follows looks for a file named
# exactly `kaggle.json` in the current working directory.
from google.colab import files
files.upload()

{}

In [None]:
# download the files in google colab from kaggle
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/kaggle.json
!pip install --upgrade --force-reinstall --no-deps kaggle
!kaggle competitions download -c acse4-ml-2020
!unzip acse4-ml-2020.zip

mv: cannot stat 'kaggle.json': No such file or directory
Processing /root/.cache/pip/wheels/a1/6a/26/d30b7499ff85a4a4593377a87ecf55f7d08af42f0de9b60303/kaggle-1.5.12-cp37-none-any.whl
Installing collected packages: kaggle
  Found existing installation: kaggle 1.5.12
    Uninstalling kaggle-1.5.12:
      Successfully uninstalled kaggle-1.5.12
Successfully installed kaggle-1.5.12
acse4-ml-2020.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  acse4-ml-2020.zip
replace submission_sample.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
def train_wandb(config=None):
    """
    Train and validate one model configuration for a wandb hyperparameter sweep.

    Builds the data loaders, model, optimizer and loss named in the run
    config, then logs epoch, train_loss, train_accuracy, valid_loss and
    valid_accuracy to wandb after every epoch. Runs on the notebook-level
    ``device``.

    Args:
        config (dict, optional) : config for the hyperparameter search. It is
            passed to ``wandb.init``; the values actually used are read back
            from ``wandb.config``.
    Returns:
        None
    Raises:
        NotImplementedError: if ``model_name`` is not a name in this
            notebook's globals, ``optimizer_name`` is not an attribute of
            ``torch.optim``, or ``criterion_name`` is not an attribute of
            ``torch.nn``.
    """

    with wandb.init(config=config, tags=["non-normalized"]):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config

        transform = transforms.Compose([transforms.ToTensor(),
                                        transforms.Grayscale(num_output_channels=1),
                                        ])

        covid_train_full = datasets.ImageFolder('xray-data/xray-data/train', transform=transform)

        # Single stratified 90/10 split with a fixed random_state, so every run
        # of the sweep is scored on the same held-out images.
        splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=42)
        train_idx, validation_idx = next(
            splitter.split(covid_train_full.samples, covid_train_full.targets)
        )
        print("No. of Training: ", train_idx.shape)
        print("No. of Validation: ", validation_idx.shape)

        # Creating the subsets for training and validation data
        covid_train = Subset(covid_train_full, train_idx)
        covid_validation = Subset(covid_train_full, validation_idx)

        train_loader = DataLoader(covid_train, batch_size=config.batch_size, shuffle=True, num_workers=6)
        # Validation must iterate the held-out subset (not the training subset),
        # otherwise the sweep metric `valid_accuracy` is a training accuracy.
        validation_loader = DataLoader(covid_validation, batch_size=config.batch_size, shuffle=False, num_workers=6)

        print(device)

        # Make model. Only the name lookup is guarded, so a genuine failure
        # inside the model constructor surfaces with its own traceback.
        model_cls = globals().get(config.model_name)
        if model_cls is None:
            raise NotImplementedError("Model of name %s has not been found in this file"%config.model_name)
        model = model_cls().to(device)
        config.model = model

        # Make optimizer
        optimizer_cls = getattr(torch.optim, config.optimizer_name, None)
        if optimizer_cls is None:
            raise NotImplementedError("Optimizer of name %s has not been found in torch.optim"%config.optimizer_name)
        optimizer = optimizer_cls(model.parameters(), lr=config.learning_rate)

        # Apply the optional sweep hyperparameters to every parameter group
        # after construction (presumably so the constructor call stays valid
        # for optimizers without a `momentum` argument - confirm). Each key is
        # handled on its own; a key missing from the sweep is recorded as 0.
        for hyperparameter in ("momentum", "weight_decay"):
            try:
                value = getattr(config, hyperparameter)
            except (AttributeError, KeyError):
                setattr(config, hyperparameter, 0)
                continue
            for group in optimizer.param_groups:
                group[hyperparameter] = value
        config.optimizer = optimizer

        # Make loss
        criterion_cls = getattr(torch.nn, config.criterion_name, None)
        if criterion_cls is None:
            raise NotImplementedError("Criterion of name %s has not been found in torch.nn"%config.criterion_name)
        criterion = criterion_cls()
        config.criterion = criterion

        for epoch in range(config.epochs):
            # Use the device selected at the top of the notebook instead of a
            # hard-coded 'cuda', so the function also runs on CPU-only machines.
            train_loss, train_accuracy = train(model, optimizer, criterion, train_loader, device=device)
            validation_loss, validation_accuracy = validate(model, criterion, validation_loader, device=device)
            log = {"epoch": epoch + 1, "train_loss":train_loss.item(), "train_accuracy": train_accuracy.item(), "valid_loss":validation_loss.item(), "valid_accuracy":validation_accuracy.item()}
            print(log)
            wandb.log(log)

In [None]:
# The metric which the sweep tries to maximize/minimize.
metric = dict(name='valid_accuracy', goal='maximize')

# Search method - random. Choose 'grid' for grid search.
sweep_config = dict(method='random', metric=metric)

In [None]:
# Hyperparameters which we are optimising over: each entry lists its
# candidate values.
parameters_dict = dict(
    batch_size=dict(values=[64, 128]),
    weight_decay=dict(values=[0, 0.01]),
    learning_rate=dict(values=[5e-1, 5e-2, 5e-3, 5e-4]),
    momentum=dict(values=[0.7, 0.8, 0.9]),
)

# Attach the search space to the sweep definition (same dict object, not a copy).
sweep_config.update(parameters=parameters_dict)

In [None]:
# Constant (single-valued) parameters: logged in wandb with every run.
parameters_dict.update(
    epochs=dict(value=40),
    dataset=dict(value="X_Ray"),
    model_name=dict(value="CustomGooglenet"),
    criterion_name=dict(value="CrossEntropyLoss"),
    optimizer_name=dict(value="SGD"),
)

In [None]:
# Show the assembled sweep definition before it is submitted.
print(pprint.pformat(sweep_config))

{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'valid_accuracy'},
 'parameters': {'batch_size': {'values': [64, 128]},
                'criterion_name': {'value': 'CrossEntropyLoss'},
                'dataset': {'value': 'X_Ray'},
                'epochs': {'value': 40},
                'learning_rate': {'values': [0.5, 0.05, 0.005, 0.0005]},
                'model_name': {'value': 'CustomGooglenet'},
                'momentum': {'values': [0.7, 0.8, 0.9]},
                'optimizer_name': {'value': 'SGD'},
                'test_batch_size': {'value': 128},
                'weight_decay': {'values': [0, 0.01]}}}


In [None]:
# Using the same entity and project on every machine lets multiple agents
# search in parallel. That is why the random search method was chosen when
# running on multiple agents.
sweep_id = wandb.sweep(
    sweep_config,
    project='transfer_googlenet',
    entity='losslandscape21',
)

Create sweep with ID: 8aq0x78r
Sweep URL: https://wandb.ai/losslandscape21/transfer_googlenet/sweeps/8aq0x78r


In [None]:
# Start a sweep agent in this notebook: it calls train_wandb once per
# received job, with count=20 as the limit passed to wandb.agent.
wandb.agent(sweep_id, function=train_wandb, count=20)

[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 24qgeolo with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	criterion_name: CrossEntropyLoss
[34m[1mwandb[0m: 	dataset: X_Ray
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	learning_rate: 0.5
[34m[1mwandb[0m: 	model_name: CustomGooglenet
[34m[1mwandb[0m: 	momentum: 0.8
[34m[1mwandb[0m: 	optimizer_name: SGD
[34m[1mwandb[0m: 	test_batch_size: 128
[34m[1mwandb[0m: 	weight_decay: 0


No. of Training:  [15281 14945 10725 ...  8599  7820 14999]
No. of Validation:  (2022,)
cuda


  cpuset_checked))


0


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
