In [6]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import copy
import os
import pandas as pd

%load_ext autoreload
%autoreload 2
    
plt.ion()   # interactive mode

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
from util import *

In [8]:
## Preprocessing pipelines, datasets and batch loaders

# Per-channel mean / std handed to transforms.Normalize in every pipeline below.
imagenet_means = [0.485, 0.456, 0.406]
imagenet_stds = [0.229, 0.224, 0.225]

# One pipeline per split. 'train' uses a random crop plus a random horizontal
# flip; 'val' and 'test_kaggle' use a fixed center crop. All of them end with
# tensor conversion followed by normalization.
data_transforms = dict(
    train=transforms.Compose([
        transforms.Scale(256),
        transforms.RandomSizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_means, imagenet_stds),
    ]),
    val=transforms.Compose([
        transforms.Scale(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_means, imagenet_stds),
    ]),
    test_kaggle=transforms.Compose([
        transforms.Scale(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_means, imagenet_stds),
    ]),
)

# Root directory of the data; each split is read from the sub-folder of the same name.
data_dir = '../../data/all_data_scaled_alt'

# Only the 'train' and 'val' splits are loaded here.
dsets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ['train', 'val']
}

# Both loaders shuffle and use 8 worker processes with pinned memory.
dset_loaders = {
    split: torch.utils.data.DataLoader(dataset,
                                       batch_size=64,
                                       shuffle=True,
                                       num_workers=8,
                                       pin_memory=True)
    for split, dataset in dsets.items()
}

# Number of samples per split, and the class names found in the training folder.
dset_sizes = {split: len(dataset) for split, dataset in dsets.items()}
dset_classes = dsets['train'].classes

use_gpu = True

In [None]:
np.random.seed(123)
exp_str = 'phase1_submission' # labels the directory where figures, kaggle predictions, and model checkpoints will be saved
pretrained = True
augmentations = 'basic'
# Define hyperparameters used to generate the models used in the ensemble.
# Entry i of every list below configures model i, so all lists must have the
# same length.
# num_epochs_list = [12, 17, 22, 16, 7]
num_epochs_list = [40, 40, 40, 40, 40]
lr_list = [1.65e-4, 5.96e-4, 1.21e-4, 3.86e-4, 1.55e-4]
# The fourth entry was previously written with a decimal comma (`8,70e-5`),
# which Python parses as two elements (8 and 70e-5): the list became six long
# and models 3 and 4 were trained with the wrong weight decay.
weight_decay_list = [3.17e-8, 1.99e-6, 2.08e-2, 8.70e-5, 3.17e-2]
lr_decay_epochs_list = [12, 8, 11, 12, 12]
lr_decay_const_list = [0.3, 0.5, 0.4, 0.7, 0.6]
model_name_list = ['resnet18', 'resnet18', 'resnet18', 'resnet34', 'resnet34']

# Guard against the per-model lists drifting out of step (e.g. a stray comma).
assert len({len(values) for values in (num_epochs_list,
                                       lr_list,
                                       weight_decay_list,
                                       lr_decay_epochs_list,
                                       lr_decay_const_list,
                                       model_name_list)}) == 1, \
    'per-model hyperparameter lists must all have the same length'

In [None]:
# Train one fine-tuned ResNet per hyperparameter set. For each model: save a
# checkpoint, save its performance table as CSV, and save its predictions on
# the Kaggle test set as CSV.

# LOAD_TRUNCATED_IMAGES is a module-level PIL flag, so it is set once here,
# before any DataLoader is iterated, rather than on every loop iteration.
from PIL import ImageFile, Image
ImageFile.LOAD_TRUNCATED_IMAGES = True

## Output directories (independent of the model, so created once).
# Each level is created separately, as before; whether maybe_makedir creates
# missing parents is not visible from this notebook.
model_dir = '../model_weights/experiments/'
maybe_makedir(model_dir)
model_dir = model_dir + exp_str + '/'
maybe_makedir(model_dir)

# NOTE: `performance_path` is also read by the later cell that writes the
# combined CSV, so its name and final value must stay as they are.
performance_path = '../performance/'
maybe_makedir(performance_path)
performance_path = performance_path + exp_str + '/'
maybe_makedir(performance_path)

predictions_dir = '../test_predictions/' + 'experiments' + '/'
maybe_makedir(predictions_dir)
predictions_dir = predictions_dir + exp_str + '/'
maybe_makedir(predictions_dir)

## Kaggle test set (independent of the model, so built once).
# Uses the 'test_kaggle' pipeline already defined in `data_transforms` by the
# data-loading cell; the global dict is no longer rebound inside the loop.
dsets_kaggle = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x])
                for x in ['test_kaggle']}

# shuffle=False so batches come in dataset order; presumably predict_on_test
# returns rows in that same order, matching `file_names` -- confirm in util.
dset_loaders_kaggle = {x: torch.utils.data.DataLoader(dsets_kaggle[x], batch_size = 64,
                                                      shuffle=False, num_workers = 8, pin_memory = True)
                       for x in ['test_kaggle']}

dset_classes_kaggle = dsets_kaggle['test_kaggle'].classes

phase = 'test_kaggle'
test_loader = dset_loaders_kaggle[phase]

# Keep just the short file name of every test image (first item of each
# `imgs` entry is the path).
file_names = [os.path.basename(x[0]) for x in test_loader.dataset.imgs]

dtype = torch.cuda.FloatTensor

performance_df_list = []
best_model_list = []
for (num_epochs, lr, weight_decay,
     lr_decay_epochs, lr_decay_const, model_name) in zip(num_epochs_list,
                                                         lr_list,
                                                         weight_decay_list,
                                                         lr_decay_epochs_list,
                                                         lr_decay_const_list,
                                                         model_name_list):
    # For naming files. The pieces are concatenated exactly as before (no
    # separator after the model name or before 'stamp_') so output file names
    # keep their existing pattern; the timestamp keeps repeated runs distinct.
    description_string = model_name + 'lr' + str(lr) + \
                            '_wd' + str(weight_decay) + \
                            '_decayep' + str(lr_decay_epochs) + \
                            '_decayconst' + str(lr_decay_const) + \
                            'stamp_' + str(time.time())

    # For passing to functions
    hyperparam_dict = {'num_epochs':num_epochs,
                       'lr':lr,
                       'weight_decay':weight_decay,
                       'lr_decay_epochs':lr_decay_epochs,
                       'lr_decay_const':lr_decay_const,
                       'pretrained':str(pretrained),
                       'model_name':model_name,
                       'description':description_string,
                       'augmentations':augmentations
                       }

    ## Fine tuning conv-net
    if model_name == 'resnet18':
        model_ft = models.resnet18(pretrained=pretrained)
    elif model_name == 'resnet34':
        model_ft = models.resnet34(pretrained=pretrained)
    else:
        # Without this branch an unknown name would silently reuse the model
        # left over from the previous iteration (or fail with a NameError on
        # the first one).
        raise ValueError('Unsupported model_name: ' + repr(model_name))

    # Replace the final fully connected layer with a 3-way classifier.
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, 3)
    model_ft = torch.nn.DataParallel(model_ft).cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer_ft = optim.Adam(model_ft.parameters(), lr = lr, weight_decay = weight_decay)

    # Learning-rate schedule; not defined in this notebook (presumably comes
    # from `util`) and is handed to train_model together with the decay settings.
    scheduler = exp_lr_scheduler

    model_ft, performance_dict = train_model(model_ft,
                                             criterion,
                                             optimizer_ft,
                                             scheduler,
                                             dset_loaders = dset_loaders,
                                             dset_sizes = dset_sizes,
                                             num_epochs = num_epochs,
                                             init_lr = lr,
                                             lr_decay_epochs = lr_decay_epochs,
                                             lr_decay_const = lr_decay_const)
    best_model_list.append(model_ft)

    ## Save the model weights
    # NOTE: model_ft is wrapped in DataParallel, so the saved state-dict keys
    # carry the wrapper's 'module.' prefix; load into a wrapped model.
    torch.save(model_ft.state_dict(), model_dir + description_string + '.chkpt')

    # Convert performance to a dataframe
    performance_df = convert_performance_dict(performance_dict, hyperparam_dict)
    performance_df_list.append(performance_df)

    # Save performance results
    performance_df.to_csv(path_or_buf = performance_path + description_string + '.csv', index = False)

    ## Kaggle test set predictions
    probs = predict_on_test(model_ft, test_loader, dtype)
    predictions_df = pd.DataFrame(data = probs, columns = (dset_classes_kaggle))

    predictions_df.insert(0, column = 'image_name', value = file_names)
    print(predictions_df)
    predictions_df.to_csv(path_or_buf = predictions_dir + description_string + '.csv', index = False)

Epoch 0/39
----------
LR is set to 0.000165
train Loss: 0.9705 Acc: 0.5316
Type_1 F1: 0.2631 Precision: 0.3765, Recall: 0.2022, Specificity: 0.9286
Type_2 F1: 0.6473 Precision: 0.5708, Recall: 0.7475, Specificity: 0.3679
Type_3 F1: 0.3958 Precision: 0.4727, Recall: 0.3405, Specificity: 0.8411
val Loss: 0.9838 Acc: 0.5811
Type_1 F1: 0.1034 Precision: 0.3750, Recall: 0.0600, Specificity: 0.9797
Type_2 F1: 0.7000 Precision: 0.5738, Recall: 0.8974, Specificity: 0.2571
Type_3 F1: 0.4328 Precision: 0.6591, Recall: 0.3222, Specificity: 0.9272
new best model
Epoch 1/39
----------


In [None]:
# Stack the per-model performance dataframes collected by the training loop
# and write them as a single CSV into the experiment's performance directory.
combined_performance_csv = performance_path + exp_str + '_all.csv'
performance_df_all = pd.concat(performance_df_list)
performance_df_all.to_csv(combined_performance_csv, index=False)