# Modeling - First Pass

### Installs, Packages, Seeds

In [12]:
# Installs the package that provides the `EfficientNet` class imported in the imports cell.
# NOTE(review): the version is unpinned — consider pinning it for reproducible runs.
%pip install efficientnet_pytorch

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p38/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [13]:
# Installs PyTorch into the kernel's environment (`%pip` targets the running kernel).
# NOTE(review): the version is unpinned — consider pinning it for reproducible runs.
%pip install torch
# %pip install torchvision

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p38/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [14]:
%matplotlib inline

# python libraries
import os
import cv2
import itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
import seaborn as sns
from itertools import combinations, product

# import imblearn
import logging
from tqdm import tqdm
from glob import glob
from PIL import Image
import ipywidgets

# pytorch libraries
import torch
from torch import optim,nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
from torchvision import models,transforms
from efficientnet_pytorch import EfficientNet

# sklearn libraries
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# google drive
# from google.colab import drive # Connect colab to google drive

# custom modeling libraries
from build_model2 import initialize_model, load_split_data, build_loader, evaluate, train_model, model_scores, eval_model, add_results

# other
import importlib

In [15]:
# Reload the local helper module so edits to build_model2.py take effect
# without restarting the kernel, then rebind the helper names so they point
# at the freshly reloaded objects.
import build_model2
build_model2 = importlib.reload(build_model2)

from build_model2 import (
    initialize_model,
    load_split_data,
    build_loader,
    evaluate,
    train_model,
    model_scores,
    eval_model,
    add_results,
)

# import matplotlib as mpl
# print(mpl.get_cachedir())

## Grid Components

In [16]:
# Hyperparameter axes explored by the grid search.
epoch_search = [5, 10, 15, 20]
optim_search = ['SGD', 'Adam', 'AdamW']
model_search = ['resnet', 'vgg', 'efficientnet']

# Cartesian product of the axes: epochs vary slowest, backbone fastest.
prods = [combo for combo in product(epoch_search, optim_search, model_search)]

# Transpose the list of (epochs, optimizer, model) triples into one tuple
# per hyperparameter, then wrap each tuple in a named Series.
epoch_values, optim_values, model_values = zip(*prods)

es = pd.Series(epoch_values, name='epochs', dtype='int')
optims = pd.Series(optim_values, name='optimizer')
mods = pd.Series(model_values, name='pretrained_model')

# One row per configuration; the default integer index is later used as the
# run id in the tuned model name (the `GS<n>` suffix).
g_search = pd.concat([es, optims, mods], axis=1)

## Parameters

In [17]:
# Central run configuration shared by every cell below.
# Entries marked "set per grid-search run" are placeholders that the
# grid-search loop overwrites for each configuration it trains.
model_dict = {'pretrained_model': None,  # set per grid-search run
              'epochs': None,  # set per grid-search run
              'home_directory': '/home/ec2-user/SageMaker/teledermatologyAI_capstone',
              'mod_directory': '/home/ec2-user/SageMaker/teledermatologyAI_capstone/model/gridsearch2',
              'csv_name': 'full_data_rename',
              'split': 'split_3',
              'cl': 'label_0',
              'dev_state': False,
              'dev_sample': 15000,
              'seed': 99,
              'lr': .0035,                  # from prior gridsearch
              'batch_size':64,
              'num_workers':24,
              'transform':3,
              'results_file':'gridsearch_results',
              'model':None,  # set per grid-search run
              # Use the first GPU when one is present; otherwise fall back to
              # the CPU so the notebook can still be executed.
              'device': torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
              'optimizer': None,  # set per grid-search run
              'criterion': None,  # set per grid-search run
              'tuned_model_name': None,  # set per grid-search run
              'show_val_cm': False,
             }

# Seed every random number generator the notebook relies on.
# torch.manual_seed seeds torch's default (CPU) generator, which the original
# cell left unseeded; torch.cuda.manual_seed alone only covers the current
# CUDA device's generator.
np.random.seed(model_dict['seed'])
torch.manual_seed(model_dict['seed'])
torch.cuda.manual_seed(model_dict['seed'])

# Check GPU (guarded: get_device_name() raises when no CUDA device exists)
if torch.cuda.is_available():
    print('GPU Type:', torch.cuda.get_device_name())
    print('GPU Count:', torch.cuda.device_count())
else:
    print('GPU not available; falling back to CPU')

# Python-level variable used by the following `cd $HOME` cell.
HOME = model_dict['home_directory']

GPU Type: Tesla T4
GPU Count: 1


In [18]:
# IPython `cd` automagic: `$HOME` expands to the Python variable HOME defined in
# the parameters cell (the project directory), not the shell's $HOME.
cd $HOME

/home/ec2-user/SageMaker/teledermatologyAI_capstone


## Data

In [19]:
# Load the full table plus its train/test/validation partitions, using the
# CSV name, split column, label column and dev-sampling options from
# model_dict.
# NOTE(review): the unpacking order (data, train, test, val) presumably
# mirrors load_split_data's return order — confirm against build_model2.
split_kwargs = {
    'directory': model_dict['home_directory'],
    'csv_name': model_dict['csv_name'],
    'data_split': model_dict['split'],
    'label': model_dict['cl'],
    'mode': 'all',
    'dev_state': model_dict['dev_state'],
    'dev_sample': model_dict['dev_sample'],
    'seed': model_dict['seed'],
}
data, train, test, val = load_split_data(**split_kwargs)

In [20]:
# Label dictionary for evaluation
labels_idx = np.sort(data.label_idx.unique())
label_map = data[['label', 'label_idx']].drop_duplicates().sort_values('label_idx')
label_dict = dict(zip(label_map.label_idx, label_map['label']))
model_dict['label_dict'] = label_dict

## Grid Search Loop

In [21]:
# Skip grid rows 0 and 1 (presumably already trained in an earlier session —
# TODO confirm). Label-based `.loc[2:]` keeps rows whose index label is >= 2,
# so re-running this cell is a no-op; the positional `g_search[2:]` it
# replaces dropped two more rows on every re-run.
g_search = g_search.loc[2:]

In [31]:
# Exclude the EfficientNet configurations and resume the grid at row 6.
# On a top-to-bottom run the original positional `[2:]` removed rows 3 and 4
# after the filter, i.e. the search resumed at row 6 (the first run in the
# training log is GS6). Selecting by index label gives that same result and,
# unlike the positional slice, does not drop further rows if the cell is
# executed again.
not_effnet = g_search['pretrained_model'] != 'efficientnet'
g_search = g_search.loc[not_effnet].loc[6:]

In [33]:
# g_search

In [None]:
# Gridsearch
#
# For every remaining row of `g_search`: fine-tune the selected pretrained
# backbone with the selected optimizer for the selected number of epochs,
# pickle the predictions returned by `train_model`, and append one row of
# validation metrics to the shared results file through `add_results`.

# Loop-invariant pieces are built once instead of once per configuration.
OPTIMIZER_FACTORIES = {'SGD': optim.SGD, 'Adam': optim.Adam, 'AdamW': optim.AdamW}
NORM_MEAN = [.541, .414, .382]
NORM_STD = [.256, .215, .209]
direc = model_dict['mod_directory']

# Create the output directory up front so a finished training run cannot be
# lost to a missing folder when the predictions are pickled.
os.makedirs(direc, exist_ok=True)

for gs_idx, gs_row in g_search.iterrows():

    # extract gridsearch features
    me = gs_row['epochs']
    mn = gs_row['pretrained_model']
    mo = gs_row['optimizer']

    # Fail fast on an unsupported optimizer. Without this check an unknown
    # name would leave model_dict['optimizer'] pointing at the previous
    # iteration's optimizer, which is bound to the previous model's parameters.
    if mo not in OPTIMIZER_FACTORIES:
        raise ValueError(
            f'Unsupported optimizer {mo!r} in grid row {gs_idx}; '
            f'expected one of {sorted(OPTIMIZER_FACTORIES)}'
        )

    nam = f'{mn}_{me}e_{mo}_GS{gs_idx}'
    model_dict['epochs'] = me
    model_dict['pretrained_model'] = mn
    model_dict['optimizer_name'] = mo
    model_dict['alias'] = gs_idx
    model_dict['tuned_model_name'] = nam

    # set batch size (EfficientNet runs with a much smaller batch)
    model_dict['batch_size'] = 3 if mn == 'efficientnet' else 64

    # Load each model
    model_ft, input_size = initialize_model(model_name = mn,
                                            num_classes = len(data.label.unique()),
                                            feature_extract = False,
                                            use_pretrained=True)

    # Move model to GPU
    model = model_ft.to(model_dict['device'])

    model_dict.update({
                       'model': model,
                       'criterion': nn.CrossEntropyLoss().to(model_dict['device']),
    })

    # Build the optimizer for this run from the freshly created model
    model_dict['optimizer'] = OPTIMIZER_FACTORIES[mo](model.parameters(), lr=model_dict['lr'])

    # Resize target chosen so that the center crop spans 85% of the resized side
    model_dict['resize'] = int(input_size/.85)

    # Set Transforms
    # Deterministic resize + crop shared by the train and evaluation pipelines
    transform_header = [
                        transforms.Resize(model_dict['resize']),
                        transforms.CenterCrop(input_size)
                        ]

    # Training-only augmentation; the commented entries are unused options
    transform_body = [
                      transforms.RandomHorizontalFlip(), # a
                      transforms.RandomVerticalFlip(), # b
                      transforms.RandomRotation(20), # c
                      transforms.RandomCrop(size=(input_size,input_size)), # d
#                       transforms.RandomInvert(), transforms.RandomPosterize(bits=2), # e
#                       transforms.RandomAdjustSharpness(sharpness_factor=2), # f
#                       transforms.RandomSolarize(threshold=192.0), # g
#                       transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1) # h
                      ]

    # Tensor conversion + per-channel normalisation shared by both pipelines
    transform_footer = [transforms.ToTensor(),
                        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD)]

    # Evaluation pipeline = training pipeline without the random augmentation
    val_transform = transforms.Compose(transform_header + transform_footer)
    train_transform = transforms.Compose(transform_header + transform_body + transform_footer)

    loader_kwargs = {'batch_size': model_dict['batch_size'],
                     'num_workers': model_dict['num_workers']}
    test_loader = build_loader(mode = 'test', df = test, transform = val_transform, **loader_kwargs)
    val_loader = build_loader(mode = 'val', df = val, transform = val_transform, **loader_kwargs)
    train_loader = build_loader(mode = 'train', df = train, transform = train_transform, **loader_kwargs)

    model_dict['loader'] = {'train_loader': train_loader,
                            'val_loader': val_loader,
                            'test_loader': test_loader}

    pred_df, val_scores, tot_time = train_model(model_dict = model_dict)

    # `prec` and `rec` are unpacked but not written to the results file.
    # NOTE(review): `f5` is recorded under 'F0.5' below — confirm that
    # train_model returns the F0.5 score in that position.
    acc, f1, f2, f5, prec, rec, d_0, d_1, d_2, d_3, d_4 = val_scores

    pred_df.to_pickle(f'{direc}/{nam}_preds.pkl')

    # One results row for this configuration
    col_dict = {
             'tuned_model': model_dict['tuned_model_name'],
             'transform': model_dict['transform'],
             'lr': model_dict['lr'],
             'pretrained_model': model_dict['pretrained_model'],
             'optimizer': model_dict['optimizer_name'],
             'epochs': model_dict['epochs'],
             'batch_size': model_dict['batch_size'],
             'workers': model_dict['num_workers'],
             'train_time': tot_time,
             'data_split': model_dict['split'],
             'label_set': model_dict['cl'],
             'accur': acc,
             'F1': f1,
             'F0.5': f5,
             'F2': f2,
             'benign_accur': d_0,
             'noncancerous_accur': d_1,
             'malignant_accur': d_2,
             'infection_accur': d_3,
             'unclassified_accur': d_4
    }

    # Index the row by the grid row id so results can be joined back to g_search
    add_results(model_dict['results_file'], direc, pd.DataFrame(col_dict, index = [gs_idx]))
    print('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')

Starting Training resnet_5e_AdamW_GS6
[epoch 1], [iter 100 of 211],[train loss 1.55903], [train acc 0.33234]
[epoch 1], [iter 200 of 211],[train loss 1.45048], [train acc 0.36797]
------------------------------------------------------------
[epoch 1], [val loss 1.35239], [val acc 0.41328]
------------------------------------------------------------

EPOCH 1 :
*****************************************************
Complete in 2m 18s
best record: [epoch 1], [val loss 1.35239], [val acc 0.41328]
*****************************************************


  _warn_prf(average, modifier, msg_start, len(result))


[epoch 2], [iter 100 of 211],[train loss 1.27439], [train acc 0.44344]
[epoch 2], [iter 200 of 211],[train loss 1.25140], [train acc 0.45414]
------------------------------------------------------------
[epoch 2], [val loss 1.94949], [val acc 0.33285]
------------------------------------------------------------

EPOCH 2 :
*****************************************************
Complete in 2m 24s
best record: [epoch 2], [val loss 1.94949], [val acc 0.33285]
*****************************************************
[epoch 3], [iter 100 of 211],[train loss 1.19784], [train acc 0.48531]
[epoch 3], [iter 200 of 211],[train loss 1.19362], [train acc 0.48180]
------------------------------------------------------------
[epoch 3], [val loss 1.21186], [val acc 0.49225]
------------------------------------------------------------

EPOCH 3 :
*****************************************************
Complete in 2m 28s
best record: [epoch 3], [val loss 1.21186], [val acc 0.49225]
****************************

  _warn_prf(average, modifier, msg_start, len(result))



EPOCH 1 :
*****************************************************
Complete in 2m 15s
best record: [epoch 1], [val loss 1.61100], [val acc 0.19695]
*****************************************************
[epoch 2], [iter 100 of 211],[train loss 1.60602], [train acc 0.21813]
[epoch 2], [iter 200 of 211],[train loss 1.55547], [train acc 0.25000]
------------------------------------------------------------
[epoch 2], [val loss 1.71549], [val acc 0.19404]
------------------------------------------------------------

EPOCH 2 :
*****************************************************
Complete in 2m 14s
best record: [epoch 2], [val loss 1.71549], [val acc 0.19404]
*****************************************************


  _warn_prf(average, modifier, msg_start, len(result))


[epoch 3], [iter 100 of 211],[train loss 1.44054], [train acc 0.34063]
[epoch 3], [iter 200 of 211],[train loss 1.42591], [train acc 0.35445]
------------------------------------------------------------
[epoch 3], [val loss 1.52776], [val acc 0.30560]
------------------------------------------------------------

EPOCH 3 :
*****************************************************
Complete in 2m 17s
best record: [epoch 3], [val loss 1.52776], [val acc 0.30560]
*****************************************************
[epoch 4], [iter 100 of 211],[train loss 1.37145], [train acc 0.39578]
[epoch 4], [iter 200 of 211],[train loss 1.35000], [train acc 0.40328]
------------------------------------------------------------
[epoch 4], [val loss 1.53244], [val acc 0.32049]
------------------------------------------------------------

EPOCH 4 :
*****************************************************
Complete in 2m 18s
best record: [epoch 4], [val loss 1.53244], [val acc 0.32049]
****************************

  _warn_prf(average, modifier, msg_start, len(result))


[epoch 2], [iter 100 of 211],[train loss 1.28354], [train acc 0.42125]
[epoch 2], [iter 200 of 211],[train loss 1.27574], [train acc 0.43242]
------------------------------------------------------------
[epoch 2], [val loss 1.34138], [val acc 0.43193]
------------------------------------------------------------

EPOCH 2 :
*****************************************************
Complete in 2m 16s
best record: [epoch 2], [val loss 1.34138], [val acc 0.43193]
*****************************************************
[epoch 3], [iter 100 of 211],[train loss 1.21286], [train acc 0.46563]
[epoch 3], [iter 200 of 211],[train loss 1.20861], [train acc 0.46844]
------------------------------------------------------------
[epoch 3], [val loss 1.31471], [val acc 0.44041]
------------------------------------------------------------

EPOCH 3 :
*****************************************************
Complete in 2m 20s
best record: [epoch 3], [val loss 1.31471], [val acc 0.44041]
****************************

  _warn_prf(average, modifier, msg_start, len(result))



EPOCH 2 :
*****************************************************
Complete in 2m 12s
best record: [epoch 2], [val loss 1.59071], [val acc 0.27907]
*****************************************************
[epoch 3], [iter 100 of 211],[train loss 1.34045], [train acc 0.40719]
[epoch 3], [iter 200 of 211],[train loss 1.42088], [train acc 0.39422]
------------------------------------------------------------
[epoch 3], [val loss 1.46370], [val acc 0.32280]
------------------------------------------------------------

EPOCH 3 :
*****************************************************
Complete in 2m 16s
best record: [epoch 3], [val loss 1.46370], [val acc 0.32280]
*****************************************************
[epoch 4], [iter 100 of 211],[train loss 1.37896], [train acc 0.39313]
[epoch 4], [iter 200 of 211],[train loss 1.34662], [train acc 0.40773]
------------------------------------------------------------
[epoch 4], [val loss 1.36895], [val acc 0.40746]
-----------------------------------

  _warn_prf(average, modifier, msg_start, len(result))


[epoch 7], [iter 100 of 211],[train loss 1.26656], [train acc 0.44562]
[epoch 7], [iter 200 of 211],[train loss 1.24687], [train acc 0.44953]
------------------------------------------------------------
[epoch 7], [val loss 1.19757], [val acc 0.45712]
------------------------------------------------------------

EPOCH 7 :
*****************************************************
Complete in 2m 11s
best record: [epoch 7], [val loss 1.19757], [val acc 0.45712]
*****************************************************
[epoch 8], [iter 100 of 211],[train loss 1.20752], [train acc 0.46891]
[epoch 8], [iter 200 of 211],[train loss 1.21399], [train acc 0.46266]
------------------------------------------------------------
[epoch 8], [val loss 1.28722], [val acc 0.42188]
------------------------------------------------------------

EPOCH 8 :
*****************************************************
Complete in 2m 10s
best record: [epoch 8], [val loss 1.28722], [val acc 0.42188]
****************************

  _warn_prf(average, modifier, msg_start, len(result))


[epoch 10], [iter 100 of 211],[train loss 1.19317], [train acc 0.46156]
[epoch 10], [iter 200 of 211],[train loss 1.18935], [train acc 0.46352]
------------------------------------------------------------
[epoch 10], [val loss 1.20383], [val acc 0.45349]
------------------------------------------------------------

EPOCH 10 :
*****************************************************
Complete in 2m 12s
best record: [epoch 10], [val loss 1.20383], [val acc 0.45349]
*****************************************************

Total run Complete in 21m 49s
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Starting Training resnet_10e_AdamW_GS15
[epoch 1], [iter 100 of 211],[train loss 1.49528], [train acc 0.33734]
[epoch 1], [iter 200 of 211],[train loss 1.43453], [train acc 0.36891]
------------------------------------------------------------
[epoch 1], [val loss 1.36429], [val acc 0.41994]
------------------------------------------------------------

EPOCH 1 :
********

  _warn_prf(average, modifier, msg_start, len(result))



EPOCH 2 :
*****************************************************
Complete in 2m 19s
best record: [epoch 2], [val loss 1.29874], [val acc 0.42539]
*****************************************************
[epoch 3], [iter 100 of 211],[train loss 1.19617], [train acc 0.46375]
[epoch 3], [iter 200 of 211],[train loss 1.18115], [train acc 0.47477]
------------------------------------------------------------
[epoch 3], [val loss 1.17967], [val acc 0.47699]
------------------------------------------------------------

EPOCH 3 :
*****************************************************
Complete in 2m 25s
best record: [epoch 3], [val loss 1.17967], [val acc 0.47699]
*****************************************************
[epoch 4], [iter 100 of 211],[train loss 1.14296], [train acc 0.49938]
[epoch 4], [iter 200 of 211],[train loss 1.13370], [train acc 0.50133]
------------------------------------------------------------
[epoch 4], [val loss 1.15163], [val acc 0.50206]
-----------------------------------

  _warn_prf(average, modifier, msg_start, len(result))



EPOCH 1 :
*****************************************************
Complete in 2m 8s
best record: [epoch 1], [val loss 1.47482], [val acc 0.31359]
*****************************************************
[epoch 2], [iter 100 of 211],[train loss 1.37540], [train acc 0.39109]
[epoch 2], [iter 200 of 211],[train loss 1.34092], [train acc 0.40914]
------------------------------------------------------------
[epoch 2], [val loss 1.29970], [val acc 0.44767]
------------------------------------------------------------

EPOCH 2 :
*****************************************************
Complete in 2m 12s
best record: [epoch 2], [val loss 1.29970], [val acc 0.44767]
*****************************************************
[epoch 3], [iter 100 of 211],[train loss 1.22797], [train acc 0.47141]
[epoch 3], [iter 200 of 211],[train loss 1.21467], [train acc 0.47031]
------------------------------------------------------------
[epoch 3], [val loss 1.28035], [val acc 0.44331]
------------------------------------

In [None]:
# Scratch cell — presumably a quick check that the kernel is responsive; not
# part of the analysis (TODO confirm it can be deleted).
2+2