# Modeling - First Pass

### Installs, Packages, Seeds

In [12]:
%pip install efficientnet_pytorch

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p38/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [13]:
%pip install torch
# %pip install torchvision

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p38/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [14]:
%matplotlib inline

# python libraries
import os
import cv2
import itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
import seaborn as sns
from itertools import combinations, product

# import imblearn
import logging
from tqdm import tqdm
from glob import glob
from PIL import Image
import ipywidgets

# pytorch libraries
import torch
from torch import optim,nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
from torchvision import models,transforms
from efficientnet_pytorch import EfficientNet

# sklearn libraries
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# google drive
# from google.colab import drive # Connect colab to google drive

# custom modeling libraries
from build_model2 import initialize_model, load_split_data, build_loader, evaluate, train_model, model_scores, eval_model, add_results

# other
import importlib

In [15]:
# Reload build_model2 and rebind the helper names below, so this kernel uses
# the module's current definitions rather than the ones imported earlier.
import build_model2
importlib.reload(build_model2)

from build_model2 import (
    initialize_model,
    load_split_data,
    build_loader,
    evaluate,
    train_model,
    model_scores,
    eval_model,
    add_results,
)

# import matplotlib as mpl
# print(mpl.get_cachedir())

## Grid Components

In [16]:
# Values explored along each axis of the grid.
epoch_search = [5, 10, 15, 20]
optim_search = ['SGD', 'Adam', 'AdamW']
model_search = ['resnet', 'vgg', 'efficientnet']

# Every (epochs, optimizer, model) combination; the model name varies fastest.
prods = list(product(epoch_search, optim_search, model_search))

# Unzip the combinations once into one tuple per grid axis.
epoch_values, optim_values, model_values = zip(*prods)

es = pd.Series(epoch_values, name='epochs', dtype='int')
optims = pd.Series(optim_values, name='optimizer')
mods = pd.Series(model_values, name='pretrained_model')

# One row per configuration, indexed 0..len(prods)-1.
g_search = pd.concat([es, optims, mods], axis=1)

## Parameters

In [17]:
# Run configuration shared by every grid-search iteration.
# Entries marked "NEEDS UPDATE" start as placeholders; the grid-search loop
# fills in pretrained_model, epochs, model, optimizer, criterion and
# tuned_model_name for each configuration.
model_dict = {'pretrained_model': None, 
              'epochs': None, # NEEDS UPDATE
              'home_directory': '/home/ec2-user/SageMaker/teledermatologyAI_capstone',
              'mod_directory': '/home/ec2-user/SageMaker/teledermatologyAI_capstone/model/gridsearch2',
              'csv_name': 'full_data_rename',
              'split': 'split_3',
              'cl': 'label_0',
              'dev_state': False,
              'dev_sample': 15000,
              'seed': 99,
              'lr': .0035,                  # from prior gridsearch
              'batch_size':64,
              'num_workers':24,
              'transform':3,
              'results_file':'gridsearch_results',
              'model':None, # NEEDS UPDATE
              'device': torch.device('cuda:0'), # NEEDS UPDATE
              'optimizer': None, # NEEDS UPDATE
              'criterion': None, # NEEDS UPDATE
              'tuned_model_name': None, # NEEDS UPDATE
              'show_val_cm': False,
             }

# Seed every random number generator the run may draw from.
# torch.manual_seed seeds torch's default (CPU) generator, which the original
# cell left unseeded; torch.cuda.manual_seed_all covers every visible GPU
# rather than only the current one. Both CUDA calls are no-ops without CUDA.
np.random.seed(model_dict['seed'])
torch.manual_seed(model_dict['seed'])
torch.cuda.manual_seed_all(model_dict['seed'])

# Check GPU
if torch.cuda.is_available():
    print('GPU Type:', torch.cuda.get_device_name())
    print('GPU Count:', torch.cuda.device_count())
else:
    # Report the problem here instead of failing with an opaque CUDA error.
    print('WARNING: CUDA is not available, but device is set to', model_dict['device'])

# Project root; the next cell changes the working directory to it.
HOME = model_dict['home_directory']

GPU Type: Tesla T4
GPU Count: 1


In [18]:
# Work from the project root. Plain Python replaces the bare `cd $HOME`
# automagic so the cell does not depend on IPython's automagic setting and
# still works when the notebook is exported to a script.
os.chdir(HOME)
print(os.getcwd())

/home/ec2-user/SageMaker/teledermatologyAI_capstone


## Data

In [19]:
# Load the full metadata table and its train/test/validation partitions,
# driven entirely by the shared run configuration.
# NOTE(review): the unpacking order (data, train, test, val) must match what
# load_split_data returns for mode='all' — confirm against build_model2.
load_args = {
    'directory': model_dict['home_directory'],
    'csv_name': model_dict['csv_name'],
    'data_split': model_dict['split'],
    'label': model_dict['cl'],
    'mode': 'all',
    'dev_state': model_dict['dev_state'],
    'dev_sample': model_dict['dev_sample'],
    'seed': model_dict['seed'],
}
data, train, test, val = load_split_data(**load_args)

In [20]:
# Build the label_idx -> label lookup used for evaluation and store it in the
# shared run configuration.
labels_idx = np.sort(data['label_idx'].unique())

# One row per distinct (label, label_idx) pair, ordered by index.
label_map = (
    data[['label', 'label_idx']]
    .drop_duplicates()
    .sort_values('label_idx')
)

label_dict = dict(zip(label_map['label_idx'], label_map['label']))
model_dict['label_dict'] = label_dict

## Grid Search Loop

In [21]:
# Skip the grid rows whose index label is below RESUME_FROM.
# NOTE(review): presumably rows 0 and 1 were already trained in an earlier
# session — confirm before changing this value.
# Label-based .loc slicing (instead of positional [2:]) keeps this cell
# idempotent: g_search retains its original integer index labels, so running
# the cell again selects the same rows instead of dropping two more.
RESUME_FROM = 2
g_search = g_search.loc[RESUME_FROM:]

In [None]:
# Gridsearch
#
# For every (epochs, optimizer, pretrained_model) row of g_search: build a
# fresh model, optimizer and data loaders, train through train_model, pickle
# the predictions and append one summary row to the results file.

# Per-channel normalisation statistics shared by the train and val/test pipelines.
NORM_MEAN = [.541, .414, .382]
NORM_STD = [.256, .215, .209]

# Optimizer name -> constructor. Each one is built with model_dict['lr'].
OPTIMIZERS = {
    'SGD': optim.SGD,
    'Adam': optim.Adam,
    'AdamW': optim.AdamW,
}

for gs_idx, row in g_search.iterrows():

    # extract gridsearch features
    me = row['epochs']
    mn = row['pretrained_model']
    mo = row['optimizer']

    # Fail loudly on an unknown optimizer. Without this check the previous
    # iteration's optimizer (bound to the previous model's parameters) would
    # silently be reused.
    if mo not in OPTIMIZERS:
        raise ValueError(
            f'Unknown optimizer {mo!r} in grid row {gs_idx}; '
            f'expected one of {sorted(OPTIMIZERS)}'
        )

    model_dict['epochs'] = me
    model_dict['pretrained_model'] = mn
    model_dict['optimizer_name'] = mo
    model_dict['alias'] = gs_idx
    model_dict['tuned_model_name'] = f'{mn}_{me}e_{mo}_GS{gs_idx}'
    direc = model_dict['mod_directory']
    nam = model_dict['tuned_model_name']

    # set batch size
    # NOTE(review): efficientnet runs with batch size 3, presumably to fit in
    # GPU memory — confirm.
    if model_dict['pretrained_model'] == 'efficientnet':
        model_dict['batch_size'] = 3
    else:
        model_dict['batch_size'] = 64

    # Load each model (all layers trainable, starting from pretrained weights)
    model_ft, input_size = initialize_model(model_name = model_dict['pretrained_model'],
                                            num_classes = len(data.label.unique()),
                                            feature_extract = False,
                                            use_pretrained=True)

    # Move model to GPU
    model = model_ft.to(model_dict['device'])

    model_dict['model'] = model
    model_dict['criterion'] = nn.CrossEntropyLoss().to(model_dict['device'])

    # Build the optimizer over this iteration's model parameters.
    model_dict['optimizer'] = OPTIMIZERS[mo](model.parameters(), lr=model_dict['lr'])

    # Resize target is larger than the crop so the centre crop keeps 85% of
    # the resized shorter side.
    model_dict['resize'] = int(input_size/.85)

    # Set Transforms
    # Deterministic prefix/suffix used by every pipeline.
    transform_header = [
                        transforms.Resize(model_dict['resize']), #255
                        transforms.CenterCrop(input_size)
                        ]

    # Training-only augmentation.
    # NOTE(review): the image is already input_size x input_size after the
    # CenterCrop above, so RandomCrop (d) looks like a no-op — confirm whether
    # the crop was meant to run before the centre crop.
    transform_body = [
                      transforms.RandomHorizontalFlip(), # a
                      transforms.RandomVerticalFlip(), # b
                      transforms.RandomRotation(20), # c
                      transforms.RandomCrop(size=(input_size,input_size)), # d
#                       transforms.RandomInvert(), transforms.RandomPosterize(bits=2), # e
#                       transforms.RandomAdjustSharpness(sharpness_factor=2), # f
#                       transforms.RandomSolarize(threshold=192.0), # g
#                       transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1) # h
                      ]

    transform_footer = [transforms.ToTensor(),
                        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD)]

    # Validation/test use the deterministic steps only (no augmentation).
    val_transform = transforms.Compose(transform_header + transform_footer)
    train_transform = transforms.Compose(transform_header + transform_body + transform_footer)

    # Loaders are rebuilt each iteration because batch size and input size
    # depend on the model being trained.
    loader_kwargs = {'batch_size': model_dict['batch_size'],
                     'num_workers': model_dict['num_workers']}
    test_loader = build_loader(mode = 'test', df = test, transform = val_transform, **loader_kwargs)
    val_loader = build_loader(mode = 'val', df = val, transform = val_transform, **loader_kwargs)
    train_loader = build_loader(mode = 'train', df = train, transform = train_transform, **loader_kwargs)

    loaders = {'train_loader': train_loader,
               'val_loader': val_loader,
               'test_loader': test_loader}
    model_dict['loader'] = loaders

    pred_df, val_scores, tot_time = train_model(model_dict = model_dict)

    # NOTE(review): assumes train_model returns exactly these 11 scores in
    # this order, with d_0..d_4 as per-class accuracies — confirm against
    # build_model2.
    acc, f1, f2, f5, prec, rec, d_0, d_1, d_2, d_3, d_4 = val_scores

    # Persist the predictions for this configuration.
    pred_df.to_pickle(f'{direc}/{nam}_preds.pkl')

    # One summary row for the shared results file.
    col_dict = {
             'tuned_model': model_dict['tuned_model_name'],
             'transform': model_dict['transform'],
             'lr': model_dict['lr'],
             'pretrained_model': model_dict['pretrained_model'],
             'optimizer': model_dict['optimizer_name'],
             'epochs': model_dict['epochs'],
             'batch_size': model_dict['batch_size'],
             'workers': model_dict['num_workers'],
             'train_time': tot_time,
             'data_split': model_dict['split'],
             'label_set': model_dict['cl'],
             'accur': acc,
             'F1': f1,
             'F0.5': f5,
             'F2': f2,
             'benign_accur': d_0,
             'noncancerous_accur': d_1,
             'malignant_accur': d_2,
             'infection_accur': d_3,
             'unclassified_accur': d_4
    }

    add_results(model_dict['results_file'], direc, pd.DataFrame(col_dict, index = [gs_idx]))
    print('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')

Loaded pretrained weights for efficientnet-b7
Starting Training efficientnet_5e_SGD_GS2
[epoch 1], [iter 100 of 4500],[train loss 1.57537], [train acc 0.25333]
[epoch 1], [iter 200 of 4500],[train loss 1.55926], [train acc 0.28167]
[epoch 1], [iter 300 of 4500],[train loss 1.53391], [train acc 0.30444]
[epoch 1], [iter 400 of 4500],[train loss 1.51185], [train acc 0.32083]
[epoch 1], [iter 500 of 4500],[train loss 1.49566], [train acc 0.33800]
[epoch 1], [iter 600 of 4500],[train loss 1.48120], [train acc 0.34889]
[epoch 1], [iter 700 of 4500],[train loss 1.46117], [train acc 0.36000]
[epoch 1], [iter 800 of 4500],[train loss 1.44990], [train acc 0.36292]
[epoch 1], [iter 900 of 4500],[train loss 1.44659], [train acc 0.36407]
[epoch 1], [iter 1000 of 4500],[train loss 1.43496], [train acc 0.36967]
[epoch 1], [iter 1100 of 4500],[train loss 1.43112], [train acc 0.37424]
[epoch 1], [iter 1200 of 4500],[train loss 1.42509], [train acc 0.38111]
[epoch 1], [iter 1300 of 4500],[train loss 1.

  _warn_prf(average, modifier, msg_start, len(result))



EPOCH 1 :
*****************************************************
Complete in 2m 14s
best record: [epoch 1], [val loss 1.69118], [val acc 0.20422]
*****************************************************
[epoch 2], [iter 100 of 211],[train loss 1.44490], [train acc 0.35734]
[epoch 2], [iter 200 of 211],[train loss 1.41414], [train acc 0.36820]
------------------------------------------------------------
[epoch 2], [val loss 1.62884], [val acc 0.27737]
------------------------------------------------------------

EPOCH 2 :
*****************************************************
Complete in 2m 18s
best record: [epoch 2], [val loss 1.62884], [val acc 0.27737]
*****************************************************
[epoch 3], [iter 100 of 211],[train loss 1.39144], [train acc 0.37719]
[epoch 3], [iter 200 of 211],[train loss 1.36434], [train acc 0.39320]
------------------------------------------------------------
[epoch 3], [val loss 1.39426], [val acc 0.37827]
-----------------------------------

In [None]:
# NOTE(review): leftover scratch expression that uses nothing from the
# analysis — candidate for removal before sharing the notebook.
2+2