Setup

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import torch
import CNN
import numpy as np

# from config_plots import global_settings
# global_settings()
import warnings
warnings.filterwarnings("ignore")

Hyper parameters

In [2]:
# defining global variables
experimentName = "test_22987_suffix50_11_noZCA"
useRandomSearch = False
numOfExperiments=100

from configParser import ConfigParser, getModelName
hyperpSearchObject = ConfigParser(experimentName).getHyperpSearchObject()

import os
experimentName = os.path.join(experimentName,"hyperp-search")
import TrialStatistics
ts = TrialStatistics.TrialStatistics(experimentName)

Use_dataLoader = 3
if Use_dataLoader == 3:
    import dataLoader3 as dataLoader
elif Use_dataLoader == 2:
    import dataLoader2 as dataLoader
else:
    import dataLoader 

Cuda support

In [3]:
# CUDA support 
if torch.cuda.is_available():
    torch.cuda.set_device(0) # 0
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    print("We are using cuda")

We are using cuda


Define objective function

In [4]:
import os
from torch import nn

all_experiment_params = []
datasetManager = dataLoader.datasetManager(experimentName)
def objective(experiment_params):
    experiment_params["numOfTrials"] = experiment_params["numOfTrials"] or 1
    
    print("experiment: ", experiment_params)
    all_experiment_params.append(experiment_params)
    
    # load images
    datasetManager.updateParams(experiment_params)
    dataset = datasetManager.getDataset()
    numberOfSpecies = len(dataset.getSpeciesList())

    for i in range(experiment_params["numOfTrials"]):
        trialName = os.path.join(experimentName, getModelName(experiment_params, i))

        # Train/Load model
        model = CNN.create_model(numberOfSpecies, experiment_params)
        train_loader, validation_loader, test_loader = datasetManager.getLoaders()
        if os.path.exists(CNN.getModelFile(trialName)):
            CNN.loadModel(model, trialName)
            print("Model {0} loaded!".format(trialName))
        else:
            loss_list, accuracy_list, epochs, time_elapsed = CNN.trainModel(train_loader, 
                                                                            validation_loader, 
                                                                            experiment_params, 
                                                                            model, trialName)
        
        # Update trial outcomes for statistics
        predlist, lbllist = CNN.getLoaderPredictions(test_loader, model)
        ts.addTrialPredictions(experiment_params, predlist, lbllist, numberOfSpecies)

        ts.addTrial(experiment_params,
                    {'loss': CNN.getCrossEntropyFromLoader(test_loader, model),
                     'accuracy': CNN.getAccuracyFromLoader(test_loader, model),
                     'time': time_elapsed,
                     'epochs': epochs
                    }, i)
                
    answer ={
        'loss': ts.getStatistic(experiment_params, 'loss', 'mean'),
        'loss-std': ts.getStatistic(experiment_params, 'loss', 'std'),
        'time': ts.getStatistic(experiment_params, 'time', 'mean'),
        'time-std': ts.getStatistic(experiment_params, 'time', 'std'),
        'epochs': ts.getStatistic(experiment_params, 'epochs', 'mean'),
        'epochs-std': ts.getStatistic(experiment_params, 'epochs', 'std'),
        'accuracy': ts.getStatistic(experiment_params, 'accuracy', 'mean'),
        'accuracy-std': ts.getStatistic(experiment_params, 'accuracy', 'std'),
        'status': STATUS_OK,}
    
    return {**experiment_params, **answer}

Iterate

In [5]:
from hyperopt import fmin, hp, STATUS_OK, Trials, space_eval, plotting, rand, tpe
import pickle

trials = Trials()
bestLoss = fmin(objective, 
                        space=hyperpSearchObject, 
                        algo=rand.suggest if useRandomSearch == False else tpe.suggest, 
                        trials=trials,
                        max_evals=numOfExperiments)

experiment:                                            
{'batchSize': 200, 'imageDimension': 196, 'image_path': '/data/BGNN_data/INHS_cropped', 'kernelSize': 9, 'kernels': (11, 21, 4, 3, 2), 'n_channels': 3, 'n_epochs': 1000, 'numOfTrials': 1, 'patience': 50, 'suffix': '50_11', 'training_count': 0.64, 'useZCAWhitening': False, 'validation_count': 0.16}
Loading dataset...                                     
  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]

100% (550 of 550) |######################| Elapsed Time: 0:00:04 Time:  0:00:04


Loading saved dataset structure...


N/A% (0 of 1000) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

Training started...


100% (1000 of 1000) |####################| Elapsed Time: 0:21:38 Time:  0:21:38


Early stopping
total number of epochs:  83
  1%|          | 1/100 [22:03<36:23:25, 1323.29s/trial, best loss: 3.0468382835388184]experiment:  {'batchSize': 200, 'imageDimension': 84, 'image_path': '/data/BGNN_data/INHS_cropped', 'kernelSize': 5, 'kernels': (11, 25, 10, 11, 4), 'n_channels': 1, 'n_epochs': 1000, 'numOfTrials': 1, 'patience': 50, 'suffix': '50_11', 'training_count': 0.64, 'useZCAWhitening': False, 'validation_count': 0.16}
Loading dataset...


100% (550 of 550) |######################| Elapsed Time: 0:00:04 Time:  0:00:04


Loading saved dataset structure...


N/A% (0 of 1000) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

Training started...


100% (1000 of 1000) |####################| Elapsed Time: 0:18:48 Time:  0:18:48


Early stopping
total number of epochs:  96
  2%|▏         | 2/100 [41:13<34:36:30, 1271.33s/trial, best loss: 2.3991026878356934]

N/A% (0 of 550) |                        | Elapsed Time: 0:00:00 ETA:  --:--:--

experiment:  {'batchSize': 200, 'imageDimension': 56, 'image_path': '/data/BGNN_data/INHS_cropped', 'kernelSize': 3, 'kernels': (13, 17, 2, 15, 1), 'n_channels': 1, 'n_epochs': 1000, 'numOfTrials': 1, 'patience': 50, 'suffix': '50_11', 'training_count': 0.64, 'useZCAWhitening': False, 'validation_count': 0.16}
Loading dataset...


100% (550 of 550) |######################| Elapsed Time: 0:00:04 Time:  0:00:04


Loading saved dataset structure...


N/A% (0 of 1000) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

Training started...


100% (1000 of 1000) |####################| Elapsed Time: 0:11:03 Time:  0:11:03


Early stopping
total number of epochs:  57
  3%|▎         | 3/100 [52:37<29:30:32, 1095.18s/trial, best loss: 2.3991026878356934]

N/A% (0 of 550) |                        | Elapsed Time: 0:00:00 ETA:  --:--:--

experiment:  {'batchSize': 200, 'imageDimension': 392, 'image_path': '/data/BGNN_data/INHS_cropped', 'kernelSize': 6, 'kernels': (15, 27, 3, 3, 16), 'n_channels': 1, 'n_epochs': 1000, 'numOfTrials': 1, 'patience': 50, 'suffix': '50_11', 'training_count': 0.64, 'useZCAWhitening': False, 'validation_count': 0.16}
Loading dataset...


100% (550 of 550) |######################| Elapsed Time: 0:00:04 Time:  0:00:04


Loading saved dataset structure...


N/A% (0 of 1000) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

Training started...


100% (1000 of 1000) |####################| Elapsed Time: 0:10:49 Time:  0:10:49


Early stopping
total number of epochs:  50
  4%|▍         | 4/100 [1:03:48<25:48:32, 967.84s/trial, best loss: 2.3981385231018066]

N/A% (0 of 550) |                        | Elapsed Time: 0:00:00 ETA:  --:--:--

experiment:  {'batchSize': 200, 'imageDimension': 308, 'image_path': '/data/BGNN_data/INHS_cropped', 'kernelSize': 9, 'kernels': (15, 23, 3, 3, 6), 'n_channels': 3, 'n_epochs': 1000, 'numOfTrials': 1, 'patience': 50, 'suffix': '50_11', 'training_count': 0.64, 'useZCAWhitening': False, 'validation_count': 0.16}
Loading dataset...


100% (550 of 550) |######################| Elapsed Time: 0:00:04 Time:  0:00:04


Loading saved dataset structure...


N/A% (0 of 1000) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

Training started...


100% (1000 of 1000) |####################| Elapsed Time: 0:18:36 Time:  0:18:36


Early stopping
total number of epochs:  89
  5%|▌         | 5/100 [1:22:45<26:52:46, 1018.59s/trial, best loss: 2.3656582832336426]

N/A% (0 of 550) |                        | Elapsed Time: 0:00:00 ETA:  --:--:--

experiment:  {'batchSize': 200, 'imageDimension': 280, 'image_path': '/data/BGNN_data/INHS_cropped', 'kernelSize': 10, 'kernels': (16, 22, 2, 13, 15), 'n_channels': 3, 'n_epochs': 1000, 'numOfTrials': 1, 'patience': 50, 'suffix': '50_11', 'training_count': 0.64, 'useZCAWhitening': False, 'validation_count': 0.16}
Loading dataset...


100% (550 of 550) |######################| Elapsed Time: 0:00:04 Time:  0:00:04


Loading saved dataset structure...


N/A% (0 of 1000) |                       | Elapsed Time: 0:00:00 ETA:  --:--:--

Training started...


100% (1000 of 1000) |####################| Elapsed Time: 0:15:33 Time:  0:15:33


Early stopping
total number of epochs:  78
  6%|▌         | 6/100 [1:38:38<26:05:16, 999.11s/trial, best loss: 2.3656582832336426] 

N/A% (0 of 550) |                        | Elapsed Time: 0:00:00 ETA:  --:--:--

experiment:  {'batchSize': 200, 'imageDimension': 112, 'image_path': '/data/BGNN_data/INHS_cropped', 'kernelSize': 9, 'kernels': (9, 19, 6, 8, 6), 'n_channels': 3, 'n_epochs': 1000, 'numOfTrials': 1, 'patience': 50, 'suffix': '50_11', 'training_count': 0.64, 'useZCAWhitening': False, 'validation_count': 0.16}
Loading dataset...


100% (550 of 550) |######################| Elapsed Time: 0:00:05 Time:  0:00:05
job exception: Calculated padded input size per channel: (7 x 7). Kernel size: (9 x 9). Kernel size can't be greater than actual input size


Loading saved dataset structure...
  6%|▌         | 6/100 [1:38:44<25:47:04, 987.49s/trial, best loss: 2.3656582832336426]


RuntimeError: Calculated padded input size per channel: (7 x 7). Kernel size: (9 x 9). Kernel size can't be greater than actual input size

analyze

In [None]:
ts.showStatistics()

In [None]:
ts.showStatistics(False)

In [None]:


import pandas as pd
from IPython.display import display, HTML

# bestParams = space_eval(hyperpSearchObject, bestLoss)
# print("best params = ", pd.DataFrame(bestParams, index=[0]))
best_trial = sorted(trials.results, key=lambda x: x['loss'], reverse=False)[0]
best_trial['kernels'] = str(' '.join([str(elem) for elem in best_trial['kernels']]))
print("Best trial")
display(HTML(pd.DataFrame(best_trial, index=[0]).to_html()))

# save trials
pickle.dump(trials, open(os.path.join(experimentName,"trials.p"), "wb"))
ts.saveStatistics()
ts.saveStatistics(False)

Generate confusion matrices

In [None]:
for experiment_params in all_experiment_params:    
    print("experiment: ", experiment_params)
    
    datasetManager.updateParams(experiment_params)
    dataset = datasetManager.getDataset()
    speciesList = dataset.getSpeciesList()
    ts.printTrialConfusionMatrix(experiment_params, speciesList, True)