Setup

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import torch
import CNN
import numpy as np

# from config_plots import global_settings
# global_settings()
import warnings
warnings.filterwarnings("ignore")

Hyperparameters

In [2]:
# Global experiment settings shared by the cells below.
experimentName = "test_22987_suffix50_11"
useRandomSearch = False
numOfExperiments = 100

# Build the hyperparameter search object from the experiment's configuration.
# This must happen before experimentName is extended below, because the
# parser is given the bare experiment name.
from configParser import ConfigParser, getModelName
experimentConfig = ConfigParser(experimentName)
hyperpSearchObject = experimentConfig.getHyperpSearchObject()

# From here on experimentName refers to the "hyperp-search" sub-path,
# which is also what the trial statistics object is created with.
experimentName = "{0}/hyperp-search".format(experimentName)
import TrialStatistics
ts = TrialStatistics.TrialStatistics(experimentName)

# Choose which module is bound to the name `dataLoader`.
Use_old_dataLoader = False
if not Use_old_dataLoader:
    import dataLoader2 as dataLoader
else:
    import dataLoader

CUDA support

In [3]:
# CUDA support: when a CUDA device is available, select device 0 and make
# CUDA float tensors the default tensor type. Otherwise nothing is changed.
cuda_is_available = torch.cuda.is_available()
if cuda_is_available:
    torch.cuda.set_device(0)  # index of the GPU to use
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    print("We are using cuda")

We are using cuda


Define objective function

In [4]:
import os
from torch import nn

# Every configuration passed to objective() is recorded here so that later
# cells can iterate over the evaluated configurations.
all_experiment_params = []
def objective(experiment_params):
    """Evaluate one hyperparameter configuration for the hyperopt search.

    For each of the configuration's trials, a model is either loaded from
    disk (when its model file already exists) or trained from scratch, then
    scored on the test loader. Per-trial outcomes are recorded in the global
    ``ts`` statistics object.

    Args:
        experiment_params: dict describing the configuration. It is mutated:
            ``"numOfTrials"`` is set to 1 when missing or falsy.

    Returns:
        dict containing every entry of ``experiment_params`` plus the mean
        and std of loss, time, epochs and accuracy over the trials, and a
        ``'status'`` entry set to hyperopt's ``STATUS_OK``.
    """
    # Imported locally so the function does not rely on a later notebook cell
    # having already brought STATUS_OK into the global namespace.
    from hyperopt import STATUS_OK

    # .get() keeps the "default to one trial" intent when the key is absent.
    experiment_params["numOfTrials"] = experiment_params.get("numOfTrials") or 1

    print("experiment: ", experiment_params)
    all_experiment_params.append(experiment_params)

    # load images
    dataset = dataLoader.FishDataset(experiment_params)
    numberOfSpecies = len(dataset.getSpeciesList())

    for i in range(experiment_params["numOfTrials"]):
        trialName = experimentName+"/"+getModelName(experiment_params, i)

        # Train/Load model
        model = CNN.CNN(numberOfSpecies, experiment_params)
        if os.path.exists(CNN.getModelFile(trialName)):
            CNN.loadModel(model, trialName)
            test_loader = dataLoader.loadTestLoader(trialName)
            # No training happened in this run, so training time and epoch
            # count are unknown. Record NaN instead of leaving the names
            # undefined (NameError) or reusing the previous trial's values.
            # NOTE(review): confirm TrialStatistics aggregates NaN acceptably.
            time_elapsed = float("nan")
            epochs = float("nan")
            print("Model {0} loaded!".format(trialName))
        else:
            train_loader, validation_loader, test_loader = dataLoader.getLoadersFromDataset(dataset,
                                                                                            experiment_params,
                                                                                            trialName)
            # Only the epoch count and elapsed time are used below.
            _, _, epochs, time_elapsed = CNN.trainModel(train_loader,
                                                        validation_loader,
                                                        experiment_params,
                                                        model, trialName)

        # Update trial outcomes for statistics
        predlist, lbllist = CNN.getLoaderPredictions(test_loader, model)
        ts.addTrialPredictions(experiment_params, predlist, lbllist, numberOfSpecies)

        ts.addTrial(experiment_params,
                    {'loss': CNN.getCrossEntropyFromLoader(test_loader, model),
                     'accuracy': CNN.getAccuracyFromLoader(test_loader, model),
                     'time': time_elapsed,
                     'epochs': epochs
                    }, i)

    # Aggregate the per-trial outcomes; 'loss' is the value hyperopt minimizes.
    answer = {
        'loss': ts.getStatistic(experiment_params, 'loss', 'mean'),
        'loss-std': ts.getStatistic(experiment_params, 'loss', 'std'),
        'time': ts.getStatistic(experiment_params, 'time', 'mean'),
        'time-std': ts.getStatistic(experiment_params, 'time', 'std'),
        'epochs': ts.getStatistic(experiment_params, 'epochs', 'mean'),
        'epochs-std': ts.getStatistic(experiment_params, 'epochs', 'std'),
        'accuracy': ts.getStatistic(experiment_params, 'accuracy', 'mean'),
        'accuracy-std': ts.getStatistic(experiment_params, 'accuracy', 'std'),
        'status': STATUS_OK,}

    return {**experiment_params, **answer}

Iterate

In [None]:
from hyperopt import fmin, hp, STATUS_OK, Trials, space_eval, plotting, rand, tpe
import pickle

# Run the hyperparameter search; every evaluation is recorded in `trials`.
# NOTE: despite its name, `bestLoss` holds whatever fmin returns (per the
# hyperopt documentation, the best point found), not a loss value.
trials = Trials()
bestLoss = fmin(objective,
                space=hyperpSearchObject,
                # The original condition was inverted (`useRandomSearch == False`
                # selected random search). Random search is now used only when
                # the flag is set; otherwise TPE is used.
                algo=rand.suggest if useRandomSearch else tpe.suggest,
                trials=trials,
                max_evals=numOfExperiments)

experiment:                                            
{'batchSize': 200, 'imageDimension': 280, 'image_path': '/data/BGNN_data/INHS_cropped', 'kernelSize': 6, 'kernels': (15, 23, 5, 2, 2), 'n_channels': 1, 'n_epochs': 1000, 'numOfTrials': 1, 'patience': 50, 'suffix': '50_11', 'training_count': 0.64, 'useZCAWhitening': True, 'validation_count': 0.16}
Loading dataset...                                     
  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]

100% (550 of 550) |######################| Elapsed Time: 0:00:04 Time:  0:00:04


Loading saved dataset structure...


Analyze

In [None]:
# Show the collected statistics in both modes supported by showStatistics.
ts.showStatistics()
ts.showStatistics(False)

import pandas as pd
from IPython.display import display, HTML

# bestParams = space_eval(hyperpSearchObject, bestLoss)
# print("best params = ", pd.DataFrame(bestParams, index=[0]))

# Pick the result with the lowest loss. Work on a shallow copy so the entry
# stored inside `trials` is not modified: otherwise re-running this cell would
# re-join an already-joined string, and the pickled trials would contain the
# display string instead of the original kernels value.
best_trial = dict(min(trials.results, key=lambda x: x['loss']))
# Flatten the kernels sequence into one string so it fits a single table cell.
best_trial['kernels'] = ' '.join(str(elem) for elem in best_trial['kernels'])
print("Best trial")
display(HTML(pd.DataFrame(best_trial, index=[0]).to_html()))

# save trials (the context manager guarantees the file is flushed and closed)
with open(experimentName+"/trials.p", "wb") as trials_file:
    pickle.dump(trials, trials_file)
ts.saveStatistics()
ts.saveStatistics(False)

Generate confusion matrices

In [None]:
# Print a confusion matrix for every configuration recorded in
# all_experiment_params.
for params in all_experiment_params:
    print("experiment: ", params)

    # Only the species list is needed from the dataset here.
    # NOTE(review): the meaning of the second FishDataset argument (False)
    # is not visible in this notebook — confirm against dataLoader.
    species = dataLoader.FishDataset(params, False).getSpeciesList()
    ts.printTrialConfusionMatrix(params, species, True)