In [69]:
# This script explores CNN model performance based on training image resolution.
# We test the following training image sizes:
# 64 x 64, 128 x 128, 224 x 224, 384 x 384

import numpy as np
from timeit import default_timer as timer
import matplotlib.pyplot as plt
import pydot
import random

import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import Callback

        
from sklearn.metrics import recall_score, classification_report
from sklearn.datasets import make_classification

import json
import pickle
import os
import sys
sys.path.append("../python/")
from helpers import *

In [3]:
#NUM_CLASS = 2
NUM_CHANNELS = 1
RESOLUTION_LIST = [64, 128, 224, 384]
SCENARIO_LIST = ["Pr_Im", "PrPo_Im", "Pr_PoIm", "Pr_Po_Im"]

NUM_CHANNELS = 1
SCENARIO_LIST = ["Pr_Po_Im", "Pr_Im", "PrPo_Im", "Pr_PoIm"]
RESOLUTION_PERFORMANCE_METRICS_DIR = '../../results/resolution-tests'

In [2]:
def createResolutionScenarioImageDict(resolution_list, scenario_list):
    image_dict = dict.fromkeys(resolution_list)
    for p in resolution_list:
        image_dict[p] = dict.fromkeys(scenario_list)
        for s in scenario_list:
            image_dict[p][s] = np.load('../../data/tidy/preprocessed_images/size' + str(p) + '_exp5_' + s + '.npy', allow_pickle = True)
    return(image_dict)

In [6]:
image_sets = createResolutionScenarioImageDict(RESOLUTION_LIST, SCENARIO_LIST)

(64, 64, 1)
(128, 128, 1)
(224, 224, 1)
(384, 384, 1)


In [5]:
def getOptCNNHyperparams(image_size, scenario):
    #opt_params = dict.fromkeys(SCENARIO_LIST)
    #for s in SCENARIO_LIST:
    with open('../../results/models/' + str(image_size) + '/' + s + '/hyperparameters.txt') as f: 
        data = f.read() 
    opt_params_dict = json.loads(data)   
        #opt_params[s] = js
    return(opt_params_dict)

In [4]:
def constructBaseCNN(image_size, scenario):
    image_shape = (image_size, imag_size, NUM_CHANNELS)
    p_dict = getOptCNNHyperparams(image_size, scenario)
    if scenario=="Pr_Po_Im":
        num_classes = 3
    else:
        num_classes = 2
    base_model = models.Sequential([
        layers.Conv2D(filters = 64, kernel_size = p_dict['kernel_size'], strides = 2, activation="relu", padding="same", input_shape = image_shape),
        layers.MaxPooling2D(2),
        layers.Conv2D(128, 3, activation="relu", padding="same"),
        layers.Conv2D(128, 3, activation="relu", padding="same"),
        layers.MaxPooling2D(2),
        layers.Conv2D(256, 3, activation="relu", padding="same"),
        layers.Conv2D(256, 3, activation="relu", padding="same"),
        layers.MaxPooling2D(2),
        layers.Flatten(),
        
        layers.Dense(p_dict['units_1'], activation = p_dict['activation_1']),
        layers.BatchNormalization(),
        layers.Dropout(p_dict['dropout_1']), 
        
        layers.Dense(p_dict['units_2'], activation = p_dict['activation_2']), 
        layers.Dropout(p_dict['dropout_2']),
        
        layers.Dense(num_classes, activation="softmax")
    ])
    learning_rate = p_dict['learning_rate']
    return(base_model, learning_rate)

In [66]:
# The resolution test will be conducted scenario by scenario.
def testResolutionPerformance(image_size, scenario, num_epochs = 10, trial_seed = 1):  # opt_params = opt_params_64, 
    performance_dict = {}
    training_images_and_labels, test_images_and_labels = splitData(resolution_set, prop = 0.80, seed_num = trial_seed)
    training_images, training_labels = getImageAndLabelArrays(training_images_and_labels)
    validation_images, validation_labels = getImageAndLabelArrays(test_images_and_labels)
    # early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
    # batch_training_histories = Histories()
    # metrics_multiclass = Metrics(validation_images,validation_labels)  TODO
    # K.clear_session()
    input_shape = (res_key, res_key, NUM_CHANNELS)
    model =  constructBaseCNN(image_size, scenario)
    opt_learning_rate = getOptCNNHyperparams(image_size, scenario)['learning_rate']
    reset_weights(model) # re-initialize model weights
    opt = tf.keras.optimizers.Adam(learning_rate = opt_learning_rate)
    model.compile(loss='categorical_crossentropy',  optimizer = "adam", metrics = ['accuracy'])
    hist = model.fit(training_images, training_labels, batch_size = 32, epochs = num_epochs, verbose=0, validation_data=(validation_images, validation_labels)) #, callbacks=[batch_training_histories])
    performance_dict['scenario'] = scenario
    performance_dict['image_size'] = res_key
    performance_dict['metrics'] = hist
    performance_dict['best_val_accuracy'] = np.max(hist.history['val_accuracy'])
    return(performance_dict)

In [None]:
def testScenarioPerformance

In [70]:
def main(num_trials = 5):
    scenario_performance_dict = dict.fromkeys(SCENARIO_LIST)
    if not os.path.exists(RESOLUTION_PERFORMANCE_METRICS_DIR): # check if 'tidy/preprocessed_images' subdirectory does not exist
        os.makedirs(RESOLUTION_PERFORMANCE_METRICS_DIR) # if not, create it    
    for i in range(num_trials):
        for s in SCENARIO_LIST:
            print("Conducting resolution performance test; Scenario: " + s + "; Trial: " + str(i+1))
            scenario_performance_dict[s] = testResolutionPerformance(s, opt_params = opt_params_64, num_epochs = 10, trial_seed = 1 + i) #ultimately should be averaged across trials       
            scenario_filename = "resolution_performance_" + s + "_trial_" + str(i+1) + ".txt"
            #with open(os.path.join(RESOLUTION_PERFORMANCE_METRICS_DIR, scenario_filename), 'w') as file:
            pickle.dump(scenario_performance_dict, open(os.path.join(RESOLUTION_PERFORMANCE_METRICS_DIR, scenario_filename), 'w')) # use `json.loads` to do the reverse
    return

In [72]:
if __name__ == "__main__":
    main()

Conducting resolution performance test; Scenario: Pr_Po_Im; Trial: 1


TypeError: dump() missing 1 required positional argument: 'fp'

In [None]:
# Plotting 
# p1[64]['metrics'].history['loss']
# plt.close('all')
# for m in ['recall', 'precision', 'f1-score']:
#     for c in [0,1,2]:
#         plt.plot(metrics_multiclass.get(m,c), label='Class {0} {1}'.format(c,m))
        
# plt.legend(loc='lower right')
# plt.show()