In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPool2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
import itertools
import os
import shutil
import random
import glob
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
%matplotlib inline
import hashlib
import json
import time
from dbCon import PGCON

# Ask TensorFlow to grow GPU memory on demand.
# The setting is applied to every detected GPU (TensorFlow expects it to be the
# same on all of them) and is skipped on CPU-only machines, where indexing the
# empty device list with [0] would raise IndexError.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpuDevice in physical_devices:
    tf.config.experimental.set_memory_growth(gpuDevice, True)

## Get data and use augmentation

In [2]:
# Directories of the three dataset splits; each one is passed to
# flow_from_directory further down in the notebook.
# NOTE(review): the "_224_224" suffix suggests the images were already resized
# to 224x224 on disk — confirm.
trainPath = 'imageRepo/ISICArchive_v1/train_224_224'
validPath = 'imageRepo/ISICArchive_v1/validation_224_224'
testPath = 'imageRepo/ISICArchive_v1/test_224_224'

In [3]:
# One generator per split, each scaling pixel values from [0, 255] to [0, 1].
# `rescale` must be passed by keyword: the first positional parameter of
# ImageDataGenerator is `featurewise_center`, so ImageDataGenerator(1./255)
# switched on featurewise centering and left the pixel values unscaled.
trainDataGen = ImageDataGenerator(rescale=1./255)
testDataGen = ImageDataGenerator(rescale=1./255)
validationDataGen = ImageDataGenerator(rescale=1./255)

In [4]:
def _directoryBatches(generator, directory, **overrides):
    """
    Stream batches of 32 images (target size 224x224) from `directory`,
    using the two classes 'benign' and 'malignant'.

    Extra keyword arguments are forwarded to flow_from_directory unchanged.
    """
    options = {
        'directory': directory,
        'target_size': (224, 224),
        'classes': ['benign', 'malignant'],
        'batch_size': 32,
    }
    options.update(overrides)
    return generator.flow_from_directory(**options)


# Training and validation batches use the generator's default ordering.
trainBatches = _directoryBatches(trainDataGen, trainPath)
validBatches = _directoryBatches(validationDataGen, validPath)

# Shuffling is switched off for the test set, so its batches come in a
# stable order.
testBatches = _directoryBatches(testDataGen, testPath, shuffle=False)


Found 9142 images belonging to 2 classes.
Found 1144 images belonging to 2 classes.
Found 1142 images belonging to 2 classes.


## Supporting functions for generating models with different hyperparameter settings

In [5]:
def modelCreate(hyperParameters):
    """
    Build and compile a CNN classifier from a dict of hyperparameter values.

    Architecture, in order:
      1. A 3x3 'same' ReLU convolution on the 224x224x3 input, followed by
         2x2 max-pooling.
      2. `numberOfConvBlocks` blocks, each made of
         `numberOfConvLayersPerBlock` 3x3 'same' ReLU convolutions and closed
         by 2x2 max-pooling. Block i (counting from 1) uses
         i * `numberOfConvFiltersInFirstLayer` filters.
      3. `numberOfDenseLayerBlocks` pairs of Dropout(`dropOutInDenseLayer`)
         and a 64-unit ReLU Dense layer.
      4. A 2-unit softmax output layer.

    Every pooling layer halves the feature map, so the 224x224 input is down
    to 1x1 after the 7th pooling; that leaves room for at most 6 blocks.

    Parameters
    ----------
    hyperParameters : dict
        Must contain the keys 'numberOfConvBlocks',
        'numberOfConvLayersPerBlock', 'numberOfConvFiltersInFirstLayer',
        'numberOfDenseLayerBlocks', 'dropOutInDenseLayer' and 'learningRate'.
        Example:
        {'numberOfConvBlocks': 6, 'numberOfConvLayersPerBlock': 2,
         'numberOfConvFiltersInFirstLayer': 32, 'numberOfDenseLayerBlocks': 3,
         'dropOutInDenseLayer': 0.5, 'learningRate': 1e-05}

    Returns
    -------
    The compiled Sequential model (Adam optimizer, categorical cross-entropy
    loss, accuracy metric).

    Raises
    ------
    KeyError
        If one of the required keys is missing.
    """
    baseFilters = hyperParameters['numberOfConvFiltersInFirstLayer']

    model = Sequential()

    # Input stage: one convolution + pooling before the configurable blocks
    model.add(Conv2D(
        baseFilters,
        kernel_size=(3, 3),
        padding='same',
        activation='relu',
        input_shape=(224, 224, 3))
    )
    model.add(MaxPool2D(pool_size=(2, 2)))

    # Convolution blocks; the filter count grows linearly with the block number
    for blockNumber in range(1, hyperParameters['numberOfConvBlocks'] + 1):
        for _ in range(hyperParameters['numberOfConvLayersPerBlock']):
            model.add(
                Conv2D(filters=blockNumber * baseFilters, kernel_size=(3, 3), padding='same', activation='relu')
            )
        # Finish every block of convolutions with a max-pooling layer
        model.add(MaxPool2D(pool_size=(2, 2)))

    # Flatten the data
    model.add(Flatten())

    # Dense head. The hidden layers need a non-linearity: without one, the
    # stacked Dense layers collapse into a single linear map and extra dense
    # blocks add no modelling capacity.
    for _ in range(hyperParameters['numberOfDenseLayerBlocks']):
        model.add(Dropout(hyperParameters['dropOutInDenseLayer']))
        model.add(Dense(units=64, activation='relu'))

    # Add the final layer, classification
    model.add(Dense(units=2, activation='softmax'))

    model.compile(
        optimizer=Adam(learning_rate=hyperParameters['learningRate']),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

In [6]:
def getRandomHyperparameters(hyperparameters):
    """
    Pick one random value for every hyperparameter.

    `hyperparameters` maps each hyperparameter name to a sequence of candidate
    values; the returned dict maps the same names to a single value drawn
    with random.choice.
    """
    return {
        name: random.choice(candidates)
        for name, candidates in hyperparameters.items()
    }

In [7]:
def getHyperparameterSet(numberOfModels, hyperparameters):
    """
    Draw `numberOfModels` distinct random hyperparameter combinations.

    Combinations are drawn with getRandomHyperparameters and kept only if
    their JSON serialization (keys sorted) has not been seen before.

    Parameters
    ----------
    numberOfModels : int
        How many distinct combinations to return. Zero or a negative number
        gives an empty dict.
    hyperparameters : dict
        Maps each hyperparameter name to a list of candidate values.

    Returns
    -------
    dict
        Exactly `numberOfModels` entries. Each key is an integer id derived
        from the SHA-256 digest of the combination's JSON text, so it is the
        same in every Python process (the built-in hash() of a str is salted
        per process). Each value is the combination itself.

    Raises
    ------
    ValueError
        If more combinations are requested than distinct ones exist. Without
        this check the sampling loop would never terminate.
    """
    # Count the distinct combinations, using the same JSON notion of equality
    # as the uniqueness check below.
    distinctCombinations = 1
    for hpValues in hyperparameters.values():
        distinctCombinations *= len({json.dumps(value, sort_keys=True) for value in hpValues})

    if numberOfModels > distinctCombinations:
        raise ValueError(
            "Requested " + str(numberOfModels) + " unique hyperparameter sets, but only "
            + str(distinctCombinations) + " distinct combinations exist"
        )

    hyperparametersToUse = {}
    # Stop at exactly numberOfModels entries (the previous `i <= numberOfModels`
    # counter produced one combination too many).
    while len(hyperparametersToUse) < numberOfModels:
        candidate = getRandomHyperparameters(hyperparameters)
        serialized = json.dumps(candidate, sort_keys=True)
        hpHash = int(hashlib.sha256(serialized.encode('utf-8')).hexdigest(), 16)

        # Only keep this combination of hyperparameters if unique
        if hpHash not in hyperparametersToUse:
            hyperparametersToUse[hpHash] = candidate
    return hyperparametersToUse

In [10]:
# Search space for the random search: each key lists its candidate values
hyperparameters = {
    'numberOfConvBlocks': [2, 3, 4, 5, 6],
    'numberOfConvLayersPerBlock': [1, 2, 3, 4],
    'numberOfConvFiltersInFirstLayer': [32, 64, 128],
    'numberOfDenseLayerBlocks': [1, 2, 3, 4],
    'dropOutInDenseLayer': [0.2, 0.3, 0.4, 0.5],
    'learningRate': [0.01, 0.001, 0.0001, 0.00001],
}

# Sample the combinations that will be trained
numberOfModels = 25
hyperparametersToUse = getHyperparameterSet(numberOfModels, hyperparameters)

## Train the models and store the result in a local database for evaluation

In [None]:
# Fit models and store result
nameOfTraining = "Base training, from scratch (50 ep), nth run ()"


def sqlQuote(text):
    """Double every single quote so `text` can sit inside a '...' SQL string literal."""
    return text.replace("'", "''")


# Train one model per sampled hyperparameter combination and write one row per
# model to californium.traininglogs: the training history on success, or an
# {'Exception': 0} marker row on failure.
# NOTE(review): the statements are still assembled as strings because the
# PGCON API is not visible here; switch to parameterized queries if
# insertRow supports them.
for hpId, hpSettings in hyperparametersToUse.items():
    # Many models are built in this one process; release the global Keras
    # state left by the previous one so memory does not keep accumulating.
    keras.backend.clear_session()
    try:
        model = modelCreate(hpSettings)
        timeStart = time.time()
        # Fit model
        history = model.fit(trainBatches,
            steps_per_epoch=len(trainBatches),
            validation_data=validBatches,
            validation_steps=len(validBatches),
            epochs=50,
            verbose=1
        )
        elapsedTime = time.time() - timeStart
        # default=float converts NumPy scalars that json cannot serialize on
        # its own, so a finished training run is not thrown away by a
        # TypeError at logging time.
        historyJson = json.dumps(history.history, default=float)
        db = PGCON()
        numRows = db.insertRow("insert into californium.traininglogs (\"name\", \"data\", \"hyperparameters\", \"timeelapsed\") values ('" + sqlQuote(nameOfTraining) + "', '" + sqlQuote(historyJson) + "','" + sqlQuote(json.dumps(hpSettings)) + " ', " + str(elapsedTime) + ");")
        print(numRows)
    except Exception as trainingError:
        # Catch Exception rather than everything: a keyboard/kernel interrupt
        # now stops the search instead of being logged as a failed model.
        print('Exception encountered')
        print(repr(trainingError))
        db = PGCON()
        numRows = db.insertRow("insert into californium.traininglogs (\"name\", \"data\", \"hyperparameters\") values ('" + sqlQuote(nameOfTraining) + "', '" + sqlQuote(json.dumps({'Exception':0})) + "', '" + sqlQuote(json.dumps(hpSettings)) + "');")
        print(numRows)
        

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
1
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50


Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
1
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Ep

Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
1
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
1
Epoch 1/50
Epoch 2/50
Epoch 3/50


Epoch 32/50
Epoch 33/50
Epoch 34/50

In [None]:
# Evaluate on the held-out testset
# NOTE: `model` is the variable left behind by the training loop above, i.e.
# the model built for the last hyperparameter combination that was tried —
# not necessarily the best-scoring one.
result = model.evaluate(testBatches)

In [None]:
# Predict
# Runs the same `model` over every batch of the test generator. testBatches
# was created with shuffle=False, so its batches arrive in a fixed order.
# NOTE(review): the two output columns presumably follow the generator's
# class list ['benign', 'malignant'] — confirm.
predictions = model.predict(x=testBatches, steps=len(testBatches))

In [None]:
#model.save('model.h5')