In [19]:
import ast
import os
import shutil

import numpy as np
import pandas as pd
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Dropout, Flatten
from keras.models import Sequential
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator

**TODO:** Replace the cell below with code that loads the real data.

In [51]:
# Split the labelled images into test and training directories.
# Every image in `source1` is copied (not moved) into
# <destination>/<class folder>/ so that a directory-based loader can derive
# the label from the folder name.
source1 = "images/small"
dest11 = "cnn_images/test"
dest12 = "cnn_images/training"
classes = {0:"/crohns", 1: "/bowel", 2:"/none"}
TEST_FRACTION = 0.2  # probability that an image is assigned to the test set
SPLIT_SEED = 0       # fixed seed so that re-running the cell repeats the same split

# os.listdir returns names in arbitrary order; sorting makes the seeded split
# below assign each file to the same side on every run.
files = sorted(os.listdir(source1))
print(len(files))

# Assumes classifications.txt holds a Python dict literal mapping the sample
# name (file name without extension) to a class id that is a key of `classes`
# -- TODO confirm. NOTE(review): this used to be eval(); literal_eval accepts
# only literals, so the file can no longer execute arbitrary code.
with open('classifications.txt', 'r') as label_file:
    samples_to_label = ast.literal_eval(label_file.read())
print(len(samples_to_label))

# copyfile does not create missing folders, so make every class folder first.
for dest_root in (dest11, dest12):
    for class_dir in classes.values():
        os.makedirs(dest_root + class_dir, exist_ok=True)

# Without a fixed seed a second run would draw a different split and could
# copy the same image into both test and training (data leakage).
# NOTE(review): folders populated by an earlier, unseeded run are not cleaned
# here -- clear them by hand before re-splitting.
rng = np.random.RandomState(SPLIT_SEED)
for filename in files:
    sample = os.path.splitext(filename)[0]
    classification = samples_to_label[sample]
    dest_root = dest11 if rng.rand() < TEST_FRACTION else dest12
    shutil.copyfile(os.path.join(source1, filename),
                    os.path.join(dest_root + classes[classification], filename))
        


322
322


In [3]:
# Create the directory-backed image iterators for the training and test sets.
train_data_dir = 'cnn_images/training/'
test_data_dir = 'cnn_images/test/'

# The only preprocessing configured is scaling pixel values by 1/255.
train_gen = ImageDataGenerator(rescale = 1.0/255)
test_gen = ImageDataGenerator(rescale = 1.0/255)

# Both iterators are configured identically: explicit class list, batches of
# 20 images, categorical labels and images resized to 150x150.
flow_options = dict(
    classes = ['crohns', 'bowel', 'none'],
    batch_size = 20,
    class_mode = "categorical",
    target_size = (150, 150),
)

train_generator = train_gen.flow_from_directory(train_data_dir, **flow_options)
test_generator = test_gen.flow_from_directory(test_data_dir, **flow_options)

Found 245 images belonging to 3 classes.
Found 77 images belonging to 3 classes.


In [5]:
def build_cnn(dropout_rate):
    '''
    Build CNN model with multiple layers.

    The network is three Conv2D + MaxPooling2D stages (32, 32 and 64 filters
    with 3x3 kernels), followed by a 128-unit dense layer, dropout and a
    3-way softmax output.

    Parameters:
        dropout_rate: rate passed to the Dropout layer placed before the
            output layer.

    Returns:
        The Sequential model. It is not compiled here.
    '''
    model = Sequential()
    # input_shape is (height, width, channels), i.e. channels-last.
    model.add(Conv2D(32, (3, 3), activation = 'relu', input_shape=(150, 150, 3)))
    # The pooling layers previously passed dim_ordering="th" (the Keras 1 name
    # for channels-first). Conv2D here uses the default ordering and the input
    # is channels-last, so that setting pooled over the wrong pair of axes.
    # Leaving data_format at its default keeps pooling and convolution in
    # agreement.
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(32, (3, 3), activation = 'relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3), activation = 'relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(3, activation='softmax'))
    return model


In [20]:
# Early-stopping callback passed to the training runs below.
# It watches 'val_loss' with patience=0 and min_delta=0, and
# restore_best_weights=True asks Keras to put back the weights from the
# best monitored epoch when it stops training.
early_stopping_options = {
    'monitor': 'val_loss',
    'min_delta': 0,
    'patience': 0,
    'verbose': 0,
    'mode': 'auto',
    'baseline': None,
    'restore_best_weights': True,
}
early_stopping_monitor = EarlyStopping(**early_stopping_options)

In [27]:
# Candidate hyperparameter values for the grid search.
dropout_rates = [0.2, 0.5, 0.8]
learning_rates = [0.1, 0.01, 0.001]

# One result slot per (dropout rate, learning rate) pair:
# rows follow dropout_rates, columns follow learning_rates.
n_dropout_options = len(dropout_rates)
n_learning_rate_options = len(learning_rates)
accs = np.zeros(shape=(n_dropout_options, n_learning_rate_options))

In [None]:
# Grid search: train one fresh model per (dropout rate, learning rate) pair
# and record its best validation accuracy in accs[i][j].
for i, dropout_rate in enumerate(dropout_rates):
    for j, learning_rate in enumerate(learning_rates):
        # A new, untrained model for every combination.
        model = build_cnn(dropout_rate)
        #compile the model
        model.compile(optimizer=RMSprop(lr = learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
        history = model.fit_generator(train_generator,
                                      validation_data = test_generator,
                                      steps_per_epoch = 100,
                                      epochs = 15,
                                      validation_steps = 50,
                                      callbacks = [early_stopping_monitor],
                                      verbose = 1)
        # This used to store max(val_loss), i.e. the *worst* loss of the run.
        # The selection step takes the argmax of `accs`, which would then pick
        # the worst configuration. Store the best validation accuracy so that
        # "largest value wins" is correct.
        # The history key is 'val_accuracy' or 'val_acc' depending on the
        # Keras version; a KeyError is raised if neither is present.
        metric_key = "val_accuracy" if "val_accuracy" in history.history else "val_acc"
        accs[i][j] = max(history.history[metric_key])

  import sys
  # Remove the CWD from sys.path while we load stuff.
  del sys.path[0]


Epoch 1/15

In [None]:
#Get the accuracy and parameters for which we have best accuracy
# Rows of the table are dropout rates, columns are learning rates.
accuracy_df = pd.DataFrame(data=accs, index=dropout_rates, columns=learning_rates)
accuracy_df.to_csv("CNN_all_results.csv")

# Position of the largest entry of the grid.
# NOTE(review): argmax assumes that a larger value in `accs` is better
# (e.g. an accuracy) -- confirm that this matches what the search stored.
ind = np.unravel_index(np.argmax(accs, axis=None), accs.shape)
dropout_rate = dropout_rates[ind[0]]
learning_rate = learning_rates[ind[1]]

# "a+" appends, so results of earlier runs are kept. Each field now ends
# with a newline; previously the three fields (and successive runs) were
# written back to back on one line. The last label is the neutral "Score"
# because the value is whatever the search stored in `accs`.
with open("CNN_best_result.txt", "a+") as f:
    f.write("Best Model Dropout Rate: %f\n" % dropout_rate)
    f.write("Best Model Learning Rate: %f\n" % learning_rate)
    f.write("Best Model Score: %f\n" % accs[ind[0]][ind[1]])

In [None]:
#train and save model with best hyperparameters
# Build a fresh model with the selected dropout rate. Previously this cell
# re-compiled whatever `model` was left over from the last grid-search
# iteration, so the saved network used the last dropout rate tried (and its
# already-trained weights) rather than the selected one.
model = build_cnn(dropout_rate)
model.compile(optimizer=RMSprop(lr = learning_rate), loss='categorical_crossentropy', metrics=['accuracy'])
# The model now starts untrained, so it gets the same epoch budget as the
# grid search (15, cut short by early stopping) instead of a single epoch.
history = model.fit_generator(train_generator,
                              validation_data = test_generator,
                              steps_per_epoch = 100,
                              epochs = 15,
                              validation_steps = 50,
                              callbacks = [early_stopping_monitor],
                              verbose = 1)
model.save('best_CNN_model.h5')
