In [28]:
import numpy as np
np.random.seed(1337)
import pandas as pd
import keras
from keras.models import Sequential
from keras import initializers
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
import matplotlib.pylab as plt
%matplotlib inline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn import preprocessing
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from sklearn.model_selection import train_test_split
import cv2
import os
import random
from glob import glob
import seaborn as sns
from sklearn.utils import class_weight
from keras import regularizers
from imblearn.over_sampling import RandomOverSampler

## Setup (Data Preparation)

In [29]:
# Load npz file containing image arrays
x_npz = np.load("x_images_arrays.npz")
x = x_npz['arr_0']
# Load disease labels
y_npz = np.load("y_disease_labels.npz")
y = y_npz['arr_0']

In [49]:
X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.3)

In [50]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(3168, 128, 128, 3)
(1359, 128, 128, 3)
(3168,)
(1359,)


In [51]:
X_train_reshape = X_train.reshape(X_train.shape[0], -1).astype('float32')
X_test_reshape = X_test.reshape(X_test.shape[0], -1).astype('float32')

In [33]:
#one-hot encode y_train & y_test
y_trainHot = keras.utils.to_categorical(y_train, 8)
y_testHot = keras.utils.to_categorical(y_test, 8)

In [34]:
epochs=5
optimizer = optimizers.Adam(lr=0.03, decay=0.0001)
# optimizer= adam
batch_size=100
loss='categorical_crossentropy'
activation='relu'
out_activation='softmax'

In [35]:
tbcallback = keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=0, batch_size=32, 
                                         write_graph=True, write_images=True)

## Balanced Class Weights

In [36]:
class_weights = class_weight.compute_class_weight('balanced', np.unique(y), y)
class_weights = dict(enumerate(class_weights))
class_weights

{0: 0.1858984888304862,
 1: 7.859375,
 2: 1.125,
 3: 4.963815789473684,
 4: 2.7875615763546797,
 5: 3.9296875,
 6: 2.947265625,
 7: 2.2191176470588236}

In [38]:
model = Sequential()
model.add(Dense(100, input_dim=49152, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(75, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(25, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(25, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(10, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(8, activation=out_activation))
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy']) 
history = model.fit(X_train_reshape, y_trainHot, epochs=epochs, batch_size=batch_size, 
                    class_weight = class_weights, verbose=1, 
                    validation_data=[X_test_reshape, y_testHot], callbacks=[tbcallback])

Train on 3168 samples, validate on 1359 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [39]:
model = Sequential()
model.add(Dense(150, input_dim=49152, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(125, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(100, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(75, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(50, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(8, activation=out_activation))
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy']) 
history = model.fit(X_train_reshape, y_trainHot, epochs=epochs, batch_size=batch_size, 
                    class_weight = class_weights, verbose=1, 
                    validation_data=[X_test_reshape, y_testHot], callbacks=[tbcallback])

Train on 3168 samples, validate on 1359 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [40]:
model.save_weights('my_model_weights.h5')

## Random Oversampling

In [52]:
ros = RandomOverSampler(ratio='auto')
X_trainRos, y_trainRos = ros.fit_sample(X_train_reshape, y_train)
X_testRos, y_testRos = ros.fit_sample(X_test_reshape, y_test)

y_trainRosHot = keras.utils.to_categorical(y_trainRos, num_classes = 8)
y_testRosHot = keras.utils.to_categorical(y_testRos, num_classes = 8)

In [53]:
print("X_train: ", X_train.shape)
print("X_trainFlat: ", X_train_reshape.shape)
print("X_trainRos Shape: ",X_trainRos.shape)
print("X_testRos Shape: ",X_testRos.shape)
print("Y_trainRosHot Shape: ",y_trainRosHot.shape)
print("Y_testRosHot Shape: ",y_testRosHot.shape)

X_train:  (3168, 128, 128, 3)
X_trainFlat:  (3168, 49152)
X_trainRos Shape:  (17160, 49152)
X_testRos Shape:  (7192, 49152)
Y_trainRosHot Shape:  (17160, 8)
Y_testRosHot Shape:  (7192, 8)


In [56]:
model = Sequential()
model.add(Dense(100, input_dim=49152, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(25, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(25, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(25, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(75, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(10, activation=activation))
model.add(Dropout(0.3))
model.add(Dense(8, activation=out_activation))
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy']) 
history = model.fit(X_trainRos, y_trainRosHot, epochs=15, batch_size=batch_size, verbose=1, 
                    validation_data=(X_testRos, y_testRosHot), callbacks=[tbcallback])

Train on 17160 samples, validate on 7192 samples
Epoch 1/5
Epoch 2/5

KeyboardInterrupt: 