In [1]:
import numpy as np
np.random.seed(1337)
import pandas as pd
import keras
from keras.models import Sequential
from keras import initializers
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
import matplotlib.pylab as plt
%matplotlib inline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn import preprocessing
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from sklearn.model_selection import train_test_split
import cv2
import os
import random
from glob import glob
import seaborn as sns
from sklearn.utils import class_weight
from keras import regularizers
from imblearn.over_sampling import RandomOverSampler
from sklearn.externals import joblib

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Setup (Data Preparation)

In [6]:
# Load the image arrays and the disease labels from their .npz archives.
# np.load on an .npz returns a lazy NpzFile that keeps the file open, so each
# archive is read inside a `with` block to release the handle right away.
with np.load("x_images_arrays_bin.npz") as x_npz:
    x = x_npz['arr_0']
with np.load("y_disease_labels_bin.npz") as y_npz:
    y = y_npz['arr_0']
# Sanity check: both counts should match (one label per image).
print(len(y))
print(len(x))

4626
4626


In [7]:
# Hold out 30% of the samples; the remaining 70% is used for training.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
)

In [8]:
# Show the shape of every split: train images, test images, train labels, test labels.
for split_array in (X_train, X_test, y_train, y_test):
    print(split_array.shape)

(3238, 128, 128, 3)
(1388, 128, 128, 3)
(3238,)
(1388,)


In [9]:
# Flatten every image into a single float32 feature vector (one row per sample)
# so it can be fed to fully connected layers.
X_train_reshape, X_test_reshape = (
    images.reshape(len(images), -1).astype('float32')
    for images in (X_train, X_test)
)

In [10]:
# One-hot encode the train and test labels into two columns (one per class).
y_trainHot, y_testHot = (
    keras.utils.to_categorical(labels, num_classes=2)
    for labels in (y_train, y_test)
)

In [11]:
# Training configuration shared by the model cells below.
epochs = 5
batch_size = 100

# Hidden-layer activation, output activation and training loss.
activation = 'relu'
out_activation = 'softmax'
loss = 'categorical_crossentropy'

# SGD with momentum and learning-rate decay.
# Alternative kept for reference: optimizers.Adam(lr=0.00003, decay=0.0001)
optimizer = optimizers.SGD(
    lr=0.003,
    decay=0.0001,
    momentum=0.9,
)

In [12]:
# TensorBoard callback writing the graph and images to ./logs
# (histogram_freq=0 turns histogram computation off).
tbcallback = keras.callbacks.TensorBoard(
    log_dir='./logs',
    histogram_freq=0,
    batch_size=32,
    write_graph=True,
    write_images=True,
)

## Balanced Class Weights

In [13]:
# 'balanced' class weights computed from all labels in `y`, stored as
# {position: weight} in the sorted order returned by np.unique.
# NOTE(review): using the position as the key assumes the labels are 0..k-1 — confirm.
class_weights = dict(
    enumerate(
        class_weight.compute_class_weight(class_weight='balanced', classes=np.unique(y), y=y)
    )
)
class_weights

{0: 0.7598554533508541, 1: 1.4620733249051834}

In [15]:
# Baseline MLP on the flattened images, trained with the balanced class weights.
# The input width and the number of output units are read from the data rather than
# hard-coded, so the cell keeps working if the image size or label encoding changes.
model = Sequential()
model.add(Dense(100, input_dim=X_train_reshape.shape[1], activation=activation))
model.add(Dropout(0.3))
# Remaining hidden layers, each followed by 30% dropout.
for units in (75, 25, 25, 10):
    model.add(Dense(units, activation=activation))
    model.add(Dropout(0.3))
# One output unit per one-hot label column.
model.add(Dense(y_trainHot.shape[1], activation=out_activation))
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
# validation_data is passed as an (inputs, targets) tuple, the form Keras documents
# and the form the oversampling model cell already uses.
history = model.fit(
    X_train_reshape,
    y_trainHot,
    epochs=epochs,
    batch_size=batch_size,
    class_weight=class_weights,
    verbose=1,
    validation_data=(X_test_reshape, y_testHot),
    callbacks=[tbcallback],
)

Train on 3238 samples, validate on 1388 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [83]:
# Wider MLP variant on the flattened images, trained with the balanced class weights.
# NOTE(review): `optimizer` is the same instance the previous model was compiled with;
# consider building a fresh optimizer per model — confirm whether carried-over state matters.
model = Sequential()
model.add(Dense(150, input_dim=X_train_reshape.shape[1], activation=activation))
model.add(Dropout(0.3))
# Remaining hidden layers, each followed by 30% dropout.
for units in (125, 100, 75, 50):
    model.add(Dense(units, activation=activation))
    model.add(Dropout(0.3))
# The output layer must have one unit per column of y_trainHot. The previous hard-coded
# 8 units did not match the two-column one-hot labels built earlier in this notebook.
model.add(Dense(y_trainHot.shape[1], activation=out_activation))
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
# validation_data is passed as an (inputs, targets) tuple, the form Keras documents.
history = model.fit(
    X_train_reshape,
    y_trainHot,
    epochs=epochs,
    batch_size=batch_size,
    class_weight=class_weights,
    verbose=1,
    validation_data=(X_test_reshape, y_testHot),
    callbacks=[tbcallback],
)

Train on 3168 samples, validate on 1359 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [68]:
# Persist the trained network.
model.save_weights('my_model_weights.h5')
# joblib/pickle cannot serialize a Keras model (it fails with
# "TypeError: can't pickle _thread.RLock objects"), so the full model is written
# with Keras' own HDF5 saver; reload it with keras.models.load_model.
model.save('mlp_balance.h5')

## Random Oversampling

In [16]:
# Randomly oversample the flattened train and test splits, then one-hot encode
# the resampled labels.
# NOTE(review): the test split is oversampled too, so validation metrics are not measured
# on the original class distribution — confirm this is intended.
# NOTE(review): num_classes=8 matches the 8-unit output layer of the oversampling model,
# although the labels earlier in the notebook are encoded with 2 classes — confirm.
ros = RandomOverSampler(ratio='auto')
(X_trainRos, y_trainRos), (X_testRos, y_testRos) = (
    ros.fit_sample(features, labels)
    for features, labels in ((X_train_reshape, y_train), (X_test_reshape, y_test))
)

y_trainRosHot, y_testRosHot = (
    keras.utils.to_categorical(labels, num_classes=8)
    for labels in (y_trainRos, y_testRos)
)

In [17]:
# Compare array shapes before and after flattening and oversampling.
for label, array in (
    ("X_train: ", X_train),
    ("X_trainFlat: ", X_train_reshape),
    ("X_trainRos Shape: ", X_trainRos),
    ("X_testRos Shape: ", X_testRos),
    ("Y_trainRosHot Shape: ", y_trainRosHot),
    ("Y_testRosHot Shape: ", y_testRosHot),
):
    print(label, array.shape)

X_train:  (3238, 128, 128, 3)
X_trainFlat:  (3238, 49152)
X_trainRos Shape:  (4230, 49152)
X_testRos Shape:  (1858, 49152)
Y_trainRosHot Shape:  (4230, 8)
Y_testRosHot Shape:  (1858, 8)


In [18]:
# MLP trained on the randomly oversampled data (no class weights), validated on the
# oversampled test split for 15 epochs.
model = Sequential()
model.add(Dense(100, input_dim=49152, activation=activation))
model.add(Dropout(0.3))
# Remaining hidden layers, each followed by 30% dropout.
for hidden_units in (25, 25, 25, 75, 10):
    model.add(Dense(hidden_units, activation=activation))
    model.add(Dropout(0.3))
# Eight output units, matching the eight columns of y_trainRosHot.
model.add(Dense(8, activation=out_activation))
model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
history = model.fit(
    X_trainRos,
    y_trainRosHot,
    epochs=15,
    batch_size=batch_size,
    verbose=1,
    validation_data=(X_testRos, y_testRosHot),
    callbacks=[tbcallback],
)

Train on 4230 samples, validate on 1858 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [58]:
# Write the weights of the current `model` to an HDF5 file.
model.save_weights(filepath='my_model_weights_ros.h5')