In [1]:
from keras import backend as K
# Use channels-last tensors, i.e. (rows, cols, channels), which is what the
# (img_width, img_height, 1) input shape further down expects.
# `set_image_data_format('channels_last')` is the Keras 2 spelling of the legacy
# `set_image_dim_ordering('tf')` alias, which later Keras 2.x releases removed.
K.set_image_data_format('channels_last')
print('The backend is:',K.backend())
import tensorflow as tf

Using TensorFlow backend.
  return f(*args, **kwds)


The backend is: tensorflow


In [2]:
# Import relevant packages
import h5py
import os, cv2, random
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline 

from PIL import Image

from keras.models import Sequential
from keras.layers import Input, Dropout, Flatten, Convolution2D, MaxPooling2D, Dense, Activation, ZeroPadding2D, Conv2D
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import array_to_img, img_to_array, load_img
from keras.models import model_from_json
from keras.preprocessing import image

from sklearn.preprocessing import normalize
from sklearn.metrics import confusion_matrix

from IPython.display import Image, display

In [3]:
# Read the pre-computed arrays from disk, then scale them for the model.

# Row labels that matrix_to_df removes from the metadata table
dropped = np.load("../../data/dropped.npy")

# Cropped 2D matrices
mfc = np.load("../../data/z1.npy")
lfc = np.load("../../data/z2.npy")

# Augmented variants of the mfc matrix
mfc_aug = np.load("../../data/z1_aug_formatted.npy")
mfc_aug_2 = np.load("../../data/z1_fil_aug.npy")
mfc_aug_3 = np.load("../../data/z1_intensity.npy")

# Max-norm scaling via sklearn's `normalize(..., norm="max")`.
z1, z2 = (normalize(arr, norm="max") for arr in (mfc, lfc))
# Only the first 1585 rows of these two augmentation arrays are kept.
# NOTE(review): 1585 presumably matches the number of baseline rows - confirm.
z3, z4 = (normalize(arr[:1585], norm="max") for arr in (mfc_aug, mfc_aug_2))
# The intensity augmentation is used as stored (already normalized upstream).
z5 = mfc_aug_3

In [51]:
def matrix_to_df(matrix, augmentation = False):
    """Join the metadata table with one flattened image per row and tag the split.

    Reads ``../clean_all_path.csv``, removes the rows listed in the
    module-level ``dropped`` array, attaches the columns of ``matrix`` by row
    position, binarises the ``mfcBME`` label and adds a ``set`` column.

    Parameters
    ----------
    matrix : 2D array-like
        Pixel data, one row per metadata row that remains after ``dropped``
        has been removed. Rows are paired by position through an inner merge
        on the index, so positions present on only one side are silently
        discarded.
    augmentation : bool, default False
        ``False``: every row gets ``set == 0`` and then, patient by patient,
        is moved to ``set == 1`` when a uniform(0, 1) draw exceeds 0.8.
        ``True``: every row gets ``set == 2`` and only rows with a positive
        ``mfcBME`` label are returned.

    Returns
    -------
    pandas.DataFrame
        Metadata columns, the derived ``patient`` and ``set`` columns, and
        one integer-named column per column of ``matrix``.

    Notes
    -----
    The patient-level split uses the global NumPy random state, so it differs
    between runs unless the caller seeds ``np.random`` beforehand.
    """
    df = pd.read_csv("../clean_all_path.csv")

    # Patient id: base name of the MRI file, without extension, up to the first '_'
    df['patient'] = df['mriFile'].apply(lambda f: os.path.splitext(os.path.basename(f))[0].split('_')[0])
    df = df.drop(labels=['mfcWorms', 'lfcWorms'], axis=1)

    # Remove the excluded rows and renumber so that row i of the metadata
    # lines up with row i of `matrix`.
    df = df.drop(dropped, axis=0).reset_index(drop=True)
    x = pd.DataFrame(matrix).reset_index(drop=True)

    # Merge metadata and pixels into a single dataset (inner join on the index)
    df = df.merge(x, left_index=True, right_index=True)

    # Removing NaN values: these two row labels are hard-coded.
    # NOTE(review): presumably the rows whose mfcBME label is missing in the
    # current CSV - confirm; a KeyError is raised if either label is absent.
    df = df.drop([1356, 1565], axis=0)

    # Binarise the label. A single .loc assignment replaces the chained
    # `df['mfcBME'][mask] = 1`, which triggers SettingWithCopyWarning and does
    # not write through to `df` when pandas copy-on-write is enabled.
    df.loc[df['mfcBME'] > 0, 'mfcBME'] = 1

    if augmentation:
        df['set'] = 2
        # Only use the minority data
        df = df[df["mfcBME"] > 0]
    else:
        df['set'] = 0
        # Assign whole patients to the test set so that images of one patient
        # never end up on both sides of the split.
        for i in df['patient'].unique():
            if np.random.uniform(0,1)>0.8:
                df.loc[df['patient']==i, 'set'] = 1

    return df


In [57]:
# Un-augmented data: gets the random patient-level train/test split.
baseline = matrix_to_df(z1)

# Augmented data (affine, filter, intensity), built in that order with
# augmentation switched on.
affine_aug, filter_aug, intensity_aug = (
    matrix_to_df(aug_matrix, augmentation=True) for aug_matrix in (z3, z4, z5)
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [58]:
# Start from the un-augmented baseline frame.
df = baseline
# Append the augmentations
# Uncomment to add augmentation data
# pd.concat is used instead of DataFrame.append, which pandas deprecated and
# removed in 2.0; like append, it keeps the original row labels.
df = pd.concat([df, affine_aug])
#df = pd.concat([df, filter_aug])
#df = pd.concat([df, intensity_aug])

# Splitting the dataset
# Columns that are metadata or labels rather than pixel features
non_feature_cols = ['mriFile','segFile','mfcBME','lfcBME','patient','set']
# set == 0 (baseline training rows) and set == 2 (augmented rows) form the
# training data; set == 1 is the held-out test data.
is_train = df['set'].isin([0, 2])
is_test = df['set'] == 1

x_train = df[is_train].drop(non_feature_cols, axis=1)
y_train = df.loc[is_train, 'mfcBME']
x_test = df[is_test].drop(non_feature_cols, axis=1)
y_test = df.loc[is_test, 'mfcBME']

In [None]:
# training variables

# Image geometry: square images with a trailing axis of size 1
img_width = img_height = 120
input_shape = (img_width, img_height, 1)

# Sample counts: train and validation both use the length of the normalized baseline matrix
n_train_samples = n_validation_samples = len(z1)
n_test_samples = 100

# Optimisation settings
epochs, batch_size = 2, 50

In [None]:
# Reshaping training and testing data to fit model

# Each flattened feature row becomes an (img_width, img_height, 1) tensor.
mfc_x_train = np.array(x_train).reshape(len(x_train), img_width, img_height, 1)
mfc_y_train = np.array(y_train)

mfc_x_test = np.array(x_test).reshape(len(x_test), img_width, img_height, 1)
mfc_y_test = np.array(y_test)

# lfc inputs come straight from the normalized z2 matrix; labels are the
# lfcBME column of the combined frame, as a column vector.
lfc_x_train = z2.reshape(z2.shape[0], img_width, img_height, 1)
lfc_y_train = np.array(df["lfcBME"]).reshape((len(df), 1))

In [None]:
# Creating CNN

# Layers are listed in forward order: two 3x3 convolutions, one pooling step,
# then a dense head ending in a single sigmoid unit.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
# Binary classification with a single sigmoid output.
# The loss is given by its Keras identifier rather than the raw backend op
# `K.binary_crossentropy`: the identifier resolves to the loss function with
# the (y_true, y_pred) contract that compile() expects, whereas the backend
# op's argument order has differed between Keras releases.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
# Train on the mfc training tensors for a single epoch.
# NOTE(review): the `epochs` variable defined with the training variables is
# not used here - confirm whether the hard-coded 1 is intentional.
model.fit(
    x=mfc_x_train,
    y=mfc_y_train,
    batch_size=batch_size,
    epochs=1,
    verbose=1,
)

In [None]:
# Test accuracy

# score[0] is the loss and score[1] the accuracy metric requested at compile time
score = model.evaluate(mfc_x_test, mfc_y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# Test confusion matrix

# Threshold the sigmoid output at 0.5 to get hard 0/1 labels. This is what
# `Sequential.predict_classes` did for a single-unit output; that helper is
# absent from newer Keras/TensorFlow releases, while `predict` is always available.
predictions = (model.predict(mfc_x_test) > 0.5).astype('int32')
cm = confusion_matrix(mfc_y_test.reshape(len(mfc_y_test), 1), predictions)
cm

In [None]:
# Training-set accuracy

# The model is scored on the same tensors it was fitted on, so the numbers are
# labelled as training metrics (they were previously printed as "Test").
score = model.evaluate(mfc_x_train, mfc_y_train, verbose=0)
print('Train loss:', score[0])
print('Train accuracy:', score[1])

In [None]:
# Training-set confusion matrix

# Threshold the sigmoid output at 0.5 to get hard 0/1 labels. This is what
# `Sequential.predict_classes` did for a single-unit output; that helper is
# absent from newer Keras/TensorFlow releases, while `predict` is always available.
predictions = (model.predict(mfc_x_train) > 0.5).astype('int32')
cm = confusion_matrix(mfc_y_train.reshape(len(mfc_y_train), 1), predictions)
cm