In [3]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution3D, MaxPooling3D, Convolution2D, AveragePooling2D, MaxPooling2D, ZeroPadding3D, ZeroPadding2D
from keras.utils import np_utils
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
import cv2
import operator
from tqdm import tqdm_notebook as tqdm
import keras
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="3"

from scipy import stats
import pandas as pd
import numpy as np

# Models
from models_keras import CNNT4, CNNT5, VGG_11

In [4]:
# Which variant of the `_0_` candidate CSVs to use: 'D1' or 'D2'.
dataset_config = 'D1'

# The `_0_` CSVs depend on the chosen variant ...
train_non, val_non = (
    pd.read_csv(csv_path.format(dataset_config))
    for csv_path in ('/data/train_data_0_{}.csv', '/data/val_data_0_{}.csv')
)
# ... while the `_1` CSVs are shared by every variant.
train_nod, val_nod = (
    pd.read_csv(csv_path)
    for csv_path in ('/data/train_data_1.csv', '/data/val_data_1.csv')
)

# Stack both candidate tables per split (original row indices are kept as-is).
candidates_train = pd.concat([train_non, train_nod])
candidates_val = pd.concat([val_non, val_nod])

In [5]:
# Load 2-D DATA

def _load_center_slices(candidates, desc):
    """Load slice 16 of every 32-slice volume listed in ``candidates``.

    Parameters
    ----------
    candidates : pandas.DataFrame
        One row per candidate; must provide a ``'filename'`` column (a path
        readable by ``np.load``) and a ``'class'`` column (convertible to int).
    desc : str
        Label shown next to the progress bar.

    Returns
    -------
    images : list of ndarray
        Kept slices, each reshaped to ``(32, 32, 1)``.
    labels : list of int
        Class of every kept slice, aligned with ``images``.
    means : list
        Mean intensity of every 32-slice volume's slice 16, including the
        ones that were subsequently filtered out.
    names : list
        ``'filename'`` of every kept slice, aligned with ``images``.
    """
    images, labels, means, names = [], [], [], []
    # iterrows() has no length, so give tqdm the total explicitly; otherwise
    # the progress bar cannot show progress or an ETA.
    for _, image in tqdm(candidates.iterrows(), total=len(candidates), desc=desc):
        y_class = int(image['class'])
        lung_img = np.load(image['filename'])
        # Only volumes with exactly 32 entries along the first axis are used.
        if lung_img.shape[0] != 32:
            continue
        center_slice = lung_img[16, :, :].reshape((32, 32))
        slice_mean = np.mean(center_slice)
        means.append(slice_mean)
        # Skip slices whose mean intensity is not above 1.
        if slice_mean > 1:
            images.append(center_slice.reshape((32, 32, 1)))
            labels.append(y_class)
            names.append(image['filename'])
    return images, labels, means, names


X_train, Y_train, train_mean, train_names = _load_center_slices(candidates_train, 'train')
X_test, Y_test, val_mean, val_names = _load_center_slices(candidates_val, 'val')

X_train, Y_train = np.array(X_train), np.array(Y_train)
X_test, Y_test = np.array(X_test), np.array(Y_test)

# One-hot encode the two classes.
Y_train = np_utils.to_categorical(Y_train, 2)
Y_test = np_utils.to_categorical(Y_test, 2)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [6]:
# Sanity check: array shapes per split and the number of file names kept for each.
X_train.shape, Y_train.shape, X_test.shape, Y_test.shape, len(train_names), len(val_names)

((30436, 32, 32, 1), (30436, 2), (16705, 32, 32, 1), (16705, 2), 30436, 16705)

In [7]:
# Global mean and standard deviation of the pixel values in each split.
X_train.mean(), X_train.std(), X_test.mean(), X_test.std()

(58.531994, 59.46621, 58.556988, 59.63016)

In [8]:
# Range changing
# Z-score standardization: subtract the mean and divide by the standard
# deviation. The result has zero mean / unit variance on the training split;
# it is NOT confined to [0, 1] or [-1, 1].
# Both splits are scaled with the TRAINING statistics so the validation data
# goes through exactly the same transform the model is fitted on.
train_mu, train_sigma = np.mean(X_train), np.std(X_train)
X_train_range = (X_train - train_mu) / train_sigma
X_test_range = (X_test - train_mu) / train_sigma
# Inverse transform, if the original intensities are needed again:
# X_train = X_train_range * train_sigma + train_mu

## Train a model

In [9]:
# Use the canonical optimizer class name: the lowercase `rmsprop` alias only
# exists in some Keras releases, whereas `RMSprop` is the documented class.
opt = keras.optimizers.RMSprop(lr=0.0001, rho=0.95)
# opt = keras.optimizers.Adam(lr=0.0001)
model = CNNT4()

model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

# Both callbacks watch the validation loss: the learning rate is reduced after
# 5 epochs without improvement, and training stops after 7.
callback = [
    EarlyStopping(monitor='val_loss', patience=7),
    ReduceLROnPlateau(monitor='val_loss', patience=5, verbose=1),
]

history = model.fit(
    x=X_train_range,
    y=Y_train,
    epochs=30,
    validation_data=(X_test_range, Y_test),
    batch_size=128,
    callbacks=callback,
)

Train on 30436 samples, validate on 16705 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30

Epoch 00026: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
Epoch 27/30
Epoch 28/30


In [10]:
# List the per-epoch series recorded by `model.fit` (used below to build the log table).
history.history.keys()

dict_keys(['acc', 'val_loss', 'loss', 'val_acc', 'lr'])

In [11]:
# Per-epoch training curves, one row per epoch. The Keras history keys
# 'acc' / 'loss' are stored under the names 'train_acc' / 'train_loss'.
df_logs = pd.DataFrame(
    {
        'val_acc': history.history['val_acc'],
        'val_loss': history.history['val_loss'],
        'train_acc': history.history['acc'],
        'train_loss': history.history['loss'],
    },
    # Fix the column order explicitly.
    columns=['val_acc', 'val_loss', 'train_loss', 'train_acc'],
)

In [12]:
# Write the training curves to a CSV named after the dataset variant (no index column).
log_csv_path = '/logs/models/cnnt4_{}.csv'.format(dataset_config)
df_logs.to_csv(log_csv_path, index=False)

## Saving the trained model

In [13]:
# Output directory prefix (trailing slash included, paths are built by concatenation).
LOGS = '/SubmitModels/'

# Architecture: serialize to JSON first, then write it out.
model_json = model.to_json()
json_path = LOGS + "cnnt4_{}.json".format(dataset_config)
with open(json_path, "w") as json_file:
    json_file.write(model_json)

# Weights: stored separately in HDF5 format.
weights_path = LOGS + 'cnnt4_{}.h5'.format(dataset_config)
model.save_weights(weights_path)