## Train model for task 2 method 2

In [None]:
import numpy as np
import pandas as pd
import keras 

from keras import backend as K
from keras.models import Sequential,Model
from keras.layers import Dense, Dropout,Input
from keras.layers import Conv2D, MaxPooling2D, Flatten,GlobalAveragePooling2D,BatchNormalization
from keras_preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt 
from keras import regularizers
%matplotlib inline

In [None]:
# Load the per-patient morbidity table and collapse the raw `Morbidity`
# codes into the label column `type`:
#   1      -> 0
#   3 or 4 -> 1
#   5 or 6 -> 2
#   other  -> 3
mobidity_file = pd.read_csv('../input/mobidity/morbidity.csv')
mobidity_file['type'] = 3
for label, codes in ((0, [1]), (1, [3, 4]), (2, [5, 6])):
    mobidity_file.loc[mobidity_file['Morbidity'].isin(codes), 'type'] = label

# The first 999 rows become the training labels, the remainder the test
# labels (assumes the CSV row order matches the image arrays -- TODO confirm).
labels = np.array(mobidity_file['type'])
y_train, y_test = labels[:999], labels[999:]

# Images: ten CT images selected from the middle.
# Alternative input (the ten most likely positive CT images):
#   X_train = np.load('../input/task2trial2/train_10ct.npy')
#   X_test = np.load('../input/task2trial2/test_10ct.npy')
X_train = np.load('../input/10middle-ct/train_10.npy')
X_test = np.load('../input/10middle-ct/test_10.npy')

### Delete patients with suspected morbidity

In [None]:
# Drop every sample whose label is 3 from both splits. The same boolean mask
# is applied to the labels and to the images so the two stay aligned.
keep = y_train != 3
y_train = y_train[keep]
X_train = X_train[keep]

keep1 = y_test != 3
y_test = y_test[keep1]
X_test = X_test[keep1]

# Remaining sample counts: training first, then test.
print(len(y_train))
print(len(y_test))

In [None]:
from sklearn.model_selection import train_test_split

# The removed (label 3) patients were not spread evenly over the original
# train/test split, so pool everything and draw a fresh shuffled 80/20 split.
X = np.concatenate([X_train, X_test])
y = np.concatenate([y_train, y_test])
split = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)
X_train, X_test, y_train, y_test = split


### Show examples 

In [None]:
# Preview the ten channels (CT images) of one training sample in a 2 x 5
# grid; every panel is titled with that sample's label.
sample_idx = 89         # which training sample to display
n_rows, n_cols = 2, 5   # named explicitly so the label array `y` is not overwritten

plt.figure(figsize=(20, 8))
for channel in range(n_rows * n_cols):
    plt.subplot(n_rows, n_cols, channel + 1)
    plt.imshow(X_train[sample_idx, :, :, channel], cmap='gray')
    plt.title('target: {}'.format(y_train[sample_idx]))
    plt.axis('off')
plt.show()

### Handle class imbalance with class_weight

In [None]:
# Count the training samples per class. Any label that is neither 0 nor 1
# is counted as class 2.
type0 = int(np.count_nonzero(y_train == 0))
type1 = int(np.count_nonzero(y_train == 1))
type2 = len(y_train) - type0 - type1
print(type0, type1, type2)

In [None]:
# Per-class loss weights, inversely proportional to class frequency:
#     weight_c = n_samples / (n_classes * count_c)
# The total is derived from the counts themselves rather than hard-coded,
# so the weights stay correctly scaled for whatever training set was counted.
n_classes = 3
n_samples = type0 + type1 + type2

weight_for_0 = (1 / type0) * n_samples / n_classes
weight_for_1 = (1 / type1) * n_samples / n_classes
weight_for_2 = (1 / type2) * n_samples / n_classes
class_weights = {0: weight_for_0, 1: weight_for_1, 2: weight_for_2}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))
print('Weight for class 2: {:.2f}'.format(weight_for_2))

In [None]:
# Convert the integer class labels of both splits into one-hot vectors.
num_classes = 3
y_train, y_test = (
    keras.utils.to_categorical(split_labels, num_classes)
    for split_labels in (y_train, y_test)
)

In [None]:
# Data augmentation using ImageDataGenerator.
# Training batches get random horizontal flips and random rotations
# (rotation_range=20).
train = ImageDataGenerator(horizontal_flip=True,
                           rotation_range=20)

# Evaluation data is not randomly transformed: augmenting it would make the
# validation metrics change from one evaluation to the next. This generator
# only batches the data, in a fixed order.
test = ImageDataGenerator()

# NOTE(review): these generators only have an effect if they are the objects
# passed to model.fit / model.evaluate -- confirm they are actually consumed.
train_generator = train.flow(X_train, y_train, batch_size=32)
test_generator = test.flow(X_test, y_test, batch_size=32, shuffle=False)


### Model 

In [None]:
def _conv3x3(filters, **kwargs):
    """Return a 3x3 'same'-padded ReLU convolution with He-normal initialisation."""
    return Conv2D(filters=filters, kernel_size=(3, 3), padding="same",
                  activation="relu", kernel_initializer='he_normal', **kwargs)


# Small VGG-style network: two convolutional stages (3 x 64 filters, then
# 2 x 128 filters), each followed by 2x2 max pooling, then a regularised
# dense head with a 3-way softmax output.
model = Sequential([
    _conv3x3(64, input_shape=(256, 256, 10)),
    _conv3x3(64),
    _conv3x3(64),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),

    _conv3x3(128),
    _conv3x3(128),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),

    Flatten(),
    BatchNormalization(),

    # (An additional Dense(128, relu) + Dropout(0.5) stage is left disabled here.)
    Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    Dropout(0.5),
    Dense(3, activation='softmax'),
])
model.summary()


In [None]:
# Compile the classifier: categorical cross-entropy loss, Adam optimiser,
# accuracy as the reported metric.
# `learning_rate` is the current name of the step-size argument; the old
# `lr` alias is deprecated.
# NOTE(review): `decay` is kept unchanged; newer Keras releases replace it
# with learning-rate schedules -- confirm against the installed version.
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(learning_rate=0.001, decay=0.05),
              metrics=['accuracy'])

# Stop when val_loss has not improved for 10 consecutive epochs, and roll the
# model back to the weights of the best val_loss epoch. Without the rollback
# the model would keep the weights from 10 epochs past its best point.
early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=10,
                                       restore_best_weights=True)

epochs = 40  # upper bound; early stopping may end training sooner

history = model.fit(X_train, y_train,
                    epochs=epochs,
                    verbose=1,
                    class_weight=class_weights,
                    callbacks=[early_stopping_monitor],
                    validation_data=(X_test, y_test))

In [None]:
# Per-epoch curves recorded by model.fit.
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(accuracy) + 1)


def _plot_curves(train_values, test_values, name):
    """Draw the training (blue) and test (red) curves of one metric per epoch."""
    plt.plot(epochs, train_values, 'b', label='Training')
    plt.plot(epochs, test_values, 'r', label='Test')
    plt.xlabel('Epoch', size=14)
    plt.ylabel(name, size=14)
    plt.xticks(np.arange(0, len(epochs), step=4))
    plt.title(name)
    plt.legend()
    plt.show()


_plot_curves(accuracy, val_accuracy, 'Accuracy')

# The loss curves go on a figure of their own.
plt.figure()
_plot_curves(loss, val_loss, 'Loss')


### Save weights

In [None]:
import json

# Base name shared by the statistics file (.json) and the weights file (.h5).
trial = 'model_task2_10middle'

# Training curves as (loss, accuracy) pairs for the train and eval splits.
# Values are coerced to plain floats so NumPy scalars serialise as well.
statistics = {
    'train': ([float(v) for v in loss], [float(v) for v in accuracy]),
    'eval': ([float(v) for v in val_loss], [float(v) for v in val_accuracy]),
}

# The context manager guarantees the file is flushed and closed.
with open(trial + '.json', 'w') as stats_file:
    json.dump(statistics, stats_file)

model.save_weights(trial + '.h5')

### Confusion matrix

In [None]:
import seaborn as sns
from sklearn import metrics

# Predict on the test set, then reduce both the one-hot targets and the
# predicted scores to class indices.
y_pred = model.predict(X_test)
Y_test = y_test.argmax(axis=1)
Y_pred = y_pred.argmax(axis=1)

cm1 = metrics.confusion_matrix(Y_test, Y_pred)

# Annotated heatmap of the confusion matrix: y-axis is the true class,
# x-axis the predicted class.
cmap = sns.diverging_palette(220, 10, center="light", as_cmap=True)
plt.figure(figsize=(9, 9))
heatmap_ax = sns.heatmap(cm1, annot=True, fmt=".0f", linewidths=.5,
                         square=True, cmap=cmap)
heatmap_ax.set_ylabel('true number', size=17)
heatmap_ax.set_xlabel('predicted number', size=17)
plt.show()