In [1]:
# Mount Google Drive inside the Colab runtime; every path used below lives
# under the /content/Gdrive mount point.
from google.colab import drive
drive.mount('/content/Gdrive')

Mounted at /content/Gdrive


In [2]:
cd /content/Gdrive/My Drive/LIDC-IDRI/

/content/Gdrive/My Drive/LIDC-IDRI


In [2]:
import numpy as np

# File paths
# NOTE(review): `dir` shadows the Python builtin of the same name; it is kept
# because other cells may refer to it.
dir = '/content/Gdrive/My Drive/LIDC-IDRI/'
datafolder = dir + 'ProcessedData'      # where the preprocessed .npy arrays are read from
weightsfolder = dir + 'modelweights'    # where model checkpoints are written

# Load the preprocessed nodule images and their masks from the data folder.
noduleimages = np.load('/'.join((datafolder, "noduleimages.npy")))
nodulemasks = np.load('/'.join((datafolder, "nodulemasks.npy")))

In [3]:
from glob import glob
import matplotlib.pyplot as plt
import os
from keras.models import Sequential,load_model,Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, SpatialDropout2D
from keras.layers import Input, merge, UpSampling2D, BatchNormalization
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import backend as K
import pandas as pd
from keras.callbacks import ModelCheckpoint
import h5py
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc

In [None]:
# Sum of every mask; this is the nodule area in pixels provided the masks are
# binary (assumed -- TODO confirm against the preprocessing step).
nodulesize = [mask.sum() for mask in nodulemasks]

# Histogram restricted to masks whose summed value is below 300.
fig, ax = plt.subplots()
ax.hist([area for area in nodulesize if area < 300], bins=50)
ax.set_xlabel("Area")
ax.set_ylabel("frequency")
plt.show()

In [None]:
# Give both arrays an explicit channel axis, (N, 1, 512, 512), which is the
# channels-first layout declared by the Input layer in unet_model().
noduleimages=noduleimages.reshape(noduleimages.shape[0],1,512,512)
nodulemasks=nodulemasks.reshape(nodulemasks.shape[0],1,512,512)

# Hold out 20% of the samples. The fixed random_state makes the split
# reproducible, so the reported metrics and any hard-coded test-set index
# refer to the same samples on every run.
imagestrain, imagestest, maskstrain, maskstest = train_test_split(
    noduleimages, nodulemasks, test_size=.20, random_state=42)

In [None]:
# NOTE(review): `list32file`, `metafile` and `DOIpath` are not defined in any
# cell visible above this one -- they must be set before this cell is run.
nodulelocations=pd.read_csv(list32file)
meta=pd.read_csv(metafile)

# Keep only the CT series, then renumber the rows (the old row labels are
# retained in an 'index' column by reset_index()).
meta=meta[meta['Modality']=='CT'].reset_index()

#Get folder names of CT data for each patient
patients=[DOIpath+patient_id for patient_id in meta['Patient Id']]
datfolder=[]
# Iterate over every CT row. The previous `range(0, len(meta)-1)` stopped one
# row early, so the last series was never looked up.
for patient_dir, series_uid in zip(patients, meta['Series UID']):
    for path in os.listdir(patient_dir):
        series_dir=patient_dir+'/'+path+'/'+series_uid
        if os.path.exists(series_dir):
            datfolder.append(series_dir)
# From here on `patients` holds the series folders that exist on disk.
patients=datfolder

In [None]:
# Constants read by dice_coef() and unet_model() below.
smooth = 1.0  # added to both numerator and denominator of the Dice ratio
width = 32  # filter count of the first U-Net stage; other stages use multiples of it

def dice_coef(y_true, y_pred):
    """Return the smoothed Dice coefficient of two tensors.

    Both tensors are flattened, so the score is computed over every element
    passed in at once. The module-level ``smooth`` constant is added to the
    numerator and the denominator.
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    total = K.sum(truth) + K.sum(prediction)
    return (2. * overlap + smooth) / (total + smooth)
def dice_coef_loss(y_true, y_pred):
    """Loss form of ``dice_coef``: its negation, so a lower loss means a higher Dice score."""
    score = dice_coef(y_true, y_pred)
    return -score

def unet_model():
    """Build and compile the 2-D U-Net used for nodule segmentation.

    The network takes channels-first input of shape ``(1, 512, 512)``. Five
    convolutional stages (``width`` up to ``width*16`` filters) are separated
    by 2x2 max-pooling; four decoder stages then upsample by 2, concatenate the
    matching encoder output along the channel axis, apply spatial dropout
    (rate 0.35) and two 3x3 convolutions. A final 1x1 sigmoid convolution
    produces a single-channel output map.

    The body uses the Keras 2 layer API (``padding=``, tuple kernel sizes,
    ``concatenate``, ``Model(inputs=, outputs=)``) and passes
    ``data_format='channels_first'`` explicitly, so it does not rely on the
    Keras 1 compatibility shims or on a global image-data-format setting.

    Returns:
        A ``Model`` compiled with Adam (learning rate 1e-5), ``dice_coef_loss``
        as the loss and ``dice_coef`` as a metric.
    """
    # Function-scope import: `concatenate` replaces the Keras 1 call
    # `merge([...], mode='concat', concat_axis=1)`.
    from keras.layers import concatenate

    data_format = 'channels_first'  # tensors are laid out (batch, channel, height, width)
    channel_axis = 1

    def conv3x3(filters, tensor):
        """Apply a same-padded 3x3 ReLU convolution."""
        return Conv2D(filters, (3, 3), activation='relu', padding='same',
                      data_format=data_format)(tensor)

    def encoder_block(filters, tensor):
        """conv -> batch-norm over the channel axis -> conv."""
        tensor = conv3x3(filters, tensor)
        tensor = BatchNormalization(axis=channel_axis)(tensor)
        return conv3x3(filters, tensor)

    def downsample(tensor):
        """Halve height and width with 2x2 max-pooling."""
        return MaxPooling2D(pool_size=(2, 2), data_format=data_format)(tensor)

    def decoder_block(filters, tensor, skip):
        """Upsample x2, concatenate the skip tensor, dropout, then conv -> conv."""
        upsampled = UpSampling2D(size=(2, 2), data_format=data_format)(tensor)
        merged = concatenate([upsampled, skip], axis=channel_axis)
        merged = SpatialDropout2D(0.35, data_format=data_format)(merged)
        merged = conv3x3(filters, merged)
        return conv3x3(filters, merged)

    inputs = Input((1, 512, 512))

    # Contracting path; each stage output is kept for the skip connections.
    conv1 = encoder_block(width, inputs)
    conv2 = encoder_block(width*2, downsample(conv1))
    conv3 = encoder_block(width*4, downsample(conv2))
    conv4 = encoder_block(width*8, downsample(conv3))
    conv5 = encoder_block(width*16, downsample(conv4))

    # Expanding path, mirroring the encoder filter counts.
    conv6 = decoder_block(width*8, conv5, conv4)
    conv7 = decoder_block(width*4, conv6, conv3)
    conv8 = decoder_block(width*2, conv7, conv2)
    conv9 = decoder_block(width, conv8, conv1)

    # 1x1 convolution down to one sigmoid-activated output channel.
    conv10 = Conv2D(1, (1, 1), activation='sigmoid', data_format=data_format)(conv9)

    model = Model(inputs=inputs, outputs=conv10)
    # The learning rate is passed positionally because its keyword was renamed
    # from `lr` to `learning_rate` between Keras releases.
    model.compile(optimizer=Adam(1e-5), loss=dice_coef_loss, metrics=[dice_coef])
    return model

In [None]:
# Build a fresh U-Net and train it on the training split.
model=unet_model()

# The weights file is overwritten whenever the monitored quantity improves.
# NOTE(review): monitor='loss' follows the *training* loss even though
# validation data is supplied below; consider 'val_loss'. Note also that the
# validation data is the same split that is later used as the test set.
filepath=weightsfolder+"/unet-weights-improvement.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True)

# `epochs` is the Keras 2 name of the Keras 1 `nb_epoch` argument.
history=model.fit(
    imagestrain, maskstrain,
    batch_size=4,
    epochs=20,
    verbose=1,
    shuffle=True,
    callbacks=[checkpoint],
    validation_data=(imagestest,maskstest),
)

In [None]:
# Dice coefficient per epoch: training curve in blue, validation curve in green.
fig, ax = plt.subplots()
for metric_key, line_color in (('dice_coef', 'b'), ('val_dice_coef', 'g')):
    ax.plot(history.history[metric_key], color=line_color)
ax.set_xlabel("Epoch")
ax.set_ylabel("Dice Coefficient")
ax.legend(["Train", "Validation"])
plt.show()

In [None]:
# Loss and dice_coef metric on the held-out split, evaluated in batches of 4.
model.evaluate(imagestest,maskstest, batch_size=4)

In [None]:
num_test=imagestest.shape[0]

# Predict the whole held-out split in one call (mini-batches of 4, the batch
# size used for training) instead of one predict() call per sample.
imgs_mask_test = np.asarray(
    model.predict(imagestest, batch_size=4, verbose=0), dtype=np.float32)

# Per-sample overlap: sum over all pixels of ground-truth mask times the
# model's sigmoid output.
sumoverlap=[np.sum(maskstest[i,0]*imgs_mask_test[i,0]) for i in range(num_test)]

# Fraction of held-out samples whose overlap with the ground truth exceeds 1.
len([ov for ov in sumoverlap if ov>1])/len(sumoverlap)

In [None]:
# Show the predicted mask, the ground-truth mask and the input image for one
# held-out sample, each in its own grayscale figure.

index = 47
panels = (
    ("Predicted", imgs_mask_test),
    ("Ground Truth", maskstest),
    ("Image", imagestest),
)
for caption, stack in panels:
    print(caption)
    plt.imshow(stack[index, 0], cmap="gray")
    plt.show()