In [14]:
import numpy as np 
import pandas as pd 
from glob import glob 
from skimage.io import imread 
import os
import shutil
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.nasnet import NASNetMobile
from keras.applications.xception import Xception
from keras.utils.vis_utils import plot_model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D, Average, Input, Concatenate, GlobalMaxPooling2D
from keras.models import Model
from keras.callbacks import CSVLogger, ModelCheckpoint
from keras.optimizers import Adam
from livelossplot import PlotLossesKeras

In [25]:
# Output files

TRAINING_LOGS_FILE = "training_logs.csv"
MODEL_SUMMARY_FILE = "model_summary.txt"
MODEL_PLOT_FILE = "model_plot.png"
MODEL_FILE = "model.h5"
TRAINING_PLOT_FILE = "training.png"
VALIDATION_PLOT_FILE = "validation.png"

ROC_PLOT_FILE = "roc.png"
KAGGLE_SUBMISSION_FILE = "kaggle_submission.csv"

In [97]:
#Hyper Parameter

SAMPLE_COUNT = 85000
TRAINING_RATIO = 0.25
IMAGE_SIZE = 224
EPOCHS = 10
BATCH_SIZE = 192
VERBOSITY = 1

TESTING_BATCH_SIZE = 5000

In [73]:
training_path = (r"C:/Users/Kul Garima/histopathologic-cancer-detection/training")
training_dir = input_dir + 'train/'

data_frame = pd.DataFrame({'path': glob(os.path.join(training_dir,'*.tif'))})
print(data_frame)

                                                     path
0       C:/Users/Kul Garima/histopathologic-cancer-det...
1       C:/Users/Kul Garima/histopathologic-cancer-det...
2       C:/Users/Kul Garima/histopathologic-cancer-det...
3       C:/Users/Kul Garima/histopathologic-cancer-det...
4       C:/Users/Kul Garima/histopathologic-cancer-det...
5       C:/Users/Kul Garima/histopathologic-cancer-det...
6       C:/Users/Kul Garima/histopathologic-cancer-det...
7       C:/Users/Kul Garima/histopathologic-cancer-det...
8       C:/Users/Kul Garima/histopathologic-cancer-det...
9       C:/Users/Kul Garima/histopathologic-cancer-det...
10      C:/Users/Kul Garima/histopathologic-cancer-det...
11      C:/Users/Kul Garima/histopathologic-cancer-det...
12      C:/Users/Kul Garima/histopathologic-cancer-det...
13      C:/Users/Kul Garima/histopathologic-cancer-det...
14      C:/Users/Kul Garima/histopathologic-cancer-det...
15      C:/Users/Kul Garima/histopathologic-cancer-det...
16      C:/Use

In [98]:
# Data 
#input_dir
input_dir = (r"C:/Users/Kul Garima/histopathologic-cancer-detection/")
training_dir = input_dir + 'train/'

data_frame = pd.DataFrame({'path': glob(os.path.join(training_dir,'*.tif'))})

data_frame['id'] = data_frame.path.map(lambda x: x.split('/')[2].split('.')[0])
labels = pd.read_csv(input_dir + 'train_labels.csv')
data_frame = data_frame.merge(labels, on='id')

negatives = data_frame[data_frame.label == 0].sample(SAMPLE_COUNT) if len(data_frame) > 0 else data_frame

positives = data_frame[data_frame.label == 1].sample(SAMPLE_COUNT) if len(data_frame) > 0 else data_frame

data_frame = pd.concat([negatives, positives]).reset_index()
data_frame = data_frame[['path', 'id', 'label']]
data_frame['image'] = data_frame['path'].map(imread)

training_path = (r"C:/Users/Kul Garima/histopathologic-cancer-detection/training")
validation_path = (r"C:/Users/Kul Garima/histopathologic-cancer-detection/validation")

for folder in [training_path, validation_path]:
    for subfolder in ['0', '1']:
        path = os.path.join(folder, subfolder)
        os.makedirs(path, exist_ok=True)

training, validation = train_test_split(data_frame,n_sample = 0 train_size=TRAINING_RATIO,test_size= 0.25, stratify=data_frame['label'])
data_frame.set_index('id', inplace=True)

for images_and_path in [(training, training_path), (validation, validation_path)]:
    images = images_and_path[0]
    path = images_and_path[1]
    for image in images['id'].values:
        file_name = image + '.tif'
        label = str(data_frame.loc[image,'label'])
        destination = os.path.join(path, label, file_name)
        
        if not os.path.exists(destination):
            source = os.path.join(input_dir + 'train', file_name)
            shutil.copyfile(source, destination)
            
             


ValueError: With n_samples=0, test_size=0.25 and train_size=0.25, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [100]:
# Data augmentation
training_data_generator = ImageDataGenerator(rescale=1./255,
                                             horizontal_flip=True,
                                             vertical_flip=True,
                                             rotation_range=180,
                                             zoom_range=0.4, 
                                             width_shift_range=0.3,
                                             height_shift_range=0.3,
                                             shear_range=0.3,
                                             channel_shift_range=0.3)

In [101]:
# Data generation

training_generator = training_data_generator.flow_from_directory(training_path,
                                                                 target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                                                 batch_size=BATCH_SIZE,
                                                                 class_mode='binary')

validation_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(validation_path,
                                                                              target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                                                              batch_size=BATCH_SIZE,
                                                                              class_mode='binary')

testing_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(validation_path,
                                                                           target_size=(IMAGE_SIZE,IMAGE_SIZE),
                                                                           batch_size=BATCH_SIZE,
                                                                           class_mode='binary',
                                                                           shuffle=False)


Found 0 images belonging to 2 classes.
Found 0 images belonging to 2 classes.
Found 0 images belonging to 2 classes.


In [None]:
# Model

input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
inputs = Input(input_shape)

xception = Xception(include_top=False, input_shape=input_shape)(inputs)
nas_net = NASNetMobile(include_top=True, input_shape=input_shape)(inputs)

outputs = Concatenate(axis=-1)([GlobalAveragePooling2D()(xception), GlobalAveragePooling2D()(nas_net)])
outputs = Dropout(0.5)(outputs)
outputs = Dense(1, activation='sigmoid')(outputs)

model = Model(inputs, outputs)
model.compile(optimizer=Adam(lr=0.0001, decay=0.00001),
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()

plot_model(model,
           to_file=MODEL_PLOT_FILE,
           show_shapes=True,
           show_layer_names=True)

In [None]:
#  Training

history = Model.fit_generator(training_generator,
                              steps_per_epoch=len(training_generator), 
                              validation_data=validation_generator,
                              validation_steps=len(validation_generator),
                              epochs=EPOCHS,
                              verbose=VERBOSITY,
                              callbacks=[PlotLossesKeras(),
                                         ModelCheckpoint(MODEL_FILE,
                                                         monitor='val_acc',
                                                         verbose=VERBOSITY,
                                                         save_best_only=True,
                                                         mode='max'),
                                         CSVLogger(TRAINING_LOGS_FILE,
                                                   append=False,
                                                   separator=';')])


In [26]:
# Kaggle testing

testing_files = glob(os.path.join(r'C:/Users/Kul Garima/.jupyter/Dataset Cancer/histopathologic-cancer-detection/input/test/','*.tif'))
submission = pd.DataFrame()

for index in range(0, len(testing_files), TESTING_BATCH_SIZE):
    
    data_frame = pd.DataFrame({'path': testing_files[index:index+TESTING_BATCH_SIZE]})
    data_frame['id'] = data_frame.path.map(lambda x: x.split('/')[3].split(".")[0])
    data_frame['image'] = data_frame['path'].map(imread)
    
    images = np.stack(data_frame.image, axis=0)
    predicted_labels = [model.predict(np.expand_dims(image/255.0, axis=0))[0][0] for image in images]
    predictions = np.array(predicted_labels)
    data_frame['label'] = predictions
    submission = pd.concat([submission, data_frame[["id", "label"]]])
    
submission.to_csv(KAGGLE_SUBMISSION_FILE, index=False, header=True)

In [76]:
input_dir = (r"C:/Users/Kul Garima/histopathologic-cancer-detection/")
training_dir = input_dir + 'train/'

data_frame = pd.DataFrame({'path': glob(os.path.join(training_dir,'*.tif'))})

data_frame['id'] = data_frame.path.map(lambda x: x.split('/')[3].split('.')[0])
print(data_frame)

                                                     path  \
0       C:/Users/Kul Garima/histopathologic-cancer-det...   
1       C:/Users/Kul Garima/histopathologic-cancer-det...   
2       C:/Users/Kul Garima/histopathologic-cancer-det...   
3       C:/Users/Kul Garima/histopathologic-cancer-det...   
4       C:/Users/Kul Garima/histopathologic-cancer-det...   
5       C:/Users/Kul Garima/histopathologic-cancer-det...   
6       C:/Users/Kul Garima/histopathologic-cancer-det...   
7       C:/Users/Kul Garima/histopathologic-cancer-det...   
8       C:/Users/Kul Garima/histopathologic-cancer-det...   
9       C:/Users/Kul Garima/histopathologic-cancer-det...   
10      C:/Users/Kul Garima/histopathologic-cancer-det...   
11      C:/Users/Kul Garima/histopathologic-cancer-det...   
12      C:/Users/Kul Garima/histopathologic-cancer-det...   
13      C:/Users/Kul Garima/histopathologic-cancer-det...   
14      C:/Users/Kul Garima/histopathologic-cancer-det...   
15      C:/Users/Kul Gar