In [None]:
import numpy as np
import pandas as pd
import os
# Show what the input directory contains before touching any data.
input_entries = os.listdir("../input")
print(input_entries)
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras import layers
from keras.models import Sequential
from keras import optimizers
from keras import backend as K
from keras.applications.densenet import DenseNet121
from keras.callbacks import EarlyStopping, ReduceLROnPlateau,ModelCheckpoint
import matplotlib.pyplot as plt
import PIL
import csv
import cv2

In [None]:
# Directories holding the training and the test images.
TRAIN_DIR, TEST_DIR = '../input/train', '../input/test'

# Side length (pixels) used as the generators' target size and the model input.
IMG_ORIG_SIZE = 96
# Number of images per generator batch.
BATCH_SIZE = 32

In [None]:
# Load the training labels and print how many samples each class has.
df = pd.read_csv("../input/train_labels.csv")
print(df.loc[:, 'label'].value_counts())

In [None]:
# Show a 5x5 grid with the first 25 training images labelled 1 (cancer).
cancer_images = df.loc[df['label'] == 1, 'id'].values
plt.rcParams['figure.figsize'] = (10.0, 10.0)
plt.subplots_adjust(wspace=0, hspace=0)
for cell in range(1, 26):
    tile = cv2.imread('/'.join([TRAIN_DIR, cancer_images[cell - 1] + '.tif']))
    # Green box around the central 32x32 region
    # (presumably the area the label refers to -- TODO confirm).
    cv2.rectangle(tile, (32, 32), (64, 64), (0, 255, 0), 2)
    plt.subplot(5, 5, cell)
    # cv2.imread returns BGR; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(tile, cv2.COLOR_BGR2RGB))
    plt.axis('off')

In [None]:
# Show a 5x5 grid with the first 25 training images labelled 0 (no cancer).
no_cancer_images = df.loc[df['label'] == 0, 'id'].values
plt.rcParams['figure.figsize'] = (10.0, 10.0)
plt.subplots_adjust(wspace=0, hspace=0)
for cell in range(1, 26):
    tile = cv2.imread('/'.join([TRAIN_DIR, no_cancer_images[cell - 1] + '.tif']))
    # Green box around the central 32x32 region
    # (presumably the area the label refers to -- TODO confirm).
    cv2.rectangle(tile, (32, 32), (64, 64), (0, 255, 0), 2)
    plt.subplot(5, 5, cell)
    # cv2.imread returns BGR; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(tile, cv2.COLOR_BGR2RGB))
    plt.axis('off')

In [None]:
# Hold out 10% of the labelled images for validation.
# - random_state makes the split identical on every Restart & Run All, so
#   validation scores are comparable between runs.
# - stratify keeps the class ratio the same in both parts.
train_df, valid_df = train_test_split(
    df,
    test_size=0.1,
    random_state=42,
    stratify=df['label'],
)
print(train_df['label'].value_counts())
print(valid_df['label'].value_counts())

In [None]:
# Training input pipeline: pixel values are scaled by 1/255 and images are
# randomly flipped horizontally and vertically.
trainAugs = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
)
# Batches are read from TRAIN_DIR using the ids/labels listed in train_df and
# are reshuffled, since this generator is used for fitting.
train_generator = trainAugs.flow_from_dataframe(
    dataframe=train_df,
    directory=TRAIN_DIR,
    x_col="id",
    has_ext=False,
    y_col="label",
    classes=[0, 1],
    class_mode="binary",
    target_size=(IMG_ORIG_SIZE, IMG_ORIG_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [None]:
# Validation input pipeline: only the 1/255 rescaling, no augmentation.
validAugs = ImageDataGenerator(rescale=1./255)
# shuffle=False keeps the batches in a fixed order.
valid_generator = validAugs.flow_from_dataframe(
    dataframe=valid_df,
    directory=TRAIN_DIR,
    x_col="id",
    has_ext=False,
    y_col="label",
    classes=[0, 1],
    class_mode="binary",
    target_size=(IMG_ORIG_SIZE, IMG_ORIG_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
# DenseNet121 backbone (ImageNet weights, no classification top) followed by a
# small fully connected head that ends in a single sigmoid unit.
input_tensor = layers.Input(shape=(IMG_ORIG_SIZE, IMG_ORIG_SIZE, 3))
base_model = DenseNet121(
    input_tensor=input_tensor,
    include_top=False,
    weights='imagenet',
)

x = layers.Flatten()(base_model.output)
# Two identical hidden layers of 512 ReLU units.
for _ in range(2):
    x = layers.Dense(512, activation='relu')(x)
predictions = layers.Dense(1, activation="sigmoid")(x)

model = Model(inputs=input_tensor, outputs=predictions)

model.summary()

In [None]:
#Freezing the Densenet layers and just training the FC layers added to Densenet
for layer in base_model.layers:
    layer.trainable = False

# Round the step counts UP so that the final, partially filled batch is part of
# every pass. With floor division the last partial validation batch would never
# be evaluated.
steps_per_epoch = int(np.ceil(train_df.shape[0] / BATCH_SIZE))
validation_steps = int(np.ceil(valid_df.shape[0] / BATCH_SIZE))

# Callbacks, all driven by the validation loss. The same instances are passed
# to every later fit_generator call in this notebook.
save_model_path = 'weights.hdf5'
cp = ModelCheckpoint(filepath=save_model_path, monitor='val_loss', save_best_only=True, verbose=1)
earlyStopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, restore_best_weights=True)
reduceLROnPlateau = ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.2, cooldown=1)

# Compile AFTER changing `trainable`, so the frozen state is what gets trained.
model.compile(optimizers.Adam(0.001), loss = "binary_crossentropy", metrics=["accuracy"])
history = model.fit_generator(generator=train_generator,
                    validation_data=valid_generator,
                    steps_per_epoch=steps_per_epoch,
                    validation_steps=validation_steps,
                    callbacks=[earlyStopping, reduceLROnPlateau,cp],
                    epochs=3)

In [None]:
# Second stage: the first 53 layers of the model (the ones closest to the
# input) stay frozen, every later layer becomes trainable.
for position, layer in enumerate(model.layers):
    layer.trainable = position >= 53

# Recompile so the new trainable flags take effect, then train 3 more epochs.
model.compile(optimizers.Adam(0.001), loss="binary_crossentropy", metrics=["accuracy"])
history = model.fit_generator(
    generator=train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=3,
    validation_data=valid_generator,
    validation_steps=validation_steps,
    callbacks=[earlyStopping, reduceLROnPlateau, cp],
)

In [None]:
# Final stage: make every layer of the model trainable.
for unfrozen_layer in model.layers:
    unfrozen_layer.trainable = True

# Recompile so the new trainable flags take effect, then train up to 10 epochs.
model.compile(optimizers.Adam(0.001), loss="binary_crossentropy", metrics=["accuracy"])
history = model.fit_generator(
    generator=train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=10,
    validation_data=valid_generator,
    validation_steps=validation_steps,
    callbacks=[earlyStopping, reduceLROnPlateau, cp],
)

In [None]:
# Accuracy and loss curves for the most recent fit call only: `history` is
# reassigned by every training stage.
for metric, title, ylabel in (('acc', 'Model accuracy', 'Accuracy'),
                              ('loss', 'Model loss', 'Loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    # The second curve comes from the validation generator, not from test data.
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

In [None]:
# Per-epoch training and validation accuracy of the most recent fit call.
for metric_name in ('acc', 'val_acc'):
    print(history.history[metric_name])

In [None]:
#Running model on validation set to analyse results
# The generator is rebuilt with class_mode="binary" (not None) so that every
# batch is an (images, labels) pair and the true label of each sample stays
# available from the generator for the result analysis.
# shuffle=False keeps the batches in a fixed order, so val_preds[k] belongs to
# the k-th sample the generator produces.
validAugs = ImageDataGenerator(rescale=1./255)
valid_generator = validAugs.flow_from_dataframe(
    dataframe = valid_df,
    directory = TRAIN_DIR,
    x_col = "id",
    y_col = "label",
    has_ext = False,
    classes = [0,1],
    class_mode = "binary",
    target_size = (IMG_ORIG_SIZE,IMG_ORIG_SIZE),
    shuffle=False,
    batch_size = BATCH_SIZE)

# predict_generator uses only the image part of each batch.
val_preds = model.predict_generator(valid_generator)

In [None]:
#Displaying some True positives (label 1, predicted probability > 0.5)
plt.rcParams['figure.figsize'] = (20.0, 20.0)
plt.subplots_adjust(wspace=0, hspace=0)
# True labels are looked up by image id, so this cell works whether or not the
# generator itself yields labels (assumes ids contain no '.' -- TODO confirm).
label_by_id = dict(zip(valid_df['id'], valid_df['label'].astype(int)))
i = 0
for n_sample in range(len(val_preds)):
    if i >= 5:
        break
    # The generator is not shuffled, so prediction k belongs to filenames[k].
    image_id = os.path.splitext(os.path.basename(valid_generator.filenames[n_sample]))[0]
    target = label_by_id[image_id]
    if target > 0.5 and val_preds[n_sample][0] > 0.5:
        # Load the batch from disk only for samples that are actually shown.
        n_batch, index = divmod(n_sample, BATCH_SIZE)
        batch = valid_generator[n_batch]
        images = batch[0] if isinstance(batch, tuple) else batch
        img = (images[index]*255).astype('uint8')
        cv2.rectangle(img, (32,32), (64,64), (0,255,0), 2)
        plt.subplot(5, 5, i+1)
        # Generator images are already RGB; a BGR->RGB conversion would swap
        # the red and blue channels.
        plt.imshow(img)
        plt.axis('off')
        i += 1
    

In [None]:
#Displaying some True negatives (label 0, predicted probability < 0.5)
plt.rcParams['figure.figsize'] = (20.0, 20.0)
plt.subplots_adjust(wspace=0, hspace=0)
# True labels are looked up by image id, so this cell works whether or not the
# generator itself yields labels (assumes ids contain no '.' -- TODO confirm).
label_by_id = dict(zip(valid_df['id'], valid_df['label'].astype(int)))
i = 0
for n_sample in range(len(val_preds)):
    if i >= 5:
        break
    # The generator is not shuffled, so prediction k belongs to filenames[k].
    image_id = os.path.splitext(os.path.basename(valid_generator.filenames[n_sample]))[0]
    target = label_by_id[image_id]
    if target < 0.5 and val_preds[n_sample][0] < 0.5:
        # Load the batch from disk only for samples that are actually shown.
        n_batch, index = divmod(n_sample, BATCH_SIZE)
        batch = valid_generator[n_batch]
        images = batch[0] if isinstance(batch, tuple) else batch
        img = (images[index]*255).astype('uint8')
        cv2.rectangle(img, (32,32), (64,64), (0,255,0), 2)
        plt.subplot(5, 5, i+1)
        # Generator images are already RGB; a BGR->RGB conversion would swap
        # the red and blue channels.
        plt.imshow(img)
        plt.axis('off')
        i += 1

In [None]:
#Displaying some False positives (label 0, predicted probability > 0.5)
plt.rcParams['figure.figsize'] = (20.0, 20.0)
plt.subplots_adjust(wspace=0, hspace=0)
# True labels are looked up by image id, so this cell works whether or not the
# generator itself yields labels (assumes ids contain no '.' -- TODO confirm).
label_by_id = dict(zip(valid_df['id'], valid_df['label'].astype(int)))
i = 0
for n_sample in range(len(val_preds)):
    if i >= 5:
        break
    # The generator is not shuffled, so prediction k belongs to filenames[k].
    image_id = os.path.splitext(os.path.basename(valid_generator.filenames[n_sample]))[0]
    target = label_by_id[image_id]
    if target < 0.5 and val_preds[n_sample][0] > 0.5:
        # Load the batch from disk only for samples that are actually shown.
        n_batch, index = divmod(n_sample, BATCH_SIZE)
        batch = valid_generator[n_batch]
        images = batch[0] if isinstance(batch, tuple) else batch
        img = (images[index]*255).astype('uint8')
        cv2.rectangle(img, (32,32), (64,64), (0,255,0), 2)
        plt.subplot(5, 5, i+1)
        # Generator images are already RGB; a BGR->RGB conversion would swap
        # the red and blue channels.
        plt.imshow(img)
        plt.axis('off')
        i += 1

In [None]:
#Displaying some False negatives (label 1, predicted probability < 0.5)
plt.rcParams['figure.figsize'] = (20.0, 20.0)
plt.subplots_adjust(wspace=0, hspace=0)
# True labels are looked up by image id, so this cell works whether or not the
# generator itself yields labels (assumes ids contain no '.' -- TODO confirm).
label_by_id = dict(zip(valid_df['id'], valid_df['label'].astype(int)))
i = 0
for n_sample in range(len(val_preds)):
    if i >= 5:
        break
    # The generator is not shuffled, so prediction k belongs to filenames[k].
    image_id = os.path.splitext(os.path.basename(valid_generator.filenames[n_sample]))[0]
    target = label_by_id[image_id]
    if target > 0.5 and val_preds[n_sample][0] < 0.5:
        # Load the batch from disk only for samples that are actually shown.
        n_batch, index = divmod(n_sample, BATCH_SIZE)
        batch = valid_generator[n_batch]
        images = batch[0] if isinstance(batch, tuple) else batch
        img = (images[index]*255).astype('uint8')
        cv2.rectangle(img, (32,32), (64,64), (0,255,0), 2)
        plt.subplot(5, 5, i+1)
        # Generator images are already RGB; a BGR->RGB conversion would swap
        # the red and blue channels.
        plt.imshow(img)
        plt.axis('off')
        i += 1

In [None]:
# Predict on every file found in the test directory.
filenames = dict(id=os.listdir(TEST_DIR))
test_df = pd.DataFrame(filenames)

# Same 1/255 rescaling as for training; no labels and no shuffling, so the
# predictions stay in the generator's file order.
testAugs = ImageDataGenerator(rescale=1./255)
test_generator = testAugs.flow_from_dataframe(
    dataframe=test_df,
    directory=TEST_DIR,
    x_col="id",
    has_ext=True,
    class_mode=None,
    target_size=(IMG_ORIG_SIZE, IMG_ORIG_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=False,
)
test_preds = model.predict_generator(test_generator)

In [None]:
#Creating submission file
# One row per test image: the id (file name without '.tif') and the predicted
# probability.
filenames = test_generator.filenames
# Every file must have exactly one prediction, otherwise rows would be misaligned.
assert len(filenames) == len(test_preds), "number of predictions does not match number of test files"
# newline='' is what the csv module expects; without it, extra blank rows can
# appear on platforms that translate line endings.
with open('submission.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')
    writer.writerow(['id','label'])
    for filename, pred in zip(filenames, test_preds):
        writer.writerow([filename.replace('.tif',''), pred[0]])

In [None]:
# Persist the final model to disk.
model.save(filepath='densenet_model.hdf5')