In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from tensorflow import keras

# Apply matplotlib's 'fivethirtyeight' style sheet to the figures drawn in this notebook.
plt.style.use('fivethirtyeight')
# IPython magic: select the inline backend so figures are rendered in the cell output.
%matplotlib inline

In [None]:
import os
# Image folders of the competition data set. The trailing slash is kept because
# file names are appended to train_dir by plain string concatenation later on.
train_dir = "../input/histopathologic-cancer-detection/train/"
test_dir = "../input/histopathologic-cancer-detection/test/"

# Training label table and the submission template; every column is read as str.
data = pd.read_csv(
    "../input/histopathologic-cancer-detection/train_labels.csv",
    dtype=str,
)
test = pd.read_csv(
    "../input/histopathologic-cancer-detection/sample_submission.csv",
    dtype=str,
)

# Raw directory listings of both image folders.
train_files = os.listdir(train_dir)
test_files = os.listdir(test_dir)

In [None]:
%%time

from distutils.dir_util import copy_tree

os.mkdir('test_folder')
os.mkdir('test_folder/test_images')

fromDirectory = test_dir
toDirectory = "test_folder/test_images"

copy_tree(fromDirectory, toDirectory, verbose=0)

test_file = "test_folder"


In [None]:
# Plain-text preview of the first five rows of the training label table.
print(data.head(n=5))

In [None]:
def _with_tif_suffix(image_id):
    """Return `image_id` ending in exactly one '.tif' extension."""
    return image_id if image_id.endswith(".tif") else image_id + ".tif"


# The CSV ids carry no extension; the image generators need real file names.
# The guard in _with_tif_suffix keeps this cell idempotent: re-running it no
# longer turns "x.tif" into "x.tif.tif".
data["id"] = data["id"].apply(_with_tif_suffix)
test["id"] = test["id"].apply(_with_tif_suffix)

# Two training images are excluded from the label table
# (the reason is not recorded in this notebook).
excluded_ids = [
    'dd6dfed324f9fcb6f93f46f32fc800f2ec196be2.tif',
    '9369c7278ec8bcc6c880d99194de09fc2bd4efbe.tif',
]
data = data[~data["id"].isin(excluded_ids)]
print(data.head())

In [None]:
samples = 8

# Show the first `samples` training images in one row, titled with their label.
# Rows are taken by position (.iloc): the label table's index has gaps after
# rows were filtered out, so label-based access such as data.id[idx] can raise
# KeyError.
preview = data.iloc[:samples]

fig, axes = plt.subplots(1, len(preview), figsize=(20, 2))
# plt.subplots returns a bare Axes (not an array) when there is a single
# column; np.atleast_1d makes the loop work for samples == 1 as well.
for ax, image_id, label in zip(np.atleast_1d(axes), preview["id"], preview["label"]):
    # cv2.imread yields BGR channel order while imshow expects RGB; convert so
    # the colors are displayed correctly.
    image = cv2.cvtColor(cv2.imread(train_dir + image_id), cv2.COLOR_BGR2RGB)
    ax.imshow(image)
    ax.set_title("Label: " + str(label))

In [None]:
# Number of training rows per label value.
print(data["label"].value_counts())

In [None]:
from plotly.graph_objects import Figure, Pie

# Display name per label value (labels are strings because the CSV was read
# with dtype=str). NOTE(review): assumes "1" marks a cancer-positive patch in
# train_labels.csv -- confirm against the data description.
label_names = {"0": "No Cancer Detected", "1": "Cancer Detected"}

labels = list(label_names.values())
# value_counts() orders by frequency, not by label, so pairing it positionally
# with `labels` is only right while "0" is the majority class. Reindexing by
# label value pins each count to its name and color.
values = data["label"].value_counts().reindex(list(label_names), fill_value=0)

graph = Figure(
    data=[
        Pie(
            labels=labels,
            values=values,
            hole=0.5,
            marker_colors=["rgb(0, 203, 0)", "rgb(203, 0, 0)"],
        )
    ]
)
graph.show()

In [None]:
from sklearn.model_selection import train_test_split

# File-name and label columns of the training table.
X = data["id"]
Y = data["label"]

# Split the whole label table 80/20 into `trainer` / `tester`, stratified on
# the label column, with a fixed random_state for a repeatable split.
trainer, tester = train_test_split(
    data,
    test_size=0.20,
    random_state=42,
    stratify=Y,
)


In [None]:
%%time
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rescale=1.0/255, validation_split = 0.25)
testgen = ImageDataGenerator(rescale=1.0/255)


train_gen = datagen.flow_from_dataframe(dataframe=trainer, directory = train_dir,x_col="id", y_col="label", class_mode="binary" , target_size=(96,96), batch_size=100, subset = "training", seed = 42)

valid_gen = datagen.flow_from_dataframe(dataframe=trainer, directory = train_dir,x_col="id", y_col="label", class_mode="binary" , target_size=(96,96), batch_size=100, subset = "validation", seed = 42)


test_gen = testgen.flow_from_dataframe(dataframe=tester, directory = train_dir,x_col="id", y_col="label", seed = 42, class_mode="binary", target_size=(96,96), batch_size=1, shuffle = False)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, Dropout, MaxPooling2D
from tensorflow.distribute import MirroredStrategy


# Binary classifier for 96x96 RGB patches: two blocks of three 3x3 convolutions
# (32 then 64 filters), each followed by dropout and 3x3 max pooling, then
# three single convolutions (128, 256, 512 filters), a 512-unit dense layer and
# a single sigmoid output.
#
# Both model construction and compile() run inside the strategy scope, as
# TensorFlow's distributed-training guidance requires for everything that
# creates variables (weights, optimizer slots, metrics).
strat = MirroredStrategy()
with strat.scope():
    this_model = Sequential(
        [
            Conv2D(filters=32, kernel_size = 3, padding='same', activation = 'relu', input_shape = (96, 96, 3), name = 'Conv2D_1'),
            Conv2D(filters=32, kernel_size = 3, padding='same', activation = 'relu', name = 'Conv2D_2'),
            Conv2D(filters=32, kernel_size = 3, padding='same', activation = 'relu', name = 'Conv2D_3'),
            Dropout(0.3, name = 'Dropout_1'),
            MaxPooling2D(pool_size = 3, name = 'MaxPooling2D_1'),

            Conv2D(filters=64, kernel_size = 3, padding='same', activation = 'relu', name = 'Conv2D_4'),
            Conv2D(filters=64, kernel_size = 3, padding='same', activation = 'relu', name = 'Conv2D_5'),
            Conv2D(filters=64, kernel_size = 3, padding='same', activation = 'relu', name = 'Conv2D_6'),
            Dropout(0.3, name = 'Dropout_2'),
            MaxPooling2D(pool_size = 3, name = 'MaxPooling2D_2'),

            # Layer names jump from Conv2D_7 to Conv2D_10: the rest of a third
            # block (Conv2D_8, Conv2D_9, Dropout_3, MaxPooling2D_3) was
            # commented out in an earlier revision of this cell.
            Conv2D(filters=128, kernel_size = 3, padding='same', activation = 'relu', name = 'Conv2D_7'),

            Conv2D(filters=256, kernel_size = 3, padding='same', activation = 'relu', name = 'Conv2D_10'),
            Conv2D(filters=512, kernel_size = 3, padding='same', activation = 'relu', name = 'Conv2D_11'),
            Flatten(name = 'Flatten_1'),
            Dense(512, activation="relu", name = 'Dense_1'),
            Dense(1, activation = "sigmoid", name = 'Dense_2')
        ]
    )
    this_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

this_model.summary()

In [None]:
# Callbacks come from tensorflow.keras, the same package the model was built
# with; callback classes from the standalone `keras` package are not
# guaranteed to work with a tf.keras model.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# Must match the batch_size the train/validation generators were created with.
batch_size = 100
num_samples = len(data)

# Stop after 3 epochs without val_loss improvement, and keep only the weights
# of the best epoch (lowest val_loss) on disk.
es = EarlyStopping(monitor = "val_loss", patience = 3)
cp = ModelCheckpoint(filepath = "best_weights.hdf5", monitor = "val_loss", verbose=1, save_best_only=True)
cb = [cp, es]

# Whole batches per epoch; the floor division drops a trailing partial batch.
eps = train_gen.n // batch_size
valid_steps = valid_gen.n // batch_size

# Model.fit accepts generators directly; fit_generator is deprecated.
prev_model = this_model.fit(
    train_gen,
    epochs = 5,
    steps_per_epoch = eps,
    validation_data = valid_gen,
    validation_steps = valid_steps,
    callbacks = cb,
)


In [None]:

# Restore the best checkpoint written during training and rewind the hold-out
# generator so prediction starts at its first sample.
this_model.load_weights('best_weights.hdf5')
test_gen.reset()

# len(test_gen) is the number of batches, so every hold-out image is
# predicted. The previous `len(test_gen.classes) - 1` dropped the last sample.
# Model.predict accepts generators directly; predict_generator is deprecated.
model_predict = this_model.predict(test_gen, steps=len(test_gen), verbose=1)


In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# The network ends in a single sigmoid unit, so model_predict has one column
# of probabilities. argmax over that axis is always 0; threshold at 0.5 to get
# the predicted class instead.
val_preds = (np.asarray(model_predict).ravel() > 0.5).astype(int)

# The predictions were produced from test_gen (unshuffled), so the matching
# ground truth is test_gen.classes, not valid_gen.classes. Truncate to the
# number of predictions so both arrays always have the same length.
val_trues = np.asarray(test_gen.classes)[:len(val_preds)]

cm = confusion_matrix(val_trues, val_preds)
print(cm)

In [None]:
# Per-epoch accuracy recorded by training: train curve first, validation
# second, matching the legend order below.
for metric_key in ('accuracy', 'val_accuracy'):
    plt.plot(prev_model.history[metric_key])

plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Validation'], loc='best')
plt.show()


In [None]:
# Per-epoch loss recorded by training: train curve first, validation second,
# matching the legend order below.
for metric_key in ('loss', 'val_loss'):
    plt.plot(prev_model.history[metric_key])

plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Validation'], loc='best')
plt.show()

In [None]:
from tensorflow.keras.utils import plot_model

# Write a diagram of the network, including layer shapes, to a PNG file.
dot_img_file = '/tmp/this_model.png'
plot_model(
    this_model,
    show_shapes=True,
    to_file=dot_img_file,
)


In [None]:
# `shutil` is not imported anywhere else in the notebook, so import it here;
# without it this cell fails with NameError.
import shutil

# Remove the staged copy of the test images. ignore_errors=True keeps the cell
# re-runnable once the folder is already gone.
shutil.rmtree(test_file, ignore_errors=True)