In [None]:
import os
import shutil
import cv2
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow.keras.layers as layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from tqdm import tqdm
import glob

In [None]:
# Load the competition's label table; the frame is displayed for a quick look.
data=pd.read_csv(
    filepath_or_buffer='/kaggle/input/histopathologic-cancer-detection/train_labels.csv'
)
data.head()

In [None]:
# Distinct values of the 'label' column; reused later for folder names and class lists.
labels=data.loc[:,'label'].unique()
print(labels)

In [None]:
# Lookup from the first CSV column (image id) to the second column (label).
# Built in one pass over the two columns instead of materialising a row Series
# per record; this also avoids positional `row[0]` / `row[1]` access on a
# label-indexed Series, which pandas has deprecated.
label_map=dict(zip(data.iloc[:,0],data.iloc[:,1]))

In [None]:
# The labels now live in label_map, so drop the DataFrame reference.
del data

In [None]:
# Folder holding the raw training images.
path='/kaggle/input/histopathologic-cancer-detection/train'
# Read only the first listed file and print its array shape as a sanity check.
for file in os.listdir(path)[:1]:
    file_path=os.path.join(path,file)
    image=cv2.imread(file_path)
    print(image.shape)

In [None]:
# Working tree: <data_dir>/<split>/<label>/ for the train, valid and test splits.
data_dir='/kaggle/data'
train_path=os.path.join(data_dir,'train')
valid_path=os.path.join(data_dir,'valid')
test_path=os.path.join(data_dir,'test')
# makedirs(exist_ok=True) is idempotent: it also repairs a partially created
# tree (e.g. data_dir exists but a class folder is missing) and is safe to
# re-run, whereas creating folders only when data_dir is absent is not.
for split_path in (train_path,valid_path,test_path):
    os.makedirs(split_path,exist_ok=True)
    for label in labels:
        os.makedirs(os.path.join(split_path,str(label)),exist_ok=True)

In [None]:
# File names already handed to a split by load_data. Sampling only from the
# remaining files keeps the train / valid / test folders disjoint.
_assigned_files=set()

def load_data(mode,n,dest_dir):
    """Copy a random sample of ``2*n`` labelled images into per-class folders.

    Files are drawn from the module-level ``path`` folder, excluding any file
    that an earlier call already assigned to a split, so the same image can
    never end up in two splits. Each file is copied (not moved) into
    ``dest_dir/<label>`` where the label is looked up in ``label_map`` by the
    file name without its extension.

    Args:
        mode: 'train', 'valid' or anything else (treated as test); only
            selects the progress message.
        n: half of the number of files to copy (``2*n`` files are sampled).
        dest_dir: split folder that already contains one sub-folder per label.

    Raises:
        ValueError: from ``random.sample`` if fewer than ``2*n`` unassigned
            files remain.
        KeyError: if a sampled file has no entry in ``label_map``.
    """
    candidates=[name for name in os.listdir(path) if name not in _assigned_files]
    file_set=random.sample(candidates,2*n)
    # Reserve the sample only after it was drawn successfully.
    _assigned_files.update(file_set)
    n1,n0=0,0
    if mode=='train':
        print(f'Moving training files to {dest_dir}')
    elif mode=='valid':
        print(f'Moving validation files to {dest_dir}')
    else:
        print(f'Moving test files to {dest_dir}')
    for file in tqdm(file_set):
        file_path=os.path.join(path,file)
        # splitext copes with names containing more than one dot.
        file_name,_=os.path.splitext(file)
        file_label=label_map[file_name]
        shutil.copy(file_path,os.path.join(dest_dir,str(file_label)))
        if file_label==1:
            n1+=1
        else:
            n0+=1
    print(f'The number of examples for each class are {n1} and {n0}')

In [None]:
# Training split: load_data samples 2*n files, i.e. 20000 images here.
load_data(mode='train',n=10000,dest_dir=train_path)

In [None]:
# Validation split: load_data samples 2*n files, i.e. 14000 images here.
load_data(mode='valid',n=7000,dest_dir=valid_path)

In [None]:
# Test split: load_data samples 2*n files, i.e. 8000 images here.
load_data(mode='test',n=4000,dest_dir=test_path)

In [None]:
# All three splits have been copied; the id -> label lookup is no longer used.
del label_map

In [None]:
def find_images_paths(path):
    """Pick one random file from every sub-folder of ``path``.

    Args:
        path: directory whose entries are all folders containing files.

    Returns:
        ``(image_paths, names)``: the full path of the chosen file for each
        sub-folder, and the matching sub-folder names, in listing order.
    """
    image_paths,names=[],[]
    for folder in os.listdir(path):
        folder_path=os.path.join(path,folder)
        chosen=random.sample(os.listdir(folder_path),1)
        image_paths.extend(os.path.join(folder_path,file) for file in chosen)
        names.extend(folder for _ in chosen)
    return image_paths,names
# One example image per class folder of the training split.
image_paths,names=find_images_paths(path=train_path)

In [None]:
def plot_images(image_paths,names,row,col):
    """Show the given images in a ``row`` x ``col`` grid, titled by name.

    Args:
        image_paths: paths of the image files to display.
        names: title for each image, parallel to ``image_paths``.
        row: number of subplot rows.
        col: number of subplot columns.

    Raises:
        FileNotFoundError: if OpenCV cannot read one of the files.
    """
    fig=plt.figure(figsize=(16,16))
    for i,(image_path,name) in enumerate(zip(image_paths,names)):
        image=cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        fig.add_subplot(row,col,i+1)
        plt.title(name)
        plt.axis('off')
        # OpenCV decodes to BGR while matplotlib interprets arrays as RGB;
        # convert so the colours are not swapped on screen.
        plt.imshow(cv2.cvtColor(image,cv2.COLOR_BGR2RGB))
    plt.tight_layout()
    plt.show()
# One row, two columns: one sample image per class.
plot_images(image_paths,names,row=1,col=2)

In [None]:
# Image size requested from the directory iterators, and their batch size.
target_size,batch_size=(96,96),32

In [None]:
# Class names as strings, matching the per-label folder names on disk.
string_labels=[str(label) for label in labels]
print(string_labels)

In [None]:
# Every generator applies the VGG16 preprocessing function to its images.
vgg_preprocess=tf.keras.applications.vgg16.preprocess_input
# Options shared by all three directory iterators.
flow_options=dict(
    target_size=target_size,
    classes=string_labels,
    batch_size=batch_size,
)

# Plain generator for validation/test; the training one also flips horizontally.
datagen=ImageDataGenerator(preprocessing_function=vgg_preprocess)
augmented_datagen=ImageDataGenerator(
    preprocessing_function=vgg_preprocess,
    horizontal_flip=True,
)

train_data=augmented_datagen.flow_from_directory(directory=train_path,**flow_options)

valid_data=datagen.flow_from_directory(directory=valid_path,**flow_options)

# shuffle=False keeps the batch order fixed for the test iterator.
test_data=datagen.flow_from_directory(directory=test_path,shuffle=False,**flow_options)

In [None]:
class MCDropout(layers.Dropout):
    """Dropout layer that is always run with ``training=True``."""
    def call(self,inputs):
        # Force training mode on the parent Dropout regardless of the caller.
        dropped=super().call(inputs,training=True)
        return dropped

In [None]:
class ResnetLayer(layers.Layer):
    """Stack of (Conv2D -> BatchNormalization) pairs with a concatenated skip.

    The input runs through ``n_conv`` conv/batch-norm pairs; the result is
    concatenated with the untouched input and passed through ReLU.

    Args:
        filters: number of filters of every Conv2D in the stack.
        n_conv: number of (Conv2D, BatchNormalization) pairs.
        kernel_size: kernel size of every Conv2D.
        strides: stride of every Conv2D.
            NOTE(review): a stride other than 1 presumably changes the spatial
            size of the conv output so the concatenation with the input would
            fail - confirm before using a non-default value.
        **kwargs: forwarded to ``layers.Layer``.
    """
    def __init__(self,filters,n_conv=4,kernel_size=3,strides=1,**kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can report them.
        self.filters=filters
        self.n_conv=n_conv
        self.kernel_size=kernel_size
        self.strides=strides
        self.resnet_layers=[]
        for _ in range(n_conv):
            self.resnet_layers.append(layers.Conv2D(
                filters=filters,kernel_size=kernel_size,
                strides=strides,activation='relu',padding='same'
            ))
            self.resnet_layers.append(layers.BatchNormalization())
        # Sub-layers belong in __init__: build the merge layer once instead of
        # instantiating a fresh Concatenate on every forward pass.
        self.concat=layers.Concatenate()
    def call(self,inputs):
        output=inputs
        for residual_layer in self.resnet_layers:
            output=residual_layer(output)
        output=self.concat([output,inputs])
        return tf.keras.activations.relu(output)
    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config=super().get_config()
        config.update({
            'filters':self.filters,
            'n_conv':self.n_conv,
            'kernel_size':self.kernel_size,
            'strides':self.strides,
        })
        return config

In [None]:
class InceptionModule(layers.Layer):
    """Parallel convolution branches whose outputs are concatenated.

    One branch is built per entry of ``filter_list``:

    * index 0: a single 1x1 convolution;
    * last index: 3x3 max-pooling (stride 1) followed by a 1x1 convolution;
    * any index ``i`` in between: a 1x1 convolution followed by a
      ``(2*i-1) x (2*i-1)`` convolution.

    Inside a branch the layers are applied one after another, so the 1x1
    convolution feeds the larger convolution and the pooling feeds its 1x1
    projection. All branch outputs are concatenated and passed through ReLU.

    NOTE(review): ``2*i-1`` gives kernel sizes 1 and 3 for the middle branches;
    confirm whether ``2*i+1`` (3 and 5) was intended.

    Args:
        filter_list: filter count per branch; values are converted with
            ``int`` because Conv2D needs integer filter counts.
        **kwargs: forwarded to ``layers.Layer``.
    """
    def __init__(self,filter_list,**kwargs):
        super().__init__(**kwargs)
        # Callers may pass floats such as filters/2; normalise to ints once.
        self.filter_list=[int(n_filters) for n_filters in filter_list]
        last=len(self.filter_list)-1
        branches=[]
        for i,n_filters in enumerate(self.filter_list):
            if i==0:
                branch=[self._conv(n_filters,1)]
            elif i==last:
                branch=[
                    layers.MaxPool2D(pool_size=3,strides=1,padding='same'),
                    self._conv(n_filters,1),
                ]
            else:
                branch=[self._conv(n_filters,1),self._conv(n_filters,2*i-1)]
            branches.append(branch)
        # Each inner list is one branch; its layers are chained in call().
        self.inception_module=branches
        self.concat=layers.Concatenate()
    @staticmethod
    def _conv(n_filters,kernel_size):
        """Build the ReLU, stride-1, same-padded Conv2D used by every branch."""
        return layers.Conv2D(
            filters=n_filters,kernel_size=kernel_size,
            strides=1,activation='relu',padding='same'
        )
    def call(self,inputs):
        outputs=[]
        for branch in self.inception_module:
            # Chain the layers of a branch instead of feeding each one the raw
            # input, otherwise the reduce convs and the pooling are bypassed.
            branch_output=inputs
            for module in branch:
                branch_output=module(branch_output)
            outputs.append(branch_output)
        final_output=self.concat(outputs)
        return tf.keras.activations.relu(final_output)
    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config=super().get_config()
        config.update({'filter_list':self.filter_list})
        return config

In [None]:
# Dropout rate handed to build_model, which uses it for its MCDropout layers.
rate=0.45

In [None]:
def build_model(target_size,rate,n_conv=4):
    """Build and compile the alternating Inception / Resnet classifier.

    The network starts with a ResnetLayer of 8 filters followed by 2x2 average
    pooling, then repeats ``floor(log2(target_size[0])) - 1`` stages. Every
    stage doubles ``filters``, applies an InceptionModule (odd stages) or a
    ResnetLayer (even stages) and pools again. The head is MCDropout ->
    1x1 Conv(4096) -> MCDropout -> 1x1 Conv(4096) -> Flatten -> softmax Dense
    with one unit per entry of the module-level ``string_labels``.

    Args:
        target_size: (height, width) of the input images; only
            ``target_size[0]`` determines the number of pooling stages.
        rate: dropout rate of both MCDropout layers.
        n_conv: number of conv/batch-norm pairs in every ResnetLayer.

    Returns:
        A ``tf.keras`` Model compiled with Adam (learning rate 1e-4),
        categorical cross-entropy loss and the accuracy metric.
    """
    steps=int(np.log2(target_size[0]))
    inputs=layers.Input(shape=(*target_size,3))
    filters=8
    x=ResnetLayer(filters=filters,n_conv=n_conv)(inputs)
    x=layers.AvgPool2D(pool_size=2,strides=2)(x)
    for stage in range(1,steps):
        filters*=2
        if stage%2==0:
            x=ResnetLayer(filters=filters,n_conv=n_conv)(x)
        else:
            # Integer division keeps the filter counts ints; `int(filters)/4`
            # and `filters/2` would hand floats to Conv2D.
            x=InceptionModule(
                filter_list=[filters//4,filters,filters*2,filters//2]
            )(x)
        x=layers.AvgPool2D(pool_size=2,strides=2)(x)
    # Two dropout + 1x1 convolution blocks in place of dense layers.
    for _ in range(2):
        x=MCDropout(rate)(x)
        x=layers.Conv2D(
            filters=4096,kernel_size=1,
            strides=1,padding='valid',activation='relu'
        )(x)
    x=layers.Flatten()(x)
    outputs=layers.Dense(units=len(string_labels),activation='softmax')(x)
    inception_resnet_model=tf.keras.models.Model(inputs=inputs,outputs=outputs)
    inception_resnet_model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    return inception_resnet_model

In [None]:
# Instantiate the compiled network with the default n_conv.
model=build_model(target_size=target_size,rate=rate)

In [None]:
# Print the layer-by-layer overview of the model built above.
model.summary()

In [None]:
# Folder that holds the checkpoint; created idempotently.
checkpoint_dir='/kaggle/check'
os.makedirs(checkpoint_dir,exist_ok=True)
# checkpoint_path is used as the weights *file* by both the checkpoint
# callback and load_weights, so it must name a file rather than the folder.
# Keras requires the '.weights.h5' suffix for weights-only checkpoints.
checkpoint_path=os.path.join(checkpoint_dir,'best.weights.h5')

In [None]:
# Keep only the weights of the epoch with the highest validation accuracy.
model_checkpoint=ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
)

In [None]:
# train_data is a directory iterator created with its own batch_size, so it
# already yields batches; fit() must not be given a batch_size for such input.
history=model.fit(
    x=train_data,
    callbacks=[model_checkpoint],
    validation_data=valid_data,
    epochs=50
)

In [None]:
# Restore the weights saved by the checkpoint callback (best val_accuracy).
model.load_weights(checkpoint_path)

In [None]:
def predict(data,steps=20):
    """Return the accuracy (in percent) of averaged repeated predictions.

    The module-level ``model`` predicts on ``data`` ``steps`` times; the
    predicted probabilities are averaged over the runs and the arg-max class
    is compared with ``data.classes`` through a confusion matrix.

    Args:
        data: iterator accepted by ``model.predict`` that exposes ``classes``.
        steps: number of prediction passes to average.

    Returns:
        Percentage of samples on the confusion-matrix diagonal.
    """
    passes=[]
    for _ in tqdm(range(steps)):
        passes.append(model.predict(data))
    mean_probs=np.stack(passes).mean(axis=0)
    predicted=np.argmax(mean_probs,axis=-1)
    cm=confusion_matrix(y_true=data.classes,y_pred=predicted)
    return cm.trace()/cm.sum()*100

In [None]:
# Averaged-prediction accuracy on the test iterator (default 20 passes).
acc=predict(data=test_data)
print(f'Accuracy on the test dataset is {acc}%')

In [None]:
# Training and validation loss per epoch, taken from the fit() history.
loss=history.history['loss']
val_loss=history.history['val_loss']
fig,ax=plt.subplots()
ax.set_title('Loss vs Epochs')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
for series,fmt in ((loss,'ro--'),(val_loss,'bo--')):
    ax.plot(series,fmt)
ax.legend(['Train','Valid'])
plt.show()