In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
"""for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))"""

# Any results you write to the current directory are saved as output.

In [None]:
# Work from the competition's input directory so that the relative paths used
# by the later cells ('train', 'test', the CSV files) resolve against it.
competition_dir='/kaggle/input/histopathologic-cancer-detection'
os.chdir(competition_dir)
os.listdir()
# File names of the images in the test dataset
test_files=os.listdir('test')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.applications.resnet import ResNet50
from keras.layers import Dense,Conv2D,MaxPool2D,BatchNormalization,Dropout,Flatten,AvgPool2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD,RMSprop,Adam
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA,LatentDirichletAllocation
import eli5

In [None]:
import pandas as pd
# Load the submission template and the training labels from the working directory.
sample_submission = pd.read_csv("sample_submission.csv")
train_labels = pd.read_csv("train_labels.csv")
# Store labels as strings and turn each id into its '<id>.tif' file name;
# both columns are later passed to flow_from_dataframe as y_col / x_col.
train_labels['label']=train_labels['label'].map(str)
train_labels['id']=train_labels['id'].map(str)+'.tif'

In [None]:
# Class balance: draw the number of rows per label as a bar chart and print the counts.
label_counts=train_labels['label'].value_counts()
label_counts.plot(kind='bar')
print(label_counts)
# Yellow tick labels on both axes.
plt.yticks(color='yellow')
plt.xticks(color='yellow')

In [None]:
# The dataset is imbalanced, so we will consider oversampling techniques such as ADASYN or SMOTE.

# Dataset Generator

In [None]:
%%time
# Image generators: pixel values are rescaled by 1/255; the training generator
# additionally reserves a 0.15 validation split of the labelled data.
traindir='train'
testdir='test'
train_datagen=ImageDataGenerator(rescale=1./255,validation_split=0.15)
test_datagen=ImageDataGenerator(rescale=1./255)


def _labelled_flow(subset):
    """Return the 96x96, batch-size-64 binary-label iterator for one subset of `train_labels`."""
    return train_datagen.flow_from_dataframe(
        train_labels,
        directory=traindir,
        x_col='id',
        y_col='label',
        subset=subset,
        target_size=(96,96),
        batch_size=64,
        class_mode='binary',
    )


train=_labelled_flow('training')

validation=_labelled_flow('validation')

In [None]:
# Report the exact number of images in each split. `len(iterator)` is the number
# of batches, so multiplying it by the batch size over-counts whenever the last
# batch is only partially filled; `samples` is the iterator's actual image count.
print(f"Length of the training dataset is {train.samples} ,validation {validation.samples} ,test {len(test_files)}")

In [None]:
# Visualizing some examples: the first 16 images of one training batch,
# laid out on a 4x4 grid and titled with their labels.
temp_img,temp_label=next(iter(train))
fig=plt.figure(figsize=(10,10))
for idx,(img,lab) in enumerate(zip(temp_img[:16],temp_label)):
    ax=fig.add_subplot(4,4,idx+1)
    ax.imshow(img)
    ax.set_title('Label: %s'%lab,color='yellow')

# Visualizing with t-distributed Stochastic Neighbor Embedding (t-SNE)

In [None]:
# 2-D t-SNE with PCA initialisation. The seed is fixed so that the embedding is
# repeatable for a given set of input images (t-SNE is stochastic otherwise).
tsne=TSNE(n_components=2,init='pca',random_state=42)

In [None]:
%time
ims=[] # For Images
lbs=[] # For labels
for idx,batch in enumerate(train):
    image,label=batch
    image=[i.reshape(-1) for i in image]
    ims+=image
    for i in label:
        lbs.append(i)
    if len(ims)>1000:
        print(len(ims))
        break

In [None]:
%%time
# Embed the sampled images in 2-D and plot one scatter series per class.
# A single scatter call coloured by `c=lbs` yields only one legend handle, so a
# two-entry legend would show just its first name for every point; plotting the
# classes separately gives each its own colour and legend entry.
# NOTE: `ims` is rebound to the 2-D embedding here, so re-run the collection
# cell before re-running this one.
plt.figure(figsize=(10,10))
ims=tsne.fit_transform(np.asarray(ims))
lbs_arr=np.asarray(lbs)
# Assumes label 0 is the non-cancer class, following the original legend order.
for class_value,class_name in ((0,"Not a Cancer Cell"),(1,"Cancer Cell")):
    class_mask=lbs_arr==class_value
    plt.scatter(ims[class_mask,0],ims[class_mask,1],label=class_name)
plt.legend()

In [None]:
# So we can see how the classes are clustered and how one class actually

In [None]:
# CNN for 96x96 RGB patches: four blocks of stacked 3x3 convolutions, each
# followed by 2x2 max-pooling and batch normalisation, then a dense head that
# ends in a single sigmoid unit for binary classification.
conv_blocks=[
    # Conv1 block: 16 filters
    Conv2D(16,(3,3),strides=(1,1),input_shape=(96,96,3),activation='relu'),
    Conv2D(16,(3,3),activation='relu'),
    MaxPool2D(2),
    BatchNormalization(),

    # Conv2 block: 32 filters
    Conv2D(32,(3,3),strides=1,activation='relu'),
    Conv2D(32,(3,3),strides=1,activation='relu'),
    Conv2D(32,(3,3),strides=1,activation='relu'),
    MaxPool2D(2),
    BatchNormalization(),

    # Conv3 block: 64 filters
    Conv2D(64,(3,3),strides=1,activation='relu'),
    Conv2D(64,(3,3),strides=1,activation='relu'),
    MaxPool2D(2),
    BatchNormalization(),

    # Conv4 block: 128 filters
    Conv2D(128,(3,3),strides=1,activation='relu'),
    Conv2D(128,(3,3),strides=1,activation='relu'),
    MaxPool2D(2),
    BatchNormalization(),
]

# Dense head
dense_head=[
    Flatten(),
    Dense(units=128,activation='relu'),
    Dense(units=64,activation='relu'),
    Dense(units=32,activation='relu'),
    Dense(units=1,activation='sigmoid'),
]

classifier=Sequential(conv_blocks+dense_head)

classifier.compile(
    optimizer=Adam(learning_rate=0.01),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the compiled `classifier` model.
classifier.summary()

In [None]:
# Train for 9 epochs on the training iterator, evaluating on the validation
# iterator after each epoch. `fit_generator` is deprecated (and removed in
# current Keras releases); `fit` accepts generators / Sequences directly.
history=classifier.fit(train,epochs=9,validation_data=validation)

In [None]:
def occlusion_analysis(image,label,occluding_size,occluding_pixel,occluding_stride):
    """Occlusion-sensitivity visualisation for the global ``classifier``.

    A square patch filled with ``occluding_pixel`` is slid over ``image``. For
    every patch position the occluded image is scored by ``classifier`` and the
    first model output is written into a heatmap. The heatmap and the original
    image are then drawn in one figure.

    Args:
        image: Single image array of shape (height, width, channels).
        label: Value used as the title of the image panel.
        occluding_size: Side length of the square occluding patch, in pixels.
        occluding_pixel: Value written into the occluded region.
        occluding_stride: Step, in pixels, between consecutive patch positions.

    Returns:
        2-D numpy array where entry [h, w] is the model output with the patch
        whose top-left corner is (h*occluding_stride, w*occluding_stride).

    Raises:
        ValueError: If the size or stride is not positive, or the patch is
            larger than the image.
    """
    height,width,_=image.shape
    if occluding_size<=0 or occluding_stride<=0:
        raise ValueError("occluding_size and occluding_stride must be positive")
    if occluding_size>height or occluding_size>width:
        raise ValueError("occluding_size must not exceed the image height or width")

    image=np.expand_dims(image,axis=0)

    # Number of patch positions that fit along each axis.
    output_height=int(np.floor((height-occluding_size)/occluding_stride+1))
    output_width=int(np.floor((width-occluding_size)/occluding_stride+1))
    heatmap=np.zeros((output_height,output_width))

    for h in range(output_height):
        h_start=h*occluding_stride
        h_end=h_start+occluding_size

        # Build every occluded variant for this heatmap row and score them in a
        # single predict call instead of one call per patch position.
        row_batch=np.repeat(image,output_width,axis=0)
        for w in range(output_width):
            w_start=w*occluding_stride
            w_end=w_start+occluding_size
            row_batch[w,h_start:h_end,w_start:w_end,:]=occluding_pixel

        predictions=np.asarray(classifier.predict(row_batch,verbose=0))
        # Keep the first output per image as a scalar (avoids assigning a
        # (1, 1) array to a single heatmap cell).
        heatmap[h,:]=predictions.reshape(output_width,-1)[:,0]

    f=plt.figure(figsize=(10,10))
    heatmap_ax=f.add_subplot(2,2,1)
    sns.heatmap(heatmap,xticklabels=True,ax=heatmap_ax)
    image_ax=f.add_subplot(2,2,3)
    image_ax.imshow(image[0])
    image_ax.set_title(label)
    plt.show()

    return heatmap

In [None]:
#y1,l1=next(iter(train))

In [None]:
#%%time
#occlusion_analysis(y1[2],l1[2],occluding_size=40,occluding_pixel=2,occluding_stride=1)

In [None]:
#plt.imshow(y1[2])