In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import shutil
import glob

In [None]:
# Create the folder layout read later by flow_from_directory: augmented/benign and
# augmented/malignant.
# os.makedirs(..., exist_ok=True) keeps this cell re-runnable: the previous
# os.mkdir('augmented') raised FileExistsError on a second run, and the cd magics
# changed the kernel's working directory for every later cell if the run stopped
# between `cd augmented` and `cd ..`.
os.makedirs('augmented/benign', exist_ok=True)
os.makedirs('augmented/malignant', exist_ok=True)

In [None]:
def getListOfFiles(dirName):
    """Recursively collect the paths of all files below ``dirName``.

    Args:
        dirName: Path of the directory to scan.

    Returns:
        list: One path per non-directory entry, built by joining ``dirName``
        with the entry's relative location. Entries follow ``os.listdir``
        order; the contents of a sub-directory appear at the position of that
        sub-directory.

    Raises:
        OSError: Propagated from ``os.listdir`` (for example when ``dirName``
            does not exist).
    """
    allFiles = []
    for entry in os.listdir(dirName):
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # extend() grows the list in place; the earlier
            # `allFiles = allFiles + ...` copied everything collected so far
            # once per sub-directory.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles

In [None]:
# Copy every .png slide of each class from the read-only dataset into augmented/<class>/.
breakhis_root = '../input/breakhis/BreaKHis_v1/BreaKHis_v1/histology_slides/breast'
files_benign = getListOfFiles(breakhis_root + '/benign')
files_malignant = getListOfFiles(breakhis_root + '/malignant')

for class_name, class_files in (('benign', files_benign), ('malignant', files_malignant)):
    target_dir = 'augmented/' + class_name
    # shutil.copy treats a destination that is not an existing directory as a file
    # name, so without the folder every image would overwrite one file named after
    # the class. Creating it here makes the cell safe to run on its own.
    os.makedirs(target_dir, exist_ok=True)
    for f in class_files:
        if f.endswith('.png'):
            shutil.copy(f, target_dir)

In [None]:
pwd

In [None]:
# Count the files that sit directly inside each class folder (the third item
# yielded by os.walk for the top directory is its list of file names).
benign = len(next(os.walk('augmented/benign'))[2])
onlyfiles = next(os.walk('augmented/malignant'))[2]
malignant = len(onlyfiles)
print('Total no of files in both benign and malignant is :',malignant+benign)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPooling2D,Input,Dropout
from tensorflow.keras.optimizers import Adam,RMSprop
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
%matplotlib inline

In [None]:
ig=ImageDataGenerator(rescale=1.0/255.0,validation_split=0.2)

In [None]:
path='augmented'

In [None]:
# Build the training and validation iterators from the same generator `ig`;
# the two calls are identical except for the subset they read.
train_batches, valid_batches = (
    ig.flow_from_directory(directory=path,
                           target_size=(224,224),
                           classes=['benign', 'malignant'],
                           batch_size=16,
                           subset=split_name)
    for split_name in ('training', 'validation')
)

In [None]:
type(train_batches)

In [None]:
imgs, labels = next(train_batches)

In [None]:
type(labels),type(imgs)

In [None]:
imgs.shape,labels.shape

In [None]:
def plot_metric(history, metric):
    """Plot the per-epoch training and validation curves of one metric.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value sequences (in this notebook, the value returned
            by ``fit``).
        metric: Metric name. Both ``metric`` and ``'val_' + metric`` must be
            keys of ``history.history``.
    """
    val_key = 'val_'+metric
    train_curve = history.history[metric]
    val_curve = history.history[val_key]
    # Epochs are numbered from 1 on the x axis.
    epoch_numbers = range(1, len(train_curve) + 1)

    # Training: blue dashed line with dots; validation: red solid line with dots.
    plt.plot(epoch_numbers, train_curve, 'bo--')
    plt.plot(epoch_numbers, val_curve, 'ro-')

    plt.title('Training and validation '+ metric)
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend(["train_"+metric, val_key])
    plt.show()

In [None]:
def plotImages(images_arr, max_images=10):
    """Show the first ``max_images`` images of ``images_arr`` side by side in one row.

    Args:
        images_arr: Iterable of images, each in a form ``Axes.imshow`` accepts
            (in this notebook, one batch produced by the data generator).
        max_images: Number of panels in the row (must be at least 1). Images
            beyond this count are not shown. Defaults to 10, the previous
            fixed width.
    """
    # squeeze=False always returns a 2-D array of axes, so flatten() also works
    # when max_images is 1.
    fig, axes = plt.subplots(1, max_images, figsize=(20,20), squeeze=False)
    axes = axes.flatten()
    # Hide the frame and ticks of every panel first; previously only panels that
    # received an image were switched off, so a short batch left empty boxes.
    for ax in axes:
        ax.axis('off')
    for img, ax in zip(images_arr, axes):
        ax.imshow(img)
    plt.tight_layout()
    plt.show()

In [None]:
plotImages(imgs)
print(labels)

In [None]:
train_batches.class_indices

In [None]:
# Hand made neural net trained from scratch without any image preprocessing

In [None]:
def create_model():
    """Build the small convolutional classifier used as the from-scratch baseline.

    Architecture: a 224x224x3 input, four stages of 3x3 ReLU convolution
    followed by max pooling (16, 32, 64 and 128 filters), a flatten layer, a
    256-unit ReLU dense layer, dropout with rate 0.2 and a 2-unit softmax output.

    Returns:
        The uncompiled ``Model`` connecting the input to the softmax output.
    """
    inputs = Input(batch_shape=(None,224,224,3), name='Input')

    # Conv_1/Max_pool_1 ... Conv_4/Max_pool_4: same layer names as before, the
    # filter count doubles at every stage.
    x = inputs
    for stage, n_filters in enumerate((16, 32, 64, 128), start=1):
        x = Conv2D(filters=n_filters, kernel_size=3, activation='relu', name=f'Conv_{stage}')(x)
        x = MaxPooling2D(name=f'Max_pool_{stage}')(x)

    # Classifier head.
    x = Flatten()(x)
    x = Dense(256, activation='relu', name='Dense_1')(x)
    x = Dropout(0.2, name='Dropout_1')(x)
    output = Dense(2, activation='softmax', name='Softmax_layer')(x)

    return Model(inputs=inputs, outputs=output)

In [None]:
simple_model=create_model()

In [None]:
simple_model.summary()

In [None]:
simple_model.compile(optimizer=RMSprop(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
history=simple_model.fit(x=train_batches,
    steps_per_epoch=len(train_batches),
    validation_data=valid_batches,
    validation_steps=len(valid_batches),
    epochs=10
)

In [None]:
plot_metric(history, 'accuracy')

In [None]:
plot_metric(history, 'loss')

In [None]:
# The best validation accuracy (83%) was reached at the end of the 3rd epoch.

In [None]:
# Transfer learning with ResNet50, without any custom image preprocessing

In [None]:
# Import ResNet50 from tensorflow.keras, the namespace every other Keras symbol in
# this notebook (Model, Dense, Adam, ImageDataGenerator, ...) is imported from, so
# the base network and the layers stacked on it come from one Keras implementation.
from tensorflow.keras.applications.resnet50 import ResNet50

In [None]:
restnet = ResNet50()

In [None]:
restnet.summary()

In [None]:
restnet.layers[-2].output

In [None]:
base_model=Model(inputs=restnet.inputs,outputs=restnet.layers[-2].output)

In [None]:
# for layer in base_model.layers:
#     layer.trainable=False

In [None]:
# New classification head attached to base_model's output: a 1024-unit ReLU layer
# followed by a 2-unit softmax.
x=Dense(units=1024,activation='relu',name='fc3')(base_model.output)
# Chained assignment: after this line both `outputs` and `x` refer to the softmax
# output tensor (the 1024-unit tensor is no longer reachable through `x`).
outputs=x=Dense(units=2,activation='softmax',name='Output_layer')(x)

In [None]:
final_model=Model(inputs=base_model.inputs,outputs=outputs)

In [None]:
final_model.summary()

In [None]:
final_model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
ig=ImageDataGenerator(preprocessing_function=tf.keras.applications.resnet50.preprocess_input,validation_split=0.2)

In [None]:
# Recreate the training and validation iterators from the current generator `ig`;
# the two calls are identical except for the subset they read.
train_batches, valid_batches = (
    ig.flow_from_directory(directory=path,
                           target_size=(224,224),
                           classes=['benign', 'malignant'],
                           batch_size=16,
                           subset=split_name)
    for split_name in ('training', 'validation')
)

In [None]:
history=final_model.fit(x=train_batches,
    steps_per_epoch=len(train_batches),
    validation_data=valid_batches,
    validation_steps=len(valid_batches),
    epochs=10
)

In [None]:
plot_metric(history, 'loss')

In [None]:
plot_metric(history, 'accuracy')

In [None]:
# The best validation accuracy (85%) was reached at the end of the 6th epoch.

In [None]:
# Preprocessing the images.

In [None]:
import cv2

In [None]:
def bens_color(img):
    """Contrast-enhance one image and return it with three channels.

    Intended for use as ``preprocessing_function`` of ``ImageDataGenerator``,
    which requires the function to return an array of the same shape as the one
    it receives. The earlier version returned a 2-D grayscale array, which does
    not fit the 3-channel image shape the generator works with.

    Steps: convert to grayscale, resize to 224x224, compute
    ``4 * image - 4 * GaussianBlur(image, sigma=30) + 128``, then replicate the
    result over three channels.

    Args:
        img: One 3-channel image array in RGB channel order.
            NOTE(review): RGB order is assumed because the generator loads the
            images; pass BGR data (e.g. from ``cv2.imread``) only after
            converting it.

    Returns:
        Array of shape (224, 224, 3) whose three channels are identical.
    """
    # The data arrives in RGB order, so RGB2GRAY (not BGR2GRAY) applies the
    # luminance weights to the correct channels.
    image = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    image = cv2.resize(image, (224, 224))
    # Subtract a heavily blurred copy to remove the local average intensity,
    # amplify the remainder by 4 and re-centre it on 128.
    image = cv2.addWeighted(image, 4, cv2.GaussianBlur(image, (0, 0), 30), -4, 128)
    # Back to three channels so the output shape matches the input shape.
    return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

In [None]:
ig=ImageDataGenerator(preprocessing_function=bens_color,validation_split=0.2)

In [None]:
# Recreate the training and validation iterators from the current generator `ig`;
# the two calls are identical except for the subset they read.
train_batches, valid_batches = (
    ig.flow_from_directory(directory=path,
                           target_size=(224,224),
                           classes=['benign', 'malignant'],
                           batch_size=16,
                           subset=split_name)
    for split_name in ('training', 'validation')
)

In [None]:
imgs, labels = next(train_batches)