# CNN Model for Breast Cancer Classification

## Model structure

#### Import necessary Libraries

In [1]:
from keras.models import Sequential
from keras.layers import SeparableConv2D,Dense,MaxPool2D,BatchNormalization,Activation,Flatten,Dropout
from keras import backend as k

#### Build Cancer Model

In [2]:
# building a model class that takes the size and depth of the image along with the number of classes
# which in this project is 2 
class Cancer:
    """Factory for the separable-convolution CNN used in this notebook."""

    @staticmethod
    def build(width,height,depth,classes):
        """Assemble the (uncompiled) Keras ``Sequential`` classifier.

        The network has three convolutional stages with 32, 64 and 128
        filters, containing 1, 2 and 3 ``SeparableConv2D`` layers
        respectively. Every convolution is followed by ReLU and batch
        normalisation; every stage ends with 2x2 max-pooling and 25% dropout.
        The head is Flatten -> Dense(256) -> ReLU -> batch normalisation ->
        50% dropout -> Dense(classes) -> softmax.

        Args:
            width: image width in pixels.
            height: image height in pixels.
            depth: number of image channels.
            classes: number of output units of the final softmax layer.

        Returns:
            The assembled model; compiling it is left to the caller.
        """
        # The backend decides where the channel axis lives; batch
        # normalisation has to be pointed at that same axis.
        if k.image_data_format() == "channels_first":
            input_shape, channel_dim = (depth, height, width), 1
        else:
            input_shape, channel_dim = (height, width, depth), -1

        model = Sequential()

        # (number of filters, number of conv layers) for each stage
        stage_layout = ((32, 1), (64, 2), (128, 3))

        is_first_layer = True
        for n_filters, n_convs in stage_layout:
            for _ in range(n_convs):
                if is_first_layer:
                    # only the very first layer declares the input shape
                    conv = SeparableConv2D(n_filters, (3, 3), input_shape=input_shape, padding="same")
                    is_first_layer = False
                else:
                    conv = SeparableConv2D(n_filters, (3, 3), padding="same")
                model.add(conv)
                model.add(Activation("relu"))
                model.add(BatchNormalization(axis=channel_dim))
            model.add(MaxPool2D((2, 2)))
            model.add(Dropout(0.25))

        # fully connected head
        for head_layer in (
            Flatten(),
            Dense(256),
            Activation("relu"),
            BatchNormalization(axis=channel_dim),
            Dropout(0.5),
        ):
            model.add(head_layer)

        # classifier output
        model.add(Dense(classes))
        model.add(Activation("softmax"))

        return model

## Data Preprocessing

#### Import necessary libraries

In [9]:
import matplotlib
matplotlib.use("Agg")

import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras.optimizers import Adagrad,Adam
from keras.utils import np_utils
from sklearn.utils import class_weight
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import config
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import os

#### Prepare the data

In [23]:
# Training hyper-parameters: epoch count, initial learning rate, batch size
num_epochs, lr, bs = 40, 1e-2, 32

# Collect the training image paths and count the images of every split
trainPaths = list(paths.list_images(config.Train_path))
lenTrain = len(trainPaths)
lenVal, lenTest = (
    len(list(paths.list_images(split_dir)))
    for split_dir in (config.Val_path, config.Test_path)
)

# An image's label is the (integer) name of its parent directory.
# The labels are one-hot encoded so that summing over axis 0 yields the
# number of training images per class.
trainLabels = np_utils.to_categorical(
    [int(image_path.split(os.path.sep)[-2]) for image_path in trainPaths]
)
classTotals = trainLabels.sum(axis=0)

# Weight each class by (largest class count / own count): the most frequent
# class gets weight 1 and rarer classes get proportionally larger weights.
classWeight = {
    label: weight
    for label, weight in enumerate(classTotals.max() / classTotals)
}

# Training generator: multiplies pixel values by 1/255 and applies small
# random shear / rotation / shift / zoom plus horizontal and vertical flips.
TrainAug = ImageDataGenerator(
    rescale=1/255.0,
    shear_range=0.05,
    rotation_range=10,
    width_shift_range=0.01,
    height_shift_range=0.01,
    zoom_range=0.05,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest",
)

# Validation and test images are only rescaled, never augmented
ValAug = ImageDataGenerator(rescale=1/255.0)


def _flow_images(augmenter, directory, shuffle):
    """Stream 48x48 RGB batches of size ``bs`` with one-hot labels from ``directory``."""
    return augmenter.flow_from_directory(
        directory=directory,
        class_mode="categorical",
        target_size=(48, 48),
        color_mode="rgb",
        shuffle=shuffle,
        batch_size=bs,
    )


# Only the training stream is shuffled; validation and test keep the
# directory order so predictions line up with the iterator's `.classes`.
TrainAugmented = _flow_images(TrainAug, config.Train_path, shuffle=True)
ValAugmented = _flow_images(ValAug, config.Val_path, shuffle=False)
TestAugmented = _flow_images(ValAug, config.Test_path, shuffle=False)

# Optimizers to try; a fresh model is built, trained and evaluated for each.
# To compare against Adam, add: Adam(learning_rate=lr, decay=lr/num_epochs)
optimizers = [Adagrad(learning_rate=lr, decay=lr/num_epochs)]
for opt in optimizers:
    # Derive the label from the optimizer itself so it can never disagree
    # with what was actually used (e.g. "adagrad", "adam").
    opt_name = type(opt).__name__.lower()

    model = Cancer.build(height=48, width=48, depth=3, classes=2)
    model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])

    M = model.fit(TrainAugmented,
                  steps_per_epoch=lenTrain//bs,
                  validation_data=ValAugmented,
                  validation_steps=lenVal//bs,
                  class_weight=classWeight,
                  epochs=num_epochs)

    print("Evaluating the model")
    TestAugmented.reset()
    # Round up so the final partial batch is included, without asking for an
    # extra batch when lenTest happens to be an exact multiple of bs.
    test_steps = int(np.ceil(lenTest / bs))
    pred_indices = model.predict(TestAugmented, steps=test_steps, verbose=1)
    # softmax scores -> index of the most probable class
    pred_indices = np.argmax(pred_indices, axis=1)

    print(classification_report(TestAugmented.classes, pred_indices,
                                target_names=list(TestAugmented.class_indices.keys())))

    # confusion_matrix puts true labels on rows and predictions on columns.
    confusion = confusion_matrix(TestAugmented.classes, pred_indices)
    total = confusion.sum()
    accuracy = (confusion[0,0] + confusion[1,1]) / total
    # NOTE: the two values below are the precision and recall of class
    # index 0 (they match the first row of the classification report above).
    precision = confusion[0,0] / (confusion[0,0] + confusion[1,0])
    recall = confusion[0,0] / (confusion[0,0] + confusion[0,1])
    print(confusion)
    print(f'Accuracy: {accuracy}')
    print(f'Precision: {precision}')
    print(f'Recall: {recall}')

    # Size the x-axis from the recorded history rather than num_epochs so the
    # plot still works if training stopped before the last epoch.
    epochs_run = len(M.history["loss"])
    plt.figure()
    plt.plot(np.arange(0, epochs_run), M.history["loss"], label="train_loss")
    plt.plot(np.arange(0, epochs_run), M.history["val_loss"], label="val_loss")
    plt.title(f"Training and Validation Loss on the IDC Dataset using {opt_name}")
    plt.xlabel("Epoch No.")
    plt.ylabel("Loss")
    plt.legend(loc="lower left")
    plt.savefig('plot.png')


Found 255815 images belonging to 2 classes.
Found 42660 images belonging to 2 classes.
Found 99906 images belonging to 2 classes.
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
Evaluating the model
              precision    recall  f1-score   support

           0       0.96      0.75      0.84     71451
           1       0.59      0.92      0.72     28455

    accuracy                           0.80     99906
   macro avg       0.77      0.83      0.78     99906
weighted avg       0.85      0.80      0.80     99906

[[53392 18059]
 [ 2418 26037]]
Accuracy: 0.79503733

In [22]:
# Evaluate the `model` currently held by the kernel on the test set and plot
# the loss curves stored in `M` (the History object returned by model.fit).
print("Evaluating the model")
TestAugmented.reset()
# Round up so the final partial batch is included, without asking for an
# extra batch when lenTest happens to be an exact multiple of bs.
test_steps = int(np.ceil(lenTest / bs))
pred_indices = model.predict(TestAugmented, steps=test_steps, verbose=1)
# softmax scores -> index of the most probable class
pred_indices = np.argmax(pred_indices, axis=1)

print(classification_report(TestAugmented.classes, pred_indices,
                            target_names=list(TestAugmented.class_indices.keys())))

# confusion_matrix puts true labels on rows and predictions on columns.
confusion = confusion_matrix(TestAugmented.classes, pred_indices)
total = confusion.sum()
accuracy = (confusion[0,0] + confusion[1,1]) / total
# NOTE: the two values below are the precision and recall of class index 0
# (they match the first row of the classification report above).
precision = confusion[0,0] / (confusion[0,0] + confusion[1,0])
recall = confusion[0,0] / (confusion[0,0] + confusion[0,1])
print(confusion)
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')

# Read the optimizer name from the compiled model instead of hard-coding it,
# so the title is correct whichever optimizer the model was trained with.
opt_name = type(model.optimizer).__name__.lower()

# Size the x-axis from the recorded history rather than num_epochs so the
# plot still works if training stopped before the last epoch.
epochs_run = len(M.history["loss"])
plt.figure()
plt.plot(np.arange(0, epochs_run), M.history["loss"], label="train_loss")
plt.plot(np.arange(0, epochs_run), M.history["val_loss"], label="val_loss")
plt.title(f"Training and Validation Loss on the IDC Dataset using {opt_name}")
plt.xlabel("Epoch No.")
plt.ylabel("Loss")
plt.legend(loc="lower left")
plt.savefig('plot.png')

Evaluating the model
              precision    recall  f1-score   support

           0       0.97      0.79      0.87     71451
           1       0.64      0.94      0.76     28455

    accuracy                           0.83     99906
   macro avg       0.81      0.87      0.82     99906
weighted avg       0.88      0.83      0.84     99906

[[56387 15064]
 [ 1639 26816]]
Accuracy: 0.832812844073429
Precision: 0.9717540412918347
Recall: 0.7891702005570251
