### Cancer Detection using Deep Learning 

In [1]:
import warnings 
warnings.filterwarnings("ignore")
import numpy as np
import matplotlib.pyplot as plt
import os
import math
import shutil
import glob

In [2]:
# Count the images available in each class folder of the dataset.
# Folder names act as class names. Keras' `flow_from_directory` assigns label
# indices in alphanumeric order of the folder names, so 'no' -> 0 and
# 'yes' -> 1 (presumably 'yes' means a tumor is present; confirm against the
# dataset description).
ROOT_DIR = 'brain_tumor_dataset'
number_of_images = {
    class_name: len(os.listdir(os.path.join(ROOT_DIR, class_name)))
    for class_name in os.listdir(ROOT_DIR)
    # Skip stray files (e.g. .DS_Store) that are not class folders.
    if os.path.isdir(os.path.join(ROOT_DIR, class_name))
}
print(number_of_images)

{'no': 32, 'yes': 49}


In [11]:
# Split the data: 70% for training, 15% for validation and 15% for testing

In [54]:
def dataFolder(p, split, root_dir=None, other_splits=('train', 'validation', 'test'), seed=None):
    """Copy a random fraction of every class folder into a new split folder.

    For each class sub-directory of the dataset root, ``floor(split * n)``
    images are copied into ``./<p>/<class>``, where ``n`` is the number of
    images currently in that class folder. The source dataset is only read,
    never modified. Images that were already copied into one of
    ``other_splits`` are excluded from the draw, so the split folders never
    share a file (no leakage between train / validation / test).

    Args:
        p: Name of the split folder to create in the current working
            directory (e.g. ``'train'``).
        split: Fraction of each class to copy, e.g. ``0.7``.
        root_dir: Dataset root containing one sub-folder per class.
            Defaults to the module-level ``ROOT_DIR``.
        other_splits: Names of sibling split folders whose images must not
            be reused. ``p`` itself is ignored if it appears here.
        seed: Optional seed for a reproducible selection. When ``None`` the
            global NumPy random state is used.

    Returns:
        None. Prints a message if the folder already exists or if a class
        does not have enough unused images for the requested fraction.
    """
    if root_dir is None:
        root_dir = ROOT_DIR

    target_root = './' + p
    if os.path.exists(target_root):
        print(f"{p} folder already exists")
        return
    os.mkdir(target_root)

    # Both objects expose the same `choice(a, size=..., replace=...)` method.
    chooser = np.random if seed is None else np.random.default_rng(seed)

    # Sorted listings make a seeded run independent of filesystem order.
    for class_name in sorted(os.listdir(root_dir)):
        class_dir = os.path.join(root_dir, class_name)
        if not os.path.isdir(class_dir):
            # Stray files in the dataset root are not classes.
            continue

        target_dir = os.path.join(target_root, class_name)
        os.makedirs(target_dir)

        images = sorted(os.listdir(class_dir))
        # The quota is based on the full class size, so 0.7 / 0.15 / 0.15
        # together never ask for more images than the class holds.
        num_to_move = math.floor(split * len(images))

        # Collect file names already assigned to another split.
        taken = set()
        for other in other_splits:
            if other == p:
                continue
            other_dir = os.path.join('.', other, class_name)
            if os.path.isdir(other_dir):
                taken.update(os.listdir(other_dir))
        candidates = [img for img in images if img not in taken]

        if 0 < num_to_move <= len(candidates):
            selected_images = chooser.choice(candidates, size=num_to_move, replace=False)
            for img in selected_images:
                # Copy (not move): the original dataset stays intact.
                shutil.copy(os.path.join(class_dir, str(img)), target_dir)
        else:
            print(f"Not enough images to move for {class_name} in {p} set")




In [43]:
# Train folder: 70% of the images of each class
dataFolder('train', 0.7)

In [57]:
# Validation folder: 15% of the images of each class
dataFolder('validation', 0.15)

In [58]:
# Test folder: 15% of the images of each class
dataFolder('test', 0.15)

### Model Build

In [68]:
import keras
from keras.layers import Conv2D, MaxPool2D, Flatten, Dropout, Dense, BatchNormalization, GlobalAvgPool2D
from keras.models import Sequential
from keras.preprocessing.image import load_img, img_to_array
from keras.src.legacy.preprocessing.image import ImageDataGenerator


In [62]:
# CNN model: four 3x3 convolution stages with growing filter counts, followed
# by a small dense head that outputs a single sigmoid probability.
model = Sequential([
    # Feature extractor on 224x224 RGB inputs
    Conv2D(filters=16, kernel_size=(3, 3), activation='relu', input_shape=(224, 224, 3)),

    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),

    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),

    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),

    Dropout(rate=0.25),

    # Classifier head
    Flatten(),
    Dense(units=64, activation='relu'),
    Dropout(rate=0.25),
    Dense(units=1, activation='sigmoid'),
])

model.summary()

In [63]:
# Binary classification: single sigmoid output trained with binary cross-entropy
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [69]:
# to input data into model
def preprocessingImages(path, target_size=(224, 224), batch_size=32):
    """Build an augmenting batch iterator over a directory of class folders.

    Pixel values are rescaled by 1/255 and random shear, zoom and horizontal
    flips are applied, which makes this iterator suitable for training data.

    Args:
        path: Directory containing one sub-folder per class.
        target_size: (height, width) every image is resized to.
        batch_size: Number of images per batch.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow_from_directory``
        with ``class_mode='binary'``.
    """
    augmenter = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
    )
    return augmenter.flow_from_directory(
        directory=path,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='binary',
    )

In [71]:
# Training iterator: rescaled and randomly augmented batches read from ./train
path = './train'
train_data = preprocessingImages(path)

Found 21 images belonging to 2 classes.


In [74]:
# to input data into model
def preprocessingImagesValidationandTest(path, target_size=(224, 224), batch_size=32, shuffle=False):
    """Build a non-augmenting batch iterator for validation or test data.

    Only the 1/255 pixel rescaling is applied; no random augmentation.

    Args:
        path: Directory containing one sub-folder per class.
        target_size: (height, width) every image is resized to.
        batch_size: Number of images per batch.
        shuffle: Whether to shuffle the images. Defaults to ``False`` so the
            batch order matches the iterator's ``classes`` / ``filenames``
            attributes, which is needed when predictions are compared
            against the true labels.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow_from_directory``
        with ``class_mode='binary'``.
    """
    rescaler = ImageDataGenerator(rescale=1./255)
    return rescaler.flow_from_directory(
        directory=path,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='binary',
        shuffle=shuffle,
    )

In [85]:
# Validation iterator: rescaled, non-augmented batches read from ./validation
path = './validation'
val_data = preprocessingImagesValidationandTest(path)

Found 11 images belonging to 2 classes.


In [86]:
# Test iterator: rescaled, non-augmented batches read from ./test
path = './test'
test_data = preprocessingImagesValidationandTest(path)

Found 11 images belonging to 2 classes.


In [79]:
# Training callbacks: early stopping plus best-model checkpointing
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop once validation accuracy has not improved by at least 0.01
# for 3 consecutive epochs.
es = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.01,
    patience=3,
    verbose=1,
    mode='auto',
)

# Write the model to disk only when validation accuracy improves.
mc = ModelCheckpoint(
    filepath='./best_model.keras',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
    mode='auto',
)

# Callback list handed to model.fit
cd = [es, mc]

### Model Training

In [88]:
# Train the CNN.
# No explicit step counts are passed: Keras then runs every epoch, and every
# validation pass, over the whole generator exactly once. Hard-coded values
# such as steps_per_epoch=8 / validation_steps=16 request more batches than
# these small generators provide in one pass (batch_size=32), so the input
# can run out of data mid-epoch.
hs = model.fit(
    train_data,
    epochs=10,
    verbose=1,
    validation_data=val_data,
    callbacks=cd,
)