In [None]:
from PIL import Image
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import preprocess_input
from tqdm import tqdm
import keras
import numpy as np
import pandas as pd
import cv2
import imutils 
import os
import random
import math

In [None]:
# Mount Google Drive at /content/gdrive; BASE_PATH (defined in a later cell)
# points inside this mount.
from google.colab import drive

drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Root project folder on the mounted Drive; every other path is derived from it.
BASE_PATH = '/content/gdrive/MyDrive/Colab Notebooks/Brain Tumor Classification/'

# Raw images, read one sub-folder at a time (the sub-folder name is the label).
PATH = f'{BASE_PATH}original_dataset/'

# Output folders for the augmented images, one per (split, label) pair.
AUGMENTED_TRAIN_PATH_YES = f'{BASE_PATH}augmented_dataset/train/yes/'
AUGMENTED_TRAIN_PATH_NO = f'{BASE_PATH}augmented_dataset/train/no/'

AUGMENTED_VALIDATION_PATH_YES = f'{BASE_PATH}augmented_dataset/validation/yes/'
AUGMENTED_VALIDATION_PATH_NO = f'{BASE_PATH}augmented_dataset/validation/no/'

AUGMENTED_TEST_PATH_YES = f'{BASE_PATH}augmented_dataset/test/yes/'
AUGMENTED_TEST_PATH_NO = f'{BASE_PATH}augmented_dataset/test/no/'

# Size every image is resized to before augmentation.
TARGET_IMAGE_SIZE = (224, 224)

In [None]:
# Accumulates one (image, class_label) tuple per file read from PATH.
dataset = []

In [None]:
# Load every image under PATH as an (image, class_label) tuple, where the label
# is the name of the sub-folder the file was found in.
dataset.clear()  # start empty so re-running this cell does not append duplicates
# sorted(): os.listdir returns entries in arbitrary order; a fixed order keeps
# the loaded dataset identical from run to run.
for feature_class in tqdm(sorted(os.listdir(PATH))):
    # Skip hidden entries (names starting with a dot).
    if feature_class.startswith("."):
        continue
    feature_class_path = os.path.join(PATH, feature_class)
    for file_name in sorted(os.listdir(feature_class_path)):
        file_path = os.path.join(feature_class_path, file_name)
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals an unreadable/non-image file by returning None
            # rather than raising; keeping it would crash later in cv2.resize.
            print("Skipping unreadable file: {0}".format(file_path))
            continue
        dataset.append((image, feature_class))

100%|██████████| 2/2 [00:27<00:00, 13.94s/it]


In [None]:
# Shuffle with a fixed seed so that, for a given load order, the
# train/validation/test split is the same on every run. A dedicated Random
# instance leaves the global `random` state untouched.
random.Random(42).shuffle(dataset)

In [None]:
# Share of the shuffled dataset given to each split, expressed as fractions
# (they are multiplied directly by the dataset size).
(TRAINING_SAMPLES_SIZE,
 VALIDATION_SAMPLES_SIZE,
 TESTING_SAMPLES_SIZE) = (0.7, 0.15, 0.15)

In [None]:
# The split sizes are fractions, so together they may cover at most the whole
# dataset (1.0), not 100. The small tolerance absorbs floating-point rounding
# in the sum.
_total_split_fraction = (
    TRAINING_SAMPLES_SIZE + VALIDATION_SAMPLES_SIZE + TESTING_SAMPLES_SIZE
)
if _total_split_fraction > 1 + 1e-9:
    raise ValueError(
        "Split fractions must sum to at most 1, got {0}".format(_total_split_fraction)
    )

In [None]:
total_dataset_size = len(dataset)

# The three splits are consecutive, non-overlapping index ranges over the
# dataset: each split starts where the previous one ends.
# Note: every split length is floored, so a few trailing samples can be left
# out of all three splits.
training_start_index = 0
validation_start_index = training_end_index = (
    training_start_index + math.floor(total_dataset_size * TRAINING_SAMPLES_SIZE)
)
testing_start_index = validation_end_index = (
    validation_start_index + math.floor(total_dataset_size * VALIDATION_SAMPLES_SIZE)
)
testing_end_index = (
    testing_start_index + math.floor(total_dataset_size * TESTING_SAMPLES_SIZE)
)

In [None]:
# Cut the dataset into the three splits using the index ranges computed above.
training_dataset, validation_dataset, testing_dataset = (
    dataset[start:end]
    for start, end in (
        (training_start_index, training_end_index),
        (validation_start_index, validation_end_index),
        (testing_start_index, testing_end_index),
    )
)

In [None]:
def count_class_labels(dataset, dataset_type):
    '''
    Prints how many samples of the given split are labelled 'yes' and how many are not.

    dataset: iterable of samples whose element at index 1 is the class label.
    dataset_type: name of the split, used only in the printed messages.

    Any label other than 'yes' is counted as NO.
    '''
    labels = [sample[1] for sample in dataset]
    yes_total = sum(1 for label in labels if label == 'yes')
    no_total = len(labels) - yes_total
    print("Number of YES labels in the {0} dataset are {1}".format(dataset_type, yes_total))
    print("Number of NO labels in the {0} dataset are {1}".format(dataset_type, no_total))
    print("---")

In [None]:
# Report the class balance of each split, in training/validation/testing order.
for split_samples, split_name in (
    (training_dataset, "training"),
    (validation_dataset, "validation"),
    (testing_dataset, "testing"),
):
    count_class_labels(split_samples, split_name)

Number of YES labels in the training dataset are 108
Number of NO labels in the training dataset are 69
---
Number of YES labels in the validation dataset are 20
Number of NO labels in the validation dataset are 17
---
Number of YES labels in the testing dataset are 26
Number of NO labels in the testing dataset are 11
---


In [None]:
def reshape_image_array(image):
    '''
    Adds a leading axis of length 1 to the image numpy array, turning a single
    image into a batch of one, since ImageDataGenerator requires a four
    dimensioned array.
    '''
    batched_shape = (1, *image.shape)
    return image.reshape(batched_shape)

In [None]:
def resize_image(image, target_image_size=TARGET_IMAGE_SIZE):
    '''
    Resizes an image to the requested size using bicubic interpolation.

    image: image array as accepted by cv2.resize (e.g. the result of cv2.imread).
    target_image_size: output size handed to cv2.resize as `dsize`; defaults to
        TARGET_IMAGE_SIZE. It was previously ignored in favour of the global
        constant, so passing a different size had no effect.

    Returns the resized image array.
    '''
    return cv2.resize(image, dsize=target_image_size, interpolation=cv2.INTER_CUBIC)

In [None]:
def split_yes_no_dataset(dataset):
    '''
    Resizes every sample image, wraps it as a batch of one, and separates the
    results by class label.

    dataset: iterable of samples with the image at index 0 and the label at index 1.

    Returns a (yes_images, no_images) pair of lists. Samples whose label is
    neither "yes" nor "no" are still resized but end up in neither list.
    '''
    yes_images, no_images = [], []
    for sample in tqdm(dataset):
        image, label = sample[0], sample[1]
        batched = reshape_image_array(resize_image(image))
        if label == "yes":
            target = yes_images
        elif label == "no":
            target = no_images
        else:
            target = None
        if target is not None:
            target.append(batched)
    return yes_images, no_images

In [None]:
# Resize the training samples and separate them into 'yes' and 'no' image lists.
training_yes_dataset, training_no_dataset = split_yes_no_dataset(training_dataset)

100%|██████████| 177/177 [00:00<00:00, 974.25it/s]


In [None]:
# Resize the validation samples and separate them into 'yes' and 'no' image lists.
validation_yes_dataset, validation_no_dataset = split_yes_no_dataset(validation_dataset)

100%|██████████| 37/37 [00:00<00:00, 889.32it/s]


In [None]:
# Resize the testing samples and separate them into 'yes' and 'no' image lists.
testing_yes_dataset, testing_no_dataset = split_yes_no_dataset(testing_dataset)

100%|██████████| 37/37 [00:00<00:00, 1224.76it/s]


In [None]:
def augment_images(dataset, output_path, augmentations_per_image=5):
    '''
    Writes randomly augmented variants of every image in the dataset to output_path.

    dataset: iterable of four dimensioned image arrays (one image per array),
        as produced by reshape_image_array.
    output_path: directory the augmented images are saved to; created if missing.
    augmentations_per_image: number of augmented variants generated and saved
        for each input image (defaults to 5, the previously hard-coded value).

    Nothing is returned; the result is the set of files written by Keras.

    NOTE(review): preprocess_input is applied before the images are saved, so
    the files on disk hold preprocessed pixel values - confirm this is intended.
    '''
    os.makedirs(output_path, exist_ok=True)

    datagen = ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        vertical_flip=True,
        preprocessing_function=preprocess_input
    )
    for image_index, image in enumerate(tqdm(dataset), start=1):
        generator = datagen.flow(
            image,
            save_to_dir=output_path,
            # Keras names saved files '<prefix>_<index in array>_<random 0-9999>'.
            # Each array holds one image, so without a prefix every file of every
            # image competes for the same 10000 names and collisions silently
            # overwrite earlier augmentations. A per-image prefix keeps them apart.
            save_prefix="aug_{0:05d}".format(image_index)
        )
        # Every batch pulled from the generator saves one augmented image.
        for _ in range(augmentations_per_image):
            next(generator)

In [None]:
# Write augmented variants of the training 'yes' images to AUGMENTED_TRAIN_PATH_YES.
augment_images(dataset=training_yes_dataset, output_path=AUGMENTED_TRAIN_PATH_YES)

100%|██████████| 107/107 [00:23<00:00,  4.65it/s]


In [None]:
# Write augmented variants of the training 'no' images to AUGMENTED_TRAIN_PATH_NO.
augment_images(dataset=training_no_dataset, output_path=AUGMENTED_TRAIN_PATH_NO)

100%|██████████| 70/70 [00:14<00:00,  4.95it/s]


In [None]:
# Write augmented variants of the validation 'yes' images to AUGMENTED_VALIDATION_PATH_YES.
# NOTE(review): the validation split receives the same random augmentation as
# the training split - confirm that evaluating on augmented images is intended.
augment_images(dataset=validation_yes_dataset, output_path=AUGMENTED_VALIDATION_PATH_YES)

100%|██████████| 25/25 [00:05<00:00,  4.83it/s]


In [None]:
# Write augmented variants of the validation 'no' images to AUGMENTED_VALIDATION_PATH_NO.
augment_images(dataset=validation_no_dataset, output_path=AUGMENTED_VALIDATION_PATH_NO)

100%|██████████| 12/12 [00:02<00:00,  5.32it/s]


In [None]:
# Write augmented variants of the testing 'yes' images to AUGMENTED_TEST_PATH_YES.
# NOTE(review): the test split receives the same random augmentation as the
# training split - confirm that testing on augmented images is intended.
augment_images(dataset=testing_yes_dataset, output_path=AUGMENTED_TEST_PATH_YES)

100%|██████████| 21/21 [00:04<00:00,  4.71it/s]


In [None]:
# Write augmented variants of the testing 'no' images to AUGMENTED_TEST_PATH_NO.
augment_images(dataset=testing_no_dataset, output_path=AUGMENTED_TEST_PATH_NO)

100%|██████████| 16/16 [00:03<00:00,  4.25it/s]
