In [6]:
# Mount Google Drive so the files under /content/drive are reachable from
# this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import shutil
import random

# Set the path to the folder containing the subfolders of images
data_path = r'/content/drive/MyDrive/JohnSmith_FinalProject_BioMed/Brain_Tumors_Dataset'

# Set the path to the folder where the training and testing sets will be saved
train_path = r'/content/drive/MyDrive/JohnSmith_FinalProject_BioMed/Brain_Tumors_Dataset_train'
test_path = r'/content/drive/MyDrive/JohnSmith_FinalProject_BioMed/Brain_Tumors_Dataset_test'

# Set the proportion of data to use for training and testing
train_prop = 0.7
test_prop = 0.3

# The test set receives whatever the training slice leaves behind, so the two
# proportions have to describe the whole dataset.
if abs(train_prop + test_prop - 1.0) > 1e-9:
    raise ValueError("train_prop and test_prop must sum to 1")

# Dedicated seeded generator: together with the sorted listings below it makes
# the split identical on every run, so re-running this cell overwrites the same
# copies instead of scattering an image into both the train and test folders.
# NOTE: copies left behind by an earlier, unseeded run are not removed here;
# clear the output folders once before relying on the split.
split_seed = 123
split_rng = random.Random(split_seed)

# Iterate over the class subfolders and split the images randomly
for folder in sorted(os.listdir(data_path)):
    folder_path = os.path.join(data_path, folder)
    # Stray files next to the class folders are not classes.
    if not os.path.isdir(folder_path):
        continue

    # Only regular files can be copied; sorting fixes the pre-shuffle order.
    files = sorted(
        name for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )
    split_rng.shuffle(files)
    train_size = int(len(files) * train_prop)
    train_files = files[:train_size]
    test_files = files[train_size:]

    # Create the train and test subfolders
    train_folder = os.path.join(train_path, folder)
    test_folder = os.path.join(test_path, folder)
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    # Copy the images to the train and test subfolders
    for subset_files, subset_folder in ((train_files, train_folder),
                                        (test_files, test_folder)):
        for file in subset_files:
            src = os.path.join(folder_path, file)
            dst = os.path.join(subset_folder, file)
            shutil.copy(src, dst)

In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
plt.style.use('ggplot')

import os
import random

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import cv2

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications import VGG19
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras import regularizers

In [3]:
# Directory whose class sub-folders are loaded into the path/label table below.
# NOTE(review): this rebinds `data_path`, which the split cell used for the raw
# dataset folder; from here on it refers to the "_train" copy.
data_path = '/content/drive/MyDrive/JohnSmith_FinalProject_BioMed/Brain_Tumors_Dataset_train'

In [4]:
def main(train_data_path):
    """Build the image-path/label table for one dataset directory.

    Args:
        train_data_path: Directory whose immediate sub-entries are treated as
            class names.

    Returns:
        The DataFrame produced by ``obtain_train_df`` from the collected
        file paths and their class labels.
    """
    class_names = sorted(os.listdir(train_data_path))
    image_paths, image_labels = get_fileList(class_names, train_data_path, [], [])
    return obtain_train_df(image_paths, image_labels)



def get_fileList(classes_list, dirs, filepaths, labels):
    """Collect file paths and labels for every class folder under ``dirs``.

    Args:
        classes_list: Names of the entries inside ``dirs`` to treat as classes.
        dirs: Root directory that contains the class folders.
        filepaths: List that receives the file paths (extended in place).
        labels: List that receives one label per path (extended in place).

    Returns:
        The same ``filepaths`` and ``labels`` lists, as a tuple.
    """
    for s_class in classes_list:
        classpath = os.path.join(dirs, s_class)
        # A regular file sitting beside the class folders is not a class, and
        # os.listdir() on it would raise NotADirectoryError.
        if os.path.isfile(classpath):
            continue
        flist = sorted(os.listdir(classpath))
        store_image_names_and_its_labels(flist, classpath, filepaths, labels, s_class)

    return filepaths, labels

def store_image_names_and_its_labels(flist, classpath, filepaths, labels, label):
    """Append the files of one class folder, with their label, to the lists.

    Args:
        flist: Entry names found inside ``classpath``.
        classpath: Directory the names in ``flist`` are relative to.
        filepaths: List extended in place with the joined paths.
        labels: List extended in place with ``label``, once per stored path.
        label: Class label shared by every entry of ``flist``.
    """
    for file in flist:
        file_path = os.path.join(classpath, file)
        # Nested directories are not images; keeping them would add rows
        # that cannot be loaded later.
        if not os.path.isfile(file_path):
            continue
        filepaths.append(file_path)
        labels.append(label)

def obtain_train_df(filepaths, labels):
    """Pair file paths with their labels in a two-column DataFrame.

    Args:
        filepaths: Sequence of image file paths.
        labels: Sequence of labels, aligned element-wise with ``filepaths``.

    Returns:
        A DataFrame with the columns ``image_path`` and ``label``.
    """
    columns = dict(image_path=filepaths, label=labels)
    return pd.DataFrame(columns)

In [7]:
# Build the path/label table for every file found under `data_path`.
data_df = main(data_path)

In [8]:
# Preview the first 20 path/label rows.
data_df.head(20)

Unnamed: 0,image_path,label
0,/content/drive/MyDrive/JohnSmith_FinalProject_...,Astrocitoma T1
1,/content/drive/MyDrive/JohnSmith_FinalProject_...,Astrocitoma T1
2,/content/drive/MyDrive/JohnSmith_FinalProject_...,Astrocitoma T1
3,/content/drive/MyDrive/JohnSmith_FinalProject_...,Astrocitoma T1
4,/content/drive/MyDrive/JohnSmith_FinalProject_...,Astrocitoma T1
5,/content/drive/MyDrive/JohnSmith_FinalProject_...,Astrocitoma T1
6,/content/drive/MyDrive/JohnSmith_FinalProject_...,Astrocitoma T1
7,/content/drive/MyDrive/JohnSmith_FinalProject_...,Astrocitoma T1
8,/content/drive/MyDrive/JohnSmith_FinalProject_...,Astrocitoma T1
9,/content/drive/MyDrive/JohnSmith_FinalProject_...,Astrocitoma T1


In [9]:
# Count the distinct labels (missing values, if any, count as a label too)
# and report the dataset size.
num_classes = data_df['label'].nunique(dropna=False)
print(f"There are {num_classes} classes containing {data_df.shape[0]} total images.")

There are 44 classes containing 4033 total images.


In [10]:
# Number of images per class, most frequent first.
data_df['label'].value_counts()

Meningioma T1C+           333
Meningioma T1             244
_NORMAL T2                243
_NORMAL T1                222
Astrocitoma T1C+          217
Meningioma T2             213
Neurocitoma T1C+          199
Schwannoma T1C+           173
Astrocitoma T1            159
Astrocitoma T2            156
Schwannoma T1             130
Neurocitoma T1            117
Schwannoma T2             108
Carcinoma T1C+             99
Papiloma T1C+              98
Neurocitoma T2             94
Glioblastoma T1C+          89
Oligodendroglioma T1       79
Tuberculoma T1C+           73
Carcinoma T2               66
Oligodendroglioma T1C+     62
Papiloma T1                61
Meduloblastoma T1C+        60
Oligodendroglioma T2       59
Carcinoma T1               59
Papiloma T2                56
Glioblastoma T2            52
Glioblastoma T1            52
Ependimoma T2              50
Ependimoma T1C+            44
Ependimoma T1              42
Meduloblastoma T2          37
Germinoma T1C+             34
Tuberculom

In [11]:
# Hold out 30% of the rows (stratified by label), then cut that hold-out in
# half (again stratified) to obtain the test and validation tables.
test_split_size = 0.3
train_df, holdout_df = train_test_split(
    data_df,
    test_size=test_split_size,
    shuffle=True,
    random_state=123,
    stratify=data_df['label'],
)
test_df, valid_df = train_test_split(
    holdout_df,
    test_size=0.5,
    shuffle=True,
    random_state=123,
    stratify=holdout_df['label'],
)

In [12]:
# Peek at the training table.
train_df.head()

Unnamed: 0,image_path,label
1923,/content/drive/MyDrive/JohnSmith_FinalProject_...,Meningioma T1C+
1504,/content/drive/MyDrive/JohnSmith_FinalProject_...,Meningioma T1
3848,/content/drive/MyDrive/JohnSmith_FinalProject_...,_NORMAL T2
2870,/content/drive/MyDrive/JohnSmith_FinalProject_...,Papiloma T1
1179,/content/drive/MyDrive/JohnSmith_FinalProject_...,Glioblastoma T2


In [13]:
# Peek at the test table.
test_df.head()

Unnamed: 0,image_path,label
2334,/content/drive/MyDrive/JohnSmith_FinalProject_...,Neurocitoma T1C+
979,/content/drive/MyDrive/JohnSmith_FinalProject_...,Germinoma T1C+
3018,/content/drive/MyDrive/JohnSmith_FinalProject_...,Papiloma T2
1753,/content/drive/MyDrive/JohnSmith_FinalProject_...,Meningioma T1C+
2285,/content/drive/MyDrive/JohnSmith_FinalProject_...,Neurocitoma T1


In [14]:
# Peek at the validation table.
valid_df.head()

Unnamed: 0,image_path,label
2620,/content/drive/MyDrive/JohnSmith_FinalProject_...,Oligodendroglioma T1
3119,/content/drive/MyDrive/JohnSmith_FinalProject_...,Schwannoma T1
1556,/content/drive/MyDrive/JohnSmith_FinalProject_...,Meningioma T1
2655,/content/drive/MyDrive/JohnSmith_FinalProject_...,Oligodendroglioma T1
1588,/content/drive/MyDrive/JohnSmith_FinalProject_...,Meningioma T1


In [15]:
# Image geometry used for the generators and the model input: 224x224, 3 channels.
img_size = (224, 224)
channels = 3
color = 'rgb'
img_shape = img_size + (channels,)
batch_size = 32

# Pick the largest test batch size (at most 80) that divides the number of
# test rows evenly, so a whole number of steps covers every row exactly once.
# NOTE(review): ts_length counts DataFrame rows; if the generator drops rows it
# cannot validate as images, the real sample count is smaller - verify.
ts_length = len(test_df)
test_batch_size = max(
    size for size in range(1, min(ts_length, 80) + 1) if ts_length % size == 0
)
test_steps = ts_length // test_batch_size
def scalar(img):
    """Identity preprocessing hook: return the image unchanged.

    NOTE(review): presumably pixels are left unscaled because the tf.keras
    EfficientNet models do their own input rescaling - confirm.
    """
    return img

In [16]:
# Training images are randomly flipped horizontally; validation and test
# images are passed through without augmentation.
tr_gen = ImageDataGenerator(preprocessing_function=scalar,
                            horizontal_flip=True)

ts_gen = ImageDataGenerator(preprocessing_function=scalar)

# Arguments shared by all three flows.
flow_kwargs = dict(x_col='image_path',
                   y_col='label',
                   target_size=img_size,
                   class_mode='categorical',
                   color_mode=color)

# Seeded so the shuffling/augmentation sequence is repeatable, matching the
# seed 123 used elsewhere in this notebook.
train_gen = tr_gen.flow_from_dataframe(train_df,
                                       shuffle=True,
                                       seed=123,
                                       batch_size=batch_size,
                                       **flow_kwargs)

# Evaluation flows keep the DataFrame order so that generator attributes such
# as `.classes` stay aligned with the order of any predictions made from them.
# Aggregate validation metrics do not depend on batch order.
valid_gen = ts_gen.flow_from_dataframe(valid_df,
                                       shuffle=False,
                                       batch_size=batch_size,
                                       **flow_kwargs)

test_gen = ts_gen.flow_from_dataframe(test_df,
                                      shuffle=False,
                                      batch_size=test_batch_size,
                                      **flow_kwargs)

Found 2823 validated image filenames belonging to 44 classes.
Found 605 validated image filenames belonging to 44 classes.
Found 604 validated image filenames belonging to 44 classes.




In [17]:
# Number of target classes known to the training generator.
class_count = len(train_gen.class_indices)

In [18]:
# ImageNet-pretrained EfficientNetB5 without its classification head; global
# max pooling turns the final feature map into one feature vector per image.
base_model = tf.keras.applications.efficientnet.EfficientNetB5(
    include_top=False,
    weights="imagenet",
    input_shape=img_shape,
    pooling='max',
)

# Classification head: batch norm -> regularised 256-unit dense layer ->
# dropout -> softmax over the classes.
model = Sequential()
model.add(base_model)
model.add(BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001))
model.add(
    Dense(
        256,
        activation='relu',
        kernel_regularizer=regularizers.l2(l=0.016),
        activity_regularizer=regularizers.l1(0.006),
        bias_regularizer=regularizers.l1(0.006),
    )
)
model.add(Dropout(rate=0.45, seed=123))
model.add(Dense(class_count, activation='softmax'))

model.compile(
    optimizer=Adamax(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb5_notop.h5
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 efficientnetb5 (Functional)  (None, 2048)             28513527  
                                                                 
 batch_normalization (BatchN  (None, 2048)             8192      
 ormalization)                                                   
                                                                 
 dense (Dense)               (None, 256)               524544    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 44)                11308     
                                                                 
Total params: 29,057,571
T

In [None]:
# Stop when validation loss has not improved for 5 consecutive epochs.
# restore_best_weights=True rolls the in-memory model back to its best epoch
# when early stopping triggers; without it the model keeps the weights of the
# last (worse) epoch.
early_stop = EarlyStopping(monitor='val_loss',
                           patience=5,
                           restore_best_weights=True,
                           verbose=1)

# Save the weights to disk each time validation loss reaches a new minimum.
# NOTE(review): the path is relative, so the file lands in the current working
# directory - confirm that location persists if the weights are needed later.
checkpoint = ModelCheckpoint('model_weights.h5',
                             monitor='val_loss',
                             save_best_only=True,
                             save_weights_only=True,
                             mode='min',
                             verbose=1)


# Train for up to 20 epochs, validating on valid_gen after each epoch.
history = model.fit(x=train_gen,
                    epochs=20,
                    callbacks=[early_stop, checkpoint],
                    validation_data=valid_gen)

Epoch 1/20
12/89 [===>..........................] - ETA: 23:16 - loss: 11.8443 - accuracy: 0.1532