In [6]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
plt.style.use('ggplot')

import os
import random

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import cv2

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications import VGG19
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization
from tensorflow.keras import regularizers

In [7]:
data_path = 'C:\pythonn\brain_tumor/'

In [8]:
def main(data_path):
    filepaths = []
    labels = []
    classes_list = sorted(os.listdir(data_path))

    for s_class in classes_list:
        
        label = s_class
        classpath = os.path.join(data_path, s_class)
        flist = sorted(os.listdir(classpath))
        
        for file in flist:
            file_path = os.path.join(classpath, file)
            filepaths.append(file_path)
            labels.append(label)

    train_df = pd.DataFrame({
        'image_path': filepaths,
        'label': labels
    })
    
    return train_df

In [9]:
data_df = main(data_path)

In [10]:
data_df.head()

Unnamed: 0,image_path,label
0,/kaggle/input/brain-tumor-mri-images-44c/Astro...,Astrocitoma T1
1,/kaggle/input/brain-tumor-mri-images-44c/Astro...,Astrocitoma T1
2,/kaggle/input/brain-tumor-mri-images-44c/Astro...,Astrocitoma T1
3,/kaggle/input/brain-tumor-mri-images-44c/Astro...,Astrocitoma T1
4,/kaggle/input/brain-tumor-mri-images-44c/Astro...,Astrocitoma T1


In [11]:
num_classes = len(data_df['label'].unique())
print(f"We have {num_classes} classes")
print(f"We have {data_df.shape[0]} images")

We have 44 classes
We have 4479 images


In [12]:
data_df['label'].value_counts()

Meningioma T1C+           369
Meningioma T1             272
_NORMAL T2                271
_NORMAL T1                251
Astrocitoma T1C+          233
Meningioma T2             233
Neurocitoma T1C+          223
Schwannoma T1C+           194
Astrocitoma T1            176
Astrocitoma T2            171
Schwannoma T1             148
Neurocitoma T1            130
Schwannoma T2             123
Carcinoma T1C+            112
Papiloma T1C+             108
Neurocitoma T2            104
Glioblastoma T1C+          94
Oligodendroglioma T1       86
Tuberculoma T1C+           84
Carcinoma T2               73
Oligodendroglioma T1C+     72
Meduloblastoma T1C+        67
Papiloma T1                66
Oligodendroglioma T2       66
Carcinoma T1               66
Papiloma T2                63
Ependimoma T2              57
Glioblastoma T2            55
Glioblastoma T1            55
Ependimoma T1C+            48
Ependimoma T1              45
Meduloblastoma T2          41
Germinoma T1C+             40
Tuberculom

In [13]:
import plotly.express as px
import plotly.graph_objs as go

def count_plot(x, title, xlabel, ylabel, width, length, order, rotation=True, palette='winter'):
    
    fig = px.histogram(x=x, color_discrete_sequence=px.colors.qualitative.Plotly, 
                       color=x, nbins=len(set(x)), template='simple_white')
    
    fig.update_layout(title=title, width=width, height=length, 
                      xaxis_title=xlabel, yaxis_title=ylabel,
                      font=dict(size=20))
    
    if rotation:
        fig.update_layout(xaxis_tickangle=-45)
    
    fig.show()

In [14]:
x = data_df['label']
order = x.value_counts().index
count_plot(x, "Labels distribution (Train Data)", "Label", 'Frequency', 800,600, order=order, rotation = True)

In [15]:
test_split_size = 0.3
train_df, test_df = train_test_split(data_df, test_size=test_split_size, shuffle=True, random_state=123, stratify=data_df['label'])
test_df, valid_df = train_test_split(test_df, test_size=0.5, shuffle=True, random_state=123, stratify=test_df['label'])

In [16]:
train_df.head()

Unnamed: 0,image_path,label
2175,/kaggle/input/brain-tumor-mri-images-44c/Menin...,Meningioma T1C+
1735,/kaggle/input/brain-tumor-mri-images-44c/Menin...,Meningioma T1
1191,/kaggle/input/brain-tumor-mri-images-44c/Gliob...,Glioblastoma T1
129,/kaggle/input/brain-tumor-mri-images-44c/Astro...,Astrocitoma T1
1645,/kaggle/input/brain-tumor-mri-images-44c/Menin...,Meningioma T1


In [17]:
test_df.head()

Unnamed: 0,image_path,label
3506,/kaggle/input/brain-tumor-mri-images-44c/Schwa...,Schwannoma T1C+
1822,/kaggle/input/brain-tumor-mri-images-44c/Menin...,Meningioma T1
3265,/kaggle/input/brain-tumor-mri-images-44c/Papil...,Papiloma T1C+
4371,/kaggle/input/brain-tumor-mri-images-44c/_NORM...,_NORMAL T2
244,/kaggle/input/brain-tumor-mri-images-44c/Astro...,Astrocitoma T1C+


In [18]:
valid_df.head()

Unnamed: 0,image_path,label
3652,/kaggle/input/brain-tumor-mri-images-44c/Schwa...,Schwannoma T1C+
300,/kaggle/input/brain-tumor-mri-images-44c/Astro...,Astrocitoma T1C+
481,/kaggle/input/brain-tumor-mri-images-44c/Astro...,Astrocitoma T2
4051,/kaggle/input/brain-tumor-mri-images-44c/_NORM...,_NORMAL T1
892,/kaggle/input/brain-tumor-mri-images-44c/Epend...,Ependimoma T1C+


In [19]:
print(f"We have {len(train_df)} images in the training dataset\nWe have {len(valid_df)} images in the validating dataset\nWe have {len(test_df)} images in the testing dataset")

We have 3135 images in the training dataset
We have 672 images in the validating dataset
We have 672 images in the testing dataset


In [20]:
x = train_df['label']
order = x.value_counts().index
count_plot(x, "Labels distribution (Train Data)", "Label", 'Frequency', 800,600, rotation = True, order=order)

In [21]:
x = valid_df['label']
order = x.value_counts().index
count_plot(x, "Labels distribution (Valid Data)", "Label", 'Frequency', 800,600, rotation = True, order=order)

In [22]:
x = test_df['label']
order = x.value_counts().index
count_plot(x, "Labels distribution (Test Data)", "Label", 'Frequency', 800,600, rotation = True, order=order)

In [23]:
def get_images_avg_width_height(df):
    
    avg_width, avg_height = 0,0
    for i in df.index:
        
        try:
            img_path = df['image_path'].loc[i]
            img = cv2.imread(img_path)
            avg_width += img.shape[0]
            avg_height += img.shape[1]
        except:
            pass

    avg_width // len(df)
    avg_height // len(df)
    
    return avg_width, avg_height

In [24]:
len(data_df)

4479

In [25]:
data_avg_width, data_avg_height = get_images_avg_width_height(data_df)
print(f"Average width and height for the dataset is {data_avg_width//len(data_df)}x{data_avg_height//len(data_df)} with aspect_ratio {data_avg_width/data_avg_height}")

Average width and height for the dataset is 607x567 with aspect_ratio 1.0706256936627756


In [26]:
img_size = (224, 224)
channels = 3
color = 'rgb'
img_shape = (img_size[0], img_size[1], channels)
batch_size = 32
ts_length = len(test_df)
test_batch_size = max(sorted([ts_length // n for n in range(1, ts_length + 1) if ts_length%n == 0 and ts_length/n <= 80]))
test_steps = ts_length // test_batch_size
def scalar(img):
    return img

In [27]:
tr_gen = ImageDataGenerator(preprocessing_function= scalar, 
                            horizontal_flip= True)

ts_gen = ImageDataGenerator(preprocessing_function= scalar)

train_gen = tr_gen.flow_from_dataframe( train_df, 
                                       x_col= 'image_path', 
                                       y_col= 'label', 
                                       target_size= img_size, 
                                       class_mode= 'categorical',
                                       color_mode= color, 
                                       shuffle= True, 
                                       batch_size= batch_size)

valid_gen = ts_gen.flow_from_dataframe( valid_df, 
                                       x_col= 'image_path', 
                                       y_col= 'label', 
                                       target_size= img_size, 
                                       class_mode= 'categorical',
                                       color_mode= color, 
                                       shuffle= True, 
                                       batch_size= batch_size)

test_gen = ts_gen.flow_from_dataframe( test_df, 
                                      x_col= 'image_path', 
                                      y_col= 'label', 
                                      target_size= img_size, 
                                      class_mode= 'categorical',
                                      color_mode= color, 
                                      shuffle= False, 
                                      batch_size= test_batch_size)


Found 3134 validated image filenames belonging to 44 classes.



Found 1 invalid image filename(s) in x_col="image_path". These filename(s) will be ignored.



Found 672 validated image filenames belonging to 44 classes.
Found 672 validated image filenames belonging to 44 classes.


In [28]:
class_count = len(list(train_gen.class_indices.keys())) 

In [29]:
base_model = tf.keras.applications.VGG19(include_top= False, weights= "imagenet", 
                                         input_shape= img_shape,pooling= 'max')

model = Sequential([
    base_model,
    BatchNormalization(axis= -1, momentum= 0.99, epsilon= 0.001),
    Dense(256, 
          kernel_regularizer= regularizers.l2(l= 0.016), 
          activity_regularizer= regularizers.l1(0.006),
          bias_regularizer= regularizers.l1(0.006), 
          activation= 'relu'),
    
    Dropout(rate= 0.45, 
            seed= 123),
    
    Dense(class_count, activation= 'softmax')
])

model.compile(Adamax(learning_rate= 0.001), loss= 'categorical_crossentropy', metrics= ['accuracy'])

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg19 (Functional)          (None, 512)               20024384  
                                                                 
 batch_normalization (BatchN  (None, 512)              2048      
 ormalization)                                                   
                                                                 
 dense (Dense)               (None, 256)               131328    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 44)                11308     
                                                 

In [None]:
early_stop = EarlyStopping(monitor='val_loss', 
                           patience=5,
                           verbose=1)

checkpoint = ModelCheckpoint('model_weights.h5', 
                             monitor='val_loss', 
                             save_best_only=True, 
                             save_weights_only=True, 
                             mode='min', 
                             verbose=1)


history = model.fit(x= train_gen, 
                    epochs= 200, 
                    callbacks=[early_stop, checkpoint],
                    validation_data= valid_gen)

Epoch 1/200
Epoch 1: val_loss improved from inf to 9.02097, saving model to model_weights.h5
Epoch 2/200
Epoch 2: val_loss improved from 9.02097 to 6.92295, saving model to model_weights.h5
Epoch 3/200
Epoch 3: val_loss improved from 6.92295 to 5.94883, saving model to model_weights.h5
Epoch 4/200
Epoch 4: val_loss improved from 5.94883 to 5.68354, saving model to model_weights.h5
Epoch 5/200
Epoch 5: val_loss improved from 5.68354 to 5.11977, saving model to model_weights.h5
Epoch 6/200
Epoch 6: val_loss improved from 5.11977 to 4.41902, saving model to model_weights.h5
Epoch 7/200
Epoch 7: val_loss did not improve from 4.41902
Epoch 8/200
Epoch 8: val_loss improved from 4.41902 to 4.24463, saving model to model_weights.h5
Epoch 9/200
Epoch 9: val_loss improved from 4.24463 to 3.54573, saving model to model_weights.h5
Epoch 10/200
Epoch 10: val_loss improved from 3.54573 to 3.23652, saving model to model_weights.h5
Epoch 11/200
Epoch 11: val_loss improved from 3.23652 to 3.01645, savi

In [None]:
def plot_training(hist):
    '''
    This function take training model and plot history of accuracy and losses with the best epoch in both of them.
    '''

    tr_acc = hist.history['accuracy']
    tr_loss = hist.history['loss']
    val_acc = hist.history['val_accuracy']
    val_loss = hist.history['val_loss']
    index_loss = np.argmin(val_loss)
    val_lowest = val_loss[index_loss]
    index_acc = np.argmax(val_acc)
    acc_highest = val_acc[index_acc]
    Epochs = [i+1 for i in range(len(tr_acc))]
    loss_label = f'best epoch= {str(index_loss + 1)}'
    acc_label = f'best epoch= {str(index_acc + 1)}'

    plt.figure(figsize= (20, 8))
    plt.style.use('fivethirtyeight')

    plt.subplot(1, 2, 1)
    plt.plot(Epochs, tr_loss, 'r', label= 'Training loss')
    plt.plot(Epochs, val_loss, 'g', label= 'Validation loss')
    plt.scatter(index_loss + 1, val_lowest, s= 150, c= 'blue', label= loss_label)
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(Epochs, tr_acc, 'r', label= 'Training Accuracy')
    plt.plot(Epochs, val_acc, 'g', label= 'Validation Accuracy')
    plt.scatter(index_acc + 1 , acc_highest, s= 150, c= 'blue', label= acc_label)
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout
    plt.show()

In [None]:
plot_training(history)

In [None]:
ts_length = len(test_df)
test_batch_size = max(sorted([ts_length // n for n in range(1, ts_length + 1) if ts_length%n == 0 and ts_length/n <= 80]))
test_steps = ts_length // test_batch_size

train_score = model.evaluate(train_gen, steps= test_steps, verbose= 1)
valid_score = model.evaluate(valid_gen, steps= test_steps, verbose= 1)
test_score = model.evaluate(test_gen, steps= test_steps, verbose= 1)

print("Train Loss: ", train_score[0])
print("Train Accuracy: ", train_score[1])
print('-' * 20)
print("Validation Loss: ", valid_score[0])
print("Validation Accuracy: ", valid_score[1])
print('-' * 20)
print("Test Loss: ", test_score[0])
print("Test Accuracy: ", test_score[1])

In [None]:
y_pred = model.predict(test_gen)

y_pred_labels = np.argmax(y_pred, axis=1)

y_true_labels = test_gen.classes

In [None]:
class_names = list(test_gen.class_indices.keys())

confusion_mtx = confusion_matrix(y_true_labels, y_pred_labels)

plt.figure(figsize=(10,8))
sns.heatmap(confusion_mtx, cmap="Blues", annot=True, fmt="d", xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

In [None]:
report = classification_report(y_true_labels, y_pred_labels, target_names=class_names)

print("Classification Report: ")
print(report)

In [None]:
report = classification_report(y_true_labels, y_pred_labels, target_names=class_names)

print("Classification Report: ")
print(report)

In [None]:
base_model1 = tf.keras.applications.inception_v4 import InceptionV4(include_top= False, 
                                                               weights= "imagenet", 
                                                               input_shape= img_shape,
                                                               pooling= 'max')

model1 = Sequential([
    base_model1,
    BatchNormalization(axis= -1, momentum= 0.99, epsilon= 0.001),
    Dense(256, 
          kernel_regularizer= regularizers.l2(l= 0.016), 
          activity_regularizer= regularizers.l1(0.006),
          bias_regularizer= regularizers.l1(0.006), 
          activation= 'relu'),
    
    Dropout(rate= 0.45, 
            seed= 123),
    
    Dense(class_count, activation= 'softmax')
])

model1.compile(Adamax(learning_rate= 0.001), loss= 'categorical_crossentropy', metrics= ['accuracy'])

model1.summary()

In [None]:
early_stop = EarlyStopping(monitor='val_loss', 
                           patience=5,
                           verbose=1)

checkpoint = ModelCheckpoint('model_weights.h5', 
                             monitor='val_loss', 
                             save_best_only=True, 
                             save_weights_only=True, 
                             mode='min', 
                             verbose=1)


history = model1.fit(x= train_gen, 
                    epochs= 200, 
                    callbacks=[early_stop, checkpoint],
                    validation_data= valid_gen)

In [None]:
def plot_training(hist):
    '''
    This function take training model and plot history of accuracy and losses with the best epoch in both of them.
    '''

    tr_acc = hist.history['accuracy']
    tr_loss = hist.history['loss']
    val_acc = hist.history['val_accuracy']
    val_loss = hist.history['val_loss']
    index_loss = np.argmin(val_loss)
    val_lowest = val_loss[index_loss]
    index_acc = np.argmax(val_acc)
    acc_highest = val_acc[index_acc]
    Epochs = [i+1 for i in range(len(tr_acc))]
    loss_label = f'best epoch= {str(index_loss + 1)}'
    acc_label = f'best epoch= {str(index_acc + 1)}'

    plt.figure(figsize= (20, 8))
    plt.style.use('fivethirtyeight')

    plt.subplot(1, 2, 1)
    plt.plot(Epochs, tr_loss, 'r', label= 'Training loss')
    plt.plot(Epochs, val_loss, 'g', label= 'Validation loss')
    plt.scatter(index_loss + 1, val_lowest, s= 150, c= 'blue', label= loss_label)
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(Epochs, tr_acc, 'r', label= 'Training Accuracy')
    plt.plot(Epochs, val_acc, 'g', label= 'Validation Accuracy')
    plt.scatter(index_acc + 1 , acc_highest, s= 150, c= 'blue', label= acc_label)
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout
    plt.show()

In [None]:
plot_training(history)

In [None]:
ts_length = len(test_df)
test_batch_size = max(sorted([ts_length // n for n in range(1, ts_length + 1) if ts_length%n == 0 and ts_length/n <= 80]))
test_steps = ts_length // test_batch_size

train_score = model1.evaluate(train_gen, steps= test_steps, verbose= 1)
valid_score = model1.evaluate(valid_gen, steps= test_steps, verbose= 1)
test_score = model1.evaluate(test_gen, steps= test_steps, verbose= 1)

print("Train Loss: ", train_score[0])
print("Train Accuracy: ", train_score[1])
print('-' * 20)
print("Validation Loss: ", valid_score[0])
print("Validation Accuracy: ", valid_score[1])
print('-' * 20)
print("Test Loss: ", test_score[0])
print("Test Accuracy: ", test_score[1])

In [None]:
y_pred = model1.predict(test_gen)

y_pred_labels = np.argmax(y_pred, axis=1)

y_true_labels = test_gen.classes

In [None]:
class_names = list(test_gen.class_indices.keys())

confusion_mtx = confusion_matrix(y_true_labels, y_pred_labels)

plt.figure(figsize=(10,8))
sns.heatmap(confusion_mtx, cmap="Blues", annot=True, fmt="d", xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

In [None]:
report = classification_report(y_true_labels, y_pred_labels, target_names=class_names)

print("Classification Report: ")
print(report)