In [40]:
# General Imports
import tensorflow as tf
import pandas as pd
import numpy as np
import random
import os
import kagglehub
import cv2


# Visualization
import matplotlib.pyplot as plt

# Building Model
from keras.utils import plot_model
from tensorflow.keras import models
from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import legacy
from tensorflow.keras.optimizers import Adam

# Training Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import ModelCheckpoint

# Data Processing
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import array_to_img
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Accuracy
from sklearn.metrics import accuracy_score

In [41]:
#Download data set and get the path
path = kagglehub.dataset_download("masoudnickparvar/brain-tumor-mri-dataset")

print("Path to dataset files:", path);

Path to dataset files: C:\Users\erin elagoz\.cache\kagglehub\datasets\masoudnickparvar\brain-tumor-mri-dataset\versions\1


In [42]:
train_dir = path + "/Training"
test_dir = path + "/Testing"
#train_dataset = train_dir.flow_from_directory(train_dir, target_size=(250,250))
#test_dataset = test_dir.flow_from_directory(test_dir, target_size=(250,250))

In [43]:
cv2.imread(path + "/Training/glioma/Tr-glTr_0000.jpg").shape

(512, 512, 3)

In [44]:
img = load_img(path + "/Training/glioma/Tr-glTr_0000.jpg")

In [45]:
#for getting class indices
datagen = ImageDataGenerator(rescale=1./255)

train_dataset = datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)
test_dataset = datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)
print(train_dataset.class_indices)
print(train_dataset.classes)
print(test_dataset.class_indices)
print(test_dataset.classes)

Found 5712 images belonging to 4 classes.
Found 1311 images belonging to 4 classes.
{'glioma': 0, 'meningioma': 1, 'notumor': 2, 'pituitary': 3}
[0 0 0 ... 3 3 3]
{'glioma': 0, 'meningioma': 1, 'notumor': 2, 'pituitary': 3}
[0 0 0 ... 3 3 3]


In [46]:
# Augmented Data Generator (with augmentation)
augmented_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

augmented_train_dataset = augmented_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)

augmented_test_dataset = augmented_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical'
)

print(augmented_train_dataset.class_indices)
print(augmented_train_dataset.classes)
print(augmented_test_dataset.class_indices)
print(augmented_test_dataset.classes)

Found 5712 images belonging to 4 classes.
Found 1311 images belonging to 4 classes.
{'glioma': 0, 'meningioma': 1, 'notumor': 2, 'pituitary': 3}
[0 0 0 ... 3 3 3]
{'glioma': 0, 'meningioma': 1, 'notumor': 2, 'pituitary': 3}
[0 0 0 ... 3 3 3]


In [47]:
#utilizing cnn with maxpooling, and all the layers are given in a form of list, one by one we provide the layers, 
# so Conv2D(how many filters you want to keep inside this layer which is how many features to learn from, 
# (size of the filters), activation function, input_shape=(256, 256, 3-->dimension because rgb) )
model = tf.keras.models.Sequential([tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(150, 150, 3)),
                                    #then adding a max pooling layer
                                    tf.keras.layers.MaxPooling2D(2, 2),
                                    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
                                    tf.keras.layers.MaxPooling2D(2,2),
                                    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
                                    tf.keras.layers.MaxPooling2D(2,2),
                                    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
                                    tf.keras.layers.MaxPooling2D(2,2),
                                    tf.keras.layers.Flatten(),
                                    tf.keras.layers.Dense(512, activation='relu'),
                                    #4 because 4 classes
                                    #softmax because we have multiple classes and not binary
                                    tf.keras.layers.Dense(4, activation='softmax')])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [48]:
model.compile(loss='categorical_crossentropy', 
              optimizer = Adam(learning_rate=0.001),
              metrics=['accuracy'])

In [49]:
model_fit = model.fit(train_dataset,
          epochs=15,
          validation_data=test_dataset) 

Epoch 1/15


  self._warn_if_super_not_called()


[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 97ms/step - accuracy: 0.5994 - loss: 0.9214 - val_accuracy: 0.7910 - val_loss: 0.5001
Epoch 2/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 110ms/step - accuracy: 0.8697 - loss: 0.3515 - val_accuracy: 0.8574 - val_loss: 0.3589
Epoch 3/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 111ms/step - accuracy: 0.9224 - loss: 0.2140 - val_accuracy: 0.9138 - val_loss: 0.2129
Epoch 4/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 111ms/step - accuracy: 0.9489 - loss: 0.1379 - val_accuracy: 0.9314 - val_loss: 0.1863
Epoch 5/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 111ms/step - accuracy: 0.9642 - loss: 0.1010 - val_accuracy: 0.9428 - val_loss: 0.1668
Epoch 6/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 112ms/step - accuracy: 0.9728 - loss: 0.0739 - val_accuracy: 0.9626 - val_loss: 0.1118
Epoch 7/15
[1m179/179

In [50]:
# Train the model on the augmented dataset
augmented_model_fit = model.fit(
    augmented_train_dataset,
    epochs=15,
    validation_data=augmented_test_dataset
)

  self._warn_if_super_not_called()


Epoch 1/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 191ms/step - accuracy: 0.5826 - loss: 1.2551 - val_accuracy: 0.6865 - val_loss: 0.7797
Epoch 2/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 181ms/step - accuracy: 0.7392 - loss: 0.6424 - val_accuracy: 0.7536 - val_loss: 0.6431
Epoch 3/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 186ms/step - accuracy: 0.7997 - loss: 0.5111 - val_accuracy: 0.7483 - val_loss: 0.6396
Epoch 4/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 194ms/step - accuracy: 0.7991 - loss: 0.5166 - val_accuracy: 0.7811 - val_loss: 0.5462
Epoch 5/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 197ms/step - accuracy: 0.8354 - loss: 0.4270 - val_accuracy: 0.8154 - val_loss: 0.4890
Epoch 6/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 204ms/step - accuracy: 0.8637 - loss: 0.3655 - val_accuracy: 0.8131 - val_loss: 0.4712
Epoch 7/15

In [56]:
# Get predictions for test dataset
y_pred_probs = model.predict(test_dataset)
y_pred = np.argmax(y_pred_probs, axis=1)

y_true = test_dataset.classes
class_labels = list(test_dataset.class_indices.keys())

class_accuracies = {}
for class_id, class_name in enumerate(class_labels):
    class_mask = (y_true == class_id)
    class_accuracy = accuracy_score(y_true[class_mask], y_pred[class_mask])
    class_accuracies[class_name] = class_accuracy

# Print results
for class_name, accuracy in class_accuracies.items():
    print(f"Accuracy for class {class_name}: {accuracy:.2%}")

[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
Accuracy for class glioma: 19.33%
Accuracy for class meningioma: 23.86%
Accuracy for class notumor: 35.06%
Accuracy for class pituitary: 21.00%


In [55]:
# Get predictions for the augmented test dataset
y_pred_probs_augmented = model.predict(augmented_test_dataset)
y_pred_augmented = np.argmax(y_pred_probs_augmented, axis=1)

y_true_augmented = augmented_test_dataset.classes
class_labels_augmented = list(augmented_test_dataset.class_indices.keys())

class_accuracies_augmented = {}
for class_id, class_name in enumerate(class_labels_augmented):
    class_mask_augmented = (y_true_augmented == class_id)
    class_accuracy_augmented = accuracy_score(y_true_augmented[class_mask_augmented], y_pred_augmented[class_mask_augmented])
    class_accuracies_augmented[class_name] = class_accuracy_augmented

for class_name, accuracy in class_accuracies_augmented.items():
    print(f"Accuracy for class {class_name} on augmented test data: {accuracy:.2%}")

[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 85ms/step
Accuracy for class glioma on augmented test data: 22.00%
Accuracy for class meningioma on augmented test data: 23.20%
Accuracy for class notumor on augmented test data: 33.58%
Accuracy for class pituitary on augmented test data: 21.00%
