Import Libraries


In [None]:
# Install dependencies
!pip install tensorflow numpy matplotlib opencv-python

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import (classification_report, confusion_matrix,precision_recall_curve,
                             accuracy_score, precision_score, recall_score, auc,
                             f1_score, roc_auc_score, roc_curve, precision_recall_fscore_support)

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (BaggingClassifier, RandomForestClassifier,
                              AdaBoostClassifier, GradientBoostingClassifier,
                              VotingClassifier, StackingClassifier)
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.decomposition import PCA

import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
from tensorflow.keras import layers, models

# 1. Initialize the Brain
model = models.Sequential()

# 2. The "Eyes" (Convolutional Layers)
# Look for 32 different features (edges/shapes) using a 3x3 filter
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2))) # Shrink the image (focus on what matters)

# Look for 64 more complex features
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# 3. The "Thinking" (Dense Layers)
model.add(layers.Flatten()) # Flatten the 2D image into a 1D list of numbers
model.add(layers.Dense(64, activation='relu')) # A layer of neurons to think

# 4. The Output
# 1 neuron. If output is near 0 -> Healthy. If near 1 -> Cancer.
model.add(layers.Dense(4, activation='sigmoid'))#1 is for 2 categories, 4 is for 4 categories)

Import Dataset


In [2]:
import tensorflow as tf
import kagglehub

# Download latest version
path = kagglehub.dataset_download("masoudnickparvar/brain-tumor-mri-dataset")

print("Path to dataset files:", path)

file_list = os.listdir(path)
print("Files in folder:", file_list)

# 1. Setup variables (Constraints)
# We resize everything to 150x150 so the CNN doesn't get confused by different sizes.
IMG_HEIGHT = 150
IMG_WIDTH = 150
BATCH_SIZE = 32  # The AI learns from 32 images at a time

# 2. Load the 'Training' Data
print("Loading Training Data:")
train = tf.keras.utils.image_dataset_from_directory(
    '/kaggle/input/brain-tumor-mri-dataset/Training',        # Point to the folder
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    label_mode='categorical',   # Use 'categorical' because you have 4 folder types
    shuffle=True                # Shuffle so the AI doesn't memorize the order
)

# 3. Load the 'Testing' Data (Validation)
print("\nLoading Testing Data:")
test = tf.keras.utils.image_dataset_from_directory(
    '/kaggle/input/brain-tumor-mri-dataset/Testing',
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    label_mode='categorical',
    shuffle=False
)

# 4. Check the class names (The folders it found)
class_names = train.class_names
print(f"\nClasses found: {class_names}")

Using Colab cache for faster access to the 'brain-tumor-mri-dataset' dataset.
Path to dataset files: /kaggle/input/brain-tumor-mri-dataset
Files in folder: ['Training', 'Testing']
Loading Training Data:
Found 5712 files belonging to 4 classes.

Loading Testing Data:
Found 1311 files belonging to 4 classes.

Classes found: ['glioma', 'meningioma', 'notumor', 'pituitary']


Pre-trained model test:

In [None]:
# 1. Define data augmentation FIRST
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.2),
    layers.RandomBrightness(0.2),
    layers.RandomContrast(0.2),
])

# 2. Choose ONE model approach - Transfer Learning (recommended)
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(150, 150, 3),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False  # Freeze pre-trained weights initially

model = models.Sequential([
    data_augmentation,
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.5),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(4, activation='softmax')
])

# 3. Compile the model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# 4. Set up callback
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-7
)

# 5. Train
history = model.fit(
    train,
    validation_data=test,
    epochs=30,
    callbacks=[reduce_lr]
)

Epoch 1/30
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 69ms/step - accuracy: 0.4044 - loss: 1.5759 - val_accuracy: 0.4920 - val_loss: 1.1004 - learning_rate: 0.0010
Epoch 2/30
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 64ms/step - accuracy: 0.5652 - loss: 1.0045 - val_accuracy: 0.5225 - val_loss: 1.0512 - learning_rate: 0.0010
Epoch 3/30
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 58ms/step - accuracy: 0.6055 - loss: 0.9560 - val_accuracy: 0.5675 - val_loss: 0.9917 - learning_rate: 0.0010
Epoch 4/30
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 54ms/step - accuracy: 0.6172 - loss: 0.9281 - val_accuracy: 0.4989 - val_loss: 1.0675 - learning_rate: 0.0010
Epoch 5/30
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 59ms/step - accuracy: 0.6294 - loss: 0.8913 - val_accuracy: 0.5271 - val_loss: 1.0198 - learning_rate: 0.0010
Epoch 6/30
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

Train


In [None]:
data_augmentation = tf.keras.Sequential([
  layers.RandomFlip("horizontal", input_shape=(150, 150, 3)),
  layers.RandomRotation(0.1),
  layers.RandomZoom(0.1),
])

model = models.Sequential([
  # 1. The Augmentation Block (New!)
  data_augmentation,

  # 2. Convolution Layer 1
  layers.Conv2D(32, (3, 3), activation='relu'),
  layers.MaxPooling2D((2, 2)),

  # 3. Convolution Layer 2
  layers.Conv2D(64, (3, 3), activation='relu'),
  layers.MaxPooling2D((2, 2)),

  # 4. Convolution Layer 3 (Added for more depth)
  layers.Conv2D(128, (3, 3), activation='relu'),
  layers.MaxPooling2D((2, 2)),

  # 5. Flatten & Dense
  layers.Flatten(),
  layers.Dropout(0.5),  # <--- DROPOUT: Kills 50% of neurons randomly to prevent memorization
  layers.Dense(128, activation='relu'),

  # 6. Output (4 classes)
  layers.Dense(4, activation='softmax')
])

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train for a bit longer since the problem is harder now
history = model.fit(train, validation_data=test, epochs=15)

Epoch 1/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 97ms/step - accuracy: 0.4919 - loss: 16.9026 - val_accuracy: 0.7063 - val_loss: 0.8379
Epoch 2/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 60ms/step - accuracy: 0.6746 - loss: 0.8234 - val_accuracy: 0.6842 - val_loss: 1.1986
Epoch 3/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 53ms/step - accuracy: 0.7534 - loss: 0.6466 - val_accuracy: 0.7635 - val_loss: 0.6553
Epoch 4/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 57ms/step - accuracy: 0.7679 - loss: 0.5987 - val_accuracy: 0.7117 - val_loss: 0.8714
Epoch 5/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 48ms/step - accuracy: 0.7925 - loss: 0.5464 - val_accuracy: 0.7750 - val_loss: 0.6094
Epoch 6/15
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 60ms/step - accuracy: 0.8057 - loss: 0.5069 - val_accuracy: 0.7277 - val_loss: 0.7530
Epoch 7/15
[1m17

Test


In [None]:
# Evaluate the model on the test data
print("Evaluating model...")
test_loss, test_acc = model.evaluate(test)

print(f"\nTest Accuracy: {test_acc * 100:.2f}%")

Random Test


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# --- THE FIX: Chain .shuffle() before .take() ---
# buffer_size=1000 means "mix up 1000 images before picking a batch"
for images, labels in test.shuffle(1000).take(1):

    # 1. Pick a RANDOM index from this batch (instead of always index 0)
    # 'images' usually has 32 items. We pick a random number between 0 and 31.
    random_index = np.random.randint(0, len(images))

    # 2. Grab that specific random image
    img = images[random_index].numpy().astype("uint8")
    actual_label_index = np.argmax(labels[random_index])

    # 3. Predict
    img_prediction = tf.expand_dims(images[random_index], 0)
    predictions = model.predict(img_prediction)
    predicted_label_index = np.argmax(predictions)

    # 4. Show result
    plt.imshow(img)
    plt.title(f"Actual: {class_names[actual_label_index]} \nAI Pred: {class_names[predicted_label_index]}")
    plt.axis("off")
    plt.show()

    break

Save Model

In [None]:
# Save the entire model as a single file
model.save('mri_tumor_detector.keras')
print("Model saved! Check your file browser on the left.")