# 1) Imports

In [8]:
# Core
import os
import random
import shutil
from pathlib import Path
import matplotlib.pyplot as plt

# TensorFlow / Keras
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications import ResNet50, resnet50

# Utilities
from tqdm import tqdm

# 2) Dataset Paths

In [9]:
# All folders directly containing images
CLASS_FOLDERS = {
    "glioma": [
        "/kaggle/input/brain-mri-scans-for-brain-tumor-classification/data/glioma",
        "/kaggle/input/brain-tumor-mri-scans/glioma",
        "/kaggle/input/brain-tumor-mri-dataset/Testing/glioma",
        "/kaggle/input/brain-tumor-mri-dataset/Training/glioma",
        "/kaggle/input/brain-tumor-mri-dataset-for-deep-learning/Train/Train/Glioma/images",
        "/kaggle/input/brain-tumor-mri-dataset-for-deep-learning/test/test/Glioma/images",
        "/kaggle/input/brain-tumor-classification/Testing/glioma_tumor",
        "/kaggle/input/brain-tumor-classification/Training/glioma_tumor",
        "/kaggle/input/brain-tumor-classification-mri/Testing/glioma_tumor",
        "/kaggle/input/brain-tumor-classification-mri/Training/glioma_tumor",
        "/kaggle/input/brain-tumors-dataset/Data/Tumor/glioma_tumor"
    ],
    "meningioma": [
        "/kaggle/input/brain-mri-scans-for-brain-tumor-classification/data/meningioma",
        "/kaggle/input/brain-tumor-mri-scans/meningioma",
        "/kaggle/input/brain-tumor-mri-dataset/Testing/meningioma",
        "/kaggle/input/brain-tumor-mri-dataset/Training/meningioma",
        "/kaggle/input/brain-tumor-mri-dataset-for-deep-learning/Train/Train/Meningioma/images",
        "/kaggle/input/brain-tumor-mri-dataset-for-deep-learning/test/test/Meningioma/images",
        "/kaggle/input/brain-tumor-classification/Testing/meningioma_tumor",
        "/kaggle/input/brain-tumor-classification/Training/meningioma_tumor",
        "/kaggle/input/brain-tumor-classification-mri/Testing/meningioma_tumor",
        "/kaggle/input/brain-tumor-classification-mri/Training/meningioma_tumor",
        "/kaggle/input/brain-tumors-dataset/Data/Tumor/meningioma_tumor"
    ],
    "pituitary": [
        "/kaggle/input/brain-mri-scans-for-brain-tumor-classification/data/pituitary",
        "/kaggle/input/brain-tumor-mri-scans/pituitary",
        "/kaggle/input/brain-tumor-mri-dataset/Testing/pituitary",
        "/kaggle/input/brain-tumor-mri-dataset/Training/pituitary",
        "/kaggle/input/brain-tumor-mri-dataset-for-deep-learning/Train/Train/Pituitary/images",
        "/kaggle/input/brain-tumor-mri-dataset-for-deep-learning/test/test/Pituitary/images",
        "/kaggle/input/brain-tumor-classification/Testing/pituitary_tumor",
        "/kaggle/input/brain-tumor-classification/Training/pituitary_tumor",
        "/kaggle/input/brain-tumor-classification-mri/Testing/pituitary_tumor",
        "/kaggle/input/brain-tumor-classification-mri/Training/pituitary_tumor",
        "/kaggle/input/brain-tumors-dataset/Data/Tumor/pituitary_tumor"
    ],
    "notumor": [
        "/kaggle/input/brain-mri-scans-for-brain-tumor-classification/data/notumor",
        "/kaggle/input/brain-tumor-mri-scans/healthy",
        "/kaggle/input/brain-tumor-mri-dataset/Testing/notumor",
        "/kaggle/input/brain-tumor-mri-dataset/Training/notumor",
        "/kaggle/input/brain-tumor-mri-dataset-for-deep-learning/Train/Train/No Tumor/images",
        "/kaggle/input/brain-tumor-mri-dataset-for-deep-learning/test/test/No Tumor/images",
        "/kaggle/input/brain-tumor-classification/Testing/no_tumor",
        "/kaggle/input/brain-tumor-classification/Training/no_tumor",
        "/kaggle/input/brain-tumor-classification-mri/Testing/no_tumor",
        "/kaggle/input/brain-tumor-classification-mri/Training/no_tumor",
        "/kaggle/input/brain-tumors-dataset/Data/Normal"
    ]
}

# Quick sanity check
for cls, paths in CLASS_FOLDERS.items():
    print(f"{cls}: {len(paths)} folders")

glioma: 11 folders
meningioma: 11 folders
pituitary: 11 folders
notumor: 11 folders


# 3) Create Merged Dataset Folder Structure

In [10]:
MERGED_ROOT = "/kaggle/working/merged_dataset"
for split in ["train","val","test"]:
    for cls in CLASS_FOLDERS.keys():
        os.makedirs(os.path.join(MERGED_ROOT, split, cls), exist_ok=True)
print("Merged dataset directories created.")

Merged dataset directories created.


# 4) Merge All Images into Train/Val/Test (Memory-Safe)

In [11]:
TRAIN_RATIO, VAL_RATIO, TEST_RATIO = 0.7, 0.15, 0.15
random.seed(42)

for cls, folders in CLASS_FOLDERS.items():
    all_images = []
    for folder in folders:
        if not os.path.exists(folder):
            continue
        for f in os.listdir(folder):
            if f.lower().endswith((".jpg",".jpeg",".png")):
                all_images.append(os.path.join(folder, f))
    random.shuffle(all_images)
    n = len(all_images)
    n_train = int(n * TRAIN_RATIO)
    n_val = int(n * VAL_RATIO)

    for i, img_path in enumerate(tqdm(all_images, desc=f"Merging {cls}")):
        if i < n_train:
            split = "train"
        elif i < n_train + n_val:
            split = "val"
        else:
            split = "test"
        dst = os.path.join(MERGED_ROOT, split, cls, os.path.basename(img_path))
        shutil.copy2(img_path, dst)

Merging glioma: 100%|██████████| 14994/14994 [00:18<00:00, 810.16it/s]
Merging meningioma: 100%|██████████| 15454/15454 [00:20<00:00, 772.46it/s]
Merging pituitary: 100%|██████████| 13084/13084 [00:15<00:00, 818.41it/s]
Merging notumor: 100%|██████████| 9282/9282 [00:10<00:00, 857.36it/s]


# 5) TF Dataset Pipelines (Prefetch + Augmentation)

In [12]:
IMG_SIZE = (224,224)
BATCH_SIZE = 128
AUTOTUNE = tf.data.AUTOTUNE

def preprocess(image, label):
    image = tf.image.resize(image, IMG_SIZE)
    image = resnet50.preprocess_input(image)
    return image, label

def augment(image, label):
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, 0.1)
    return image, label

def load_dataset(split):
    ds = tf.keras.preprocessing.image_dataset_from_directory(
        os.path.join(MERGED_ROOT, split),
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        label_mode="categorical",
        shuffle=(split=="train")
    )
    if split=="train":
        ds = ds.map(augment, num_parallel_calls=AUTOTUNE)
    ds = ds.map(preprocess, num_parallel_calls=AUTOTUNE)
    return ds.prefetch(AUTOTUNE)

train_ds = load_dataset("train")
val_ds = load_dataset("val")
test_ds = load_dataset("test")

Found 31651 files belonging to 4 classes.
Found 7631 files belonging to 4 classes.
Found 7638 files belonging to 4 classes.


# 6) Multi-GPU Strategy & ResNet50 Model

In [13]:
strategy = tf.distribute.MirroredStrategy()
print(f"Number of GPUs: {strategy.num_replicas_in_sync}")

with strategy.scope():
    base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(224,224,3))
    base_model.trainable = False

    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.5),
        layers.Dense(train_ds.element_spec[1].shape[-1], activation="softmax")
    ])

    model.compile(
        optimizer=optimizers.Adam(learning_rate=1e-4),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )

model.summary()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
Number of GPUs: 2


# 7) Train Model with Logging

In [14]:
EPOCHS = 10

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    verbose=1
)

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
Epoch 1/10
INFO:tensorflow:Collective all_reduce tensors: 2 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 492ms/step - accuracy: 0.3940 - loss: 1.6056 - val_accuracy: 0.7361 - val_loss: 0.6948
Epoch 2/10
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 512ms/step - accuracy: 0.6436 - loss: 0.9120 - val_accuracy: 0.7888 - val_loss: 0.5565
Epoch 3/10
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 518ms/step - accuracy: 0.7163 - loss: 0.7243 - val_accuracy: 0.8140 - val_loss: 0.4950
Epoch 4/10
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

# 8) Plot Training Metrics

In [None]:
plt.figure(figsize=(12,5))

plt.subplot(1,2,1)
plt.plot(history.history["loss"], label="Train Loss")
plt.plot(history.history["val_loss"], label="Val Loss")
plt.legend()
plt.title("Loss")

plt.subplot(1,2,2)
plt.plot(history.history["accuracy"], label="Train Accuracy")
plt.plot(history.history["val_accuracy"], label="Val Accuracy")
plt.legend()
plt.title("Accuracy")

plt.show()

# 9) Evaluate on Test Set

In [16]:
results = model.evaluate(test_ds, verbose=1)
print(f"Test Loss: {results[0]:.4f}, Test Accuracy: {results[1]:.4f}")

[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 368ms/step - accuracy: 0.8344 - loss: 0.4343
Test Loss: 0.3582, Test Accuracy: 0.8741


# 10) Save Model & Class Mapping

In [17]:
model.save("/kaggle/working/brain_tumor_resnet50.h5")
print("Model saved: /kaggle/working/brain_tumor_resnet50.h5")

import json
class_names = list(train_ds.class_names)
with open("/kaggle/working/class_mapping.json", "w") as f:
    json.dump({"classes": class_names}, f)
print("Class mapping saved: /kaggle/working/class_mapping.json")



Model saved: /kaggle/working/brain_tumor_resnet50.h5


AttributeError: '_PrefetchDataset' object has no attribute 'class_names'