## <b> <span style='color:blue'>|</span> 🧠 Brain Tumor Detection using VGG16 </b> <hr>

• A deep learning project that classifies brain MRI images as Tumor / No Tumor using the VGG16 architecture.
<br>

## <b> 1 <span style='color:red'>|</span> 📁 Data Preparation and Folder Structure Creation </b> 

• This section defines the paths for the dataset and ensures that subfolders for each class (no, yes) exist in the training, validation, and testing directories.

<br>

## <b> 2 <span style='color:red'>|</span> 🔀 Split the Dataset into Train, Test, and Validation </b> 

• The `split_data` function takes all images of one class, shuffles them, and moves them into the train, test, and validation folders according to the defined ratios.

In [None]:
import os
import shutil
import random

# Root directory that holds the raw class folders ('no', 'yes') and receives
# the TRAIN / TEST / VAL split folders.
source_path = r"D:\Projects\Brain Tumor\VGG16\Data"

# Split directories, built relative to source_path. Joining only the folder
# name keeps every path tied to source_path, so moving the dataset means
# editing a single line.
train_path = os.path.join(source_path, "TRAIN")
test_path = os.path.join(source_path, "TEST")
val_path = os.path.join(source_path, "VAL")

# Create class subfolders ('no', 'yes') inside TRAIN, TEST, and VAL if they don't exist
for folder in (train_path, test_path, val_path):
    for cls in ("no", "yes"):
        os.makedirs(os.path.join(folder, cls), exist_ok=True)

# Function to split images into TRAIN, TEST, and VAL sets
def split_data(class_name, split_ratio=(0.7, 0.15, 0.15), seed=None):
    """Move one class's images into the TRAIN / TEST / VAL folders.

    The regular files directly inside ``source_path/class_name`` are shuffled
    and partitioned: the first ``split_ratio[0]`` fraction goes to
    ``train_path``, the next ``split_ratio[1]`` fraction to ``test_path``, and
    every remaining file to ``val_path``. The validation share is therefore
    whatever is left over; ``split_ratio[2]`` is not read.

    Files are moved, not copied, so the class folder no longer contains them
    afterwards and calling the function again finds nothing left to split.

    Args:
        class_name: Name of the class subfolder, e.g. ``"no"`` or ``"yes"``.
        split_ratio: ``(train, test, val)`` fractions of the class.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            shared module-level ``random`` state is used.
    """
    class_path = os.path.join(source_path, class_name)

    # Keep regular files only: sub-directories must be neither counted nor
    # moved. Sorting makes a seeded shuffle independent of listdir order.
    images = sorted(
        entry
        for entry in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, entry))
    )

    if seed is None:
        random.shuffle(images)
    else:
        # A private generator leaves the global random state untouched.
        random.Random(seed).shuffle(images)

    # Sizes are truncated, so rounding leftovers end up in the validation set.
    train_size = int(len(images) * split_ratio[0])
    test_size = int(len(images) * split_ratio[1])
    test_end = train_size + test_size

    splits = (
        (train_path, images[:train_size]),
        (test_path, images[train_size:test_end]),
        (val_path, images[test_end:]),
    )
    for split_root, split_images in splits:
        destination = os.path.join(split_root, class_name)
        # Do not rely on the folders having been created beforehand.
        os.makedirs(destination, exist_ok=True)
        for img in split_images:
            shutil.move(os.path.join(class_path, img), os.path.join(destination, img))

# Run the split once for each class folder
for class_label in ("no", "yes"):
    split_data(class_label)

## <b> 3 <span style='color:red'>|</span> 🧪 Data Generators (Augmentation & Preprocessing) </b> 

• This section prepares the images for training, validation, and testing using ImageDataGenerator.
It applies data augmentation only for training to make the model more robust.

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

# Paths to the training, testing, and validation datasets, derived from
# source_path (the dataset root defined in the data-preparation cell).
# Only the folder name is joined, so the dataset root is spelled out in one place.
train_path = os.path.join(source_path, "TRAIN")
test_path = os.path.join(source_path, "TEST")
val_path = os.path.join(source_path, "VAL")

# Augmentation settings applied to training images only; pixel values are
# additionally rescaled to the [0, 1] range.
augmentation_options = {
    "rescale": 1./255,
    "rotation_range": 20,         # rotations of up to 20 degrees
    "width_shift_range": 0.2,     # horizontal shifts
    "height_shift_range": 0.2,    # vertical shifts
    "horizontal_flip": True,      # random left/right mirroring
    "fill_mode": 'nearest',       # how pixels exposed by a transform are filled
}
train_datagen = ImageDataGenerator(**augmentation_options)

# Stream augmented batches from the class subfolders of the training directory
train_flow_options = {
    "target_size": (224, 224),    # input resolution used for VGG16 below
    "batch_size": 32,
    "class_mode": 'binary',       # two classes: tumor / no tumor
}
train_generator = train_datagen.flow_from_directory(train_path, **train_flow_options)

# Validation and test images are only rescaled to [0, 1] - no augmentation,
# so the reported metrics reflect unmodified images.
val_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by both evaluation generators. shuffle=False keeps the
# batches in directory order, so model predictions can be matched one-to-one
# with generator.classes / generator.filenames (e.g. for a confusion matrix).
# Loss and accuracy computed over the full set do not depend on the order.
eval_flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False,
)

# Validation data generator
val_generator = val_datagen.flow_from_directory(val_path, **eval_flow_options)

# Test data generator (same preprocessing as validation)
test_generator = val_datagen.flow_from_directory(test_path, **eval_flow_options)

Found 2100 images belonging to 2 classes.
Found 450 images belonging to 2 classes.
Found 450 images belonging to 2 classes.


## <b> 4 <span style='color:red'>|</span> 🏗️ Model Building using VGG16 </b> 

• Here, we load the VGG16 model pre-trained on ImageNet without the fully connected layers on top.
We freeze all layers except the last two, and add custom layers for binary classification.

In [3]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout

# VGG16 convolutional base: ImageNet weights, no fully connected top,
# expecting 224x224 RGB input
base_model = VGG16(include_top=False, weights="imagenet", input_shape=(224, 224, 3))

# Every base layer except the final two is frozen; those two stay trainable
for frozen_layer in base_model.layers[:-2]:
    frozen_layer.trainable = False

# Classification head stacked on top of the base
model_layers = [
    base_model,
    Flatten(),                       # flatten the base output for the Dense layers
    Dense(512, activation='relu'),   # hidden fully connected layer
    Dropout(0.5),                    # dropout to reduce overfitting
    Dense(1, activation='sigmoid'),  # single sigmoid unit for the binary output
]
model = Sequential(model_layers)

# Adam optimizer, binary cross-entropy loss, accuracy as the reported metric
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print the layer-by-layer architecture
model.summary()


## <b> 5 <span style='color:red'>|</span> 🏋️‍♂️ Model Training with Early Stopping </b>

In [4]:
from tensorflow.keras.callbacks import EarlyStopping

# Early stopping guards against overfitting: training halts once the
# validation loss has failed to improve for 5 epochs in a row, and the
# weights from the best epoch are restored.
early_stopping = EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)

# Fit on the training generator for at most 20 epochs, checking the
# validation generator and applying the early-stopping callback
fit_options = {
    "epochs": 20,
    "validation_data": val_generator,
    "callbacks": [early_stopping],
}
history = model.fit(train_generator, **fit_options)

print("✅ Training completed!")


  self._warn_if_super_not_called()


Epoch 1/20
66/66 ━━━━━━━━━━━━━━━━━━━━ 0s 3s/step - accuracy: 0.6251 - loss: 1.6956

  self._warn_if_super_not_called()


66/66 ━━━━━━━━━━━━━━━━━━━━ 236s 4s/step - accuracy: 0.6266 - loss: 1.6825 - val_accuracy: 0.8533 - val_loss: 0.3320
Epoch 2/20
66/66 ━━━━━━━━━━━━━━━━━━━━ 236s 4s/step - accuracy: 0.8894 - loss: 0.2632 - val_accuracy: 0.8667 - val_loss: 0.2918
Epoch 3/20
66/66 ━━━━━━━━━━━━━━━━━━━━ 241s 4s/step - accuracy: 0.9211 - loss: 0.2046 - val_accuracy: 0.9000 - val_loss: 0.2009
Epoch 4/20
66/66 ━━━━━━━━━━━━━━━━━━━━ 239s 4s/step - accuracy: 0.9445 - loss: 0.1554 - val_accuracy: 0.9556 - val_loss: 0.1147
Epoch 5/20
66/66 ━━━━━━━━━━━━━━━━━━━━ 242s 4s/step - accuracy: 0.9613 - loss: 0.1070 - val_accuracy: 0.9489 - val_loss: 0.1271
Epoch 6/20
66/66 ━━━━━━━━━━━━━━━━━━━━ 238s 4s/step - accuracy: 0.9821 - loss: 0.0640 - val_accuracy: 0.9333 - val_loss: 0.1875
Epoch 7/20
66/66 ━━━━━━━━━━━━━━━

## <b> 6 <span style='color:red'>|</span> 🧪 Model Evaluation on Test Dataset </b> 

In [5]:
# Score the trained model on the test generator; the result unpacks into the
# loss followed by the accuracy metric configured at compile time
evaluation_results = model.evaluate(test_generator)
test_loss, test_acc = evaluation_results

# Report the test accuracy as a percentage with two decimals
accuracy_percent = test_acc * 100
print(f"🔥 Model Accuracy On Test Data: {accuracy_percent:.2f}%")


15/15 ━━━━━━━━━━━━━━━━━━━━ 36s 2s/step - accuracy: 0.9777 - loss: 0.1003
🔥 Model Accuracy On Test Data: 97.56%


## <b> 7 <span style='color:red'>|</span> 💾 Save the Trained Model </b>

In [None]:
# Write the trained model to disk as an HDF5 (.h5) file
model_filename = "brain_tumor_vgg16.h5"
model.save(model_filename)

