# Deep Learning Project

**Group:** Songbird  
**Members:** Charlotte de Vries, Jiazhen Tang, Paulo Zirlis

In [None]:
# Setup block
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight

print("Setup OK")

In [None]:
# Report every physical device TensorFlow can see (a GPU entry in the
# printed list means GPU acceleration is available).
import tensorflow as tf

visible_devices = tf.config.list_physical_devices()
print(visible_devices)

## 1. Project Overview

### 1.1 Objective

Apply different deep learning architectures to the visual classification task of identifying brain tumors in MRI images and compare them based on accuracy and time to converge.


### 1.2 Neural Network Architectures

We will implement and compare the following architectures:
- Custom Convolutional Neural Network (CNN) with keras sequential
- Custom Residual Network (ResNet)
- Pre-trained Residual Network (ResNet50)
- Pre-trained Residual Network (ResNet50) with fine-tuning
- Pre-trained Vision Transformer (ViT)
- Pre-trained Vision Transformer (ViT) with fine-tuning


### 1.3 Dataset Description

The dataset includes high-resolution CT and MRI images captured from multiple patients, with each image labeled with the corresponding tumor type (e.g., glioma, meningioma, etc.). For this project we will focus solely on the **MRI** images for simplicity. The dataset's creator collected these data from different sources to assist researchers and healthcare professionals in developing AI models for the automatic detection, classification, and segmentation of brain tumors.

The images are divided as follows:
- Healthy images: 2000
- Tumor images: 3000
    - Meningioma: 1112
    - Glioma: 672
    - Pituitary: 629
    - Tumor (unspecified subtype): 587
- **Total images:** 5000

Source: [Brain tumor multimodal image (Kaggle)](https://www.kaggle.com/datasets/murtozalikhon/brain-tumor-multimodal-image-ct-and-mri/data)

***

## 2. Data Preprocessing

### 2.1 Load data

In [None]:
# Load data
#
# Builds `df`, a DataFrame with one row per image and the columns
# 'filepath' and 'label'. Images inside a folder whose name contains
# "healthy" are labelled 'Healthy'; images in any other folder are
# labelled by the tumour subtype found in their filename.

# File extensions that count as images when scanning the dataset folders.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.tif')

# (filename keyword, label) pairs, checked in this order.
TUMOR_SUBTYPES = (
    ('glioma', 'Glioma'),
    ('meningioma', 'Meningioma'),
    ('pituitary', 'Pituitary'),
)


def label_from_filename(filename):
    """Return the tumour subtype label encoded in *filename*.

    The match is case-insensitive. Filenames that contain none of the
    known subtype keywords are labelled 'Tumor (Unspecified)'.
    """
    name_lower = filename.lower()
    for keyword, subtype in TUMOR_SUBTYPES:
        if keyword in name_lower:
            return subtype
    return 'Tumor (Unspecified)'


def collect_labeled_images(root):
    """Scan the class folders directly under *root*.

    Returns a ``(filepaths, labels)`` pair of equally long lists. Entries of
    *root* that are not directories, and files without an image extension,
    are skipped. Folders and files are visited in sorted order so that the
    resulting row order (and therefore any seeded split made from it) does
    not depend on the filesystem's listing order.

    Raises ``OSError`` (e.g. ``FileNotFoundError``) if *root* cannot be listed.
    """
    found_paths = []
    found_labels = []

    for item in sorted(os.listdir(root)):
        item_path = os.path.join(root, item)

        # We only care about folders (directories)
        if not os.path.isdir(item_path):
            continue

        is_healthy = 'healthy' in item.lower()
        print(f"Processing {'Healthy' if is_healthy else 'Tumour'} folder: {item}")

        for filename in sorted(os.listdir(item_path)):
            if not filename.lower().endswith(IMAGE_EXTENSIONS):
                continue
            found_paths.append(os.path.join(item_path, filename))
            # Healthy folder -> fixed label; anything else -> subtype from filename
            found_labels.append('Healthy' if is_healthy else label_from_filename(filename))

    return found_paths, found_labels


# 1. SETUP PATHS
dataset_path = 'Data/Brain Tumor MRI images'

print(f"Checking contents of: {dataset_path}")
try:
    items = os.listdir(dataset_path)
    print("Found these items:", items)
    filepaths, labels = collect_labeled_images(dataset_path)
except OSError as err:
    # Only filesystem errors are expected here; report them and continue with
    # an empty dataset instead of failing on a second, unguarded listdir call.
    print("Error: The dataset_path does not exist.")
    print(f"Details: {err}")
    items = []
    filepaths, labels = [], []

# Create DataFrame
df = pd.DataFrame({'filepath': filepaths, 'label': labels})

# Check results
print(f"Total images found: {len(df)}")
print(df['label'].value_counts())


### 2.2 Train-test split

In [None]:
# 2. Hold out 20% of all images as the test set; stratifying on the label
#    keeps the class proportions the same in both parts.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    shuffle=True,
    random_state=42,
    stratify=df['label'],
)

# 3. Carve the validation set out of the remaining 80%:
#    0.125 * 0.8 = 0.1, i.e. 10% of the full dataset.
train_df, val_df = train_test_split(
    train_df,
    test_size=0.125,
    shuffle=True,
    random_state=42,
    stratify=train_df['label'],
)

for caption, split in (
    ("Train size: ", train_df),
    ("Val size:   ", val_df),
    ("Test size:  ", test_df),
):
    print(f"{caption}{len(split)}")

# 4. Visual sanity check: show ten random images with their assigned label
plt.figure(figsize=(14, 8))

sample_df = df.sample(10)

for position, (_, record) in enumerate(sample_df.iterrows(), start=1):
    image_path = record['filepath']
    short_name = os.path.basename(image_path)[:10]  # label + start of filename is enough to eyeball

    plt.subplot(2, 5, position)
    plt.imshow(mpimg.imread(image_path), cmap='gray')
    plt.title(f"{record['label']}\n{short_name}...", fontsize=9)
    plt.axis('off')

plt.tight_layout()
plt.show()

### 2.3 Build Keras generators

In [None]:
### Images to correct format

# 1. Image size and batch size shared by all three generators
IMG_SIZE = (256, 256)
BATCH_SIZE = 32

# 2. One ImageDataGenerator per split; each only rescales pixels by 1/255
train_datagen, val_datagen, test_datagen = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

# 3. Build generators FROM DATAFRAMES
# Everything except the dataframe and the shuffle flag is identical across splits.
shared_flow_args = dict(
    x_col="filepath",
    y_col="label",
    target_size=IMG_SIZE,
    color_mode="grayscale",
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

# Only the training data is shuffled; validation and test keep dataframe order.
train_gen = train_datagen.flow_from_dataframe(dataframe=train_df, shuffle=True, **shared_flow_args)
val_gen = val_datagen.flow_from_dataframe(dataframe=val_df, shuffle=False, **shared_flow_args)
test_gen = test_datagen.flow_from_dataframe(dataframe=test_df, shuffle=False, **shared_flow_args)

***

## 3. Custom Convolutional Neural Network

The Convolutional Neural Network (CNN) model designed for this project consists of six convolutional blocks followed by a final block with pooling, dropout and fully connected layers. Each convolutional block has a convolution layer, batch normalization, a ReLU activation function and max pooling. Early stopping was added to halt training once the validation loss stops improving, which limits overfitting. Batch normalization was used to improve training speed and stability. Dropout was included in the final block to further prevent overfitting. The model was compiled with the Adam optimizer, the categorical cross-entropy loss function, and accuracy as the evaluation metric.

<br>

### 3.1 CNN Architecture

**Input and Data Augmentation**
- Input layer: shape (256, 256, 1)
- Data Augmentation: Random rotations and horizontal flips

**First Convolutional Block**
- Conv2D layer: 16 filters, 3x3 kernel, stride of 1, same padding
- Batch Normalization
- ReLU Activation
- MaxPooling2d layer: 2x2 pool size, stride of 2.

**Other Convolutional Blocks**
- Same as the first block but with an increasing number of filters (32, 64, 128, 256, 512); the last two blocks also apply spatial dropout (rate 0.2) before max pooling

**Classifier Head**
- Global Average Pooling layer
- Dense layer: 64 units, ReLU activation
- Dropout layer: 0.3 dropout rate
- Dense layer: 5 units (number of classes), Softmax activation

In [None]:
### CNN Architecture

# Seed Python, NumPy and TensorFlow together. Seeding NumPy alone does not
# make weight initialisation or the random augmentation layers reproducible.
keras.utils.set_random_seed(42)


def conv_block(filters, spatial_dropout=None):
    """Return the layers of one convolutional block as a list.

    The block is Conv2D (3x3, stride 1, same padding) -> BatchNormalization
    -> ReLU -> optional SpatialDropout2D -> MaxPool2D (2x2, stride 2).

    filters: number of convolution filters.
    spatial_dropout: dropout rate for SpatialDropout2D, or None to omit it.
    """
    block = [
        layers.Conv2D(filters=filters, kernel_size=3, strides=1, padding='same'),
        layers.BatchNormalization(),
        layers.Activation('relu'),
    ]
    if spatial_dropout is not None:
        block.append(layers.SpatialDropout2D(spatial_dropout))
    block.append(layers.MaxPool2D(pool_size=2, strides=2))
    return block


# Custom CNN
CNN = keras.Sequential([

    # Input
    layers.InputLayer(shape=[256, 256, 1]),

    # Data Augmentation
    layers.RandomFlip("horizontal"),  # flip images horizontally
    layers.RandomRotation(0.1),       # rotate by a random amount (factor 0.1)

    # Six convolutional blocks with doubling filter counts
    *conv_block(16),
    *conv_block(32),
    *conv_block(64),
    *conv_block(128),
    *conv_block(256, spatial_dropout=0.2),
    *conv_block(512, spatial_dropout=0.2),

    # Classifier Head
    layers.GlobalAveragePooling2D(),
    # The hidden layers need a non-linearity: stacked Dense layers without an
    # activation collapse into a single linear map.
    layers.Dense(units=128, activation='relu'),
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=5, activation='softmax')  # 5 classes
])

CNN.summary()


# Compile the model
CNN.compile(
    optimizer = Adam(0.001),
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)

### 3.2 Define class weights

In [None]:
# Class weights for the CNN training run.
#
# The weights are derived from the training generator rather than typed in
# by hand: Keras numbers the classes in sorted label order (see
# train_gen.class_indices), so a hand-written count list in any other order
# attaches each weight to the wrong class. Using the generator also means
# the weights reflect the training split, not the full dataset.

# Integer class index of every training sample, as assigned by Keras
train_labels = np.asarray(train_gen.classes)
present_classes = np.unique(train_labels)

# Class counts, indexed by Keras class index
class_counts = np.bincount(train_labels, minlength=len(train_gen.class_indices))
print("Class indices:", train_gen.class_indices)
print("Class counts:", class_counts)

# Use sklearn's utility function to calculate balanced weights
# This function calculates weights inversely proportional to class frequencies.
weights = class_weight.compute_class_weight(
    'balanced',
    classes=present_classes,
    y=train_labels
)

# Convert to dictionary format for Keras: {class index: weight}
class_weight_dict = {
    int(class_index): float(weight)
    for class_index, weight in zip(present_classes, weights)
}
print("Class Weights:", class_weight_dict)

### 3.3 Train and Evaluate

In [None]:
### Training

# Stop once the validation loss has not improved by at least 0.001 for
# 10 consecutive epochs, and roll back to the best weights seen.
early_stopping = EarlyStopping(
    monitor = 'val_loss',
    min_delta = 0.001,
    patience = 10,
    restore_best_weights = True
)

# Fit the model
# No batch_size here: train_gen already yields batches of BATCH_SIZE, and
# Keras does not accept (or silently ignores) batch_size for generator input.
hist_CNN = CNN.fit(
    train_gen,
    validation_data = val_gen,
    epochs = 20,
    callbacks = [early_stopping],
    class_weight = class_weight_dict,
    verbose = 1
)

### Evaluation

# One figure per metric: training curve vs. validation curve
for metric, axis_label in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    plt.plot(hist_CNN.history[metric], label=f'train_{metric}')
    plt.plot(hist_CNN.history[f'val_{metric}'], label=f'val_{metric}')
    plt.xlabel('Epoch')
    plt.ylabel(axis_label)
    plt.legend()
    plt.show()

***

## 4. Pre-trained Residual Network
Charlotte

#### **ResNet without finetuning**

#### Loading data

In [None]:
# image_dataset_from_directory is not imported by the setup cell, so import
# it here to keep this cell runnable on its own.
from tensorflow.keras.utils import image_dataset_from_directory

# Load the images as batched datasets and split them 80/20 into training
# and validation subsets (subset='both' returns the pair in that order).
# Labels are inferred from the sub-directory names and one-hot encoded.
# NOTE(review): the models below end in a 5-unit softmax, so this directory
# presumably contains one sub-folder per class (5 in total) - confirm.
ds_train_c5_, ds_val_c5_ = image_dataset_from_directory(
    '/tmp/BrainTumorDataset',
    validation_split=0.2,
    subset='both',
    seed=42,
    image_size=(224,224),
    batch_size=32,
    label_mode='categorical'
)

#### Data preprocessing

In [None]:
from tensorflow.keras.applications.resnet import preprocess_input


def preprocess(image, label):
    """Apply the ResNet input preprocessing to *image*; pass *label* through."""
    return preprocess_input(image), label


# Same preprocessing for both splits
ds_train_c5, ds_val_c5 = (
    split.map(preprocess) for split in (ds_train_c5_, ds_val_c5_)
)

#### Data augmentation

In [None]:
# Random augmentation pipeline: horizontal flip, rotation and zoom
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.RandomFlip("horizontal"))
data_augmentation.add(tf.keras.layers.RandomRotation(0.1))
data_augmentation.add(tf.keras.layers.RandomZoom(0.1))


def augment(image, label):
    """Run *image* through the augmentation pipeline; pass *label* through."""
    return data_augmentation(image), label


# Rebuild the training pipeline from the raw dataset: augment first, then
# apply the ResNet preprocessing. The validation data is left un-augmented.
augmented_train = ds_train_c5_.map(augment)
ds_train_c5 = augmented_train.map(preprocess)

#### Create the model

In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models

## Pretrained base: ImageNet weights, without the original classification top
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
# Freeze the base so that only the new head is trained
base_model.trainable = False

## Attach head: pooled base features -> 5-way softmax
model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(5, activation='softmax'))

#### Train the model

In [None]:
## Train the frozen-base model (only the classification head is updated)
# NOTE(review): 1e-5 is a very small learning rate for a freshly initialised
# head trained for 10 epochs - confirm that this is intentional.
head_optimizer = Adam(1e-5)

model.compile(
    loss='categorical_crossentropy',
    optimizer=head_optimizer,
    metrics=['accuracy'],
)

history_c5 = model.fit(ds_train_c5, validation_data=ds_val_c5, epochs=10)

#### Visualizing the loss and accuracy

In [None]:
import matplotlib.pyplot as plt

# # Loss
# plt.plot(history_c5.history['loss'], label='train_loss')
# plt.plot(history_c5.history['val_loss'], label='val_loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.legend()
# plt.show()

# # Accuracy
# plt.plot(history_c5.history['accuracy'], label='train_accuracy')
# plt.plot(history_c5.history['val_accuracy'], label='val_accuracy')
# plt.xlabel('Epoch')
# plt.ylabel('Accuracy')
# plt.legend()
# plt.show()

#### **ResNet with finetuning**

#### Create the model

In [None]:
# Unfreeze the whole base model ...
base_model.trainable = True

# ... then re-freeze its first 140 layers, so only the later layers are fine-tuned
for position, base_layer in enumerate(base_model.layers):
    if position < 140:
        base_layer.trainable = False

## Attach a fresh 5-way softmax head on top of the partially unfrozen base
model_finetuned = models.Sequential()
model_finetuned.add(base_model)
model_finetuned.add(layers.GlobalAveragePooling2D())
model_finetuned.add(layers.Dense(5, activation='softmax'))

#### Train the model

In [None]:
from tensorflow.keras.optimizers import Adam

# Fine-tune with a small learning rate
finetune_optimizer = Adam(1e-5)

model_finetuned.compile(
    loss='categorical_crossentropy',
    optimizer=finetune_optimizer,
    metrics=['accuracy'],
)

history_c5_finetune = model_finetuned.fit(ds_train_c5, validation_data=ds_val_c5, epochs=10)

#### Visualizing the loss and accuracy after fine tuning

In [None]:
# Loss
# plt.plot(history_c5_finetune.history['loss'], label='train_loss')
# plt.plot(history_c5_finetune.history['val_loss'], label='val_loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.legend()
# plt.show()

# # Accuracy
# plt.plot(history_c5_finetune.history['accuracy'], label='train_accuracy')
# plt.plot(history_c5_finetune.history['val_accuracy'], label='val_accuracy')
# plt.xlabel('Epoch')
# plt.ylabel('Accuracy')
# plt.legend()
# plt.show()

***

## 5. Pre-trained Vision Transformer
Jiazhen

In [None]:
# Load and compile the pre-trained ViT classifier.
#
# The Hugging Face TensorFlow models are built on the Keras 2 compatible
# `tf_keras` package, so the optimizer and loss must come from `tf_keras`
# as well, not from `tensorflow.keras` (Keras 3).
import tf_keras
from transformers import TFAutoModelForImageClassification

# 1. Ensure we know the number of classes (should be 5)
# Taken from the training dataframe: the ViT generators are only created in
# the next cell, so they cannot be referenced here yet.
num_classes = train_df['label'].nunique()

# 2. Load the ViT Base Model (86M Parameters)
model_id = "google/vit-base-patch16-224"

# NOTE: this rebinds `model`, which earlier referred to the ResNet50 model.
model = TFAutoModelForImageClassification.from_pretrained(
    model_id,
    num_labels=num_classes,
    ignore_mismatched_sizes=True  # lets the checkpoint load with a different number of labels
)

# 3. Define Optimizer and Loss (Using tf_keras to avoid version errors)
# ViT requires a small learning rate (5e-5)
optimizer = tf_keras.optimizers.Adam(learning_rate=5e-5)
# The model outputs raw logits, hence from_logits=True
loss = tf_keras.losses.CategoricalCrossentropy(from_logits=True)

# 4. Compile the model
# jit_compile=True will help speed up this heavy model on the GPU
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=['accuracy'],
    jit_compile=True
)

print(f"Successfully loaded and compiled {model_id}")

In [None]:
# tf_keras is the Keras 2 compatible package the Hugging Face model is built
# on; it is not imported by the setup cell, so import it here.
import tf_keras

# 1. Setup Generators with 'channels_first'
# We add data_format='channels_first' to match the Hugging Face ViT requirements
# NOTE(review): pixels are only rescaled to [0, 1] here; the pre-trained ViT
# checkpoint presumably expects inputs normalised with mean 0.5 / std 0.5 -
# confirm against the checkpoint's image processor config.
# NOTE: this rebinds train_datagen / test_datagen from the CNN section.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    horizontal_flip=True,
    zoom_range=0.1,
    data_format='channels_first'
)

test_datagen = ImageDataGenerator(
    rescale=1./255,
    data_format='channels_first'
)

# 2. Flow from DataFrame
# Arguments shared by the training and validation generators
vit_flow_args = dict(
    x_col='filepath',
    y_col='label',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    shuffle=True,
    **vit_flow_args
)

val_generator = test_datagen.flow_from_dataframe(
    dataframe=val_df,
    shuffle=False,
    **vit_flow_args
)

print("Generators recreated with Channels First format.")

# 3. Train the ViT classifier
# Stop after 3 epochs without validation-loss improvement and keep the best weights.
early_stopping = tf_keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    callbacks=[early_stopping]
)


In [None]:
# ViT backbone with a custom classification head.
#
# Every Keras object in this cell comes from `tf_keras`, the Keras 2
# compatible package the Hugging Face backbone is built on. Layers, models
# and optimizers from `tensorflow.keras` (Keras 3) cannot be mixed with it.
import tf_keras
from transformers import TFViTModel

# 1. Load the Backbone
backbone = TFViTModel.from_pretrained("google/vit-base-patch16-224")
backbone.trainable = True  # fine-tune the whole backbone

# 2. Create Input (Channels First)
inputs = tf_keras.Input(shape=(3, 224, 224), name="input_image")

# 3. Backbone Inference
x = backbone(inputs).last_hidden_state
x = x[:, 0, :] # Extract CLS Token

# 4. Custom Hidden Layers
x = tf_keras.layers.Dense(512, activation='relu', name="hidden_layer_1")(x)
x = tf_keras.layers.Dropout(0.3)(x)

x = tf_keras.layers.Dense(256, activation='relu', name="hidden_layer_2")(x)
x = tf_keras.layers.Dropout(0.2)(x)

# 5. Output Layer (raw logits; the loss below applies the softmax)
outputs = tf_keras.layers.Dense(num_classes, name="prediction_head")(x)

# Distinct variable and model name so that the previous ViT `model` is kept
fine_tuned_model = tf_keras.Model(inputs=inputs, outputs=outputs, name="ViT_With_Custom_Head")

# 6. Compile
optimizer = tf_keras.optimizers.Adam(learning_rate=1e-4)
loss = tf_keras.losses.CategoricalCrossentropy(from_logits=True)

fine_tuned_model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

# Verify the name in the summary
fine_tuned_model.summary()

In [None]:
# Train the custom-head ViT on the same generators, reusing the early-stopping callback
custom_fit_args = dict(
    validation_data=val_generator,
    epochs=10,
    callbacks=[early_stopping],
)
history_custom = fine_tuned_model.fit(train_generator, **custom_fit_args)