<a href="https://colab.research.google.com/github/steffenschneider/MRI-tumor-detection/blob/main/mri_tumor_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# import libraries


In [1]:
# !pip install tensorflow

import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
import tensorflow
from tensorflow import keras
from keras import layers, metrics, callbacks, applications, regularizers
from keras.regularizers import l2
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
print(os.getcwd())

/content


# get data

In [2]:
if not os.path.exists('/content'):
    os.makedirs('/content')

import shutil
#shutil.rmtree('')

from google.colab import files
uploaded = files.upload()       # choose zip folder manually

data_dir = "./mri-tumor"  # contains "yes" and "no" folders

import zipfile
with zipfile.ZipFile('mri-data.zip', 'r') as zip_ref:
    zip_ref.extractall(data_dir)

shutil.rmtree(data_dir + '/brain_tumor_dataset')    # delete additional redundant files



Saving mri-data.zip to mri-data (1).zip


# split data into training and test

In [3]:
img_size = (224, 224)  # ResNet50 requires 224x224
batch_size = 25

# First split: 80% train, 20% validation and test
# color_mode="rgb" is needed for ResNet50, which expects 3 channels
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    labels='inferred',
    validation_split=0.2,
    subset="training",
    seed=42,
    image_size=img_size,        # image is resized here
    batch_size=batch_size,
    color_mode="rgb"
)

# color_mode="rgb" is needed for ResNet50, which expects 3 channels
temp_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    labels='inferred',
    validation_split=0.2,
    subset="validation",
    seed=42,
    image_size=img_size,        # image is resized here
    batch_size=batch_size,
    color_mode="rgb"
)

# Second split: Split temp into 50% validation, 50% test (10% each of original)
n_batches = 1
val_ds = temp_ds.take(n_batches)     # 25 pictures for validation
test_ds = temp_ds.skip(n_batches)    # 25 pictures for testing

data_count = 0
for batch in test_ds:
    images, labels = batch
    data_count += len(images)
print(f"Sum of data / pictures: {data_count}")



Found 253 files belonging to 2 classes.
Using 203 files for training.
Found 253 files belonging to 2 classes.
Using 50 files for validation.
Sum of data / pictures: 25


# augmentation

In [4]:
# AUGMENTATION
# no new images are created, only the original images are augmented
data_augmentation = tf.keras.Sequential([
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.2),
    layers.RandomFlip("horizontal"),
    layers.RandomBrightness(0.2)
])


# normalization

In [5]:
# NORMALIZATION
# Normalize the images to [0,1] range
# ResNet50 expects images in the range [-1,1]
def preprocess(image, label):
    image = data_augmentation(image)
    #image = tf.keras.applications.resnet50.preprocess_input(image)  # Normalize images, DOES NOT WORK!!!
    image = image / 255.0  # Scale pixel values to [0, 1]
    return image, label

train_ds = train_ds.map(preprocess)
val_ds = val_ds.map(preprocess)
test_ds = test_ds.map(preprocess)

'''
data_count = 0
for batch in test_ds:
    images, labels = batch
    print(images[0][0][0])      # tf.Tensor([0.3997549 0.3997549 0.3997549], shape=(3,), dtype=float32)
'''


'\ndata_count = 0\nfor batch in test_ds:\n    images, labels = batch\n    print(images[0][0][0])      # tf.Tensor([0.3997549 0.3997549 0.3997549], shape=(3,), dtype=float32)\n'

# build and compile model

In [6]:

# MODEL
# ResNet50 as feature extractor
# use ResNet50V2 with ImageNet weights - pre-trained model!
# V2 with better accuracy!
# only two output classes: "yes" and "no"
base_model = keras.applications.ResNet50V2(
    include_top=False,              # use my own input layer and output layer
    weights="imagenet",
    input_shape=(224, 224, 3),      # ResNet50V2 needs 3 channels
    classes=2,
    name="resnet50v2",
)

# use trainable = False if < 10.000 images (avoids overfitting)
base_model.trainable = False  # Freeze the base model

# BUILDING THE MODEL
# Why sigmoid? Because we have only two classes (binary classification)
model = keras.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation="relu", kernel_regularizer=l2(0.001)),
    layers.BatchNormalization(),
    layers.Dropout(0.6),
    layers.Dense(64, activation="relu"),
    layers.BatchNormalization(),
    layers.Dropout(0.6),
    layers.Dense(1, activation="sigmoid")
])

# COMPILE MODEL
# Adam optimizer is often the best choice for transfer learning
# use binary_crossentropy for binary classification
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.00003),
    loss="binary_crossentropy",
    metrics=[
        "accuracy",                # Genauigkeit (Accuracy)
        metrics.Precision(),       # Präzision
        metrics.Recall()           # Recall (Sensitivität)
    ]
)


# class weights

In [7]:

# CLASS WEIGHTS
# Calculate class weights to balance the dataset
# n_yes = 155
# n_no = 98
dir_no = os.path.join(data_dir, 'no')
n_no = len(os.listdir(dir_no))
print(f"amount of no-files: {n_no}")

dir_yes = os.path.join(data_dir, 'yes')
n_yes = len(os.listdir(dir_yes))
print(f"amount of yes-files: {n_yes}")

ratio = n_yes / n_no
class_weights = {0: ratio, 1: 1.0}  # ratio is 1.6207...


amount of no-files: 98
amount of yes-files: 155


# model training

In [None]:
# TRAINING
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=0.001,
    patience=15,
    restore_best_weights=True
)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    class_weight=class_weights,
    callbacks=[early_stopping]
)


Epoch 1/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 5s/step - accuracy: 0.5362 - loss: 1.4931 - precision: 0.7105 - recall: 0.4918 - val_accuracy: 0.4400 - val_loss: 0.9570 - val_precision: 0.4286 - val_recall: 0.2308
Epoch 2/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4s/step - accuracy: 0.5032 - loss: 1.5919 - precision: 0.6132 - recall: 0.5336 - val_accuracy: 0.5200 - val_loss: 0.9442 - val_precision: 0.6667 - val_recall: 0.5000
Epoch 3/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4s/step - accuracy: 0.5711 - loss: 1.4788 - precision: 0.6958 - recall: 0.5591 - val_accuracy: 0.8000 - val_loss: 0.8618 - val_precision: 0.9231 - val_recall: 0.7500
Epoch 4/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4s/step - accuracy: 0.5826 - loss: 1.5216 - precision: 0.6960 - recall: 0.6028 - val_accuracy: 0.7200 - val_loss: 0.8773 - val_precision: 0.8125 - val_recall: 0.7647
Epoch 5/20
[1m9/9[0m [32m━━━━━━━━━━━━

# evaluation + confusion matrix

In [None]:

# EVALUATION
test_loss, test_acc = model.evaluate(test_ds)
print(f"Test Accuracy: {test_acc:.4f}")

# Plot accuracy and loss curves
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history["accuracy"], label="Train Accuracy")
plt.plot(history.history["val_accuracy"], label="Val Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history["loss"], label="Train Loss")
plt.plot(history.history["val_loss"], label="Val Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

plt.show()

# PREDICTION
y_val = []
y_val_pred = []
for images, labels in val_ds:
    print(images.shape)
    print(labels.shape)
    y_val.extend(labels)
    y_val_pred.extend(model.predict(images))

# Confusion Matrix validation
cm = confusion_matrix(y_val, np.round(y_val_pred))
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.show()

y_test = []
y_test_pred = []
for images, labels in test_ds:
    print(images.shape)
    print(labels.shape)
    y_test.extend(labels)
    y_test_pred.extend(model.predict(images))

# Confusion Matrix test
cm = confusion_matrix(y_test, np.round(y_test_pred))
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.show()
