<a href="https://colab.research.google.com/github/awhitehouse1/DS4002-Project-3/blob/main/brain_tumor_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import zipfile
# Unpack the uploaded archive into a local 'brain_tumor_dataset' directory.
with zipfile.ZipFile('brain_tumor_dataset.zip') as archive:
    archive.extractall(path='brain_tumor_dataset')

In [3]:
# Sanity check after extraction: list the contents of the inner dataset folder.
!ls brain_tumor_dataset/brain_tumor_dataset/

no  yes


In [4]:
import os
from PIL import Image
import matplotlib.pyplot as plt

# Folder that holds the class sub-directories. It is relative to the working
# directory because the archive is extracted into a relative
# 'brain_tumor_dataset' folder (an absolute '/brain_tumor_dataset' would point
# at the filesystem root instead).
base_dir = 'brain_tumor_dataset/brain_tumor_dataset'

# One sub-directory per class label.
yes_dir = f'{base_dir}/yes'
no_dir = f'{base_dir}/no'

In [5]:
import numpy as np

def load_images_from_folder(folder, image_size=(128, 128)):
    """Load every image file in ``folder`` as a normalized RGB array.

    Parameters
    ----------
    folder : str
        Directory to read. Every regular file directly inside it is opened
        as an image; sub-directories are skipped.
    image_size : tuple of int, optional
        Target ``(width, height)`` handed to ``Image.resize``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_images, height, width, 3)`` with pixel
        values scaled to [0, 1]. An empty folder yields shape
        ``(0, height, width, 3)`` so the result can still be concatenated
        with other batches.

    Raises
    ------
    OSError
        Propagated from ``Image.open`` when a file cannot be read as an image.
    """
    images = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split downstream) reproducible.
    for filename in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, filename)
        if not os.path.isfile(file_path):
            continue  # Image.open cannot read directories
        # The context manager closes the underlying file handle promptly.
        with Image.open(file_path) as img:
            # Convert before resizing so palette / 1-bit images are
            # interpolated in RGB instead of being resized nearest-neighbour.
            rgb = img.convert('RGB').resize(image_size)
        images.append(np.array(rgb) / 255.0)  # Normalize pixel values to [0, 1]

    if not images:
        width, height = image_size
        return np.empty((0, height, width, 3))
    return np.array(images)

# Read each class folder into its own array.
yes_images = load_images_from_folder(yes_dir)
no_images = load_images_from_folder(no_dir)

print(f"Loaded {len(yes_images)} 'yes' images and {len(no_images)} 'no' images.")

# Stack the 'yes' samples ahead of the 'no' samples, then build the label
# vector in the same order: 1 for 'yes', 0 for 'no'.
X = np.concatenate([yes_images, no_images])
y = np.array([1 for _ in yes_images] + [0 for _ in no_images])

print(f"Combined dataset: {X.shape[0]} images, Labels: {y.shape[0]}")

Loaded 155 'yes' images and 98 'no' images.
Combined dataset: 253 images, Labels: 253


In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the images for testing. Stratifying on y keeps the
# 'yes'/'no' ratio the same in both splits, which matters here because the
# classes are imbalanced (155 vs 98) and the test set is small.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Training set: {X_train.shape[0]} images")
print(f"Testing set: {X_test.shape[0]} images")

Training set: 202 images
Testing set: 51 images


In [9]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Pixel values are NOT rescaled in this cell: load_images_from_folder already
# divides by 255.0, so X_train / X_test are in [0, 1]. Dividing again would
# squash them to [0, 1/255], and re-running the cell would shrink them further.
assert X_train.min() >= 0.0 and X_train.max() <= 1.0, "X_train is expected to be scaled to [0, 1]"
assert X_test.min() >= 0.0 and X_test.max() <= 1.0, "X_test is expected to be scaled to [0, 1]"

# Define Data Augmentation for Training Data
datagen = ImageDataGenerator(
    rotation_range=20,         # Rotate images up to 20 degrees
    width_shift_range=0.2,     # Shift images horizontally
    height_shift_range=0.2,    # Shift images vertically
    shear_range=0.2,           # Shear transformations
    zoom_range=0.2,            # Random zoom
    horizontal_flip=True,      # Randomly flip images horizontally
    fill_mode='nearest'        # Fill in missing pixels after transformations
)

# datagen.fit() is intentionally not called: it only computes dataset
# statistics for featurewise_center, featurewise_std_normalization and
# zca_whitening, none of which are enabled above. The random transforms are
# applied on the fly by flow().

# Test data gets no augmentation; this generator passes images through unchanged.
test_datagen = ImageDataGenerator()

# Wrap training and testing data for generators
train_generator = datagen.flow(X_train, y_train, batch_size=32)
# shuffle=False keeps the batches in y_test order so predictions made from
# this generator line up with the labels.
test_generator = test_datagen.flow(X_test, y_test, batch_size=32, shuffle=False)