# Predictive Brain Tumor Image AI Project - Data Wrangling

Goal: build an image-processing model that predicts brain tumors and can be automated at scale.

In [None]:
'''
Glioma: A type of cancer arising from glial cells in the brain or spinal cord.
Meningioma: Usually a benign tumor from the meninges; rarely malignant.
Notumor: Means no tumor was found.
Pituitary: Refers to the gland; tumors (mostly benign) can form, rarely cancerous.
'''


In [2]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt

In [None]:
'''
Reminder:
PyTorch's ImageFolder dataset is designed to expect a parent folder containing one subfolder per class; the subfolder names are automatically used as the labels during training.
'''

In [7]:
# Preprocessing pipelines, keyed by split name.
def _build_transform(augment):
    """Return the preprocessing pipeline for one split.

    Every pipeline resizes to 224x224, converts the image to a tensor and
    normalizes each channel. When `augment` is true a random horizontal flip
    is inserted after the resize, so flipped variants appear on the fly
    instead of being stored as extra copies of the images.
    """
    steps = [transforms.Resize((224, 224))]
    if augment:
        # Default flip probability is used (0.5 per the torchvision docs).
        steps.append(transforms.RandomHorizontalFlip())
    steps.append(transforms.ToTensor())
    # Per-channel mean / std; these are the values torchvision documents for
    # its ImageNet-pretrained models.
    steps.append(transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
    return transforms.Compose(steps)


data_transforms = {
    'train': _build_transform(augment=True),   # with random flip
    'test': _build_transform(augment=False),   # deterministic
}

# Root folders of the two splits; ImageFolder treats every sub-folder of a
# root as one class and labels the images inside it accordingly.
train_dir = './ImageData1-Training'
test_dir = './ImageData2-Testing'

# One dataset per split, each wired to the preprocessing pipeline of the same name.
train_dataset = datasets.ImageFolder(
    root=train_dir,
    transform=data_transforms['train'],
)
test_dataset = datasets.ImageFolder(
    root=test_dir,
    transform=data_transforms['test'],
)


# Collapse the folder classes into one binary label.
# This keeps the separated sub-folder structure on disk while training a
# two-class model: glioma is treated as the cancer class (1); the other
# folders (no tumor, or tumor types described as mostly benign) are grouped
# together as class 0.
class_map = {
    'glioma': 1,      # cancer
    'meningioma': 0,  # normal
    'notumor': 0,     # normal
    'pituitary': 0,   # normal
}


def _apply_binary_labels(dataset, mapping):
    """Relabel an ImageFolder-style dataset in place using `mapping`.

    `mapping` translates a folder (class) name to the new integer label.

    ImageFolder's `__getitem__` reads the label from `dataset.samples`, so
    overwriting only `dataset.targets` leaves the labels produced by a
    DataLoader unchanged. This helper therefore rewrites `samples` (and its
    `imgs` alias) and then rebuilds `targets` from it so all three agree.
    `dataset.classes` is left untouched and still lists the folder names.

    Must be called once on a freshly constructed dataset: the stored labels
    are interpreted as indices into `dataset.classes`, which is no longer
    true after the first call.

    Raises:
        KeyError: if a folder name in `dataset.classes` is missing from `mapping`.
    """
    unmapped = [name for name in dataset.classes if name not in mapping]
    if unmapped:
        raise KeyError(f"No binary label defined for class folder(s): {unmapped}")

    relabeled = [
        (path, mapping[dataset.classes[folder_idx]])
        for path, folder_idx in dataset.samples
    ]
    dataset.samples = relabeled
    dataset.imgs = relabeled  # ImageFolder exposes `imgs` as an alias of `samples`
    dataset.targets = [label for _, label in relabeled]


_apply_binary_labels(train_dataset, class_map)
_apply_binary_labels(test_dataset, class_map)

# Batch iterators over both splits, 32 images per batch.
# Training batches are drawn in shuffled order; the test set keeps its
# original order so evaluation runs are repeatable.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
)


In [None]:
from torchvision import models
import torch.nn as nn

# ResNet-18 with pretrained weights as the backbone.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; confirm against the installed version before switching.
model = models.resnet18(pretrained=True)

# Swap the classification head for a single-logit layer (binary classification);
# the input width is taken from the layer being replaced.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=1)


In [9]:
import torch.optim as optim

# Binary cross-entropy computed directly on raw logits, so the model's single
# output needs no sigmoid of its own.
criterion = nn.BCEWithLogitsLoss()

# Adam over every parameter of the model, learning rate 0.001.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [None]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

for epoch in range(10):  # 10 full passes over the training set
    model.train()  # put the model in training mode
    running_loss = 0.0  # sum of per-batch losses for this epoch

    for images, labels in train_loader:
        # Inputs and targets must live on the same device as the model.
        images = images.to(device)
        # The loss expects float targets; unsqueeze adds a trailing dimension
        # so they line up with the model's one-logit-per-image output.
        labels = labels.to(device).float().unsqueeze(1)

        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()

        # Forward pass.
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean batch loss for the epoch.
    print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")



In [None]:
# Multi-class variant of the model: pretrained ResNet-18 with one output per
# class folder found in the training set.
# NOTE(review): this rebinds `model`, so running this cell after the training
# cell replaces the trained binary model with a fresh, untrained head. The
# optimizer created earlier would presumably still point at the old model's
# parameters. Confirm whether this cell is still needed.

# Pick the GPU if one is available, else the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of class folders in the training dataset.
num_classes = len(train_dataset.classes)

# Load the pretrained backbone and resize its final layer to `num_classes`.
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# Show the resulting architecture.
print(model)
