# Predictive Brain Tumor Image AI Project - Feature Engineer

Goal: build an image-processing model that predicts brain tumors from images and can be automated at scale.

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt

In [None]:
# Define Data Transformations
# Both pipelines resize to 224x224, convert the image to a tensor and normalize
# each of the three channels with the same mean/std. Only the training pipeline
# adds augmentation.
def _build_transform(augment):
    """Return the preprocessing pipeline; add a random horizontal flip when *augment* is true."""
    steps = [transforms.Resize((224, 224))]
    if augment:
        # Flip each image horizontally at random (50% chance) every time it is
        # loaded. Storing flipped copies on disk instead could add bias, which
        # is why the augmentation is done with controlled randomness.
        steps.append(transforms.RandomHorizontalFlip())
    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
    return transforms.Compose(steps)


data_transforms = {
    'train': _build_transform(augment=True),
    'test': _build_transform(augment=False),
}

# Data path
train_dir = './ImageData1-Training'
test_dir = './ImageData2-Testing'

# Add labels to dataset
# As mentioned in the EDA, glioma is treated as the only "cancer" class, while
# notumor (no tumor), meningioma and pituitary are treated as "no cancer"
# (no tumor or benign tumors). Mapping folder names to binary labels here lets
# the images stay in their separate per-class subfolders on disk.
class_map = {
    'glioma': 1,  # cancer
    'meningioma': 0,  # normal
    'notumor': 0,  # normal
    'pituitary': 0   # normal
}


def _load_binary_dataset(root, transform, mapping):
    """Load an ImageFolder from *root* and relabel every sample using *mapping*.

    Reminder: ImageFolder expects a parent folder containing one subfolder per
    class; the subfolder names become the classes and each image is labelled
    with the index of its subfolder.

    Args:
        root: Path of the parent folder holding the class subfolders.
        transform: Transform pipeline applied to every loaded image.
        mapping: Dict from class (subfolder) name to the new integer label.

    Returns:
        The ImageFolder dataset, whose samples now carry the mapped labels.

    Raises:
        KeyError: If a class folder found under *root* is missing from *mapping*.
    """
    dataset = datasets.ImageFolder(root, transform=transform)

    missing = [name for name in dataset.classes if name not in mapping]
    if missing:
        raise KeyError(f"No label mapping for class folder(s): {missing}")

    # New label for each original folder index, in index order.
    index_to_label = [mapping[name] for name in dataset.classes]

    # ImageFolder.__getitem__ reads labels from `samples`, not from `targets`,
    # so overwriting `targets` alone would leave the loaders yielding the
    # original folder indices. Rewrite `samples` and keep `imgs` and `targets`
    # consistent with it.
    # NOTE: `classes` / `class_to_idx` still describe the original folders.
    dataset.samples = [(path, index_to_label[idx]) for path, idx in dataset.samples]
    dataset.imgs = dataset.samples
    dataset.targets = [label for _, label in dataset.samples]
    return dataset


# Dataset setup
train_dataset = _load_binary_dataset(train_dir, data_transforms['train'], class_map)
test_dataset = _load_binary_dataset(test_dir, data_transforms['test'], class_map)

# Dataloader
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)  # Shuffle for training
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)   # No shuffle for testing
