<a href="https://colab.research.google.com/github/mmagavi/FinalProject1291/blob/main/FinalProject1291.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Final Project: Classifying Brain Tumors With Convolutional Neural Networks and a Foundational Model**

## **Importing the Data**

The data come from https://www.kaggle.com/datasets/masoudnickparvar/brain-tumor-mri-dataset/data. Each image is stored in its own `.mat` file containing a `cjdata` struct with the following fields:

**cjdata.label:** 1 for meningioma, 2 for glioma, 3 for pituitary tumor

**cjdata.PID:** patient ID

**cjdata.image:** image data

**cjdata.tumorBorder:** a vector storing the coordinates of discrete points on the tumor border.

		For example, [x1, y1, x2, y2, ...], where (x1, y1) are the planar coordinates of one point on the tumor border.
		It was generated by manually delineating the tumor border, so it can be used to generate
		a binary image of the tumor mask.

**cjdata.tumorMask:** a binary image with 1s indicating tumor region


In [1]:
from google.colab import drive

# Mount point inside the Colab VM where Google Drive is exposed.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Download the four dataset archives from Google Drive (by file id) into the working directory.
!gdown 1p_q_wHJZQIEeinSUskvQHtqu88s2gfq8
!gdown 1EQRVW5M31GzZjEXnULygxpvBnIGOZ6WA
!gdown 1QxVOVJ89AHKQkbW-3Ftu-ajcDuzTTrIZ
!gdown 1A1dYyanURV6B7EPgI7VS-Y6tZuheEjJy
# Unpack every archive into tumor_data/.
# -o overwrites existing files without asking: unzip's interactive "replace ...?" prompt
#    cannot be answered from a notebook cell and stalls the run when this cell is re-executed.
# -q suppresses the per-file listing so the cell output stays short.
!unzip -o -q brainTumorDataPublic_22993064.zip -d tumor_data
!unzip -o -q brainTumorDataPublic_15332298.zip -d tumor_data
!unzip -o -q brainTumorDataPublic_7671532.zip -d tumor_data
!unzip -o -q brainTumorDataPublic_1766.zip -d tumor_data

Downloading...
From: https://drive.google.com/uc?id=1p_q_wHJZQIEeinSUskvQHtqu88s2gfq8
To: /content/brainTumorDataPublic_15332298.zip
100% 216M/216M [00:01<00:00, 193MB/s]
Downloading...
From: https://drive.google.com/uc?id=1EQRVW5M31GzZjEXnULygxpvBnIGOZ6WA
To: /content/brainTumorDataPublic_22993064.zip
100% 232M/232M [00:01<00:00, 207MB/s]
Downloading...
From: https://drive.google.com/uc?id=1QxVOVJ89AHKQkbW-3Ftu-ajcDuzTTrIZ
To: /content/brainTumorDataPublic_7671532.zip
100% 218M/218M [00:02<00:00, 109MB/s]
Downloading...
From: https://drive.google.com/uc?id=1A1dYyanURV6B7EPgI7VS-Y6tZuheEjJy
To: /content/brainTumorDataPublic_1766.zip
100% 214M/214M [00:01<00:00, 125MB/s]
Archive:  brainTumorDataPublic_22993064.zip
replace tumor_data/2299.mat? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
# Sanity check: list tumor_data/ to confirm the archives were extracted there.
!ls tumor_data # Checking Data was loaded correctly into folder.

In [None]:
#VGG19
import time
import os
import pickle
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [7, 7]
plt.ion()
import cv2
import pickle
import numpy as np
from scipy.spatial.distance import pdist, squareform

import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision import datasets
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

from torchsummary import summary
import json
from PIL import Image

# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Running on", device)

# ImageNet-pretrained VGG19 in inference mode, used below as a baseline classifier.
vgg19 = models.vgg19(weights="IMAGENET1K_V1")
vgg19 = vgg19.to(device)
vgg19.eval()

# Print a layer-by-layer summary for a 3x224x224 input.
summary(vgg19, (3, 224, 224))


In [None]:
import h5py

# A single sample file, used to check that the pretrained model runs end to end.
image_path = "tumor_data/1571.mat"

# The .mat file is read through h5py; every field lives under the 'cjdata' group.
with h5py.File(image_path, 'r') as file:
    cjdata = file['cjdata']
    # Scalar fields are read as 2-D arrays; take element [0, 0].
    label = cjdata['label'][()][0, 0]
    patient_id = cjdata['PID'][()][0, 0]
    # Full image array.
    image_data = cjdata['image'][()]
    # Border coordinates flattened to 1-D.
    tumor_border = cjdata['tumorBorder'][()].flatten()

In [None]:
# Adapted from assignment 7.
# Download the JSON file that maps ImageNet class indices to readable names;
# it is read below via PATH_TO_LABELS.

!gdown 13teI_njgCL6oXbGFVYrDCyhkcOYEhOtj #imagenet_classes.json

def preprocess(image_data):
    """Turn a single-channel image array into an ImageNet-normalized 3-channel tensor.

    The raw intensities are min-max scaled to [0, 1] before being quantized to
    8 bits. Multiplying unscaled values by 255 and casting to ``uint8`` (the
    previous behaviour) silently overflows/wraps whenever the input is not
    already in [0, 1].

    Args:
        image_data: 2-D array-like of pixel intensities, any numeric dtype or range.

    Returns:
        A float tensor of shape (3, 256, 256), normalized with the ImageNet
        channel means and standard deviations.
    """
    pixels = np.asarray(image_data, dtype=np.float32)

    # Rescale to [0, 1]; a constant image has no dynamic range and maps to all zeros.
    lowest, highest = float(pixels.min()), float(pixels.max())
    if highest > lowest:
        pixels = (pixels - lowest) / (highest - lowest)
    else:
        pixels = np.zeros_like(pixels)

    # Quantize to 8 bits and replicate the gray channel three times (VGG expects RGB).
    gray = np.round(pixels * 255).astype(np.uint8)
    data = np.repeat(gray[..., np.newaxis], 3, -1)
    image = Image.fromarray(data)

    # Resize and normalize using PyTorch transforms
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    normalized_image = transform(image)
    return normalized_image

# JSON file mapping ImageNet class indices (as strings) to class names.
PATH_TO_LABELS = 'imagenet_classes.json'
with open(PATH_TO_LABELS, 'r') as labels_file:
    imagenet_classes = json.load(labels_file)

def decode_preds(outputs, class_names=imagenet_classes):
  """Print the most probable class and its probability for every row of ``outputs``.

  Args:
    outputs: tensor of raw model scores with one row per sample (softmax is taken over dim 1).
    class_names: mapping from the class index, as a string, to a readable name.
  """
  # Convert scores to probabilities and keep the best class per sample.
  probabilities = F.softmax(outputs, dim=1)
  best = torch.max(probabilities, dim=1)
  best_scores = best.values.cpu().numpy()
  best_indices = best.indices.cpu().numpy()

  # Resolve all names up front, then report them.
  best_names = []
  for class_index in best_indices:
    best_names.append(class_names[str(class_index)])

  for label, score in zip(best_names, best_scores):
    print(f'\nClass: {label}, Probability: {score}')


In [None]:
# Preprocess the sample image and move it to the same device as the model.
array = preprocess(image_data)
array = array.to(device)

# Add a leading batch axis and run the pretrained network without tracking gradients.
with torch.no_grad():
  vector = vgg19(array.unsqueeze(0))

# Show the raw image (plt.imshow applies its default colormap) and the top prediction.
plt.imshow(image_data)
decode_preds(vector)
# The prediction is not correct, but it is not unreasonable either.

## **Fine tuning the model**

In [None]:
from sklearn.model_selection import train_test_split
import shutil

path = "tumor_data"

# Only the .mat files directly inside `path` are split. Sub-folders (for example the
# train/validate/test folders created by a previous run) and any other files are ignored.
# os.listdir returns entries in arbitrary order, so the list is sorted: without that,
# random_state alone does not make the split reproducible.
file_paths = sorted(
    os.path.join(path, name) for name in os.listdir(path) if name.endswith(".mat")
)

if file_paths:
    # 80% train, then the remaining 20% is halved into test and validation.
    # NOTE(review): the split is per file, not per patient; if several files share a
    # patient ID the same patient can land in more than one split - confirm this is acceptable.
    train_files, test_val_files = train_test_split(file_paths, test_size=0.2, random_state=42)
    test_files, val_files = train_test_split(test_val_files, test_size=0.5, random_state=42)
else:
    # Nothing left to split (e.g. the files were already moved by an earlier run of this cell).
    print(f"No .mat files found directly in {path!r}; nothing to split.")
    train_files, test_val_files, test_files, val_files = [], [], [], []

def move_files(file_list, destination_folder):
    """Move every path in ``file_list`` into ``destination_folder``.

    The destination folder (including missing parents) is created first if it
    does not exist yet. Returns nothing.
    """
    os.makedirs(destination_folder, exist_ok=True)
    for source_path in file_list:
        shutil.move(source_path, destination_folder)

# Relocate each split into its own sub-folder (train first, then validate, then test).
for split_files, split_folder in (
    (train_files, "tumor_data/train"),
    (val_files, "tumor_data/validate"),
    (test_files, "tumor_data/test"),
):
    move_files(split_files, split_folder)


In [None]:
# Sanity check: list the files that were moved into the test split.
!ls tumor_data/test

In [None]:
class CustomMatDataset(Dataset):
    """Dataset over the ``.mat`` files in one folder, yielding ``(image, label)`` pairs.

    Args:
        folder_path: directory that is scanned (non-recursively) for ``*.mat`` files.
        transform: optional callable applied to the raw image array; when omitted
            the raw array is returned unchanged.
    """

    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        # Sorted so that index -> file is stable across runs (os.listdir order is arbitrary).
        self.file_paths = sorted(
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if name.endswith('.mat')
        )
        self.transform = transform

    def __len__(self):
        """Number of ``.mat`` files found in the folder."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the file at position ``idx``.

        ``label`` is the raw value stored in the file (per the notebook text:
        1 = meningioma, 2 = glioma, 3 = pituitary tumor); it is NOT shifted to
        start at zero here.
        """
        file_path = self.file_paths[idx]

        # Open the .mat file using h5py
        with h5py.File(file_path, 'r') as mat_file:
            cjdata = mat_file['cjdata']
            label = cjdata['label'][()][0, 0]
            image_data = cjdata['image'][()]

        # Without a transform the raw array is returned; previously `image` was
        # left unbound in that case and the return statement raised.
        image = self.transform(image_data) if self.transform is not None else image_data

        return image, label

# Training data is reshuffled every epoch.
train_dataset = CustomMatDataset(folder_path='tumor_data/train', transform=preprocess)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Evaluation data is iterated in a fixed order so that reported metrics do not
# depend on how samples happen to be batched in a given run.
test_dataset = CustomMatDataset(folder_path='tumor_data/test', transform=preprocess)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

validate_dataset = CustomMatDataset(folder_path='tumor_data/validate', transform=preprocess)
validate_loader = DataLoader(validate_dataset, batch_size=32, shuffle=False)


In [None]:
# Human-readable names for the raw dataset labels (keys are the label values
# stored in each file, as listed in the data description at the top of the notebook).
class_names = {
    1: "meningioma",
    2: "glioma",
    3: "pituitary tumor",
}
print(class_names)

Creating a new model for fine-tuning

In [None]:
# ImageNet-pretrained VGG19 that serves as a fixed feature extractor.
standard_vgg19 = models.vgg19(weights="IMAGENET1K_V1")
standard_vgg19 = standard_vgg19.to(device)

# Freeze every pretrained weight so that only the new head receives gradients.
standard_vgg19.requires_grad_(False)

# Replace the ImageNet classifier with a pass-through; a new 3-class tumor
# classifier is attached on top of the backbone output in NewVGG19.
standard_vgg19.classifier = nn.Identity()

# Now create a new model with the features of the standard VGG19 and a new classifier
class NewVGG19(nn.Module):
    """Frozen VGG19 backbone followed by a new linear classification head.

    Args:
        backbone: module exposing ``features`` and ``avgpool`` attributes whose
            forward pass yields 512 * 7 * 7 features per sample. Defaults to the
            notebook-level ``standard_vgg19`` (classifier already replaced by
            ``nn.Identity``).
        num_classes: number of output logits; defaults to the 3 tumor classes.
    """

    def __init__(self, backbone=None, num_classes=3):
        super().__init__()
        if backbone is None:
            # Resolved at construction time so existing `NewVGG19()` calls keep working.
            backbone = standard_vgg19
        # Kept as attributes for backward compatibility; they alias sub-modules of `oldvgg`.
        self.features = backbone.features
        self.avgpool = backbone.avgpool
        self.oldvgg = backbone
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512 * 7 * 7, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        # The per-batch debug print of the feature shape was removed: it flooded
        # the cell output on every training and validation step.
        x = self.oldvgg(x)
        return self.classifier(x)

# Build the fine-tuning model, place it on the selected device and show its layers.
new_vgg19 = NewVGG19()
new_vgg19 = new_vgg19.to(device)
print(new_vgg19)

Train the model

In [None]:
# Fine-tune the new classification head and track validation accuracy per epoch.
#
# The `%env CUDA_LAUNCH_BLOCKING=1` debugging aid and the explicit
# `torch.cuda.synchronize()` calls were removed: they were there to chase a
# device-side assert, which came from out-of-range targets (fixed below), and
# they made the cell fail on CPU-only runtimes.

# NOTE: the variable keeps its historical name, but the optimizer is plain SGD.
adam_optimizer = optim.SGD(new_vgg19.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
epochs = 12
val_accuracies = []  # one validation accuracy per epoch

for epoch in range(epochs):
    # Switch back to training mode; the validation phase below leaves the model in eval mode.
    new_vgg19.train()
    total_loss = 0.0
    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device)
        # The files store labels 1..3, but CrossEntropyLoss needs integer class
        # indices in [0, num_classes - 1], so shift them to 0..2.
        targets = batch_y.long().to(device) - 1

        adam_optimizer.zero_grad()
        outputs = new_vgg19(batch_X)
        loss = criterion(outputs, targets)
        loss.backward()
        adam_optimizer.step()
        total_loss += loss.item()

    # `loss` is already a per-batch mean, so average over batches (not samples),
    # and report once per epoch instead of once per batch.
    print(f"Epoch {epoch + 1}, Loss: {total_loss / max(len(train_loader), 1):.4f}")

    new_vgg19.eval()
    val_loss_sum = 0.0
    val_correct = 0
    val_seen = 0
    with torch.no_grad():
        for batch_X, batch_y in validate_loader:
            batch_X = batch_X.to(device)
            targets = batch_y.long().to(device) - 1  # same 1..3 -> 0..2 shift as in training

            val_outputs = new_vgg19(batch_X)
            batch_size = targets.size(0)
            # Weight by batch size so a smaller final batch does not skew the averages.
            val_loss_sum += criterion(val_outputs, targets).item() * batch_size
            val_preds = val_outputs.argmax(dim=1)
            val_correct += (val_preds == targets).sum().item()
            val_seen += batch_size

    # max(..., 1) keeps the report well-defined if the validation set is empty.
    val_loss = val_loss_sum / max(val_seen, 1)
    val_accuracy = val_correct / max(val_seen, 1)
    val_accuracies.append(val_accuracy)

    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")