<a href="https://colab.research.google.com/github/mmagavi/FinalProject1291/blob/main/SecondModel_FinalProjectCLPS1291.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Final Project: Classifying Brain Tumors With Convolutional Neural Networks and a Foundational Model**

**Model:** Vision Transformer (ViT) \
**Source:** http://pytorch.org/vision/stable/models/vision_transformer.html

## **Import Data**


The data is imported from https://www.kaggle.com/datasets/masoudnickparvar/brain-tumor-mri-dataset/data. Each `.mat` file contains a `cjdata` struct with the following fields:

**cjdata.label:** 1 for meningioma, 2 for glioma, 3 for pituitary tumor

**cjdata.PID:** patient ID

**cjdata.image:** image data

**cjdata.tumorBorder:** a vector storing the coordinates of discrete points on tumor border.

		For example, [x1, y1, x2, y2, ...], in which (x1, y1) are the planar coordinates of a point on the tumor border.
		It was generated by manually delineating the tumor border, so it can be used to generate
		a binary image of the tumor mask.

**cjdata.tumorMask:** a binary image with 1s indicating tumor region




In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): the visible cells fetch all data with gdown and never read from
# /content/drive — confirm this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Importing and Downloading Images

!gdown 1p_q_wHJZQIEeinSUskvQHtqu88s2gfq8
!gdown 1EQRVW5M31GzZjEXnULygxpvBnIGOZ6WA
!gdown 1QxVOVJ89AHKQkbW-3Ftu-ajcDuzTTrIZ
!gdown 1A1dYyanURV6B7EPgI7VS-Y6tZuheEjJy
!unzip brainTumorDataPublic_22993064.zip -d tumor_data
!unzip brainTumorDataPublic_15332298.zip -d tumor_data
!unzip brainTumorDataPublic_7671532.zip -d tumor_data
!unzip brainTumorDataPublic_1766.zip -d tumor_data

In [None]:
#!ls tumor_data # Checking Data was loaded correctly into folder.

# **Vision Transformer (ViT) Model**
Creating, Evaluating and Training a ViT Model on our data



**Setting Up**

- Downloading packages
- Setting up environment

In [None]:
%env CUDA_LAUNCH_BLOCKING=1 TORCH_USE_CUDA_DSA
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

#SETUP
import time
import os
import pickle
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [7, 7]
plt.ion()
import cv2
import pickle
import numpy as np
from scipy.spatial.distance import pdist, squareform

import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision import datasets
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

from torchsummary import summary
import json
from PIL import Image

# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print("Running on", device)

**Instantiating Model**

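`torchvision.models.vit_b_16()` with ImageNet weights is loaded first to try the pretrained model on our data; `torchvision.models.vit_b_32()` is the variant that is fine-tuned further below.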

In [None]:
# ImageNet-pretrained ViT-B/16, placed on the selected device.
vit = models.vit_b_16(weights="IMAGENET1K_V1").to(device)
# Switch to evaluation mode; as the last expression of the cell this also
# displays the module structure.
vit.eval()
#summary(vit, (3, 224, 224))

In [None]:
import h5py # Import Python Library to interact with HDF5 files

image_path = "tumor_data/1571.mat" # using one image to see model works

# Read the fields of the `cjdata` group for this one sample.
with h5py.File(image_path, 'r') as mat_file:
    cjdata = mat_file['cjdata']
    label = cjdata['label'][()][0, 0]
    patient_id = cjdata['PID'][()][0, 0]
    image_data = cjdata['image'][()]
    # Border coordinates flattened into a 1D array.
    tumor_border = cjdata['tumorBorder'][()].flatten()

In [None]:
# Similar code to assignment 7
# Preprocessing images

# Fetch imagenet_classes.json (by Google Drive file ID); it is loaded further
# down in this cell and used as the label lookup for the pretrained model.
!gdown 13teI_njgCL6oXbGFVYrDCyhkcOYEhOtj #imagenet_classes.json


def preprocess(image_data):
    """Convert one single-channel image into a normalised 3x224x224 tensor.

    Args:
        image_data: 2-D numeric array holding one grayscale image. Any intensity
            range is accepted (presumably raw scanner values far above 255 for
            the .mat samples — TODO confirm).

    Returns:
        Float tensor of shape (3, 224, 224), normalised with the ImageNet
        channel means and standard deviations.
    """
    pixels = np.asarray(image_data, dtype=np.float64)

    # Bring intensities into [0, 1] before the 8-bit conversion: multiplying
    # larger values by 255 and casting to uint8 silently wraps around and
    # scrambles the image. Inputs already inside [0, 1] are left untouched.
    low, high = float(pixels.min()), float(pixels.max())
    if low < 0.0 or high > 1.0:
        span = high - low
        pixels = (pixels - low) / span if span > 0 else np.zeros_like(pixels)

    # Replicate the single channel three times; the transform below normalises
    # three channels.
    rgb = np.repeat(pixels[..., np.newaxis], 3, -1)
    image = Image.fromarray((rgb * 255).astype(np.uint8))

    # Resize and normalize using PyTorch transforms
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    return transform(image)


# Load the ImageNet label lookup from the downloaded JSON file.
PATH_TO_LABELS = 'imagenet_classes.json'
with open(PATH_TO_LABELS, 'r') as labels_file:
    imagenet_classes = json.load(labels_file)


def decode_preds(outputs, class_names=imagenet_classes):
    """Print the top-1 class name and its softmax probability for each row.

    Args:
        outputs: tensor of raw model scores; softmax is taken over dim 1.
        class_names: mapping from the class index (as a string) to its label.
    """
    probabilities = F.softmax(outputs, dim=1)
    top_probability, top_index = probabilities.max(dim=1)

    # Resolve every label before printing anything.
    names = [class_names[str(index)] for index in top_index.cpu().numpy()]
    scores = top_probability.cpu().numpy()

    for name, score in zip(names, scores):
        print(f'\nClass: {name}, Probability: {score}')

Trying the pretrained ViT on one of our images before fine-tuning

In [None]:
array = preprocess(image_data)

# Add a batch dimension and send the input to the device the model was moved to.
# A hard-coded .cuda() raises on CPU-only runtimes even though `device` already
# falls back to the CPU.
with torch.no_grad():
    vector = vit(array.unsqueeze(0).to(device))

In [None]:
# Display the Image + Probability
# Show the raw image, then print the pretrained model's top-1 class and its
# probability for it.
plt.imshow(image_data)
decode_preds(vector)

**FineTuning the ViT Model**

In [None]:

from sklearn.model_selection import train_test_split
import shutil

# Create a Training, Testing, and Validation set

path = "tumor_data"
# Only the .mat samples directly inside tumor_data take part in the split;
# sub-folders and other files are ignored. Sorting makes the split reproducible:
# os.listdir returns entries in arbitrary order, so random_state alone does not
# pin which file lands in which set.
file_paths = sorted(
    os.path.join(path, file)
    for file in os.listdir(path)
    if file.endswith('.mat') and os.path.isfile(os.path.join(path, file))
)

if file_paths:
    # 80% train; the remaining 20% is halved into test and validation.
    train_files, test_val_files = train_test_split(file_paths, test_size=0.2, random_state=42)
    test_files, val_files = train_test_split(test_val_files, test_size=0.5, random_state=42)
else:
    # Nothing to split (e.g. the samples are no longer directly inside tumor_data);
    # train_test_split would raise on an empty list.
    train_files, test_val_files, test_files, val_files = [], [], [], []

def move_files(file_list, destination_folder):
    """Move every path in `file_list` into `destination_folder`.

    The destination folder (including missing parents) is created first, even
    when `file_list` is empty.
    """
    os.makedirs(destination_folder, exist_ok=True)
    for source_path in file_list:
        shutil.move(src=source_path, dst=destination_folder)

# Move files to their folders
# The dataset cell below reads tumor_data/train, tumor_data/validate and
# tumor_data/test, so the moves must run once on a fresh runtime. They are
# skipped once all three folders exist, so re-running this cell does not try to
# move files a second time.
split_folders = {
    "tumor_data/train": train_files,
    "tumor_data/validate": val_files,
    "tumor_data/test": test_files,
}
if not all(os.path.isdir(folder) for folder in split_folders):
    for folder, files in split_folders.items():
        move_files(files, folder)


In [None]:
class CustomMatDataset(Dataset):
    """Dataset over the `.mat` samples stored directly inside one folder.

    Each item is `(image, label)`: the `cjdata/image` array (passed through
    `transform` when one is given) and the `cjdata/label` value shifted to
    start at 0 and converted to a Python int.
    """

    def __init__(self, folder_path, transform=None):
        """
        Args:
            folder_path: directory whose `.mat` files make up the dataset.
            transform: optional callable applied to the raw image array.
        """
        self.folder_path = folder_path
        # Sorted so that index -> file is stable across runs (os.listdir returns
        # entries in arbitrary order).
        self.file_paths = sorted(
            os.path.join(folder_path, file)
            for file in os.listdir(folder_path)
            if file.endswith('.mat')
        )
        self.transform = transform

    def __len__(self):
        """Number of `.mat` files found in the folder."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load sample `idx` and return `(image, label)`."""
        file_path = self.file_paths[idx]

        # Open the .mat file using h5py
        with h5py.File(file_path, 'r') as file:
            # Shift the stored label down by one so class indices start at 0.
            # The int() cast matters: CrossEntropyLoss needs integer class
            # indices, and the stored value is presumably a floating-point
            # scalar (TODO confirm), which would collate into a float tensor.
            label = int(file['cjdata']['label'][()][0, 0]) - 1
            image_data = file['cjdata']['image'][()]

        # Without a transform the raw array is returned, so `image` is always bound.
        image = self.transform(image_data) if self.transform else image_data

        return image, label

BATCH_SIZE = 32

# Only the training batches are shuffled. Evaluation metrics do not depend on
# sample order, so the test and validation loaders keep a fixed order, which
# makes evaluation runs repeatable.
train_dataset = CustomMatDataset(folder_path='tumor_data/train', transform=preprocess)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

test_dataset = CustomMatDataset(folder_path='tumor_data/test', transform=preprocess)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

validate_dataset = CustomMatDataset(folder_path='tumor_data/validate', transform=preprocess)
validate_loader = DataLoader(validate_dataset, batch_size=BATCH_SIZE, shuffle=False)


Setting up the new labels for the new model

In [None]:
# Class index -> tumor type name.
class_names = dict(enumerate(("meningioma", "glioma", "pituitary tumor")))

In [None]:
# Akash's code
NUM_CLASSES = 3  # meningioma, glioma, pituitary tumor

standard_vit = models.vit_b_32(weights="IMAGENET1K_V1")

# Freeze the pretrained layers; only the replacement head created below is trained.
for param in standard_vit.parameters():
    param.requires_grad = False

# Swap the ImageNet head for a 3-class head.
# torchvision's VisionTransformer keeps its classifier in `heads`; it has no
# `classifier` attribute, so assigning one leaves the forward pass untouched
# (still ImageNet logits, with no trainable parameter in the graph).
# The head returns raw logits: CrossEntropyLoss applies log-softmax itself, so
# a Softmax layer here would distort the loss.
head_in_features = standard_vit.heads.head.in_features
standard_vit.heads = nn.Sequential(nn.Linear(head_in_features, NUM_CLASSES))


class NewVIT(nn.Module):
    """Thin wrapper that forwards its input through the adapted ViT."""

    def __init__(self):
        super(NewVIT, self).__init__()
        self.oldvit = standard_vit

    def forward(self, x):
        """Return the wrapped model's output for the batch `x`."""
        x = self.oldvit(x)
        return x

# Move to the device only after the head swap, so the freshly created head ends
# up on the same device as the rest of the model.
new_vit = NewVIT().to(device)

#print(new_vit)

In [None]:
# List every parameter of the model together with its dtype.
for param_name, tensor in new_vit.named_parameters():
    print(f"Parameter: {param_name}, Data Type: {tensor.dtype}")

In [None]:
# Code adapted from: https://medium.com/mlearning-ai/vision-transformers-from-scratch-pytorch-a-step-by-step-guide-96c3313c2e0c

from tqdm import tqdm, trange
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader

from torchvision.transforms import ToTensor
from torchvision.datasets.mnist import MNIST

# Fix the NumPy and PyTorch random seeds before training.
np.random.seed(0)
torch.manual_seed(0)

def _evaluate(model, loader, criterion, desc, leave=True):
    """Run `model` over `loader` in evaluation mode without gradient tracking.

    Returns:
        (loss, accuracy): loss is the mean of the per-batch losses and accuracy
        is a percentage. Both are NaN when the loader yields no samples.
    """
    model.eval()
    correct, total, mean_loss = 0, 0, 0.0
    with torch.no_grad():
        for x, y in tqdm(loader, desc=desc, leave=leave):
            # Class-index targets have to be int64 for CrossEntropyLoss.
            x, y = x.to(device), y.to(device).long()
            y_hat = model(x).float()
            mean_loss += criterion(y_hat, y).item() / len(loader)
            correct += (torch.argmax(y_hat, dim=1) == y).sum().item()
            total += len(x)
    if total == 0:
        return float("nan"), float("nan")
    return mean_loss, correct / total * 100


def main(model, epochs, lr, train_loader, test_loader, validate_loader):
    """Train `model`, report validation metrics per epoch, then test it.

    Args:
        model: network returning one row of class logits per input sample.
        epochs: number of passes over `train_loader`.
        lr: learning rate for the Adam optimizer.
        train_loader: yields `(inputs, class_index)` training batches.
        test_loader: batches evaluated once after training.
        validate_loader: batches evaluated after every epoch; pass None to skip
            validation.

    The results are printed; nothing is returned.
    """
    # Frozen parameters never receive gradients, so only the trainable ones are
    # handed to the optimizer.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = Adam(trainable_params, lr=lr)
    criterion = CrossEntropyLoss()

    # Training loop
    for epoch in trange(epochs, desc="Training"):
        # Evaluation leaves the model in eval mode, so switch back each epoch.
        model.train()
        train_loss = 0.0

        for x, y in tqdm(train_loader, desc=f"Epoch {epoch + 1} in training", leave=False):
            x, y = x.to(device), y.to(device).long()
            y_hat = model(x).float()

            loss = criterion(y_hat, y)
            train_loss += loss.item() / len(train_loader)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f"Epoch {epoch + 1}/{epochs} loss: {train_loss:.2f}")

        if validate_loader is not None:
            val_loss, val_accuracy = _evaluate(
                model, validate_loader, criterion, f"Epoch {epoch + 1} in validation", leave=False
            )
            print(f"Epoch {epoch + 1}/{epochs} validation loss: {val_loss:.2f}, "
                  f"validation accuracy: {val_accuracy:.2f}%")

    # Test loop
    test_loss, test_accuracy = _evaluate(model, test_loader, criterion, "Testing")
    print(f"Test loss: {test_loss:.2f}")
    print(f"Test accuracy: {test_accuracy:.2f}%")

# Train the wrapped Vision Transformer: 20 epochs, learning rate 0.001.
main(
    model=new_vit,
    epochs=20,
    lr=0.001,
    train_loader=train_loader,
    test_loader=test_loader,
    validate_loader=validate_loader,
)
