# Overview of the competition:
https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/overview

# Dataset
The dataset consists of 4 types of mpMRI scans: 
* Fluid Attenuated Inversion Recovery (FLAIR)
* T1-weighted pre-contrast (T1w)
* T1-weighted post-contrast (T1Gd)
* T2-weighted (T2)

The 'train' folder contains the training images. It is accompanied by the 'train_labels.csv' file, which contains the target MGMT_value for each subject in the training data (e.g. the presence of MGMT promoter methylation). The 'test' folder contains the test images. 

Further details on the original dataset have been provided in the following **paper**:
U.Baid, et al., “The RSNA-ASNR-MICCAI BraTS 2021 Benchmark on Brain Tumor Segmentation and Radiogenomic Classification”, arXiv:2107.02314, 2021. https://arxiv.org/abs/2107.02314

# Task

To predict the MGMT value of each subject in the test dataset.

Here I have attempted to fine-tune the pre-trained ResNet, AlexNet, DenseNet and all versions of the VGG networks on the dataset obtained from the RSNA-MICCAI Brain Tumor Radiogenomic Classification Challenge.

## Importing Libraries

In [None]:
import os
import matplotlib.pyplot as plt
import cv2
import pandas as pd
from tqdm.notebook import tqdm
import pydicom
import numpy as np
import shutil
from PIL import Image
import scipy
import torch 
import torchvision
import torchvision.transforms as transforms
from torchvision import models , datasets
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import time
import copy

print("All modules have been imported")

## Splitting the train folder into subfolders
Here I separated the subjects having the MGMT biomarker (category 1) from the ones who don't have it (category 0) within the training folder. 

In [None]:
# Create the per-label output folders. os.makedirs with exist_ok=True also
# creates the parent "data" folder and stays silent when the cell is re-run.
for label_dir in ("data/0", "data/1"):
    os.makedirs(label_dir, exist_ok=True)

# Label table; its 'BraTS21ID' and 'MGMT_value' columns are read when the
# images are sorted into the label folders.
labels = pd.read_csv("../input/png-dataset-for-rsna-mgmt-detection/png_data/png_voxel_converted_ds/train_labels.csv")

## New sorted folder with the data

In [None]:
# Copy every training image into data/<label>/ under a flat file name of the
# form <subject>_<mri_type>_<original file name>.
main_folder_path = "../input/png-dataset-for-rsna-mgmt-detection/png_data/png_voxel_converted_ds"
main_train_folder_path = os.path.join(main_folder_path, "train")

for subject in tqdm(os.listdir(main_train_folder_path)):
    subject_folder = os.path.join(main_train_folder_path, subject)

    # The label depends only on the subject, so look it up once per subject
    # instead of once per image.
    subject_num = int(subject)
    idx = np.where(labels['BraTS21ID'] == subject_num)[0][0]
    label = str(labels.loc[idx, 'MGMT_value'])
    new_image_folder_path = os.path.join("data", label)

    for mri_type in os.listdir(subject_folder):
        mri_type_folder = os.path.join(subject_folder, mri_type)
        for mri_image in os.listdir(mri_type_folder):
            original_image_path = os.path.join(mri_type_folder, mri_image)
            mri_image = subject + "_" + mri_type + "_" + mri_image
            new_image_path = os.path.join(new_image_folder_path, mri_image)

            # getcolors() returns a list of (count, colour) entries, or None
            # when the image has more colours than its limit. A list with a
            # single entry means the image is one uniform colour (blank), so
            # it is skipped. The context manager closes the file handle.
            with Image.open(original_image_path) as image:
                colors = image.getcolors()
            if colors is not None and len(colors) == 1:
                continue

            shutil.copy(original_image_path, new_image_path)

In [None]:
# Report how many images were copied into each label folder.
count_label_0 = len(os.listdir("data/0"))
count_label_1 = len(os.listdir("data/1"))
print("Images with label 0 = " , count_label_0 , "Images with label 1 = " , count_label_1)

## Train-validation-test split

In [None]:
# Create data/<SPLIT>/<label>/ for every split and label. exist_ok=True keeps
# the cell safe to re-run, and os.makedirs creates missing parents itself.
for split_name in ("TRAIN", "VAL", "TEST"):
    for label_name in ("0", "1"):
        os.makedirs(os.path.join("data", split_name, label_name), exist_ok=True)

In [None]:
IMG_PATH = "./data"

# Distribute each label folder over the three splits by position in the
# directory listing: the first 4000 files go to TEST, the following files up
# to 90% of the class size go to TRAIN, and the remainder goes to VAL.
for CLASS in tqdm(["0" , "1"]):
    class_dir = IMG_PATH + "/" + CLASS
    IMG_NUM = len(os.listdir(class_dir))
    train_limit = 0.9 * IMG_NUM
    for position, FILE_NAME in enumerate(os.listdir(class_dir)):
        if position < 4000:
            split_prefix = 'data/TEST/'
        elif position < train_limit:
            split_prefix = 'data/TRAIN/'
        else:
            split_prefix = 'data/VAL/'
        source_path = class_dir + '/' + FILE_NAME
        shutil.copy(source_path, split_prefix + str(CLASS) + '/' + FILE_NAME)

In [None]:
# Print the number of images per split and label, in the order
# TRAIN/1, TRAIN/0, VAL/1, VAL/0, TEST/1, TEST/0.
for split_name in ("TRAIN", "VAL", "TEST"):
    for label_name in ("1", "0"):
        print(len(os.listdir("data/" + split_name + "/" + label_name)))

## Data augmentation and normalization

In [None]:
# Preprocessing pipelines, datasets and loaders for the three splits.
# TRAIN uses a random resized crop and a random horizontal flip; VAL and TEST
# use a fixed resize followed by a centre crop. Every split ends with tensor
# conversion and the same per-channel normalization.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _eval_pipeline():
    """Build the resize -> centre-crop -> tensor -> normalize pipeline."""
    return transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ])


data_transforms = {
    'TRAIN': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]),
    'VAL': _eval_pipeline(),
    'TEST': _eval_pipeline(),
}

data_dir = 'data'

# One ImageFolder dataset, one shuffled loader and one size entry per split.
image_datasets = {}
dataloaders = {}
dataset_sizes = {}
for split_name in ['TRAIN', 'VAL', 'TEST']:
    split_dataset = datasets.ImageFolder(os.path.join(data_dir, split_name),
                                         data_transforms[split_name])
    image_datasets[split_name] = split_dataset
    dataloaders[split_name] = torch.utils.data.DataLoader(
        split_dataset, batch_size=4, shuffle=True, num_workers=4)
    dataset_sizes[split_name] = len(split_dataset)

class_names = image_datasets['TRAIN'].classes

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

## Viewing training images

In [None]:
def imshow(inp, title=None):
    """Show a normalized image tensor with matplotlib.

    The tensor is converted to a NumPy array, its first axis is moved to the
    end, the per-channel normalization is undone, and the values are clipped
    to [0, 1] before plotting.

    Args:
        inp: Tensor to display; axes are reordered with ``(1, 2, 0)``.
        title: Optional plot title; nothing is set when it is ``None``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Move the leading axis last so the array has the layout plt.imshow expects.
    image = inp.numpy().transpose((1, 2, 0))
    # Undo the normalization, then clamp to the displayable range.
    image = np.clip(channel_std * image + channel_mean, 0, 1)

    plt.imshow(image)
    if title is not None:
        plt.title(title)
    # Short pause so that the plot is refreshed.
    plt.pause(0.001)


# Fetch the first batch produced by the training loader.
train_batches = iter(dataloaders['TRAIN'])
inputs, classes = next(train_batches)

# Tile the batch into a single image and title it with the class names.
out = torchvision.utils.make_grid(inputs)
batch_titles = [class_names[x] for x in classes]

imshow(out, title=batch_titles)

## Function for Training model

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, is_inception=False):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a 'TRAIN' phase followed by a 'VAL' phase over the
    module-level ``dataloaders``. Batches are moved to the module-level
    ``device`` and epoch metrics are averaged with ``dataset_sizes``. The
    weights of the epoch with the highest validation accuracy are restored
    into ``model`` before it is returned.

    Args:
        model: Network to train; called as ``model(inputs)``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per training epoch.
        num_epochs: Number of epochs to run.
        is_inception: When true, the training-phase forward pass is expected
            to return ``(outputs, aux_outputs)`` and the auxiliary loss is
            added with weight 0.4.

    Returns:
        The same ``model`` object, with the best validation weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['TRAIN', 'VAL']:
            is_training = phase == 'TRAIN'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # Track gradient history only while training.
                with torch.set_grad_enabled(is_training):
                    # Run exactly one forward pass per batch. A second pass
                    # would double the compute and update BatchNorm running
                    # statistics twice in training mode.
                    if is_inception and is_training:
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # statistics: weight the batch loss by the batch size so the
                # epoch loss is a per-sample average
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)
            if is_training:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Keep a deep copy of the weights of the best validation epoch.
            if phase == 'VAL' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

## Fine tuning Resnet18

In [None]:
# ResNet-18: load the pretrained network and swap its final fully connected
# layer for a 2-output layer.
criterion = nn.CrossEntropyLoss()

resnet = models.resnet18(pretrained=True)
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(in_features=num_ftrs, out_features=2)
resnet = resnet.to(device)

# Every parameter of the network is handed to the optimizer.
optimizer = optim.SGD(params=resnet.parameters(), lr=0.001)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

train_model(
    model=resnet,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=step_lr_scheduler,
    num_epochs=1,
    is_inception=False,
)

## Fine tuning Alexnet

In [None]:
# AlexNet: load the pretrained network and swap classifier[6] for a
# 2-output layer.
criterion = nn.CrossEntropyLoss()

alexnet = models.alexnet(pretrained=True)
num_ftrs = alexnet.classifier[6].in_features
alexnet.classifier[6] = nn.Linear(in_features=num_ftrs, out_features=2)
alexnet = alexnet.to(device)

# Every parameter of the network is handed to the optimizer.
optimizer = optim.SGD(params=alexnet.parameters(), lr=0.001)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

train_model(
    model=alexnet,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=step_lr_scheduler,
    num_epochs=1,
    is_inception=False,
)

## Fine tuning Densenet121

In [None]:
# DenseNet-121: load the pretrained network and swap its classifier for a
# 2-output layer.
criterion = nn.CrossEntropyLoss()

densenet = models.densenet121(pretrained=True)
num_ftrs = densenet.classifier.in_features
densenet.classifier = nn.Linear(in_features=num_ftrs, out_features=2)
densenet = densenet.to(device)

# Every parameter of the network is handed to the optimizer.
optimizer = optim.SGD(params=densenet.parameters(), lr=0.001)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

train_model(
    model=densenet,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=step_lr_scheduler,
    num_epochs=1,
    is_inception=False,
)

## Fine tuning VGGnet

Now we will finetune all the versions of VGG models.

In [None]:
# VGG-11: load the pretrained network and swap classifier[6] for a
# 2-output layer.
criterion = nn.CrossEntropyLoss()

vgg11 = models.vgg11(pretrained=True)
num_ftrs = vgg11.classifier[6].in_features
vgg11.classifier[6] = nn.Linear(in_features=num_ftrs, out_features=2)
vgg11 = vgg11.to(device)

# Every parameter of the network is handed to the optimizer.
optimizer = optim.SGD(params=vgg11.parameters(), lr=0.001)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

train_model(
    model=vgg11,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=step_lr_scheduler,
    num_epochs=1,
    is_inception=False,
)

In [None]:
# VGG-11 with batch normalization: load the pretrained network and swap
# classifier[6] for a 2-output layer.
criterion = nn.CrossEntropyLoss()

vgg11_bn = models.vgg11_bn(pretrained=True)
num_ftrs = vgg11_bn.classifier[6].in_features
vgg11_bn.classifier[6] = nn.Linear(in_features=num_ftrs, out_features=2)
vgg11_bn = vgg11_bn.to(device)

# Every parameter of the network is handed to the optimizer.
optimizer = optim.SGD(params=vgg11_bn.parameters(), lr=0.001)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

train_model(
    model=vgg11_bn,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=step_lr_scheduler,
    num_epochs=1,
    is_inception=False,
)

In [None]:
# VGG-13: load the pretrained network and swap classifier[6] for a
# 2-output layer.
criterion = nn.CrossEntropyLoss()

vgg13 = models.vgg13(pretrained=True)
num_ftrs = vgg13.classifier[6].in_features
vgg13.classifier[6] = nn.Linear(in_features=num_ftrs, out_features=2)
vgg13 = vgg13.to(device)

# Every parameter of the network is handed to the optimizer.
optimizer = optim.SGD(params=vgg13.parameters(), lr=0.001)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

train_model(
    model=vgg13,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=step_lr_scheduler,
    num_epochs=1,
    is_inception=False,
)

In [None]:
# VGG-13 with batch normalization: load the pretrained network and swap
# classifier[6] for a 2-output layer.
criterion = nn.CrossEntropyLoss()

vgg13_bn = models.vgg13_bn(pretrained=True)
num_ftrs = vgg13_bn.classifier[6].in_features
vgg13_bn.classifier[6] = nn.Linear(in_features=num_ftrs, out_features=2)
vgg13_bn = vgg13_bn.to(device)

# Every parameter of the network is handed to the optimizer.
optimizer = optim.SGD(params=vgg13_bn.parameters(), lr=0.001)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

train_model(
    model=vgg13_bn,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=step_lr_scheduler,
    num_epochs=1,
    is_inception=False,
)

In [None]:
# VGG-16: load the pretrained network and swap classifier[6] for a
# 2-output layer. This run trains for 15 epochs and keeps the returned
# model in `model`.
criterion = nn.CrossEntropyLoss()

vgg16 = models.vgg16(pretrained=True)
num_ftrs = vgg16.classifier[6].in_features
vgg16.classifier[6] = nn.Linear(in_features=num_ftrs, out_features=2)
vgg16 = vgg16.to(device)

# Every parameter of the network is handed to the optimizer.
# NOTE(review): lr=0.1 here, while most other models in this notebook use
# 0.001 - confirm this is intentional.
optimizer = optim.SGD(params=vgg16.parameters(), lr=0.1)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

model = train_model(
    model=vgg16,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=step_lr_scheduler,
    num_epochs=15,
    is_inception=False,
)

In [None]:
# VGG-16 with batch normalization: load the pretrained network and swap
# classifier[6] for a 2-output layer.
criterion = nn.CrossEntropyLoss()

vgg16_bn = models.vgg16_bn(pretrained=True)
num_ftrs = vgg16_bn.classifier[6].in_features
vgg16_bn.classifier[6] = nn.Linear(in_features=num_ftrs, out_features=2)
vgg16_bn = vgg16_bn.to(device)

# Every parameter of the network is handed to the optimizer.
optimizer = optim.SGD(params=vgg16_bn.parameters(), lr=0.001)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

train_model(
    model=vgg16_bn,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=step_lr_scheduler,
    num_epochs=1,
    is_inception=False,
)

In [None]:
# VGG-19: load the pretrained network and swap classifier[6] for a
# 2-output layer.
criterion = nn.CrossEntropyLoss()

vgg19 = models.vgg19(pretrained=True)
num_ftrs = vgg19.classifier[6].in_features
vgg19.classifier[6] = nn.Linear(in_features=num_ftrs, out_features=2)
vgg19 = vgg19.to(device)

# Every parameter of the network is handed to the optimizer.
# NOTE(review): lr=0.1 here, while most other models in this notebook use
# 0.001 - confirm this is intentional.
optimizer = optim.SGD(params=vgg19.parameters(), lr=0.1)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

train_model(
    model=vgg19,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=step_lr_scheduler,
    num_epochs=1,
    is_inception=False,
)

In [None]:
# VGG-19 with batch normalization: load the pretrained network and swap
# classifier[6] for a 2-output layer.
criterion = nn.CrossEntropyLoss()

vgg19_bn = models.vgg19_bn(pretrained=True)
num_ftrs = vgg19_bn.classifier[6].in_features
vgg19_bn.classifier[6] = nn.Linear(in_features=num_ftrs, out_features=2)
vgg19_bn = vgg19_bn.to(device)

# Every parameter of the network is handed to the optimizer.
optimizer = optim.SGD(params=vgg19_bn.parameters(), lr=0.001)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

train_model(
    model=vgg19_bn,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=step_lr_scheduler,
    num_epochs=1,
    is_inception=False,
)

In [None]:
# Load the sample submission file and report how many rows it contains.
sample_submission_path = "../input/png-dataset-for-rsna-mgmt-detection/png_data/png_voxel_converted_ds/sample_submission.csv"
samp_subm = pd.read_csv(sample_submission_path)
print('Samples test:', len(samp_subm))

In [None]:
# Show the first five rows of the sample submission.
samp_subm.head(n=5)

In [None]:
# Write the submission file without the index column.
# NOTE(review): the frame written here is the sample submission exactly as it
# was loaded; no model predictions are filled in by the visible cells.
samp_subm.to_csv(path_or_buf='submission.csv', index=False)

Here I have given an outline of how to fine-tune some PyTorch models. Please reach out if you have any doubts, and feel free to comment if anything seems incorrect. Thanks!