# RSNA-MICCAI Brain Tumor Radiogenomic Classification - **An approach with PyTorch EfficientNet 3D**

## **Problem Description**:

There are structural multi-parametric MRI (mpMRI) scans for different subjects, in DICOM format. The exact mpMRI scans included are:

* Fluid Attenuated Inversion Recovery (FLAIR)
* T1-weighted pre-contrast (T1w)
* T1-weighted post-contrast (T1Gd)
* T2-weighted (T2)

`train_labels.csv` - file contains the target **MGMT_value** for each subject in the training data **(e.g. the presence of MGMT promoter methylation)**.

So, it's a binary classification problem.

## **An EfficientNet3D solution**:

* For each patient, we consider the 4 sequences (FLAIR, T1w, T1Gd, T2). From each sequence we take the 50 middle slices and stack them, giving 50 x 4 = 200 slices. Each slice is resized to (256, 256), the `img_size` set below.

* Construct an EfficientNet-3D in PyTorch that takes a single-channel input volume of 200 slices, each of size (256, 256).

* Perform binary classification.


### **Importing libraries**

In [None]:
import os
import glob
from tqdm import tqdm_notebook as tqdm
import random
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torchvision import transforms, utils
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2


import warnings
warnings.filterwarnings("ignore")

In [None]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU), and exports
    ``PYTHONHASHSEED``. When CUDA is available it also seeds all CUDA devices
    and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Nothing GPU-specific to configure on a CPU-only machine.
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


set_seed(42)

### **Importing EfficientNet-3D**

In [None]:
import sys
sys.path.append('../input/efficientnetpyttorch3d/EfficientNet-PyTorch-3D')
from efficientnet_pytorch_3d import EfficientNet3D

### **Inspecting Labels**

In [None]:
# Root folder of the competition data; also read as a global by
# load_3d_dicom_images when it builds the per-subject DICOM paths.
path = '/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification'
# One row per training subject (BraTS21ID, MGMT_value).
train_data = pd.read_csv(os.path.join(path, 'train_labels.csv'))
print('Num of train samples:', len(train_data))
# NOTE(review): this is not the last expression of the cell, so a notebook
# will presumably not render the preview -- confirm whether that is intended.
train_data.head()
# Side length (pixels) every slice is resized to; read as a global by dicom2array.
img_size = 256

### **MRI Slice Loading/Processing**

In [None]:
def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """Read one DICOM file and return it as a square 8-bit grayscale image.

    Args:
        path: Path of the DICOM file to read.
        voi_lut: If True, apply the VOI LUT / windowing stored in the file
            (when present) to the raw pixel data.
        fix_monochrome: If True, invert images whose PhotometricInterpretation
            is "MONOCHROME1" so that higher values are always brighter.

    Returns:
        ``np.uint8`` array of shape ``(img_size, img_size)`` (``img_size`` is
        the module-level setting) with intensities min-max scaled to 0..255.
        A constant (e.g. completely blank) slice is returned as all zeros.
    """
    # dcmread is the supported reader; read_file is a deprecated alias.
    dicom = pydicom.dcmread(path)
    # VOI LUT (if available by DICOM device) is used to
    # transform raw DICOM data to "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
    # MONOCHROME1 stores the image inverted (high value = dark) - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    data = data - np.min(data)
    peak = np.max(data)
    # A constant slice has peak == 0; dividing would yield NaNs that turn into
    # arbitrary values when cast to uint8, so leave it as zeros instead.
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    data = cv2.resize(data, (img_size, img_size))
    return data

def _slice_sort_key(filename):
    """Natural-sort key so that e.g. 'Image-2.dcm' sorts before 'Image-10.dcm'."""
    import re

    # re.split with a capture group alternates text / digit-run / text / ...,
    # so odd positions are always the digit runs and can be compared as ints.
    parts = re.split(r"(\d+)", os.path.basename(filename))
    return [int(tok) if pos % 2 else tok for pos, tok in enumerate(parts)]


def _load_middle_slices(files, n_slices=50):
    """Load up to ``n_slices`` files from the middle of ``files`` as one uint8 volume."""
    # Clamp at 0: a negative start would wrap around and silently select only
    # the tail of a sequence that has fewer than n_slices files.
    start = max(0, len(files) // 2 - n_slices // 2)
    chosen = files[start:start + n_slices]
    if not chosen:
        # Sequence is missing entirely: stand in with an all-zero volume.
        return np.zeros((img_size, img_size, n_slices), dtype=np.uint8)

    # Stacking gives (n, rows, cols); .T reverses the axes so slices end up last
    # (each slice is transposed in the process, as it always was here).
    volume = np.array([dicom2array(f) for f in chosen]).T
    missing = n_slices - volume.shape[-1]
    if missing > 0:
        # Pad with the volume's own dtype: float padding would turn the whole
        # result into float64 and make it inconsistent with unpadded volumes.
        padding = np.zeros((img_size, img_size, missing), dtype=volume.dtype)
        volume = np.concatenate((volume, padding), axis=-1)
    return volume


def load_3d_dicom_images(scan_id, split = "train"):
    """Stack the middle slices of a subject's four MRI sequences into one volume.

    For each of FLAIR, T1w, T1wCE and T2w (in that order) the 50 middle DICOM
    files are loaded with ``dicom2array``; sequences with fewer than 50 files
    are zero-padded at the end, and missing sequences become all zeros.

    Args:
        scan_id: Subject folder name, e.g. "00000".
        split: Sub-folder of the module-level ``path`` to read from.

    Returns:
        ``np.uint8`` array of shape ``(img_size, img_size, 200)``.
    """
    volumes = []
    for sequence in ("FLAIR", "T1w", "T1wCE", "T2w"):
        # NOTE(review): file names are assumed to carry the slice number;
        # natural sorting keeps neighbouring slices adjacent even when the
        # numbers are not zero-padded, and equals plain sorting when they are.
        files = sorted(
            glob.glob(f"{path}/{split}/{scan_id}/{sequence}/*.dcm"),
            key=_slice_sort_key,
        )
        volumes.append(_load_middle_slices(files))

    return np.concatenate(volumes, axis = -1)

In [None]:
# Sanity check: shape of the stacked volume for subject "00000" (default split "train").
load_3d_dicom_images("00000").shape

In [None]:
# Keep one subject's stacked volume around; it is plotted in the visualization cell.
slices = load_3d_dicom_images("00000")
print(slices.shape)

### **Visualization**

In [None]:
def plot_imgs(imgs, cols=20, size=7, is_rgb=True, title="", cmap='gray', img_size=(64,64)):
    """Show every slice of a volume in a grid of subplots.

    Args:
        imgs: Array indexed as ``imgs[:, :, i]``, i.e. slices along the last axis.
        cols: Number of subplots per row.
        size: Side length (inches) reserved for each subplot.
        is_rgb: Unused; kept so existing calls keep working.
        title: Figure title.
        cmap: Matplotlib colormap passed to ``imshow``.
        img_size: Unused; kept so existing calls keep working.
    """
    # The slice count is the LAST axis; len(imgs) would be the image height.
    n_imgs = imgs.shape[-1]
    # Ceiling division, with at least one row so the figure is never empty.
    rows = max(1, -(-n_imgs // cols))
    fig = plt.figure(figsize=(cols*size, rows*size))
    for i in range(n_imgs):
        ax = fig.add_subplot(rows, cols, i+1)
        ax.imshow(imgs[:,:,i], cmap=cmap)
    plt.suptitle(title)
    plt.show()

plot_imgs(slices)

In [None]:
# frames = []
# for i in range(200):
#     frames.append(np.array(slices[:,:,i], dtype = np.uint8))

# fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
# out = cv2.VideoWriter('/kaggle/working/out_video.mp4', fourcc, 15, (200,200))
# for i in range(len(frames)):
#     c_frame =  cv2.cvtColor(frames[i],cv2.COLOR_GRAY2RGB)
#     out.write(c_frame)
    
# out.release()

In [None]:
# # the video play doesn't work, you can download it to view

# from IPython.display import HTML
# from base64 import b64encode

# def play(filename):
#     html = ''
#     video = open(filename,'rb').read()
#     src = 'data:video/mp4;base64,' + b64encode(video).decode()
#     html += '<video width=1000 controls autoplay loop><source src="%s" type="video/mp4"></video>' % src 
#     return HTML(html)

# play('/kaggle/working/out_video.mp4')

### **Data Loader**

In [None]:
# let's write a simple pytorch dataloader


class BrainTumor(Dataset):
    """Dataset yielding one stacked MRI volume (and its label) per subject.

    Args:
        path: Root folder of the competition data; must contain
            ``train_labels.csv`` and one sub-folder per split.
        split: "train" or "valid" read subjects from the ``train`` folder
            (they are two slices of the same sorted subject list); any other
            value, e.g. "test", reads every subject of the folder of that name.
        validation_split: Fraction of the sorted training subjects assigned to
            "valid" (taken from the front); "train" gets the remainder. With
            the default 0.0 "train" holds every subject and "valid" is empty.

    Items are ``(volume, label)`` -- a float32 tensor normalized to [-1, 1]
    and a long tensor -- except for ``split == "test"``, which yields only
    the volume.
    """

    def __init__(self, path = '/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification', split = "train", validation_split = 0.0):
        # labels, keyed by the zero-padded subject id used as folder name
        train_data = pd.read_csv(os.path.join(path, 'train_labels.csv'))
        self.labels = {
            str(b).zfill(5): m
            for b, m in zip(train_data["BraTS21ID"], train_data["MGMT_value"])
        }

        self.split = split
        # There is no separate validation folder to read: validation subjects
        # are carved out of the training folder, so both splits load from it.
        self.folder = "train" if split in ("train", "valid") else split

        subject_dirs = sorted(glob.glob(os.path.join(path, self.folder, "*")))
        all_ids = [os.path.basename(d) for d in subject_dirs]
        n_valid = int(len(all_ids) * validation_split)
        if split == "valid":
            self.ids = all_ids[:n_valid]   # first part as validation
        elif split == "train":
            self.ids = all_ids[n_valid:]   # remainder as train
        else:
            self.ids = all_ids

        # Built once instead of on every item. 200 = 4 sequences x 50 slices.
        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5,) * 200, (0.5,) * 200),
        ])

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        imgs = load_3d_dicom_images(self.ids[idx], self.folder)
        # ToTensor rescales to [0, 1] only for uint8 input. Slice values are
        # integers in 0..255, so the cast is lossless and guarantees the same
        # scaling for every volume even if the loader returned floats.
        imgs = self.transform(np.asarray(imgs).astype(np.uint8))
        # Already a float32 tensor; .float() avoids the copy (and warning)
        # that wrapping it in torch.tensor(...) would cause.
        imgs = imgs.float()

        if self.split != "test":
            label = self.labels[self.ids[idx]]
            return imgs, torch.tensor(label, dtype = torch.long)
        return imgs

In [None]:
# testing the dataloader
train_dataset = BrainTumor()
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=8)
# val_dataset = BrainTumor(split="valid")
# val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, num_workers=8)

In [None]:
# Pull a single batch to confirm the tensor shapes before training, then stop.
for img, label in train_loader:
    print(img.shape)
    print(label.shape)
    break

# for img, label in val_loader:
#     print(img.shape)
#     print(label.shape)
#     break

### **Model: EfficientNet-3D B0**

In [None]:
# Built by name via from_name; in_channels=1 matches the single channel axis
# the training loop inserts with unsqueeze.
model = EfficientNet3D.from_name("efficientnet-b0", override_params={'num_classes': 2}, in_channels=1)
# Two output logits + CrossEntropyLoss: the binary task is treated as 2-class classification.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr = 0.0001)
# Number of passes over the training loader.
n_epochs = 2

### **Training**

In [None]:
# let's train
gpu = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(gpu)

# Lowest average training loss seen so far. It must live OUTSIDE the epoch
# loop: resetting it every epoch would make every epoch look like an
# improvement and overwrite the "best" checkpoint each time.
best_pres = float("inf")

for epoch in range(n_epochs):  # loop over the dataset multiple times

    train_loss = []
    model.train()
    # Wrapping the loader itself lets tqdm read its length for the progress bar.
    for x, y in tqdm(train_loader):
        # Insert a channel axis at dim 1 to match the model's in_channels=1.
        x = torch.unsqueeze(x, dim = 1)
        x = x.to(gpu)
        y = y.to(gpu)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        # print statistics
        train_loss.append(loss.item())
    avg_train = sum(train_loss) / len(train_loss)
    print(f"epoch {epoch+1} train: {avg_train}")

#     running_loss = []
#     best_pres = 10000
#     model.eval()
#     for i, data in tqdm(enumerate(val_loader, 0)):

#         x, y = data
        
#         x = torch.unsqueeze(x, dim = 1)
#         x = x.to(gpu)
#         y = y.to(gpu)

#         # forward
#         outputs = model(x)
#         loss = criterion(outputs, y)

#         # print statistics
#         running_loss.append(loss.item())
#     avg_pred = sum(running_loss) / len(running_loss)   
#    print(f"epoch {epoch+1} val: {avg_pred}")
    # Checkpoint only when this epoch beats the best training loss so far.
    if avg_train < best_pres:
        print('save model...')
        best_pres = avg_train
        torch.save(model.state_dict(),'best_loss.pt')

### **Inference**

In [None]:
# model = EfficientNet3D.from_name("efficientnet-b0", override_params={'num_classes': 2}, in_channels=1)
# model.to(gpu)
# checkpoint = torch.load(f"best_loss.pt")
# model.load_state_dict(checkpoint)
# model.eval()

In [None]:
# class test_BrainTumor(Dataset):
#     def __init__(self, path = '/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification', split = "test"):
#         # labels
#         train_data = pd.read_csv(os.path.join(path, 'sample_submission.csv'))
#         self.labels = {}
#         brats = list(train_data["BraTS21ID"])  
#         self.split = split
#         self.ids = [a.split("/")[-1] for a in sorted(glob.glob(path + f"/{split}/" + "/*"))]   
#     def __len__(self):
#         return len(self.ids)
    
#     def __getitem__(self, idx):
#         imgs = load_3d_dicom_images(self.ids[idx], self.split)
#         transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,) * 200, (0.5,) * 200)])
#         imgs = transform(imgs)
#         return torch.tensor(imgs, dtype = torch.float32)

In [None]:
# test_dataset = test_BrainTumor(split = "test")
# test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False, num_workers=8)

In [None]:
# submission = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")

In [None]:
# y_pred = []
# ids = []

# for e, batch in enumerate(test_loader):
#     print(f"{e}/{len(test_loader)}", end="\r")
#     with torch.no_grad():
#         tmp_pred = np.zeros((batch.shape[0], ))
#         tmp_res = torch.sigmoid(model(batch.to(gpu))).cpu().numpy().squeeze()
#         tmp_pred += tmp_res
#         y_pred.extend(tmp_pred)
#         ids.extend(batch["id"].numpy().tolist())

In [None]:
# submission = pd.DataFrame({"BraTS21ID": ids, "MGMT_value": y_pred})
# submission.to_csv("submission.csv", index=False)

In [None]:
# submission