In [None]:
#Imports
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
import sklearn
import torchvision
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import PIL
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
import seaborn as sns
import glob
from pathlib import Path
import cv2
torch.manual_seed(1)
np.random.seed(1)
import re
import pydicom
import math
from pydicom.pixel_data_handlers.util import apply_voi_lut

In [None]:
# Dimensions shared by the image loaders, the DataLoaders and the model.
IMAGE_SIZE = 256  # side length (pixels) every slice is resized to
NUM_IMAGES = 64   # number of slices kept per 3D volume
BATCH_SIZE = 4    # samples per DataLoader batch

# **Data Loading and Visualizations**

In [None]:
def loading_image(path, img_size=IMAGE_SIZE):
    """Read one DICOM slice, apply its VOI LUT and resize it to a square.

    Args:
        path: Path to a ``.dcm`` file.
        img_size: Side length, in pixels, of the returned square image.

    Returns:
        The resized pixel array. Intensities are not rescaled here; callers
        normalise the assembled volume themselves.
    """
    # `pydicom.read_file` is a deprecated alias of `dcmread` and no longer
    # exists in pydicom 3.0, so call `dcmread` directly.
    dicom = pydicom.dcmread(path)
    # Apply the VOI LUT / windowing described by the DICOM dataset.
    data = apply_voi_lut(dicom.pixel_array, dicom)
    data = cv2.resize(data, (img_size, img_size))
    return data

In [None]:
def load_3d_image(idx, mri_type, num_imgs=NUM_IMAGES, split='train',
                  data_dir="../input/rsna-miccai-brain-tumor-radiogenomic-classification"):
    """Load the central slices of one MRI series as a normalised 3D array.

    Args:
        idx: Patient folder name (e.g. ``"00000"``).
        mri_type: Series sub-folder name (e.g. ``"FLAIR"``).
        num_imgs: Number of slices in the returned volume. When the series has
            fewer slices, the volume is zero-padded at the end.
        split: Dataset split sub-folder, ``'train'`` or ``'test'``.
        data_dir: Root directory of the competition data.

    Returns:
        Array of shape ``(height, width, num_imgs)``. Values are min-max scaled
        to ``[0, 1]`` unless the volume is constant, in which case it is
        returned unscaled.

    Raises:
        ValueError: If no ``.dcm`` file matches the requested series.
    """
    pattern = f"{data_dir}/{split}/{idx}/{mri_type}/*.dcm"
    # Natural sort: digit runs compare as integers so "Image-10" follows "Image-9".
    files = sorted(glob.glob(pattern),
                   key=lambda var: [int(x) if x.isdigit() else x for x in re.findall(r'[^0-9]|[0-9]+', var)])
    if not files:
        # Fail with an actionable message instead of np.stack's
        # "need at least one array to stack".
        raise ValueError(f"No DICOM files found matching {pattern!r}")

    # Take a window of `num_imgs` slices centred on the middle of the series.
    # Using start + num_imgs (rather than middle + num_imgs // 2) keeps the
    # window full-sized for odd `num_imgs` as well.
    middle = len(files) // 2
    start = max(0, middle - num_imgs // 2)
    end = min(len(files), start + num_imgs)

    arrays = [loading_image(f) for f in files[start:end]]
    img3d = np.stack(arrays, axis=2)

    n_missing = num_imgs - img3d.shape[-1]
    if n_missing > 0:
        # Pad with blank slices sized from the loaded data, not a global constant.
        n_zero = np.zeros(img3d.shape[:2] + (n_missing,))
        img3d = np.concatenate((img3d, n_zero), axis=-1)

    # Min-max normalise; skipped for a constant volume to avoid dividing by zero.
    if np.min(img3d) < np.max(img3d):
        img3d = img3d - np.min(img3d)
        img3d = img3d / np.max(img3d)

    return img3d


In [None]:
# Per-patient labels; later cells rely on the 'BraTS21ID' and 'MGMT_value' columns.
train_labels = pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv')

In [None]:
# Display the labels table as loaded.
train_labels

In [None]:
# Sorted names of the entries under the rsna-miccai-png train directory.
# NOTE(review): presumably one entry per patient, named like the patient folders — confirm.
train_files = sorted(os.listdir('../input/rsna-miccai-png/train'))

In [None]:
# Number of training entries found on disk.
len(train_files)

In [None]:
# Wrap the listing in a Series; its name becomes the column name when joined to a DataFrame.
train_files = pd.Series(train_files, name='train_files')

In [None]:
# Derive each patient's folder name straight from its ID, zero-padded to five
# characters (7 -> "00007"), the same convention TumorDataset applies to test IDs.
# A positional concat with a directory listing would silently pair labels with
# the wrong patient if the listing and the CSV ever differed in length or order,
# and would append a duplicate column every time the cell is re-run. `assign`
# keeps the new column in position 2 and makes the cell idempotent.
train_labels = train_labels.assign(
    train_files=train_labels['BraTS21ID'].astype(str).str.zfill(5))

In [None]:
# Display the labels table with the added 'train_files' column.
train_labels

In [None]:
# Keep every row except patient 109.
train_labels = train_labels.loc[train_labels['BraTS21ID'].ne(109)]

In [None]:
# Keep every row except patient 709.
train_labels = train_labels.loc[train_labels['BraTS21ID'].ne(709)]

Patients 109 and 709 don't have FLAIR images, so they are dropped from the training set.

In [None]:
# Count how many training patients fall in each MGMT_value class.
train_labels['MGMT_value'].value_counts()

Fairly balanced train set.

In [None]:
# Collect the test patient IDs: element 1 of each row tuple is the first CSV
# column (element 0 is the DataFrame index).
test_data = pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv')
test_ids = [row[1] for row in test_data.itertuples()]

In [None]:
# Smoke test: load one FLAIR volume and print its shape.
a = load_3d_image("00000", "FLAIR")
print(a.shape)

In [None]:
# Show one slice (index 2 along the last axis) of a FLAIR volume in grayscale.
plt.imshow(load_3d_image("00122", "FLAIR")[:, :, 2], cmap='gray')

# **Dataset and DataLoader**

In [None]:
class TumorDataset(torch.utils.data.Dataset):
    """Dataset yielding one single-channel 3D MRI volume per patient.

    In training mode ``df`` is a table read by position: column 1 holds the
    label and column 2 the patient folder name. In test mode ``df`` is an
    indexable sequence of patient IDs, which are zero-padded to five
    characters to obtain the folder name.

    Args:
        df: Label table (training) or sequence of patient IDs (test).
        transform: Callable applied to the loaded volume.
        mri_type: Series sub-folder to load (e.g. ``"T1wCE"``).
        train: Selects training mode (returns labels) or test mode (returns
            the sample position).
    """

    def __init__(self, df=train_labels, transform=transforms.Compose([transforms.ToTensor()]), mri_type="T1wCE", train=True):
        self.df = df
        self.transform = transform
        self.type = mri_type
        self.train = train

    def __len__(self):
        return len(self.df)

    def _load_volume(self, patient_id, split):
        """Load one patient's volume, transform it and add a leading channel axis."""
        image = load_3d_image(patient_id, self.type, split=split)
        image = self.transform(image)
        return image[None, :, :, :]

    def __getitem__(self, idx):
        """Return ``(image, label)`` in training mode, ``(image, idx)`` otherwise."""
        if self.train:
            patient_id = str(self.df.iloc[idx, 2])
            image = self._load_volume(patient_id, split='train')
            label = torch.tensor(self.df.iloc[idx, 1])
            return image, label

        # Test IDs are plain numbers; folder names are five characters wide.
        patient_id = str(self.df[idx]).zfill(5)
        image = self._load_volume(patient_id, split='test')
        # The position is returned so predictions can be mapped back to IDs.
        return image, idx

In [None]:
# Pass the label table explicitly: the `df` default was bound when the class
# cell ran, so it can be stale if the label cells are re-executed afterwards.
train_dataset = TumorDataset(df=train_labels)
test_dataset = TumorDataset(df=test_ids, train=False)

In [None]:
# Reshuffle the training samples every epoch so mini-batches are not always
# composed of the same patients in the same order. The test loader keeps its
# order so predictions stay aligned with the submission IDs.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=4)

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

# **Simple Model Architecture**

In [None]:
class ThreeDNetwork(nn.Module):
    """Small 3D CNN producing one raw score (logit) per input volume.

    Two ``Conv3d -> LeakyReLU -> MaxPool3d -> BatchNorm3d`` stages feed a
    two-layer fully connected head. No sigmoid is applied inside the model.

    Args:
        batch_size: Stored on the instance; not used by the layers.
        input_size: ``(depth, height, width)`` of the input volume, excluding
            the batch and channel axes. Determines the width of the first
            linear layer; the default reproduces the original 86400 features.

    Raises:
        ValueError: If ``input_size`` is too small to survive both stages.
    """

    def conv_layer(self, in_channels, out_channels, kernel_size, stride=2):
        """Build one Conv3d -> LeakyReLU -> MaxPool3d(2) -> BatchNorm3d stage."""
        conv_layer = nn.Sequential(
            nn.Conv3d(in_channels, out_channels, kernel_size, stride=stride),
            nn.LeakyReLU(),
            nn.MaxPool3d((2, 2, 2)),
            nn.BatchNorm3d(out_channels))
        return conv_layer

    @staticmethod
    def _stage_output_size(size, kernel_size, stride, pool=2):
        """Extent of one axis after an unpadded conv followed by a max-pool."""
        return ((size - kernel_size) // stride + 1) // pool

    def __init__(self, batch_size=BATCH_SIZE, input_size=(NUM_IMAGES, IMAGE_SIZE, IMAGE_SIZE)):
        super(ThreeDNetwork, self).__init__()
        self.batch_size = batch_size
        self.block1 = nn.Sequential(
            self.conv_layer(1, 64, 3, 2),
            self.conv_layer(64, 128, 3, 2))

        # Derive the flattened feature count from the input size instead of
        # hard-coding it, so a different IMAGE_SIZE / NUM_IMAGES keeps working.
        dims = tuple(input_size)
        for kernel_size, stride in ((3, 2), (3, 2)):
            dims = tuple(self._stage_output_size(d, kernel_size, stride) for d in dims)
        if min(dims) < 1:
            raise ValueError(f"input_size {tuple(input_size)} is too small for the two conv stages")
        depth, height, width = dims
        self.flat_features = 128 * depth * height * width

        self.fc = nn.Sequential(
            nn.Linear(self.flat_features, 1024),
            nn.LeakyReLU(),
            nn.BatchNorm1d(1024),
            nn.Dropout(0.2),
            nn.Linear(1024, 1))

    def forward(self, x):
        """Map a ``(batch, 1, depth, height, width)`` tensor to ``(batch, 1)`` logits."""
        x = self.block1(x)
        # Flatten everything but the batch axis. Unlike view(-1, N), this can
        # never silently regroup samples when the feature count is unexpected.
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        return x

In [None]:
# Instantiate the network with its default arguments.
model = ThreeDNetwork()

In [None]:
# Print the layer structure of the network.
print(model)

# **Training**

In [None]:
# Put the network on the target device before wiring up the training objects.
model = model.to(device)

# BCELoss takes probabilities, so the model's raw scores must go through a
# sigmoid before being passed to it.
train_criterion = nn.BCELoss().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Scale the learning rate by 0.1 when the monitored value has stopped improving.
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    factor=0.1,
    patience=4,
    cooldown=2,
    verbose=True,
)

In [None]:
epochs = 30

total_train_loss = []
# `np.Inf` was removed in NumPy 2.0; `np.inf` is available in every version.
best_train_loss = np.inf

for epoch in range(epochs):
    print('Epoch: ', epoch + 1)
    # Make sure dropout and batch-norm are in training mode even if an
    # evaluation cell switched the model to eval mode before a re-run.
    model.train()
    train_loss = []
    for image, target in train_loader:
        optimizer.zero_grad()
        # (batch,) integer labels -> (batch, 1) float targets matching the output.
        new_target = target.float().unsqueeze(1).to(device)
        image = image.float().to(device)
        output = torch.sigmoid(model(image))
        loss = train_criterion(output, new_target)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())

    epoch_train_loss = np.mean(train_loss)
    print(f'Epoch {epoch + 1}, train loss: {epoch_train_loss:.4f}')

    # NOTE: the checkpoint is selected on training loss; there is no validation split.
    if epoch_train_loss < best_train_loss:
        torch.save(model.state_dict(), 'tumor.pth')
        print('Model improved. Saving model.')
        best_train_loss = epoch_train_loss

    lr_scheduler.step(epoch_train_loss)
    total_train_loss.append(epoch_train_loss)

In [None]:
# Plot the mean training loss recorded for each epoch.
plt.plot(total_train_loss)

In [None]:
def rounding(num):
    """Round to the nearest integer, sending exact halves upward (0.5 -> 1, -0.5 -> 0)."""
    shifted = num + 0.5
    return math.floor(shifted)

In [None]:
# Restore the best checkpoint. `map_location` remaps the stored tensors onto the
# current device, so a checkpoint written on a GPU also loads on a CPU-only machine.
model.load_state_dict(torch.load('tumor.pth', map_location=device))

In [None]:
correct = 0
total = 0

model.eval()
with torch.no_grad():
    for image, target in train_loader:
        image = image.float().to(device)
        # (batch,) labels -> (batch, 1) to line up with the model output.
        new_target = target.to(device).int().unsqueeze(1)
        output = torch.sigmoid(model(image))
        # Probabilities of 0.5 and above count as class 1 (round half up).
        predicted = (output >= 0.5).int()
        # Count the real batch size: the last batch can be smaller than
        # BATCH_SIZE, which would otherwise deflate the accuracy.
        total += new_target.size(0)
        correct += (predicted == new_target).sum().item()

print('Train Accuracy: %d %%' % (100 * correct / total))

# **Inference**

In [None]:
id_series = []
mgmt_series = []

# Set eval mode here rather than relying on an earlier cell having done it:
# in training mode dropout and batch-norm would distort the predictions.
model.eval()
with torch.no_grad():
    for image, idx in test_loader:
        image = image.float().to(device)
        output = torch.sigmoid(model(image))
        # Truncate (not round) each probability to four decimals.
        for el in output.cpu().numpy().ravel():
            mgmt_series.append(float(math.trunc(el * 10000) / 10000.0))
        # Dataset positions, used afterwards to look up the patient IDs.
        id_series.extend(idx.tolist())

In [None]:
# Translate each dataset position recorded during inference into its patient ID.
brats_id_series = [int(test_ids[position]) for position in id_series]

In [None]:
# Turn both lists into named Series and place them side by side as columns.
brats_id_series = pd.Series(data=brats_id_series, name='BraTS21ID')
mgmt_series = pd.Series(data=mgmt_series, name='MGMT_value')
test_preds = pd.concat((brats_id_series, mgmt_series), axis='columns')

In [None]:
# Display the assembled prediction table.
test_preds