<div class='alert alert-info' style='text-align: center'><h1>Brain Tumor Classification</h1></div>

#### This notebook is a train & predict script for classifying brain tumors.
#### It trains a binary classifier (positive/negative MGMT status).
#### The data is split into train and test sets.
#### The images are PNG slices exported from the DICOM files of the RSNA-MICCAI brain MR dataset.

In [None]:
import sys
import os
import platform
# Report the interpreter version, OS family, platform name and OS release.
for env_detail in (sys.version, os.name, platform.system(), platform.release()):
    print(env_detail)

In [None]:
import torch
# Probe CUDA once and reuse the answer for the diagnostics below.
is_cuda_enabled = torch.cuda.is_available()
print('Cuda enabled', is_cuda_enabled)
if is_cuda_enabled:
    gpu_diagnostics = (
        torch.cuda.current_device(),
        torch.cuda.device(0),
        torch.cuda.device_count(),
        torch.cuda.get_device_name(0),
    )
    for gpu_detail in gpu_diagnostics:
        print(gpu_detail)

In [None]:
# Show the version of the CUDA compiler (nvcc) available in this environment.
!nvcc --version

In [None]:
# Show the NVIDIA driver / GPU status report.
!nvidia-smi

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import seaborn as sns
import matplotlib.pyplot as plt
import pydicom
import pandas as pd
from pydicom.pixel_data_handlers.util import apply_voi_lut
from tqdm import tqdm
import binascii
from PIL import Image

from fastai.vision.all import *
import numpy as np
import pandas as pd
import random
# Raise NumPy's summarisation threshold to the largest possible value so
# printed arrays are shown in full instead of being abbreviated.
np.set_printoptions(threshold=sys.maxsize)

In [None]:
# Ask PyTorch to release the GPU memory it is caching but not currently using.
torch.cuda.empty_cache()

# Load labels

In [None]:
# --- Run configuration -------------------------------------------------
EPOCHS = 10
INPUT_PATH = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'
LABELS_PATH = os.path.join(INPUT_PATH, 'train_labels.csv')
MODEL_EXPORT = '/kaggle/working/trained_model'

# Case ids to leave out, according to the dataset description.
exclude_cases = ["00109", "00123", "00709"]

# Every column is read as text (dtype=object), so the zero-padded ids stay
# comparable with the strings in `exclude_cases`; excluded rows are dropped.
df = pd.read_csv(LABELS_PATH, header=0, names=['id','value'], dtype=object)
is_excluded = df['id'].isin(exclude_cases)
df = df.loc[~is_excluded]

In [None]:
# Preview the first rows of the label table.
df.head()

In [None]:
# Class balance: number of cases per label value.
fig, ax = plt.subplots(figsize=(5, 4))
sns.countplot(data=df, x="value", ax=ax)

In [None]:
# Create the output folders for the exported images; existing folders are kept.
for folder, message in (
    ('./train', 'Train folder created'),
    ('./test', 'Test folder created'),
):
    os.makedirs(folder, exist_ok=True)
    print(message)

In [None]:
def seed_everything(seed=2021):
    """Seed every random number generator this notebook may touch.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic
    mode. TensorFlow is seeded too, but only when it is installed.

    Args:
        seed: Integer seed shared by all generators.
    """
    import random
    import os

    random.seed(seed)
    # Only affects interpreters started after this point; the hash seed of
    # the running process was fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

    # TensorFlow is optional here: seed it when present instead of making the
    # whole seeding step fail with ImportError when it is missing.
    try:
        import tensorflow as tf
    except ImportError:
        pass
    else:
        tf.random.set_seed(seed)

    torch.manual_seed(seed)
    # Covers every visible GPU, not just the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may pick different kernels from run to run.
    torch.backends.cudnn.benchmark = False
    print('Seed done!')
    
def natural_sort(l):
    """Return the strings of ``l`` sorted in natural (human) order.

    Runs of ASCII digits are compared as integers and everything else is
    compared case-insensitively, so ``"Image-2"`` sorts before ``"Image-10"``.

    Args:
        l: Iterable of strings.

    Returns:
        A new sorted list; the input is not modified.
    """
    # https://stackoverflow.com/a/4836734/8245487
    # Local import so the function does not depend on `re` leaking in through
    # a star import elsewhere in the notebook.
    import re

    def alphanum_key(key):
        # With a capturing group, re.split alternates text / digit-run, so the
        # odd positions are exactly the pieces that int() can parse.
        pieces = re.split('([0-9]+)', key)
        return [int(piece) if position % 2 else piece.lower()
                for position, piece in enumerate(pieces)]

    return sorted(l, key=alphanum_key)
    
def process_dicom(path):
    """Read one DICOM slice and return it as an 8-bit grayscale array.

    The pixel data is passed through pydicom's ``apply_voi_lut``, inverted
    when the photometric interpretation is ``MONOCHROME1``, and then
    min-max scaled so the darkest pixel maps to 0 and the brightest to 255.

    Args:
        path: Path of the ``.dcm`` file to read.

    Returns:
        A ``numpy.ndarray`` of dtype ``uint8``, or ``None`` when the slice is
        flat (every pixel has the same value, e.g. an all-black slice), since
        there is no range to scale.
    """
    # Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way
    # dcmread is the current name of the deprecated read_file alias.
    dicom = pydicom.dcmread(path)
    data = apply_voi_lut(dicom.pixel_array, dicom)
    if dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Work in float so the shift and division cannot wrap an integer dtype.
    data = np.asarray(data, dtype=np.float64)
    data = data - np.min(data)

    # The range must be measured AFTER the shift; dividing by the unshifted
    # maximum leaves the result short of 255 whenever the minimum is not 0.
    value_range = np.max(data)
    if value_range == 0:  # flat image: avoid dividing by zero
        return None

    return (data / value_range * 255).astype(np.uint8)
    
def save_image(data, outpath):
    """Write a 2-D grayscale pixel grid to ``outpath`` as an 8-bit image.

    Args:
        data: 2-D array-like (rows x columns) of pixel intensities in 0..255.
        outpath: Destination file path, handed to Pillow's ``Image.save``.

    Raises:
        ValueError: If ``data`` is empty or not two-dimensional.
    """
    pixels = np.asarray(data, dtype=np.uint8)
    if pixels.ndim != 2 or pixels.size == 0:
        raise ValueError(
            f"expected a non-empty 2-D pixel array, got shape {pixels.shape}"
        )

    # A 2-D uint8 array becomes a mode 'L' (8-bit grayscale) image directly,
    # with no need to flatten the pixels into a Python list first.
    Image.fromarray(pixels).save(outpath)
    
def resolve_dicom_files(input_dir, dataset='train'):
    """Export one representative FLAIR slice per case as a PNG.

    Walks ``{input_dir}/{dataset}`` and, for every directory whose path
    contains ``FLAIR``, converts the middle ``.dcm`` slice (in natural file
    order) to ``./{dataset}/{case_id}.png``. The case id is the name of the
    directory that contains the FLAIR folder. If the middle slice is blank,
    the nearest non-blank slice is used instead; a case whose slices are all
    blank is reported and skipped.

    Args:
        input_dir: Root directory of the dataset.
        dataset: Sub-folder to process; also the name of the output folder.
    """
    for subdir, _dirs, files in os.walk(f"{input_dir}/{dataset}"):
        slices = natural_sort([name for name in files if name.endswith(".dcm")])
        if not slices or "FLAIR" not in subdir:
            continue

        cur_id = os.path.basename(os.path.dirname(subdir))
        outpath = os.path.join(f'./{dataset}', f'{cur_id}.png')

        # Start at the middle-most slice and move outwards: process_dicom
        # returns None for a blank slice, which cannot be written as an image.
        middle = len(slices) // 2
        by_distance = sorted(range(len(slices)), key=lambda i: abs(i - middle))
        for index in by_distance:
            data = process_dicom(os.path.join(subdir, slices[index]))
            if data is not None:
                save_image(data, outpath)
                break
        else:
            print(f'Skipping {cur_id}: every FLAIR slice is blank')

In [None]:
# Seed the random number generators with the function's default seed.
seed_everything()

In [None]:
%%time
# Export the per-case PNG images for both splits (written to ./train and ./test).
resolve_dicom_files(INPUT_PATH, 'train')
resolve_dicom_files(INPUT_PATH, 'test')

In [None]:
# Path of the exported PNG for every labelled case. Built in one vectorised
# step: the per-id boolean-mask assignment rescanned the whole frame for each
# row, and `assign` returns a new frame instead of mutating a filtered one.
df = df.assign(file='./train/' + df['id'] + '.png')

In [None]:
# Display the label table, now including the `file` column.
df

In [None]:
# A DataLoaders object is a combination of training and validation data.
# Columns are addressed by name rather than by position, so the call keeps
# working if the frame's column order changes and does not rely on positional
# lookups into each row.
image_data = ImageDataLoaders.from_df(
    df,
    path='',
    fn_col='file',
    label_col='value',
    item_tfms=Resize(224),
    bs=64,
)

In [None]:
# Look at the data: display a sample batch from the data loaders.
image_data.show_batch()

# Training stage

In [None]:
import torch 
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small LeNet-style CNN: two conv/pool stages followed by three linear layers.

    Args:
        pretrained: Ignored; the network is always randomly initialised. The
            parameter is kept so the constructor can be called with a
            ``pretrained`` argument.
        num_classes: Number of output logits. Defaults to 10 for backward
            compatibility; pass 2 for a binary positive/negative problem.
    """

    def __init__(self, pretrained=False, num_classes=10):
        super().__init__()
        # 3 input image channels, 6 output channels, 5x5 square convolution kernel
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` image tensor to ``(batch, num_classes)`` raw logits."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # fc1 expects a 16x5x5 feature map, which the conv stack only yields
        # for 32x32 inputs. Pooling to 5x5 makes any input resolution fit and
        # is the identity when the map is already 5x5. The functional form is
        # used so the module's children and state_dict stay unchanged.
        x = F.adaptive_avg_pool2d(x, (5, 5))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the output: a ReLU here would clamp negative logits
        # to zero before the loss sees them.
        return self.fc3(x)

In [None]:
# Instantiate the network with its default arguments and print its layer summary.
model = Net()
print(model)

In [None]:
# Collect the parameter tensors of the standalone `model` instance.
params = list(model.parameters())
print(len(params))  # number of parameter tensors
print(params[0].size())  # conv1's .weight

In [None]:
# Build the fastai learner (mixed precision via to_fp16), tracking error rate
# and accuracy. Per the original author's note, fastai is expected to choose
# an appropriate loss function.
# NOTE(review): the Net class, not the `model` instance, is passed in, so this
# learner does not share weights with `model`.
# NOTE(review): cnn_learner presumably treats the architecture as a backbone
# and attaches its own head — confirm that Net's own layers are really used,
# or build the learner with Learner(image_data, Net(), ...) instead.
learn = cnn_learner(image_data, Net, metrics=[error_rate, accuracy], model_dir="/tmp/model/").to_fp16()

In [None]:
# Run fastai's learning-rate finder on the learner.
learn.lr_find()

In [None]:
# List every state_dict entry of `model` (the standalone Net instance, not the
# learner) together with the shape of its tensor.
print("Model's state_dict:")
for entry_name, entry_tensor in model.state_dict().items():
    print(entry_name, "\t", entry_tensor.size())

In [None]:
%%time
# Train for EPOCHS epochs with fit_one_cycle, using a maximum learning rate of 1e-2.
learn.fit_one_cycle(EPOCHS, lr_max=1e-2)

In [None]:
# Show a sample of the learner's predictions.
learn.show_results()

In [None]:
# Save the trained model to disk at the location given by MODEL_EXPORT.
learn.save(MODEL_EXPORT)

In [None]:
# Build an interpretation object from the learner and plot the 9 samples
# with the highest loss.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_top_losses(9, figsize=(15,11))

In [None]:
# One row per entry of the test folder; `value` is filled in by the
# prediction step. os.listdir returns entries in arbitrary order, so they are
# sorted to make the row order (and the submission file) reproducible.
df_test = pd.DataFrame(columns=['id', 'value'])
df_test['id'] = sorted(os.listdir(os.path.join(INPUT_PATH, "test/")))

# Predict stage

In [None]:
# load weights
#learn = cnn_learner(image_data, Net, metrics=[error_rate, accuracy], model_dir="/tmp/model/").to_fp16()
#learn_new = learn.load(MODEL_EXPORT)

In [None]:
%%time
# Predict every test image and store the probability of class index 1.
# Results are collected in a list and written to the frame once, instead of
# locating each row with a boolean mask (which rescans the frame per image)
# and printing every value; tqdm shows progress instead.
probabilities = []
for id_num in tqdm(df_test.id, desc='Predicting'):
    full_path = f'./test/{id_num}.png'
    # The third element of the prediction holds the per-class probabilities.
    # NOTE(review): index 1 is assumed to be the positive class — confirm
    # against learn.dls.vocab.
    prediction = learn.predict(full_path)
    probabilities.append(prediction[2][1].item())

df_test['value'] = probabilities

In [None]:
# Preview the first predicted rows.
df_test.head()

In [None]:
# Sanity check: smallest and largest predicted value.
df_test.value.min(), df_test.value.max()

In [None]:
# Rename the columns to BraTS21ID / MGMT_value, write the result to
# submission.csv without the index column, and preview it.
df_output = df_test.rename(columns={'id':'BraTS21ID','value':'MGMT_value'})
df_output.to_csv('submission.csv', index=False)
df_output.head()

# Print requirements

In [None]:
# taken from here https://stackoverflow.com/a/49199019
import pkg_resources
import types
def get_imports():
    """Yield the root package name behind every module and class in the globals.

    Modules contribute the first component of their ``__name__`` and classes
    the first component of their ``__module__``. Globals that are neither
    (plain variables, functions, ...) are skipped. Names may repeat; callers
    are expected to de-duplicate.

    Yields:
        Package names, translated to their pip name where a translation is known.
    """
    # Some packages are weird and have different
    # imported names vs. system/pip names. Unfortunately,
    # there is no systematic way to get pip names from
    # a package's imported name. You'll have to add
    # exceptions to this list manually!
    poorly_named_packages = {
        "PIL": "Pillow",
        "sklearn": "scikit-learn"
    }

    # Iterate over a snapshot so globals created while the generator is being
    # consumed cannot invalidate the iteration.
    for val in list(globals().values()):
        if isinstance(val, types.ModuleType):
            # Split ensures you get root package,
            # not just imported function
            name = val.__name__.split(".")[0]
        elif isinstance(val, type):
            name = val.__module__.split(".")[0]
        else:
            # Not an import: yielding the variable's own name here could
            # accidentally match an installed package.
            continue

        yield poorly_named_packages.get(name, name)
# Unique root-package names referenced from the notebook's globals.
imports = list(set(get_imports()))

# Versions are only available from the installed distributions, so walk
# them and keep each one whose project name matches an imported package
# (pip itself is left out).
requirements = [
    (dist.project_name, dist.version)
    for dist in pkg_resources.working_set
    if dist.project_name in imports and dist.project_name != "pip"
]

for project, version in requirements:
    print("{}=={}".format(project, version))

In [None]:
# Final look at the first rows of the prediction table.
df_test.head()