In [None]:
import numpy as np 
import pandas as pd
import os
import pydicom
import glob
import seaborn as sn
from fastai.vision.all import *
from fastai.medical.imaging import *
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from skimage import exposure
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2
import warnings
warnings.filterwarnings('ignore')

#### Check files and folders

In [None]:
# Training directory as a Path object, so the `/` joins and `.ls()` calls used
# in the following cells work on it.
train_dataset_path = Path('../input/rsna-miccai-brain-tumor-radiogenomic-classification/train')

In [None]:
# Display the entries found directly inside the training directory.
train_dataset_path.ls()

In [None]:
def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """Read a DICOM file and return its pixel data as an 8-bit image.

    Args:
        path: Location of the DICOM file to read.
        voi_lut: When True, the pixel data is passed through `apply_voi_lut`
            (the VOI LUT, if the device stored one, gives a "human-friendly"
            view of the raw data); when False the raw `pixel_array` is used.
        fix_monochrome: When True, images whose PhotometricInterpretation is
            "MONOCHROME1" are inverted so they do not look negative.

    Returns:
        A `np.uint8` array min-max scaled to the range 0-255. An image whose
        pixels are all equal is returned as all zeros.
    """
    # `dcmread` is the supported reader; `read_file` is a deprecated alias
    # that newer pydicom releases no longer provide.
    dicom = pydicom.dcmread(path)
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
    # depending on this value, the image may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Constant image: dividing by zero would produce NaNs and an
        # undefined uint8 cast, so return a blank frame instead.
        return np.zeros(data.shape, dtype=np.uint8)
    data = data / peak
    data = (data * 255).astype(np.uint8)
    return data
        
    
def plot_img(img, size=(10, 10), is_rgb=True, title="", cmap='gray'):
    """Show a single image in a figure of its own.

    Args:
        img: Image data accepted by `plt.imshow`.
        size: Figure size passed to `plt.figure` as `figsize`.
        is_rgb: Accepted but not used by this function.
        title: Text placed as the figure's suptitle.
        cmap: Colormap handed to `plt.imshow`.
    """
    figure = plt.figure(figsize=size)
    plt.imshow(img, cmap=cmap)
    # The figure created above is the current one, so this is its suptitle.
    figure.suptitle(title)
    plt.show()


def plot_imgs(imgs, cols=4, size=7, is_rgb=True, title="", cmap='gray', img_size=(500,500)):
    """Show several images on a grid with `cols` columns.

    Args:
        imgs: Sequence of image arrays.
        cols: Number of grid columns.
        size: Edge length given to every grid cell when sizing the figure.
        is_rgb: Accepted but not used by this function.
        title: Text placed as the figure's suptitle.
        cmap: Colormap handed to `imshow`.
        img_size: Target size passed to `cv2.resize` for every image, or
            None to draw the images at their own size.
    """
    # Ceiling division: exactly as many rows as needed. The previous
    # `len(imgs)//cols + 1` reserved a spare empty row whenever the number of
    # images was a multiple of `cols`. At least one row keeps the figure
    # size valid for an empty input.
    rows = max(1, -(-len(imgs) // cols))
    fig = plt.figure(figsize=(cols*size, rows*size))
    for i, img in enumerate(imgs):
        if img_size is not None:
            img = cv2.resize(img, img_size)
        ax = fig.add_subplot(rows, cols, i+1)
        ax.imshow(img, cmap=cmap)
    fig.suptitle(title)
    plt.show()

In [None]:
# For patient 00045, take the entry at index 10 of each sub-folder's listing,
# convert those DICOM files to 8-bit arrays and show them side by side.
dicom_paths = [series_dir.ls()[10] for series_dir in (train_dataset_path/'00045').ls()]
imgs = list(map(dicom2array, dicom_paths))
plot_imgs(imgs)

In [None]:
# Show which files were picked for the preview above.
dicom_paths

In [None]:
# Dataset root (note the trailing slash) and the two splits beneath it.
dataset_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/'
train_path = dataset_path + 'train'
test_path = dataset_path + 'test'

# Folder name of each MRI scan type inside a patient directory.
flair_dir, t1w_dir, t1wce_dir, t2w_dir = 'FLAIR', 'T1w', 'T1wCE', 'T2w'

os.listdir(dataset_path)

In [None]:
# Load the label table; the message below reports its row count as the
# number of patients.
train_labels = pd.read_csv(
    "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv"
)
print(f'There are {train_labels.shape[0]} patients in this dataset')

In [None]:
# Preview the first rows of the label table.
train_labels.head()

In [None]:
# Class balance: number of non-null entries per MGMT_value.
train_labels.groupby('MGMT_value').count()

In [None]:

# Bar chart of the class balance. The column is passed as `x=` because
# current seaborn accepts the plotted variable by keyword only; a bare
# positional Series is taken as `data` instead.
sn.countplot(x=train_labels.MGMT_value)

In [None]:
def getFullId(id):
    """Return `id` as text, left-padded with zeros to at least five characters."""
    id_text = str(id)
    return id_text.zfill(5)
    
def getFlairPath(id):
    """Return the FLAIR folder of training patient `id`, or False if it does not exist."""
    candidate = os.path.join(train_path, getFullId(id), flair_dir)
    if os.path.isdir(candidate):
        return candidate
    return False

def getT1wPath(id):
    """Return the T1w folder of training patient `id`, or False if it does not exist."""
    candidate = os.path.join(train_path, getFullId(id), t1w_dir)
    if os.path.isdir(candidate):
        return candidate
    return False

def getT1wcePath(id):
    """Return the T1wCE folder of training patient `id`, or False if it does not exist."""
    candidate = os.path.join(train_path, getFullId(id), t1wce_dir)
    if os.path.isdir(candidate):
        return candidate
    return False

def getT2wPath(id):
    """Return the T2w folder of training patient `id`, or False if it does not exist."""
    candidate = os.path.join(train_path, getFullId(id), t2w_dir)
    if os.path.isdir(candidate):
        return candidate
    return False

In [None]:
def countFlairFiles(id):
    """Return the number of entries in the FLAIR folder of patient `id`.

    Returns 0 when the folder does not exist: `getFlairPath` signals that case
    with `False`, which must not be handed to `os.listdir` as if it were a
    path.
    """
    path = getFlairPath(id)
    if not path:
        return 0
    return len(os.listdir(path))

def countT1wFiles(id):
    """Return the number of entries in the T1w folder of patient `id`.

    Returns 0 when the folder does not exist: `getT1wPath` signals that case
    with `False`, which must not be handed to `os.listdir` as if it were a
    path.
    """
    path = getT1wPath(id)
    if not path:
        return 0
    return len(os.listdir(path))

def countT1wceFiles(id):
    """Return the number of entries in the T1wCE folder of patient `id`.

    Returns 0 when the folder does not exist: `getT1wcePath` signals that case
    with `False`, which must not be handed to `os.listdir` as if it were a
    path.
    """
    path = getT1wcePath(id)
    if not path:
        return 0
    return len(os.listdir(path))

def countT2wFiles(id):
    """Return the number of entries in the T2w folder of patient `id`.

    Returns 0 when the folder does not exist: `getT2wPath` signals that case
    with `False`, which must not be handed to `os.listdir` as if it were a
    path.
    """
    path = getT2wPath(id)
    if not path:
        return 0
    return len(os.listdir(path))

# Add one column per scan type holding that patient's file count.
# The counting functions take a single id, so they can be applied directly.
train_labels['FLAIR'] = train_labels['BraTS21ID'].apply(countFlairFiles)
train_labels['T1w'] = train_labels['BraTS21ID'].apply(countT1wFiles)
train_labels['T1wCE'] = train_labels['BraTS21ID'].apply(countT1wceFiles)
train_labels['T2w'] = train_labels['BraTS21ID'].apply(countT2wFiles)

In [None]:
# Distribution of the per-patient file counts, one box per scan type.
# Melting gives the long format ("variable" = scan type, "value" = count).
sn.boxplot(
    data=train_labels[['FLAIR', 'T1w', 'T1wCE', 'T2w']].melt(),
    x="variable",
    y="value",
)
plt.title('Number of images files by structural multi-parametric MRI')
plt.show()

In [None]:
def get3ScaledImage(path):
    """Read a DICOM file and return it as a 3-channel 8-bit image.

    Args:
        path: Location of the DICOM file to read.

    Returns:
        A tuple `(image, (c, r))`: `image` is a `np.uint8` array of shape
        `(r, c, 3)` whose three channels are identical, and `(c, r)` is the
        column and row count of the source pixel array.
    """
    # `dcmread` is the supported reader; `read_file` is a deprecated alias
    # that newer pydicom releases no longer provide.
    dicom = pydicom.dcmread(path)
    img = dicom.pixel_array

    r, c = img.shape

    ## Step 1. Convert to float to avoid overflow or underflow losses,
    ## and clip negative values to zero.
    img_float = np.maximum(img.astype(float), 0)

    ## Step 2. Rescale grey levels to 0-255. An all-zero image is left as
    ## is, because dividing by its maximum would produce NaNs.
    peak = img_float.max()
    if peak > 0:
        img_float = (img_float / peak) * 255.0

    ## Step 3. Convert to uint8 and replicate the plane into three channels.
    ## The channel buffer is (rows, cols, 3); the earlier (cols, rows, 3)
    ## buffer could not hold a non-square image.
    img_scaled = np.uint8(img_float)
    img_2d_scaled = np.stack([img_scaled, img_scaled, img_scaled], axis=-1)

    return img_2d_scaled, (c, r)

In [None]:
id = 0

path = getFlairPath(id)
# Iterate over the files that are actually present, ordered by the number in
# their name, instead of assuming they are called Image-1 .. Image-nb with no
# gaps (a series whose numbering starts later or skips a slice would otherwise
# fail with a missing-file error).
dicom_files = sorted(
    glob.glob(os.path.join(path, 'Image-*.dcm')) if path else [],
    key=lambda p: int(os.path.splitext(os.path.basename(p))[0].split('-')[-1]),
)
nb = len(dicom_files)
frames = []

for img_path in dicom_files:
    img_2d_scaled, size = get3ScaledImage(img_path)
    frames.append(img_2d_scaled)


In [None]:
# Shape of the last frame produced by the loading loop.
img_2d_scaled.shape

In [None]:
# Pixel values of the last frame produced by the loading loop.
img_2d_scaled

In [None]:
# Label table, including the per-scan-type file-count columns added earlier.
train_labels

In [None]:
# Target column for the classifier.
label = train_labels['MGMT_value']

In [None]:
# Inspect the target column.
label

In [None]:
# Features are every column except the patient id and the target.
X = train_labels.drop(columns=['BraTS21ID', 'MGMT_value'])
y = label

In [None]:
# Sanity check: X and y should have the same number of rows.
X.shape, y.shape

In [None]:
from sklearn.ensemble import RandomForestClassifier
# A fixed seed makes the fitted forest identical on every run of the notebook;
# without it the bootstrap samples and feature choices change each time.
model = RandomForestClassifier(random_state=42)

In [None]:
# Fit on the full table: X holds the columns left after dropping the id and
# target, y is MGMT_value. NOTE(review): no rows are held out here, so this
# cell gives no estimate of generalisation performance.
model.fit(X, y)

#### Thanks for reading this far. If you have any suggestions for further tips to add, feel free to comment below.