In [None]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pydicom
import seaborn as sns 
from matplotlib import animation, rc
from tqdm import tqdm
rc('animation', html='jshtml')

In [None]:
# Read the training labels and the sample submission shipped with the competition data.
train, sample = map(
    pd.read_csv,
    (
        "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv",
        "../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv",
    ),
)

# Leave the labels frame as the cell's last expression so the notebook displays it.
train

### Data visualization

In [None]:
def load_dicom(path):
    """Read one DICOM file and return its pixels rescaled to 8-bit.

    Parameters
    ----------
    path : str or path-like
        Location of the DICOM file to read.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as the file's ``pixel_array``.
        The minimum value is shifted to 0 and, unless the image is constant,
        the maximum is scaled to 255. A constant image comes back all zeros.
    """
    # dcmread is the supported reader; the old read_file alias is deprecated
    # and no longer available in pydicom 3.x.
    dicom = pydicom.dcmread(path)
    data = dicom.pixel_array

    # Shift so the darkest pixel is 0, then stretch to [0, 1] when possible.
    data = data - np.min(data)
    peak = np.max(data)
    if peak != 0:  # avoid dividing by zero on a constant image
        data = data / peak

    return (data * 255).astype(np.uint8)

In [None]:
def create_animation(ims, num, mgmt, img_type, fps=24):
    """Build a matplotlib animation that steps through a stack of 2-D images.

    Parameters
    ----------
    ims : sequence of array-like
        Frames to display, in playback order. Must contain at least one frame.
    num : object
        Identifier shown in the title after "Num:" (converted with ``str``).
    mgmt : object
        Value shown in the title after "MGMT:" (converted with ``str``).
    img_type : str
        Label shown in the title after "Image type:".
    fps : int, optional
        Playback speed in frames per second; the delay between frames is
        ``1000 // fps`` milliseconds. Must be non-zero. Defaults to 24.

    Returns
    -------
    matplotlib.animation.FuncAnimation
        Animation with ``len(ims)`` frames drawn in grayscale.

    Raises
    ------
    ValueError
        If ``ims`` is empty (there would be no first frame to draw).
    """
    if len(ims) == 0:
        raise ValueError(
            "create_animation needs at least one image frame, got an empty sequence"
        )

    fig, ax = plt.subplots(figsize=(6, 6))
    ax.axis('off')
    ax.set_title("Num: " + str(num) + ", MGMT: " + str(mgmt) + ", Image type: " + img_type)
    im = ax.imshow(ims[0], cmap='gray')

    def animate_func(i):
        # Swap the pixel data of the existing image artist instead of redrawing.
        im.set_array(ims[i])
        return [im]

    anim = animation.FuncAnimation(
        fig, animate_func, frames=len(ims), interval=1000 // fps
    )
    # Release the figure from pyplot's registry so it is not left open;
    # presumably this also stops the notebook from rendering a static copy
    # next to the animation. `anim` keeps its own reference to `fig`.
    plt.close(fig)
    return anim

In [None]:
def load_dicom_line(root_path, num, img_type):
    """Load every non-blank image of one series, ordered by slice number.

    The series directory is ``<root_path>/<num zero-padded to 5>/<img_type>``.

    Parameters
    ----------
    root_path : str
        Directory containing the per-case folders. A trailing path separator
        is accepted but not required.
    num : int or str
        Case identifier; converted with ``str`` and left-padded with zeros
        to five characters to form the folder name.
    img_type : str
        Name of the sub-folder holding the series.

    Returns
    -------
    list of numpy.ndarray
        One array per file, as returned by ``load_dicom``, skipping any image
        whose maximum is 0. Empty if the directory has no files.

    Raises
    ------
    ValueError
        If a file name does not end (before its extension) in ``-<integer>``.
    """
    # os.path.join works whether or not root_path ends with a separator.
    series_dir = os.path.join(root_path, str(num).zfill(5), img_type)

    def slice_number(file_path):
        # Sort numerically on the integer after the last "-" in the file name
        # (e.g. a name shaped like "Image-12.dcm" -> 12), so 10 sorts after 2.
        stem = os.path.splitext(os.path.basename(file_path))[0]
        return int(stem.split("-")[-1])

    t_paths = sorted(glob.glob(os.path.join(series_dir, "*")), key=slice_number)

    images = []
    for filename in t_paths:
        data = load_dicom(filename)
        if data.max() == 0:
            # Entirely black frame: nothing to show, leave it out.
            continue
        images.append(data)

    return images

In [None]:
# Preview one series: pick a row of the labels table by position and an image type.
root_path = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/"
num = 30
img_type = "FLAIR"  # "FLAIR", "T1w", "T1wCE", "T2w"

# Look the row up once and reuse it for both the loader and the title.
preview_row = train.iloc[num]
preview_id = preview_row["BraTS21ID"]
images = load_dicom_line(root_path, preview_id, img_type)
create_animation(images, preview_id, preview_row["MGMT_value"], img_type)

In [None]:
# Count how many files each case has in each of the four series folders.
Scans = ["FLAIR","T1w","T1wCE","T2w"]
train_path = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/"
allres = []

# Passing `total` gives tqdm the length it cannot infer from a zip/enumerate
# iterator, so the bar shows real progress and an ETA.
for brats_id, mgmt_value in tqdm(
    zip(train["BraTS21ID"], train["MGMT_value"]), total=len(train)
):
    case_dir = os.path.join(train_path, str(brats_id).zfill(5))
    # One file count per series, in the same order as `Scans`.
    scannum = [len(os.listdir(os.path.join(case_dir, scan))) for scan in Scans]
    allres.append([brats_id, mgmt_value, *scannum])

# Name the columns at construction so an empty result still has the right schema.
allnumdf = pd.DataFrame(
    allres,
    columns=["BraTS21Id", "MGMT_value","FLAIR", "T1w", "T1wCE", "T2w"],
)
allnumdf

In [None]:
# Bar chart of how many rows carry each MGMT_value label.
_, count_ax = plt.subplots(figsize=(5, 5))
sns.countplot(x="MGMT_value", data=allnumdf, ax=count_ax);

In [None]:
# Distribution of the per-case file counts, one panel per series column.
fig, ax = plt.subplots(2, 2, figsize=(10, 10))

# Row-major order matches the original layout:
# FLAIR (0,0), T1w (0,1), T1wCE (1,0), T2w (1,1).
for panel, scan_col in zip(ax.ravel(), ["FLAIR", "T1w", "T1wCE", "T2w"]):
    # sns.distplot is deprecated; seaborn documents histplot with kde=True and
    # stat="density" (kde cut=3) as the equivalent histogram + KDE overlay.
    sns.histplot(
        allnumdf[scan_col], kde=True, stat="density", kde_kws=dict(cut=3), ax=panel
    )

# plt.show() renders on every backend; Figure.show() warns on non-GUI backends.
plt.show()

In [None]:
# Keep only the rows whose MGMT_value equals 1 and renumber the index from zero.
mgmt_is_one = allnumdf["MGMT_value"] == 1
tmpdf = allnumdf.loc[mgmt_is_one].reset_index(drop=True)
tmpdf