In [None]:
import pandas as pd
# None disables column-width truncation; the legacy -1 sentinel was deprecated in pandas 1.0
pd.set_option('display.max_colwidth', None)
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams['animation.html'] = 'jshtml'
import seaborn as sns

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from kaggle_datasets import KaggleDatasets

from sklearn.model_selection import train_test_split
import pydicom
import os, re, gc, glob, cv2
import imageio
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:
# import wandb
# print('W&B version: ', wandb.__version__)
# from wandb.keras import WandbCallback

# wandb.login()

In [None]:
# Turn on memory growth for every GPU TensorFlow can see, then report how many
# physical and logical GPU devices are available. Does nothing when no GPU is listed.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

# MRI(MAGNETIC RESONANCE IMAGING)
A magnetic resonance imaging (MRI) scan is a common procedure around the world. <br>
N.B. This excerpt is taken from https://m.blog.naver.com/PostView.naver?isHttpsRedirect=true&blogId=meryang63&logNo=221163428141


#### MRI uses a strong magnetic field and radio waves to create detailed images of the organs and tissues within the body.




### Cross-sections and divisions <br>
<img src="https://github.com/Kevinlee49/MRI_images/blob/main/mri3.png?raw=true">


## MRI IMAGING SEQUENCES
### The most common MRI imaging sequences are:

1. T1-weighted (T1w)
2. T2-weighted (T2w)
3. Fluid Attenuated Inversion Recovery (FLAIR)
4. T1-weighted Gadolinium Post Contrast (T1wCE/T1Gd)

## Comparison of Different Sequences <br>
<img src="https://github.com/Kevinlee49/MRI_images/blob/main/mri2.png?raw=true">

This table is from https://m.blog.naver.com/PostView.naver?isHttpsRedirect=true&blogId=meryang63&logNo=221163428141 <br>

<div class="se_component se_table default">
        <div class="se_sectionArea se_align-left">
            <div class="se_editArea">
                <div id="SEDOC-1513275760453--243951851_table_0" data-attachment-id="" class="se_table_wrap __se-component">
                    <div class="se_table_innerWrap">
                        <table class="se_table_col" style="width:100%; border-width: 0px; border-color: rgb(212, 212, 212);">
                            <tbody>
                                <tr>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:25.119541%;height:42.0px;background-color:;border: 1px solid rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <b><span lang="EN-US">&nbsp;</span></b><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:24.322601%;height:42.0px;background-color:#e2e2e2;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <b><span lang="EN-US">T1</span></b><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:20.975454%;height:42.0px;background-color:#e2e2e2;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <b><span lang="EN-US">T2</span></b><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:29.582403%;height:42.0px;background-color:#e2e2e2;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <b><span lang="EN-US">FLAIR</span></b><br>
  </div>
                                    </td>
                                </tr>
                                <tr>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:25.119541%;height:84.0px;background-color:;border: 1px solid rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea"></div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:24.322601%;height:84.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea"><span lang="EN-US" style="color: rgb(0, 0, 0);" class="se_fs_T5">-<span class="se_fs_T5">&nbsp;</span></span><span style="color: rgb(0, 0, 0);" class="se_fs_T5">video before contrast<span lang="EN-US" class="se_fs_T5">.<span class="se_fs_T5">&nbsp;</span><br></span><span lang="EN-US"><span class="se_fs_T5"></span></span>image<span class="se_fs_T5">&nbsp;</span><span lang="EN-US" class="se_fs_T5">T2</span>which is for comparison<span lang="EN-US" class="se_fs_T5"><span class="se_fs_T5">&nbsp;</span>with T2 image.</span></span></div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:20.975454%;height:84.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea"><span lang="EN-US" style="color: rgb(0, 0, 0);" class="se_fs_T5">-<span class="se_fs_T5">&nbsp;</span></span><span style="color: rgb(0, 0, 0);" class="se_fs_T5">video after contrast<span lang="EN-US" class="se_fs_T5">,<span class="se_fs_T5">&nbsp;</span><br></span><span lang="EN-US" class="se_fs_T5"><span class="se_fs_T5"></span></span>Brain lesions can be closely monitored</span></div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:29.582403%;height:84.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea"><span class="se_fs_T5" style="color: rgb(0, 0, 0);"><span lang="EN-US"><span lang="EN-US" style=" color: rgb(0, 0, 0)" class="se_fs_T5"><span lang="EN-US"><span>-<span>&nbsp; </span></span></span><span lang="EN-US">T2</span><span>video <span lang="EN-US">+ dark CSF </span></span></span><br></span><span lang="EN-US"><span lang="EN-US" style=" color: rgb(0, 0, 0)" class="se_fs_T5">-<span class="se_fs_T5">&nbsp;</span></span><span style=" color: rgb(0, 0, 0)" class="se_fs_T5"> lesions are brightly monitored<span class="se_fs_T5">&nbsp;</span><span lang="EN-US" class="se_fs_T5"><span class="se_fs_T5">&nbsp;</span></span></span></span><br></span><span class="se_fs_T4" style="color: rgb(0, 0, 0);"><span lang="EN-US" class="se_fs_T5">-  Unlike T2, CSF space can be observed </span><span class="se_fs_T5"><span class="se_fs_T5">&nbsp;</span><span lang="EN-US" class="se_fs_T5"><span class="se_fs_T5">&nbsp;</span></span><span class="se_fs_T5">&nbsp;</span><br></span><span class="se_fs_T5"><span class="se_fs_T5"></span><span lang="EN-US" class="se_fs_T5">-&gt; Around ventricle, sulci<span class="se_fs_T5">&nbsp;</span></span><span class="se_fs_T5">&nbsp;</span><span lang="EN-US" class="se_fs_T5">lesion<span class="se_fs_T5">&nbsp;</span></span>easily can be observed</span></span></div>
                                    </td>
                                </tr>
                                <tr>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:25.119541%;height:42.0px;background-color:;border: 1px solid rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <b><span lang="EN-US">CSF</span></b><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:24.322601%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Dark</span><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:20.975454%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Bright</span><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:29.582403%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Dark</span><br>
  </div>
                                    </td>
                                </tr>
                                <tr>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:25.119541%;height:42.0px;background-color:;border: 1px solid rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <b><span lang="EN-US">White
  matter</span></b><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:24.322601%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Light</span><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:20.975454%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Darkgray</span><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:29.582403%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Darkgray</span><br>
  </div>
                                    </td>
                                </tr>
                                <tr>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:25.119541%;height:42.0px;background-color:;border: 1px solid rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <b><span lang="EN-US">Cortex</span></b><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:24.322601%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Gray</span><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:20.975454%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Light gray</span><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:29.582403%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Light gray</span><br>
  </div>
                                    </td>
                                </tr>
                                <tr>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:25.119541%;height:42.0px;background-color:;border: 1px solid rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <b><span lang="EN-US">Fat(B.M)</span></b><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:24.322601%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Bright</span><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:20.975454%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Light</span><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:29.582403%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Light</span><br>
  </div>
                                    </td>
                                </tr>
                                <tr>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:25.119541%;height:42.0px;background-color:;border: 1px solid rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <b><span lang="EN-US">Inflammation<br>
  (infection, demyelination)</span></b><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:24.322601%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Dark</span><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:20.975454%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Bright</span><br>
  </div>
                                    </td>
                                    <td class="se_cell se_align-left" colspan="1" rowspan="1" style="width:29.582403%;height:42.0px;background-color:;border-width: 1px; border-style: solid; border-color: rgb(212, 212, 212); padding: 0cm 5.4pt;">
                                        <div class="se_cellArea">
  <span lang="EN-US">Bright </span><br>
  </div>
                                    </td>
                                </tr>
                            </tbody>
                        </table>
                    </div>
                </div>
            </div>
        </div>
    </div>

In [None]:
root_dir = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/'
df = pd.read_csv(root_dir+'train_labels.csv')
sns.countplot(data=df, x='MGMT_value')

## What is the 'MGMT_value'? <br>

O[6]-methylguanine-DNA methyltransferase (MGMT) is a protein in cells, including tumour cells, that repairs damage to the cell’s DNA. For example, the damage caused by chemotherapy drugs to tumour cells. The more MGMT protein that the tumour produces, the less effective the chemotherapy drug is expected to be, as the protein will repair the damage to the tumour. Thus, determination of MGMT promoter methylation status in newly diagnosed GBM can influence treatment decision making.

In this dataset the MGMT promoter methylation status data is defined as a binary label (0: unmethylated, 1: methylated)

In [None]:
# Add the full paths for each id for different types of sequences to the csv 
def full_ids(data):
    '''
    Left-pad an id with zeros so it is at least 5 characters long.

    Args:
        data: The id to format; it is converted with ``str``.

    Returns:
        str: ``str(data)`` padded on the left with '0' up to 5 characters
        (e.g. ``12`` -> ``'00012'``). Values that are already 5 or more
        characters long are returned unchanged.
    '''
    # rjust never truncates, so long ids pass through untouched. The previous
    # implementation only assigned `prefix` when padding was needed and raised
    # UnboundLocalError for ids that were already 5+ characters long.
    return str(data).rjust(5, '0')
        

# Zero-padded version of the patient id, used as the folder name below
df['BraTS21ID_full'] = df['BraTS21ID'].apply(full_ids)

# Add one column per sequence type holding that patient's folder path
for column_name, folder_name in (('flair', 'FLAIR'), ('t1w', 'T1w'), ('t1wce', 'T1wCE'), ('t2w', 'T2w')):
    df[column_name] = [
        f'{root_dir}train/{file_id}/{folder_name}/'
        for file_id in df['BraTS21ID_full']
    ]
df

## DICOM
DICOM is the international standard to communicate and manage medical images and data. Its mission is to ensure the interoperability of systems used to produce, store, share, display, send, query, process, retrieve and print medical images, as well as to manage related workflows.



In [None]:
data = pydicom.dcmread('../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/T1w/Image-24.dcm')
data

In [None]:
def get_image(data):
    '''
    Return the pixel data of a DICOM dataset scaled by its maximum value.

    Args:
        data: Object exposing a ``pixel_array`` attribute.

    Returns:
        ``pixel_array`` divided by its maximum, or the unmodified
        ``pixel_array`` when that maximum is 0 (avoids dividing by zero).
    '''
    pixels = data.pixel_array
    peak = np.max(pixels)
    return pixels if peak == 0 else pixels / peak
# Read one T1w slice of patient 00000, scale it with get_image and show it in grayscale
data = pydicom.dcmread('../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/T1w/Image-24.dcm')
img = get_image(data)
plt.imshow(img, cmap='gray')

## Display Sequence of MRIs as Animation <br>
Matplotlib is used to display the sequence of images as an animation. These animations can also be saved in a video format.

In [None]:
def sorted_image_dirs(path: str):
    '''
    List everything matching ``path + '*'`` ordered by image number.

    The image number is the integer found between the last '-' of the file
    name and the first '.' that follows it (e.g. ``Image-24.dcm`` -> 24).
    '''
    def image_number(file_path):
        file_name = file_path.split('/')[-1]
        return int(file_name.split('-')[-1].split('.')[0])

    return sorted(glob.glob(path + '*'), key=image_number)


def get_all_images(path: str):
    '''
    Load every image under ``path`` in image-number order, skipping blanks.

    Each file returned by ``sorted_image_dirs`` is read with pydicom and
    passed through ``get_image``; images whose maximum value is 0 are dropped.
    '''
    frames = (get_image(pydicom.dcmread(file_path)) for file_path in sorted_image_dirs(path))
    # Keep only the non-blank images
    return [frame for frame in frames if np.max(frame) != 0]
        
    
def show_animation(images: list):
    '''
    Build an animation that steps through a list of images.

    For inline playback set: matplotlib.rcParams['animation.html'] = 'jshtml'

    Args:
        images: Non-empty list of images accepted by ``plt.imshow``; the
            first one is drawn immediately and the rest are swapped in frame
            by frame.

    Returns:
        matplotlib.animation.FuncAnimation with one frame per image and a
        20 ms interval between frames.
    '''
    # `import matplotlib` alone does not guarantee that the animation
    # submodule is loaded, so import it explicitly where it is used.
    from matplotlib import animation

    fig = plt.figure(figsize=(6, 6))
    plt.axis('off')
    im = plt.imshow(images[0], cmap='gray')

    def animate_func(i):
        # Replace the displayed pixel data with frame i
        im.set_array(images[i])
        return [im]

    return animation.FuncAnimation(fig, animate_func, frames=len(images), interval=20)

## Show the FLAIR Sequence 

In [None]:
# flair_images = get_all_images(df['flair'][0])
    
# print('No of images:', len(flair_images))
# flair_animation = show_animation(flair_images)
# flair_animation
# # flair_animation.save('./a.mp4')

## Show the T2w Sequence 

In [None]:
# t2w_images = get_all_images(df['t2w'][0])
    
# print('No of images:', len(t2w_images))
# show_animation(t2w_images)

## Plot of MRIs with Their respective Sequence, plane & MGMT_value

In [None]:
def get_image_plane(data):
    '''
    Classify a slice as 'coronal', 'axial' or 'sagittal'.

    The six values of ``data.ImageOrientationPatient`` are rounded and the
    first two components of each triple are looked up in a table. Returns
    None when the rounded orientation matches none of the known patterns.
    '''
    row_x, row_y, _, col_x, col_y, _ = [round(value) for value in data.ImageOrientationPatient]

    planes = {
        (1, 0, 0, 0): 'coronal',
        (1, 0, 0, 1): 'axial',
        (0, 1, 0, 0): 'sagittal',
    }
    return planes.get((row_x, row_y, col_x, col_y))

In [None]:
# Plot the middle slice of 16 randomly chosen (patient, sequence type) pairs,
# titled with the patient id, MGMT_value, image plane and sequence type.
# The draws are unseeded, so the selection changes on every run.
fig = plt.figure(figsize=(35,20))

seq_types = ['flair', 't1w', 't1wce', 't2w']

for i in range(16):
    
    # random row of df and random sequence-type column
    index = np.random.randint(low=0, high=len(df))
    seq_type = np.random.choice(seq_types)

    # path for the randomly selected image and sequence type
    seq_path = df[seq_type][index]

    # list of images in the seq_path
    tmp = sorted_image_dirs(seq_path)

    # Get the middle image dicom
    dicom = pydicom.dcmread(tmp[len(tmp)//2])
    
    patient_id, mgmt, plane = df['BraTS21ID_full'][index], df['MGMT_value'][index], get_image_plane(dicom)
    
    # 4x5 grid, of which only the first 16 cells are filled
    ax = fig.add_subplot(4,5,i+1)
    ax.imshow(get_image(dicom), cmap='gray')
    plt.title(f'ID: {patient_id}, MGMT_value: {mgmt}, Plane: {plane}, Seq_type: {seq_type}')

## The Four Sequence Types Look Different for the Same Patient

In [None]:
# Show the middle slice of each of the four sequence types side by side for
# the patient in row `index` of df. Relies on `seq_types` from the previous cell.
fig = plt.figure(figsize=(35,20))

index = 0

for i in range(4):
    
    seq_type = seq_types[i]
    seq_path = df[seq_type][index]

    # files of this sequence, ordered by image number
    tmp = sorted_image_dirs(seq_path)

    # Get the middle image dicom
    dicom = pydicom.dcmread(tmp[len(tmp)//2])
    
    patient_id, mgmt, plane = df['BraTS21ID_full'][index], df['MGMT_value'][index], get_image_plane(dicom)
    
    # one row with four panels
    ax = fig.add_subplot(1,4,i+1)
    ax.imshow(get_image(dicom), cmap='gray')
    plt.title(f'ID: {patient_id}, MGMT_value: {mgmt}, Plane: {plane}, Seq_type: {seq_type}')

In [None]:
submission = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")
# submission.to_csv("submission.csv", index=False)
submission

In [None]:
# Load training csv file
df = pd.read_csv('../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv')

def get_patient_id(patient_id):
    '''
    Format a numeric patient id as a zero-padded 5-character string.

    Args:
        patient_id: Non-negative integer id, e.g. ``42``.

    Returns:
        str: The id padded on the left with zeros to 5 characters
        (``42`` -> ``'00042'``). Ids that already have 5 or more digits are
        returned without padding.
    '''
    # zfill pads to a fixed width. The previous if/elif chain always prepended
    # one '0' in its last branch, which gave 6 characters for ids >= 10000.
    return str(patient_id).zfill(5)

def get_path(row):
    '''
    Build the FLAIR PNG folder path for a row that has a ``BraTS21ID`` attribute.
    '''
    folder_name = get_patient_id(row.BraTS21ID)
    return '../input/rsna-miccai-png/train/' + folder_name + '/FLAIR/'

# FLAIR folder path for every patient
df['path'] = df.apply(get_path, axis=1)

# Drop the two patient ids that have no FLAIR directory, then renumber the rows.
df = df.loc[~df.BraTS21ID.isin([109, 709])].reset_index(drop=True)

df.head()



In [None]:
sample = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/sample_submission.csv")
sample

BraTS21ID is the patient's ID.

Class 0 refers to patients whose MGMT promoter is not methylated.

Class 1 refers to patients whose MGMT promoter is methylated.

In [None]:
train = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv")
train

## Let's take a look at one dcm file.
To see what a dcm file looks like, firstly open one dcm file.

In [None]:
# Read one FLAIR slice of patient 00000 and display its raw pixel array
# (no scaling applied) in grayscale with the axes hidden.
dataset = pydicom.filereader.dcmread('../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/FLAIR/Image-109.dcm')
img = dataset.pixel_array

fig, ax = plt.subplots()
ax.imshow(img, cmap='gray')
ax.set_axis_off()
plt.show()

In [None]:
def makeimg(path):
    """Read the DICOM file at ``path`` and return its raw pixel array."""
    return pydicom.filereader.dcmread(path).pixel_array

## Let's look at typical examples of images of FLAIR, T1w, T1wCE, and T2w.
#### After issuing the path, use this function to see typical examples of FLAIR, T1w, T1wCE, and T2w images.

#### Before that, I checked whether each folder contains the same number of dcm files.

## Create dcm folder path
First, let's think about putting out the path of the dcm image.

In [None]:
train["imfolder"] = ['{0:05d}'.format(s) for s in train["BraTS21ID"]]
train

In [None]:
train_path = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train"

train["path"] = [os.path.join(train_path,s) for s in train["imfolder"]  ]
train

In [None]:
Scans = ["FLAIR","T1w","T1wCE","T2w"]

In [None]:
Scans[1]

## Check the number of dcm files
#### In the first place, check whether the number of dcm of these Scans is the same for each patient.

In [None]:
from tqdm import tqdm

In [None]:
train.head(3)

#### First of all, the FLAIR path on the 0th line of train can be expressed as follows.

In [None]:
os.path.join(train["path"].iloc[0],"FLAIR")

In [None]:
os.listdir(os.path.join(train["path"].iloc[0],"FLAIR"))[:10]

In [None]:
def sorted_nicely(l):
    """Sort the given iterable in the way that humans expect.

    Digit runs are compared as integers, so 'Image-2' sorts before
    'Image-10'.

    Args:
        l: Iterable of strings.

    Returns:
        A new list with the items in natural order.
    """
    def convert(text):
        # Digit runs become ints; everything else stays text
        return int(text) if text.isdigit() else text

    def alphanum_key(key):
        # The capturing group keeps the digit runs in the split result
        return [convert(c) for c in re.split('([0-9]+)', key)]

    # The leftover debug print of the key function has been removed; it wrote
    # a function repr to the output on every call.
    return sorted(l, key=alphanum_key)
# List the patient folders and draw a random position into that list
# (unseeded, so a different patient may be picked on every run).
patient_ids = os.listdir('../input/rsna-miccai-brain-tumor-radiogenomic-classification/train')
IDX = np.random.choice(len(patient_ids))

In [None]:
# Collect the naturally sorted file names of each sequence type for the
# randomly selected patient.
patient_id = patient_ids[IDX]

flair_filenames = os.listdir(f'../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/{patient_id}/FLAIR/')
flair_filenames = sorted_nicely(flair_filenames)

T1w_filenames = os.listdir(f'../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/{patient_id}/T1w/')
T1w_filenames = sorted_nicely(T1w_filenames)

T1wCE_filenames = os.listdir(f'../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/{patient_id}/T1wCE/')
# Sort the T1wCE listing itself; this line previously sorted T1w_filenames,
# which replaced the T1wCE listing with the T1w one.
T1wCE_filenames = sorted_nicely(T1wCE_filenames)

T2w_filenames = os.listdir(f'../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/{patient_id}/T2w/')
T2w_filenames = sorted_nicely(T2w_filenames)

In [None]:
flair_filenames[:5]

In [None]:
T1w_filenames[:5]

In [None]:
len(os.listdir(os.path.join(train["path"].iloc[0],"FLAIR")))

In [None]:
# Add one "<scan>_count" column per scan type with the number of files in
# each patient's folder for that scan.
for scan in Scans:
    train[scan + "_count"] = [
        len(os.listdir(os.path.join(patient_dir, scan)))
        for patient_dir in tqdm(train["path"])
    ]

In [None]:
train

You can see that for some IDs the file counts are the same across scans, and for others they differ.

Check if they are all the same

In [None]:
# True for the rows whose four scan types all have the same number of files
allsame = [
    flair == t1w == t1wce == t2w
    for flair, t1w, t1wce, t2w in zip(
        train["FLAIR_count"], train["T1w_count"], train["T1wCE_count"], train["T2w_count"]
    )
]

In [None]:
train["allsame"] = allsame
train

In [None]:
train["allsame"].sum()

In [None]:
train["allsame"].sum()/len(train) * 100

## Deepen your understanding by looking at the images of people who have the same number of images
#### For an easy-to-understand image, let's try a person who has all the images.

#### Moreover, let's look at the images of MGMT_value is 1 or 0 in each.

In [None]:
train["allsame"]

In [None]:
train[train["allsame"]]

In [None]:
train["BraTS21ID"]

Row 64 is the person with ID 100, whose MGMT_value is 1 (the MGMT promoter is methylated).

Row 65 is the person with ID 102, whose MGMT_value is 0 (the MGMT promoter is not methylated). Let's take a look at all the images of both.

First, walk through the steps for the person in row 64, then turn them into a function so the same can be done for the person in row 65.

In [None]:
row_id = 64

In [None]:
train["BraTS21ID"].iloc[row_id]

In [None]:
temp_folder = train["path"].iloc[row_id]
temp_folder

In [None]:
temp_folder2 = os.path.join(temp_folder,"FLAIR")
temp_files = os.listdir(temp_folder2)
temp_files[:3]

In [None]:
# Extract the number embedded in each file name, e.g. 'Image-4.dcm' -> '4'
imagenum = [s.split("-")[1] for s in temp_files] # 'Image-4.dcm' -> '4.dcm'
imagenum = [s.split(".")[0] for s in imagenum] # '4.dcm' -> '4' (index [0] is the part before the dot)
imagenum[:3]

In [None]:
temp_path = [os.path.join(temp_folder2,s) for s in temp_files]
temp_path[:3]

In [None]:
tempdf = pd.DataFrame()
tempdf["image_num"] = imagenum
tempdf["image_num"] = tempdf["image_num"].astype("int")

tempdf["temp_path"] = temp_path
tempdf.head(5)

In [None]:
tempdf = tempdf.sort_values("image_num").reset_index(drop=True)
tempdf.head(5)

In [None]:
finpath = tempdf["temp_path"]
finpath

In [None]:
def makepath(row_ID,scan):
    """
    Return the file paths of one scan type for one row of ``train``,
    ordered by the image number embedded in the file names.

    Args:
        row_ID: Positional row index into ``train``.
        scan: Name of the scan sub-folder (e.g. "FLAIR").

    Returns:
        pandas Series named "temp_path" with a fresh 0..n-1 index.
    """
    scan_dir = os.path.join(train["path"].iloc[row_ID], scan)
    file_names = os.listdir(scan_dir)

    listing = pd.DataFrame()
    # 'Image-4.dcm' -> '4' -> 4
    listing["image_num"] = [name.split("-")[1].split(".")[0] for name in file_names]
    listing["image_num"] = listing["image_num"].astype("int")
    listing["temp_path"] = [os.path.join(scan_dir, name) for name in file_names]

    ordered = listing.sort_values("image_num").reset_index(drop=True)
    return ordered["temp_path"]

In [None]:
Scans

In [None]:
row_id=64

# One column of sorted file paths per scan type for the patient in row `row_id`.
# NOTE(review): assigning a Series to a DataFrame column aligns on the index, so
# every column appears to follow the length of the first scan (shorter ones get NaN) - confirm.
sampledf = pd.DataFrame()
for scan in Scans:
    sampledf[scan + "_path"] = makepath(row_id,scan)

In [None]:
row_id=64

# Same table as sampledf, but for the next row (row_id + 1)
sampledf2 = pd.DataFrame()
for scan in Scans:
    sampledf2[scan + "_path"] = makepath(row_id+1,scan)

In [None]:
sampledf.head(5)

## The one with MGMT_value = 1
#### Visualize the person with gene methylation = people with favorable prognostic factors and strong predictors of responsiveness to chemotherapy

In [None]:
# Print the label of the patient in row `row_id`, then draw one figure per
# image index showing the four scan types next to each other.
print("MGMT_value = " + str(train["MGMT_value"].iloc[row_id]))


for row in range(len(sampledf)):
    # a new figure is opened for every row and is not closed afterwards
    plt.figure(figsize=(80,10))
    for num,scan in enumerate(Scans):
        img = makeimg(sampledf[scan + "_path"].iloc[row])
        # only the first 4 cells of the 4x25 grid are used
        plt.subplot(4,25,num+1)
        #plt.axis("off")
        plt.imshow(img)
        
        # scan names as column titles on the first figure only
        if row==0:
            plt.title(scan,fontsize=18)
        # row label on the left-most panel
        if num==0:
            plt.ylabel("row=" + str(row),fontsize=18)

## The one with MGMT_value = 0
#### Visualize the person who does not have gene methylation = those who do not have favorable prognostic and strong predictors of responsiveness to chemotherapy

In [None]:
# Print the label of the patient in row `row_id + 1`, then draw one figure per
# image index showing the four scan types next to each other.
print("MGMT_value = " + str(train["MGMT_value"].iloc[row_id+1]))


for row in range(len(sampledf2)):
    # a new figure is opened for every row and is not closed afterwards
    plt.figure(figsize=(80,10))
    for num,scan in enumerate(Scans):
        img = makeimg(sampledf2[scan + "_path"].iloc[row])
        # only the first 4 cells of the 4x25 grid are used
        plt.subplot(4,25,num+1)
        #plt.axis("off")
        plt.imshow(img)
        
        # scan names as column titles on the first figure only
        if row==0:
            plt.title(scan,fontsize=18)
        # row label on the left-most panel
        if num==0:
            plt.ylabel("row=" + str(row),fontsize=18)

## When the number of files differs between the scans (about 90% of the patients)
#### When the four scan types do not have the same number of files, do the file numbers line up in chronological order, or are they just serial numbers? Let's look at the person with id = 0.

In [None]:
train

In [None]:
row_id=0

sampledf3 = pd.DataFrame()
for scan in Scans:
    sampledf3[scan + "_path"] = makepath(row_id,scan)

In [None]:
row_id2 = 0

sampledf3 = pd.DataFrame()
for scan in Scans:
    sampledf3[scan + "_path"] = makepath(row_id2,scan)

In [None]:
sampledf3

In [None]:
sampledf3["T1w_path"].iloc[32]

I confirmed that the bottom rows are NaN. The file numbers are sorted in ascending order and row 32 holds image number 33, so the file names are consecutive serial numbers.

In [None]:
# Print the label of the patient in row `row_id2`, then draw one figure for
# each of the first 33 image indices with the four scan types side by side.
print("MGMT_value = " + str(train["MGMT_value"].iloc[row_id2]))

# NOTE(review): 33 is hard-coded; presumably the smallest file count among the four scans of this patient - confirm
for row in range(33):
    # a new figure is opened for every row and is not closed afterwards
    plt.figure(figsize=(80,5))
    for num,scan in enumerate(Scans):
        img = makeimg(sampledf3[scan + "_path"].iloc[row])
        # only the first 4 cells of the 4x33 grid are used
        plt.subplot(4,33,num+1)
        #plt.axis("off")
        plt.imshow(img)
        
        # scan names as column titles on the first figure only
        if row==0:
            plt.title(scan,fontsize=18)
        # row label on the left-most panel
        if num==0:
            plt.ylabel("row=" + str(row),fontsize=18)

## Check how often the file counts match in the test data
#### The code is basically the same as for train, so it is put together in fewer cells.

In [None]:
# Zero-padded folder name for every test patient
sample["imfolder"] = [f'{patient:05d}' for patient in sample["BraTS21ID"]]

test_path = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/test"

# Folder path for every test patient
sample["path"] = [os.path.join(test_path, folder) for folder in sample["imfolder"]]


allres = []

# One "<scan>_count" column per scan type with the number of files in each patient's folder
for scan in Scans:
    sample[scan + "_count"] = [
        len(os.listdir(os.path.join(patient_dir, scan)))
        for patient_dir in tqdm(sample["path"])
    ]

In [None]:
sample

In [None]:
# True for the test rows whose four scan types all have the same number of files
allsame = [
    flair == t1w == t1wce == t2w
    for flair, t1w, t1wce, t2w in zip(
        sample["FLAIR_count"], sample["T1w_count"], sample["T1wCE_count"], sample["T2w_count"]
    )
]

sample["allsame"] = allsame

sample

In [None]:
sample["allsame"].sum()

In [None]:
sample["allsame"].sum()/len(sample) * 100

In [None]:
root_dir = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/'
df = pd.read_csv(root_dir+'train_labels.csv')
sns.countplot(data=df, x='MGMT_value')

In [None]:
 pip install efficientnet_pytorch