## Import

In [None]:
import numpy as np
from scipy import stats
import pandas as pd
import nibabel as nib
import glob
from tensorflow.keras.utils import to_categorical
from tifffile import imsave
from tensorflow.keras import utils as np_utils

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import KFold

import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.animation as anim
import matplotlib.patches as mpatches
import matplotlib.gridspec as gridspec

from google.colab import drive 
# Mount Google Drive into the Colab filesystem at /content/drive (Colab-only step).
drive.mount('/content/drive')

Mounted at /content/drive


## Unzip data

In [None]:
# Drive folder that the (commented-out) unzip commands below read the BraTS 2020 archives from.
file_path = "/content/drive/My Drive/Projects/Data/CS184 Data"

In [None]:
# List the Drive data folder to check that the mount and `file_path` are valid.
!ls "{file_path}"

ls: cannot access '/content/drive/My Drive/Projects/Data/CS184 Data': No such file or directory


In [None]:
# !unzip -u "{file_path}/MICCAI_BraTS2020_TrainingData.zip" -d data
# !unzip -u "{file_path}/MICCAI_BraTS2020_ValidationData.zip" -d data

In [None]:
# List the local `data` directory that the unzip commands above extract into.
!ls data

ls: cannot access 'data': No such file or directory


## Load sample image

- References: https://www.kaggle.com/polomarco/brats20-3dunet-3dautoencoder

In [None]:
# Root folders of the extracted BraTS 2020 training and validation sets.
# NOTE(review): the unzip commands earlier in the notebook extract into `data`
# (relative to the working directory) while these paths point at `../data` —
# confirm which location is correct for the environment this runs in.
train_data_path = "../data/MICCAI_BraTS2020_TrainingData"
validation_data_path = "../data/MICCAI_BraTS2020_ValidationData"

In [None]:
# Load the FLAIR volume of training case 001 as a float array and report its shape.
# `sample_img` is reused by the preview plot in the next cell.
sample_img = nib.load(f'{train_data_path}/BraTS20_Training_001/BraTS20_Training_001_flair.nii.gz').get_fdata()
print(sample_img.shape)

FileNotFoundError: ignored

In [None]:
# Preview a single slice of the FLAIR sample volume loaded above.
fig = plt.figure(figsize=(20, 10))

# 2 x 4 grid (second row taller); only the top-left cell is used in this preview.
gs = gridspec.GridSpec(nrows=2, ncols=4, height_ratios=[1, 1.5])

# Show slice 65 along the third axis of the FLAIR sample, with a colorbar.
ax0 = fig.add_subplot(gs[0, 0])
flair = ax0.imshow(sample_img[:,:,65], cmap='bone')
ax0.set_title("FLAIR", fontsize=18, weight='bold', y=-0.2)
fig.colorbar(flair)

In [None]:
def load_image(file_path):
  """Load a NIfTI volume and rotate it by 90 degrees.

  Args:
    file_path: Path of the image file, passed straight to ``nib.load``.

  Returns:
    The array returned by ``get_fdata()`` after ``np.rot90`` (default
    arguments: one counter-clockwise quarter turn in the plane of the first
    two axes).
  """
  img = nib.load(file_path).get_fdata()
  # Rotate the volume that was just loaded. The earlier code rotated the
  # notebook-level `sample_img` instead, so `file_path` was effectively ignored
  # and the function failed whenever `sample_img` was not defined.
  return np.rot90(img)


def load_images(
    img_num="001",
    type='Training',
    file_path=train_data_path,
  ):
  """Load the four MRI modalities and the segmentation of one BraTS case.

  Args:
    img_num: Case number as it appears in the folder/file names (e.g. "001").
    type: Dataset phase used in the folder/file names (e.g. 'Training').
    file_path: Root folder containing the per-case directories.

  Returns:
    Tuple ``(flair, t1, t2, t1ce, seg)`` of arrays from ``get_fdata()``.
  """
  # Narrow the root folder down to this case's own directory.
  file_path = f"{file_path}/BraTS20_{type}_{img_num}"

  # Files are listed in the order the caller unpacks them.
  scan_files = (
      f"{file_path}/BraTS20_{type}_{img_num}_flair.nii.gz",
      f"{file_path}/BraTS20_{type}_{img_num}_t1.nii.gz",
      f"{file_path}/BraTS20_{type}_{img_num}_t2.nii.gz",
      f"{file_path}/BraTS20_{type}_{img_num}_t1ce.nii.gz",
      f"{file_path}/BraTS20_{type}_{img_num}_seg.nii.gz",
  )
  return tuple(nib.load(scan_file).get_fdata() for scan_file in scan_files)

In [None]:
def show_image(flair, t1, t2, t1ce, seg, slice_idx=65):
  """Plot one slice of each MRI modality plus the segmentation mask.

  The top row shows FLAIR, T1, T2 and T1-contrast side by side (each with a
  colorbar); the bottom row shows the mask centred under them.

  Args:
    flair, t1, t2, t1ce: Volumes indexed as ``volume[:, :, slice_idx]``.
    seg: Segmentation volume, indexed the same way.
    slice_idx: Index along the third axis of the slice to display.
      Defaults to 65, the value that used to be hard-coded.

  Returns:
    None. The figure is created through ``plt.figure`` and left for the
    notebook to render.
  """
  fig = plt.figure(figsize=(20, 10))
  gs = gridspec.GridSpec(nrows=2, ncols=4, height_ratios=[1, 1.5])

  # One panel per modality, left to right on the first row.
  panels = (
      ("FLAIR", flair),
      ("T1", t1),
      ("T2", t2),
      ("T1 contrast", t1ce),
  )
  for col, (title, volume) in enumerate(panels):
    ax = fig.add_subplot(gs[0, col])
    shown = ax.imshow(volume[:, :, slice_idx], cmap='bone')
    ax.set_title(title, fontsize=18, weight='bold', y=-0.2)
    # Attach the colorbar to its own panel explicitly instead of relying on
    # whichever axes happens to be current.
    fig.colorbar(shown, ax=ax)

  # The mask spans the two middle columns of the second row; no colorbar.
  mask_ax = fig.add_subplot(gs[1, 1:3])
  mask_ax.imshow(seg[:, :, slice_idx], cmap='summer')
  mask_ax.set_title("Mask", fontsize=18, weight='bold', y=-0.2)

  fig.suptitle("Multimodal Scans -  Data", fontsize=20, weight='bold')

In [None]:
# Load the default case (training case "001") and plot its modalities and mask.
flair, t1, t2, t1ce, seg = load_images()
show_image(flair, t1, t2, t1ce, seg)

## Preprocess data

In [None]:
# Survival table shipped with the training set.
survival_info_df = pd.read_csv(f'{train_data_path}/survival_info.csv')

# Name-mapping table; its ID column is renamed so both tables share "Brats20ID".
name_mapping_df = pd.read_csv(f'{train_data_path}/name_mapping.csv').rename(
    columns={'BraTS_2020_subject_ID': 'Brats20ID'}
)

# Right join: keep every case listed in the name mapping, whether or not it
# has a row in the survival table.
df = pd.merge(survival_info_df, name_mapping_df, on="Brats20ID", how="right")
print(df.shape)
df.head()

In [None]:
def get_file_paths(df):
  """Build the case-directory path for every row of ``df``.

  The phase is read from the second-to-last "_"-separated token of the ID
  (e.g. ``BraTS20_Training_001`` -> ``Training``). Training cases are placed
  under ``train_data_path``; every other phase under ``validation_data_path``.

  Args:
    df: DataFrame with a ``Brats20ID`` column of case identifiers.

  Returns:
    List of paths, one per row, in row order.
  """
  import os  # kept local, as in the original cell

  paths = []
  for id_ in df['Brats20ID']:
    phase = id_.split("_")[-2]
    root = train_data_path if phase == 'Training' else validation_data_path
    # Join with the case ID `id_`; the earlier code passed the builtin `id`
    # here, which made os.path.join raise a TypeError for training cases.
    paths.append(os.path.join(root, id_))
  return paths

In [None]:
# Attach the on-disk case directory to every row.
file_paths = get_file_paths(df)
df['path'] = file_paths

# Split the data into train / validation / test sets.
# Earlier random-split approach, kept for reference (superseded by the stratified folds below):
#train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=69, shuffle=True)
#train_df, val_df = train_df.reset_index(drop=True), val_df.reset_index(drop=True)
seed = 1
# Rows with a known Age form the labelled pool used for train/validation.
train_data = df.loc[df['Age'].notnull()].reset_index(drop=True)
# Age bucketed down to the decade (e.g. 57.3 -> 50.0); used only as the stratification label.
train_data["Age_rank"] =  train_data["Age"] // 10 * 10
# Case 355 is excluded from the pool — NOTE(review): the reason is not recorded here; confirm.
train_data = train_data.loc[train_data['Brats20ID'] != 'BraTS20_Training_355'].reset_index(drop=True, )

# 7 shuffled folds, stratified on the age bucket and seeded for reproducibility.
skf = StratifiedKFold(
    n_splits=7, random_state=seed, shuffle=True
)
# Record, for every row, the index of the fold in which it is held out.
for i, (train_index, val_index) in enumerate(
        skf.split(train_data, train_data["Age_rank"])
        ):
        train_data.loc[val_index, "fold"] = i

# Fold 0 is the validation set; the remaining six folds are the training set.
train_df = train_data.loc[train_data['fold'] != 0].reset_index(drop=True)
val_df = train_data.loc[train_data['fold'] == 0].reset_index(drop=True)

# Rows without an Age value make up the test set.
test_df = df.loc[~df['Age'].notnull()].reset_index(drop=True)
print("train_df:", train_df.shape, "val_df:", val_df.shape, "test_df:", test_df.shape)

In [None]:
# Peek at the first rows of the training split.
train_df.head()