In [None]:
import os
import re
import cv2
import sys
import glob
import time
import shutil
import random
import pydicom
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
# import nibabel as nib
import SimpleITK as sitk
import matplotlib.pyplot as plt
# from statistics import mean, median, mode

In [None]:
# Root folder of the competition dataset; the 'train' and 'test' splits are read from below it.
DATA_PATH = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'

# IMG_DIM: side length (pixels) of every square output slice.
# NUM_SLICES: number of slices kept per exported volume.
IMG_DIM, NUM_SLICES = 512, 64

# Series that get exported. 'T1w' is not listed: gen_3d_array uses it by default
# as the reference grid the other series are resampled onto.
mri_types = [
    'FLAIR',
    'T1wCE',
    'T2w',
]

In [None]:
# One series reader is created up front and reused by every get_sitk call.
reader = sitk.ImageSeriesReader()
reader.LoadPrivateTagsOn()
def get_sitk(path):
    """Read the DICOM series found in directory ``path``.

    The file list is obtained from ``GetGDCMSeriesFileNames`` and handed to the
    shared module-level ``reader``; the image it produces is returned.
    """
    series_files = reader.GetGDCMSeriesFileNames(path)
    reader.SetFileNames(series_files)
    loaded_image = reader.Execute()
    return loaded_image

# One resampling filter (linear interpolation) is created up front and reused by every call.
resampler = sitk.ResampleImageFilter()
resampler.SetInterpolator(sitk.sitkLinear)
def resample(image, ref_image):
    """Resample ``image`` onto the voxel grid of ``ref_image``.

    Parameters
    ----------
    image : SimpleITK image to be resampled.
    ref_image : SimpleITK image whose size, spacing, origin and direction
        define the output grid.

    Returns
    -------
    The resampled SimpleITK image. An identity transform is used, so the two
    inputs are only related through their physical-space metadata.
    """
    # SetReferenceImage copies the output size, spacing, origin and direction
    # from ref_image, so those do not need to be set one by one afterwards.
    resampler.SetReferenceImage(ref_image)
    # A freshly constructed AffineTransform is the identity.
    resampler.SetTransform(sitk.AffineTransform(image.GetDimension()))
    # Voxels that fall outside ``image`` are filled with 0. The previous value,
    # image.GetPixelIDValue(), is the pixel *type* code, not an intensity.
    resampler.SetDefaultPixelValue(0)
    return resampler.Execute(image)

def normalize(data):
    """Min-max scale ``data`` so that its values span [0, 1].

    Parameters
    ----------
    data : array-like of numbers (must be non-empty, as np.min/np.max require).

    Returns
    -------
    Array of the same shape with the minimum mapped to 0 and the maximum to 1.
    A constant input (max == min) yields all zeros instead of NaN.
    """
    lowest = np.min(data)
    span = np.max(data) - lowest
    # A constant input has zero span; dividing by it would fill the result
    # with NaN. Dividing the (all-zero) shifted data by 1 gives zeros instead.
    if span == 0:
        span = 1
    return (data - lowest) / span

def img_norm(img):
    '''
    Input : 2-D image as a numpy array
    Output : image resized so that its longer side equals IMG_DIM (aspect ratio
             preserved) and zero-padded along the shorter side to IMG_DIM.
             Pixel intensities are not rescaled here.
    '''
    scale = IMG_DIM / max(img.shape)
    new_h, new_w = (round(side * scale) for side in img.shape)
    # cv2.resize takes the target size as (width, height).
    resized = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4)
    rows, cols = resized.shape

    if rows == cols:
        return resized

    # Split the missing pixels between the two sides of the shorter axis; when
    # the amount is odd the extra pixel goes to the top/left side. Computing the
    # split from the missing size keeps the result IMG_DIM wide/tall whatever
    # the parity of IMG_DIM.
    top = bottom = left = right = 0
    if rows < cols:
        missing = IMG_DIM - rows
        top, bottom = (missing + 1) // 2, missing // 2
    else:
        missing = IMG_DIM - cols
        left, right = (missing + 1) // 2, missing // 2
    return cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=0)

def gen_3d_array(scan_id, num_imgs=NUM_SLICES, img_size=IMG_DIM, mri_type=mri_types[0], ref_type='T1w', split='train'):
    """Build a fixed-size, [0, 1]-scaled volume for one series of one case.

    The ``mri_type`` series is resampled onto the grid of the ``ref_type``
    series, min-max normalised, and every 2-D slice along the first array axis
    is resized/padded by ``img_norm``. The slice axis is then centre-cropped or
    zero-padded (at the end) to ``num_imgs`` and the volume is rescaled again.

    Parameters
    ----------
    scan_id : name of the case folder under ``{DATA_PATH}/{split}``.
    num_imgs : number of slices in the output.
    img_size : side length of each output slice.
    mri_type : series folder to export.
    ref_type : series folder that provides the reference grid.
    split : dataset split folder ('train' or 'test').

    Returns
    -------
    float16 array of shape (1, img_size, img_size, num_imgs).
    """
    case_dir = f'{DATA_PATH}/{split}/{scan_id}'
    reference = get_sitk(f'{case_dir}/{ref_type}')
    mri = get_sitk(f'{case_dir}/{mri_type}')
    volume = normalize(sitk.GetArrayFromImage(resample(mri, reference)))

    n_slices = volume.shape[0]
    mri_re = np.zeros((n_slices, img_size, img_size), dtype=np.float16)
    for j in range(n_slices):
        plane = img_norm(volume[j])
        # img_norm always produces IMG_DIM x IMG_DIM; honour a different img_size
        # instead of failing on the assignment below.
        if plane.shape != (img_size, img_size):
            plane = cv2.resize(plane, (img_size, img_size))
        mri_re[j] = plane
    mri_re = mri_re.transpose(1, 2, 0)

    if n_slices < num_imgs:
        # Pad with the volume's own dtype so the result stays float16
        # (a default float64 block would silently upcast the whole array).
        n_zero = np.zeros((img_size, img_size, num_imgs - n_slices), dtype=mri_re.dtype)
        mri_re = np.concatenate((mri_re, n_zero), axis=-1)
    elif n_slices > num_imgs:
        # Centre crop of exactly num_imgs slices, also when num_imgs is odd.
        start = n_slices // 2 - num_imgs // 2
        mri_re = mri_re[:, :, start:start + num_imgs]

    mri_re = mri_re - np.min(mri_re)
    peak = np.max(mri_re)
    # A constant volume has peak 0 after the shift; skip the division that
    # would otherwise turn every voxel into NaN.
    if peak != 0:
        mri_re = mri_re / peak
    return np.expand_dims(mri_re, 0)

In [None]:
def ignore_files(dir, files):
    """``ignore`` callback for shutil.copytree that skips every regular file.

    Returns the entries of ``files`` that are regular files inside ``dir``;
    anything else (e.g. sub-directories) is not listed and so is still copied.
    """
    skipped = []
    for entry in files:
        candidate = os.path.join(dir, entry)
        if os.path.isfile(candidate):
            skipped.append(entry)
    return skipped
# Mirror the folder structure of both splits into the working directory.
# ignore_files filters out every regular file, so only directories are created.
for split_name in ('train', 'test'):
    target_dir = f'./{split_name}'
    # copytree raises if the destination already exists; skipping it here lets
    # this cell be re-run without an error.
    if not os.path.isdir(target_dir):
        shutil.copytree(f'{DATA_PATH}/{split_name}', target_dir, ignore=ignore_files)

In [None]:
# Export every series in mri_types for each training case as a compressed .npz
# file inside ./train/<case>/ (that folder is expected to exist already).
train_dirs = sorted(os.listdir(f'{DATA_PATH}/train/'))
for path in train_dirs:
    BASE_PATH = f'./train/{path}'
    for mri_type in mri_types:
        num_arr = gen_3d_array(path, mri_type=mri_type)
        file_name = f'{mri_type}'
        # Only the array is passed: with the (file, *args, **kwds) signature any
        # extra keyword such as allow_pickle is written to the archive as an
        # additional array. The volume is stored under the default key 'arr_0'.
        np.savez_compressed(os.path.join(BASE_PATH, file_name), num_arr)

In [None]:
# Export every series in mri_types for each test case as a compressed .npz
# file inside ./test/<case>/ (that folder is expected to exist already).
test_dirs = sorted(os.listdir(f'{DATA_PATH}/test/'))
for path in test_dirs:
    BASE_PATH = f'./test/{path}'
    for mri_type in mri_types:
        num_arr = gen_3d_array(path, mri_type=mri_type, split='test')
        file_name = f'{mri_type}'
        # Only the array is passed: with the (file, *args, **kwds) signature any
        # extra keyword such as allow_pickle is written to the archive as an
        # additional array. The volume is stored under the default key 'arr_0'.
        np.savez_compressed(os.path.join(BASE_PATH, file_name), num_arr)