In [10]:
import os
import sys

from pathlib import Path
import glob

import numpy as np
import pandas as pd

import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2

import torch

from tqdm.notebook import tqdm

import warnings
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
%matplotlib inline

In [11]:
# Root folder of the competition data. The default is the original
# machine-specific location; set the RSNA_DATA_ROOT environment variable to
# point the notebook at another copy without editing this cell.
data_root = Path(os.environ.get(
    'RSNA_DATA_ROOT',
    'E:\\datasets\\rsna-miccai-brain-tumor-radiogenomic-classification',
))

In [27]:
# Names of the scan sub-folders that get_case_arr looks for inside a case
# directory.
SCAN_TYPE = (
    'FLAIR',
    'T1w',
    'T1wCE',
    'T2w',
)

In [22]:
def get_file_frame(config = 'train'):
    """Index every DICOM file of one dataset split in a DataFrame.

    Files are discovered with the glob pattern
    ``data_root/<config>/*/*/*.dcm``. The last three components of each
    matched path are read as case folder, scan folder and file name.

    Parameters
    ----------
    config : str
        Name of the split sub-folder under ``data_root`` (default 'train').

    Returns
    -------
    pandas.DataFrame
        One row per matched file, in glob order, with the columns
        ``file_path``, ``file_name``, ``file_index`` (the integer between the
        first '-' and the following '.' of the file name), ``scan`` and
        ``case``. When nothing matches, an empty frame with the same columns
        is returned.

    Raises
    ------
    IndexError, ValueError
        If a file name does not follow the ``<prefix>-<int>.<ext>`` layout.
    """
    columns = ['file_path', 'file_name', 'file_index', 'scan', 'case']
    pattern = str(data_root / f'{config}' / '*' / '*' / '*.dcm')

    # Parse each path once in plain Python instead of running four row-wise
    # DataFrame.apply passes: this avoids the per-row apply overhead and stays
    # well-defined when the glob matches nothing.
    records = []
    for file_path in glob.glob(pattern):
        case, scan, file_name = file_path.split(os.path.sep)[-3:]
        file_index = int(file_name.split('-')[1].split('.')[0])
        records.append((file_path, file_name, file_index, scan, case))

    return pd.DataFrame(records, columns = columns)

(348641, 5)


Unnamed: 0,file_path,file_name,file_index,scan,case
0,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-1.dcm,1,FLAIR,0
1,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-10.dcm,10,FLAIR,0
2,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-100.dcm,100,FLAIR,0
3,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-101.dcm,101,FLAIR,0
4,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-102.dcm,102,FLAIR,0


In [24]:
# Build the file index for the training split, report its size, then preview
# the first rows.
train_df = get_file_frame(config = 'train')
print(train_df.shape)
train_df.head(n = 5)

(348641, 5)


Unnamed: 0,file_path,file_name,file_index,scan,case
0,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-1.dcm,1,FLAIR,0
1,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-10.dcm,10,FLAIR,0
2,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-100.dcm,100,FLAIR,0
3,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-101.dcm,101,FLAIR,0
4,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-102.dcm,102,FLAIR,0


In [23]:
# Build the file index for the test split, report its size, then preview the
# first rows.
test_df = get_file_frame(config = 'test')
print(test_df.shape)
test_df.head(n = 5)

(51473, 5)


Unnamed: 0,file_path,file_name,file_index,scan,case
0,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-1.dcm,1,FLAIR,1
1,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-10.dcm,10,FLAIR,1
2,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-100.dcm,100,FLAIR,1
3,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-101.dcm,101,FLAIR,1
4,E:\datasets\rsna-miccai-brain-tumor-radiogenom...,Image-102.dcm,102,FLAIR,1


In [25]:
def dicom2array(path, voi_lut = True, fix_monochrome = True, remove_black_boundary = True,
                output_size = (512, 512)):
    """Read one DICOM file and return its image scaled to the range [0, 1].

    Parameters
    ----------
    path : str or path-like
        Location of the ``.dcm`` file.
    voi_lut : bool
        When true, pass the raw pixels through ``apply_voi_lut`` first.
    fix_monochrome : bool
        When true, invert the intensities of images whose
        ``PhotometricInterpretation`` is ``'MONOCHROME1'``.
    remove_black_boundary : bool
        When true, crop to the bounding box of the non-zero pixels (only if
        that box spans more than 10 pixels in both directions) and then
        resize the result to ``output_size``.
    output_size : tuple of int
        ``dsize`` handed to ``cv2.resize`` (width, height). Defaults to
        ``(512, 512)``.

    Returns
    -------
    numpy.ndarray
        float64 image. A constant image (e.g. an all-black slice) comes back
        as all zeros instead of NaN.

    Notes
    -----
    NOTE(review): the resize is only applied when ``remove_black_boundary``
    is true, so with ``remove_black_boundary=False`` the native image size is
    returned. This matches the previous behaviour; confirm it is intended.
    """
    # dcmread is the current name of the reader; read_file is a deprecated alias.
    dicom = pydicom.dcmread(path)

    pixels = dicom.pixel_array
    if voi_lut:
        pixels = apply_voi_lut(pixels, dicom)

    # Work on a float copy: in-place true division is not allowed on integer
    # arrays, and the array cached on the dataset must not be modified.
    data = np.asarray(pixels).astype(np.float64)

    if fix_monochrome and dicom.PhotometricInterpretation == 'MONOCHROME1':
        # ndarray has no .amax() method; .max() is the array-method spelling.
        data = data.max() - data

    # Min-max normalisation, guarded so a constant image does not produce 0/0.
    data -= data.min()
    peak = data.max()
    if peak > 0:
        data /= peak

    if remove_black_boundary:
        rows, cols = np.where(data > 0)
        if rows.size > 0:
            min_row, max_row = rows.min(), rows.max()
            min_col, max_col = cols.min(), cols.max()
            if (max_row - min_row) > 10 and (max_col - min_col) > 10:
                # +1 because the slice end is exclusive; without it the last
                # non-zero row and column would be cut off.
                data = data[min_row:max_row + 1, min_col:max_col + 1]
        data = cv2.resize(data, output_size)

    return data

In [43]:
def _slice_sort_key(file_path):
    """Sort key ordering files named like 'Image-<n>.dcm' by the integer <n>."""
    stem = os.path.splitext(os.path.basename(file_path))[0]
    try:
        return (0, int(stem.rsplit('-', 1)[-1]), stem)
    except ValueError:
        # Unexpected file name: place it after the numbered files, by name.
        return (1, 0, stem)


def get_case_arr(path):
    """Load all scans of one case and compute per-pixel statistics.

    Parameters
    ----------
    path : str or pathlib.Path
        Case directory containing one sub-folder per entry of ``SCAN_TYPE``.

    Returns
    -------
    dict
        For every scan type ``s`` in ``SCAN_TYPE``:

        * ``s``           -> images from ``dicom2array`` stacked along a new
          first axis, ordered by the number in the file name;
        * ``s + '-mean'`` -> mean over that first axis;
        * ``s + '-std'``  -> standard deviation over that first axis.

    Raises
    ------
    ValueError
        If a scan sub-folder contains no ``*.dcm`` file.
    """
    case_dir = Path(path)
    output = dict()
    for scan in SCAN_TYPE:
        path_list = glob.glob(str(case_dir / f'{scan}' / '*.dcm'))
        if not path_list:
            # Fail with a readable message instead of numpy's
            # "need at least one array" error.
            raise ValueError(f'no .dcm files found for scan {scan!r} in {case_dir}')

        # glob order is not numeric (Image-1, Image-10, Image-100, ...), so
        # sort by the slice number to keep the stacked volume in order.
        path_list.sort(key = _slice_sort_key)

        volume = np.stack([dicom2array(p) for p in path_list], axis = 0)
        output[scan] = volume
        output[scan + '-mean'] = volume.mean(axis = 0)
        output[scan + '-std'] = volume.std(axis = 0)
    return output
            

# Load all scan types of training case 00000. Only the last expression of a
# cell is rendered, so the output shown for this cell is the FLAIR std map.
data_dict = get_case_arr(data_root.joinpath('train', '00000'))
data_dict['FLAIR-mean']
data_dict['FLAIR-std']

array([[0.        , 0.        , 0.        , ..., 0.09987492, 0.09987492,
        0.09987492],
       [0.        , 0.        , 0.        , ..., 0.09987492, 0.09987492,
        0.09987492],
       [0.        , 0.        , 0.        , ..., 0.09987492, 0.09987492,
        0.09987492],
       ...,
       [0.19974984, 0.19974984, 0.19974984, ..., 0.19974984, 0.19974984,
        0.19974984],
       [0.19974984, 0.19974984, 0.19974984, ..., 0.19974984, 0.19974984,
        0.19974984],
       [0.19974984, 0.19974984, 0.19974984, ..., 0.19974984, 0.19974984,
        0.19974984]])