In [None]:
# Install the gdcm package from the conda-forge channel; -y answers the confirmation prompt.
# NOTE(review): presumably needed so pydicom can decode compressed pixel data — confirm.
! conda install -c conda-forge gdcm -y

In [None]:
import numpy as np 
import pandas as pd 
import os
import pydicom
import glob
from tqdm.notebook import tqdm
from pydicom.pixel_data_handlers.util import apply_voi_lut
import matplotlib.pyplot as plt
from skimage import exposure
import cv2
import warnings
from fastai.vision.all import *
from fastai.medical.imaging import *
warnings.filterwarnings('ignore')
from glob import iglob
from joblib import Parallel, delayed
from tqdm.notebook import tqdm

In [None]:
class CFG:
    """Notebook-wide configuration constants, read as plain class attributes."""

    # Root folder of the input dataset (kept with its trailing slash, since
    # glob patterns are appended to it by string concatenation).
    data_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/'
    # Output root for generated files.
    working_dir = '/kaggle/working/'
    # Edge length, in pixels, that images are resized to.
    size = 192
    # Random seed.
    seed = 2021

In [None]:
def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """Read a DICOM file and return its pixel data min-max scaled to 8 bits.

    Args:
        path: Location of the DICOM file, handed straight to ``pydicom.dcmread``.
        voi_lut: When true, pass the pixel data through ``apply_voi_lut``
            (VOI LUT, if available by DICOM device, transforms raw DICOM data
            to a "human-friendly" view); otherwise use the raw ``pixel_array``.
        fix_monochrome: When true, invert the image if its
            ``PhotometricInterpretation`` is ``"MONOCHROME1"``.

    Returns:
        A ``np.uint8`` array with the shape of the pixel data, scaled so the
        smallest value maps to 0 and the largest to 255. An image whose pixels
        are all equal is returned as all zeros.
    """
    # dcmread is the supported reader; read_file is only a deprecated alias of it.
    dicom = pydicom.dcmread(path)
    if voi_lut:
        pixels = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        pixels = dicom.pixel_array
    # Do the arithmetic in float64: integer subtraction on narrow signed dtypes
    # (e.g. int16 spanning its full range) would silently wrap around.
    pixels = np.asarray(pixels, dtype=np.float64)
    # depending on this value, the image may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        pixels = np.amax(pixels) - pixels
    pixels = pixels - np.min(pixels)
    peak = np.max(pixels)
    if peak == 0:
        # Constant image: dividing by zero would produce NaNs and an undefined
        # uint8 cast, so return a well-defined black frame instead.
        return np.zeros(pixels.shape, dtype=np.uint8)
    return (pixels / peak * 255).astype(np.uint8)
        
    
def plot_img(img, size=(7, 7), is_rgb=True, title="", cmap='gray'):
    """Show a single image in a new matplotlib figure.

    Args:
        img: Image data handed to ``plt.imshow``.
        size: Figure size passed as ``figsize``.
        is_rgb: Accepted for interface compatibility; not used by the body.
        title: Text placed as the figure's suptitle.
        cmap: Colormap name forwarded to ``plt.imshow``.
    """
    canvas = plt.figure(figsize=size)
    plt.imshow(img, cmap=cmap)
    # The figure just created is the current one, so title it directly.
    canvas.suptitle(title)
    plt.show()


def plot_imgs(imgs, cols=4, size=7, is_rgb=True, title="", cmap='gray', img_size=(500,500)):
    """Show a sequence of images on a grid in one matplotlib figure.

    Args:
        imgs: Sized sequence of images; each is drawn with ``plt.imshow``.
        cols: Number of grid columns.
        size: Edge length of one grid cell; the figure is
            ``(cols*size, rows*size)``.
        is_rgb: Accepted for interface compatibility; not used by the body.
        title: Text placed as the figure's suptitle.
        cmap: Colormap name forwarded to ``plt.imshow``.
        img_size: ``(width, height)`` passed to ``cv2.resize`` for every image,
            or ``None`` to draw the images unchanged.
    """
    # Ceiling division gives exactly enough rows. The previous
    # ``len(imgs)//cols + 1`` reserved a blank extra row whenever the count was
    # a multiple of ``cols``. Keep at least one row so an empty list still
    # produces a valid figure size.
    rows = max(1, -(-len(imgs) // cols))
    fig = plt.figure(figsize=(cols*size, rows*size))
    for position, img in enumerate(imgs, start=1):
        if img_size is not None:
            img = cv2.resize(img, img_size)
        fig.add_subplot(rows, cols, position)
        plt.imshow(img, cmap=cmap)
    fig.suptitle(title)
    plt.show()

def resize(array, size, keep_ratio=False, resample=Image.LANCZOS):
    """Turn an array into an image object and scale it towards ``size`` x ``size``.

    Original from: https://www.kaggle.com/xhlulu/vinbigdata-process-and-resize-to-image

    Args:
        array: Pixel data accepted by ``Image.fromarray``.
        size: Target edge length; the target box is ``(size, size)``.
        keep_ratio: When true, use ``thumbnail`` on the image; otherwise use
            ``resize`` with exactly ``(size, size)``.
        resample: Resampling filter forwarded to the chosen method.

    Returns:
        The resulting image object.
    """
    # NOTE(review): `Image` is not imported explicitly in this file; presumably
    # it is PIL's Image module brought in by the fastai star import — confirm.
    picture = Image.fromarray(array)
    target = (size, size)

    if not keep_ratio:
        return picture.resize(target, resample)

    # thumbnail() works on the image in place, so there is no result to assign.
    picture.thumbnail(target, resample)
    return picture

def load_and_preprocess(fn):
    """Convert one DICOM file into a resized JPEG under ``CFG.working_dir``.

    The folder layout below ``CFG.data_path`` is mirrored, so
    ``<data_path>/a/b/c/x.dcm`` is written to ``<working_dir>/a/b/c/x.jpg``.

    Args:
        fn: Path of a DICOM file located under ``CFG.data_path``.

    Returns:
        None. The JPEG is written to disk as a side effect; missing output
        folders are created.
    """
    # read the image and scale it to the configured square size
    pixels = dicom2array(fn)
    picture = resize(pixels, size=CFG.size)

    # Swap only the extension: str.replace('dcm', 'jpg') would also rewrite a
    # "dcm" that happens to appear inside the file name itself.
    stem = os.path.splitext(os.path.basename(fn))[0]

    # Mirror the path relative to the data root rather than slicing a fixed
    # number of components, so the layout survives a different data_path depth,
    # and honour the configured output root instead of a hard-coded one.
    subdir = os.path.relpath(os.path.dirname(fn), CFG.data_path)
    out_dir = os.path.join(CFG.working_dir, subdir)
    os.makedirs(out_dir, exist_ok=True)

    picture.save(os.path.join(out_dir, stem + '.jpg'))


In [None]:
# Gather every path that sits exactly three levels below <data_path>/train/.
# iglob yields lazily; list() materialises the matches so they can be counted
# and iterated again later.
filelist = list(iglob(CFG.data_path + 'train/*/*/*'))

print(len(filelist))

In [None]:
# Run load_and_preprocess over every listed file on 4 thread-based workers.
# tqdm wraps the input list, so the bar advances as tasks are handed to joblib.
# `res` collects each call's return value, in input order.
res = Parallel(n_jobs=4, prefer="threads")(
    delayed(load_and_preprocess)(path) for path in tqdm(filelist)
)

In [None]:
# Pack the converted images into train.tar.gz, then remove the source folder.
# The two commands are chained with && so the folder is deleted only when tar
# succeeded; a failed archive no longer wipes the only copy of the images.
!tar -zcf train.tar.gz -C "/kaggle/working/train/" . && rm -rf /kaggle/working/train