In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2

import os
from tqdm.notebook import tqdm

In [None]:
# --- Run configuration ---
# When True, the conversion loop only visits the first three patient folders.
DEBUG = False

# Input: root of the competition's training DICOM folders.
TRAIN_DIR = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/'
# Sub-folder names read inside every patient folder.
MRI_TYPES = [
    'T1w',
    'T1wCE',
    'T2w',
    'FLAIR',
]

# Output: destination folder for the converted images and their file extension.
OUT_FOLDER = 'train'
EXT = 'jpg'

In [None]:
# Label table shipped with the competition data.
# NOTE(review): `train_labels` is not referenced again in the cells visible here.
labels_csv = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv'
train_labels = pd.read_csv(labels_csv)

In [None]:
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

In [None]:
# dicom = pydicom.dcmread('../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/FLAIR/Image-1.dcm')

In [None]:
def get_meta_info_v2(dicom):
    """Flatten the header elements of a DICOM dataset into a plain dict.

    Every element reachable through ``dicom.keys()`` is stored under its
    ``name`` with the spaces removed; the element named 'Pixel Data' is
    skipped. The dataset's ``timestamp`` attribute is added under the key
    'timestamp'. Elements that share a name overwrite each other, so the
    last one visited wins.
    """
    meta = {}
    for tag in dicom.keys():
        element = dicom.get(tag)
        # The image payload itself is not metadata - leave it out.
        if element.name == 'Pixel Data':
            continue
        meta[element.name.replace(' ', '')] = element.value
    meta['timestamp'] = dicom.timestamp
    return meta

In [None]:
# meta = get_meta_info_v2(dicom)
# meta2 = meta.copy()
# meta2['abc'] = 111

In [None]:
# pd.DataFrame([meta, meta2])

In [None]:
def process_dicom(path, voi_lut=True, fix_monochrome=True):
    """Read one DICOM file and return it as an 8-bit image plus its metadata.

    Args:
        path: Location of the DICOM file, handed to ``pydicom.dcmread``.
        voi_lut: When true, the pixel array is passed through
            ``apply_voi_lut``; otherwise the raw pixel array is used.
        fix_monochrome: When true, an image whose PhotometricInterpretation
            is "MONOCHROME1" is inverted (``max - value``).

    Returns:
        A ``(data, meta)`` tuple. ``data`` is a ``np.uint8`` array rescaled so
        that the smallest input value maps to 0 and the largest to 255; an
        image in which every pixel has the same value comes back as all
        zeros. ``meta`` is the dict produced by ``get_meta_info_v2``.
    """
    dicom = pydicom.dcmread(path)
    # VOI LUT (if available by DICOM device) is used to
    # transform raw DICOM data to "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
    # MONOCHROME1 stores the grey scale inverted (high value = dark) - flip it
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    data = data - np.min(data)
    # A constant image (e.g. an all-black slice) has a peak of 0 after the
    # shift; dividing by it would yield NaN and an undefined uint8 cast, so
    # the rescale is skipped and the image stays all zeros.
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)

    # read other meta
    meta = get_meta_info_v2(dicom)

    return data, meta

In [None]:
# Parallel per-image record lists: one entry is appended to each of them for
# every slice that was converted and written successfully.
patient_ids = []
image_names = []
mri_types = []
metas = []

# DEBUG restricts the run to the first three patient folders.
patient_folders = os.listdir(TRAIN_DIR)
if DEBUG:
    patient_folders = patient_folders[:3]
iterations = tqdm(patient_folders)

for patient_id in iterations:
    patient_dir = os.path.join(TRAIN_DIR, patient_id)
    for mri_type in MRI_TYPES:
        type_dir = os.path.join(patient_dir, mri_type)
        out_dir = os.path.join(OUT_FOLDER, patient_id, mri_type)
        os.makedirs(out_dir, exist_ok=True)
        for image_name in os.listdir(type_dir):
            path = os.path.join(type_dir, image_name)
            # Swap only the extension; str.replace('dcm', EXT) would also
            # rewrite a 'dcm' that happens to occur elsewhere in the name.
            out_name = os.path.splitext(image_name)[0] + '.' + EXT
            out_path = os.path.join(out_dir, out_name)
            try:
                # read dicom
                image, meta = process_dicom(path)

                # cv2.imwrite reports failure through its return value, so
                # turn a False into an error instead of indexing a file that
                # was never written.
                if not cv2.imwrite(out_path, image):
                    raise IOError('could not write ' + out_path)

                image_names.append(out_name)
                patient_ids.append(patient_id)
                mri_types.append(mri_type)
                metas.append(meta)
            except Exception as ex:
                # Best effort: report which file failed and keep going.
                print('{}: {}'.format(path, ex))

In [None]:
# plt.imshow(image, cmap='gray')

In [None]:
# Identifier columns first, then one column per collected metadata key; the
# two frames are joined side by side on their default row index.
index_frame = pd.DataFrame({'image_name': image_names, 'patient_id': patient_ids, 'mri_type': mri_types})
meta_frame = pd.DataFrame(metas)
train_df = pd.concat([index_frame, meta_frame], axis=1)

In [None]:
# Preview the first rows of the assembled table.
train_df.head(n=5)

In [None]:
# Archive the converted images. The folder name is taken from OUT_FOLDER so
# the archive always matches the directory the images were written to.
!zip -r "{OUT_FOLDER}.zip" "{OUT_FOLDER}" >> log.txt

In [None]:
# Delete the converted-image folder named by OUT_FOLDER. The explicit `!`
# keeps this working when IPython automagic is switched off.
!rm -rf "{OUT_FOLDER}"

In [None]:
# Save the per-image table (without the row index) as train.csv.
train_df.to_csv(path_or_buf='train.csv', index=False)