In [None]:
import pydicom
import os 
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import cv2
import math

# READ DATA

In [None]:
# Root folder of the training scans; its entries are treated as one folder per patient.
data_dir = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train"
# os.listdir returns entries in arbitrary order. Sorting makes positional
# selections such as patients[:1] pick the same patients on every run.
patients = sorted(os.listdir(data_dir))
labels_df = pd.read_csv("../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv")
# Left-pad the numeric BraTS21ID with zeros to five characters so it can be
# compared with the entries of `patients` (presumably the folder naming
# scheme - verify against the data).
labels_df["patient"] = [str(l).zfill(5) for l in labels_df["BraTS21ID"]]

In [None]:
# Quick sanity summary of the data set.
print(f" The number of different patients is {len(patients)}")
# MGMT_value is the classification target (MGMT promoter methylation status),
# not tumour presence, so the message names what is actually being counted.
print(f" The number of patients with MGMT promoter methylation is {labels_df['MGMT_value'].sum()}")

In [None]:
# For a sample of patients, record (number of slices, pixel shape of the first
# sorted slice) for each of the four series folders.
c = 0
dims_t2w = {}
dims_t1w = {}
dims_t1wce = {}
dims_flair = {}


def series_dims(patient, modality):
    """Return (slice count, first-slice pixel shape) for one series of a patient.

    Parameters
    ----------
    patient : str
        Name of the patient folder inside `data_dir`.
    modality : str
        Name of the series sub-folder ('T2w', 'T1w', 'T1wCE' or 'FLAIR').

    Returns
    -------
    tuple
        (len(slices), shape). `shape` is None when the folder holds no files,
        instead of failing with an IndexError.
    """
    series_path = data_dir + '/' + patient + '/' + modality + '/'
    slices = [pydicom.dcmread(series_path + s) for s in os.listdir(series_path)]
    # Sort on the un-truncated coordinate: int() maps positions that are less
    # than one unit apart onto the same key, leaving those slices in
    # directory-listing order.
    slices.sort(key=lambda x: float(x.ImagePositionPatient[2]))
    shape = slices[0].pixel_array.shape if slices else None
    return (len(slices), shape)


# NOTE(review): the sample starts at index 1, so the first listed patient is
# not inspected - confirm this is intended.
sample_patients = patients[1:10]
report_every = 45
for c, patient in enumerate(sample_patients, start=1):
    if c % report_every == 0:
        # Report against the real sample size rather than a hard-coded total.
        print(f"{c}/{len(sample_patients)}")

    dims_t2w[patient] = series_dims(patient, 'T2w')
    dims_t1w[patient] = series_dims(patient, 'T1w')
    dims_t1wce[patient] = series_dims(patient, 'T1wCE')
    dims_flair[patient] = series_dims(patient, 'FLAIR')

In [None]:
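# Example values noted while inspecting the dicts above, written as
# "slice count (pixel shape)" - presumably for a single patient:
# dims_t2w # 27 (256,208)
# dims_t1w # 29 (256,192)
# dims_t1wce # 75 (256, 192)
# dims_flair # 27 (256, 192)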

# VISUALIZE DATA

In [None]:
# Display the first T2w slice (smallest third ImagePositionPatient component)
# of the first listed patient.
for patient in patients[:1]:
    # T2w
    path_t2w = data_dir + '/' + patient + '/T2w/'
    slices_t2w = [pydicom.dcmread(path_t2w + s) for s in os.listdir(path_t2w)]
    # Sort on the un-truncated coordinate: int() maps positions less than one
    # unit apart onto the same key, so which slice ended up first could depend
    # on directory-listing order.
    # NOTE(review): assumes slices are stacked along the third patient axis -
    # confirm for non-axial acquisitions.
    slices_t2w.sort(key = lambda x: float(x.ImagePositionPatient[2]))

    plt.imshow(slices_t2w[0].pixel_array)
    plt.show()

In [None]:
# Edge length, in pixels, of the square every displayed slice is resized to.
imp_px_size = 192

# Show a 6 x 6 grid of T1w slices for the first listed patient.
for patient in patients[:1]:
    # T1w
    path_t1w = data_dir + '/' + patient + '/T1w/'
    slices_t1w = [pydicom.dcmread(path_t1w + s) for s in os.listdir(path_t1w)]
    # Sort on the un-truncated coordinate: int() maps positions less than one
    # unit apart onto the same key, leaving those slices in listing order.
    slices_t1w.sort(key = lambda x: float(x.ImagePositionPatient[2]))

    fig = plt.figure()
    # Sorted slices 72..107; this range is empty (blank figure) when the
    # series holds 72 or fewer images.
    for num, each_slice in enumerate(slices_t1w[72:108]):
        y = fig.add_subplot(6,6, num + 1)
        new_image = cv2.resize(np.array(each_slice.pixel_array), (imp_px_size, imp_px_size))
        y.imshow(new_image, cmap = 'gray')
    plt.show()

In [None]:
# Edge length, in pixels, of the square every displayed slice is resized to.
imp_px_size = 192

# Show a 6 x 6 grid of T1wCE slices for the first listed patient.
for patient in patients[:1]:
    # T1wCE
    path_t1wce = data_dir + '/' + patient + '/T1wCE/'
    slices_t1wce = [pydicom.dcmread(path_t1wce + s) for s in os.listdir(path_t1wce)]
    # Sort on the un-truncated coordinate: int() maps positions less than one
    # unit apart onto the same key, leaving those slices in listing order.
    slices_t1wce.sort(key = lambda x: float(x.ImagePositionPatient[2]))

    fig = plt.figure()
    # Sorted slices 72..107; this range is empty (blank figure) when the
    # series holds 72 or fewer images.
    for num, each_slice in enumerate(slices_t1wce[72:108]):
        y = fig.add_subplot(6,6, num + 1)
        new_image = cv2.resize(np.array(each_slice.pixel_array), (imp_px_size, imp_px_size))
        y.imshow(new_image, cmap = 'gray')
    plt.show()

In [None]:
# Edge length, in pixels, of the square every displayed slice is resized to.
imp_px_size = 208

# Show a 6 x 6 grid of the first T2w slices for the first listed patient.
for patient in patients[:1]:
    # T2w
    path_t2w = data_dir + '/' + patient + '/T2w/'
    slices_t2w = [pydicom.dcmread(path_t2w + s) for s in os.listdir(path_t2w)]
    # Sort on the un-truncated coordinate: int() maps positions less than one
    # unit apart onto the same key, leaving those slices in listing order.
    slices_t2w.sort(key = lambda x: float(x.ImagePositionPatient[2]))

    fig = plt.figure()
    # First 36 sorted slices; fewer panels are drawn for shorter series.
    for num, each_slice in enumerate(slices_t2w[:36]):
        y = fig.add_subplot(6,6, num + 1)
        new_image = cv2.resize(np.array(each_slice.pixel_array), (imp_px_size, imp_px_size))
        y.imshow(new_image, cmap = 'gray')
    plt.show()

In [None]:
# Edge length, in pixels, of the square every displayed slice is resized to.
imp_px_size = 208

# Show a 6 x 6 grid of FLAIR slices for the first listed patient.
for patient in patients[:1]:
    # FLAIR
    path_flair = data_dir + '/' + patient + '/FLAIR/'
    slices_flair = [pydicom.dcmread(path_flair + s) for s in os.listdir(path_flair)]
    # Sort on the un-truncated coordinate: int() maps positions less than one
    # unit apart onto the same key, leaving those slices in listing order.
    slices_flair.sort(key = lambda x: float(x.ImagePositionPatient[2]))

    fig = plt.figure()
    # Sorted slices 36..71; this range is empty (blank figure) when the
    # series holds 36 or fewer images.
    for num, each_slice in enumerate(slices_flair[36:72]):
        y = fig.add_subplot(6,6, num + 1)
        new_image = cv2.resize(np.array(each_slice.pixel_array), (imp_px_size, imp_px_size))
        y.imshow(new_image, cmap = 'gray')
    plt.show()

# RESIZE DATA

### NEED TO RESIZE BOTH DEPTH (NUMBER OF SLICES) AND PIXEL GRID?

**Work only with T2w for now.**

In [None]:
# Intended edge length, in pixels, of the square each slice is resized to.
# NOTE(review): confirm the resize loop in this cell actually reads this name.
image_pixel_size = 196
# Number of slices each volume should contain after the depth resize; the loop
# in this cell derives its chunk size from it and pads up to it.
hm_slices = 20

def chunks(l, n):
    """Lazily yield consecutive pieces of `l`, each holding at most `n` items.

    The final piece is shorter when len(l) is not a multiple of `n`.
    """
    starts = range(0, len(l), n)
    yield from (l[start:start + n] for start in starts)
        
def mean(l):
    """Return the arithmetic mean of the items in `l`.

    Raises ZeroDivisionError when `l` is empty.
    """
    total = sum(l)
    n_items = len(l)
    return total / n_items

# Bring the T2w series of the first ten listed patients to a common size:
# `image_pixel_size` x `image_pixel_size` pixels per slice and `hm_slices`
# slices per volume.
for patient in patients[:10]:
    # T2w
    path_t2w = data_dir + '/' + patient + '/T2w/'
    slices_t2w = [pydicom.dcmread(path_t2w + s) for s in os.listdir(path_t2w)]
    # Sort on the un-truncated coordinate: int() maps positions less than one
    # unit apart onto the same key, leaving those slices in listing order.
    slices_t2w.sort(key = lambda x: float(x.ImagePositionPatient[2]))

    if not slices_t2w:
        # An empty folder would otherwise give a chunk size of 0 and fail in range().
        print(f"{patient}: no T2w slices found, skipping")
        continue

    # resize pixel size
    # Use this cell's own target size; `imp_px_size` only exists as a leftover
    # from the plotting cells and holds a different value.
    slices_t2w = [cv2.resize(np.array(each_slice.pixel_array), (image_pixel_size, image_pixel_size)) for each_slice in slices_t2w]

    # resize depth
    # Average groups of `c_size` consecutive slices. np.mean accumulates integer
    # input in float64, whereas summing the arrays directly stays in the pixel
    # dtype and can wrap around.
    c_size = math.ceil(len(slices_t2w) / hm_slices)
    new_slices = [np.mean(s_c, axis=0) for s_c in chunks(slices_t2w, c_size)]

    # Chunking yields at most `hm_slices` images; pad a short volume by
    # repeating its last image (the original appended the list
    # `new_slices[:-1]` instead of an image).
    while len(new_slices) < hm_slices:
        new_slices.append(new_slices[-1])

    print(len(new_slices))