# FLAIR images

In [None]:
import os
import zipfile
import numpy as np
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

In [None]:
# Reference: https://www.kaggle.com/rude009/working-with-dicom-data
import pydicom as dicom

In [None]:
# here we create a pd.df with each patient's id and the target (MGMT value)
import pandas as pd
# Root folder of the competition data; the labels CSV is read from directly inside it.
data_directory = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'
labels_csv_path = f"{data_directory}/train_labels.csv"
df = pd.read_csv(labels_csv_path)

# Dataframe with ID and MGMT value

Since we have to predict the MGMT value, we create a DataFrame with these two columns to link the ID of each set of images to its MGMT value.

In [None]:
def convert_BraTS21ID_to_string(df):
    """Convert the ``BraTS21ID`` column to zero-padded strings of width 5.

    Each value is converted with ``str`` and left-padded with ``'0'`` up to
    five characters (``7`` -> ``'00007'``). Values whose string form is
    already five characters or longer are kept unchanged, so the column
    always keeps one entry per row.

    Args:
        df: DataFrame containing a ``BraTS21ID`` column.

    Returns:
        The same DataFrame object, with ``BraTS21ID`` replaced in place.
    """
    # str.rjust pads on the left and never truncates, so no row is dropped
    # (dropping rows would make the assignment below fail on a length mismatch).
    df['BraTS21ID'] = [str(scan_id).rjust(5, '0') for scan_id in df['BraTS21ID']]
    return df

In [None]:
# Convert BraTS21ID to zero-padded strings; these values are later passed to
# process_scan as the scan identifier used in the DICOM folder path.
df = convert_BraTS21ID_to_string(df)

In [None]:
# Remove the rows whose index labels are 71, 81 and 488 in a single call.
df = df.drop(labels=[71, 81, 488], axis=0)

In [None]:
# Split the labels by class: one frame for MGMT_value == 1, one for MGMT_value == 0.
df_MGMT_1 = df[df["MGMT_value"] == 1]
df_MGMT_0 = df[df["MGMT_value"] == 0]

In [None]:
# now, we define functions to load and stack the images
from scipy import ndimage
import glob
import re
import cv2

SIZE=128
def load_dicom_image(path, img_size=SIZE, voi_lut=True):
    """Read one DICOM file and return its pixel data resized to a square.

    Args:
        path: Path of the ``.dcm`` file to read.
        img_size: Edge length passed to ``cv2.resize`` for both dimensions.
        voi_lut: When true, the pixel data is passed through
            ``apply_voi_lut`` before resizing; otherwise the raw
            ``pixel_array`` is used.

    Returns:
        The array returned by ``cv2.resize``.
    """
    # dcmread is pydicom's supported reader; read_file is a deprecated alias.
    # The local is not called `dicom` so it cannot shadow the module alias.
    dataset = pydicom.dcmread(path)
    pixels = dataset.pixel_array
    if voi_lut:
        pixels = apply_voi_lut(pixels, dataset)

    return cv2.resize(pixels, (img_size, img_size))

NUM_IMAGES = 64
def load_dicom_images_3d(scan_id, num_imgs=NUM_IMAGES, img_size=SIZE, mri_type="FLAIR", split="train"):
    """Load the central slices of one scan as a 3D volume.

    The ``.dcm`` files under ``{data_directory}/{split}/{scan_id}/{mri_type}``
    are sorted in natural (numeric-aware) order, a window of up to
    ``num_imgs`` files around the middle of the series is loaded with
    ``load_dicom_image``, and the slices are stacked and transposed so the
    slice axis comes last. Scans with fewer than ``num_imgs`` slices are
    zero-padded at the end of the last axis. Unless the volume is constant,
    it is min-max scaled to the [0, 1] range.

    Args:
        scan_id: Scan folder name (e.g. ``'00000'``).
        num_imgs: Size of the last axis of the returned volume.
        img_size: Edge length each slice is resized to.
        mri_type: Sub-folder of the scan to read.
        split: Sub-folder of ``data_directory`` to read.

    Returns:
        Array whose last axis has length ``num_imgs``; the first two axes are
        expected to be ``img_size`` each (assumes single-channel slices).

    Raises:
        ValueError: If no DICOM file matches the scan folder.
    """
    pattern = f"{data_directory}/{split}/{scan_id}/{mri_type}/*.dcm"
    files = sorted(glob.glob(pattern),
               key=lambda var:[int(x) if x.isdigit() else x for x in re.findall(r'[^0-9]|[0-9]+', var)])
    if not files:
        # np.stack on an empty list fails with an unhelpful message; name the folder.
        raise ValueError(f"No DICOM files found matching {pattern!r}")

    # Window centred on the middle slice. Deriving the end from the start
    # keeps the window at num_imgs slices for odd values too (identical to
    # the middle +/- num_imgs // 2 window when num_imgs is even).
    middle = len(files)//2
    p1 = max(0, middle - num_imgs//2)
    p2 = min(len(files), p1 + num_imgs)
    # Forward img_size so the slices match the padding shape used below.
    # The transpose turns (slices, rows, cols) into (cols, rows, slices).
    img3d = np.stack([load_dicom_image(f, img_size=img_size) for f in files[p1:p2]]).T
    if img3d.shape[-1] < num_imgs:
        n_zero = np.zeros((img_size, img_size, num_imgs - img3d.shape[-1]))
        img3d = np.concatenate((img3d,  n_zero), axis = -1)

    # Skip scaling for a constant volume to avoid dividing by zero.
    if np.min(img3d) < np.max(img3d):
        img3d = img3d - np.min(img3d)
        img3d = img3d / np.max(img3d)

    return img3d

In [None]:
# Set the general data directory (re-assigns the same path defined earlier in this notebook).
data_directory = '../input/rsna-miccai-brain-tumor-radiogenomic-classification'

# Transforming the Data

We now define some functions to apply transformations to the images: normalization, resizing, cropping, and DICOM processing.

In [None]:
# now, we define functions to apply transformations to the images (stacked images)

# values of the array are between 0 and approx. 2000

def normalize(volume):
    """Min-max scale the volume to the [0, 1] range.

    The minimum of the volume maps to 0 and the maximum to 1. A constant
    volume (maximum equal to minimum) has no range to scale by, so an
    all-zero array of the same shape is returned instead of dividing by zero.
    The input array is not modified.

    Args:
        volume: Numeric array to scale.

    Returns:
        A ``float32`` array with the same shape as ``volume``.
    """
    minv = np.min(volume)
    maxv = np.max(volume)
    value_range = maxv - minv
    if value_range == 0:
        # 0 / 0 would fill the result with NaN.
        return np.zeros(np.shape(volume), dtype="float32")
    return ((volume - minv) / value_range).astype("float32")


def resize_volume(img, desired_depth=64, desired_width=128, desired_height=128):
    """Rotate a volume by 90 degrees and resample it to a target shape.

    The volume is first rotated with ``ndimage.rotate`` (``reshape=False``,
    so the array shape is kept) and then resampled with linear interpolation
    (``order=1``) so that axis 0 has ``desired_width`` entries, axis 1 has
    ``desired_height`` entries and the last axis has ``desired_depth``.

    Args:
        img: 3D array indexed as (width, height, depth).
        desired_depth: Target size of the last axis.
        desired_width: Target size of axis 0.
        desired_height: Target size of axis 1.

    Returns:
        The rotated and resampled array.
    """
    current_width = img.shape[0]
    current_height = img.shape[1]
    current_depth = img.shape[-1]
    # Zoom factor per axis is simply target size / current size.
    zoom_factors = (
        desired_width / current_width,
        desired_height / current_height,
        desired_depth / current_depth,
    )
    # Rotate
    img = ndimage.rotate(img, 90, reshape=False)
    # Resize along all three axes
    img = ndimage.zoom(img, zoom_factors, order=1)
    return img

def cropped_images(images):
    """Crop a 3D volume to the bounding box of its non-zero voxels.

    The box is inclusive on both ends, so the last non-zero index along each
    axis is kept. A volume with no non-zero voxel has no bounding box and is
    returned unchanged.

    Args:
        images: 3D array.

    Returns:
        A slice of ``images`` covering every non-zero voxel, or ``images``
        itself when it contains none.
    """
    nonzero_idx = np.array(np.nonzero(images))
    if nonzero_idx.shape[1] == 0:
        # min()/max() over an empty index set would raise.
        return images
    lower = nonzero_idx.min(axis=1)
    # +1 because slice ends are exclusive.
    upper = nonzero_idx.max(axis=1) + 1
    return images[lower[0]:upper[0], lower[1]:upper[1], lower[2]:upper[2]]
    
# path= patient's ID
def process_scan(path):
    """Load one scan and run it through the preprocessing pipeline.

    ``path`` is handed to ``load_dicom_images_3d`` as the scan identifier.
    The loaded volume is then normalized, cropped and resized, in that order,
    and the resized volume is returned.
    """
    loaded = load_dicom_images_3d(path)
    scaled = normalize(loaded)
    trimmed = cropped_images(scaled)
    return resize_volume(trimmed)

In [None]:
# Run the pipeline on scan ID '00000' and inspect the shape of the result.
process_scan('00000').shape

Since we have a large number of images, we reduce the dataset so that it fits in 16 GB of RAM.

In [None]:
# reducing the amount of data to 200 scans per category

# Read and process the scans.
# Each scan is resized across height, width, and depth and rescaled.
# Process the first 200 scan IDs (at most) of each class into stacked arrays.
MGMT_scans = np.array([process_scan(scan_id) for scan_id in df_MGMT_1['BraTS21ID'][:200]])
no_MGMT_scans = np.array([process_scan(scan_id) for scan_id in df_MGMT_0['BraTS21ID'][:200]])

# Labels: 1 for scans from the MGMT_value == 1 frame (methylation present),
# 0 for scans from the MGMT_value == 0 frame.
MGMT_labels = np.array([1] * len(df_MGMT_1['BraTS21ID'][:200]))
no_MGMT_labels = np.array([0] * len(df_MGMT_0['BraTS21ID'][:200]))

We now split the data so that it can then be processed by the CNN.

In [None]:
# Train/validation split: the first 140 scans of each class go to training and
# the remainder to validation (70-30 when a class has 200 scans).
x_train = np.concatenate([MGMT_scans[:140], no_MGMT_scans[:140]])
y_train = np.concatenate([MGMT_labels[:140], no_MGMT_labels[:140]])
x_val = np.concatenate([MGMT_scans[140:], no_MGMT_scans[140:]])
y_val = np.concatenate([MGMT_labels[140:], no_MGMT_labels[140:]])
print("Number of samples in train and validation are %d and %d." % (len(x_train), len(x_val)))

# Creating the dataset

Now that we have stored the transformed data in NumPy tensors, we proceed to save it in order to then create the tensor dataset.

In [None]:
# Inspect the shape of the stacked training array.
x_train.shape

In [None]:
# exist_ok=True lets this cell be re-run without raising FileExistsError.
os.makedirs("Dataset", exist_ok=True)

In [None]:
# One folder per saved array; exist_ok=True makes the cell safe to re-run
# (without it, a second run raises FileExistsError).
os.makedirs("x_train_dataset", exist_ok=True)
os.makedirs("y_train_dataset", exist_ok=True)
os.makedirs("x_val_dataset", exist_ok=True)
os.makedirs("y_val_dataset", exist_ok=True)

In [None]:
# Destination paths for the four arrays. np.save appends ".npy" to a file name
# that does not already end with it, so the files are written as "<path>.npy".
x_train_path='/kaggle/working/x_train_dataset'
y_train_path='/kaggle/working/y_train_dataset'
x_val_path='/kaggle/working/x_val_dataset'
y_val_path='/kaggle/working/y_val_dataset'

# Write the arrays in the same order: train features/labels, then validation.
np.save(x_train_path, x_train)
np.save(y_train_path, y_train)
np.save(x_val_path, x_val)
np.save(y_val_path, y_val)

The FLAIR dataset is stored in the following Kaggle dataset:
https://www.kaggle.com/hugovallejo/numpy-for-rsna-compet-flair