<font size = '5'> **Data visualization and extraction of middle images from all four scan types for classification.** </font>

Notes:

Data from subjects [00109, 00123, 00790] is excluded from the extracted dataset.


In [None]:
import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
import matplotlib
import pydicom as dicom
import cv2
import ast
from glob import glob
import warnings
from collections import Counter
import seaborn as sns
from scipy import ndimage, misc
import pydicom
warnings.filterwarnings('ignore')

import re
import math

import tensorflow as tf

import imageio
import ipywidgets as widgets
from PIL import Image

In [None]:
#root folder of the competition data
path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/'

#folder that holds one sub-folder per training subject
train_path = os.path.join(path, 'train')

#os.listdir returns entries in arbitrary order, so sort them: the subject order
#defines the row order of every array built later and must not change between runs
subject_ids = sorted(os.listdir(train_path))

#names of the scan-type folders, read from the first subject
scan_types = sorted(os.listdir(os.path.join(train_path, subject_ids[0])))

#print types of scans
print(scan_types, '\n')

#load the train labels csv file
train_labels = pd.read_csv(os.path.join(path, 'train_labels.csv'))

#preview the first rows of the label table
train_labels.head(5)


In [None]:
#subjects that are left out of the dataset
#NOTE(review): the competition notes, as far as I recall, list 00709 rather than 00790 -
#confirm that '00790' is not a typo before relying on this list
subject_rid = ['00109', '00123', '00790']

#keep every subject that is not on the exclusion list
subject_ids = [s for s in subject_ids if s not in subject_rid]

num_of_sub = len(subject_ids)

#number of subjects
print('Number of subjects after elimination: {}'.format(num_of_sub))

In [None]:
#helper used to locate slices by position (e.g. the middle slice) inside a scan folder

def file_nums(file_list):
    """Order slice files by the number embedded in their name.

    file_list: file names of the form 'Image-<number>.dcm'
               (ex. 'Image-4.dcm' carries the number 4).

    Returns a tuple (nums, image_str):
        nums      - numpy array with the slice numbers in ascending order
        image_str - list with the matching file names in the same order
    """

    #strip the fixed prefix and extension so only the slice number is left
    slice_numbers = [int(f.replace('Image-', '').replace('.dcm', '')) for f in file_list]

    #numeric sort, so that 'Image-10' comes after 'Image-2'
    nums = np.sort(slice_numbers)

    #rebuild the file names in numeric order
    image_str = ['Image-{}.dcm'.format(num) for num in nums]

    return nums, image_str

#a function to pad images to a square canvas (512 by 512 by default). If the image is small
#this function will paste the image in the centre of a black canvas

def pad_image(image, size = 512):
    """Centre an image on a black size x size canvas.

    image: array of shape (rows, cols, channels); a 2-D (rows, cols) array is
           also accepted and treated as a single-channel image.
    size:  edge length of the square output canvas (default 512).

    Returns a float numpy array of shape (size, size, channels).

    NOTE: an image that exceeds `size` in either dimension is resized to
    exactly size x size with tf.image.resize, which does not preserve the
    aspect ratio when the input is not square.
    """

    image = np.asarray(image)

    #give 2-D input an explicit channel axis
    if image.ndim == 2:
        image = image[:, :, np.newaxis]

    #check if the image is bigger than the canvas
    if (image.shape[0] > size) or (image.shape[1] > size):

        #if true resize the image to the canvas size and bring it back to numpy
        image = np.asarray(tf.image.resize(image, (size, size)))

    #get shape of the image
    rows, cols, channels = image.shape

    #black canvas with the same number of channels as the image
    final_im = np.zeros([size, size, channels])

    #top-left corner that centres the image on the canvas
    paste_row = (size - rows) // 2
    paste_col = (size - cols) // 2

    final_im[paste_row:paste_row + rows, paste_col:paste_col + cols, :] = image

    return final_im

<font size = '3'> **Interactive slides for Flair images** </font>

In [None]:
#check the image slices from one subject

#flair scans from subject 0
sub_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/FLAIR'

#list all the files inside, ordered by slice number
image_list = os.listdir(sub_path)
nums, file_names = file_nums(image_list)

def show_flair(Slide):
    """Display one FLAIR slice of the subject.

    Slide: position of the slice in the number-ordered file list.
    """

    #dcmread replaces the deprecated pydicom.read_file
    dcom = pydicom.dcmread(os.path.join(sub_path, file_names[Slide]))
    im = dcom.pixel_array

    plt.figure(figsize = (6, 6))
    plt.title('Flair')
    plt.imshow(im, cmap = 'gray')
    plt.xticks([])
    plt.yticks([])
    plt.show()

#slider over every available slice
widgets.interact(show_flair, Slide = (0, len(nums) - 1 , 1))

#frame buffer for gif export (the FLAIR gif export itself is disabled)
ims = []
    

<font size = '3'> **Interactive images for T1W type** </font>

In [None]:
#check the image slices from one subject

#T1w scans from subject 0
sub_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/T1w'

#list all the files inside, ordered by slice number
image_list = os.listdir(sub_path)
nums, file_names = file_nums(image_list)

def show_t1w(Slide):
    """Display one T1w slice of the subject.

    Slide: position of the slice in the number-ordered file list.
    """

    #dcmread replaces the deprecated pydicom.read_file
    dcom = pydicom.dcmread(os.path.join(sub_path, file_names[Slide]))
    im = dcom.pixel_array

    plt.figure(figsize = (6, 6))
    plt.title('T1W')
    plt.imshow(im, cmap = 'gray')
    plt.xticks([])
    plt.yticks([])
    plt.show()

#slider over every available slice
widgets.interact(show_t1w, Slide = (0, len(nums) - 1 , 1))

#save as a gif
#the frames are collected in a list local to this cell, so re-running the cell
#does not keep appending to a buffer left over from an earlier run
frames = [pydicom.dcmread(os.path.join(sub_path, file)).pixel_array for file in file_names]

if frames:

    #a plain cast to uint8 wraps every value above 255, so scale the whole volume
    #to 0-255 first; one shared range keeps the brightness stable across frames
    low = min(float(frame.min()) for frame in frames)
    high = max(float(frame.max()) for frame in frames)
    span = (high - low) or 1.0   #avoid dividing by zero for a constant volume

    imgs = [Image.fromarray(((frame - low) / span * 255).astype('uint8')) for frame in frames]
    imgs[0].save("t1w.gif", save_all=True, append_images=imgs[1:], duration=150, loop=0)

<font size = '3'> **Interactive slides for T2W images** </font>

In [None]:
#check the image slices from one subject

#T2w scans from subject 0
sub_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/T2w'

#list all the files inside, ordered by slice number
image_list = os.listdir(sub_path)
nums, file_names = file_nums(image_list)

def show_T2W(Slide):
    """Display one T2w slice of the subject.

    Slide: position of the slice in the number-ordered file list.
    """

    #dcmread replaces the deprecated pydicom.read_file
    dcom = pydicom.dcmread(os.path.join(sub_path, file_names[Slide]))
    im = dcom.pixel_array

    plt.figure(figsize = (6, 6))
    plt.title('T2W')
    plt.imshow(im, cmap = 'gray')
    plt.xticks([])
    plt.yticks([])
    plt.show()

#slider over every available slice; the trailing ';' hides the function repr
widgets.interact(show_T2W, Slide = (0, len(nums) - 1 , 1));

<font size = '3'> **Interactive T1wCE images** </font>

In [None]:
#check the image slices from one subject

#T1wCE scans from subject 0
sub_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train/00000/T1wCE'

#list all the files inside, ordered by slice number
image_list = os.listdir(sub_path)
nums, file_names = file_nums(image_list)

def show_T1wCE(Slide):
    """Display one T1wCE slice of the subject.

    Slide: position of the slice in the number-ordered file list.
    """

    #dcmread replaces the deprecated pydicom.read_file
    dcom = pydicom.dcmread(os.path.join(sub_path, file_names[Slide]))
    im = dcom.pixel_array

    plt.figure(figsize = (6, 6))
    plt.title('T1wCE')
    plt.imshow(im, cmap = 'gray')
    plt.xticks([])
    plt.yticks([])
    plt.show()

#slider over every available slice; the trailing ';' hides the function repr
widgets.interact(show_T1wCE, Slide = (0, len(nums) - 1 , 1));

In [None]:
import time
tick = time.time()

#one normalized middle slice per subject for every scan type, plus the label vector
X_flair = np.zeros([num_of_sub, 512, 512, 1]) #(subject, rows, cols, channel)
X_t1w = np.zeros([num_of_sub, 512, 512, 1])
X_t2w = np.zeros([num_of_sub, 512, 512, 1])
X_t1wce = np.zeros([num_of_sub, 512, 512, 1])
Y = np.zeros(num_of_sub)

#route every scan-type folder name to the array that stores its slices
scan_arrays = {'FLAIR': X_flair, 'T1w': X_t1w, 'T2w': X_t2w, 'T1wCE': X_t1wce}

for k, s in enumerate(subject_ids):

    #label of the subject; BraTS21ID is compared as an integer, so the
    #folder name is converted with int() before the lookup
    Y[k] = train_labels['MGMT_value'][train_labels['BraTS21ID'] == int(s)].values[0]

    for scan_type in scan_types:

        #folders that are not one of the four known scan types are ignored
        target = scan_arrays.get(scan_type)
        if target is None:
            continue

        #get the directory for the subject
        directory = os.path.join(train_path, s, scan_type)

        #slice files ordered by slice number
        _, slice_files = file_nums(os.listdir(directory))

        #get the middle image
        middle_file = slice_files[len(slice_files) // 2]

        #read dicom (named dcm so the `dicom` module alias is not shadowed;
        #dcmread replaces the deprecated pydicom.read_file)
        dcm = pydicom.dcmread(os.path.join(directory, middle_file))

        #get the numpy array
        image = dcm.pixel_array

        #reshape the image to 3 dims
        image = image.reshape(image.shape[0], image.shape[1], 1)

        #check the size and pad the image to 512 and 512
        if image.shape != (512, 512, 1):

            #pad image
            image = pad_image(image)

        #normalize the image to zero mean and unit variance
        image = np.asarray(image, dtype = np.float64)
        spread = image.std()

        if spread > 0:
            image_norm = (image - image.mean()) / spread
        else:
            #a constant (e.g. completely black) slice has zero std; dividing by it
            #would fill the dataset with NaN, so store zeros instead
            image_norm = np.zeros_like(image)

        target[k, :, :, :] = image_norm

tock = time.time()

#elapsed wall-clock time in seconds
print(tock - tick)

In [None]:
#plot some of the pictures to see

#draw up to 10 distinct subjects; replace = False prevents the same subject
#from being shown in more than one row
n_show = min(10, len(X_flair))
rand_inds = np.random.choice(np.arange(len(X_flair)), n_show, replace = False)

#(title prefix, image array) for every column of the figure
columns = [('Flair', X_flair), ('T1WCE', X_t1wce), ('T1W', X_t1w), ('T2W', X_t2w)]

#squeeze = False keeps `ax` two-dimensional even when only one row is drawn
fig, ax = plt.subplots(len(rand_inds), len(columns), figsize = (10, 20), squeeze = False)

for i, r in enumerate(rand_inds):

    for j, (label, images) in enumerate(columns):

        #drop the channel axis so imshow receives a plain 2-D image
        ax[i, j].imshow(images[r][:, :, 0], cmap = 'gray')
        ax[i, j].set_xticks([])
        ax[i, j].set_yticks([])

        #the title carries the scan type and the subject's label
        ax[i, j].set_title('{} - {}'.format(label, Y[r]))

In [None]:
#write every array to its own .npy file in the working directory
outputs = (
    ('x_flair', X_flair),
    ('x_t1w', X_t1w),
    ('x_t2w', X_t2w),
    ('x_t1wce', X_t1wce),
    ('y', Y),
)

for file_stem, array in outputs:
    np.save(file_stem, array)