In [None]:
import os
import numpy as np
import tensorflow as tf
import pandas as pd

from tensorflow import keras
from tensorflow.keras import layers

import datetime
import math

from scipy import ndimage

# True: read the test cases from the Kaggle input directory.
# False: read them from a local "test" folder instead.
using_data_from_kaggle = True

def PrintTime(msg=""):
    """Print the current date and time, optionally prefixed by a message.

    Input: msg is an optional label; when it is empty only the timestamp is printed.

    Return: None (the text is written to standard output, surrounded by blank lines).
    """
    now = datetime.datetime.now()
    if len(msg) == 0:
        print("\n",now,"\n")
        return
    print("\n",msg," at Time: ",now,"\n")

# Log the wall-clock time at which the notebook starts running.
PrintTime("Start Notebook")

In [None]:
def GetSizeOfImage(x_low,x_mid,x_high):
    """Guess the image dimensions from a 3-byte size value.

    Input: x_low, x_mid, x_high are the low, middle and high bytes of the size
    (combined as x_high * 65536 + x_mid * 256 + x_low).

    Return: (num_rows, num_cols).  num_rows is 512 or 256, whichever is closer to
    sqrt(size / 2); num_cols is size / num_rows truncated to an integer.
    """
    total_size = x_low + 256 * x_mid + 65536 * x_high

    # Side length the image would have if it were square and used 2 units per pixel.
    side_estimate = math.sqrt(total_size / 2)

    closer_to_512 = math.fabs(512 - side_estimate) < math.fabs(256 - side_estimate)
    num_rows = 512 if closer_to_512 else 256

    return num_rows, int(total_size / num_rows)

def FindIndexOfStartOfImage(f):
    """Locate the raw image data inside a .dcm file and work out the image size.

    Only the first 2048 bytes of the file are inspected.

    Input: f is a .dcm file opened in binary mode (it is rewound to the start first).

    Return: a tuple (pos, num_row, num_col) where
        pos     -- the position in the file where the actual data resides (typically 1004)
        num_row -- the number of image rows
        num_col -- the number of BYTES in one image row (when the size is read from the
                   header this is twice the stored column count)
    (-1, -1, -1) is returned when the data marker is not found, or when the header
    is cut off before the bytes that are needed.
    """
    header_window = 2048

    # Marker that precedes the image data: E0 7F 10 00, followed by a 4-byte length.
    # NOTE(review): this looks like the DICOM Pixel Data tag (7FE0,0010) in
    # implicit-VR little-endian form -- confirm.
    pixel_marker = bytes([224, 127, 16, 0])

    # Marker that precedes the row count: 28 00 10 00 followed by the length 02 00 00 00.
    # The column count is expected in the element that follows immediately after.
    # NOTE(review): presumably the DICOM Rows tag (0028,0010) -- confirm.
    rows_marker = bytes([40, 0, 16, 0, 2, 0, 0, 0])

    f.seek(0,0)
    buf = f.read(header_window)

    # find() returns -1 when the marker is absent; offset 0 is a valid match.
    idx = buf.find(pixel_marker)
    if idx < 0:
        return -1,-1,-1

    idx_row_col = buf.find(rows_marker)
    if idx_row_col >= 0:
        # The last byte needed is the high byte of the column count.
        if idx_row_col + 10 + 8 + 1 >= len(buf):
            return -1,-1,-1
        num_row = 256 * buf[idx_row_col + 8 + 1] + buf[idx_row_col + 8 + 0]
        num_col = 256 * buf[idx_row_col + 10 + 8 + 1] + buf[idx_row_col + 10 + 8 + 0]
        num_col *= 2  # two bytes are stored per pixel, so a row is twice as long in bytes
        return idx + 8,num_row,num_col

    # No explicit row/column fields: estimate the size from the 3 low-order bytes
    # of the length that follows the data marker.
    if idx + 6 >= len(buf):
        return -1,-1,-1
    num_row,num_col = GetSizeOfImage(buf[idx+4],buf[idx+5],buf[idx+6])
    return idx + 8,num_row,num_col
    
    
def resize_volume(img,desired_width,desired_height,desired_depth):
    """Rotate a 3-D volume by 90 degrees and resample it to the requested size.

    Input: img is a 3-D ndarray; axis 0 is treated as width, axis 1 as height and
    the last axis as depth.  desired_width, desired_height and desired_depth give
    the size wanted along those axes.

    Return: the rotated volume, resampled with linear interpolation (order=1).
    """
    current_sizes = (img.shape[0], img.shape[1], img.shape[-1])
    desired_sizes = (desired_width, desired_height, desired_depth)

    # Zoom factor per axis, computed as 1 / (current / desired).
    zoom_factors = tuple(
        1 / (current / desired)
        for current, desired in zip(current_sizes, desired_sizes)
    )

    # Rotate without changing the array shape, then rescale all three axes at once.
    rotated = ndimage.rotate(img, 90, reshape=False)
    return ndimage.zoom(rotated, zoom_factors, order=1)



In [None]:
# Intermediate size (rows X columns X slices) for each file type
# (FLAIR, T1w, T1wCE, T2w): every image is first placed on a
# target_num_rows X target_num_cols grid and the stack of slices is
# resampled to target_num_slices_per_filetype slices.
target_num_rows = 512
target_num_cols = 512
target_num_slices_per_filetype = 32

# The number of rows X columns X slices that actually get fed into the 3D CNN for each type
# (the intermediate volume is resampled once more to this size).
cnn_num_rows = 256
cnn_num_cols = 256
cnn_num_slices_per_filetype = 16

def Create_128by128by16(directory):
    """Build one fixed-size volume from all the image files in a folder.

    NOTE: despite the name, the size of the result is set by the module-level
    values cnn_num_rows, cnn_num_cols and cnn_num_slices_per_filetype.

    Input: directory is a folder like train/00000/FLAIR or test/00091/T1wCE

    Return: a ndarray of shape (cnn_num_rows,cnn_num_cols,cnn_num_slices_per_filetype)

    IMPORTANT: It is assumed the image cols will only take the low-order byte!
    so num_col = 512 means the original image has 1024 bytes per row.

    Files in which no image data is found are reported with a warning and leave
    an all-zero slice in the stack.

    NOTE(review): slices are stacked in os.listdir() order, which is arbitrary, and
    the image bytes are read as signed int8.  Both are left unchanged here because
    an already-trained model may depend on this exact preprocessing -- confirm
    before changing either.
    """
    max_slices = 512  # at most this many files are used from one folder

    filenames = os.listdir(directory)[:max_slices]
    num_slices = len(filenames)

    # Only allocate the slices that will actually be filled.
    # The stack is resized to target_num_slices_per_filetype slices first,
    # then all 3 axes are resized to cnn_num_rows X cnn_num_cols X cnn_num_slices_per_filetype.
    big_buf = np.zeros((target_num_rows,target_num_cols,num_slices),dtype = np.float32)

    for slice_no, filename in enumerate(filenames):
        file_path = os.path.join(directory, filename)

        # The context manager closes the file even if reading or reshaping fails.
        with open(file_path,"rb") as f:
            pos_of_image,size_of_image_rows,size_of_image_cols = FindIndexOfStartOfImage(f)
            if pos_of_image <= 0:
                print(f"WARNING! file_path {file_path} has pos_of_image = {pos_of_image} ")
                continue

            f.seek(pos_of_image,0)
            size_of_image = size_of_image_rows * size_of_image_cols
            buf = np.fromfile(f, dtype=np.int8, count=size_of_image)

        buf = buf.astype(np.float32)
        buf = buf.reshape((size_of_image_rows,size_of_image_cols))
        actual_cols = int(size_of_image_cols / 2)
        buf = buf[:,0:size_of_image_cols:2]  # get rid of high byte

        if size_of_image_rows <= target_num_rows and actual_cols <= target_num_cols:
            # Small enough: copy into the top-left corner, the rest stays zero.
            big_buf[0:size_of_image_rows,0:actual_cols,slice_no] = buf
        else:
            # Too large: resize image to target_num_rows X target_num_cols
            upd = ndimage.zoom(buf,(target_num_rows/size_of_image_rows,target_num_cols/actual_cols))
            big_buf[0:target_num_rows,0:target_num_cols,slice_no] = upd

    # Now compress the slices of images so it is exactly target_num_rows X target_num_cols X target_num_slices_per_filetype
    compressed_img1 = resize_volume(big_buf,target_num_rows,target_num_cols,target_num_slices_per_filetype)

    # Now compress the slices of images so it is exactly cnn_num_rows X cnn_num_cols X cnn_num_slices_per_filetype
    compressed_img = resize_volume(compressed_img1,cnn_num_rows,cnn_num_cols,cnn_num_slices_per_filetype)

    return compressed_img

def CreateCaseData_128by128by64(directory):
    """Build the combined volume for one case from its four file-type folders.

    Input: directory is something like "train/00000" or "test/00019"

    Return: the volumes for FLAIR, T1w, T1wCE and T2w (in that order), joined
    along axis 2.
    """
    type_folders = (
        f"{directory}/FLAIR",
        f"{directory}/T1w",
        f"{directory}/T1wCE",
        f"{directory}/T2w",
    )
    volumes = [Create_128by128by16(folder) for folder in type_folders]
    return np.concatenate(volumes,axis=2)



In [None]:
import threading
import time
import pickle

class myTestThread (threading.Thread):
    """Worker thread that builds the input volume for a single case.

    After the thread has finished, the result is available in the attribute arr.
    Until then arr holds an all-zero placeholder of the same shape.
    """

    def __init__(self, caseID,directory):
        """Remember which case to process.

        Input: caseID is the case number or name (zero-padded to 5 characters);
        directory is the folder that contains the case folders.
        """
        super().__init__()
        self.caseID = str(caseID).zfill(5)
        self.directory = directory
        placeholder_shape = (cnn_num_rows,cnn_num_cols,4*cnn_num_slices_per_filetype)
        self.arr = np.zeros(placeholder_shape)

    def run(self):
        """Build the volume for this case and store it in self.arr."""
        case_folder = f"{self.directory}/{self.caseID}"
        self.arr = CreateCaseData_128by128by64(case_folder)


In [None]:

# Test using 'saved off files'
model_path = "../input/save-tlgp120gp145/Save_TransferLearning-210830-WED-Gp120Gp145"
model_load = keras.models.load_model(model_path)
# Report success only after the model has actually been loaded.
print("Loaded Saved Model:",model_path)




In [None]:
def WaitTillProcessingTestDataFinishes(testThreads,numThreads):
    """Block until the first numThreads threads in testThreads have all finished.

    Input: testThreads is a sequence of thread objects; numThreads is how many of
    them (counted from the start of the sequence) to wait for.

    Return: None.  Returns immediately when no thread is alive; otherwise the
    status is checked every 5 seconds and a progress line is printed on every
    12th check (about once a minute).
    """
    poll_seconds = 5
    polls_per_report = 12

    polls_done = 0
    while True:
        count_active_threads = sum(
            1 for i in range(numThreads) if testThreads[i].is_alive()
        )

        # Check before sleeping so finished work is never delayed by a needless wait.
        if count_active_threads == 0:
            return

        if polls_done > 0 and polls_done % polls_per_report == 0:
            print(f"{count_active_threads} out of {numThreads} are still active at ",datetime.datetime.now())

        time.sleep(poll_seconds)
        polls_done += 1



In [None]:
# Now test the test-set
PrintTime("Start making predictions on test cases ")

base_dir_for_data = "../input/rsna-miccai-brain-tumor-radiogenomic-classification"
base_dir_test = f"{base_dir_for_data}/test"
if using_data_from_kaggle == False:
    base_dir_test = "test"

# Cases are preprocessed in groups of this size, one thread per case.
number_cases_per_loop = 4

all_cases = list(os.listdir(base_dir_test))

# The last group is smaller when the number of cases is not a multiple of the group size.
num_test_groups = math.ceil(len(all_cases) / number_cases_per_loop)
rem_test_cases = len(all_cases) % number_cases_per_loop

print(f"Number of test groups: {num_test_groups}, Number remaining test cases: {rem_test_cases}")

# The predictions are written to ./submission.csv; the context manager makes sure
# the file is flushed and closed even if preprocessing or prediction fails.
with open("./submission.csv","w+") as pred_file:
    pred_file.write("BraTS21ID,MGMT_value\n")

    for gp_no in range(num_test_groups):
        first_case = number_cases_per_loop * gp_no
        group_cases = all_cases[first_case:first_case + number_cases_per_loop]

        # Start one preprocessing thread per case in this group.
        threads = []
        for case in group_cases:
            print(f"GN:{gp_no},working on {case} at time {datetime.datetime.now()}")
            aThread = myTestThread(case,base_dir_test)
            aThread.start()
            threads.append(aThread)

        WaitTillProcessingTestDataFinishes(threads,len(threads))

        # Get the data: predict each case from the volume its thread produced.
        for case, aThread in zip(group_cases, threads):
            prediction = model_load.predict(np.expand_dims(aThread.arr, axis=0))[0]

            print(f"case={case}, prediction={prediction[0]}")
            pred_file.write(f"{case},{prediction[0]}\n")

PrintTime("END making predictions on test cases")