## Import statements

In [1]:
import h5py
import sys
sys.path.append('..')
from modules.configfile import config
import matplotlib.pyplot as plt
import cPickle as pickle
import numpy as np
import random
from random import shuffle
random.seed(1337)
np.random.seed(1337)
from scipy.ndimage.measurements import center_of_mass

## Open mean and variance file

In [2]:
# Load the per-channel statistics dictionary; apply_mean_std reads its 'mn' and 'var' entries.
# NOTE(security): pickle.load can execute arbitrary code -- only load files you produced yourself.
# The context manager closes the file handle as soon as loading finishes.
with open(config['saveMeanVarFilepath'] + '/HDF5_Datasetstraining_data_hgg_mean_var.p', 'rb') as mean_var_file:
    mean_var = pickle.load(mean_var_file)

## Open new database with cropped images

In [3]:
# Read-only handle on the HDF5 file located at config['hdf5_filepath_cropped'].
hdf5_file = h5py.File(config['hdf5_filepath_cropped'], 'r')

In [4]:
# Select the 'preprocessed' entry of the file; the HGG datasets used below are looked up inside it.
hdf5_file_g = hdf5_file['preprocessed']

In [5]:
def apply_mean_std(im, mean_var):
    """Standardize the four channels of ``im`` in place.

    Channel ``m`` (0..3) is replaced by
    ``(channel - mean_var['mn'][m]) / sqrt(mean_var['var'][m])``.

    Parameters
    ----------
    im : numpy.ndarray
        Image data. When it has more than four dimensions the channel is
        taken from axis 1 (axis 0 is left untouched, e.g. a batch axis);
        otherwise the channel is taken from axis 0. Modified in place.
    mean_var : dict
        Per-channel means under ``'mn'`` and VARIANCES (not standard
        deviations) under ``'var'``.

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in, now standardized.
    """
    # The layout is decided by the rank of the IMAGE. The earlier version
    # inspected the loop counter instead, whose rank is always 0, so the
    # batched branch could never run.
    channel_on_second_axis = np.ndim(im) > 4

    for m in range(0, 4):
        mean = mean_var['mn'][m]
        std = np.sqrt(mean_var['var'][m])  # stored value is a variance
        if channel_on_second_axis:
            im[:, m, ...] = (im[:, m, ...] - mean) / std
        else:
            im[m, ...] = (im[m, ...] - mean) / std

    return im

## Get all the HGG data

In [6]:
# Image data and the matching segmentation masks for the HGG cases.
# Both are indexed with the same patient index further down.
training_data_hgg = hdf5_file_g['training_data_hgg']
training_data_segmasks_hgg = hdf5_file_g['training_data_segmasks_hgg']

# Start the Iteration here. This is the "Epoch"

## Generate random access order

In [7]:
# One index per entry along the first axis of the HGG dataset, shuffled in place
# with the module-level (seeded) random generator.
indices = list(range(training_data_hgg.shape[0]))
random.shuffle(indices)

## Split indices into training and testing

In [8]:
# config['data_split']['train'] is used as a percentage: the first train_end
# shuffled indices go to training, everything after that to testing.
train_end = int(len(indices) * config['data_split']['train'] / 100.0)
train_indices, test_indices = indices[:train_end], indices[train_end:]

## Start the patch generation process

### Get the segmentation mask, find centroid and diameter of tumor region

In [10]:
# Load only the first training patient: the normalized image and its segmentation mask.
# Slicing with [:1] keeps the loop a no-op when there are no training indices.
for idx in train_indices[:1]:
    patient_x_train = apply_mean_std(training_data_hgg[idx], mean_var)
    patient_y_train = training_data_segmasks_hgg[idx]

## Let's call this (x, y, z)

<img alt="" height="279" src="https://www.med.upenn.edu/sbia/assets/user-content/BRATS_tasks.png" width="700">

The segmentations are combined to generate the final labels of the tumor sub-regions (Fig.D): edema (yellow), non-enhancing solid core (red), necrotic/cystic core (green), enhancing core (blue). (Figure taken from the BraTS IEEE TMI paper.)

### However, in the segmentation mask the labels are encoded as follows:

    1 for necrosis
    2 for edema
    3 for non-enhancing tumor
    4 for enhancing tumor
    0 for everything else

### To be able to weight the centre of mass correctly, the encoding needs to change.

In [11]:
# Work on a copy so the original label mask stays intact; the re-weighting
# step compares against the original labels while writing into this copy.
seg_reweighted = np.copy(patient_y_train)

In [12]:
# Replace each tumor label by its centre-of-mass weight. Masks are computed on
# the untouched original labels, so an already rewritten voxel is never re-matched.
#   1 necrotic (innermost region, highest weight) -> 10
#   4 enhancing                                   -> 9
#   3 non-enhancing                               -> 8
#   2 edema                                       -> 7
for label, weight in ((1, 10), (4, 9), (3, 8), (2, 7)):
    seg_reweighted[patient_y_train == label] = weight

In [13]:
# Centre of mass of the re-weighted mask, one coordinate per axis.
# Unpacking into three names requires the mask to be three-dimensional.
m_x, m_y, m_z = center_of_mass(seg_reweighted)

## Now we need to find the extent of the tumor mass along each axis (x, y, z). This extent (max - min of the tumor voxel coordinates) is what the code below calls the "standard deviation".

Checked this using visualization

In [14]:
def check_valid(patch_coords):
    """Truncate patch bounds to integers and test them against the cropped volume.

    ``patch_coords`` holds exactly six values ordered as
    ``[xmin, xmax, ymin, ymax, zmin, zmax]``. Each value is converted with
    ``int()``. The patch is accepted when, on every axis, the lower bound is
    non-negative and the upper bound is strictly smaller than the matching
    entry of ``config['size_after_cropping']``.

    Returns the list of integer bounds when the patch is accepted, else None.
    """
    bounds = [int(c) for c in patch_coords]
    xmin, xmax, ymin, ymax, zmin, zmax = bounds

    axis_ranges = ((xmin, xmax), (ymin, ymax), (zmin, zmax))
    for axis, (low, high) in enumerate(axis_ranges):
        # Reject as soon as one axis sticks out of the volume.
        if low < 0 or high >= config['size_after_cropping'][axis]:
            return None

    return bounds

In [15]:
# Coordinates of every labelled (non-zero) voxel, then the bounding-box extent
# (largest minus smallest coordinate) along each axis. Despite the std_* names
# these are extents, not statistical standard deviations.
x, y, z = np.where(patient_y_train > 0)
std_x, std_y, std_z = (coords.max() - coords.min() for coords in (x, y, z))

In [16]:
# Rejection-sample one 60x60x60 patch: draw a point from a normal distribution
# centred on the tumor's centre of mass and keep drawing until check_valid
# accepts the resulting bounds. The drawn point is used as the patch's LOWER
# corner (xmin, ymin, zmin), not as its centre.
patch_size_x, patch_size_y, patch_size_z = (60, 60, 60)
std_scale = 1.8

# Diagonal covariance: the per-axis tumor extent scaled by std_scale.
# It does not change between draws, so build it once.
patch_cov = np.diag(np.array([std_x, std_y, std_z]) * std_scale)

while True:
    xmin, ymin, zmin = np.random.multivariate_normal(mean=[m_x, m_y, m_z], cov=patch_cov)
    xmax = xmin + patch_size_x
    ymax = ymin + patch_size_y
    zmax = zmin + patch_size_z
    patch_coords = [xmin, xmax, ymin, ymax, zmin, zmax]

    # t is the integer bounding box, or None when the patch leaves the volume.
    t = check_valid(patch_coords)
    if t is not None:
        break

# Consolidate Code

In [None]:
def calculateCOM_STD(segmask):
    """Return the weighted centre of mass and per-axis extent of a 3-D label mask.

    The labels are re-weighted on a copy (the input is not modified) so that
    inner tumor regions pull the centre of mass harder than outer ones:
    1 (necrotic) -> 10, 4 (enhancing) -> 9, 3 (non-enhancing) -> 8,
    2 (edema) -> 7.

    Returns two lists: ``[m_x, m_y, m_z]`` with the centre of mass, and
    ``[std_x, std_y, std_z]`` with the difference between the largest and the
    smallest coordinate of the non-zero voxels along each axis.
    """
    weighted = np.copy(segmask)

    # Masks are taken from the original labels, so the order of the
    # assignments cannot make one rewrite match another.
    for label, weight in ((1, 10), (4, 9), (3, 8), (2, 7)):
        weighted[segmask == label] = weight

    com_x, com_y, com_z = center_of_mass(weighted)

    xs, ys, zs = np.where(segmask > 0)
    extent_x = xs.max() - xs.min()
    extent_y = ys.max() - ys.min()
    extent_z = zs.max() - zs.min()

    return [com_x, com_y, com_z], [extent_x, extent_y, extent_z]

# TESTING

## Visualize the patch in 3D

In [18]:
from mayavi import mlab

In [19]:
def createDense(bbox, im):
    """Build a dense array marking a bounding box inside a volume.

    ``bbox`` is indexed as ``[xmin, xmax, ymin, ymax, zmin, zmax]``. The
    result has the shape of ``im``, is zero everywhere and one inside the
    half-open box ``[xmin:xmax, ymin:ymax, zmin:zmax]``.
    """
    dense = np.zeros(im.shape)
    # One slice per axis, built from consecutive (min, max) pairs of bbox.
    region = tuple(slice(bbox[i], bbox[i + 1]) for i in (0, 2, 4))
    dense[region] = 1
    return dense

In [20]:
# lets get a segmentation
seg = patient_y_train

dense_bbox = createDense(t, seg)

src = mlab.pipeline.scalar_field(seg)

src_bbox = mlab.pipeline.scalar_field(dense_bbox)
# mlab.pipeline.iso_surface(src, contours=[0, 1, 2, 3, 4], opacity=0.5)
mlab.pipeline.iso_surface(src, contours=[1], opacity=0.4, color=(0,1,0))
mlab.pipeline.iso_surface(src, contours=[2], opacity=0.4)
mlab.pipeline.iso_surface(src, contours=[3], opacity=0.4)
mlab.pipeline.iso_surface(src, contours=[4], opacity=0.4)
mlab.pipeline.iso_surface(src_bbox, contours=[1], opacity=0.2)
# mlab.pipeline.iso_surface(src, contours=[s.max()-0.1*s.ptp(), ],)
mlab.show()

## Dry run the patch generation pipeline and manually see the visualization

In [28]:
import time

In [29]:
# Dry run of the patch generation: for one patient, sample ten patches around
# the tumor's centre of mass and show each one in 3D for manual inspection.
#
# NOTE(review): patch_size_* is applied on BOTH sides of the sampled centre,
# so each box spans 2 * 40 voxels per axis -- confirm this is intended.
patch_size_x, patch_size_y, patch_size_z = (40, 40, 40)
std_scale = 400

count = 0
for idx in train_indices[3:]:
    patient_x_train = apply_mean_std(training_data_hgg[idx], mean_var)
    patient_y_train = training_data_segmasks_hgg[idx]

    seg_reweighted = np.copy(patient_y_train)

    seg_reweighted[np.where(patient_y_train == 1)] = 10 # necrotic, the most inner region, has highest weight
    seg_reweighted[np.where(patient_y_train == 4)] = 9 # enhancing
    seg_reweighted[np.where(patient_y_train == 3)] = 8 # non-enhancing
    seg_reweighted[np.where(patient_y_train == 2)] = 7 # edema

    m_x, m_y, m_z = center_of_mass(seg_reweighted)

    # Recompute the tumor extent for THIS patient. Previously the values left
    # over from an earlier cell (a different patient) were silently reused.
    x, y, z = np.where(patient_y_train > 0)
    std_x = np.max(x) - np.min(x)
    std_y = np.max(y) - np.min(y)
    std_z = np.max(z) - np.min(z)

    # Diagonal covariance is constant for the patient, so build it once.
    patch_cov = np.diag(np.array([std_x, std_y, std_z]) * std_scale)

    for _num in range(0, 10):
        # Rejection sampling: redraw the centre until the box fits the volume.
        while True:
            xc, yc, zc = np.random.multivariate_normal(mean=[m_x, m_y, m_z], cov=patch_cov)
            patch_coords = [
                xc - patch_size_x, xc + patch_size_x,
                yc - patch_size_y, yc + patch_size_y,
                zc - patch_size_z, zc + patch_size_z,
            ]
            t = check_valid(patch_coords)
            if t is not None:
                break

        # Show the segmentation labels and the accepted patch box.
        seg = patient_y_train
        dense_bbox = createDense(t, seg)

        src = mlab.pipeline.scalar_field(seg)
        src_bbox = mlab.pipeline.scalar_field(dense_bbox)

        mlab.pipeline.iso_surface(src, contours=[1], opacity=0.4, color=(0,1,0))
        mlab.pipeline.iso_surface(src, contours=[2], opacity=0.4)
        mlab.pipeline.iso_surface(src, contours=[3], opacity=0.4)
        mlab.pipeline.iso_surface(src, contours=[4], opacity=0.4)
        mlab.pipeline.iso_surface(src_bbox, contours=[1], opacity=0.2)
        mlab.show()

        count += 1
        # Safety cap on the number of windows; it only leaves the inner loop.
        if count > 30:
            break
    # Only the first patient of the slice is inspected in this dry run.
    break

KeyboardInterrupt: 