# Finding largest bounding box


**This information is used to guide the selection of input dimensions.**

**Set everything up:**

In [2]:
import os
import math
import numpy as np
import pandas as pd
import scipy.io as spio

from six.moves import cPickle as pickle

In [3]:
# Root folder that holds the batch-list CSV; the result file is written here too.
data_root = '/data/bigbone4/ciriondo/'
# data_root = os.getcwd()  # alternative: use the current working directory

# NOTE: despite the `_dir` suffix, `csv_dir` is the full path to the CSV file itself.
batch_list_name = 'clean_all_path.csv'  # change for server
csv_dir = os.path.join(data_root, batch_list_name)

**Import batch list:**

In [5]:
def _patient_from_path(mri_path):
    """Return the part of the file's base name (extension removed) before the first '_'."""
    stem, _extension = os.path.splitext(os.path.basename(mri_path))
    return stem.split('_')[0]


# Read the batch list and derive the patient ID from each MRI file name
df = pd.read_csv(csv_dir)
df['patient'] = df['mriFile'].apply(_patient_from_path)
df.head()

Unnamed: 0,mriFile,segFile,mfcWorms,lfcWorms,mfcBME,lfcBME,patient
0,/data/bigbone4/DeepLearning_temp/Data/all_CUBE...,/data/bigbone4/DeepLearning_temp/Data/all_CUBE...,0.0,0.0,0,0,P052
1,/data/bigbone4/DeepLearning_temp/Data/all_CUBE...,/data/bigbone4/DeepLearning_temp/Data/all_CUBE...,0.0,0.0,0,0,P052
2,/data/bigbone4/DeepLearning_temp/Data/all_CUBE...,/data/bigbone4/DeepLearning_temp/Data/all_CUBE...,0.0,0.0,0,0,P052
3,/data/bigbone4/DeepLearning_temp/Data/all_CUBE...,/data/bigbone4/DeepLearning_temp/Data/all_CUBE...,0.0,0.0,0,0,P052
4,/data/bigbone4/DeepLearning_temp/Data/all_CUBE...,/data/bigbone4/DeepLearning_temp/Data/all_CUBE...,2.5,1.0,0,0,P122


**Some quick batch list formatting:**

In [4]:
# Bit of pre-processing
# remove whitespace in columns
df.columns = df.columns.str.strip()

# The remaining steps only make sense for the raw batch-list layout, which stores
# directory and file name in separate columns plus free-text descriptions.
# A batch list that already provides 'mriFile' / 'segFile' / 'patient' has none of
# these columns, and running the steps unconditionally raises a KeyError on a
# fresh kernel, so they are applied only when the raw columns are all present.
legacy_columns = ['pathtoMRI', 'mriName',
                  'pathtoSeg', 'segName',
                  'mfcDescription', 'lfcDescription']

if set(legacy_columns).issubset(df.columns):
    # get patient ID from image description (first two '_'-separated tokens)
    df['patient'] = df['mfcDescription'].apply(lambda s: '_'.join(s.split('_')[:2]))

    # combine directory and image names
    df['mriFile'] = df.apply(lambda r: os.path.join(r['pathtoMRI'], r['mriName']), axis=1)
    df['segFile'] = df.apply(lambda r: os.path.join(r['pathtoSeg'], r['segName']), axis=1)

    # convert descriptions {baseline, 6_month...} to {0, 1, 2} to use as index
    df['followUpN'] = df['mfcDescription'].apply(lambda r: ('baseline' in r)*0
                                                 + ('6_month' in r)*1
                                                 + ('1_year' in r)*2)
    # remove unneeded columns
    df = df.drop(labels=legacy_columns, axis=1)

# this is needed for the next script -- did that to remove clutter
# df.to_csv('formattedBatchlistAF.csv', index=False)

df.head()

Unnamed: 0,mfcWORMS,lfcWORMS,patient,mriFile,segFile,followUpN
0,0.0,3.0,AF_2005,C:\Users\Raouf\Documents\raouf\academic\uc ber...,C:\Users\Raouf\Documents\raouf\academic\uc ber...,0
1,0.0,2.0,AF_2006,C:\Users\Raouf\Documents\raouf\academic\uc ber...,C:\Users\Raouf\Documents\raouf\academic\uc ber...,0
2,0.0,0.0,AF_2007,C:\Users\Raouf\Documents\raouf\academic\uc ber...,C:\Users\Raouf\Documents\raouf\academic\uc ber...,0
3,0.0,3.0,AF_2008,C:\Users\Raouf\Documents\raouf\academic\uc ber...,C:\Users\Raouf\Documents\raouf\academic\uc ber...,0
4,0.0,2.0,AF_2009,C:\Users\Raouf\Documents\raouf\academic\uc ber...,C:\Users\Raouf\Documents\raouf\academic\uc ber...,0


**Useful functions:**

In [5]:
def bbox3d_size(img):
    """Measure the extent of the non-zero region of a 3-D array along each axis.

    For every axis the function finds the first and last index at which the
    array contains a non-zero element and returns their difference.

    Args:
        img: array-like with three dimensions (the reductions below address
            axes 0, 1 and 2). Any dtype that `np.any` can evaluate works,
            including boolean masks.

    Returns:
        Tuple `(axis0_extent, axis1_extent, axis2_extent)` of built-in ints.
        Each value is `last_index - first_index`, i.e. one less than the
        number of slices the region spans. `(0, 0, 0)` is returned when the
        array has no non-zero element at all.
    """
    extents = []
    for axis in range(3):
        # Collapse the two other axes: True wherever this slice holds any non-zero voxel
        other_axes = tuple(a for a in range(3) if a != axis)
        occupied = np.nonzero(np.any(img, axis=other_axes))[0]
        if occupied.size == 0:
            # Nothing is set anywhere (e.g. the label is absent from this volume);
            # indexing first/last would raise IndexError, so report a zero-sized box.
            return 0, 0, 0
        # Built-in ints keep printed / saved values plain regardless of NumPy version
        extents.append(int(occupied[-1] - occupied[0]))
    return tuple(extents)

**Load segmentation images in a loop, updating the largest dimensions:**

In [6]:
# Running per-axis maximum of the bounding-box extents seen so far
max_dims = [0, 0, 0]
seg_filedirs = df['segFile'].tolist()
n_segmentations = len(seg_filedirs)

for image_count, seg_file in enumerate(seg_filedirs, start=1):
    print('Segmentation {} of {}....'.format(image_count, n_segmentations), end="")
    img_seg = spio.loadmat(seg_file, squeeze_me=True)['pred_con_vol'].astype(np.float32)

    # Keep only the voxels carrying label 1 / label 2 (everything else becomes 0).
    # Multiplying by the boolean mask directly avoids the extra leading axis that a
    # list-wrapped mask introduces and the np.squeeze needed to drop it again.
    img_lfc = img_seg * (img_seg == 1)
    img_mfc = img_seg * (img_seg == 2)

    # Cast to built-in ints so the printed tuples stay plain numbers even when
    # NumPy scalar types would otherwise print with their type name.
    lfc_dims = tuple(int(d) for d in bbox3d_size(img_lfc))
    mfc_dims = tuple(int(d) for d in bbox3d_size(img_mfc))
    max_dims = [max(lfc_dims[i], mfc_dims[i], max_dims[i]) for i in range(3)]
    print('\tOriginal: {}\tLFC: {} \t MFC: {}'.format(img_seg.shape, lfc_dims, mfc_dims))

print('\nLargest dimensions: {}'.format(max_dims))

Segmentation 1 of 23....	Original: (512, 512, 112)	LFC: (46, 168, 22) 	 MFC: (125, 171, 31)
Segmentation 2 of 23....	Original: (512, 512, 216)	LFC: (55, 171, 44) 	 MFC: (60, 131, 41)
Segmentation 3 of 23....	Original: (512, 512, 248)	LFC: (111, 146, 50) 	 MFC: (135, 182, 54)
Segmentation 4 of 23....	Original: (512, 512, 199)	LFC: (51, 150, 47) 	 MFC: (81, 110, 44)
Segmentation 5 of 23....	Original: (512, 512, 208)	LFC: (49, 167, 37) 	 MFC: (116, 129, 44)
Segmentation 6 of 23....	Original: (512, 512, 208)	LFC: (30, 131, 39) 	 MFC: (98, 120, 51)
Segmentation 7 of 23....	Original: (512, 512, 240)	LFC: (131, 189, 49) 	 MFC: (150, 108, 52)
Segmentation 8 of 23....	Original: (512, 512, 216)	LFC: (161, 218, 48) 	 MFC: (142, 181, 54)
Segmentation 9 of 23....	Original: (512, 512, 216)	LFC: (41, 155, 42) 	 MFC: (139, 181, 54)
Segmentation 10 of 23....	Original: (512, 512, 216)	LFC: (126, 195, 38) 	 MFC: (63, 141, 39)
Segmentation 11 of 23....	Original: (512, 512, 248)	LFC: (127, 196, 41) 	 MFC: 

**Saving largest dimensions as text file in root data directory:**

In [7]:
dim_file = os.path.join(data_root, 'largest_bbox.txt')
# Cast to built-in ints before formatting: str() of a list uses each element's
# repr, and NumPy >= 2 scalars repr as e.g. "np.int64(161)", which would end up
# in the text file instead of plain numbers.
roi_dims = [int(d) for d in max_dims]

try:
    # File content is the list literal, e.g. "[161, 218, 54]"
    with open(dim_file, "w") as text_file:
        text_file.write(str(roi_dims))
except Exception as e:
    # Report which file failed, then re-raise so the notebook run stops visibly
    print("Unable to save text file with dimensions: ", dim_file, ": ", e)
    raise