# Create Random Bounding Boxes

Note: box positions and dimensions may be off by one pixel in some sections because of indexing inconsistencies.

First import necessary packages and set up file paths

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pydicom
import pandas as pd
from glob import glob
import os
from matplotlib.patches import Rectangle
import csv
import math

# Project root directory. The ECE5970_PROJECT_DIR environment variable, when
# set, overrides the original hard-coded location so the notebook can run on
# other machines without editing this cell.
filepath = os.environ.get("ECE5970_PROJECT_DIR", "C:/Users/Matthew/ECE5970Project")

# Input files and image directories, all located under <filepath>/input.
det_class_path = filepath+'/input/stage_1_detailed_class_info.csv'
full_info_path = filepath+'/input/image_bbox_full.csv'
bbox_path = filepath+'/input/stage_1_train_labels.csv'
# The two directory paths keep their trailing slash because later code
# appends a file pattern directly to them.
dicom_dir = filepath+'/input/stage_1_train_images/'
test_path = filepath+'/input/stage_1_test_images/'

Gather data statistics using a pandas dataframe

In [2]:
full_info_df = pd.read_csv(full_info_path)

# Split the non-missing x positions at x = 512 so that each side of the image
# gets its own Gaussian (used for the bimodal x model).
x_split = 512
x_all = full_info_df['x'].dropna()
x_left = x_all[x_all < x_split]    # x positions on the left side of the image
x_right = x_all[x_all >= x_split]  # x positions on the right side of the image

# Sample counts per side; the sample standard deviation needs at least two.
k1 = len(x_left)
k2 = len(x_right)
if k1 < 2 or k2 < 2:
    raise ValueError(
        "Need at least two x positions on each side of x = 512 to estimate "
        "per-side statistics (left: %d, right: %d)" % (k1, k2)
    )

# Overall and per-side means of the x position.
xmean = full_info_df['x'].mean()
xmean1 = x_left.mean()   # x position mean for left side of image
xmean2 = x_right.mean()  # x position mean for right side of image

# Per-side sums of squared deviations, then sample standard deviations
# (divide by n - 1, matching what pandas' .std() does by default).
xvar1 = ((x_left - xmean1) ** 2).sum()
xvar2 = ((x_right - xmean2) ** 2).sum()
xstd = full_info_df['x'].std()
xstd1 = math.sqrt(xvar1 / (k1 - 1))
xstd2 = math.sqrt(xvar2 / (k2 - 1))
xmin = full_info_df['x'].min()
xmax = full_info_df['x'].max()

# Summary statistics for the y position.
ymean = full_info_df['y'].mean()
ystd = full_info_df['y'].std()
ymin = full_info_df['y'].min()
ymax = full_info_df['y'].max()

# Summary statistics for the box width.
wmean = full_info_df['width'].mean()
wstd = full_info_df['width'].std()
wmin = full_info_df['width'].min()
wmax = full_info_df['width'].max()

# Summary statistics for the box height.
hmean = full_info_df['height'].mean()
hstd = full_info_df['height'].std()
hmin = full_info_df['height'].min()
hmax = full_info_df['height'].max()

Function to make a random box using Gaussian distributions and assuming independence between x, y, width, and height

Need if statements to ensure all values and boxes lie within the 1024x1024 pixel image  
This will condense probability from Gaussian "tails" onto the edges of the image  
Other distributions may be better, but this serves as a simple baseline

In [3]:
def makeRandBox():
    """Draw one random box from four independent Gaussians.

    The x position, y position, width and height are each sampled from a
    normal distribution using the module-level statistics (xmean/xstd,
    ymean/ystd, wmean/wstd, hmean/hstd), rounded, and then clamped so the
    box lies inside a 1024x1024 image.

    Returns:
        A list [x, y, width, height].
    """
    # Sample in x, y, width, height order.
    gaussians = ((xmean, xstd), (ymean, ystd), (wmean, wstd), (hmean, hstd))
    left, top, box_w, box_h = [
        round(np.random.normal(mu, sigma)) for mu, sigma in gaussians
    ]

    # Keep the corner on a valid pixel index (0..1023).
    left = min(max(left, 0), 1023)
    top = min(max(top, 0), 1023)

    # Sizes are at least one pixel and may not run past the image edge.
    box_w = min(max(box_w, 1), 1024 - left)
    box_h = min(max(box_h, 1), 1024 - top)
    return [left, top, box_w, box_h]

Function to make a random box with uniform distributions bounded by training data minimums and maximums

Need fewer if statements since the position will not be outside bounds

In [4]:
def UmakeRandBox():
    """Draw one random box from four independent uniform distributions.

    Each value is sampled uniformly between the corresponding module-level
    minimum and maximum (xmin/xmax, ymin/ymax, wmin/wmax, hmin/hmax) and
    rounded. Width and height are then shortened if the box would extend
    past coordinate 1024.

    Returns:
        A list [x, y, width, height].
    """
    # Sample in x, y, width, height order.
    ranges = ((xmin, xmax), (ymin, ymax), (wmin, wmax), (hmin, hmax))
    left, top, box_w, box_h = [
        round(np.random.uniform(low, high)) for low, high in ranges
    ]

    # Trim any part of the box that would fall outside the image.
    return [left, top, min(box_w, 1024 - left), min(box_h, 1024 - top)]

Function to make a random box using a bimodal distribution over x (one Gaussian for each lung) and Gaussians for the three other variables

In [5]:
def BimakeRandBox():
    """Draw one random box using a two-Gaussian mixture for the x position.

    A uniform draw picks, with equal probability, either the left-side
    Gaussian (xmean1/xstd1) or the right-side Gaussian (xmean2/xstd2) for
    x. The y position, width and height come from single Gaussians
    (ymean/ystd, wmean/wstd, hmean/hstd). All values are rounded and then
    clamped so the box lies inside a 1024x1024 image.

    Returns:
        A list [x, y, width, height].
    """
    # Choose which of the two x-position Gaussians to sample from.
    if np.random.rand() < 0.5:
        x_mu, x_sigma = xmean1, xstd1
    else:
        x_mu, x_sigma = xmean2, xstd2

    # Sample in x, y, width, height order.
    left = round(np.random.normal(x_mu, x_sigma))
    top = round(np.random.normal(ymean, ystd))
    box_w = round(np.random.normal(wmean, wstd))
    box_h = round(np.random.normal(hmean, hstd))

    # Keep the corner on a valid pixel index (0..1023).
    left = min(max(left, 0), 1023)
    top = min(max(top, 0), 1023)

    # Sizes are at least one pixel and may not run past the image edge.
    box_w = min(max(box_w, 1), 1024 - left)
    box_h = min(max(box_h, 1), 1024 - top)
    return [left, top, box_w, box_h]

Function to make a completely random uniform bounding box (does not use any training data)

In [6]:
def UmakeTrueRandBox():
    """Draw one uniformly random box without using any training statistics.

    The x and y positions are sampled uniformly from [0, 1022] and the
    width and height from [1, 1024], each rounded. Width and height are
    then shortened if the box would extend past coordinate 1024.

    Returns:
        A list [x, y, width, height].
    """
    # Sample in x, y, width, height order.
    left = round(np.random.uniform(0, 1022))
    top = round(np.random.uniform(0, 1022))
    box_w = round(np.random.uniform(1, 1024))
    box_h = round(np.random.uniform(1, 1024))

    # Trim any part of the box that would fall outside the image.
    return [left, top, min(box_w, 1024 - left), min(box_h, 1024 - top)]

Check that the functions work

In [7]:
# Draw one sample box from each generator (Gaussian, bounded uniform,
# two-Gaussian x, fully uniform) and print them one per line, in that order.
a, b, c, d = makeRandBox(), UmakeRandBox(), BimakeRandBox(), UmakeTrueRandBox()
print(a, b, c, d, sep='\n')

[436, 526, 304, 211]
[441, 234, 392, 519]
[627, 358, 241, 57]
[500, 207, 452, 29]


If we want to use different numbers of boxes based on the training data:  
There are 25684 subjects (note: the per-box counts listed below sum to 26684, so one of these figures should be rechecked)  
0 Boxes 16720  
1 Box 6566  
2 Boxes 3266  
3 Boxes 119  
4 Boxes 13

Create a random bounding box for every input image  
The model with two Gaussians for the x position appears to work the best

In [8]:
# Write the submission CSV: a header row, then one row per .dcm file found in
# the test image directory. The `with` block guarantees the file is closed
# even if box generation or writing raises part-way through.
with open('test_submission_random_two_gaussian.csv', "w", newline='') as ofile:
    writer = csv.writer(ofile, delimiter=',')
    writer.writerow(['patientId','PredictionString'])
    # glob() returns files in arbitrary order; sorting keeps the row order
    # stable from run to run.
    for fpath in sorted(glob(test_path+'*.dcm')):
        # The ID written to the CSV is the file name without its extension.
        pID = os.path.splitext(os.path.basename(fpath))[0]
        # Prediction string is "1.0 x y width height" (the leading 1.0 is
        # presumably the confidence score -- confirm against the submission
        # format). Joining the values explicitly avoids depending on how a
        # list is rendered by str().
        bbox = '1.0 ' + ' '.join(str(value) for value in BimakeRandBox())
        writer.writerow([pID,bbox])