In [1]:
import numpy as np
import keras
import pandas as pd

### Sub-sections
In the assignment, you will define a "patch size" in three dimensions, which determines the size of the sub-section you want to extract. For this exercise, you only need to define a patch size in one dimension.

In [4]:
# Toy one-dimensional "image": the six consecutive integers 10..15.
image = np.arange(10, 16)
# For a 1D array, the length is its only dimension.
image_length = len(image)

# Width of the patch and the index at which the first patch begins.
patch_length, start_i = 3, 0

# The patch spans the half-open index range [start_i, end_i).
end_i = start_i + patch_length
print(f"start index {start_i}")
print(f"end index {end_i}")

# Slice the patch out of the "image" and report its size and contents.
sub_section = image[slice(start_i, end_i)]
print("output patch length: ", sub_section.shape[0])
print("output patch array: ", sub_section)

# Move the start index one step to the right, ready for the next patch.
start_i = start_i + 1

start index 0
end index 3
output patch length:  3
output patch array:  [10 11 12]


In [7]:
# The last start index that still leaves room for a full patch is
# image_length - patch_length. np.random.randint() excludes its upper bound,
# so the bound passed in is one more than that.
start_upper_bound = image_length - patch_length + 1

# Draw a single random start index.
start_i = np.random.randint(0, start_upper_bound)
print(f"randomly selected start index {start_i}")


# Draw ten more start indices; each draw overwrites start_i.
for _draw in range(10):
    start_i = np.random.randint(0, start_upper_bound)
    print(f"randomly selected start index {start_i}")

randomly selected start index 0
randomly selected start index 1
randomly selected start index 2
randomly selected start index 2
randomly selected start index 1
randomly selected start index 1
randomly selected start index 3
randomly selected start index 3
randomly selected start index 0
randomly selected start index 1
randomly selected start index 0


### Background Ratio

Another thing you will be doing in the assignment is computing the background ratio: the fraction of a patch that is labeled as background rather than as edema or tumor. You will be provided with a file containing labels with these categories:

* 0: background
* 1: edema
* 2: non-enhancing tumor
* 3: enhancing tumor

Let's demonstrate this in 1D to build some intuition for how to implement it in 3D later in the assignment.

In [22]:
# Simulated input data: a random patch of 16 labels, each one of the
# categories (0 to 3) defined above. The `high` bound of randint is
# exclusive, so high=4 yields the values 0, 1, 2 and 3.
patch_labels = np.random.randint(low=0, high=4, size=16)
print(patch_labels)

[2 3 1 3 1 2 1 3 1 0 1 0 0 3 3 1]


In [23]:
# The most direct way to get the background ratio: count how many labels
# are 0 (background) and divide by the number of labels in the patch.
is_background = patch_labels == 0
n_labels = len(patch_labels)

# Variant 1: count the True entries of the boolean mask.
bgrd_ratio = np.count_nonzero(is_background) / n_labels
print("using np.count_nonzero(): ", bgrd_ratio)


# Variant 2: np.where() returns a tuple of index arrays; the first one holds
# the positions of the background labels, so its length is the same count.
background_positions = np.where(is_background)[0]
bgrd_ratio = len(background_positions) / n_labels
print("using np.where(): ", bgrd_ratio)

using np.count_nonzero():  0.1875
using np.where():  0.1875


In [24]:
# The label array will also be used to train a neural network, so the ratio
# can instead be computed after some preprocessing. The first preprocessing
# step is to convert the category labels to one-hot format, one column per
# category, which is the form used to train the model.
n_categories = 4  # labels 0 to 3
patch_labels_one_hot = keras.utils.to_categorical(
    patch_labels,
    num_classes=n_categories,
)
print(patch_labels_one_hot)

[[0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]]


In [25]:
# Show the one-hot array as a dataframe so that each column carries the name
# of its category, in label order 0 to 3.
category_names = ['background', 'edema', 'non-enhancing tumor', 'enhancing tumor']

pd.DataFrame(data=patch_labels_one_hot, columns=category_names)

Unnamed: 0,background,edema,non-enhancing tumor,enhancing tumor
0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,1.0
2,0.0,1.0,0.0,0.0
3,0.0,0.0,0.0,1.0
4,0.0,1.0,0.0,0.0
5,0.0,0.0,1.0,0.0
6,0.0,1.0,0.0,0.0
7,0.0,0.0,0.0,1.0
8,0.0,1.0,0.0,0.0
9,1.0,0.0,0.0,0.0


In [27]:
# Column 0 of the one-hot array is 1 for every background label and 0
# otherwise, so summing that column counts the background labels. Dividing
# by the patch length gives the background ratio.
background_column = patch_labels_one_hot[:, 0]
bgrd_ratio = background_column.sum() / len(patch_labels)
print("using one-hot column: ", bgrd_ratio)

using one-hot column:  0.1875
