In [1092]:
import tensorflow as tf
from tensorflow.keras.layers import LeakyReLU
import glob
import matplotlib.pyplot as plt
from math import floor

In [1093]:
# Define the constants
S = 7 # Divide the image to have S*S cells
C = 1 # Number of classes 
B = 2 # Number of bounding boxes to be predicted per cell
BATCH_SIZE = 1

In [1094]:
# Import image paths
imagesRaw = glob.glob("./new_images/*.jpg")
imagesRaw.sort()

In [1095]:
# Grab all the bounding box info
with open('boundingBoxes.txt', 'r') as f:
    lines = f.readlines()
 
boxes = [[float(x) for x in line.strip().split(",")] for line in lines]

In [1096]:
def load_jpeg(image, box):
    # Load the image turn into jpeg
    decoded = tf.io.read_file(image)
    imageTf = tf.image.decode_jpeg(decoded)
    # Normalize the images
    returnImage = tf.cast(imageTf, tf.float32) / 255.0
    returnImage = tf.reshape(returnImage, (488,488,3))
    
    return returnImage, box

def convertToYTrue(box):
    # This inserts the BOX to correct GRID POSITION
    row, col = box[1], box[2]
    updates = tf.constant([box[3:]])
    # Could simplify this using tf.meshgrid somehow
    idx = tf.constant([[[row, col, 0], [row, col, 1], [row, col, 2], [row, col, 3], [row, col, 4], [row, col, 5]]])
    output = tf.scatter_nd(idx, updates, [7, 7, 6])
    return output
    
# Plots two given images
def display(imageOne, imageTwo):
    plt.figure(figsize=(15, 15))
    plt.subplot(1,2,1)
    plt.imshow(tf.keras.preprocessing.image.array_to_img(imageOne))
    plt.axis("off")
    plt.subplot(1,2,2)
    plt.imshow(tf.keras.preprocessing.image.array_to_img(imageTwo))
    plt.axis("off")
    plt.show()

#Convert true pixel indexes to yolo format
def convertToYoloFormat(pixelPositions, num_boxes=1, imageWidth=488, imageHeight=488):
    # Format is [containsObj, center_x, center_y, width, height, classes...]
    cellHeight = imageHeight / S
    cellWidth = imageWidth / S
    returnBox = []
    countStart = 0
    countEnd = 1
    storage = []
    while countStart < num_boxes:
        _ ,xMin, xMax, yMin, yMax = pixelPositions[5*countStart:4*countEnd + 1]
        # Calculate the exact pixel index of the center of the box w.r.t the image
        pointX = (floor((xMax - xMin)/2)) + xMin
        pointY = (floor((yMax - yMin)/2)) + yMin
        # Calculate the center of the box w.r.t the cell
        centerX = (pointX - floor(pointX/cellWidth) * cellWidth)/cellWidth
        centerY = (pointY - floor(pointY/cellHeight) * cellHeight)/cellHeight
        # Calculate the dimensions w.r.t the cell
        width = (xMax - xMin)/(imageHeight)
        height = (yMax - yMin)/(imageWidth)
        
        returnBox = returnBox + [_, centerX, centerY, width, height]
        countEnd += 1
        countStart += 1
        
    colIndex = floor(pointX/cellWidth)
    rowIndex = floor(pointY/cellHeight)
    storage += colIndex, rowIndex
    
    return [num_boxes] + storage + returnBox + pixelPositions[5*countStart:]

def generateIndexes():
    table = []
    for i in range(BATCH_SIZE):
        for j in range(S*S):
            for k in range(B):
                table.append([i,j,k])
    return table

# Convert the yolo box format to tensor box format NEED TO UPDATE ++++
def convertFromYolo(box, image, index):
    xMax = box[0]*488*2
    yMax = box[1]*488*2
    xMin = - box[2]*488 + xMax
    yMin = - box[3]*488 + yMax
    return xMax, yMax, xMin, yMin
    

In [1097]:
# Convert boxes to yolo format:
convertedBoxes = [convertToYoloFormat(box) for box in boxes]
# Convert the convertedBoxes to the same format as y_pred
yTrue = [convertToYTrue(box) for box in convertedBoxes]
# Create a new dataset, pairing images and respective boxes
imageDataSet = tf.data.Dataset.from_tensor_slices((imagesRaw, yTrue))
# Shuffle the data
imageDataSet = imageDataSet.shuffle(100)
# Map the paths to turn into images
entireSet = imageDataSet.map(load_jpeg)

In [1098]:
# Divide the dataset into train, test and validation
trainSize = int(0.8 * 2594)
valSize = int(0.1 * 2594)
train = entireSet.take(trainSize)
temp = entireSet.skip(trainSize)
test = temp.skip(valSize)
validation = temp.take(valSize)

In [1099]:
# Create batches:
train_batches = train.batch(BATCH_SIZE)
validation_batches = validation.batch(BATCH_SIZE)
test_batches = test.batch(BATCH_SIZE)

In [1100]:
model = tf.keras.Sequential([
    #First Layer
    tf.keras.layers.Conv2D(64, (7,7), strides=(2, 2),  input_shape=(488,488,3)),
    tf.keras.layers.BatchNormalization(),
    LeakyReLU(alpha=0.1),
    tf.keras.layers.MaxPooling2D((2, 2), strides=(2,2)),
    
    #Second Layer
    #tf.keras.layers.Conv2D(192, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(192, (3,3),  padding="same"),
    tf.keras.layers.BatchNormalization(),
    LeakyReLU(alpha=0.1),
    tf.keras.layers.MaxPooling2D((2, 2), strides=(2,2)),
    
    #Third Layer
    tf.keras.layers.Conv2D(128, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(256, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(256, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    #tf.keras.layers.Conv2D(512, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(512, (3,3),  padding="same"),
    tf.keras.layers.BatchNormalization(),
    LeakyReLU(alpha=0.1),
    tf.keras.layers.MaxPooling2D((2, 2), strides=(2,2)),
    
    #Fourth Layer
    # +++ Repeated block
    tf.keras.layers.Conv2D(256, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(512, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    
    tf.keras.layers.Conv2D(256, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(512, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),

    tf.keras.layers.Conv2D(256, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(512, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),

    tf.keras.layers.Conv2D(256, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(512, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    # +++ END BLOCK
    tf.keras.layers.Conv2D(512, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    #tf.keras.layers.Conv2D(1024, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(1024, (3,3),  padding="same"),
    tf.keras.layers.BatchNormalization(),
    LeakyReLU(alpha=0.1),
    tf.keras.layers.MaxPooling2D((2, 2), strides=(2,2)),
    
    #Fifth layer
    # +++ Repeated Block
    tf.keras.layers.Conv2D(512, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(1024, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    
    tf.keras.layers.Conv2D(512, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(1024, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    # +++ END BLOCK
    tf.keras.layers.Conv2D(1024, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(1024, (3,3), strides=(2, 2), activation=LeakyReLU(alpha=0.1), padding="same"),
    
    #Sixth Layer
    tf.keras.layers.Conv2D(1024, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    #tf.keras.layers.Conv2D(1024, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(1024, (3,3),  padding="same"),
    tf.keras.layers.BatchNormalization(),
    LeakyReLU(alpha=0.1),
    

    # Final Output Layer
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4096, activation=LeakyReLU(alpha=0.1)),
    tf.keras.layers.Dense(S * S * (B*5+C), input_shape=(4096,), activation="sigmoid"),
    tf.keras.layers.Reshape(target_shape = (S, S, (B*5+C)))
    
])

The yoloLoss function is an adaptation from here: https://blog.emmanuelcaradec.com/humble-yolo-implementation-in-keras/. Credit to Emmanuel Caradec

In [1101]:
def yoloLoss(y_true, y_pred):
    
    if tf.math.reduce_any(tf.math.is_nan(y_pred)):
        print("\ny_pred is NAN")
        test = tf.reshape(y_pred, [-1,539])
        for e in test.numpy():
            print(e)
    
    grid = [[[float(x),float(y)]]*B for y in range(S) for x in range(S)]
    
    true_boxes = tf.reshape(y_true[...,:5], [-1,S*S,1,5])
    pred_boxes = tf.reshape(y_pred[...,:B*5], (-1,S*S,B,5))
    # Reshape the 3: too S*S, B, 5 i.e. 49, 2, 5. 
    # Each cell has two bounding boxes, with each bounding box, having 5 elements. 
    
    y_pred_conf = pred_boxes[...,0]
    y_true_conf = true_boxes[...,0]
    
    y_pred_wh   = pred_boxes[...,3:5]
    y_true_wh   = true_boxes[...,3:5]
    # y_pred_wh. Shape (S*S,B,2). Where each cell, has B bounding boxes, with each
    # Bounding box having two rows?

    y_pred_xy   = pred_boxes[...,1:3] + grid
    y_true_xy   = true_boxes[...,1:3]
    # The y_true_xy will have a shape of S*S, B, 2. Where each cell, has B bounding boxes, with two colums? 
    # I understand grabbing the xy pairs, but why + the grid?
    
    y_true_class = tf.reshape(y_true[...,5:], [-1, S*S, C]) 
    y_pred_class = tf.reshape(y_pred[...,B*5:], [-1, S*S, C])
    
    
    # Losses Calculations +++++++++++++++++++++++++++++++++++++++++++++++++++
    xy_loss    = tf.math.reduce_sum(tf.math.reduce_sum(tf.math.square(y_true_xy - y_pred_xy),
                                                       axis=-1)*y_true_conf, axis=-1)
    
    # Two reduce sums  = 2 summations. Axis = -1, along last dimensions i.e. columns/entries in this case.
    # Square is element wise. y_true_conf is the little 1, in the formula. 

    wh_loss    = tf.math.reduce_sum(tf.math.reduce_sum(tf.math.square(tf.math.sqrt(y_true_wh) - 
                                                                      tf.math.sqrt(y_pred_wh)), axis=-1)
                                    *y_true_conf, axis=-1)
    
    # Two reduce sums  = 2 summations. Axis = -1, along last dimensions i.e. rows in this case
    # Makes sense, matches formula. 
    
    clss_loss  = tf.math.reduce_sum(tf.math.square(y_true_class - y_pred_class)*y_true_conf, axis=-1)       
        
    intersect_wh = tf.math.maximum(tf.zeros_like(y_pred_wh), (y_pred_wh + y_true_wh)/2 
                                   - tf.math.abs(y_pred_xy - y_true_xy) )
    
    intersect_area = intersect_wh[...,0] * intersect_wh[...,1]
    true_area = y_true_wh[...,0] * y_true_wh[...,1]
    pred_area = y_pred_wh[...,0] * y_pred_wh[...,1]
    union_area = pred_area + true_area - intersect_area
    
    # Make the denom not zero
    #union_area = tf.where(tf.equal(0., union_area), tf.ones_like(union_area), union_area)
    
    iou = intersect_area / union_area
    
    union_area = tf.where(tf.math.equal(0., union_area), tf.ones_like(union_area), union_area)
    
    conf_loss = tf.math.reduce_sum(tf.math.square(y_true_conf*iou - y_pred_conf)*y_true_conf, axis=-1)
    
    loss =  clss_loss + xy_loss + wh_loss + conf_loss
    
    #loss = tf.where(tf.math.is_nan(loss), tf.zeros_like(loss), loss)
    
    return tf.math.reduce_sum(loss)

In [1102]:
def jaccardIndex(y_true, y_pred):
    """
    Compares y_true with highest prob y_pred box
    """
            
    y_true = tf.reshape(y_true[...,:5], [-1,S*S,1,5])
    y_pred = tf.reshape(y_pred[...,:B*5], [-1,S*S,B,5])
    
    y_true_box = tf.gather_nd(y_true, (tf.where(tf.equal(y_true[...,4], tf.math.reduce_max(y_true[...,4]))))[0])
    y_pred_box = tf.gather_nd(y_pred, (tf.where(tf.equal(y_pred[...,4], tf.math.reduce_max(y_pred[...,4]))))[0])
    
    numerator = (tf.math.reduce_sum(tf.math.multiply(y_true_box, y_pred_box)))
    # Calculate the area of both - area that overlaps
    denom = tf.math.reduce_sum(y_true_box) + tf.math.reduce_sum(y_pred_box) - numerator
    iou = numerator/denom
    
    return iou

In [1103]:
# This max supression will only work when they're only exists a single class. Can be adapted.  
def nonMaxSupression(y_true, y_pred):
    maxTrueValue = tf.gather_nd(y_true, (tf.where(tf.equal(y_true[...,4], tf.math.reduce_max(y_true[...,4]))))[0])
    # Grabs the max value along index 4 (reduce_max), then finds the index of the value in the tensor
    maxPredIndex = (tf.where(tf.equal(y_pred[...,4], tf.math.reduce_max(y_pred[...,4]))))[0]
    # Grabs the actual value, using index calculated above
    maxPredValue = tf.gather_nd(y_pred, maxPredIndex)
    idx = generateIndexes()
    # Disgusting time complexity :D
    for i in range(len(idx)):
        box = tf.gather_nd(y_pred, idx[i])
        val = iou(maxPredValue, box)
        if val < 0.5:
            del idx[i]

In [1104]:
zeros = tf.zeros([2, 7, 7, 11])
zeros = zeros + 0.1
for image, box in train_batches.take(1):
    testBox = box
    output = yoloLoss(testBox, zeros)
    jaccard = jaccardIndex(testBox, zeros)

In [1105]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=yoloLoss,
              metrics=[jaccardIndex], run_eagerly=True)

In [1106]:
#model.summary()

In [1107]:
history =  model.fit(train_batches, epochs=1, validation_data=validation_batches)

   5/2075 [..............................] - ETA: 59:23 - loss: nan - jaccardIndex: 0.5313    
y_pred is NAN
[nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan na

InvalidArgumentError: in user code:

    C:\Users\e_gui\AppData\Local\Temp/ipykernel_8496/1141752945.py:10 jaccardIndex  *
        y_pred_box = tf.gather_nd(y_pred, (tf.where(tf.equal(y_pred[...,4], tf.math.reduce_max(y_pred[...,4]))))[0])
    C:\Users\e_gui\anaconda3\envs\tfEnv\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\Users\e_gui\anaconda3\envs\tfEnv\lib\site-packages\tensorflow\python\ops\array_ops.py:1041 _slice_helper
        return strided_slice(
    C:\Users\e_gui\anaconda3\envs\tfEnv\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\Users\e_gui\anaconda3\envs\tfEnv\lib\site-packages\tensorflow\python\ops\array_ops.py:1214 strided_slice
        op = gen_array_ops.strided_slice(
    C:\Users\e_gui\anaconda3\envs\tfEnv\lib\site-packages\tensorflow\python\ops\gen_array_ops.py:10510 strided_slice
        _ops.raise_from_not_ok_status(e, name)
    C:\Users\e_gui\anaconda3\envs\tfEnv\lib\site-packages\tensorflow\python\framework\ops.py:6941 raise_from_not_ok_status
        six.raise_from(core._status_to_exception(e.code, message), None)
    <string>:3 raise_from
        

    InvalidArgumentError: slice index 0 of dimension 0 out of bounds. [Op:StridedSlice] name: strided_slice/


In [None]:
for image, mask in test_batches.take(1):
    # Predict mask on new image
    boxes = model.predict(image)
    boxes = tf.reshape(boxes[...,:B*5], [-1,S*S,B,5])
    bestBox = tf.gather_nd(boxes, (tf.where(tf.equal(result[...,4], tf.math.reduce_max(boxes[...,4]))))[0])