In [148]:
import tensorflow as tf
from tensorflow.keras.layers import LeakyReLU
import glob
import matplotlib.pyplot as plt
from math import floor

In [101]:
# Define the constants
S = 7 # Divide the image to have S*S cells
C = 1 # Number of classes 
B = 2 # Number of bounding boxes to be predicted per cell

In [127]:
# Import image paths
imagesRaw = glob.glob("./new_images/*.jpg")
imagesRaw.sort()

In [128]:
# Grab all the bounding box info
with open('boundingBoxes.txt', 'r') as f:
    lines = f.readlines()
 
boxes = [[float(x) for x in line.strip().split(",")] for line in lines]

In [294]:
def load_jpeg(image, box):
    # Load the image turn into jpeg
    decoded = tf.io.read_file(image)
    imageTf = tf.image.decode_jpeg(decoded)
    # Normalize the images
    returnImage = tf.cast(imageTf, tf.float32) / 255.0
    returnImage = tf.reshape(returnImage, (488,488,3))
    out = tf.zeros([S,S,5+C]) + box
    
    return returnImage, out

# Plots two given images
def display(imageOne, imageTwo):
    plt.figure(figsize=(15, 15))
    plt.subplot(1,2,1)
    plt.imshow(tf.keras.preprocessing.image.array_to_img(imageOne))
    plt.axis("off")
    plt.subplot(1,2,2)
    plt.imshow(tf.keras.preprocessing.image.array_to_img(imageTwo))
    plt.axis("off")
    plt.show()

#Convert true pixel indexes to yolo format
def convertToYoloFormat(pixelPositions, num_boxes=1, imageWidth=488, imageHeight=488):
    # Format is [containsObj, center_x, center_y, width, height, classes...]
    cellHeight = imageHeight / S
    cellWidth = imageWidth / S
    returnBox = []
    countStart = 0
    countEnd = 1
    while countStart < num_boxes:
        _ ,xMin, xMax, yMin, yMax = pixelPositions[5*countStart:4*countEnd + 1]
        pointX = (floor((xMax - xMin)/2)) + xMin
        pointY = (floor((yMax - yMin)/2)) + yMin
        centerX = (pointX - floor(pointX/cellWidth) * cellWidth)/cellWidth
        centerY = (pointY - floor(pointY/cellHeight) * cellHeight)/cellHeight
        width = (xMax - xMin)/(cellHeight)
        height = (yMax - yMin)/(cellHeight)
        returnBox = returnBox + [_, centerX, centerY, width, height]
        countEnd += 1
        countStart += 1
        
    return returnBox + pixelPositions[5*countStart:]
    
# Convert the yolo box format to tensor box format NEED TO UPDATE ++++
def convertFromYolo(box):
    xMax = box[0]*488*2
    yMax = box[1]*488*2
    xMin = - box[2]*488 + xMax
    yMin = - box[3]*488 + yMax
    return xMax, yMax, xMin, yMin

In [295]:
for image, box in imageDataSet.take(1):
    load_jpeg(image, box)

In [296]:
# Convert boxes to yolo format:
convertedBoxes = [convertToYoloFormat(box) for box in boxes]
# Convert the convertedBoxes to the same format as y_pred
# Create a new dataset, pairing images and respective boxes
imageDataSet = tf.data.Dataset.from_tensor_slices((imagesRaw, convertedBoxes))
# Shuffle the data
imageDataSet = imageDataSet.shuffle(100)

In [297]:
entireSet = imageDataSet.map(load_jpeg)

In [298]:
# Divide the dataset into train, test and validation
trainSize = int(0.8 * 2594)
valSize = int(0.1 * 2594)
train = entireSet.take(trainSize)
temp = entireSet.skip(trainSize)
test = temp.skip(valSize)
validation = temp.take(valSize)

In [299]:
# Create batches:
train_batches = train.batch(1)
validation_batches = validation.batch(1)
test_batches = test.batch(1)

In [300]:
model = tf.keras.Sequential([
    #First Layer
    tf.keras.layers.Conv2D(64, (7,7), strides=(2, 2), activation=LeakyReLU(alpha=0.1),  input_shape=(488,488,3)),
    tf.keras.layers.MaxPooling2D((2, 2), strides=(2,2)),
    
    #Second Layer
    tf.keras.layers.Conv2D(192, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.MaxPooling2D((2, 2), strides=(2,2)),
    
    #Third Layer
    tf.keras.layers.Conv2D(128, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(256, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(256, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(512, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.MaxPooling2D((2, 2), strides=(2,2)),
    
    #Fourth Layer
    # +++ Repeated block
    tf.keras.layers.Conv2D(256, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(512, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    
    tf.keras.layers.Conv2D(256, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(512, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),

    tf.keras.layers.Conv2D(256, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(512, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),

    tf.keras.layers.Conv2D(256, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(512, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    # +++ END BLOCK
    tf.keras.layers.Conv2D(512, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(1024, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.MaxPooling2D((2, 2), strides=(2,2)),
    
    #Fifth layer
    # +++ Repeated Block
    tf.keras.layers.Conv2D(512, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(1024, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    
    tf.keras.layers.Conv2D(512, (1,1), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(1024, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    # +++ END BLOCK
    tf.keras.layers.Conv2D(1024, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(1024, (3,3), strides=(2, 2), activation=LeakyReLU(alpha=0.1), padding="same"),
    
    #Sixth Layer
    tf.keras.layers.Conv2D(1024, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    tf.keras.layers.Conv2D(1024, (3,3), activation=LeakyReLU(alpha=0.1), padding="same"),
    

    # Final Output Layer
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4096, activation=LeakyReLU(alpha=0.1)),
    tf.keras.layers.Dense(S * S * (B*5+C), input_shape=(4096,), activation="sigmoid"),
    tf.keras.layers.Reshape(target_shape = (S, S, (B*5+C)))
    
])

The yoloLoss function is an adaptation from here: https://blog.emmanuelcaradec.com/humble-yolo-implementation-in-keras/. Credit to Emmanuel Caradec

In [327]:
def yoloLoss(y_true, y_pred):
    grid = [[[float(x),float(y)]]*B for y in range(S) for x in range(S)]
    
    
    true_boxes = tf.reshape(y_true[...,:5], [-1,S*S,1,5])
    pred_boxes = tf.reshape(y_pred[...,:B*5], (-1,S*S,B,5))
    # Reshape the 3: too S*S, B, 5 i.e. 49, 2, 5. 
    # Each cell has two bounding boxes, with each bounding box, having 5 elements. 
    
    y_pred_conf = pred_boxes[...,0]
    y_true_conf = true_boxes[...,0]
    
    y_pred_wh   = pred_boxes[...,3:5]
    y_true_wh   = true_boxes[...,3:5]
    # y_pred_wh. Shape (S*S,B,2). Where each cell, has B bounding boxes, with each
    # Bounding box having two rows?

    y_pred_xy   = pred_boxes[...,1:3] + grid
    y_true_xy   = true_boxes[...,1:3]
    # The y_true_xy will have a shape of S*S, B, 2. Where each cell, has B bounding boxes, with two colums? 
    # I understand grabbing the xy pairs, but why + the grid?
    
    y_true_class = tf.reshape(y_true[...,5:], [-1, S*S, C]) 
    y_pred_class = tf.reshape(y_pred[...,B*5:], [-1, S*S, C])
    
    
    # Losses Calculations +++++++++++++++++++++++++++++++++++++++++++++++++++
    xy_loss    = tf.math.reduce_sum(tf.math.reduce_sum(tf.math.square(y_true_xy - y_pred_xy),
                                                       axis=-1)*y_true_conf, axis=-1)
    # Two reduce sums  = 2 summations. Axis = -1, along last dimensions i.e. columns/entries in this case.
    # Square is element wise. y_true_conf is the little 1, in the formula. 

    wh_loss    = tf.math.reduce_sum(tf.math.reduce_sum(tf.math.square(tf.math.sqrt(y_true_wh) - 
                                                                      tf.math.sqrt(y_pred_wh)), axis=-1)
                                    *y_true_conf, axis=-1)
    # Two reduce sums  = 2 summations. Axis = -1, along last dimensions i.e. rows in this case
    # Makes sense, matches formula. 
    
    clss_loss  = tf.math.reduce_sum(tf.math.square(y_true_class - y_pred_class)*y_true_conf, axis=-1)
    # Needs to be reshaped. 
    #clss_loss = tf.reshape(clss_loss, [-1, 49])
    
                                    
    intersect_wh = tf.math.maximum(tf.zeros_like(y_pred_wh), (y_pred_wh + y_true_wh)/2 
                                   - tf.math.abs(y_pred_xy - y_true_xy) )
    
    
    intersect_area = intersect_wh[...,0] * intersect_wh[...,1]
    true_area = y_true_wh[...,0] * y_true_wh[...,1]
    pred_area = y_pred_wh[...,0] * y_pred_wh[...,1]
    union_area = pred_area + true_area - intersect_area
    iou = intersect_area / union_area

    conf_loss = tf.math.reduce_sum(tf.math.square(y_true_conf*iou - y_pred_conf)*y_true_conf, axis=-1)
    
    loss =  clss_loss + xy_loss + wh_loss + conf_loss
    return tf.math.reduce_sum(loss)
                                    
    #return loss

In [330]:
def highestConfidenceBox(y_true, y_pred):
    # This will extract the best box across all batches. Kind of dumb. Need to change highest box per batch. 
    boxes = tf.reshape(y_pred[...,:B*5], (-1,S*S,B,5))
    maxValue = tf.reduce_max(boxes[...,4])
    location = tf.where(tf.math.equal(boxes[...,4], maxValue))[0]
    box = boxes[int(location[0]), int(location[1]), int(location[2])]
    return box

In [338]:
def jaccardIndex(y_true, y_pred):
    #output = tf.repeat(y_true, 98, axis=1)
    
    y_true = tf.reshape(y_true[...,:5], [-1,S*S,1,5])
    y_pred = tf.reshape(y_pred[...,:B*5], (-1,S*S,B,5))
    
    numerator = (tf.math.reduce_sum(tf.math.multiply(y_true, y_pred)))
    # Calculate the area of both - area that overlaps
    denom = tf.math.reduce_sum(y_true) + tf.math.reduce_sum(y_true) - numerator
    return numerator/denom

In [339]:
zeros = tf.zeros([1, 7, 7, 11])
for image, box in train_batches.take(1):
    testBox = box
    
output = yoloLoss(testBox, zeros)
jaccard = jaccardIndex(testBox, zeros)

In [340]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss=yoloLoss,
              metrics=[jaccardIndex])

In [341]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_120 (Conv2D)          (None, 241, 241, 64)      9472      
_________________________________________________________________
max_pooling2d_20 (MaxPooling (None, 120, 120, 64)      0         
_________________________________________________________________
conv2d_121 (Conv2D)          (None, 120, 120, 192)     110784    
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 60, 60, 192)       0         
_________________________________________________________________
conv2d_122 (Conv2D)          (None, 60, 60, 128)       24704     
_________________________________________________________________
conv2d_123 (Conv2D)          (None, 60, 60, 256)       295168    
_________________________________________________________________
conv2d_124 (Conv2D)          (None, 60, 60, 256)      

In [342]:
history =  model.fit(train_batches, epochs=1, validation_data=validation_batches)

 114/2075 [>.............................] - ETA: 38:32 - loss: 2207.7048 - jaccardIndex: 4.0957

KeyboardInterrupt: 