In [None]:
import io
import json
import os
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from google.colab.patches import cv2_imshow
from PIL import Image, ImageDraw
from keras import Sequential, Model
from keras import backend as K
from keras.layers import Conv2D, Dense, Flatten, Reshape, Input, BatchNormalization, MaxPooling2D, LeakyReLU, Add
from keras.optimizers import SGD


# Root directory for every file this notebook reads or writes: output.json, the
# .h5 checkpoints and the train/ image folders are all joined onto this path.
# NOTE(review): the 'drive/My Drive' prefix looks like a Colab Google Drive mount - confirm it is mounted before running.
BASE_PATH = os.path.join('.', 'drive', 'My Drive', 'IZ*Net')

In [None]:
# Anchor box priors, one row per anchor box as (width, height). The network's raw
# width/height outputs are passed through exp() and multiplied by these values;
# the results are later scaled by (image size / grid size), i.e. they are in grid-cell units.
ANCHOR_SIZES = np.array([[1, 1], [1, 1.15]])

def getHelperFunctions(gridSize):
  """Return two zero-argument callables that build grid-index offset tensors.

  Args:
    gridSize: number of grid cells along one side.

  Returns:
    A tuple (getXOffsets, getYOffsets). Calling the first yields a float64
    tensor of shape (gridSize, 1) holding 0..gridSize-1; calling the second
    yields the same values with shape (gridSize, 1, 1). The differing ranks
    make the two tensors broadcast along different axes of a
    (..., rows, cols, anchors) tensor.
  """
  def buildXOffsets():
    indices = K.arange(gridSize, dtype=tf.float64)
    return K.expand_dims(indices)

  def buildYOffsets():
    indices = K.arange(gridSize, dtype=tf.float64)
    asColumn = K.reshape(indices, (gridSize, 1))
    return K.expand_dims(asColumn)

  return (buildXOffsets, buildYOffsets)

def _decodeBoxes(yTrueIn, yPredIn):
  """Convert label and raw network tensors into comparable grid-space boxes.

  Both inputs are indexed as (batch, rows, cols, anchors, 5) with the last axis
  ordered [confidence, x, y, w, h].

  Args:
    yTrueIn: label tensor; x/y are offsets inside the cell, w/h are used as-is.
    yPredIn: raw network output; confidence/x/y go through a sigmoid, w/h
      through exp() scaled by ANCHOR_SIZES.

  Returns:
    (trueBoxes, predBoxes): float64 tensors with the same layout as the inputs
    but with the cell's column/row index added to x/y.
  """
  nRows = yTrueIn.shape[1]

  yTrue = K.cast(yTrueIn, 'float64')
  yPred = K.cast(yPredIn, 'float64')

  (getXOffsets, getYOffsets) = getHelperFunctions(nRows)
  xOffsets = getXOffsets()
  yOffsets = getYOffsets()

  trueBoxes = tf.stack([
      yTrue[...,0],
      yTrue[...,1] + xOffsets,
      yTrue[...,2] + yOffsets,
      yTrue[...,3],
      yTrue[...,4],
  ], axis=-1)

  predBoxes = tf.stack([
      tf.sigmoid(yPred[...,0]),
      tf.sigmoid(yPred[...,1]) + xOffsets,
      tf.sigmoid(yPred[...,2]) + yOffsets,
      K.exp(yPred[...,3]) * ANCHOR_SIZES[...,0],
      K.exp(yPred[...,4]) * ANCHOR_SIZES[...,1],
  ], axis=-1)

  return (trueBoxes, predBoxes)


def _computeIOU(predBoxes, trueBoxes):
  """Intersection-over-union of each predicted box with the label box in the same slot.

  Boxes are [confidence, centerX, centerY, width, height] on the last axis; the
  result drops that axis.
  """
  predCenters, predHalfSizes = predBoxes[...,1:3], predBoxes[...,3:5] / 2.
  trueCenters, trueHalfSizes = trueBoxes[...,1:3], trueBoxes[...,3:5] / 2.

  topLeftCoordOfIntersect = K.maximum(predCenters - predHalfSizes, trueCenters - trueHalfSizes)
  bottomRightCoordOfIntersect = K.minimum(predCenters + predHalfSizes, trueCenters + trueHalfSizes)
  # Clamp at zero so disjoint boxes contribute no (negative) overlap.
  widthAndHeightOfIntersect = K.maximum(bottomRightCoordOfIntersect - topLeftCoordOfIntersect, 0)

  intersectionAreas = widthAndHeightOfIntersect[...,0] * widthAndHeightOfIntersect[...,1]
  encodingAreas = predBoxes[...,3] * predBoxes[...,4]
  boundingBoxAreas = trueBoxes[...,3] * trueBoxes[...,4]
  unionAreas = encodingAreas + boundingBoxAreas - intersectionAreas
  return tf.truediv(intersectionAreas, unionAreas)


def yoloLoss(yTrueIn, yPredIn):
  """YOLO-style loss, summed over all slots and divided by the batch size.

  Terms (the label confidence channel is used as the object mask):
    * xyLoss: squared error of box centers on object slots.
    * whLoss: squared error of sqrt(width/height) on object slots.
    * noObjCLoss: LAMBDA_NOOBJ * (predicted confidence^2 on non-object slots
      + (1 - predicted confidence)^2 on object slots).
    * objCLoss: (label confidence - IOU)^2 on object slots.
  The coordinate terms are weighted by LAMBDA_COORD.

  Args:
    yTrueIn: labels, (batch, rows, cols, anchors, 5).
    yPredIn: raw network output of the same shape.

  Returns:
    Scalar float64 tensor.
  """
  LAMBDA_COORD = 25
  LAMBDA_NOOBJ = 0.5

  (trueBoxes, predBoxes) = _decodeBoxes(yTrueIn, yPredIn)

  # Use the dynamic batch size: the static shape entry is None whenever the
  # batch dimension is not fixed at trace time, which would break the division.
  batchSize = K.cast(K.shape(yTrueIn)[0], 'float64')

  indicatorMask = K.expand_dims(trueBoxes[...,0])
  xyLoss = K.sum(K.square((trueBoxes[...,1:3] - predBoxes[...,1:3]) * indicatorMask))
  whLoss = K.sum(K.square((K.sqrt(trueBoxes[...,3:5]) - K.sqrt(predBoxes[...,3:5])) * indicatorMask))

  IOUScores = _computeIOU(predBoxes, trueBoxes)

  predConfidence = K.expand_dims(predBoxes[...,0])
  # NOTE(review): LAMBDA_NOOBJ also scales the object-slot confidence term below,
  # not only the no-object term - kept as in the original; confirm this is intended.
  noObjCLoss = LAMBDA_NOOBJ * (
      K.sum(K.square(predConfidence * (1. - indicatorMask)))
      + K.sum(K.square((1 - predConfidence) * indicatorMask)))
  objCLoss = K.sum(K.square(K.expand_dims(trueBoxes[...,0] - IOUScores) * indicatorMask))

  return tf.truediv(noObjCLoss + objCLoss + LAMBDA_COORD * (xyLoss + whLoss), batchSize)


def yoloAccuracy(yTrueIn, yPredIn):
  """Confidence-weighted IOU, normalized by the summed label confidence.

  Computes sum(IOU * predicted confidence * label confidence) divided by
  sum(label confidence). Returns 0 (instead of NaN) when the batch contains no
  labelled objects, so one empty batch cannot poison an epoch mean.

  Args:
    yTrueIn: labels, (batch, rows, cols, anchors, 5).
    yPredIn: raw network output of the same shape.

  Returns:
    Scalar float64 tensor.
  """
  (trueBoxes, predBoxes) = _decodeBoxes(yTrueIn, yPredIn)
  IOUScores = _computeIOU(predBoxes, trueBoxes)

  weightedIOUSum = K.sum(IOUScores * predBoxes[...,0] * trueBoxes[...,0])
  labelConfidenceSum = K.sum(trueBoxes[...,0])
  return tf.math.divide_no_nan(weightedIOUSum, labelConfidenceSum)

In [None]:
def Conv(XInput, filters, kernel_size, strides):
  """Apply Conv2D ('same' padding), then BatchNormalization, then LeakyReLU(0.1).

  Args:
    XInput: input tensor.
    filters, kernel_size, strides: forwarded unchanged to Conv2D.

  Returns:
    The activated output tensor.
  """
  pipeline = (
      Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding='same'),
      BatchNormalization(),
      LeakyReLU(0.1),
  )
  activations = XInput
  for layer in pipeline:
    activations = layer(activations)
  return activations

def Residual(XInput, filters):
  """Residual block: a 1x1 Conv to `filters` channels, a 3x3 Conv to
  `filters * 2` channels, then an element-wise Add with the block input.

  The skip connection adds XInput to a tensor with `filters * 2` channels, so
  callers pass `filters` equal to half the input's channel count (as network()
  does).
  """
  squeezed = Conv(XInput, filters=filters, kernel_size=1, strides=1)
  expanded = Conv(squeezed, filters=filters * 2, kernel_size=3, strides=1)
  return Add()([XInput, expanded])

def network():
  """Build the detector: a stack of Conv/Residual stages followed by a dense head.

  Input is a (416, 416, 3) image. After a stride-1 stem, five stride-2 Conv
  layers each halve the spatial size, and each is followed by a run of Residual
  blocks. The head is a 1x1 Conv2D(128), Flatten, Dense(1690) and a Reshape to
  (13, 13, 2, 5).

  Returns:
    An uncompiled keras Model.
  """
  X_input = Input((416, 416, 3))

  # Stem
  X = Conv(X_input, filters=32, kernel_size=3, strides=1)

  # (filters of the stride-2 Conv, number of Residual blocks that follow it)
  stages = (
      (64, 1),
      (128, 2),
      (256, 8),
      (512, 8),
      (1024, 4),
  )
  for (stageFilters, residualCount) in stages:
    X = Conv(X, filters=stageFilters, kernel_size=3, strides=2)
    for _ in range(residualCount):
      X = Residual(X, filters=stageFilters // 2)

  # Head: 1690 = 13 * 13 * 2 * 5 values, reshaped into the prediction grid.
  for headLayer in (Conv2D(128, 1), Flatten(), Dense(1690), Reshape((13, 13, 2, 5))):
    X = headLayer(X)

  return Model(inputs=X_input, outputs=X)

# print(network().summary())

In [None]:
def load_img(img_path):
    """Load an image file as a normalized (416, 416, 3) array.

    Args:
        img_path: path of the image to read.

    Returns:
        Float array with values in [0, 1], rounded to 12 decimals, with the
        channel order reversed relative to what cv2.imread returns.

    Raises:
        OSError: if the file is missing or cannot be decoded.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising, which
        # would otherwise surface as an obscure error inside cv2.resize.
        raise OSError("Could not read image (missing or unreadable): {}".format(img_path))
    img = cv2.resize(img, (416, 416))
    img = img[..., ::-1]  # Reverse channels
    img = np.around(img/255.0, decimals=12)  # Normalize
    return img

def getBoundingBoxCoordinates(image, boundingBoxRow, boundingBoxCol, anchorBox, boundingBoxInfo, gridSize=13):
    """Decode one raw box prediction into pixel corner coordinates on `image`.

    Args:
        image: object exposing `.size` as (width, height), e.g. a PIL image.
        boundingBoxRow: grid row of the predicting cell.
        boundingBoxCol: grid column of the predicting cell.
        anchorBox: row index into ANCHOR_SIZES.
        boundingBoxInfo: the four raw values [x, y, w, h] for this slot; x/y are
            passed through a sigmoid, w/h through exp() times the anchor size.
        gridSize: number of grid cells per side; defaults to 13, the grid the
            network in this notebook predicts on.

    Returns:
        [startY, startX, endY, endX] in pixels (the corner order expected by
        tf.image.non_max_suppression).
    """
    imageWidth, imageHeight = image.size
    widthScale = imageWidth / gridSize
    heightScale = imageHeight / gridSize

    (bbInfoX, bbInfoY) = tf.sigmoid(boundingBoxInfo[:2])
    (bbInfoW, bbInfoH) = tf.exp(boundingBoxInfo[2:]) * ANCHOR_SIZES[anchorBox]

    # Box center in grid units: offset inside the cell plus the cell's index.
    midX = bbInfoX + boundingBoxCol
    midY = bbInfoY + boundingBoxRow

    halfW = bbInfoW / 2.
    halfH = bbInfoH / 2.

    startX = (midX - halfW) * widthScale
    startY = (midY - halfH) * heightScale
    endX = (midX + halfW) * widthScale
    endY = (midY + halfH) * heightScale

    return [startY, startX, endY, endX]

def drawBoxes(image, boxes):
    """Outline every box on `image` in red, two pixels thick.

    Args:
        image: PIL image to draw on; it is modified directly.
        boxes: iterable of [startY, startX, endY, endX] pixel coordinates.
    """
    canvas = ImageDraw.Draw(image)

    for (startY, startX, endY, endX) in boxes:
        # Two nested rectangles, the second inset by one pixel, thicken the outline.
        for inset in (0, 1):
            topLeft = (startX + inset, startY + inset)
            bottomRight = (endX - inset, endY - inset)
            canvas.rectangle([topLeft, bottomRight], outline='red')

def drawImage(imgPath, encoding):
    """Display the image at `imgPath` with the network's detected boxes drawn on it.

    Every slot of the 13x13x2 prediction grid is decoded to a pixel box and a
    sigmoid confidence, non-max suppression keeps at most 12 boxes (IOU
    threshold 0.5), and survivors with confidence above 0.3 are outlined.

    Args:
        imgPath: path of the image to show.
        encoding: raw network output indexable as [row][col][box], each entry
            holding [confidence, x, y, w, h].
    """
    # Open inside a context manager so the file handle is released promptly;
    # convert()/resize() return independent in-memory copies.
    with Image.open(imgPath) as sourceImage:
        theImage = sourceImage.convert('RGB').resize((299, 299))

    boundingBoxes = []
    confidenceScores = []
    for row in range(13):
        for col in range(13):
            for boxNum in range(2):
                slot = encoding[row][col][boxNum]
                boundingBoxes.append(getBoundingBoxCoordinates(theImage, row, col, boxNum, slot[1:]))
                confidenceScores.append(tf.sigmoid(slot[0]))

    selectedIndices = tf.image.non_max_suppression(boundingBoxes, confidenceScores, max_output_size=12, iou_threshold=0.5)
    selectedBoxes = tf.gather(boundingBoxes, selectedIndices)
    selectedBoxesConfidences = tf.gather(confidenceScores, selectedIndices)

    highConfidenceBoxes = [box for (box, boxConfidence) in zip(selectedBoxes, selectedBoxesConfidences) if boxConfidence > 0.3]
    drawBoxes(theImage, highConfidenceBoxes)
    # cv2_imshow expects the reverse channel order of the RGB PIL image.
    cv2_imshow(np.array(theImage)[...,::-1])

In [None]:
# Single checkpoint path: training resumes from this file and the best-so-far
# model (by mean epoch accuracy) is written back to it.
ACCURACY_SAVE_PATH = os.path.join(BASE_PATH, 'network4_yolo_highestMeanAccuracy.h5')
START_EPOCH = 0
NUM_EPOCH = 30
BATCH_SIZE = 32

# output.json holds a single JSON string: the latin-1 text of a serialized numpy
# object (a dict mapping image path -> label encoding).
with open(os.path.join(BASE_PATH, 'output.json')) as infile:
    memfile = io.BytesIO(json.load(infile).encode('latin-1'))
    # SECURITY: allow_pickle=True unpickles the payload, which can execute
    # arbitrary code. Only load an output.json that you generated yourself.
    data = np.load(memfile, allow_pickle=True)[()] # See https://stackoverflow.com/questions/30811918/saving-dictionary-of-numpy-arrays/45661259

dataKeysList = list(data.keys())

model = network()
model.load_weights(ACCURACY_SAVE_PATH)
model.compile(loss=yoloLoss, optimizer=SGD(learning_rate=1e-4, momentum=0.92, clipnorm=1), metrics=[yoloAccuracy])

losses = []
accuracies = []
maxMeanAccuracySofar = 0
for epochNum in range(START_EPOCH + 1, START_EPOCH + NUM_EPOCH + 1):
  print("Begin epoch {}/{}".format(epochNum, START_EPOCH + NUM_EPOCH))
  # Reshuffle every epoch so batches differ between epochs.
  random.shuffle(dataKeysList)
  batches = [dataKeysList[i:i + BATCH_SIZE] for i in range(0, len(dataKeysList), BATCH_SIZE)]

  batchAccuracies = []
  for batchNum, theBatch in enumerate(batches, start=1):
    print("Training on batch {}/{} ........".format(batchNum, len(batches)), end=" ")
    # Images are loaded per batch; keys of `data` are paths relative to BASE_PATH.
    inputImgs = np.array([load_img(os.path.join(BASE_PATH, path)) for path in theBatch])
    outputEncoding = np.array([data[path] for path in theBatch])
    (loss, accuracy) = model.train_on_batch(x=inputImgs, y=outputEncoding)
    print("Loss: {}, Accuracy: {}".format(loss, accuracy))
    losses.append(loss)
    batchAccuracies.append(accuracy)

  accuracies += batchAccuracies
  meanAccuracy = np.mean(batchAccuracies)
  # Keep only the checkpoint with the best mean accuracy seen in this run.
  if meanAccuracy > maxMeanAccuracySofar:
    maxMeanAccuracySofar = meanAccuracy
    model.save(ACCURACY_SAVE_PATH)

  # Periodic snapshot every 10 epochs, regardless of accuracy.
  if epochNum % 10 == 0:
    model.save(os.path.join(BASE_PATH, 'network4_yolo_epoch_{}.h5'.format(epochNum)))

plt.plot(losses)
plt.xlabel('Training batch')
plt.ylabel('Loss')
plt.title('Training loss per batch')
plt.show()

In [None]:
# Collect (member, imageName) pairs from BASE_PATH/train/<member>/<imageName>.
# Non-directory entries directly under train/ are skipped so a stray file does
# not make os.listdir fail.
trainDir = os.path.join(BASE_PATH, 'train')
allImages = [
    (member, imageName)
    for member in os.listdir(trainDir)
    if os.path.isdir(os.path.join(trainDir, member))
    for imageName in os.listdir(os.path.join(trainDir, member))
]
random.shuffle(allImages)

# Fresh model with the best checkpoint's weights.
model = network()
model.load_weights(os.path.join(BASE_PATH, 'network4_yolo_highestMeanAccuracy.h5'))

# Show predictions for ten randomly chosen images.
for (member, imageName) in allImages[:10]:
  print(member, imageName)
  fullPath = os.path.join(trainDir, member, imageName)
  # The model receives a batch of one; the reshape drops that batch axis.
  prediction = model(np.array([load_img(fullPath)]))
  drawImage(fullPath, tf.reshape(prediction, (13, 13, 2, 5)))