In [None]:
from keras import Sequential, Model
from keras.layers import Conv2D, Dense, Flatten, Reshape, Input, BatchNormalization, MaxPooling2D, LeakyReLU
from keras import backend as K
import tensorflow as tf
import numpy as np
from keras.applications import InceptionV3, ResNet50V2, InceptionResNetV2
from keras.optimizers import SGD
import json
import copy
import os
import cv2
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow
from PIL import Image, ImageDraw

# Project folder inside the mounted Google Drive, relative to the working directory.
BASE_PATH = os.path.join(os.curdir, 'drive', 'My Drive', 'IZ*Net')

In [None]:
import random
import matplotlib.patches as patches

# Anchor-box priors, one row per anchor, laid out as [width, height].
# Predicted box sizes are decoded as exp(raw output) * anchor size.
anchorSizes = np.array([
  [1., 1.],
  [1., 1.15],
])

def getClosestAnchor(width, height):
  """Return the index (0 or 1) of the anchor whose height/width ratio is nearest.

  Anchor 0 has ratio 1 and anchor 1 has ratio 1.15. Anchor 1 is chosen only
  when the box's ratio is strictly closer to 1.15 than to 1.
  """
  aspect = np.true_divide(height, width)
  distanceToTall = (aspect - 1.15) ** 2
  distanceToSquare = (aspect - 1) ** 2
  if distanceToTall < distanceToSquare:
    return 1
  return 0

def sigmoid(x):
  """Logistic function 1 / (1 + exp(-x)), evaluated with NumPy."""
  denominator = 1. + np.exp(-x)
  return np.true_divide(1., denominator)

def genRectangle():
  """Draw a random axis-aligned rectangle for the 15x15 image.

  Returns:
    (startRow, startCol, numRows, numCols). The start indices lie in 0..13 and
    each extent is at least 1 and at most 14 minus the matching start index.
  """
  # The order of the four draws is kept so seeded runs stay reproducible.
  top = random.randrange(14)
  left = random.randrange(14)
  rowCount = random.randrange(1, 15 - top)
  colCount = random.randrange(1, 15 - left)
  return (top, left, rowCount, colCount)

def createRandomImageAndEncoding():
  """Generate one synthetic training sample.

  Between one and three random rectangles are painted as ones onto a 15x15
  image of zeros. Each rectangle is written into a (3, 3, 2, 5) target tensor
  at the grid cell holding its centre and at its closest anchor, as
  [1, x offset in cell, y offset in cell, width, height]. The offsets are
  fractions of the 5-pixel cell; width and height are in pixels. A later
  rectangle landing on the same cell and anchor overwrites the earlier one.

  Returns:
    (image, encoding) with shapes (15, 15, 1) and (3, 3, 2, 5).
  """
  rectangleCount = random.randint(1, 3)
  rectangles = [genRectangle() for _ in range(rectangleCount)]

  image = np.zeros((15, 15))
  encoding = np.zeros((3, 3, 2, 5), dtype='float64')

  for (top, left, height, width) in rectangles:
    # Paint the rectangle's pixels in one slice assignment.
    image[top:top + height, left:left + width] = 1

    # Centre of the rectangle in pixel-index coordinates.
    centreRow = top + ((height - 1)/2.)
    centreCol = left + ((width - 1)/2.)

    # Split each centre into its grid cell and the remainder inside that cell.
    (cellRow, rowRemainder) = divmod(centreRow, 5)
    (cellCol, colRemainder) = divmod(centreCol, 5)
    rowFraction = np.true_divide(rowRemainder, 5.)
    colFraction = np.true_divide(colRemainder, 5.)

    anchorIndex = getClosestAnchor(width, height)

    encoding[int(cellRow), int(cellCol), anchorIndex] = (1, colFraction, rowFraction, width, height)

  return (image.reshape((15, 15, 1)), encoding)

def drawImageWithTrueEncoding(image, encoding):
  """Show `image` with the ground-truth boxes stored in a 3x3x2x5 encoding.

  A box is drawn for every grid cell/anchor slot whose first entry equals 1.
  Stored x/y are offsets within the cell and are converted to pixels with the
  5-pixel cell size; stored width/height are used as they are.
  """
  fig, ax = plt.subplots()
  ax.imshow(image, cmap='Greys')

  for row in range(3):
    for col in range(3):
      for boxNum in range(2):
        box = encoding[row][col][boxNum]
        if not (box[0] == 1):
          continue

        centerX = (box[1] + col) * 5
        centerY = (box[2] + row) * 5
        boxWidth = box[3]
        boxHeight = box[4]

        corner = (centerX - (boxWidth/2.), centerY - (boxHeight/2.))
        outline = patches.Rectangle(corner, boxWidth, boxHeight, linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(outline)
        plt.scatter(centerX, centerY, c='red')

  plt.show()

def drawImage(image, encoding):
  """Show `image` with the boxes decoded from a raw 3x3x2x5 network output.

  A slot is drawn when the sigmoid of its first entry exceeds 0.5. Centres are
  decoded as (sigmoid(raw) + cell index) * 5 pixels. Sizes are decoded as
  anchor size * exp(raw) and used directly as pixel extents.
  """
  fig, ax = plt.subplots()
  ax.imshow(image, cmap='Greys')

  for row in range(3):
    for col in range(3):
      for boxNum in range(2):
        box = encoding[row][col][boxNum]
        if not (sigmoid(box[0]) > 0.5):
          continue

        centerX = (sigmoid(box[1]) + col) * 5
        centerY = (sigmoid(box[2]) + row) * 5
        boxWidth = anchorSizes[boxNum][0] * np.exp(box[3])
        boxHeight = anchorSizes[boxNum][1] * np.exp(box[4])

        corner = (centerX - (boxWidth/2.), centerY - (boxHeight/2.))
        outline = patches.Rectangle(corner, boxWidth, boxHeight, linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(outline)
        plt.scatter(centerX, centerY, c='red')

  plt.show()

# Smoke test: generate one sample and display it with its ground-truth boxes.
image, encoding = createRandomImageAndEncoding()
drawImageWithTrueEncoding(image.reshape(15, 15), encoding)

In [None]:
def getHelperFunctions(gridSize):
  """Return two zero-argument callables that build grid-cell index offsets.

  Nothing is evaluated until a callable is invoked. The first yields
  arange(gridSize) with a trailing axis added; the second yields
  arange(gridSize) reshaped to (gridSize, 1) with a further trailing axis
  added. Both are float64.
  """
  def buildXOffsets():
    return K.expand_dims(K.arange(gridSize, dtype=tf.float64))

  def buildYOffsets():
    columnVector = K.reshape(K.arange(gridSize, dtype=tf.float64), (gridSize, 1))
    return K.expand_dims(columnVector)

  return (buildXOffsets, buildYOffsets)

def yoloLoss(yTrueIn, yPredIn):
  """YOLO-style detection loss for the (rows, cols, boxes, 5) grid encoding.

  Args:
    yTrueIn: Target tensor of shape (batch, rows, cols, boxes, 5). The last
      axis is [objectness, x offset in cell, y offset in cell, width, height],
      with width/height in pixels, as built by createRandomImageAndEncoding.
    yPredIn: Raw network output of the same shape. Objectness and x/y are
      squashed with a sigmoid; width/height are decoded as exp(raw) * anchor.

  Returns:
    Scalar float64 tensor: the summed loss terms divided by the batch size.
  """
  LAMBDA_COORD = 5
  LAMBDA_NOOBJ = 0.5
  # Pixels per grid cell (15-pixel image / 3 cells), the same factor used by
  # createRandomImageAndEncoding and drawImage.
  CELL_SIZE = 5.
  (_, N_ROWS, N_COLS, _, _) = yTrueIn.shape

  yTrue = K.cast(yTrueIn, 'float64')
  yPred = K.cast(yPredIn, 'float64')

  # X offsets broadcast along the column axis and Y offsets along the row axis,
  # so each must be sized from its own grid dimension.
  (getXOffsets, _) = getHelperFunctions(N_COLS)
  (_, getYOffsets) = getHelperFunctions(N_ROWS)
  xOffsets = getXOffsets()
  yOffsets = getYOffsets()

  # Ground truth: centres in grid-cell units, sizes in pixels.
  indicatorMask = K.expand_dims(yTrue[...,0])
  yTrueCenters = K.concatenate([K.expand_dims(yTrue[...,1] + xOffsets), K.expand_dims(yTrue[...,2] + yOffsets)])
  yTrueSizes = yTrue[...,3:5]

  # Predictions decoded into the same representation.
  yPredConfidence = K.expand_dims(tf.sigmoid(yPred[...,0]))
  yPredCenters = K.concatenate([K.expand_dims(tf.sigmoid(yPred[...,1]) + xOffsets), K.expand_dims(tf.sigmoid(yPred[...,2]) + yOffsets)])
  yPredSizes = K.concatenate([K.expand_dims(K.exp(yPred[...,3]) * anchorSizes[...,0]), K.expand_dims(K.exp(yPred[...,4]) * anchorSizes[...,1])])

  # Localisation terms, counted only where an object is present.
  xyLoss = K.sum(K.square((yTrueCenters - yPredCenters) * indicatorMask))
  whLoss = K.sum(K.square((K.sqrt(yTrueSizes) - K.sqrt(yPredSizes)) * indicatorMask))

  # IoU needs centres and sizes in the same unit. Sizes are in pixels, so the
  # cell-unit centres are converted to pixels before the corners are formed.
  yTruePixelCenters = yTrueCenters * CELL_SIZE
  yPredPixelCenters = yPredCenters * CELL_SIZE
  topLeftCoordOfIntersect = K.maximum(yPredPixelCenters - (yPredSizes / 2.), yTruePixelCenters - (yTrueSizes / 2.))
  bottomRightCoordOfIntersect = K.minimum(yPredPixelCenters + (yPredSizes / 2.), yTruePixelCenters + (yTrueSizes / 2.))
  widthAndHeightOfIntersect = K.maximum(bottomRightCoordOfIntersect - topLeftCoordOfIntersect, 0.)
  intersectionAreas = widthAndHeightOfIntersect[...,0] * widthAndHeightOfIntersect[...,1]
  encodingAreas = yPredSizes[...,0] * yPredSizes[...,1]
  boundingBoxAreas = yTrueSizes[...,0] * yTrueSizes[...,1]
  # The union is positive even for empty slots because exp(.) * anchor > 0.
  unionAreas = encodingAreas + boundingBoxAreas - intersectionAreas
  IOUScores = tf.truediv(intersectionAreas, unionAreas)

  # Confidence terms: push confidence to 0 on empty slots and to 1 on occupied
  # ones (both weighted by LAMBDA_NOOBJ), plus an IoU shortfall on occupied slots.
  noObjCLoss = LAMBDA_NOOBJ * (K.sum(K.square(yPredConfidence * (1. - indicatorMask))) + K.sum(K.square((1 - yPredConfidence) * indicatorMask)))
  objCLoss = K.sum(K.square(K.expand_dims(yTrue[...,0] - IOUScores) * indicatorMask))

  # The static batch dimension can be None during graph tracing, so the batch
  # size is read from the runtime shape instead.
  batchSize = K.cast(K.shape(yTrue)[0], 'float64')
  return tf.truediv(noObjCLoss + objCLoss + LAMBDA_COORD * (xyLoss + whLoss), batchSize)

def yoloAccuracy(yTrueIn, yPredIn):
  """Confidence-weighted mean IoU over the slots that contain an object.

  Args:
    yTrueIn: Target tensor of shape (batch, rows, cols, boxes, 5). The last
      axis is [objectness, x offset in cell, y offset in cell, width, height],
      with width/height in pixels, as built by createRandomImageAndEncoding.
    yPredIn: Raw network output of the same shape. Objectness and x/y are
      squashed with a sigmoid; width/height are decoded as exp(raw) * anchor.

  Returns:
    Scalar float64 tensor: sum(IoU * predicted confidence) over object slots
    divided by the number of object slots (0 when the batch has no objects).
  """
  # Pixels per grid cell (15-pixel image / 3 cells), the same factor used by
  # createRandomImageAndEncoding and drawImage.
  CELL_SIZE = 5.
  (_, N_ROWS, N_COLS, _, _) = yTrueIn.shape

  yTrue = K.cast(yTrueIn, 'float64')
  yPred = K.cast(yPredIn, 'float64')

  # X offsets broadcast along the column axis and Y offsets along the row axis,
  # so each must be sized from its own grid dimension.
  (getXOffsets, _) = getHelperFunctions(N_COLS)
  (_, getYOffsets) = getHelperFunctions(N_ROWS)
  xOffsets = getXOffsets()
  yOffsets = getYOffsets()

  # Ground truth: centres in grid-cell units, sizes in pixels.
  yTrueConfidence = yTrue[...,0]
  yTrueCenters = K.concatenate([K.expand_dims(yTrue[...,1] + xOffsets), K.expand_dims(yTrue[...,2] + yOffsets)])
  yTrueSizes = yTrue[...,3:5]

  # Predictions decoded into the same representation.
  yPredConfidence = tf.sigmoid(yPred[...,0])
  yPredCenters = K.concatenate([K.expand_dims(tf.sigmoid(yPred[...,1]) + xOffsets), K.expand_dims(tf.sigmoid(yPred[...,2]) + yOffsets)])
  yPredSizes = K.concatenate([K.expand_dims(K.exp(yPred[...,3]) * anchorSizes[...,0]), K.expand_dims(K.exp(yPred[...,4]) * anchorSizes[...,1])])

  # IoU needs centres and sizes in the same unit. Sizes are in pixels, so the
  # cell-unit centres are converted to pixels before the corners are formed.
  yTruePixelCenters = yTrueCenters * CELL_SIZE
  yPredPixelCenters = yPredCenters * CELL_SIZE
  topLeftCoordOfIntersect = K.maximum(yPredPixelCenters - (yPredSizes / 2.), yTruePixelCenters - (yTrueSizes / 2.))
  bottomRightCoordOfIntersect = K.minimum(yPredPixelCenters + (yPredSizes / 2.), yTruePixelCenters + (yTrueSizes / 2.))
  widthAndHeightOfIntersect = K.maximum(bottomRightCoordOfIntersect - topLeftCoordOfIntersect, 0.)
  intersectionAreas = widthAndHeightOfIntersect[...,0] * widthAndHeightOfIntersect[...,1]
  encodingAreas = yPredSizes[...,0] * yPredSizes[...,1]
  boundingBoxAreas = yTrueSizes[...,0] * yTrueSizes[...,1]
  # The union is positive even for empty slots because exp(.) * anchor > 0.
  unionAreas = encodingAreas + boundingBoxAreas - intersectionAreas
  IOUScores = tf.truediv(intersectionAreas, unionAreas)

  # The object count is a sum of 0/1 flags, so flooring it at 1 only changes
  # the empty-batch case, turning 0/0 (NaN) into 0.
  objectCount = K.maximum(K.sum(yTrueConfidence), 1.)
  return tf.truediv(K.sum(IOUScores * yPredConfidence * yTrueConfidence), objectCount)

In [None]:
def network():
  """Build the detection CNN: 15x15x1 image in, raw (3, 3, 2, 5) encoding out.

  Two stacks of three 'same'-padded 5x5 convolutions keep the 15x15 resolution.
  A third stack of three unpadded 5x5 convolutions shrinks it 15 -> 11 -> 7 -> 3.
  Each stack ends in batch normalisation. Three dense layers then produce the
  90 raw outputs, reshaped to (3, 3, 2, 5).

  Every stack consumes the previous stack's output. Previously the second and
  third stacks restarted from the input tensor, so the earlier stacks never
  became part of the model.

  NOTE(review): no layer has an activation, so apart from batch normalisation
  the stacked layers are linear. Confirm whether LeakyReLU was intended.

  Returns:
    An uncompiled keras Model.
  """
  X_input = Input((15, 15, 1))
  X = X_input

  # Two resolution-preserving stacks.
  for _ in range(2):
    X = Conv2D(128, (5, 5), padding='same')(X)
    X = Conv2D(128, (5, 5), padding='same')(X)
    X = Conv2D(128, (5, 5), padding='same')(X)
    X = BatchNormalization()(X)

  # Unpadded stack: each 5x5 convolution trims 4 pixels per side length.
  X = Conv2D(128, (5, 5))(X)
  X = Conv2D(128, (5, 5))(X)
  X = Conv2D(128, (5, 5))(X)
  X = BatchNormalization()(X)

  X = Flatten()(X)
  X = Dense(1024)(X)
  X = Dense(1024)(X)
  # 90 = 3 rows * 3 cols * 2 boxes * 5 values per box.
  X = Dense(90)(X)
  X = Reshape((3, 3, 2, 5))(X)
  return Model(inputs=X_input, outputs=X)

In [None]:
from keras.optimizers import Adadelta

# Build and compile the model with the custom loss and metric.
# Note: 10e-4 is 1e-3, 10e-2 is 0.1 and 10e-3 / 2 is 0.005.
model = network()
model.compile(loss=yoloLoss, optimizer=SGD(learning_rate=10e-4, momentum=0.998, clipnorm=10e-2, clipvalue=(10e-3 / 2.)), metrics=[yoloAccuracy])

# Fixed synthetic training set, generated once.
imagesAndEncodings = [createRandomImageAndEncoding() for _ in range(1000)]

NUM_EPOCH = 600
BATCH_SIZE = 32
losses = []                # loss of every training batch, in order
accuracies = []            # mean batch accuracy of every epoch
maxMeanAccuracySofar = 0   # best per-epoch mean accuracy seen so far

# The partition into batches is identical for every epoch, so build it once.
batches = [imagesAndEncodings[i:i + BATCH_SIZE] for i in range(0, len(imagesAndEncodings), BATCH_SIZE)]

for epochNum in range(1, NUM_EPOCH + 1):
  print("Begin epoch {}/{}".format(epochNum, NUM_EPOCH))

  batchAccuracies = []
  for batchNum, theBatch in enumerate(batches, start=1):
    print("Training on batch {}/{} ........".format(batchNum, len(batches)), end=" ")
    inputImgs = np.array([tup[0].reshape((15, 15, 1)) for tup in theBatch])
    outputEncoding = np.array([tup[1] for tup in theBatch])
    (loss, accuracy) = model.train_on_batch(x=inputImgs, y=outputEncoding)
    print("Loss: {}, Accuracy: {}".format(loss, accuracy))
    losses.append(loss)
    batchAccuracies.append(accuracy)

  # Record the epoch summary; these trackers were previously never updated.
  meanAccuracy = float(np.mean(batchAccuracies))
  accuracies.append(meanAccuracy)
  maxMeanAccuracySofar = max(maxMeanAccuracySofar, meanAccuracy)

plt.plot(losses)

In [None]:
# Show the model's decoded predictions on the first 50 training images.
for tup in imagesAndEncodings[:50]:
  sampleImage = tup[0]
  rawPrediction = model(np.array([sampleImage]))
  drawImage(sampleImage.reshape(15, 15), tf.reshape(rawPrediction, (3, 3, 2, 5)))