In [1]:
import os
from google.colab import drive
# Directory at which Google Drive is mounted (passed to `drive.mount` below).
MOUNTPOINT = '/content/gdrive'

# Project folder on the mounted drive; every other path is derived from it.
DATADIR = os.path.join(MOUNTPOINT, 'My Drive', 'Project_Face_Mask')
# 'Annotations' folder with one sub-folder per split.
DATADIR_annotations = os.path.join(DATADIR, 'Annotations')
DATADIR_annotations_train = os.path.join(DATADIR_annotations, 'train')
DATADIR_annotations_test = os.path.join(DATADIR_annotations, 'test')
# 'train' / 'test' folders directly below the project folder.
DATADIR_train = os.path.join(DATADIR, 'train')
DATADIR_test = os.path.join(DATADIR, 'test')

# Mount the drive; the paths above are only plain strings until this has run.
drive.mount(MOUNTPOINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
import xml.etree.ElementTree as ET
import pandas as pd
import lxml
import os
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import tensorflow as tf 
import numpy as np 

In [35]:
def _prepare_training_example(img, target, bbox):
  """Turn one stored (image, class id, bounding box) triple into model input.

  The image is cast to float32, rescaled with `img / 128 - 1`, and given a
  leading axis of size 1. The class id is one-hot encoded over 3 classes.
  The bounding box is passed through unchanged.
  """
  img = tf.cast(img, tf.float32)
  img = (img/128.)-1.
  img = tf.expand_dims(img, axis = 0)
  return img, tf.one_hot(target, 3), bbox


ds_train = tf.data.experimental.load(os.path.join(DATADIR, 'train_ds_yolov3'))

print(ds_train)

# A single map applies all per-example conversions; shuffling uses a buffer of 100.
ds_train = ds_train.map(_prepare_training_example).shuffle(100)



<_LoadDataset element_spec=(TensorSpec(shape=(416, 416, 3), dtype=tf.uint8, name=None), TensorSpec(shape=(), dtype=tf.int32, name=None), RaggedTensorSpec(TensorShape([None]), tf.int32, 0, tf.int64))>


In [4]:
"""Anchor-Boxes for the predictions on the 3 different Shapes"""
# Anchor boxes for the predictions on the 3 scales; the model takes them in
# consecutive groups of three (anchor[0:3], anchor[3:6], anchor[6:9]).
anchor = [
    (10, 13),
    (16, 30),
    (33, 23),
    (30, 61),
    (62, 45),
    (59, 119),
    (116, 90),
    (156, 198),
    (373, 326),
]

In [5]:
"""
Pads the Input of the Tensor with Constant values
"""
def constant_padding(tensor, kernel_size):
  """Pad axes 1 and 2 of a rank-4 tensor so a `kernel_size` window fits.

  A total of `kernel_size - 1` entries is added along each of the two middle
  axes. The leading side receives the floor half and the trailing side the
  remainder. The first and last axes are left unpadded. Padding uses
  `tf.pad` in its default mode.

  Args:
    tensor: Tensor to pad; the padding spec has four rows, one per axis.
    kernel_size: Size of the convolution kernel applied afterwards.

  Returns:
    The padded tensor.
  """
  total = kernel_size - 1
  before = total // 2
  after = total - before

  paddings = tf.constant([[0, 0], [before, after], [before, after], [0, 0]])
  return tf.pad(tensor, paddings)

In [6]:

"""Hyperparameter for the Batch-Normalization and Leaky-RELU Activation of the Model"""
# Passed as `epsilon` to the BatchNormalization layer inside Conv_Block.
batch_normalization_epsilon = 1e-05
# Passed as `momentum` to the BatchNormalization layer inside Conv_Block.
batch_normalization_momentum = 0.9
# Slope setting for the Leaky-ReLU activation of the model.
leaky_relu_alpha = 0.1
"""
A Convolutional Block of the Darknet53-Framework. It Consists of a normal Conv2D-Layer, followed by a BatchNormalization and a Leaky-Relu Activation
Parameter:
filter = Amount of Filters for Convolution 
Kernel_size = Kernel-Size for Convolution

"""
class Conv_Block(tf.keras.layers.Layer):
  """Darknet-53 convolution unit: Conv2D -> BatchNormalization -> LeakyReLU.

  Args:
    filter: Number of convolution filters.
    kernel_size: Kernel size of the convolution.
    strides: Convolution stride. With a stride of 1 the convolution uses
      'same' padding. For any other stride the input is first padded with
      `constant_padding` and the convolution then runs with 'valid' padding.
  """

  def __init__(self, filter, kernel_size, strides = 1):
    # Initialise the Keras base class before assigning any attribute so that
    # attribute tracking is fully set up.
    super(Conv_Block, self).__init__()
    self.padding = ('same' if strides == 1 else 'valid')
    self.kernel_size = kernel_size
    self.layers = [
      tf.keras.layers.Conv2D(filter, kernel_size = self.kernel_size, padding = self.padding, activation = 'linear', strides = strides),
      tf.keras.layers.BatchNormalization(axis = 3, momentum = batch_normalization_momentum, epsilon = batch_normalization_epsilon, scale = True),
      # Use the shared hyperparameter rather than a duplicated literal, so
      # changing `leaky_relu_alpha` actually affects the model.
      tf.keras.layers.LeakyReLU(alpha = leaky_relu_alpha),
    ]

  @tf.function
  def call(self, input):
    """Apply (optional explicit padding,) convolution, batch-norm and activation."""
    out = input
    if self.padding == 'valid':
      # 'valid' convolutions get their padding added by hand beforehand.
      out = constant_padding(out, self.kernel_size)
    for layer in self.layers:
      out = layer(out)

    return out

In [7]:
"""
A Residual Block consisting of 2 Convolutional Blocks and a Skip-Connection and Addition 


"""
class Residual_Block(tf.keras.layers.Layer):
  """Two `Conv_Block`s whose result is added back onto the block input.

  Args:
    filter: Filter count of the first (kernel size 1) convolution. The second
      (kernel size 3) convolution uses `filter * 2` filters.
  """

  def __init__(self, filter):
    super(Residual_Block, self).__init__()
    self.conv1 = Conv_Block(filter, 1, 1)
    self.conv2 = Conv_Block(filter * 2, 3, 1)
    self.addition_layer = tf.keras.layers.Add()

  @tf.function
  def call(self, input):
    """Return the element-wise sum of `input` and `conv2(conv1(input))`."""
    residual = self.conv2(self.conv1(input))
    return self.addition_layer([input, residual])


In [8]:
"""
A Class of n Residual-Blocks
Parameters:
filters: Convolution-Filters
n : Amount of Residual Blocks Stacked on top of each other 
"""
class Stacked_Residual_Block(tf.keras.layers.Layer):
  """A chain of `n` `Residual_Block`s applied one after another.

  Args:
    filter: Filter argument forwarded to every `Residual_Block`.
    n: Number of residual blocks in the chain.
  """

  def __init__(self, filter, n):
    super(Stacked_Residual_Block, self).__init__()
    self.blocks = [Residual_Block(filter) for _ in range(n)]

  @tf.function
  def call(self, input):
    """Feed `input` through all residual blocks in creation order."""
    features = input
    for residual_block in self.blocks:
      features = residual_block(features)
    return features


In [9]:
"""
The Darknet53 Model. This acts as the Feature-Extractor of the Yolo-V3-Model
The Model Outputs feature Maps on 3 different Routes, that are later used in the Predictions on the 3 Scales 
"""
class darknet53(tf.keras.Model):
  """Darknet-53 feature extractor of the YOLOv3 model.

  The network alternates stride-2 `Conv_Block`s with stacks of residual
  blocks and exposes three intermediate feature maps ("routes") that the
  detection heads consume.
  """

  def __init__(self):
    super(darknet53, self).__init__()

    # Stem convolution followed by five (stride-2 convolution, residual stack) stages.
    self.conv_1 = Conv_Block(32, 3, 1)
    self.conv_2 = Conv_Block(64, 3, 2)
    self.res_1 = Stacked_Residual_Block(32, 1)
    self.conv_3 = Conv_Block(128, 3, 2)
    self.res_2 = Stacked_Residual_Block(64, 2)
    self.conv_4 = Conv_Block(256, 3, 2)
    self.res_3 = Stacked_Residual_Block(128, 8)
    self.conv_5 = Conv_Block(512, 3, 2)
    self.res_4 = Stacked_Residual_Block(256, 8)
    self.conv_6 = Conv_Block(1024, 3, 2)
    self.res_5 = Stacked_Residual_Block(512, 4)

  @tf.function
  def call(self, input):
    """Return `(route1, route2, route3)`, deepest feature map first.

    `route3` is taken after `res_3`, `route2` after `res_4` and `route1`
    after `res_5`.
    """
    features = self.conv_1(input)
    early_stages = (self.conv_2, self.res_1, self.conv_3, self.res_2, self.conv_4, self.res_3)
    for stage in early_stages:
      features = stage(features)

    route3 = features
    route2 = self.res_4(self.conv_5(route3))
    route1 = self.res_5(self.conv_6(route2))

    return route1, route2, route3


In [10]:
"""
A Block of 6 Convolutional Layers. 

"""

class yolo_conv_block(tf.keras.layers.Layer):
  """Six `Conv_Block`s alternating between kernel sizes 1 and 3.

  Args:
    filters: Filter count of the kernel-size-1 convolutions. The
      kernel-size-3 convolutions use `filters * 2`.
  """

  def __init__(self, filters):
    super(yolo_conv_block, self).__init__()
    doubled = filters * 2

    self.conv1 = Conv_Block(filters, 1)
    self.conv2 = Conv_Block(doubled, 3)
    self.conv3 = Conv_Block(filters, 1)
    self.conv4 = Conv_Block(doubled, 3)
    self.conv5 = Conv_Block(filters, 1)
    self.conv6 = Conv_Block(doubled, 3)

  @tf.function
  def call(self, input):
    """Return `(out, reroute)`.

    `reroute` is the output of the fifth convolution, and `out` is that
    tensor after the sixth convolution.
    """
    reroute = input
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
      reroute = conv(reroute)

    return self.conv6(reroute), reroute

In [11]:
"""
Output Layer of the Yolo Model. This Layer detects Feature-Maps at 3 different Scales, in our case (13,13) (26,26) and (52,52)
"""
class yolo_detection_layer(tf.keras.layers.Layer):
  """Detection head for one prediction scale of the YOLOv3 model.

  A 1x1 convolution maps the incoming feature map to
  `n_anchors * (5 + n_classes)` channels. These are decoded into one row per
  (grid cell, anchor) holding
  `[center_x, center_y, width, height, confidence, class probabilities...]`,
  with the box geometry expressed in input-image pixels.

  Args:
    anchors: Sequence of (width, height) anchor sizes used at this scale.
    image_size: Size of the network input image. It is read as
      (height, width); the default is square, so the order only matters for
      non-square inputs.
    n_classes: Number of object classes (3 by default).
  """

  def __init__(self, anchors, image_size = (416,416), n_classes = 3):
    super(yolo_detection_layer, self).__init__()
    self.anchors = anchors
    self.image_size = image_size
    self.n_anchors = len(anchors)
    self.n_classes = n_classes
    # 1x1 bottleneck convolution producing, per anchor box, 4 box
    # coordinates + 1 confidence score + one score per class.
    self.conv1 = tf.keras.layers.Conv2D(filters = self.n_anchors * (5 + self.n_classes), kernel_size = 1, strides = 1)

  @tf.function
  def call(self, input):
    """Run the 1x1 convolution and decode its output into boxes."""
    out = self.conv1(input)
    out = self.calc_output_tensor(out)
    return out

  def calc_output_tensor(self, input):
    """Decode a raw `(batch, grid_h, grid_w, channels)` map into box rows.

    Returns:
      Tensor of shape `(batch, grid_h * grid_w * n_anchors, 5 + n_classes)`.
    """
    # Static grid size of this scale, e.g. 13x13, 26x26 or 52x52 for a 416x416 input.
    grid_height, grid_width = input.shape[1:3].as_list()
    n_cells = grid_height * grid_width

    # One row per (cell, anchor); cells are in row-major order.
    input = tf.reshape(input, [-1, self.n_anchors * n_cells, 5 + self.n_classes])
    center_coordinates, width_Length, confidence, classes = tf.split(input, [2, 2, 1, self.n_classes], axis=-1)

    # Pixels covered by one grid cell, ordered (x, y) like the centre
    # coordinates, e.g. 416 // 13 = 32 for the 13x13 scale.
    strides = (self.image_size[1] // grid_width, self.image_size[0] // grid_height)

    # Per-cell (x, y) offsets in row-major order. x runs over the grid width
    # and y over the grid height.
    grid_x_axis = tf.range(grid_width, dtype = tf.float32)
    grid_y_axis = tf.range(grid_height, dtype = tf.float32)
    offset_x, offset_y = tf.meshgrid(grid_x_axis, grid_y_axis)
    offset_x = tf.reshape(offset_x, (-1, 1))
    offset_y = tf.reshape(offset_y, (-1, 1))
    x_y_offset = tf.concat([offset_x, offset_y], axis=-1)
    # Repeat every cell offset once per anchor so it lines up with the rows above.
    x_y_offset = tf.tile(x_y_offset, [1, self.n_anchors])
    x_y_offset = tf.reshape(x_y_offset, [1, -1, 2])

    # The sigmoid keeps the predicted centre inside its own cell. Adding the
    # cell offset and multiplying by the stride gives input-image pixels.
    center_coordinates = tf.nn.sigmoid(center_coordinates)
    center_coordinates = (center_coordinates + x_y_offset) * strides

    # One anchor group per grid cell, in the same row order as the predictions.
    anchor = tf.tile(self.anchors, [n_cells, 1])
    # YOLOv3 box-size decoding: size = anchor * exp(t). The exponential keeps
    # widths and heights strictly positive; multiplying the raw network
    # output by the anchor could produce negative box sizes.
    width_Length = tf.exp(width_Length) * tf.cast(anchor, tf.float32)

    # Objectness score and class distribution.
    confidence = tf.nn.sigmoid(confidence)
    classes = tf.nn.softmax(classes)

    # Re-assemble the decoded rows.
    inputs = tf.concat([center_coordinates, width_Length, confidence, classes], axis=-1)

    return inputs
    

In [39]:
def calculate_bounding_box_coordinates(input):
  """Convert centre/size boxes into corner boxes.

  Args:
    input: Tensor whose last axis holds
      `[center_x, center_y, width, length, confidence, classes...]`.

  Returns:
    Tensor of the same shape whose last axis holds
    `[top_left_x, top_left_y, bottom_right_x, bottom_right_y, confidence, classes...]`.
  """
  # Centres and sizes are handled as (x, y) pairs; the remaining channels
  # (confidence and classes) pass through untouched.
  centers, sizes, remainder = tf.split(input, [2, 2, -1], axis=-1)
  half_sizes = sizes / 2

  return tf.concat([centers - half_sizes, centers + half_sizes, remainder], axis = -1)


In [13]:
"""Performs non-max suppression
INPUT SHAPE: ([top_left_x, top_left_y, bottom_right_x, bottom_right_y, confidence, classes])"""
def non_max_suppression(inputs, max_output_size, iou_threshold, confidence_threshold):
    """Filter predicted boxes by confidence and non-max suppression.

    Args:
      inputs: Tensor of shape (batch, boxes, attributes) whose last axis holds
        `[top_left_x, top_left_y, bottom_right_x, bottom_right_y, confidence, classes...]`.
      max_output_size: Maximum number of boxes kept per image.
      iou_threshold: Boxes overlapping an already selected box by more than
        this intersection-over-union are discarded.
      confidence_threshold: Boxes whose confidence is not above this value are
        dropped before suppression.

    Returns:
      A tensor of shape (kept_boxes, attributes) with the surviving rows of
      every image in the batch, concatenated in batch order. For a batch of
      one image this is exactly that image's surviving boxes. An empty batch
      yields a tensor with zero rows.
    """
    kept_per_image = []
    for boxes in tf.unstack(inputs):
        # Drop low-confidence boxes first.
        boxes = tf.boolean_mask(boxes, boxes[:, 4] > confidence_threshold)
        # Suppression is class-agnostic: only coordinates and confidence are used.
        indices = tf.image.non_max_suppression(boxes[:, :4], boxes[:, 4], max_output_size, iou_threshold)
        kept_per_image.append(tf.gather(boxes, indices))

    if not kept_per_image:
        # Empty batch: return an empty result instead of failing on an
        # undefined loop variable.
        return tf.zeros([0, inputs.shape[-1]], dtype = inputs.dtype)

    # Previously only the last image's boxes were returned and the rest of
    # the batch was silently discarded.
    return tf.concat(kept_per_image, axis = 0)

In [30]:
"""Final Model of Yolo_V3, combining all Previous Modules"""

class yolo_v3(tf.keras.models.Model):
  """YOLOv3 detector: Darknet-53 backbone plus three detection heads.

  The heads work on the three backbone routes (13x13, 26x26 and 52x52 grids
  for a 416x416 input). Their decoded boxes are concatenated, converted to
  corner coordinates and filtered with non-max suppression inside `call`.

  Args:
    max_output_size: Maximum number of boxes kept by non-max suppression.
    iou_threshold: IoU threshold used by non-max suppression.
    confidence_threshold: Minimum confidence for a box to be considered.
  """

  def __init__(self, max_output_size = 20, iou_threshold = 0.5, confidence_threshold = 0.5):
    super(yolo_v3,self).__init__()

    # Non-max-suppression settings. The defaults reproduce the values that
    # used to be hard-coded in `call`.
    self.max_output_size = max_output_size
    self.iou_threshold = iou_threshold
    self.confidence_threshold = confidence_threshold

    # Layer structure
    self.darknet = darknet53()

    # Route 1 (13x13 grid): the coarsest grid is paired with the LARGEST
    # anchors, as in YOLOv3.
    self.yolo_conv_route1 = yolo_conv_block(512)
    self.detection_route1 = yolo_detection_layer(anchors = anchor[6:9])

    # Route 2 (26x26 grid): medium anchors.
    self.conv_route2 = Conv_Block(256, 1, 1)

    self.concat_route2 = tf.keras.layers.Concatenate(axis = 3)
    self.yolo_block_route2 = yolo_conv_block(256)
    self.detection_route2 = yolo_detection_layer(anchors = anchor[3:6])

    # Route 3 (52x52 grid): the finest grid is paired with the SMALLEST anchors.
    self.conv_route3 = Conv_Block(128, 1, 1)

    self.concat_route3 = tf.keras.layers.Concatenate(axis = 3)
    self.yolo_block_route3 = yolo_conv_block(128)
    self.detection_route3 = yolo_detection_layer(anchors = anchor[0:3])

  def call(self, input):
    """Return the boxes that survive non-max suppression for `input`.

    The result has one row per kept box:
    `[top_left_x, top_left_y, bottom_right_x, bottom_right_y, confidence, classes...]`.
    """
    # Darknet feature extraction
    route1, route2, route3 = self.darknet(input)

    # Route 1
    output_for_detection_r1, reroute_route2 = self.yolo_conv_route1(route1)
    scale1_detection_output = self.detection_route1(output_for_detection_r1)

    # Route 2: upscale the reroute of route 1 to the size of the backbone's
    # route-2 map and concatenate the two.
    reroute_route2 = self.conv_route2(reroute_route2)
    size_route2 = route2.shape.as_list()
    reroute_route2 = tf.image.resize(reroute_route2, size_route2[1:3], method = tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    route2 = self.concat_route2([route2, reroute_route2])
    output_for_detection_r2, reroute_route3 = self.yolo_block_route2(route2)
    scale2_detection_output = self.detection_route2(output_for_detection_r2)

    # Route 3: same scheme as route 2, now using its own concatenation layer
    # (the route-2 layer was reused here before).
    reroute_route3 = self.conv_route3(reroute_route3)
    size_route3 = route3.shape.as_list()
    reroute_route3 = tf.image.resize(reroute_route3, size_route3[1:3], method = tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    route3 = self.concat_route3([route3, reroute_route3])
    output_for_detection_r3, _ = self.yolo_block_route3(route3)
    scale3_detection_output = self.detection_route3(output_for_detection_r3)

    # Concatenate the bounding boxes from the 3 scales.
    bounding_boxes = tf.concat([scale1_detection_output, scale2_detection_output, scale3_detection_output], axis = 1)
    # Centre/size -> corner coordinates.
    bounding_boxes = calculate_bounding_box_coordinates(bounding_boxes)
    # Confidence filtering and non-max suppression.
    output = non_max_suppression(bounding_boxes, self.max_output_size, self.iou_threshold, self.confidence_threshold)

    return output


In [15]:
def train_step(model, input, target_c, target_bbox, optimizer):
  """Run one optimisation step and return its loss.

  Args:
    model: The model to train; it is called on `input`.
    input: Model input for this step.
    target_c: Classification target passed on to `yolo_loss`.
    target_bbox: Bounding-box target passed on to `yolo_loss`.
    optimizer: Optimizer that applies the gradients to the model's
      trainable variables.

  Returns:
    The loss computed by `yolo_loss` for this step.
  """
  with tf.GradientTape() as tape:
    # training=True puts the BatchNormalization layers into training mode.
    # Without it they run in inference mode and never update their statistics.
    prediction = model(input, training = True)
    loss = yolo_loss(prediction, target_c, target_bbox)

  # Only the forward pass needs to be recorded; computing and applying the
  # gradients happens outside the tape context.
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  return loss


In [31]:
def yolo_loss(input, target_classification, target_bbox):
  """Combined bounding-box and classification loss.

  Args:
    input: Predicted boxes, one row per box, whose last axis holds
      `[top_left_x, top_left_y, bottom_right_x, bottom_right_y, confidence, classes...]`.
    target_classification: One-hot encoded target class.
    target_bbox: Target bounding box, indexed as (xmin, ymin, xmax, ymax).

  Returns:
    Scalar loss: `5 * class_loss + 0.01 * bbox_loss`. The classification
    term is the mean categorical cross-entropy over all predicted boxes. The
    box term is the mean of the four per-coordinate Huber losses. When there
    are no predicted boxes the loss is 0.
  """
  if input.shape[0] == 0:
    # No box survived filtering: a mean over zero boxes would be NaN. Return
    # a zero that is still derived from `input`, so the loss stays connected
    # to the model for gradient computation.
    total_loss = tf.reduce_sum(input) * 0.0
    print(f"Total_loss : {total_loss}")
    return total_loss

  # Split the prediction rows into box corners, confidence and class scores.
  box_predictions, _, class_predictions = tf.split(input, [4, 1, -1], axis=-1)

  # Huber loss per coordinate (less sensitive to extreme values than the
  # mean squared error), averaged over the four coordinates.
  huber = tf.keras.losses.Huber()
  coordinate_losses = [
    huber(target_bbox[index], coordinate)
    for index, coordinate in enumerate(tf.split(box_predictions, 4, axis=-1))
  ]
  bbox_loss = tf.reduce_mean(coordinate_losses)

  # Every predicted box is compared against the same one-hot target.
  target_per_box = tf.broadcast_to(target_classification, tf.shape(class_predictions))
  class_loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(target_per_box, class_predictions))

  # Scale the two terms to give extra weight to the classification loss.
  scaling_coord_loss = 0.01
  scaling_category_loss = 5

  total_loss = class_loss * scaling_category_loss + bbox_loss * scaling_coord_loss
  print(f"Total_loss : {total_loss}")
  return total_loss




In [17]:
#yolo_loss(None, None, )

In [41]:
### Hyperparameters
num_epochs = 10

# Initialize the model and its optimizer.
optimizer = tf.keras.optimizers.Adam()
model = yolo_v3()
train_loss = []  # mean training loss of every completed epoch


# Training
for epoch in range(num_epochs):
    epoch_loss = []

    # The loop variable is called `image` rather than `input`: at notebook
    # level, `input` would shadow the builtin for the rest of the session.
    for image, target, bbox in ds_train:
        train_loss_step = train_step(model, image, target, bbox, optimizer)
        epoch_loss.append(train_loss_step)

    train_loss.append(tf.reduce_mean(epoch_loss))

    print(train_loss[-1])


KeyboardInterrupt: ignored