#Downloading libraries and packages

In [None]:
import tensorflow as tf
import numpy as np
import cv2
import json
import random

Let's mount Drive

In [None]:
# Mount Google Drive at /gdrive; the dataset is copied from /gdrive/MyDrive later on.
from google.colab import drive
drive.mount('/gdrive')

#Downloading Weights and model preparation

In [None]:
%%bash
# Fetch the TensorFlow models repository (shallow clone) and the pretrained SSD ResNet50 FPN archive.
git clone --depth 1 https://github.com/tensorflow/models
wget http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz
tar -xf ssd_resnet50_v1_fpn_640x640_coco17_tpu-8.tar.gz
# Move the extracted checkpoint directory under the repository's test_data folder.
mv ssd_resnet50_v1_fpn_640x640_coco17_tpu-8/checkpoint models/research/object_detection/test_data/
cd models/research/
# Compile the protobuf definitions to Python, then install the object_detection package.
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf2/setup.py .
python -m pip install .

In [None]:
from object_detection.utils import label_map_util
from object_detection.utils import config_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.utils import colab_utils
from object_detection.builders import model_builder

Let's define the model architecture

In [None]:
# Number of object classes the fine-tuned model will predict.
num_classes=2
# Pipeline config shipped inside the extracted model archive.
pipeline_config_path='/content/ssd_resnet50_v1_fpn_640x640_coco17_tpu-8/pipeline.config'
# Checkpoint prefix inside the directory moved into test_data by the setup cell.
checkpoint_path='/content/models/research/object_detection/test_data/checkpoint/ckpt-0'

Now we are going to use a method from the object detection API, with this method we will get the configuration of the model from a Pipeline Path

In [None]:
# Parse the pipeline file; the result is indexed by section name (e.g. 'model') below.
configs=config_util.get_configs_from_pipeline_file(pipeline_config_path)

Now we will instantiate the model attribute from the config file to change some features as the number of classes to be predicted

In [None]:
model_config=configs['model']
# Override the number of classes from the pretrained config with our own.
model_config.ssd.num_classes=num_classes
# Set the config's freeze_batchnorm flag for fine-tuning.
model_config.ssd.freeze_batchnorm=True

Now we are going to build our model. In order to do so, we have to use another method from the object detection API and set the training to True

In [None]:
# Build the detection model from the edited config in training mode.
detection_model=model_builder.build(model_config=model_config, is_training=True)

The RetinaNet model has two prediction heads, one for classification and the other for box regression. We will initialize the classification head from scratch and keep the bounding box head untouched.

In [None]:
# Checkpoint view that lists only the shared tower layers and the box prediction
# head of the predictor; the class prediction head is deliberately not listed.
fake_box_predictor=tf.train.Checkpoint(_base_tower_for_heads=detection_model._box_predictor._base_tower_layers_for_heads,
                                       _box_prediction_head=detection_model._box_predictor._box_prediction_head)

Now we are going to add the feature extractor to our model

In [None]:
# Checkpoint view of the model: the feature extractor plus the partial box predictor.
fake_model=tf.train.Checkpoint(_feature_extractor=detection_model._feature_extractor, 
                               _box_predictor=fake_box_predictor)

Now let's load the saved weights into the model

In [None]:
# Restore only the objects listed in fake_model; expect_partial() is used because
# the checkpoint also holds values that are intentionally left unmatched.
ckpt=tf.train.Checkpoint(model=fake_model)
ckpt.restore(checkpoint_path).expect_partial()

In [None]:
# Push one all-zeros 640x640 RGB image through preprocess -> predict -> postprocess.
# NOTE(review): presumably this forces the model to create its variables so the
# restored checkpoint values are applied - confirm.
image, shapes=detection_model.preprocess(tf.zeros([1, 640, 640, 3]))
prediction_dict=detection_model.predict(image, shapes)
_=detection_model.postprocess(prediction_dict, shapes)

#Dataset Preparation

First of all, we have to know how our model expects the data. We will prepare our model to receive the data with the next format:

Image: A numpy array or a tensor with shape [640, 640, 3]

Boxes: A numpy array of shape [N, 4]

Classes: A numpy array with shape [N]. **Note that class indices must match the keys in the label map (we will talk about it later)**

##Loading the image from a path contained in a json file

In the last notebook, where we filtered and saved the data, we created a JSON file that contains the path of each image. Now we are going to define a function to load the images from that path; let's see how we can do it.

First let's download the json objects that contain the ids and bounding boxes of the images

In [None]:
%%bash
# -p keeps the cell re-runnable: mkdir no longer fails when the folders already exist.
mkdir -p /content/dataset_train
mkdir -p /content/dataset_test

# Copy the Train/Test folders from Drive, then extract the image archives
# into the current working directory.
cp -r /gdrive/MyDrive/Test /content/dataset_test
cp -r /gdrive/MyDrive/Train /content/dataset_train
# -o overwrites previously extracted files instead of prompting, which a
# non-interactive cell cannot answer.
unzip -o /content/dataset_train/Train/train_images.zip
unzip -o /content/dataset_test/Test/test_images.zip

In [None]:
# The context manager closes the file on exit, so no explicit close() is needed.
with open('/content/dataset_train/Train/train_bboxes.json') as json_file:
    train_bboxes = json.load(json_file)
# The file content is decoded a second time; presumably it was saved as a
# JSON-encoded string, so the first pass yields text rather than a dict.
train_bboxes=json.loads(train_bboxes)

In [None]:
# The context manager closes the file on exit, so no explicit close() is needed.
with open('/content/dataset_train/Train/train_images_id.json') as json_file:
    train_images_ids = json.load(json_file)
# The file content is decoded a second time; presumably it was saved as a
# JSON-encoded string, so the first pass yields text rather than a dict.
train_images_ids=json.loads(train_images_ids)

In [None]:
# The context manager closes the file on exit, so no explicit close() is needed.
with open('/content/dataset_train/Train/train_classes.json') as json_file:
    train_classes = json.load(json_file)
# The file content is decoded a second time; presumably it was saved as a
# JSON-encoded string, so the first pass yields text rather than a dict.
train_classes=json.loads(train_classes)

This function expects a JSON object whose keys are the image ids and whose values are the image names. Its format is shown below:

    'image_id': 'name of the image'

We have to pass to the function the path of the folder that contains the images, and this function will parse that folder loading the images into a numpy array

In [None]:
def load_image_from_path(json_file, image_folder_path, key):
  """Load one image from disk as an RGB uint8 numpy array.

  Args:
    json_file: Mapping from image id (string key) to image file name.
    image_folder_path: Folder prefix concatenated directly with the file name,
      so it must already end with a path separator.
    key: Id of the image to load; converted to `str` before the lookup.

  Returns:
    The image as a uint8 numpy array with the channel axis reversed
    (OpenCV loads BGR, the result is RGB).

  Raises:
    KeyError: If `key` is not present in `json_file`.
    FileNotFoundError: If OpenCV cannot read an image at the resulting path.
  """
  image_name=json_file[str(key)]
  path=image_folder_path+image_name

  # cv2.imread reports failure by returning None instead of raising, so check
  # explicitly rather than failing later with an opaque TypeError on slicing.
  image=cv2.imread(path, 1)
  if image is None:
    raise FileNotFoundError('Could not read image: {}'.format(path))

  # Reverse the channel axis: BGR -> RGB.
  return image[:, :, ::-1].astype(np.uint8)


The following functions load the images into numpy arrays (they are converted into tf tensors later, batch by batch)

In [None]:
def images_to_tensors(json_file, image_folder_path):
  """Load every image referenced by `json_file`.

  Args:
    json_file: Mapping from image id to image file name.
    image_folder_path: Folder prefix passed through to `load_image_from_path`.

  Returns:
    A list with one loaded image per key of `json_file`, in key iteration order.
  """
  return [
      load_image_from_path(json_file=json_file, image_folder_path=image_folder_path, key=image_id)
      for image_id in json_file.keys()
  ]

In [None]:
def one_image_to_tensor(json_file, image_folder_path, key):
  """Load the single image identified by `key`.

  Args:
    json_file: Mapping from image id to image file name.
    image_folder_path: Folder prefix passed through to `load_image_from_path`.
    key: Id of the image to load.

  Returns:
    The loaded image array itself (not wrapped in a list).
  """
  # The original built a one-element list that was never returned; it is dropped
  # here and the returned value is unchanged.
  return load_image_from_path(json_file=json_file, image_folder_path=image_folder_path, key=key)

Now let's create another function to convert the list of bounding boxes into a list of tensors

In [None]:
def bboxes_to_tensors(dictionary):
  """Convert every bounding-box entry of `dictionary` into an [N, 4] array.

  Args:
    dictionary: Mapping from image id to that image's box coordinates, given
      as a flat or nested sequence whose length is a multiple of four.

  Returns:
    A list with one numpy array of shape [N, 4] per key, in key iteration order.
  """
  # Read from the argument itself; the previous version indexed the global
  # `train_bboxes`, so it ignored whatever mapping the caller passed in.
  return [np.array(dictionary[key]).reshape(-1, 4) for key in dictionary.keys()]

In [None]:
def one_bbox_to_tensor(json_file, key):
  """Return the boxes of one image as a single-element list holding an [N, 4] array.

  Args:
    json_file: Mapping from image id to that image's box coordinates.
    key: Id of the image whose boxes are requested.

  Returns:
    A list containing one numpy array of shape [N, 4].

  Raises:
    KeyError: If `key` is not present in `json_file`.
  """
  # Read from the argument itself; the previous version indexed the global
  # `train_bboxes` and silently ignored `json_file`.
  return [np.array(json_file[key]).reshape(-1, 4)]

We have the classes contained in another json file, so we have to create a new function to parse them

In [None]:
def get_classes_from_json(json_object):
  """Return the class ids of every image, shifted by one.

  Args:
    json_object: Mapping from image id to the class ids of that image's boxes.

  Returns:
    A list with one integer numpy array per key (in key iteration order), each
    holding the stored ids plus one.
  """
  # Stay in an integer dtype: adding np.ones(...) promoted the ids to float64,
  # and tf.one_hot only accepts integer indices.
  return [np.asarray(json_object[key], dtype=np.int64) + 1 for key in json_object.keys()]

In [None]:
def get_one_class(json_file, key):
  """Return the class ids of one image, shifted by one, wrapped in a list.

  Args:
    json_file: Mapping from image id to the class ids of that image's boxes.
    key: Id of the image whose classes are requested.

  Returns:
    A single-element list holding an integer numpy array of the stored ids plus one.

  Raises:
    KeyError: If `key` is not present in `json_file`.
  """
  # Stay in an integer dtype: adding np.ones(...) promoted the ids to float64,
  # and tf.one_hot only accepts integer indices.
  return [np.asarray(json_file[key], dtype=np.int64) + 1]

Let's load the boxes, images and classes

In [None]:
# One [N, 4] box array per training image.
boxes=bboxes_to_tensors(train_bboxes)

In [None]:
# NOTE(review): this folder is presumably where the unzip step extracted the
# training images - confirm the path matches the archive layout.
images=images_to_tensors(train_images_ids, '/content/content/dataset_train_tensorflow/data/')

In [None]:
# Per-image class ids, shifted by one by get_classes_from_json.
classes=get_classes_from_json(train_classes)

##Preparing the data

The TensorFlow Object Detection API expects tensors as an Input, so we have to convert the classes, images and Bounding Boxes to Tensors.

Remember the classes must be one-hot encoded.

Previously we had a problem when training on the dataset because we were loading all the data at once instead of loading it in parts. We ran into RAM-related problems because TensorFlow was not able to allocate all the tensors, so we are going to define a function that loads the data only when it is needed, which solves the memory problems we had.

In [None]:
def charge_data_batches(images, boxes, classes, tensor_indexes, num_classes, label_id_offset=1):
  """Convert the examples selected by `tensor_indexes` into model-ready tensors.

  Only the selected examples are converted, so a batch can be materialised on
  demand instead of holding the whole dataset as tensors.

  Args:
    images: Indexable collection of image arrays.
    boxes: Indexable collection of per-image box arrays of shape [N_i, 4].
    classes: Indexable collection of per-image class ids of shape [N_i].
    tensor_indexes: Iterable of indexes into the three collections above.
    num_classes: Depth of the one-hot encoding.
    label_id_offset: Value subtracted from the class ids before encoding.
      Defaults to 1 because the ids in this notebook start at 1 while
      tf.one_hot positions start at 0.

  Returns:
    A tuple `(train_image_tensors, gt_box_tensors, gt_classes_tensors)` of
    lists: float32 images with a leading batch axis of 1, float32 boxes, and
    float32 one-hot classes of shape [N_i, num_classes].
  """
  train_image_tensors=[]
  gt_box_tensors=[]
  gt_classes_tensors=[]

  for index in tensor_indexes:
    train_image_tensors.append(tf.expand_dims(tf.convert_to_tensor(images[index], dtype=tf.float32), axis=0))

    gt_box_tensors.append(tf.convert_to_tensor(boxes[index], dtype=tf.float32))

    # tf.one_hot needs integer, zero-based indices. Without the cast float ids are
    # rejected, and without the offset the highest class id falls outside
    # `num_classes` and is encoded as an all-zero row.
    zero_indexed_classes=tf.cast(tf.convert_to_tensor(classes[index]), tf.int32)-label_id_offset
    gt_classes_tensors.append(tf.one_hot(zero_indexed_classes, num_classes, dtype=tf.float32))

  return train_image_tensors, gt_box_tensors, gt_classes_tensors



In [None]:
# Label map: class id -> {'id', 'name'} entry.
cat_class_id = 1
car_class_id = 2
category_index = {
    car_class_id: {'id': car_class_id, 'name': 'Car'},
    cat_class_id: {'id': cat_class_id, 'name': 'Cat'},
}


#Train the model

First, we have to define the variables to be trained. As a normal neural network we can get the trainable variables by calling its attributes

In [None]:
# Every trainable variable of the detection model.
trainable_variables=detection_model.trainable_variables

# Variable-name prefixes of the box and class prediction heads; only variables
# whose names start with one of these are selected for fine-tuning.
prefixes_to_train=['WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalBoxHead',
  'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalClassHead']

In [None]:
# Keep only the variables whose name begins with one of the selected prefixes.
_prefix_tuple=tuple(prefixes_to_train)
to_fine_tune=[variable for variable in trainable_variables if variable.name.startswith(_prefix_tuple)]

First we have to define how our training loop will be:

    #First we have to instantiate our model:
    model=detection_model 

    #Then we define the number of epochs:
    epochs=20

    #Now we have to run through the training batches:

    losses_train= train_data_for_one_epoch()

    #We have to calculate the validation losses and metrics:

    losses_val=perform_validation()

    losses_train_mean= np.mean(losses_train)
    losses_val_mean= np.mean(losses_val)

As we saw before, to run each step we have to divide the data in batches, which will reduce the amount of images stored in memory and also will help to speed up (in some cases) the training process.

Let's prepare the dataset with batches. We can get a single example of our dataset by indexing it, so we are going to create a list with the indexes of the images, create a tf.Dataset from it, and then batch that tf.Dataset

In [None]:
# One integer index per loaded image.
indexes=np.arange(0, len(images))

In [None]:
# Build a dataset of image indexes. The shuffle buffer covers the whole index
# list (a fixed buffer of 100 only shuffles within a sliding window on larger
# datasets); max(..., 1) keeps the buffer size valid when there are no images.
Dataset=tf.data.Dataset.from_tensor_slices(indexes).shuffle(buffer_size=max(len(indexes), 1))

Now let's create another variable to define the batch size, and the optimizer

In [None]:
# Number of image indexes grouped into each training batch.
batch_size=8
# Adam optimizer created with the Keras default arguments.
optimizer=tf.keras.optimizers.Adam()

In [None]:
# Group the shuffled indexes into batches; note this rebinds `Dataset`, so
# re-running this cell alone would batch the already-batched dataset again.
Dataset=Dataset.batch(batch_size=batch_size)

With the things we already have, we can define a function to perform the training step for one epoch, and another function to calculate the loss of each batch

In [None]:
def calculate_loss_each_batch(model, vars_to_fine_tune, optimizer, bboxes, images, classes):
  """Run one optimisation step on a batch and return its loss.

  Args:
    model: Detection model exposing `provide_groundtruth`, `preprocess`,
      `predict` and `loss`.
    vars_to_fine_tune: Variables to differentiate against and update.
    optimizer: Optimizer whose `apply_gradients` performs the update.
    bboxes: List of ground-truth box tensors, one per image.
    images: List of image tensors, each with a leading batch axis of 1.
    classes: List of one-hot ground-truth class tensors, one per image.

  Returns:
    The sum of the localization and classification losses for the batch.
  """
  model.provide_groundtruth(groundtruth_boxes_list=bboxes, groundtruth_classes_list=classes)

  with tf.GradientTape() as tape:
    # preprocess returns (resized_image, true_image_shape). Using the returned
    # shapes replaces the hard-coded `batch_size*[[641, 640, 3]]`, which carried
    # a typo and had the wrong length for a short final batch.
    preprocessed=[model.preprocess(image) for image in images]
    preprocessed_images=tf.concat([resized for resized, _ in preprocessed], axis=0)
    shapes=tf.concat([true_shape for _, true_shape in preprocessed], axis=0)

    prediction_dict=model.predict(preprocessed_images, shapes)
    losses_dict=model.loss(prediction_dict, shapes)
    total_loss=losses_dict['Loss/localization_loss']+losses_dict['Loss/classification_loss']

  # Differentiate and update outside the tape context so the tape does not
  # also record the gradient computation itself.
  gradients=tape.gradient(total_loss, vars_to_fine_tune)
  optimizer.apply_gradients(zip(gradients, vars_to_fine_tune))
  print('loss {}'.format(total_loss))

  return total_loss

In [None]:
def train_data_for_one_epoch():
  """Train on every batch of `Dataset` once.

  Reads the notebook-level `Dataset`, `images`, `boxes`, `classes`,
  `num_classes`, `detection_model`, `to_fine_tune` and `optimizer`.

  Returns:
    The loss of the last processed batch, or None when `Dataset` yields no
    batches (previously this case raised UnboundLocalError).
  """
  total_loss=None

  for step, batch in enumerate(Dataset):
    # Use the configured class count rather than a literal 2.
    images_list, boxes_list, classes_list=charge_data_batches(images, boxes, classes, batch, num_classes)
    print('batch {}'.format(step))

    # Run the optimisation step for this batch and keep its loss.
    total_loss=calculate_loss_each_batch(detection_model, to_fine_tune, optimizer, boxes_list, images_list, classes_list)

  return total_loss

In [None]:
# for i in range(10):
#   train_data_for_one_epoch()