Este cuaderno de Jupyter se ha elaborado para entrenar modelos de detección utilizando TensorFlow 2 Object Detection API.
Los modelos deben ser descargados desde el [model zoo de la API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md). La evaluación de los modelos se realiza con el script de [Padilla et al.](https://github.com/rafaelpadilla/Object-Detection-Metrics)

El entrenamiento está basado en el tutorial de [Renu Khandelwal](https://medium.com/analytics-vidhya/tensorflow-2-object-detection-api-using-custom-dataset-745f30278446).

# Montar el Drive

In [None]:
# Mount Google Drive at /content/gdrive so the project files are reachable
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import os
os.chdir('/content/gdrive/My Drive/TFM-MaskDetection/detecion-models/') # Change to the directory that contains training.ipynb

# Instalación de TensorFlow 2 Object Detection API

In [None]:
import os
import pathlib

# Clone the tensorflow models repository if it doesn't already exist
if "models" in pathlib.Path.cwd().parts:
  while "models" in pathlib.Path.cwd().parts:
    os.chdir('..')
elif not pathlib.Path('models').exists():
  !git clone --depth 1 https://github.com/tensorflow/models.git

In [None]:
%%bash
# Stop at the first failing command so pip never installs from a tree where
# the cd or the protobuf compilation did not succeed.
set -e
cd models/research
# Compile the protobuf definitions into Python modules.
protoc object_detection/protos/*.proto --python_out=.
# Copy the TF2 setup.py next to the sources and install from this directory.
cp object_detection/packages/tf2/setup.py .
python -m pip install .

In [None]:
# Clone the pycocotools API repository if it doesn't already exist
if "cocoapi" in pathlib.Path.cwd().parts:
  while "cocoapi" in pathlib.Path.cwd().parts:
    os.chdir('..')
elif not pathlib.Path('cocoapi').exists():
  !git clone --depth 1 https://github.com/cocodataset/cocoapi.git

# Preparación del dataset

In [None]:
## Comprueba que todas las imagenes son correctas
from skimage import io
import cv2
from os import listdir
from os.path import isfile, join

# Try to open every .png of the train and test sets with both scikit-image
# and OpenCV, printing the path of each file that cannot be read.
mypaths = ['Workspace/images/train/', 'Workspace/images/test/']
for mypath in mypaths:
    files = [join(mypath, f) for f in listdir(mypath)
             if isfile(join(mypath, f)) and f.endswith(".png")]
    for file_path in files:
        try:
            _ = io.imread(file_path)
        except Exception as e:
            # Deliberately broad: any failure to read is only reported so the
            # scan continues with the remaining files.
            print(e)
            print(file_path)
            continue
        # cv2.imread does not raise on failure; it returns None instead, so
        # the result has to be checked explicitly.
        img = cv2.imread(file_path)
        if img is None:
            print("cv2.imread could not read the image")
            print(file_path)

In [None]:
# Create the csv file from the xml files
!python xml_to_csv.py

In [None]:
# Create the tfrecords (train.record and test.record) from the csv files
!python generate_tfrecord.py --csv_input=Workspace/images/train_labels.csv --image_dir=Workspace/images/train --output_path=train.record
!python generate_tfrecord.py --csv_input=Workspace/images/test_labels.csv --image_dir=Workspace/images/test --output_path=test.record

## Creación de ficheros .txt con las Ground Truth que se utilizarán durante la evaluación del modelo para calcular las métricas de error

In [None]:
# Ground truth
import csv
import glob

GROUND_TRUTH_PATH = 'Evaluation/ground_truth/'
# Class id written to the ground-truth files for each class label in the CSV.
CLASS_IDS = {'without_mask': '1', 'with_mask': '2', 'mask_weared_incorrect': '3'}

# Make sure the output directory exists, then delete all previous files
os.makedirs(GROUND_TRUTH_PATH, exist_ok=True)
files = glob.glob(GROUND_TRUTH_PATH + '*')
for f in files:
    os.remove(f)
print("Previous files were deleted")

# Write one <image name>.txt per image, with one line per annotated box:
#   <class id> <row[4]> <row[5]> <row[6]> <row[7]>
# Rows of the same image are expected to be consecutive in the CSV; files are
# opened in append mode, so non-consecutive rows still end up in the same file.
with open('Workspace/images/test_labels.csv', 'r', newline='') as src:
    csv_reader = csv.reader(src, delimiter=',')
    next(csv_reader, None)  # skip the header row
    img_name_prev = None
    dst = None  # opened lazily so no file is created for an empty image name
    try:
        for row in csv_reader:
            if not row:
                continue  # ignore blank lines
            img_name = os.path.splitext(row[0])[0]
            if img_name != img_name_prev:
                if dst is not None:
                    dst.close()
                dst = open(GROUND_TRUTH_PATH + img_name + '.txt', 'a')
                img_name_prev = img_name
            class_id = CLASS_IDS.get(row[3])
            if class_id is None:
                # A line without a class id would be malformed; report and skip it.
                print("Skipping box with unknown class '{}' in {}".format(row[3], row[0]))
                continue
            dst.write(' '.join([class_id] + row[4:8]) + '\n')
    finally:
        # Close the last file even if a row could not be processed.
        if dst is not None:
            dst.close()

# Entrenamiento del modelo

A partir de aquí será necesario modificar el nombre del modelo en función del que se desee entrenar. Se indicarán los lugares en los que es necesario aplicar el cambio.

In [None]:
# Run model_main_tf2.py with the model directory and its pipeline.config.
# Replace <__MODEL_NAME__> (both occurrences) with the model to train.
!python Workspace/model_main_tf2.py \
     --model_dir=Workspace/models/<__MODEL_NAME__> \
     --pipeline_config_path=Workspace/models/<__MODEL_NAME__>/pipeline.config

## Exportación

In [None]:
# Run exporter_main_v2.py on the checkpoints in the model directory and write
# the result under Workspace/exported_model/.
# Replace <__MODEL_NAME__> (all three occurrences) with the trained model.
!python Workspace/exporter_main_v2.py \
        --input_type=image_tensor \
        --trained_checkpoint_dir=Workspace/models/<__MODEL_NAME__> \
        --pipeline_config_path=Workspace/models/<__MODEL_NAME__>/pipeline.config \
        --output_directory=Workspace/exported_model/<__MODEL_NAME__>

# Evaluación

In [None]:
# Descarga del software para calcular las métricas
!git clone --depth 1 https://github.com/rafaelpadilla/Object-Detection-Metrics.git

In [None]:
# Evaluación: se calculan las bounding boxes del dataset de evaluación y se
# guardan en un fichero .txt que se utilizará para calcular las métricas

# Import the required libraries for Object detection inference
import time
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import glob
%matplotlib inline

# Load the exported model from its saved_model directory.
# Replace <__MODEL_NAME__> with the model to evaluate.
MODEL = '<__MODEL_NAME__>'
PATH_TO_SAVED_MODEL = r'Workspace/exported_model/' + MODEL + '/saved_model'
BBOX_PATH = "Evaluation/detection_bbox/"
# Create the per-model output directory if needed; the detection files are
# opened for writing inside it and that fails when the directory is missing.
os.makedirs(BBOX_PATH + MODEL, exist_ok=True)
# Delete previous files
files = glob.glob(BBOX_PATH + MODEL + '/*')
for f in files:
    os.remove(f)
print("Previous files were deleted")

# Minimum confidence threshold used when drawing detections
MIN_CONF_THRESH = 0.5
print('Loading model...', end='')
start_time = time.time()
# Load the saved model and build the detection function
detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)
end_time = time.time()
elapsed_time = end_time - start_time
print('Done! Took {} seconds'.format(elapsed_time))
# Load the label map data
PATH_TO_LABELS = r'Workspace/annotations/label_map.pbtxt'
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

def load_image_into_numpy_array(path):
    """Load an image from file into an RGB numpy array.

    The image is read with OpenCV, which yields BGR channel order, and is
    converted to RGB before being returned so it can be fed to the model.

    Args:
      path: the file path to the image

    Returns:
      uint8 numpy array with shape (img_height, img_width, 3)

    Raises:
      OSError: if OpenCV cannot read or decode the file at `path`.
    """
    image_bgr = cv2.imread(path)
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with the offending path rather than inside cvtColor.
    if image_bgr is None:
        raise OSError("Could not read or decode image: {}".format(path))
    return np.array(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))

# Image files for inference
IMAGE_FOLDER = r'Workspace/images/test/*.png'
image_paths = glob.glob(IMAGE_FOLDER)
# Used for the progress output instead of a hard-coded dataset size
total_images = len(image_paths)
for i, IMAGE_PATH in enumerate(image_paths):
    image_np = load_image_into_numpy_array(IMAGE_PATH)
    # Running the inference on the image specified in the image path
    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    input_tensor = tf.convert_to_tensor(image_np)
    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis, ...]
    detections = detect_fn(input_tensor)

    # All outputs are batches tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections
    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
    # Draw the detections above MIN_CONF_THRESH on a copy of the image. The
    # annotated copy is not saved by this cell; only its shape is used below.
    image_np_with_detections = image_np.copy()
    viz_utils.visualize_boxes_and_labels_on_image_array(
          image_np_with_detections,
          detections['detection_boxes'],
          detections['detection_classes'],
          detections['detection_scores'],
          category_index,
          use_normalized_coordinates=True,
          max_boxes_to_draw=200,
          min_score_thresh=MIN_CONF_THRESH,
          agnostic_mode=False)

    # Save into <BBOX_PATH><MODEL>/<image name>.txt, one line per detection:
    #   <class> <score> <left> <top> <right> <bottom>
    # Every returned detection is written, regardless of MIN_CONF_THRESH.
    height, width, _ = image_np_with_detections.shape
    image_name = os.path.splitext(os.path.basename(IMAGE_PATH))[0]
    with open(os.path.join(BBOX_PATH + MODEL, image_name + '.txt'), 'a') as textfile:
        for class_id, score, box in zip(detections['detection_classes'],
                                        detections['detection_scores'],
                                        detections['detection_boxes']):
            # Boxes are normalized [ymin, xmin, ymax, xmax]; scale to pixels.
            fields = (class_id, score,
                      box[1]*width, box[0]*height,
                      box[3]*width, box[2]*height)
            textfile.write(" ".join(str(value) for value in fields) + "\n")
    if i % 10 == 0:
        print("{:.2f}% done".format(100*i/total_images))

print('100.00% done')

In [None]:
# Cálculo de las métricas de error
# Requiere crear un directorio ./metrics/<__MODEL_NAME__>

os.chdir('Object-Detection-Metrics/')
!python3 pascalvoc.py -gt ../Evaluation/ground_truth/ \                         # Directorio con las ground truth
      -det ../Evaluation/detection_bbox/<__MODEL_NAME__>/ \                     # Directorio con las bounding boxes
      -t 0.5 \                                                                  # Threshold
      -sp ../Evaluation/metrics/<__MODEL_NAME__>/                               # Directorio de salida
os.chdir('..')

# Inferencia

A continuación se dejan dos scripts para realizar la inferencia tanto de imágenes como de vídeo. El resultado se almacena en la carpeta especificada.

In [None]:
# Image input
!python Workspace/detect_objects.py --threshold=0.5 \                           # Threshold
      --model_path=Workspace/exported_model/<__MODEL_NAME__>/saved_model \      # Modelo empleado
      --path_to_labelmap=Workspace/annotations/label_map.pbtxt \
      --images_dir=Test/src/images \                                            # Directorio con las imágenes de entrada
      --output_directory=Test/dst/<__MODEL_NAME__> \                            # Directorio con las imágenes de salida
      --save_output

In [None]:
# Video input
!python Workspace/detect_objects.py --video_input --threshold=0.5 \             # Threshold
      --model_path=Workspace/exported_model/<__MODEL_NAME__>/saved_model \      # Modelo empleado
      --path_to_labelmap=Workspace/annotations/label_map.pbtxt \                
      --video_path=Test/src/video.mp4 \                                         # Video de entrada
      --output_directory=Test/dst/<__MODEL_NAME__> \                            # Directorio de salida
      --save_output