# **Mask Object Detection - Training**

## **Environment**

### **Configurations**

In [None]:
import os

# Repo URL
repo_url = 'https://github.com/marcocelia/mask-object-detection'

# Supported models. For each key:
#   model_name    - name of the pretrained archive/directory to download
#   model_path    - directory (relative to the repository root, with a leading
#                   and trailing '/') that holds the fine-tuning configuration
#   pipeline_file - file name of the pipeline configuration inside model_path
MODELS_CONFIG = {
    'ssd_inception_v2_coco': {
        'model_name': 'ssd_inception_v2_coco_2018_01_28',
        'model_path': '/models/tf1/my_ssd_inception_v2_coco/',
        'pipeline_file': 'pipeline.config'
    },
    'faster_rcnn_inception_v2_coco': {
        'model_name': 'faster_rcnn_inception_v2_coco_2018_01_28',
        'model_path': '/models/tf1/my_faster_rcnn_inception_v2_coco/',
        'pipeline_file': 'pipeline.config'
    }
}

# Select a model to use.
selected_model = 'faster_rcnn_inception_v2_coco'

model_name = MODELS_CONFIG[selected_model]['model_name']
model_path = MODELS_CONFIG[selected_model]['model_path']
pipeline_file = MODELS_CONFIG[selected_model]['pipeline_file']

# Set Repository Home Directory.
# The default is the original author's workspace; set the MASK_OD_BASE_DIR
# environment variable to run the notebook from another location without
# editing this cell.
base_dir = os.environ.get('MASK_OD_BASE_DIR', '/home/marcocelia/personal/tesi')
repo_dir_path = os.path.abspath(os.path.join(base_dir, os.path.basename(repo_url)))

# Set Label Map (.pbtxt) path and pipeline.config path.
# Plain concatenation is intentional: model_path starts with '/', so
# os.path.join would discard repo_dir_path.
label_map_pbtxt_fname = repo_dir_path + '/annotations/label_map.pbtxt'
pipeline_fname = repo_dir_path + model_path + pipeline_file

# Set .record path
test_record_fname = repo_dir_path + '/annotations/test.record'
train_record_fname = repo_dir_path + '/annotations/train.record'

# Set output directories (they are only named here; later cells create and clean them)
model_dir = repo_dir_path + '/training/'
output_dir = repo_dir_path + '/exported-models/'

print(f'repo_dir_path: {repo_dir_path}')
print(f'model_dir: {model_dir}')
print(f'pipeline_fname: {pipeline_fname}')
print(f'output_dir: {output_dir}')

### **Clean previous execution**

In [None]:
# Remove the cocoapi and TF models checkouts under base_dir.
!rm -rf {base_dir}/cocoapi 
!rm -rf {base_dir}/models 
# Remove the train/test image folders inside the repository.
!rm -rf {repo_dir_path}/images/test
!rm -rf {repo_dir_path}/images/train
# Remove the export and training output directories.
!rm -rf {repo_dir_path}/exported-models
!rm -rf {repo_dir_path}/training
# Remove the CSV and TFRecord files under annotations/.
!rm -rf {repo_dir_path}/annotations/*.csv
!rm -rf {repo_dir_path}/annotations/*.record
# Remove the archives of the training and export outputs.
!rm -rf {repo_dir_path}/{selected_model}_train.tar.gz
!rm -rf {repo_dir_path}/{selected_model}_finetuned.tar.gz

### **Clone TF model repo**

In [None]:
%cd {base_dir}
!git clone -b r1.13.0 https://github.com/tensorflow/models.git

%cd {base_dir}/models/research
!protoc object_detection/protos/*.proto --python_out=.

import os
os.environ['PYTHONPATH'] += f':{base_dir}/models/research/:{base_dir}/models/research/slim/'

!pip install .

# Test
!python object_detection/builders/model_builder_test.py

### **Install COCO evaluation metrics**

In [None]:
# Clone the COCO API sources under base_dir.
%cd {base_dir}
!git clone --quiet https://github.com/cocodataset/cocoapi.git
# Build the Python API and copy the pycocotools package into models/research.
%cd {base_dir}/cocoapi/PythonAPI
!make
!cp -r pycocotools {base_dir}/models/research/

### **Download pretrained model**

In [None]:
%cd {base_dir}/models/research
%rm -rf pretrained_model/

import os
import shutil
import glob
import urllib.request
import tarfile

MODEL_FILE = model_name + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
DEST_DIR = f'{base_dir}/models/research/pretrained_model/'

if not (os.path.exists(MODEL_FILE)):
    urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)

tar = tarfile.open(MODEL_FILE)
tar.extractall()
tar.close()

os.remove(MODEL_FILE)
if (os.path.exists(DEST_DIR)):
    shutil.rmtree(DEST_DIR)
os.rename(model_name, DEST_DIR)

# Check downloaded files
!echo {DEST_DIR}
!ls -alh {DEST_DIR}

# Set fine tune checkpoint
fine_tune_checkpoint = os.path.join(DEST_DIR, "model.ckpt")
print("fine_tune_checkpoint: ", fine_tune_checkpoint)

### **Check project repository**

In [None]:
import os
%cd {base_dir}

# # Check if label map and pipeline files exist
assert os.path.isfile(label_map_pbtxt_fname), '`{}` not exist'.format(label_map_pbtxt_fname)
assert os.path.isfile(pipeline_fname), '`{}` not exist'.format(pipeline_fname)

!rm -rf {model_dir} {output_dir}
os.makedirs(model_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)

## **Prepare dataset**

### **Split train/test**

In [None]:
# Run the dataset scripts from the repository root so relative paths resolve.
%cd {repo_dir_path}

# Split images to train:test = 7:3 (-r 0.3 is the test ratio).
# NOTE(review): the meaning of -x is defined in scripts/partition_dataset.py — confirm there.
!python scripts/partition_dataset.py -x -i images/ -r 0.3

# Check test images
!ls images/test

### **xml to csv**

In [None]:
# Relative paths below are resolved against the current working directory.
# Create train data: convert images/train annotations into annotations/train_labels.csv
!python scripts/xml_to_csv.py -i images/train -o annotations/train_labels.csv

# Create test data: convert images/test annotations into annotations/test_labels.csv
!python scripts/xml_to_csv.py -i images/test -o annotations/test_labels.csv

### **Create TF record**

In [None]:
# Print the label map so the class definitions can be checked before generating the records.
!cat {label_map_pbtxt_fname}

In [None]:
# Create train data: build annotations/train.record from train_labels.csv,
# the images in images/train and the label map.
!python {repo_dir_path}/scripts/generate_tfrecord_v1.py \
    --csv_input={repo_dir_path}/annotations/train_labels.csv \
    --output_path={repo_dir_path}/annotations/train.record \
    --img_path={repo_dir_path}/images/train \
    --label_map {repo_dir_path}/annotations/label_map.pbtxt

# Create test data: same conversion for the test split.
!python {repo_dir_path}/scripts/generate_tfrecord_v1.py \
    --csv_input={repo_dir_path}/annotations/test_labels.csv \
    --output_path={repo_dir_path}/annotations/test.record \
    --img_path={repo_dir_path}/images/test \
    --label_map {repo_dir_path}/annotations/label_map.pbtxt

In [None]:
# Both TFRecord files must exist before training can start.
for record_fname in (test_record_fname, train_record_fname):
    assert os.path.isfile(record_fname), '`{}` not exist'.format(record_fname)

## **Training**

In [None]:
# Print the training pipeline configuration for inspection.
!cat {pipeline_fname}

### **Train**

In [None]:
# Set log directory for tensorboard to watch (same directory the trainer writes to)
LOG_DIR = model_dir

# Clean up the directory so the run starts without old contents, then list it
!rm -rf {LOG_DIR}/*
!ls -lrt {LOG_DIR}

In [None]:
# Launch model_main.py from the repository root with the selected pipeline
# configuration, writing its output to model_dir.
%cd {repo_dir_path}
!python {base_dir}/models/research/object_detection/model_main.py \
    --pipeline_config_path={pipeline_fname} \
    --model_dir={model_dir} \
    --alsologtostderr

In [None]:
# Check the generated files (oldest first, newest last)
!ls -lrt {model_dir}

In [None]:
# Archive all the output: the tarball is written to the repository root
%cd {repo_dir_path}
!tar zcvf {selected_model}_train.tar.gz {model_dir}

## **Export**

### **Make trained model**

In [None]:
%cd {repo_dir_path}
import re
import numpy as np

lst = os.listdir(model_dir)
lst = [l for l in lst if 'model.ckpt-' in l and '.meta' in l]
steps=np.array([int(re.findall('\d+', l)[0]) for l in lst])
last_model = lst[steps.argmax()].replace('.meta', '')
last_model_path = os.path.join(model_dir, last_model)

!python {base_dir}/models/research/object_detection/export_inference_graph.py \
    --input_type=image_tensor \
    --pipeline_config_path={pipeline_fname} \
    --output_directory={output_dir} \
    --trained_checkpoint_prefix={last_model_path}

In [None]:
# Check the output files written by the export step
!echo {output_dir}
!ls -lsr {output_dir}

In [None]:
# Archive the exported model: the tarball is written to the repository root
%cd {repo_dir_path}
!tar zcvf {selected_model}_finetuned.tar.gz {output_dir}

## **Evaluate performance**

In [None]:
import os

# Tunables: adjust to the model and dataset in use.
# num_classes caps the number of label-map categories; IMAGE_SIZE is the
# matplotlib figure size used when showing results.
num_classes = 1
IMAGE_SIZE = (12, 8)

# Collect every .jpg/.png file from the test image directory.
IMAGE_DIR = os.path.join(repo_dir_path, "images", "test")
IMAGE_PATHS = [
    os.path.join(IMAGE_DIR, entry)
    for entry in os.listdir(IMAGE_DIR)
    if entry.endswith((".jpg", ".png"))
]

# Locations of the label map and of the exported frozen graph.
PATH_TO_LABELS = label_map_pbtxt_fname
PATH_TO_CKPT = os.path.join(os.path.abspath(output_dir), "frozen_inference_graph.pb")

In [None]:
# Perform the inference
# Work from inside the object_detection package of the TF models checkout.
%cd {base_dir}/models/research/object_detection

import time
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# The working directory is now object_detection/, so its parent (models/research)
# is added to sys.path to make the `object_detection` package importable.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

# This is needed to display the images.
%matplotlib inline

from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

# Load the frozen inference graph from PATH_TO_CKPT into a dedicated tf.Graph
detection_graph = tf.Graph()

# Read the serialized GraphDef bytes from disk.
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as graph_file:
    serialized_graph = graph_file.read()

# Parse the bytes and import the definition without a name prefix, so tensors
# keep their original names.
od_graph_def = tf.GraphDef()
od_graph_def.ParseFromString(serialized_graph)
with detection_graph.as_default():
    tf.import_graph_def(od_graph_def, name='')

# Build the category index from the label map, limited to num_classes
# entries and using display names.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=num_classes, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Convert input image to a numpy array
def load_image_to_numpy(image):
    """Return the pixels of a PIL image as a writable uint8 array of shape (h, w, 3).

    The image is converted to RGB first, so inputs with another mode
    (e.g. RGBA or grayscale PNG files) no longer break the 3-channel layout
    expected downstream.

    Args:
        image: a PIL image (anything providing `convert('RGB')` whose result
            numpy can turn into an array).

    Returns:
        A new `np.uint8` array with shape (height, width, 3).
    """
    # np.array (not np.asarray) yields an independent copy, so callers can
    # modify the result without touching the source image.
    return np.array(image.convert('RGB'), dtype=np.uint8)

# Inference pipeline
def run_inference(image, graph):
    """Run the detection graph on one image and return its detections.

    Args:
        image: numpy image array without batch dimension; a batch axis is added
            before feeding it to `image_tensor:0`. `image.shape[0]` and
            `image.shape[1]` are used as the target size when masks are reframed
            (presumably height and width — the caller passes an (h, w, 3) array).
        graph: the `tf.Graph` holding the imported detection model.

    Returns:
        dict with the entries below (`detection_masks` only if the graph exposes it):
            'num_detections'    - int
            'detection_classes' - uint8 array for the first (only) batch item
            'detection_boxes'   - array for the first batch item
            'detection_scores'  - array for the first batch item
            'detection_masks'   - array for the first batch item
            'elapsed_ms'        - duration of the `sess.run` call in milliseconds
    """
    with graph.as_default():
        # NOTE: a new session is created on every call, so each call pays the
        # session start-up cost.
        with tf.Session() as sess:
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in ['num_detections', 'detection_boxes', 'detection_scores','detection_classes', 'detection_masks']:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(tensor_name)

            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])

                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(detection_masks, detection_boxes, image.shape[0], image.shape[1])
                detection_masks_reframed = tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8)

                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0)

            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

            # Run inference; perf_counter is a monotonic clock meant for measuring intervals
            start_time = time.perf_counter()
            output_dict = sess.run(tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)})
            end_time = time.perf_counter()

            # Seconds -> milliseconds is a factor of 1000 (the previous factor of 100
            # under-reported the time tenfold).
            output_dict['elapsed_ms'] = (end_time - start_time) * 1000

            # all outputs are float32 numpy arrays, so convert types as appropriate
            # and drop the batch dimension
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]

    return output_dict

# Inference time of each image, in the unit reported by run_inference ('elapsed_ms')
i_times = []

# Run the inference for each image
for image_path in IMAGE_PATHS:
    # Convert the image to a numpy array; the context manager closes the file
    # handle as soon as the pixels have been copied out.
    with Image.open(image_path) as image:
        image_np = load_image_to_numpy(image)

    # Perform the inference (run_inference adds the batch dimension itself)
    output_dict = run_inference(image_np, detection_graph)

    i_times.append(output_dict['elapsed_ms'])

    # Visualize: the return value is not used, image_np itself is displayed afterwards
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8
    )
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)

In [None]:
# Report the mean of the per-image inference times collected above.
average_inference_time = np.mean(i_times)
print("average inference time: {} ms".format(average_inference_time))