References Used
- wget.download('https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/_downloads/da4babe668a8afb093cc7776d7e630f3/generate_tfrecord.py')
- Setup https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html

# 0. Setup Paths

In [1]:
# Root folders of the object-detection workspace (relative to the notebook's working directory).
WORKSPACE_PATH = 'Tensorflow/workspace'
SCRIPTS_PATH = 'Tensorflow/scripts'
# NOTE(review): this is the same folder as MODEL_PATH; confirm the TensorFlow
# models repository (research/object_detection) really lives here.
APIMODEL_PATH = 'Tensorflow/workspace/models'
ANNOTATION_PATH = WORKSPACE_PATH+'/annotations'
IMAGE_PATH = WORKSPACE_PATH+'/images'
MODEL_PATH = WORKSPACE_PATH+'/models'
PRETRAINED_MODEL_PATH = WORKSPACE_PATH+'/pre-trained-models'

# Active model. The folder name is defined once and every path is derived from
# it, so renaming the model cannot leave CONFIG_PATH / CHECKPOINT_PATH stale.
CUSTOM_MODEL_NAME = 'my_ssd_mobnet_tuned_2000'
CONFIG_PATH = MODEL_PATH+'/'+CUSTOM_MODEL_NAME+'/pipeline.config'
CHECKPOINT_PATH = MODEL_PATH+'/'+CUSTOM_MODEL_NAME+'/'

# Alternative model folder used for the additional training run.
CUSTOM_MODEL_NAME_10000 = 'my_ssd_mobnet_10000'
CONFIG_PATH_10000 = MODEL_PATH+'/'+CUSTOM_MODEL_NAME_10000+'/pipeline.config'
CHECKPOINT_PATH_10000 = MODEL_PATH+'/'+CUSTOM_MODEL_NAME_10000+'/'

In [2]:
# Optional: uncomment the lines below to switch to the additional-training model, which is saved in a separate folder (my_ssd_mobnet_10000).
#CONFIG_PATH =CONFIG_PATH_10000
#CHECKPOINT_PATH=CHECKPOINT_PATH_10000
#CUSTOM_MODEL_NAME =CUSTOM_MODEL_NAME_10000

# 1. Create Label Map

In [3]:
# Classes the detector is trained on; ids start at 1.
labels = [
    {'name':'hello', 'id':1},
    {'name':'thanks', 'id':2},
    {'name':'iloveyou', 'id':3},
    {'name':'no', 'id':4},
    {'name':'yes', 'id':5},
]

# Write one `item { name, id }` entry per label.
# The path uses a forward slash so it is exactly the '/label_map.pbtxt' file
# that the later cells read; a backslash here is an invalid escape sequence and
# yields a different file name on non-Windows systems.
with open(ANNOTATION_PATH + '/label_map.pbtxt', 'w') as f:
    for label in labels:
        f.write('item { \n')
        f.write('\tname:\'{}\'\n'.format(label['name']))
        f.write('\tid:{}\n'.format(label['id']))
        f.write('}\n')

# 2. Create TF records

In [4]:
!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x {IMAGE_PATH + '/train'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/train.record'}
!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x{IMAGE_PATH + '/test'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/test.record'}

Successfully created the TFRecord file: Tensorflow/workspace/annotations/train.record
Successfully created the TFRecord file: Tensorflow/workspace/annotations/test.record


# 3. Download TF Pretrained Models from the TensorFlow Model Zoo

In [4]:
#!cd Tensorflow && git clone https://github.com/tensorflow/models

Cloning into 'models'...


In [6]:
#wget.download('http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz')
#!mv ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz {PRETRAINED_MODEL_PATH}
#!cd {PRETRAINED_MODEL_PATH} && tar -zxvf ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz

# 4. Copy Model Config to Training Folder

In [37]:
#!mkdir {'Tensorflow\workspace\models\\'+CUSTOM_MODEL_NAME}
#!cp {PRETRAINED_MODEL_PATH+'/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/pipeline.config'} {MODEL_PATH+'/'+CUSTOM_MODEL_NAME}

A subdirectory or file Tensorflow\workspace\models\my_ssd_mobnet_tuned_2000 already exists.


# 5. Update Config For Transfer Learning

In [5]:
import tensorflow as tf
from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format

In [6]:
# Recompute the pipeline config location from the currently selected model name.
CONFIG_PATH = '/'.join([MODEL_PATH, CUSTOM_MODEL_NAME, 'pipeline.config'])

In [7]:
# Parse pipeline.config into a dict of config sections (the displayed result starts with a 'model' entry).
config = config_util.get_configs_from_pipeline_file(CONFIG_PATH)

In [8]:
config

{'model': ssd {
   num_classes: 5
   image_resizer {
     fixed_shape_resizer {
       height: 320
       width: 320
     }
   }
   feature_extractor {
     type: "ssd_mobilenet_v2_fpn_keras"
     depth_multiplier: 1.0
     min_depth: 16
     conv_hyperparams {
       regularizer {
         l2_regularizer {
           weight: 3.9999998989515007e-05
         }
       }
       initializer {
         random_normal_initializer {
           mean: 0.0
           stddev: 0.009999999776482582
         }
       }
       activation: RELU_6
       batch_norm {
         decay: 0.996999979019165
         scale: true
         epsilon: 0.0010000000474974513
       }
     }
     use_depthwise: true
     override_base_feature_extractor_hyperparams: true
     fpn {
       min_level: 3
       max_level: 7
       additional_layer_depth: 128
     }
   }
   box_coder {
     faster_rcnn_box_coder {
       y_scale: 10.0
       x_scale: 10.0
       height_scale: 5.0
       width_scale: 5.0
     }
   }
   match

In [9]:
# Load pipeline.config as text and merge it into a fresh TrainEvalPipelineConfig
# proto so individual fields can be edited in the next cell.
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.io.gfile.GFile(CONFIG_PATH, "r") as f:
    proto_str = f.read()
# Parsing only needs the text, so it runs after the file has been closed.
text_format.Merge(proto_str, pipeline_config)

In [10]:
# Adapt the pre-trained pipeline to this dataset.
# The class count is taken from `labels` (defined in the label-map cell) so the
# config cannot drift from the label map when a class is added or removed.
pipeline_config.model.ssd.num_classes = len(labels)
pipeline_config.train_config.batch_size = 4
# Start fine-tuning from the downloaded pre-trained checkpoint; the commented
# alternatives point at other checkpoints instead.
pipeline_config.train_config.fine_tune_checkpoint = PRETRAINED_MODEL_PATH+'/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0'
#pipeline_config.train_config.fine_tune_checkpoint = PRETRAINED_MODEL_PATH+'/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-11'
#pipeline_config.train_config.fine_tune_checkpoint = CHECKPOINT_PATH+'ckpt-11'
pipeline_config.train_config.fine_tune_checkpoint_type = "detection"
# Training reads train.record, evaluation reads test.record; both share one label map.
pipeline_config.train_input_reader.label_map_path= ANNOTATION_PATH + '/label_map.pbtxt'
pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = [ANNOTATION_PATH + '/train.record']
pipeline_config.eval_input_reader[0].label_map_path = ANNOTATION_PATH + '/label_map.pbtxt'
pipeline_config.eval_input_reader[0].tf_record_input_reader.input_path[:] = [ANNOTATION_PATH + '/test.record']

In [11]:
# Serialise the edited proto to text and overwrite pipeline.config in place.
config_text = text_format.MessageToString(pipeline_config)
with tf.io.gfile.GFile(CONFIG_PATH, "wb") as config_file:
    config_file.write(config_text)

# 6. Train the model

In [12]:
# Print (not run) the training command so it can be pasted into a terminal.
train_command = ' '.join([
    'python {}/research/object_detection/model_main_tf2.py'.format(APIMODEL_PATH),
    '--model_dir={}/{}'.format(MODEL_PATH, CUSTOM_MODEL_NAME),
    '--pipeline_config_path={}/{}/pipeline.config'.format(MODEL_PATH, CUSTOM_MODEL_NAME),
    '--num_train_steps=10000',
])
print(train_command)

python Tensorflow/workspace/models/research/object_detection/model_main_tf2.py --model_dir=Tensorflow/workspace/models/my_ssd_mobnet_tuned_2000 --pipeline_config_path=Tensorflow/workspace/models/my_ssd_mobnet_tuned_2000/pipeline.config --num_train_steps=10000


# 6A. Evaluate the model

In [13]:
# Print (not run) the evaluation command so it can be pasted into a terminal.
# --checkpoint_dir is built from the same MODEL_PATH/CUSTOM_MODEL_NAME folder as
# the other arguments instead of a hard-coded backslash path, which relied on
# invalid escape sequences and was only correct on Windows.
model_dir = '{}/{}'.format(MODEL_PATH, CUSTOM_MODEL_NAME)
print("python {api}/research/object_detection/model_main_tf2.py --model_dir={model_dir} --pipeline_config_path={model_dir}/pipeline.config --checkpoint_dir={model_dir}".format(api=APIMODEL_PATH, model_dir=model_dir))

python Tensorflow/workspace/models/research/object_detection/model_main_tf2.py --model_dir=Tensorflow/workspace/models/my_ssd_mobnet_tuned_2000 --pipeline_config_path=Tensorflow/workspace/models/my_ssd_mobnet_tuned_2000/pipeline.config --checkpoint_dir=Tensorflow\workspace\models\my_ssd_mobnet_tuned_2000


In [14]:
##python Tensorflow\workspace\models\research\object_detection\model_main_tf2.py --model_dir=Tensorflow\workspace\models\my_ssd_mobnet --pipeline_config_path=Tensorflow\workspace\models\my_ssd_mobnet\pipeline.config --checkpoint_dir=Tensorflow\workspace\models\my_ssd_mobnet

# 7. Load Trained Model From Checkpoint

In [15]:
import os
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

In [16]:
# Rebuild the detector in inference mode from the 'model' section of pipeline.config.
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
detection_model = model_builder.build(
    model_config=configs['model'],
    is_training=False,
)

# Restore the weights saved as checkpoint 'ckpt-2' inside CHECKPOINT_PATH.
# NOTE(review): the checkpoint number is hard-coded; confirm it is the one you want to load.
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
checkpoint_prefix = os.path.join(CHECKPOINT_PATH, 'ckpt-2')
ckpt.restore(checkpoint_prefix).expect_partial()

@tf.function
def detect_fn(image):
    """Run the restored detection model end to end on ``image``.

    Chains ``detection_model.preprocess``, ``detection_model.predict`` and
    ``detection_model.postprocess``.

    Args:
        image: Input handed to ``detection_model.preprocess``. The callers in
            this notebook pass a float32 tensor with a leading batch axis.

    Returns:
        Whatever ``detection_model.postprocess`` returns for the predictions.
    """
    preprocessed, true_shapes = detection_model.preprocess(image)
    raw_predictions = detection_model.predict(preprocessed, true_shapes)
    return detection_model.postprocess(raw_predictions, true_shapes)

# 8. Detect in Real-Time

In [17]:
import cv2 
import numpy as np

In [18]:
# Build the id -> {'id', 'name'} lookup from the label map; it is passed to the box-drawing helper below.
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+'/label_map.pbtxt')

In [19]:
# Open camera device 0 and query the frame size it reports.
cap = cv2.VideoCapture(0)
width, height = (
    int(cap.get(prop))
    for prop in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
)

In [None]:
# Real-time detection: grab a frame, run the detector, draw the boxes and show
# the result until 'q' is pressed.
# The +1 offset is added to the predicted class values before they are looked up
# in category_index, whose ids start at 1.
label_id_offset = 1

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera closed, busy or disconnected).
            # Stop here rather than failing later inside the model.
            print('Could not read a frame from the camera; stopping.')
            break
        image_np = np.array(frame)

        # Add a leading batch axis and convert to float32 for detect_fn.
        input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
        detections = detect_fn(input_tensor)

        # Drop the batch axis and keep only the first num_detections entries as NumPy arrays.
        num_detections = int(detections.pop('num_detections'))
        detections = {key: value[0, :num_detections].numpy()
                      for key, value in detections.items()}
        detections['num_detections'] = num_detections

        # detection_classes should be ints.
        detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

        # Draw on a copy so the raw frame stays untouched.
        image_np_with_detections = image_np.copy()

        viz_utils.visualize_boxes_and_labels_on_image_array(
                    image_np_with_detections,
                    detections['detection_boxes'],
                    detections['detection_classes']+label_id_offset,
                    detections['detection_scores'],
                    category_index,
                    use_normalized_coordinates=True,
                    max_boxes_to_draw=5,
                    min_score_thresh=.5,
                    agnostic_mode=False)

        cv2.imshow('object detection',  cv2.resize(image_np_with_detections, (800, 600)))

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, including on an
    # error or a keyboard interrupt.
    cap.release()
    cv2.destroyAllWindows()

In [18]:
# NOTE(review): the recorded output of this cell is a NameError, i.e. it was run
# in a kernel where input_tensor had not been defined yet.
detections = detect_fn(input_tensor)

NameError: name 'input_tensor' is not defined

In [19]:
from matplotlib import pyplot as plt

In [20]:
frame

array([[[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [ 79, 117, 129],
        [ 88, 126, 137],
        [ 76, 115, 124]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [ 85, 119, 133],
        [ 93, 127, 140],
        [ 80, 114, 126]],

       [[255, 255, 255],
        [255, 255, 255],
        [255, 255, 255],
        ...,
        [ 94, 119, 138],
        [101, 126, 143],
        [ 86, 110, 126]],

       ...,

       [[ 40,  43,  58],
        [ 69,  75,  90],
        [ 88, 102, 118],
        ...,
        [ 36,  51,  83],
        [ 36,  51,  78],
        [ 36,  50,  75]],

       [[ 32,  31,  45],
        [ 65,  68,  83],
        [ 89, 100, 115],
        ...,
        [ 30,  47,  90],
        [ 31,  47,  84],
        [ 31,  47,  81]],

       [[ 29,  27,  41],
        [ 65,  66,  80],
        [ 91, 100, 115],
        ...,
        [ 28,  46,  94],
        [ 28,  46,  87],
        [ 29,  45,  84]]

In [21]:
# Save the current `frame` as database/<i>.png and show it in a window.
i=1
output_path = 'database/{index}.png'.format(index=i)
# cv2.imwrite reports failure only through its return value, so create the
# target folder first and fail loudly if the file could not be written.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
image = cv2.imwrite(output_path, frame)  # NB: boolean success flag, not image data
if not image:
    raise IOError('Could not write {}'.format(output_path))
#gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

cv2.imshow('frame', frame)

In [None]:
cap.release()

In [22]:
# Run the detector once on the current `frame` and convert the results to NumPy.
image_np = np.array(frame)

# Leading batch axis + float32, as expected by detect_fn's callers in this notebook.
input_tensor = tf.convert_to_tensor(image_np[np.newaxis, ...], dtype=tf.float32)
detections = detect_fn(input_tensor)

# Drop the batch axis and keep the first num_detections entries of every output.
num_detections = int(detections.pop('num_detections'))
detections = dict(
    (key, value[0, :num_detections].numpy())
    for key, value in detections.items()
)
detections.update(num_detections=num_detections)
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)


In [23]:
input_tensor

<tf.Tensor: shape=(1, 480, 640, 3), dtype=float32, numpy=
array([[[[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [ 79., 117., 129.],
         [ 88., 126., 137.],
         [ 76., 115., 124.]],

        [[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [ 85., 119., 133.],
         [ 93., 127., 140.],
         [ 80., 114., 126.]],

        [[255., 255., 255.],
         [255., 255., 255.],
         [255., 255., 255.],
         ...,
         [ 94., 119., 138.],
         [101., 126., 143.],
         [ 86., 110., 126.]],

        ...,

        [[ 40.,  43.,  58.],
         [ 69.,  75.,  90.],
         [ 88., 102., 118.],
         ...,
         [ 36.,  51.,  83.],
         [ 36.,  51.,  78.],
         [ 36.,  50.,  75.]],

        [[ 32.,  31.,  45.],
         [ 65.,  68.,  83.],
         [ 89., 100., 115.],
         ...,
         [ 30.,  47.,  90.],
         [ 31.,  47.,  84.],
         

In [30]:
detections

{'detection_boxes': <tf.Tensor: shape=(1, 100, 4), dtype=float32, numpy=
 array([[[0.        , 0.        , 0.0625    , 0.0625    ],
         [0.        , 0.        , 0.08321067, 0.08321067],
         [0.        , 0.        , 0.04785534, 0.08321067],
         [0.        , 0.        , 0.0625    , 0.1125    ],
         [0.        , 0.        , 0.08321067, 0.04785534],
         [0.        , 0.        , 0.1125    , 0.0625    ],
         [0.        , 0.        , 0.0625    , 0.08750001],
         [0.        , 0.00214466, 0.08321067, 0.07285534],
         [0.        , 0.        , 0.1125    , 0.0875    ],
         [0.        , 0.        , 0.08321067, 0.13321069],
         [0.        , 0.        , 0.04785534, 0.13321069],
         [0.        , 0.        , 0.0625    , 0.16250001],
         [0.        , 0.02714466, 0.08321067, 0.09785534],
         [0.        , 0.03750001, 0.0625    , 0.1375    ],
         [0.        , 0.05214466, 0.08321067, 0.12285535],
         [0.        , 0.03750001, 0.1125  

In [28]:
detections['num_detections']

100

In [92]:
while True: 
    ret, frame = cap.read()
    image_np_with_detections = np.array(frame)
    
    #input_tensor = tf.convert_to_tensor(np.expand_dims(image_np_with_detections, 0), dtype=tf.float32)
    #detections = detect_fn(input_tensor)
    
    #print("Model detection fn",detections)
    
    cv2.imshow('object detection',  cv2.resize(image_np_with_detections, (800, 600)))
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cap.release()
        break

In [75]:
cv2.destroyAllWindows()

In [76]:
# Debug loop: cap.read() is commented out, so `frame` is never refreshed and the
# same image is shown again on every iteration until 'q' is pressed.
while True: 
    #ret, frame = cap.read()
    image_np_with_detections = np.array(frame)
    
    #input_tensor = tf.convert_to_tensor(np.expand_dims(image_np_with_detections, 0), dtype=tf.float32)
    #detections = detect_fn(input_tensor)
    
    #print("Model detection fn",detections)
    
    cv2.imshow('object detection',  cv2.resize(image_np_with_detections, (800, 600)))
    
    # waitKey(1) polls the keyboard; 'q' releases the camera and leaves the loop.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cap.release()
        break

KeyboardInterrupt: 

In [94]:
# Run the detector on the frame held in image_np_with_detections and convert
# the results to NumPy.
batched_frame = np.expand_dims(image_np_with_detections, 0)
input_tensor = tf.convert_to_tensor(batched_frame, dtype=tf.float32)
detections = detect_fn(input_tensor)

# Drop the batch axis and keep the first num_detections entries of every output.
num_detections = int(detections.pop('num_detections'))
detections = {
    key: value[0, :num_detections].numpy()
    for key, value in detections.items()
}
detections['num_detections'] = num_detections
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

In [95]:
# Print every detection output under its key.
for key, value in detections.items():
    print(key, value)

detection_boxes [[1.61651343e-01 2.72813499e-01 8.47910762e-01 7.22538292e-01]
 [2.68365473e-01 4.07399476e-01 7.18923688e-01 6.66306317e-01]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00 1.00000000e+00]
 [2.66415715e-01 4.69093591e-01 6.00423634e-01 6.61134720e-01]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00 1.00000000e+00]
 [0.00000000e+00 6.07340395e-01 4.41643298e-01 9.93833244e-01]
 [1.07224077e-01 2.59668976e-01 7.95147419e-01 7.36705780e-01]
 [8.24030489e-04 5.94279945e-01 9.40862447e-02 7.54862010e-01]
 [7.35737085e-01 3.21900487e-01 8.01814079e-01 4.14054811e-01]
 [8.11030626e-01 1.36708513e-01 9.37632918e-01 3.06868374e-01]
 [5.79868257e-02 1.55285776e-01 1.00000000e+00 7.90697634e-01]
 [8.11030626e-01 1.36708513e-01 9.37632918e-01 3.06868374e-01]
 [1.16334632e-02 6.46646738e-01 1.02671795e-01 7.76932001e-01]
 [8.08590174e-01 1.24671414e-01 9.29263353e-01 2.78388381e-01]
 [7.47936249e-01 3.21801364e-01 8.24983954e-01 4.03185368e-01]
 [0.00000000e+00 0.00000000e+00 1.00000

In [64]:
# Manual step 1 of detect_fn: preprocess the batched input tensor.
image, shapes = detection_model.preprocess(input_tensor)


In [67]:
# Manual step 2 of detect_fn: run the model on the preprocessed input.
prediction_dict = detection_model.predict(image, shapes)

In [69]:
# Manual step 3 of detect_fn: postprocess the predictions.
# This overwrites `detections` with the direct output of postprocess.
detections = detection_model.postprocess(prediction_dict, shapes)

In [96]:
detections.items()

dict_items([('detection_boxes', array([[1.61651343e-01, 2.72813499e-01, 8.47910762e-01, 7.22538292e-01],
       [2.68365473e-01, 4.07399476e-01, 7.18923688e-01, 6.66306317e-01],
       [0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [2.66415715e-01, 4.69093591e-01, 6.00423634e-01, 6.61134720e-01],
       [0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.00000000e+00],
       [0.00000000e+00, 6.07340395e-01, 4.41643298e-01, 9.93833244e-01],
       [1.07224077e-01, 2.59668976e-01, 7.95147419e-01, 7.36705780e-01],
       [8.24030489e-04, 5.94279945e-01, 9.40862447e-02, 7.54862010e-01],
       [7.35737085e-01, 3.21900487e-01, 8.01814079e-01, 4.14054811e-01],
       [8.11030626e-01, 1.36708513e-01, 9.37632918e-01, 3.06868374e-01],
       [5.79868257e-02, 1.55285776e-01, 1.00000000e+00, 7.90697634e-01],
       [8.11030626e-01, 1.36708513e-01, 9.37632918e-01, 3.06868374e-01],
       [1.16334632e-02, 6.46646738e-01, 1.02671795e-01, 7.76932001e-01],
       [8.08590174e

In [97]:
category_index

{1: {'id': 1, 'name': 'hello'},
 2: {'id': 2, 'name': 'thanks'},
 3: {'id': 3, 'name': 'iloveyou'},
 4: {'id': 4, 'name': 'no'},
 5: {'id': 5, 'name': 'yes'}}

# Path of one image from the test split, used for a single-image check.
IMAGE_PATH_TEST = '/'.join([IMAGE_PATH, 'test', 'thanks-55606ee4-b160-11ee-90bc-68071540c9cc.jpg'])

IMAGE_PATH_TEST

# Load the selected test image. Reading IMAGE_PATH would point cv2.imread at the
# images directory, which yields None instead of pixel data.
img = cv2.imread(IMAGE_PATH_TEST)
if img is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError('Could not read test image: {}'.format(IMAGE_PATH_TEST))
image_np = np.array(img)

# Run the detector on the loaded test image (leading batch axis, float32).
input_tensor = tf.convert_to_tensor(image_np[np.newaxis, ...], dtype=tf.float32)
detections = detect_fn(input_tensor)



# Drop the batch axis and keep the first num_detections entries of every output.
num_detections = int(detections.pop('num_detections'))
detections = dict(
    (key, value[0, :num_detections].numpy())
    for key, value in detections.items()
)
detections.update(num_detections=num_detections)

# detection_classes should be ints.
detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

# Offset added to the predicted class values before the category_index lookup.
label_id_offset = 1
# Draw on a copy so image_np keeps the unannotated pixels.
image_np_with_detections = image_np.copy()

viz_utils.visualize_boxes_and_labels_on_image_array(
    image_np_with_detections,
    detections['detection_boxes'],
    detections['detection_classes'] + label_id_offset,
    detections['detection_scores'],
    category_index,
    use_normalized_coordinates=True,
    max_boxes_to_draw=5,
    min_score_thresh=0.8,
    agnostic_mode=False,
)

# Convert BGR to RGB before handing the image to matplotlib.
image_rgb = cv2.cvtColor(image_np_with_detections, cv2.COLOR_BGR2RGB)
plt.imshow(image_rgb)
plt.show()

In [54]:
cv2.destroyAllWindows()