# 0. Setup Paths

In [10]:
WORKSPACE_PATH = 'SignLanguageDetection/Tensorflow/workspace'
SCRIPTS_PATH = 'SignLanguageDetection/Tensorflow/scripts'
APIMODEL_PATH = 'models'
ANNOTATION_PATH = WORKSPACE_PATH+'/annotations'
IMAGE_PATH = WORKSPACE_PATH+'/images'
MODEL_PATH = WORKSPACE_PATH+'/models'
PRETRAINED_MODEL_PATH = WORKSPACE_PATH+'/pre-trained-models'
CONFIG_PATH = MODEL_PATH+'/my_ssd_mobnet/pipeline.config'
CHECKPOINT_PATH = MODEL_PATH+'/my_ssd_mobnet/'

# 1. Create Label Map

In [20]:
labels = [
#     {'name':'hello', 'id':1},
#     {'name':'yes', 'id':2},
#     {'name':'no', 'id':3},
#     {'name':'sorry', 'id':4},
#     {'name':'thank you', 'id':5},
    {'name':'0', 'id':6},
    {'name':'1', 'id':7},
    {'name':'2', 'id':8},
    {'name':'3', 'id':9},
    {'name':'4', 'id':10},
    {'name':'5', 'id':11},
    {'name':'6', 'id':12},
    {'name':'7', 'id':13},
    {'name':'8', 'id':14},
    {'name':'9', 'id':15},
    {'name':'a', 'id':16},
    {'name':'b', 'id':17},
    {'name':'c', 'id':18},
    {'name':'d', 'id':19},
    {'name':'e', 'id':20},
    {'name':'f', 'id':21},
    {'name':'g', 'id':22},
    {'name':'h', 'id':23},
    {'name':'i', 'id':24},
    {'name':'j', 'id':25},
    {'name':'k', 'id':26},
    {'name':'l', 'id':27},
    {'name':'m', 'id':28},
    {'name':'n', 'id':29},
    {'name':'o', 'id':30},
    {'name':'p', 'id':31},
    {'name':'q', 'id':32},
    {'name':'r', 'id':33},
    {'name':'s', 'id':34},
    {'name':'t', 'id':35},
    {'name':'u', 'id':36},
    {'name':'v', 'id':37},
    {'name':'w', 'id':38},
    {'name':'x', 'id':39},
    {'name':'y', 'id':40},
    {'name':'z', 'id':41}
]

with open(ANNOTATION_PATH + '\label_map.pbtxt', 'w') as f:
    for label in labels:
        f.write('item { \n')
        f.write('\tname:\'{}\'\n'.format(label['name']))
        f.write('\tid:{}\n'.format(label['id']))
        f.write('}\n')

# 2. Create TF records

In [5]:
!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x {IMAGE_PATH + '/train'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/train.record'}
!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x{IMAGE_PATH + '/test'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/test.record'}

Successfully created the TFRecord file: SignLanguageDetection/Tensorflow/workspace/annotations/train.record
Successfully created the TFRecord file: SignLanguageDetection/Tensorflow/workspace/annotations/test.record


# 3. Copy Model Config to Training Folder

In [13]:
CUSTOM_MODEL_NAME = 'my_ssd_mobnet' 

In [53]:
!mkdir {'SignLanguageDetection\Tensorflow\workspace\models\\'+CUSTOM_MODEL_NAME}
!copy {PRETRAINED_MODEL_PATH.replace('/',"\\") +'\\ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8\\pipeline.config'} {MODEL_PATH.replace('/',"\\")+'\\'+CUSTOM_MODEL_NAME.replace('/',"\\")}

A subdirectory or file SignLanguageDetection\Tensorflow\workspace\models\my_ssd_mobnet already exists.


        1 file(s) copied.


# 4. Update Config For Transfer Learning

In [14]:
import tensorflow as tf
from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format

In [15]:
CONFIG_PATH = MODEL_PATH+'/'+CUSTOM_MODEL_NAME+'/pipeline.config'

In [16]:
config = config_util.get_configs_from_pipeline_file(CONFIG_PATH)

In [17]:
config

{'model': ssd {
   num_classes: 36
   image_resizer {
     fixed_shape_resizer {
       height: 320
       width: 320
     }
   }
   feature_extractor {
     type: "ssd_mobilenet_v2_fpn_keras"
     depth_multiplier: 1.0
     min_depth: 16
     conv_hyperparams {
       regularizer {
         l2_regularizer {
           weight: 3.9999998989515007e-05
         }
       }
       initializer {
         random_normal_initializer {
           mean: 0.0
           stddev: 0.009999999776482582
         }
       }
       activation: RELU_6
       batch_norm {
         decay: 0.996999979019165
         scale: true
         epsilon: 0.0010000000474974513
       }
     }
     use_depthwise: true
     override_base_feature_extractor_hyperparams: true
     fpn {
       min_level: 3
       max_level: 7
       additional_layer_depth: 128
     }
   }
   box_coder {
     faster_rcnn_box_coder {
       y_scale: 10.0
       x_scale: 10.0
       height_scale: 5.0
       width_scale: 5.0
     }
   }
   matc

In [None]:
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.io.gfile.GFile(CONFIG_PATH, "r") as f:                                                                                                                                                                                                                     
    proto_str = f.read()                                                                                                                                                                                                                                          
    text_format.Merge(proto_str, pipeline_config)  

In [59]:
pipeline_config.model.ssd.num_classes = 36
pipeline_config.train_config.batch_size = 25
pipeline_config.train_config.fine_tune_checkpoint = "C://Users/kashi/"+PRETRAINED_MODEL_PATH+'/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0'
pipeline_config.train_config.fine_tune_checkpoint_type = "detection"
pipeline_config.train_input_reader.label_map_path= "C://Users/kashi/" + ANNOTATION_PATH + '/label_map.pbtxt'
pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = ["C://Users/kashi/"+ANNOTATION_PATH + '/train.record']
pipeline_config.eval_input_reader[0].label_map_path ="C://Users/kashi/"+ ANNOTATION_PATH + '/label_map.pbtxt'
pipeline_config.eval_input_reader[0].tf_record_input_reader.input_path[:] = ["C://Users/kashi/"+ANNOTATION_PATH + '/test.record']

In [60]:
config_text = text_format.MessageToString(pipeline_config)                                                                                                                                                                                                        
with tf.io.gfile.GFile(CONFIG_PATH, "wb") as f:                                                                                                                                                                                                                     
    f.write(config_text)   

# 5. Train the model

In [39]:
print("""python {}/research/object_detection/model_main_tf2.py --model_dir={}/{} --pipeline_config_path={}/{}/pipeline.config --num_train_steps=10000""".format(APIMODEL_PATH, MODEL_PATH,CUSTOM_MODEL_NAME,MODEL_PATH,CUSTOM_MODEL_NAME))

python models/research/object_detection/model_main_tf2.py --model_dir=SignLanguageDetection/Tensorflow/workspace/models/my_ssd_mobnet --pipeline_config_path=SignLanguageDetection/Tensorflow/workspace/models/my_ssd_mobnet/pipeline.config --num_train_steps=10000


# 6. Load Train Model From Checkpoint

In [28]:
import os
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

In [33]:
# Load pipeline config and build a detection model
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
detection_model = model_builder.build(model_config=configs['model'], is_training=False)

# Restore checkpoint
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-6')).expect_partial()

@tf.function
def detect_fn(image):
    image, shapes = detection_model.preprocess(image)
    prediction_dict = detection_model.predict(image, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)
    return detections

In [18]:
configs

{'model': ssd {
   num_classes: 36
   image_resizer {
     fixed_shape_resizer {
       height: 320
       width: 320
     }
   }
   feature_extractor {
     type: "ssd_mobilenet_v2_fpn_keras"
     depth_multiplier: 1.0
     min_depth: 16
     conv_hyperparams {
       regularizer {
         l2_regularizer {
           weight: 3.9999998989515007e-05
         }
       }
       initializer {
         random_normal_initializer {
           mean: 0.0
           stddev: 0.009999999776482582
         }
       }
       activation: RELU_6
       batch_norm {
         decay: 0.996999979019165
         scale: true
         epsilon: 0.0010000000474974513
       }
     }
     use_depthwise: true
     override_base_feature_extractor_hyperparams: true
     fpn {
       min_level: 3
       max_level: 7
       additional_layer_depth: 128
     }
   }
   box_coder {
     faster_rcnn_box_coder {
       y_scale: 10.0
       x_scale: 10.0
       height_scale: 5.0
       width_scale: 5.0
     }
   }
   matc

# 8. Detect in Real-Time

In [34]:
import cv2 
import os
import time
import uuid
import numpy as np
from object_detection.utils import label_map_util

In [35]:
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+'/label_map.pbtxt')

In [36]:
category_index

{6: {'id': 6, 'name': '0'},
 7: {'id': 7, 'name': '1'},
 8: {'id': 8, 'name': '2'},
 9: {'id': 9, 'name': '3'},
 10: {'id': 10, 'name': '4'},
 11: {'id': 11, 'name': '5'},
 12: {'id': 12, 'name': '6'},
 13: {'id': 13, 'name': '7'},
 14: {'id': 14, 'name': '8'},
 15: {'id': 15, 'name': '9'},
 16: {'id': 16, 'name': 'a'},
 17: {'id': 17, 'name': 'b'},
 18: {'id': 18, 'name': 'c'},
 19: {'id': 19, 'name': 'd'},
 20: {'id': 20, 'name': 'e'},
 21: {'id': 21, 'name': 'f'},
 22: {'id': 22, 'name': 'g'},
 23: {'id': 23, 'name': 'h'},
 24: {'id': 24, 'name': 'i'},
 25: {'id': 25, 'name': 'j'},
 26: {'id': 26, 'name': 'k'},
 27: {'id': 27, 'name': 'l'},
 28: {'id': 28, 'name': 'm'},
 29: {'id': 29, 'name': 'n'},
 30: {'id': 30, 'name': 'o'},
 31: {'id': 31, 'name': 'p'},
 32: {'id': 32, 'name': 'q'},
 33: {'id': 33, 'name': 'r'},
 34: {'id': 34, 'name': 's'},
 35: {'id': 35, 'name': 't'},
 36: {'id': 36, 'name': 'u'},
 37: {'id': 37, 'name': 'v'},
 38: {'id': 38, 'name': 'w'},
 39: {'id': 39, 'n

In [37]:
# Setup capture
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

In [None]:
while True: 
    ret, frame = cap.read()
    image_np = np.array(frame)
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    #print(input_tensor)
    detections = detect_fn(input_tensor)

    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections

    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    label_id_offset = 1
    image_np_with_detections = image_np.copy()

    viz_utils.visualize_boxes_and_labels_on_image_array(
                image_np_with_detections,
                detections['detection_boxes'],
                detections['detection_classes']+label_id_offset,
                detections['detection_scores'],
                category_index,
                use_normalized_coordinates=True,
                max_boxes_to_draw=5,
                min_score_thresh=.5,
                agnostic_mode=False)

    cv2.imshow('object detection',  cv2.resize(image_np_with_detections, (800, 1000)))

    if cv2.waitKey(1) & 0xFF == ord('q'):
        cap.release()
        break

In [39]:
detections = detect_fn(input_tensor)

In [40]:
detections

{'detection_boxes': <tf.Tensor: shape=(1, 100, 4), dtype=float32, numpy=
 array([[[6.02740049e-03, 0.00000000e+00, 1.00000000e+00, 9.86452281e-01],
         [1.54173374e-03, 1.35249496e-02, 9.89860296e-01, 9.90350187e-01],
         [1.56825781e-03, 1.75031126e-02, 9.91923273e-01, 9.84224319e-01],
         [0.00000000e+00, 2.97385454e-03, 9.91284311e-01, 9.90302861e-01],
         [5.37276268e-04, 0.00000000e+00, 1.00000000e+00, 9.97304797e-01],
         [6.02740049e-03, 0.00000000e+00, 1.00000000e+00, 9.86452281e-01],
         [6.02740049e-03, 0.00000000e+00, 1.00000000e+00, 9.86452281e-01],
         [8.05066228e-02, 1.59652829e-01, 7.16217339e-01, 9.93894339e-01],
         [1.56825781e-03, 1.75031126e-02, 9.91923273e-01, 9.84224319e-01],
         [1.56825781e-03, 1.75031126e-02, 9.91923273e-01, 9.84224319e-01],
         [3.38504642e-01, 4.17966247e-02, 1.00000000e+00, 9.53431606e-01],
         [0.00000000e+00, 1.65634453e-02, 6.80566907e-01, 1.00000000e+00],
         [1.50298476e-02, 3

In [21]:
from matplotlib import pyplot as plt

In [31]:
cap.release()

In [23]:
cv2.__version__

'4.4.0'