In [None]:
import os
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder
from object_detection.utils import config_util
import cv2 
import numpy as np
from matplotlib import pyplot as plt
import time
import random

In [None]:
# Name of the sub-directory under the workspace 'models' folder used for this model.
CUSTOM_MODEL_NAME = 'my_ssd_mobnet'

# Name of the base model as listed on the model zoo website.
PRETRAINED_MODEL_NAME = 'ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8'

# Archive URL of the base model; the file name is the model name plus '.tar.gz'.
PRETRAINED_MODEL_URL = (
    'http://download.tensorflow.org/models/object_detection/tf2/20200711/'
    + PRETRAINED_MODEL_NAME
    + '.tar.gz'
)

# File names that are joined onto the scripts / annotations directories below.
TF_RECORD_SCRIPT_NAME = 'generate_tfrecord.py'
LABEL_MAP_NAME = 'label_map.pbtxt'

In [None]:
# Directory layout of the project, relative to the notebook's working directory.
# Shared prefixes are built once so every entry stays consistent.
_tf_root = 'Tensorflow'
_workspace_dir = os.path.join(_tf_root, 'workspace')
_custom_model_dir = os.path.join(_workspace_dir, 'models', CUSTOM_MODEL_NAME)

paths = dict(
    WORKSPACE_PATH=_workspace_dir,
    SCRIPTS_PATH=os.path.join(_tf_root, 'scripts'),
    APIMODEL_PATH=os.path.join(_tf_root, 'models'),
    ANNOTATION_PATH=os.path.join(_workspace_dir, 'annotations'),
    IMAGE_PATH=os.path.join(_workspace_dir, 'images'),
    MODEL_PATH=os.path.join(_workspace_dir, 'models'),
    PRETRAINED_MODEL_PATH=os.path.join(_workspace_dir, 'pre-trained-models'),
    # Everything below lives inside the directory of the custom model.
    CHECKPOINT_PATH=_custom_model_dir,
    OUTPUT_PATH=os.path.join(_custom_model_dir, 'export'),
    TFJS_PATH=os.path.join(_custom_model_dir, 'tfjsexport'),
    TFLITE_PATH=os.path.join(_custom_model_dir, 'tfliteexport'),
    PROTOC_PATH=os.path.join(_tf_root, 'protoc'),
)

In [None]:
# Individual files referenced by later cells, located via the `paths` table.
files = dict(
    # The pipeline config sits next to the checkpoints of the custom model.
    PIPELINE_CONFIG=os.path.join(paths['CHECKPOINT_PATH'], 'pipeline.config'),
    TF_RECORD_SCRIPT=os.path.join(paths['SCRIPTS_PATH'], TF_RECORD_SCRIPT_NAME),
    LABELMAP=os.path.join(paths['ANNOTATION_PATH'], LABEL_MAP_NAME),
)

In [None]:
# Parse the pipeline file and build the detector with is_training=False.
configs = config_util.get_configs_from_pipeline_file(files['PIPELINE_CONFIG'])
detection_model = model_builder.build(
    model_config=configs['model'],
    is_training=False,
)

# Restore the weights saved under the 'ckpt-3' prefix in the checkpoint directory.
# expect_partial() is called on the restore status because only the model is
# attached to this Checkpoint object.
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
_ckpt_prefix = os.path.join(paths['CHECKPOINT_PATH'], 'ckpt-3')
ckpt.restore(_ckpt_prefix).expect_partial()

@tf.function
def detect_fn(image):
    """Run the detector's preprocess -> predict -> postprocess chain.

    Args:
        image: Input handed directly to ``detection_model.preprocess``. The
            callers in this notebook pass a float32 tensor built from a single
            camera frame with a leading batch axis.

    Returns:
        Whatever ``detection_model.postprocess`` returns. Callers in this
        notebook read the 'detection_scores' and 'detection_classes' entries.
    """
    preprocessed, true_shapes = detection_model.preprocess(image)
    raw_predictions = detection_model.predict(preprocessed, true_shapes)
    return detection_model.postprocess(raw_predictions, true_shapes)

In [None]:
# Category lookup built from the label map file.
category_index = label_map_util.create_category_index_from_labelmap(files['LABELMAP'])

# Letters 'A'..'L' with ids 1..12. The list position is (id - 1), and the
# webcam loops index this list with the class value reported by the detector.
labels = [
    {'name': chr(ord('A') + position), 'id': position + 1}
    for position in range(12)
]


In [None]:
def run_normal():
    """Show the webcam feed annotated with the top-scoring detected letter.

    Each frame from camera 0 is mirrored, passed through ``detect_fn``, and
    the highest-scoring detection is drawn as "<letter>,<percent>%" in the
    top-left corner. The loop ends when 'q' is pressed in the window or when
    a frame cannot be read.

    The camera and the OpenCV window are always released, even if detection
    raises part-way through.
    """
    cap = cv2.VideoCapture(0)
    try:
        while True:
            # Read frame from camera; stop instead of crashing in cv2.flip
            # when the grab fails (camera missing, unplugged, or busy).
            ret, image_np = cap.read()
            if not ret:
                print("Error reading input")
                break
            image_np = cv2.flip(image_np, 1)

            # The model is fed a batch, so add a leading axis of size 1.
            input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
            detections = detect_fn(input_tensor)

            # Keep only the single best-scoring detection of this frame.
            scores = detections['detection_scores'][0].numpy()
            classes = detections['detection_classes'][0].numpy()
            best = int(np.argmax(scores))

            class_name = labels[int(classes[best])]['name']
            # Go through a Python float so the percentage is always rendered
            # as a whole number, independent of NumPy's rounding return type.
            probability = int(round(float(scores[best]) * 100))

            image_np_with_detections = image_np.copy()
            cv2.putText(image_np_with_detections, class_name + ",{}%".format(probability), (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 2, cv2.LINE_AA)

            # Display output
            cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))

            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

In [None]:
def concatenate_letters():
    """Spell a word on screen by holding hand gestures in front of the webcam.

    Every frame the top-scoring letter is determined. When the same letter has
    been the top detection for more than ``frames_to_confirm`` consecutive
    comparisons, it is appended to the word drawn on the frame and the counter
    restarts. The loop ends when 'q' is pressed in the window or when a frame
    cannot be read.

    The camera and the OpenCV window are always released, even if detection
    raises part-way through.
    """
    frames_to_confirm = 30  # repeats of one letter needed before it is accepted
    text = ""
    count_same_text = 0
    word = ""
    cap = cv2.VideoCapture(0)
    try:
        while True:
            # Stop instead of crashing in cv2.flip when the grab fails.
            ret, image_np = cap.read()
            if not ret:
                print("Error reading input")
                break
            image_np = cv2.flip(image_np, 1)

            # The model is fed a batch, so add a leading axis of size 1.
            input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
            detections = detect_fn(input_tensor)

            # Keep only the single best-scoring detection of this frame.
            scores = detections['detection_scores'][0].numpy()
            classes = detections['detection_classes'][0].numpy()
            best = int(np.argmax(scores))

            class_name = labels[int(classes[best])]['name']
            probability = int(round(float(scores[best]) * 100))

            # Count how long the same letter has stayed on top.
            old_text = text
            text = class_name
            if old_text == text:
                count_same_text += 1
            else:
                count_same_text = 0

            if count_same_text > frames_to_confirm:
                word += text
                count_same_text = 0

            image_np_with_detections = image_np.copy()

            cv2.putText(image_np_with_detections, class_name + ",{}%".format(probability), (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 2, cv2.LINE_AA)
            cv2.putText(image_np_with_detections, "Text Mode", (300,60), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0))
            cv2.putText(image_np_with_detections, word, (150,150), cv2.FONT_HERSHEY_TRIPLEX, 1, (0,0,0))

            cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))

            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

In [None]:
def get_letter(letters):
    """Return one element of ``letters`` chosen uniformly at random.

    Args:
        letters: Non-empty sequence of candidate letters.

    Returns:
        A randomly selected element; every element, including the last one,
        can be returned.

    Raises:
        ValueError: If ``letters`` is empty.
    """
    if not letters:
        raise ValueError("letters must not be empty")
    # random.choice covers the full index range. The earlier
    # randrange(0, len - 1) call could never pick the last element and
    # failed on a one-element list.
    return random.choice(letters)

# Pool of single-character gestures the practice mode can ask for.
letters = list('ABCDEFGHIJKL')

def test():
    """Interactive practice mode: ask for a letter and grade the shown gesture.

    A target letter is drawn with ``get_letter(letters)`` and displayed as
    "Do: <letter>". A gesture counts as an attempt once the same letter has
    been the top detection, with a score above 50%, for more than
    ``frames_to_confirm`` consecutive comparisons. A correct attempt picks a
    new target; a wrong one asks for a retry. Pressing 'q' in the window
    prints the accuracy over all attempts and ends the session; a failed
    frame read also ends it.

    The camera and the OpenCV window are always released, even if detection
    raises part-way through.
    """
    frames_to_confirm = 30  # repeats of one letter needed before it is graded
    min_percent = 50        # detections at or below this score are ignored

    cap = cv2.VideoCapture(0)
    prev_letter = ""
    count = 0
    total_right = 0
    total = 0
    letter_to_guess = get_letter(letters)
    try:
        while True:
            # Check the read result BEFORE touching the frame: on failure the
            # frame is None and cv2.flip would raise.
            ret, image_np = cap.read()
            if not ret:
                print("Error reading input")
                break
            image_np = cv2.flip(image_np, 1)

            # The model is fed a batch, so add a leading axis of size 1.
            input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
            detections = detect_fn(input_tensor)

            # Keep only the single best-scoring detection of this frame.
            scores = detections['detection_scores'][0].numpy()
            classes = detections['detection_classes'][0].numpy()
            best = int(np.argmax(scores))

            class_name = labels[int(classes[best])]['name']
            probability = int(round(float(scores[best]) * 100))

            curr_letter = class_name

            if probability > min_percent:
                if prev_letter == curr_letter:
                    count = count + 1
                else:
                    count = 0

                if count > frames_to_confirm:
                    count = 0
                    total = total + 1
                    if curr_letter == letter_to_guess:
                        print("Correct Hand Gesture for letter {} ".format(str(class_name)))
                        total_right = total_right + 1

                        print("Getting New Gesture To Practice")
                        letter_to_guess = get_letter(letters)
                        # Forget the letter just shown so the streak for the
                        # next target starts from scratch.
                        curr_letter = ""
                    else:
                        print("Wrong Hand Gesture: Not letter {}".format(str(class_name)))
                        print("Retry")

            prev_letter = curr_letter

            image_np_with_detections = image_np.copy()

            cv2.putText(image_np_with_detections, class_name + ",{}%".format(probability), (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 2, cv2.LINE_AA)
            cv2.putText(image_np_with_detections, "Do: {}".format(letter_to_guess), (50, 200), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0,0), 2, cv2.LINE_AA)
            cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))

            if cv2.waitKey(25) & 0xFF == ord('q'):
                # Quitting before any graded attempt must not divide by zero.
                if total:
                    print("Accuracy is {}%".format(100*total_right/total))
                else:
                    print("No gestures attempted, accuracy unavailable")
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

In [None]:
# Start the practice session (opens camera 0; press 'q' in the video window to quit).
test()
