In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [2]:
from keras.models import load_model
import tensorflow as tf
# Limit TensorFlow's footprint on the first GPU, when one is present, by
# giving it a single virtual device with memory_limit=512.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        gpu_memory_cap = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=512)
        tf.config.experimental.set_virtual_device_configuration(gpus[0], [gpu_memory_cap])
    except RuntimeError as err:
        # Report the problem and carry on with TensorFlow's default allocation.
        print(err)

In [3]:
mp_holistic = mp.solutions.holistic # MediaPipe holistic module: source of the Holistic model and the *_CONNECTIONS sets used below
mp_drawing = mp.solutions.drawing_utils # MediaPipe drawing helpers (draw_landmarks, DrawingSpec)

In [4]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and hand back the frame plus the results.

    The frame is converted BGR -> RGB for ``model.process``, flagged read-only
    for the duration of that call, then converted back RGB -> BGR.

    Args:
        image: frame in BGR channel order (as accepted by ``cv2.cvtColor``).
        model: object exposing ``process(rgb_image)``.

    Returns:
        tuple: ``(bgr_image, results)`` where ``results`` is whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)   # colour conversion BGR -> RGB
    rgb_frame.flags.writeable = False                    # read-only while the model runs
    results = model.process(rgb_frame)                   # make prediction
    rgb_frame.flags.writeable = True                     # writeable again
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)  # colour conversion RGB -> BGR
    return bgr_frame, results

In [5]:
def draw_landmarks(image, results):
    """Draw face, pose and both hand landmark sets on ``image`` with default styling.

    Args:
        image: frame that ``mp_drawing.draw_landmarks`` draws onto.
        results: object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    # (landmark list, connection set) pairs, drawn in this order.
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    )
    for landmark_list, connections in layers:
        mp_drawing.draw_landmarks(image, landmark_list, connections)

In [6]:
def draw_styled_landmarks(image, results):
    """Draw the left and right hand landmarks on ``image`` with custom colours.

    Only the hands are drawn; face and pose drawing are disabled in this
    notebook.

    Args:
        image: frame that ``mp_drawing.draw_landmarks`` draws onto.
        results: object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    # (landmark list, colour of first DrawingSpec, colour of second DrawingSpec)
    # NOTE(review): presumably the first spec styles the landmark points and the
    # second the connecting lines -- confirm against the installed MediaPipe.
    hand_styles = (
        (results.left_hand_landmarks, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, (245, 117, 66), (245, 66, 230)),
    )
    for hand_landmarks, first_color, second_color in hand_styles:
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=first_color, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=second_color, thickness=2, circle_radius=2),
        )

In [7]:
def extract_hand_keypoints(results):
    """Stack left- and right-hand landmarks into one 2-D array of ``[x, y, z]`` rows.

    A hand that is missing (falsy) is replaced by 21 rows of integer zeros, so
    the result is an integer array when both hands are missing.

    Args:
        results: object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each is falsy or has a ``landmark``
            iterable of points with ``x``, ``y`` and ``z``.

    Returns:
        numpy.ndarray: left-hand rows followed by right-hand rows.
    """
    def _hand_rows(hand):
        # One [x, y, z] row per landmark, or the zero placeholder.
        if not hand:
            return [[0] * 3] * 21
        return [[point.x, point.y, point.z] for point in hand.landmark]

    left_rows = _hand_rows(results.left_hand_landmarks)
    right_rows = _hand_rows(results.right_hand_landmarks)
    return np.vstack((left_rows, right_rows))

In [8]:
def extract_keypoints(results):
    """Flatten left- and right-hand landmarks into one 1-D feature vector.

    Only the two hands contribute to the result. Pose and face landmarks are
    not read: they were previously converted to arrays on every call and then
    thrown away.

    Args:
        results: object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each is falsy or has a ``landmark``
            iterable of points with ``x``, ``y`` and ``z``.

    Returns:
        numpy.ndarray: ``[x, y, z, x, y, z, ...]`` for the left hand followed
        by the right hand. A missing hand contributes ``21 * 3`` zeros.
    """
    def _flat_hand(hand):
        # Flattened [x, y, z] triples, or the zero placeholder for a missing hand.
        if not hand:
            return np.zeros(21 * 3)
        return np.array([[point.x, point.y, point.z] for point in hand.landmark]).flatten()

    lh = _flat_hand(results.left_hand_landmarks)
    rh = _flat_hand(results.right_hand_landmarks)
    return np.concatenate([lh, rh])

In [9]:
# Step 4: where the exported numpy arrays live.
DATA_PATH = os.path.join('dataset', 'letters_data')

# The 26 uppercase letters 'A'..'Z'; a letter's position is its class id.
actions = np.array([chr(code) for code in range(ord('A'), ord('Z') + 1)])

# Number of images.
no_sequences = 1200

# (The frames-per-video setting, sequence_length = 30, is currently disabled.)

# Letter -> class id for the full alphabet.
label_map = dict(zip(actions, range(len(actions))))

# Two-letter label sets and their letter -> class id maps: 'C' vs 'O' ...
new_actions = np.array(['C', 'O'])
new_label_map = dict(zip(new_actions, range(len(new_actions))))

# ... and 'M' vs 'N'.
new_actions1 = np.array(['M', 'N'])
new_label_map1 = dict(zip(new_actions1, range(len(new_actions1))))

In [10]:
# Main letter classifier (its output is indexed into `actions` in the live loop).
# Replace the .h5 path with the weights file you saved.
model = load_model('models/weights_custom.h5')

In [11]:
# Secondary classifier whose output is indexed into `new_actions` ('C' / 'O').
model_c_and_o = load_model('models/actionCO.h5')

In [12]:
# Secondary classifier whose output is indexed into `new_actions1` ('M' / 'N').
model_mn = load_model('models/actionMN.h5')

In [13]:
# Print the layer-by-layer architecture of the C/O classifier.
model_c_and_o.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 28, 32)            12128     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 14, 32)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 14, 64)            6208      
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 7, 64)            0         
 1D)                                                             
                                                                 
 conv1d_2 (Conv1D)           (None, 5, 128)            24704     
                                                                 
 max_pooling1d_2 (MaxPooling  (None, 2, 128)           0

In [14]:
# Braille ASCII table: the 64 printable ASCII characters from ' ' (0x20) to
# '_' (0x5F), in code-point order.
# The eighth entry is the apostrophe; it used to be an empty string, which made
# braille_lookup["'"] raise KeyError and registered '' as a bogus key.
keys = [' ','!','"','#','$','%','&',"'",'(',')','*','+',',','-','.','/',
          '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?','@',
          'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q',
          'R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_']

# Braille symbols, position-for-position with `keys`.
# The first cell is U+2800 (blank Braille pattern), written as an escape so it
# cannot be mistaken for an ordinary space.
values = ['\u2800','⠮','⠐','⠼','⠫','⠩','⠯','⠄','⠷','⠾','⠡','⠬','⠠','⠤','⠨','⠌','⠴','⠂','⠆','⠒','⠲','⠢',
        '⠖','⠶','⠦','⠔','⠱','⠰','⠣','⠿','⠜','⠹','⠈','⠁','⠃','⠉','⠙','⠑','⠋','⠛','⠓','⠊','⠚','⠅',
        '⠇','⠍','⠝','⠕','⠏','⠟','⠗','⠎','⠞','⠥','⠧','⠺','⠭','⠽','⠵','⠪','⠳','⠻','⠘','⠸']

# zip() silently truncates to the shorter list, so fail loudly if the two
# tables ever drift apart.
assert len(keys) == len(values), "Braille key/value tables differ in length"

braille_lookup = dict(zip(keys, values))

In [15]:
from PIL import Image, ImageFont, ImageDraw

In [17]:
# Live recognition: webcam frame -> MediaPipe hand landmarks -> Keras
# classifiers -> predicted letter and its Braille cell drawn on the frame.
# Press "q" in the preview window to stop.

# 1. New detection variables
sequence = []        # flattened hand keypoints per frame; window fed to the C/O and M/N models
seq = []             # stacked hand keypoints per frame; window fed to the main model
sentence = []        # most recent distinct Braille cells (at most 5 are kept)
predictions = []     # class id chosen on every classified frame
lst = []             # every frame that was displayed
threshold = 0.5      # minimum main-model probability for a letter to enter `sentence`
num_of_frames = 30   # length of `sequence`; classification starts once it is full
batch_size = 16      # length of `seq`

# Overlay text settings, loaded once instead of on every frame.
# The fill tuple is written as-is into the BGR frame that PIL is handed.
b, g, r, a = 0, 0, 255, 0
font = ImageFont.truetype(r'unifont1.ttf', 40)

cap = cv2.VideoCapture(0)
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read feed
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop instead of passing None to OpenCV.
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)
            img = image

            # 2. Prediction logic
            keypoints = extract_hand_keypoints(results)
            kp = extract_keypoints(results)
            sequence.append(kp)
            seq.append(keypoints)
            sequence = sequence[-num_of_frames:]
            seq = seq[-batch_size:]

            if len(sequence) == num_of_frames:
                res = model.predict(np.expand_dims(seq, axis=0), verbose=0)[0]
                index = np.argmax(res)
                pred = actions[index]

                # The main model's C/O and M/N answers are re-decided by the
                # dedicated two-class models, which are run only when needed.
                if pred in ('C', 'O'):
                    res2 = model_c_and_o.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                    pred = new_actions[np.argmax(res2)]
                    index = label_map[pred]
                elif pred in ('M', 'N'):
                    res3 = model_mn.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                    pred = new_actions1[np.argmax(res3)]
                    index = label_map[pred]

                # Confidence always comes from the main model, for the final class.
                prob = res[index]
                predictions.append(index)

                # 3. Viz logic
                # Accept a letter only when the recent predictions (up to 10) all
                # agree. The earlier np.unique(...)[0] == index test compared
                # against the smallest recent class id, not for agreement.
                if len(set(predictions[-10:])) == 1 and prob > threshold:
                    symbol = braille_lookup[pred]
                    # Do not append the same cell twice in a row.
                    if not sentence or sentence[-1] != symbol:
                        sentence.append(symbol)

                sentence = sentence[-5:]

                # Viz probabilities
                cv2.putText(image, "Prediction: "+pred, (0, 85), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,0), 2, cv2.LINE_AA)

                # The Braille cell is drawn through PIL with the TrueType font
                # loaded above, then converted back to an array for display.
                img_pil = Image.fromarray(image)
                draw = ImageDraw.Draw(img_pil)
                draw.text((3, 0),  "Braille: "+braille_lookup[pred], font = font, fill = (b, g, r, a))
                img = np.array(img_pil)

            # NOTE(review): `lst` keeps every displayed frame and therefore grows
            # without bound while the loop runs; cap or drop it if nothing
            # downstream reads it.
            lst.append(img)
            # Show to screen
            cv2.imshow('OpenCV Feed', img)
            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even after an exception.
    cap.release()
    cv2.destroyAllWindows()