In [14]:
#Import library
import cv2
import numpy as np
from matplotlib import pyplot as plt
import mediapipe as mp
from spellchecker import SpellChecker
import translators as ts

In [15]:
#Load model
from keras.models import load_model
model = load_model('ASL landmarks using Dense v2.h5')
print('Done')

Done


In [16]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities
mp_hands = mp.solutions.hands

mp_model = mp_hands.Hands(
    static_image_mode=True,  # static images
    max_num_hands=1,  # max 1 hands detection
    min_detection_confidence=0.5)  # detection confidence


In [17]:
def make_720p():
    cap.set(3, 1280)
    cap.set(4, 720)

In [18]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [19]:
def draw_styled_landmarks(image, results): 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [20]:
def draw_border(image, results):
    h, w, c = image.shape
    if results.left_hand_landmarks:
        hand_landmarks = [results.left_hand_landmarks]
    elif results.right_hand_landmarks:
        hand_landmarks = [results.right_hand_landmarks]
    else:
        hand_landmarks = False
        
    x_max = 0
    y_max = 0
    x_min = w
    y_min = h
        
    if hand_landmarks:
        for handLMs in hand_landmarks:
            for lm in handLMs.landmark:
                x, y = int(lm.x * w), int(lm.y * h)
                if x > x_max:
                    x_max = x
                if x < x_min:
                    x_min = x
                if y > y_max:
                    y_max = y
                if y < y_min:
                    y_min = y
            cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 105), 2) 
    return x_min, x_max, y_min, y_max

In [21]:
def extract_keypoints(results):
    if results.left_hand_landmarks != None:
        x = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    elif results.right_hand_landmarks != None:
        x = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([x])

In [22]:
def extract_image(image, x_min, x_max, y_min, y_max):
    height, width, color = image.shape
    if(x_max - x_min >= y_max - y_min):
        h = x_max - x_min
        y_min = y_min - 25
        y_max = y_min + h
    else:
        h = y_max - y_min
        x_min = x_min - 25
        x_max = x_min + h
        
    y_min = y_min - 25
    y_max = y_max + 25
    x_min = x_min - 25
    x_max = x_max + 25
    return image[y_min:y_max, x_min:x_max]

In [23]:
def processing_image(img):
    img_size = 80
    minValue = 70
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 2)
    th3 = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
    ret, res = cv2.threshold(th3, minValue, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
    resized = np.int_(cv2.resize(res, (img_size, img_size)))
    return resized

In [24]:
def convert_keypoints(keypoints):
    x = keypoints[0]*200
    y = keypoints[1]*200
    z = keypoints[2]*100
    for j in range(63):
        if j % 3 == 0:
            keypoints[j] = keypoints[j]*200 - x
        elif j % 3 == 1:
            keypoints[j] = keypoints[j]*200 - y
        else:
            keypoints[j] = keypoints[j]*100 - z

In [25]:
cap = cv2.VideoCapture(0)
letterpred = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
              'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
              'W', 'X', 'Y', 'Z', 'del', 'space']
str = ""
spell = SpellChecker()
flag = False
flag2 = False
Lang = True
Trans = False
print('Please select language: 1 for Eng, 2 for Vietnamese')
img_sequence = np.zeros((200,1200,3), np.uint8)
# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence = 0.8, min_tracking_confidence = 0.8) as holistic:
    while cap.isOpened():

        # Change resolution
        # make_720p()

        # Read feed
        ret, frame = cap.read()
        
        k = cv2.waitKey(1)
        if k % 256 == 49:
            print('Selected English')
        elif k % 256 == 50:
            print('Da chon tieng Viet')
            Lang = False
        if k % 256 == 27:
            # ESC pressed
            print("Escape hit, closing...")
            break
        # Make detections of hand
        image, results = mediapipe_detection(frame, holistic)
        if results.left_hand_landmarks == None and results.right_hand_landmarks == None:
            index = 'Nothing'
        else:
            # Draw a box around hand
            x_min, x_max, y_min, y_max = draw_border(image, results)
            
            # Draw landmarks
            draw_styled_landmarks(image, results)

            # Cropping Image
            image_crop = extract_image(frame, x_min, x_max, y_min, y_max)
            
            if k % 256 == 32:
                img_sequence = np.zeros((200,1200,3), np.uint8)
                # Extract keypoints
                keypoints = extract_keypoints(results)
                keypoints = keypoints.reshape(-1, 63)
                
                # Make prediction
                prediction = np.argmax(model.predict(keypoints)[0])
                index = letterpred[prediction]
                if prediction != 26 and prediction != 27:
                    str += index
                else:
                    if prediction == 27:
                        correction = ' '.join([spell.correction(word) for word in str.split()]).upper()
                        if correction != str and Lang:
                            print('Do you mean:', correction)
                            flag = True
                        str += ' '
                        flag2 = True
                        print("Do you want to translate to Vietnamese.")
                    elif prediction == 26:
                        str =  str[:-1]
                cv2.putText(img_sequence, '%s' % (str), (30,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)
        if k % 256 == 121 and flag:
            str = correction
            str += ' '
            flag = False
            img_sequence = np.zeros((200,1200,3), np.uint8)
            cv2.putText(img_sequence, '%s' % (str), (30,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)
        elif k % 256 == 110:
            pass
        if Lang and flag2 and k % 256 == 121:
            res = ts.google(str, to_language='vi')
            img_sequence = np.zeros((200,1200,3), np.uint8)
            cv2.putText(img_sequence, '%s' % (res), (30,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)
            flag2 = False
        cv2.imshow('sequence', img_sequence)        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)
    cap.release()
    cv2.destroyAllWindows()

Please select language: 1 for Eng, 2 for Vietnamese
Escape hit, closing...


In [26]:
ts.google("I Love You", to_language='vi')

AttributeError: module 'translators' has no attribute 'google'