# British Sign Language Detection Using Mediapipe & LSTM

### Load the saved model weights and run live detection

# In [16]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from tensorflow.keras.models import load_model

# Keras model loaded from the 'Best4.h5' file in the working directory.
# The webcam loop further down calls model.predict() on windows of keypoints.
model = load_model('Best4.h5')

# MediaPipe Holistic solution module: provides the Holistic context manager
# and the HAND_CONNECTIONS constant used when drawing.
mp_holistic = mp.solutions.holistic

# MediaPipe drawing helpers (draw_landmarks, DrawingSpec).
mp_drawing = mp.solutions.drawing_utils 

def mediapipe_detection(image, model):
    """Run a MediaPipe model on a single OpenCV frame.

    Args:
        image: Frame in BGR channel order (OpenCV's default ordering).
        model: Object exposing ``process(rgb_image)``; the webcam loop passes
            a ``Holistic`` instance.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR and
        whatever ``model.process`` returned for it.
    """
    # The model is fed RGB, so convert from OpenCV's BGR first.
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Read-only while the model runs, writeable again afterwards.
    # NOTE(review): presumably a pass-by-reference optimisation suggested by
    # the MediaPipe examples - confirm against the MediaPipe docs.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Back to BGR so the frame can be used with OpenCV drawing/display calls.
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

def draw_styled_landmarks(image, results):
    """Draw the left- and right-hand landmarks of ``results`` onto ``image``.

    Both hands use the same styling: landmark points in (0, 0, 255) with
    radius 4 and connection lines in white. Nothing is returned; drawing is
    delegated to ``mp_drawing.draw_landmarks``.

    Args:
        image: Frame to draw on.
        results: Object with ``left_hand_landmarks`` and
            ``right_hand_landmarks`` attributes.
    """
    hands = (results.left_hand_landmarks, results.right_hand_landmarks)
    for hand in hands:
        mp_drawing.draw_landmarks(
            image,
            hand,
            mp_holistic.HAND_CONNECTIONS,
            # Style of the landmark points.
            mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=4),
            # Style of the lines connecting the points.
            mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=2, circle_radius=2),
        )

# Turn one hand's landmarks into a flat coordinate vector (zeros if absent).
def extract_hand_landmarks(hand_landmarks):
    """Flatten one hand's landmarks into a 1-D array of x, y, z values.

    Args:
        hand_landmarks: Object whose ``landmark`` attribute is an iterable of
            points with ``x``, ``y`` and ``z`` attributes, or a falsy value
            (e.g. ``None``) when the hand was not detected.

    Returns:
        ``[x0, y0, z0, x1, y1, z1, ...]`` as a NumPy array, or 63 zeros
        (21 * 3) when ``hand_landmarks`` is falsy.
    """
    if not hand_landmarks:
        # Placeholder so the output length stays fixed when no hand is seen.
        return np.zeros(21 * 3)

    coords = [[point.x, point.y, point.z] for point in hand_landmarks.landmark]
    return np.array(coords).flatten()

# Extract keypoints from the left and right hand, followed by concatenation of the result.
def extract_keypoints(results):
    """Build one keypoint vector from both hands of a detection result.

    Args:
        results: Object with ``left_hand_landmarks`` and
            ``right_hand_landmarks`` attributes.

    Returns:
        The left-hand vector followed by the right-hand vector, each produced
        by ``extract_hand_landmarks``, joined into a single 1-D array.
    """
    hands = (results.left_hand_landmarks, results.right_hand_landmarks)
    return np.concatenate([extract_hand_landmarks(hand) for hand in hands])

# Class labels 'A'..'Z'; the webcam loop indexes this list with the model's
# argmax to turn a prediction into a letter.
alphabets = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')


import cv2

def prob_viz(res, alphabets, input_frame, colors):
    """Overlay a two-column probability bar chart on a copy of a frame.

    The first half of the labels is drawn in a column at the left edge and
    the remainder in a column starting at the horizontal centre. Each bar's
    width is the class probability scaled so the most likely class is 100
    pixels wide.

    Args:
        res: Iterable of per-class probabilities, aligned with ``alphabets``.
        alphabets: Sequence of label strings, one per class.
        input_frame: Image (NumPy array) to annotate; it is not modified.
        colors: Non-empty sequence of colour tuples for the bars. Colours are
            reused cyclically when there are fewer colours than classes.

    Returns:
        A copy of ``input_frame`` with the bars and labels drawn on it. If
        ``res`` is empty the unannotated copy is returned.
    """
    output_frame = input_frame.copy()
    probs = list(res)
    if not probs:
        return output_frame

    max_prob = max(probs)
    bar_height = 30  # Adjust this value to change the height of the bars
    row_spacing = 5  # Vertical gap between consecutive bars
    half_labels = len(alphabets) // 2
    right_column_x = output_frame.shape[1] // 2

    for num, prob in enumerate(probs):
        # Scale relative to the best class; avoid dividing by zero when every
        # probability is 0.
        rect_width = int(prob / max_prob * 100) if max_prob > 0 else 0

        # Labels in the second half go to the right column, and their row
        # index restarts at 0 so that column begins at the top of the frame
        # instead of continuing below the left column.
        if num < half_labels:
            text_x = 0
            row = num
        else:
            text_x = right_column_x
            row = num - half_labels
        bar_top = 60 + row * (bar_height + row_spacing)

        # Draw the filled bar
        cv2.rectangle(
            output_frame,
            (text_x, bar_top),
            (text_x + rect_width, bar_top + bar_height),
            colors[num % len(colors)],
            -1,
        )

        # Draw the label with a small font, baseline at mid-bar height
        cv2.putText(
            output_frame,
            alphabets[num],
            (text_x, bar_top + bar_height // 2),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 255, 255),
            1,
            cv2.LINE_AA,
        )

    return output_frame


# Webcam Prediction

import time

# Initialize variables
sequence = []  # rolling window of the most recent per-frame keypoint vectors
sentence = ""  # finished words, each followed by a space
current_word = ""  # letters accepted so far for the word being spelled
last_prediction_time = time.time()
threshold = 0.5  # the top class must exceed this probability to be accepted
sequence_length = 30  # number of frames in one model input window
prediction_interval = 5  # seconds that must elapse after an accepted letter

# Open the webcam feed
cap = cv2.VideoCapture(0)

# try/finally guarantees the camera and the window are released even if an
# exception is raised while processing frames.
try:
    # Set up mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            # Read feed; stop when no frame could be grabbed, because passing
            # a missing frame on to cv2.cvtColor would raise.
            ret, frame = cap.read()
            if not ret:
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # Extract keypoints and keep only the newest frames. Hands that
            # were not detected are zero-filled by extract_hand_landmarks, so
            # every frame contributes a vector.
            # NOTE(review): the former `if not results: continue` guard was
            # dropped; MediaPipe documents the result as a NamedTuple, which
            # is always truthy - confirm against the MediaPipe docs.
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-sequence_length:]

            # Run the model only when its output can actually be used: the
            # window is full and the waiting interval has elapsed.
            window_full = len(sequence) == sequence_length
            interval_elapsed = time.time() - last_prediction_time >= prediction_interval
            if window_full and interval_elapsed:
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                prediction_index = np.argmax(res)

                if res[prediction_index] > threshold:
                    # Confident prediction: append the letter and restart the
                    # waiting interval.
                    current_word += alphabets[prediction_index]
                    last_prediction_time = time.time()
                elif current_word:
                    # Low-confidence prediction ends the current word.
                    sentence += current_word + " "
                    current_word = ""

            # Display the current word in a banner spanning the frame width
            display_text = "Word: " + current_word
            cv2.rectangle(image, (0, 0), (image.shape[1], 40), (150, 0, 0), -1)
            cv2.putText(image, display_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2, cv2.LINE_AA)

            # Show image with annotations
            cv2.imshow('BSL Detection Feed', image)

            # Break loop if 'q' is pressed
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release and close windows
    cap.release()
    cv2.destroyAllWindows()



