In [None]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [None]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data') 

# Actions that we try to detect
actions = np.array(["الصبح","availabile","direct","hour","kaam","Masr","saturday","money","stop_cut","ticket","train"])

# Thirty videos worth of data
no_sequences = 100

# Videos are going to be 30 frames in length
sequence_length = 50

In [None]:
actions

array(['الصبح', 'availabile', 'direct', 'hour', 'kaam', 'Masr',
       'saturday', 'money', 'stop_cut', 'ticket', 'train'], dtype='<U10')

In [None]:
for action in actions: 
    for sequence in range(no_sequences):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
label_map = {label:num for num, label in enumerate(actions)}

In [None]:
label_map

{'الصبح': 0,
 'availabile': 1,
 'direct': 2,
 'hour': 3,
 'kaam': 4,
 'Masr': 5,
 'saturday': 6,
 'money': 7,
 'stop_cut': 8,
 'ticket': 9,
 'train': 10}

In [None]:
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

FileNotFoundError: [Errno 2] No such file or directory: 'MP_Data\\الصبح\\0\\0.npy'

In [None]:
np.array(sequences).shape

(1100, 50, 258)

In [None]:
np.array(labels).shape

(1100,)

In [None]:
X = np.array(sequences)

In [None]:
X.shape

(1100, 50, 258)

In [None]:
y = to_categorical(labels).astype(np.float32)

In [None]:
y

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [None]:
y_test.shape

(220, 11)

In [29]:
import keras
from keras.callbacks import EarlyStopping
import tensorflow as tf
# Define early stopping criteria
early_stop = EarlyStopping(monitor='loss', patience=5, verbose=1, mode='min')

from tensorflow.keras.optimizers import Adam
early_stopping = tf.keras.callbacks.EarlyStopping(patience=10, verbose=1)
checkpointer = tf.keras.callbacks.ModelCheckpoint('lstm_char22.h5',verbose=1,save_best_only=True)


In [30]:

from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Model
from keras.optimizers import Adam
with tf.device('/device:GPU:0'):
# Set the dimensions of input and output

    input_dim = 258
    output_dim = 11
    # Define the LSTM model architecture
    input_layer = Input(shape=(50, input_dim))
    lstm_layer = LSTM(64, return_sequences=False)(input_layer)
    output_layer = Dense(output_dim, activation='softmax')(lstm_layer)

    # Create the model object
    model = Model(inputs=input_layer, outputs=output_layer)

optimizer = Adam(learning_rate=0.001)
# Compile the model with categorical cross-entropy loss and Adam optimizer
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Print the model summary
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 50, 258)]         0         
                                                                 
 lstm (LSTM)                 (None, 64)                82688     
                                                                 
 dense (Dense)               (None, 11)                715       
                                                                 
Total params: 83,403
Trainable params: 83,403
Non-trainable params: 0
_________________________________________________________________


In [25]:
model.fit(X_train, y_train, epochs=500,validation_data=(X_test, y_test),callbacks=[early_stopping, checkpointer])

Epoch 1/500
Epoch 1: val_loss improved from inf to 1.94795, saving model to lstm_char22.h5
Epoch 2/500
Epoch 2: val_loss improved from 1.94795 to 1.60765, saving model to lstm_char22.h5
Epoch 3/500
Epoch 3: val_loss improved from 1.60765 to 1.27992, saving model to lstm_char22.h5
Epoch 4/500
Epoch 4: val_loss did not improve from 1.27992
Epoch 5/500
Epoch 5: val_loss improved from 1.27992 to 1.13126, saving model to lstm_char22.h5
Epoch 6/500
Epoch 6: val_loss did not improve from 1.13126
Epoch 7/500
Epoch 7: val_loss improved from 1.13126 to 1.04668, saving model to lstm_char22.h5
Epoch 8/500
Epoch 8: val_loss did not improve from 1.04668
Epoch 9/500
Epoch 9: val_loss improved from 1.04668 to 0.81098, saving model to lstm_char22.h5
Epoch 10/500
Epoch 10: val_loss improved from 0.81098 to 0.76966, saving model to lstm_char22.h5
Epoch 11/500
Epoch 11: val_loss improved from 0.76966 to 0.73392, saving model to lstm_char22.h5
Epoch 12/500
Epoch 12: val_loss did not improve from 0.73392
Ep

<keras.callbacks.History at 0x1dbe9d11e80>

In [26]:
# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)

Test accuracy: 0.9045454263687134


In [27]:
model.save('9045454263687134%.h5')

test here 

In [81]:
colors = [(245,117,16), (117,245,16), (16,117,245),(245,117,16), (117,245,16), (16,117,245),(245,117,16), (117,245,16), (16,117,245),(245,117,16), (117,245,16), (16,117,245),(245,117,16), (117,245,16), (16,117,245),(245,117,16), (117,245,16)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [82]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [83]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [84]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

In [85]:
def mediapipe_draw(image,results):
    mp_drawing.draw_landmarks(image,results.pose_landmarks,mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=1, circle_radius=2),
                              mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=1, circle_radius=1))
    mp_drawing.draw_landmarks(image,results.right_hand_landmarks,mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=1, circle_radius=2),
                              mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=1, circle_radius=1))
    mp_drawing.draw_landmarks(image,results.left_hand_landmarks,mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=1, circle_radius=2),
                              mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=1, circle_radius=1))

In [86]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, lh, rh])

In [None]:
cv2.VideoCapture('C:/Users/mosta/Desktop/direct.mp4')

In [34]:
# install: pip install --upgrade arabic-reshaper
import arabic_reshaper

# install: pip install python-bidi
from bidi.algorithm import get_display
# install: pip install Pillow
from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw

In [35]:
fontFile = "C:/Users/mosta/Downloads/Telegram Desktop/fvf/Sahel.ttf"
font = ImageFont.truetype(fontFile, 70)

In [59]:
# 1. New detection variables
def cap():

            global word
            sequence = []
            global sentence 
            threshold = 0.7
            sentence = []
            cap =cv2.VideoCapture(0)

            # Set mediapipe model 
            with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
                while cap.isOpened():

                    # Read feed
                    ret, frame = cap.read()
                    # Make detections
                    image, results = mediapipe_detection(frame, holistic)
                    print(results)
                    
                    # Draw landmarks
                    mediapipe_draw(image, results)
                    # 2. Prediction logic
                    keypoints = extract_keypoints(results)
            #         sequence.insert(0,keypoints)
            #         sequence = sequence[:30]
                    sequence.append(keypoints)
                    sequence = sequence[-50:]
                    
                    if len(sequence) == 50:
                        res = model.predict(np.expand_dims(sequence, axis=0))[0]
                        print(actions[np.argmax(res)])
                        
                        
                    #3. Viz logic
                        if res[np.argmax(res)] > threshold: 
                            if len(sentence) > 0: 
                                if actions[np.argmax(res)] != sentence[-1]:
                                    sentence.append(actions[np.argmax(res)])
                            else:
                                sentence.append(actions[np.argmax(res)])

                        if len(sentence) > 5: 
                            sentence = sentence[-5:]

                        # Viz probabilities
                        image = prob_viz(res, actions, image, colors)
                    
                    cv2.putText(image, ' '.join(sentence), (3,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                    textbox.delete('1.0', tk.END)
                    textbox.insert(tk.END, ' '.join(sentence))
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                    # Break gracefully
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        break
                cap.release()
                cv2.destroyAllWindows()

In [60]:
from tkinter import Button, Frame, Label, StringVar, Tk
from tkinter import *


ro = Tk()
ro.title('capture the images')
ro.geometry("300x300")
button = Button(ro, text="open camera" ,command=cap , bg = "gray" , width = 10  )    # cap command calls the webcam to start capturing
button.pack()

# create a frame with W 250 x H 250
frame2=Frame(ro, width=500, height=500)  
l1 = Label(frame2, text="The Predicted Letter")
var = StringVar()    # create a string variable
var.set(word)      # set it to "letter"
l2 = Label(frame2, textvariable =word)   # display var "letter" as l2 
l3 = Label(frame2, text="The word")



l1.pack()
l2.pack()
l3.pack()


ro.mainloop()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

In [87]:
import tkinter as tk
from tkinter import ttk
import cv2

def cap():

            global word
            sequence = []
            global sentence 
            threshold = 0.7
            sentence = []
            cap =cv2.VideoCapture(0)

            # Set mediapipe model 
            with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
                while cap.isOpened():

                    # Read feed
                    ret, frame = cap.read()
                    # Make detections
                    image, results = mediapipe_detection(frame, holistic)
                    print(results)
                    
                    # Draw landmarks
                    mediapipe_draw(image, results)
                    # 2. Prediction logic
                    keypoints = extract_keypoints(results)
            #         sequence.insert(0,keypoints)
            #         sequence = sequence[:30]
                    sequence.append(keypoints)
                    sequence = sequence[-50:]
                    
                    if len(sequence) == 50:
                        res = model.predict(np.expand_dims(sequence, axis=0))[0]
                        print(actions[np.argmax(res)])
                        
                        
                    #3. Viz logic
                        if res[np.argmax(res)] > threshold: 
                            if len(sentence) > 0: 
                                if actions[np.argmax(res)] != sentence[-1]:
                                    sentence.append(actions[np.argmax(res)])
                            else:
                                sentence.append(actions[np.argmax(res)])

                        if len(sentence) > 5: 
                            sentence = sentence[-5:]
                        

                        sentence_str = ' '.join(sentence)
                        sentence_label.config(text=sentence_str)
                        # Viz probabilities
                        image = prob_viz(res, actions, image, colors)
                    
                    cv2.putText(image, ' '.join(sentence), (3,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
                    
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                    # Break gracefully
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        break
                cap.release()
                cv2.destroyAllWindows()






# Create the GUI
root = tk.Tk()
root.title('Sign Language Detection')

# Create a label and text box to display the sentence output
output_label = tk.Label(root, text='Sentence Output:')
output_label.pack()
sentence_label = tk.Label(root, font=("Arial", 18))
sentence_label.pack(pady=20)

# Create a button to start the sign language detection
start_button = tk.Button(root, text='Start', command=cap)
start_button.pack()

root.mainloop()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

: 

: 

In [None]:

sentence_str = ' '.join(sentence)
sentence_label.config(text=sentence_str)

# Create the GUI
root = tk.Tk()
root.title('Sign Language Detection')

# Create a label and text box to display the sentence output
output_label = tk.Label(root, text='Sentence Output:')
output_label.pack()
sentence_label = tk.Label(root, font=("Arial", 18))
sentence_label.pack(pady=20)

# Create a button to start the sign language detection
start_button = tk.Button(root, text='Start', command=cap)
start_button.pack()

root.mainloop()

In [79]:
import cv2
import mediapipe as mp
import numpy as np
from tensorflow import keras

# Load the LSTM model
model = keras.models.load_model('C:/Users/mosta/Downloads/ActionDetectionforSignLanguage-main/cam/1 test/927272%.h5')

In [69]:
import tkinter as tk
import cv2
import mediapipe as mp
import numpy as np

# Define your functions for sign language detection
def mediapipe_detection(image, model):
    # Perform mediapipe detection on the image
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = model.process(image)
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def extract_keypoints(results):
    # Extract keypoints from the mediapipe results
    keypoints = []
    for i in range(21):
        keypoint = [int(results.pose_landmarks.landmark[i].x * 640), int(results.pose_landmarks.landmark[i].y * 480)]
        keypoints.append(keypoint)
    return keypoints

def cap():
    # Open a camera and detect sign language
    sequence = []
    threshold = 0.7
    sentence = []

    cap = cv2.VideoCapture(0)
    with mp.solutions.holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            image, results = mediapipe_detection(frame, holistic)
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-50:]
            
            if len(sequence) == 50:
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                if res[np.argmax(res)] > threshold:
                    if len(sentence) > 0:
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])
                if len(sentence) > 5:
                    sentence = sentence[-5:]
            
            cv2.putText(image, ' '.join(sentence), (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            cv2.imshow('Sign Language Detection', image)

            # Update the text box with the current sentence
            textbox.delete('1.0', tk.END)
            textbox.insert(tk.END, ' '.join(sentence))

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

        cap.release()
        cv2.destroyAllWindows()

# Load your model and actions
model = ...
actions = ...

# Create the GUI
root = tk.Tk()
root.title('Sign Language Detection')

# Create a label and text box to display the sentence output
output_label = tk.Label(root, text='Sentence Output:')
output_label.pack()
textbox = tk.Text(root, height=1, width=50)
textbox.pack()

# Create a button to start the sign language detection
start_button = tk.Button(root, text='Start', command=cap)
start_button.pack()

root.mainloop()

Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\tkinter\__init__.py", line 1892, in __call__
    return self.func(*args)
  File "C:\Users\mosta\AppData\Local\Temp\ipykernel_21368\408157663.py", line 29, in cap
    with mp.solutions.holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
TypeError: 'module' object is not callable
