In [1]:
import cv2
import mediapipe as mp

In [2]:
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    cv2.imshow("Frame", frame)
    
    # Save frame for dataset
    # cv2.imwrite('path_to_save_frame/frame_name.jpg', frame)
    
    key = cv2.waitKey(1)
    if key == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

In [3]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()
mp_drawing = mp.solutions.drawing_utils

In [4]:
def preprocess_frame(frame):
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            # Extract hand landmarks
            landmarks = [[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]
            return landmarks
    return None

In [5]:
cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    landmarks = preprocess_frame(frame)
    if landmarks:
        print(landmarks)  # Save or process landmarks for model training

    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()



[[0.2691853940486908, 0.7467424273490906, 8.639130442134046e-07], [0.34788498282432556, 0.7331229448318481, -0.058254536241292953], [0.41401195526123047, 0.6603301167488098, -0.0903095230460167], [0.45727401971817017, 0.5717817544937134, -0.11526846140623093], [0.4999306797981262, 0.4961068332195282, -0.14074409008026123], [0.40450093150138855, 0.4762353003025055, -0.08187305927276611], [0.4557732045650482, 0.3521263301372528, -0.12784701585769653], [0.4868912100791931, 0.266798734664917, -0.15909360349178314], [0.5096608996391296, 0.19086477160453796, -0.18100224435329437], [0.34145209193229675, 0.4383305609226227, -0.08242698013782501], [0.3702108860015869, 0.2831643223762512, -0.12739497423171997], [0.3902703821659088, 0.18009251356124878, -0.15885578095912933], [0.40522319078445435, 0.09292683005332947, -0.17966169118881226], [0.2754462957382202, 0.4441380798816681, -0.08740408718585968], [0.28240668773651123, 0.29564106464385986, -0.13329024612903595], [0.2884930968284607, 0.19531

In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

# Assuming 'X_train' are the extracted landmarks and 'y_train' are the labels
X_train = ...  # Your training data
y_train = ...  # Your training labels

model = Sequential([
    Flatten(input_shape=(21, 3)),  # 21 landmarks with 3 coordinates (x, y, z)
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')  # 'num_classes' should be the number of signs
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, validation_split=0.2)


ValueError: Argument `validation_split` is only supported for tensors or NumPy arrays.Found incompatible type in the input: [<class 'ellipsis'>, <class 'ellipsis'>]

In [9]:
import numpy as np

def real_time_inference(model):
    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        
        landmarks = preprocess_frame(frame)
        if landmarks:
            landmarks = np.array(landmarks).reshape(1, 21, 3)
            prediction = model.predict(landmarks)
            predicted_label = np.argmax(prediction)
            print(f"Predicted sign: {predicted_label}")
        
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break
    
    cap.release()
    cv2.destroyAllWindows()

real_time_inference(model)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 280ms/step
Predicted sign: 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Predicted sign: 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Predicted sign: 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Predicted sign: 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Predicted sign: 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Predicted sign: 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Predicted sign: 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
Predicted sign: 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Predicted sign: 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Predicted sign: 7
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Predicted sign: 7
[1m1/1[