In [1]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow import keras
from keras.models import load_model
from keras import layers
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelEncoder

In [2]:
mp_holistic = mp.solutions.holistic  # MediaPipe Holistic solution module (source of Holistic and HAND_CONNECTIONS used below)
mp_drawing = mp.solutions.drawing_utils  # MediaPipe drawing helpers (draw_landmarks, DrawingSpec)

def draw_styled_landmarks(image, results):
    """Overlay the left- and right-hand landmarks found in ``results`` onto ``image``.

    Args:
        image: Frame handed to ``mp_drawing.draw_landmarks`` as the drawing target.
        results: Object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks`` (here, the output of a Holistic model).

    Returns:
        None.
    """
    # (attribute on ``results``, colour of the first spec, colour of the second spec).
    # Presumably the first spec styles the landmark points and the second the
    # connecting lines, following draw_landmarks' positional order -- confirm.
    hand_styles = (
        ("left_hand_landmarks", (121, 22, 76), (121, 44, 250)),
        ("right_hand_landmarks", (245, 117, 66), (245, 66, 230)),
    )

    for attribute, point_color, line_color in hand_styles:
        hand = getattr(results, attribute)
        point_spec = mp_drawing.DrawingSpec(color=point_color, thickness=2, circle_radius=4)
        line_spec = mp_drawing.DrawingSpec(color=line_color, thickness=2, circle_radius=2)
        mp_drawing.draw_landmarks(
            image, hand, mp_holistic.HAND_CONNECTIONS, point_spec, line_spec
        )
    
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and return the frame together with the detections.

    Args:
        image: Frame in BGR channel order (as produced by OpenCV capture).
        model: Object with a ``process(rgb_image)`` method.

    Returns:
        tuple: ``(bgr_image, results)`` where ``bgr_image`` is the frame converted
        to RGB and back to BGR, and ``results`` is whatever ``model.process`` returned.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is marked read-only only for the duration of the model call.
    rgb.flags.writeable = False
    detections = model.process(rgb)
    rgb.flags.writeable = True

    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR), detections

In [15]:


def initialize_words():
    """Return a fresh list of the gesture words this notebook collects and recognises."""
    vocabulary = ("good", "knock", "black", "lakh", "my")
    return list(vocabulary)


def capture_frames(word, num_samples=20):
    """Interactively record hand-landmark samples for one word from the webcam.

    Each frame is run through MediaPipe Hands. While a hand is visible, pressing
    'k' stores the first detected hand's landmarks as one flat vector
    (x, y, z per landmark). Pressing 'q' stops early.

    Args:
        word: Label attached to every captured sample.
        num_samples: Number of samples to collect before returning.

    Returns:
        tuple: ``(frames, labels)`` as NumPy arrays. ``frames`` holds one flat
        landmark vector per captured sample and ``labels`` holds ``word`` once
        per sample. Both may be shorter than ``num_samples`` if the camera
        stops delivering frames or the user presses 'q'.
    """
    mp_hands = mp.solutions.hands

    frames = []
    labels = []

    print(f"Capturing {num_samples} frames for '{word}'... Press 'k' to capture a frame.")
    print("Press 'q' to stop early.")

    cap = cv2.VideoCapture(0)
    try:
        # Both MediaPipe graphs are context managers, so they are closed even
        # if the loop is interrupted (e.g. by KeyboardInterrupt).
        with mp_hands.Hands() as hands, \
                mp_holistic.Holistic(min_detection_confidence=0.5,
                                     min_tracking_confidence=0.5) as holistic:
            while len(frames) < num_samples:
                ret, frame = cap.read()
                if not ret:
                    break

                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = hands.process(rgb_frame)
                landmarks = results.multi_hand_landmarks

                landmarks_flat = None
                if landmarks:
                    # Only the first detected hand contributes to the sample.
                    hand_landmarks = landmarks[0].landmark
                    landmarks_flat = [
                        coord
                        for landmark in hand_landmarks
                        for coord in (landmark.x, landmark.y, landmark.z)
                    ]

                    # The overlay comes from the Holistic model, not from the
                    # Hands result that supplies the stored features.
                    image, holistic_results = mediapipe_detection(frame, holistic)
                    draw_styled_landmarks(image, holistic_results)
                else:
                    image = frame

                # Show the feed and poll the keyboard on every frame so the
                # preview stays live even when no hand is detected.
                cv2.imshow('OpenCV Feed', image)
                key = cv2.waitKey(1) & 0xFF

                if key == ord('q'):
                    break
                if key == ord('k') and landmarks_flat is not None:
                    frames.append(landmarks_flat)
                    labels.append(word)
                    print(f"Captured {len(frames)} frames")
    finally:
        # Always free the camera and close the preview window, even on error.
        cap.release()
        cv2.destroyAllWindows()

    return np.array(frames), np.array(labels)


def train_model(X_train, y_train, num_classes=5, epochs=100, batch_size=32):
    """Build and fit a small dense classifier on flattened landmark vectors.

    Args:
        X_train: 2-D array-like of shape ``(n_samples, n_features)``. The input
            layer width is taken from ``n_features``.
        y_train: 1-D array-like of integer class ids, one per sample, each in
            ``[0, num_classes)``.
        num_classes: Width of the softmax output layer.
        epochs: Number of training epochs passed to ``model.fit``.
        batch_size: Batch size passed to ``model.fit``.

    Returns:
        The fitted ``keras.Sequential`` model.

    Raises:
        ValueError: If ``X_train`` is empty or not 2-D, if the sample and label
            counts differ, or if a label falls outside ``[0, num_classes)``.
    """
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    # Fail early with a readable message; an aborted capture session yields
    # empty arrays that would otherwise surface as an opaque Keras error.
    if X_train.ndim != 2 or X_train.shape[0] == 0:
        raise ValueError(
            f"X_train must be a non-empty 2-D array, got shape {X_train.shape}"
        )
    if y_train.shape[0] != X_train.shape[0]:
        raise ValueError(
            f"X_train has {X_train.shape[0]} samples but y_train has {y_train.shape[0]} labels"
        )
    if y_train.min() < 0 or y_train.max() >= num_classes:
        raise ValueError(
            f"labels must lie in [0, {num_classes}), got range "
            f"[{y_train.min()}, {y_train.max()}]"
        )

    model = keras.Sequential([
        layers.Input(shape=(X_train.shape[1],)),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])

    # Sparse loss: labels are integer ids, not one-hot vectors.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)

    return model


def test_model(model):
    cap = cv2.VideoCapture(0)
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands()

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        
        results = hands.process(rgb_frame)
        landmarks = results.multi_hand_landmarks

        if landmarks:
            
            hand_landmarks = landmarks[0].landmark
            landmarks_flat = [coord for landmark in hand_landmarks for coord in (landmark.x, landmark.y, landmark.z)]

            
            prediction = model.predict(np.array([landmarks_flat]))
            predicted_class = np.argmax(prediction)

            
            cv2.putText(frame, initialize_words()[predicted_class], (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        
        cv2.imshow("Hand Gesture Recognition", frame)

        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()



In [21]:
# Collect labelled landmark samples for every word in the vocabulary.
words = initialize_words()
X_train, y_train = [], []

for word in words:
    frames, labels = capture_frames(word)
    X_train.extend(frames)
    y_train.extend(labels)

# Turn the word labels into integer class ids for the sparse loss.
# NOTE: LabelEncoder numbers classes in sorted order (see label_encoder.classes_),
# which differs from the order returned by initialize_words().
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)

X_train = np.array(X_train)
y_train = np.array(y_train)  # was np.karray, which does not exist and raised AttributeError


Capturing 20 frames for 'good'... Press 'k' to capture a frame.


KeyboardInterrupt: 

: 

In [17]:

# Fit the classifier on the captured landmark vectors and their encoded labels.
model = train_model(X_train, y_train)

# NOTE(review): this line only announces the live test; test_model is invoked in a later cell.
print("Model training complete. Testing the model in real-time...")



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [20]:

def test_model(model, min_hand_detection_confidence=0.5):
    cap = cv2.VideoCapture(0)
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(min_detection_confidence=min_hand_detection_confidence)

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        
        results = hands.process(rgb_frame)
        landmarks = results.multi_hand_landmarks

        if landmarks:
            
            if results.multi_handedness[0].classification[0].score > min_hand_detection_confidence:
                
                hand_landmarks = landmarks[0].landmark
                landmarks_flat = [coord for landmark in hand_landmarks for coord in (landmark.x, landmark.y, landmark.z)]

                
                prediction = model.predict(np.array([landmarks_flat]))
                predicted_class = np.argmax(prediction)

                
                cv2.putText(frame, initialize_words()[predicted_class], (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        
        cv2.imshow("Hand Gesture Recognition", frame)

        
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


# Start the live webcam demo; the loop inside test_model runs until 'q' is pressed or a frame read fails.
test_model(model, min_hand_detection_confidence=0.8)


