In [2]:
import cv2
import numpy as np
import mediapipe as mp
from keras.models import load_model

# Load the two saved Keras classifiers; both paths are relative to the
# current working directory.
model_alpha = load_model('Alphabets/model_alpha.h5')
model_numbers = load_model('Numbers/model_numbers.h5')
# Short aliases for the MediaPipe hand-tracking solution and its drawing
# helpers, used by the capture loop below.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
# Output labels looked up by predict_alphabet: the 26 uppercase letters A-Z,
# in order.
alphabets = [chr(ord('A') + offset) for offset in range(26)]
# Output labels looked up by predict_numbers: the digit characters '0'-'9'.
GESTURES = [str(digit) for digit in range(10)]

def predict_alphabet(hand_landmarks):
    """Classify a detected hand as a letter using ``model_alpha``.

    The x/y/z coordinates of every landmark are flattened into one vector
    (x, y, z per landmark, in landmark order), standardized with the vector's
    own mean and standard deviation, and passed to the model as a single
    sample of shape ``(1, n_values)``.

    Args:
        hand_landmarks: Object whose ``landmark`` attribute is an iterable of
            points exposing ``x``, ``y`` and ``z`` attributes.

    Returns:
        The entry of ``alphabets`` at the index of the highest model score.
    """
    coords = np.array(
        [[point.x, point.y, point.z] for point in hand_landmarks.landmark]
    ).flatten()

    # A zero standard deviation would turn every feature into NaN; fall back
    # to a unit divisor so the model still receives finite (all-zero) input.
    spread = np.std(coords)
    if spread == 0:
        spread = 1.0
    features = ((coords - np.mean(coords)) / spread).reshape(1, -1)

    # verbose=0: this runs once per video frame, so the per-call progress bar
    # that some Keras versions print by default would flood the output.
    scores = model_alpha.predict(features, verbose=0)
    return alphabets[np.argmax(scores)]

def predict_numbers(hand_landmarks):
    """Classify a detected hand as a digit using ``model_numbers``.

    The x/y/z coordinates of every landmark are flattened into one vector,
    divided by the vector's maximum value, and passed to the model as a
    single sample of shape ``(1, 63, 1)``.

    Args:
        hand_landmarks: Object whose ``landmark`` attribute is an iterable of
            points exposing ``x``, ``y`` and ``z`` attributes. It must yield
            exactly 21 points (63 values); otherwise the reshape raises
            ``ValueError``.

    Returns:
        The entry of ``GESTURES`` at the index of the highest model score.
    """
    coords = np.array(
        [[point.x, point.y, point.z] for point in hand_landmarks.landmark]
    ).flatten()

    # Scale by the largest coordinate. Skip the division when that value is
    # zero so the model never receives inf/NaN input.
    peak = np.max(coords)
    if peak != 0:
        coords = coords / peak
    features = coords.reshape(1, 63, 1)

    # verbose=0: this runs once per video frame, so the per-call progress bar
    # that some Keras versions print by default would flood the output.
    scores = model_numbers.predict(features, verbose=0)
    return GESTURES[np.argmax(scores)]

# Real-time recognition loop: grab webcam frames, detect one hand, classify
# it with the currently selected model, and draw the result on the frame.
# Press "m" to toggle between the numbers and alphabets models, "q" to quit.
cap = cv2.VideoCapture(0)
model_choice = "numbers"

try:
    with mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7,
                        min_tracking_confidence=0.7) as hands:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame could be read (camera missing or stream ended).
                break

            # OpenCV delivers BGR frames; the hand detector is fed RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(frame_rgb)

            if results.multi_hand_landmarks:
                h, w, _ = frame.shape
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Landmark coordinates are scaled by the frame size to
                    # obtain a pixel-space bounding box around the hand.
                    xs = [landmark.x for landmark in hand_landmarks.landmark]
                    ys = [landmark.y for landmark in hand_landmarks.landmark]
                    x_min, x_max = int(min(xs) * w), int(max(xs) * w)
                    y_min, y_max = int(min(ys) * h), int(max(ys) * h)

                    cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

                    if model_choice == "numbers":
                        predicted_gesture = predict_numbers(hand_landmarks)
                    else:
                        predicted_gesture = predict_alphabet(hand_landmarks)

                    cv2.putText(frame, f'Predicted: {predicted_gesture}', (x_min, y_min - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

            cv2.putText(frame, f'Model: {model_choice} (Press "m" to switch)', (10, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

            cv2.imshow('Hand Gesture Recognition', frame)

            # Poll the keyboard exactly once per frame: every waitKey call
            # consumes the pending key event, so polling twice would drop
            # whichever key press the "wrong" call happened to receive.
            key = cv2.waitKey(1) & 0xFF
            if key == ord('m'):
                model_choice = "alphabets" if model_choice == "numbers" else "numbers"
                print(f"Switched to {model_choice} model.")
            elif key == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()


Switched to alphabets model.
Switched to numbers model.
