In [1]:
import cv2
import numpy as np
from keras.models import model_from_json
import time

# Load your trained model and weights
model_architecture = "model-bw.json"
model_weights = "model-bw.h5"

with open(model_architecture, "r") as json_file:
    loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    model.load_weights(model_weights)

# Labels for gesture classes
class_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',' ']

# Define the frame size (sz)
sz = 128  # You can adjust this size according to your camera and model requirements

# Create a function to perform real-time gesture recognition
def predict_gesture_from_webcam():
    cap = cv2.VideoCapture(0)  # Open the webcam (you may need to adjust the camera index)

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Unable to capture video.")
            break

        # Apply object detection or hand tracking here to detect your hand and get the region of interest (ROI)

        # If using OpenCV's Haar Cascade Classifier for hand detection:
        hand_cascade = cv2.CascadeClassifier("hand.xml")
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        hands = hand_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

        for (x, y, w, h) in hands:
            roi = frame[y:y + h, x:x + w]
            # Resize and preprocess the ROI
            roi = cv2.resize(roi, (sz, sz))
            roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
            roi = roi / 255.0
            gesture_data = np.expand_dims(roi, axis=(0, -1))
            print(gesture_data)

            # Get the predicted label
            prediction = model.predict(gesture_data)
            predicted_label = class_labels[np.argmax(prediction)]

            # Draw bounding box and label
            cv2.rectangle(frame, (x - 20, y - 20), (x + w + 20, y + h + 20), (0, 255, 0), 2)

            cv2.putText(frame, predicted_label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

        # Display the frame with bounding box and label using OpenCV
        cv2.imshow('Webcam Feed', frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break

    # Release the camera capture object
    cap.release()
    cv2.destroyAllWindows()

# Call the function for real-time prediction
predict_gesture_from_webcam()


[[[[0.72941176]
   [0.72941176]
   [0.7254902 ]
   ...
   [0.68235294]
   [0.68235294]
   [0.68235294]]

  [[0.72941176]
   [0.72941176]
   [0.72941176]
   ...
   [0.68235294]
   [0.68235294]
   [0.68235294]]

  [[0.72941176]
   [0.72941176]
   [0.7254902 ]
   ...
   [0.68235294]
   [0.68235294]
   [0.68235294]]

  ...

  [[0.53333333]
   [0.53333333]
   [0.53333333]
   ...
   [0.41960784]
   [0.41960784]
   [0.41960784]]

  [[0.53333333]
   [0.53333333]
   [0.53333333]
   ...
   [0.41960784]
   [0.41960784]
   [0.41960784]]

  [[0.53333333]
   [0.53333333]
   [0.53333333]
   ...
   [0.41960784]
   [0.41960784]
   [0.41960784]]]]
[[[[0.78039216]
   [0.78039216]
   [0.77647059]
   ...
   [0.84705882]
   [0.84705882]
   [0.84705882]]

  [[0.78039216]
   [0.78039216]
   [0.77647059]
   ...
   [0.84705882]
   [0.84705882]
   [0.84705882]]

  [[0.78039216]
   [0.78039216]
   [0.77647059]
   ...
   [0.84705882]
   [0.84705882]
   [0.84705882]]

  ...

  [[0.58823529]
   [0.58823529]
   [0.58

KeyboardInterrupt: 

In [7]:
cap.release()
cv2.destroyAllWindows()