# In [None]:
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model
import pickle

# Restore the trained gesture classifier from its saved Keras archive.
model = load_model("hope.keras")
print("Model loaded successfully.")
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.models import load_model
import pickle

# Load the label encoder used later to turn the classifier's predicted class
# index back into a gesture label (via inverse_transform).
# The model itself is already loaded earlier in this script; loading
# 'hope.keras' a second time here only repeated the disk read and the
# "Model loaded successfully." message, so it is not done again.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only use a label_encoder.pkl that comes from a trusted source.
with open('label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# MediaPipe entry points: the hands solution module plus the drawing helpers
# and default styles used to render landmarks.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Hand tracker configured for a video stream (not independent still images),
# at most one hand, with a 0.7 minimum detection confidence.
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
)

# Fixed region of interest in frame pixel coordinates: (x1, y1, x2, y2).
ROI_BOX = (50, 100, 350, 400)
# Threshold value passed to cv2.threshold inside process_roi.
minValue = 70

def process_roi(roi):
    """Turn a BGR region of interest into a binary mask.

    The patch is converted to grayscale, smoothed with a 5x5 Gaussian kernel
    (sigma 2), adaptively thresholded (Gaussian-weighted, inverted, block
    size 11, constant 2) and finally run through an inverted Otsu threshold.

    Args:
        roi: Image patch in BGR channel order (it is converted with
            cv2.COLOR_BGR2GRAY).

    Returns:
        The single-channel image produced by the final cv2.threshold call.
    """
    smoothed = cv2.GaussianBlur(
        cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY), (5, 5), 2
    )

    adaptive_mask = cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )

    # NOTE: with THRESH_OTSU set, OpenCV picks the threshold itself and uses
    # it instead of the value passed in, so minValue has no effect on the
    # result here. It is still passed to keep the call unchanged.
    otsu_inverted = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    return cv2.threshold(adaptive_mask, minValue, 255, otsu_inverted)[1]

# Start capturing video from the default webcam (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Without this message the loop below is skipped and the script ends
    # without any hint about what went wrong.
    print("Error: could not open webcam.")

# try/finally guarantees the camera, the MediaPipe graph and the OpenCV
# windows are released even if the loop raises or the user hits Ctrl-C.
try:
    # The ROI never changes, so unpack its corners once.
    x1, y1, x2, y2 = ROI_BOX

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera unplugged / stream ended).
            break

        # Mirror the frame horizontally.
        frame = cv2.flip(frame, 1)

        # roi is a NumPy slice (a view) of frame, so anything drawn on roi
        # below also shows up in the displayed frame.
        roi = frame[y1:y2, x1:x2]

        # The thresholded ROI from process_roi() was never used by this loop,
        # so it is no longer computed on every frame.

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        rgb_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
        result = hands.process(rgb_roi)

        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                # Pack the 21 (x, y, z) landmarks into a batch of one sample.
                landmarks = np.array(
                    [[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]
                ).reshape(1, 21, 3)

                # verbose=0 keeps Keras from printing a progress bar for
                # every single frame.
                predictions = model.predict(landmarks, verbose=0)
                predicted_class = np.argmax(predictions, axis=1)
                predicted_label = label_encoder.inverse_transform(predicted_class)[0]

                # Draw the landmarks and their connections onto the ROI.
                mp_drawing.draw_landmarks(
                    roi,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())

                # Display the predicted label on the frame.
                cv2.putText(frame, f'Gesture: {predicted_label}', (10, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

        # Draw the fixed ROI box on the frame.
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Show the frame.
        cv2.imshow("Hand Gesture Recognition", frame)

        # Leave the loop when 'q' is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture object, close all OpenCV windows and free the
    # MediaPipe hand-tracking graph.
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
