In [7]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

config = ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.5
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)



In [8]:
import cv2
import numpy as np
import mediapipe as mp
from keras.models import load_model

# Load the trained model
model = load_model('model.h5')
print("Model loaded successfully!")

# MediaPipe hands initialization
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7)
mp_drawing = mp.solutions.drawing_utils

# Class mapping
class_mapping = {
    0: "zero", 1: "one", 2: "two", 3: "three", 4: "four", 5: "five", 
    6: "six", 7: "seven", 8: "eight", 9: "nine", 10: "ten", 11: "eleven", 
    12: "twelve", 13: "thirteen", 14: "fourteen", 15: "fifteen", 
    16: "sixteen", 17: "seventeen", 18: "eighteen", 19: "nineteen"
}

# Preprocess the input image
def preprocess_image(image, img_size=224):
    img_arr = cv2.resize(image, (img_size, img_size))
    img_arr = np.expand_dims(img_arr, axis=0)  # Add batch dimension
    img_arr = img_arr / 255.0  # Normalize pixel values
    return img_arr

# Predict function
def predict_image(image, model, class_mapping):
    preprocessed_img = preprocess_image(image)
    print("Preprocessed image shape:", preprocessed_img.shape)
    prediction = model.predict(preprocessed_img)
    print("Prediction output:", prediction)
    predicted_class_index = np.argmax(prediction)
    predicted_class_label = class_mapping[predicted_class_index]
    return predicted_class_label



Model loaded successfully!


In [None]:
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)  # Reduced resolution
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)  # Reduced resolution

# Main loop
while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        frame = cv2.flip(frame, 1)
        clone = frame.copy()
        
        # Convert the BGR image to RGB
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        
        # Process the frame with MediaPipe Hands
        result = hands.process(rgb_frame)
        
        if result.multi_hand_landmarks:
            for hand_landmarks in result.multi_hand_landmarks:
                # Get bounding box of the hand
                h, w, c = frame.shape
                x_min, y_min = w, h
                x_max, y_max = 0, 0
                
                for lm in hand_landmarks.landmark:
                    x, y = int(lm.x * w), int(lm.y * h)
                    if x < x_min:
                        x_min = x
                    if y < y_min:
                        y_min = y
                    if x > x_max:
                        x_max = x
                    if y > y_max:
                        y_max = y
                
                # Expand the bounding box slightly to include the whole hand
                x_min = max(0, x_min - 20)
                y_min = max(0, y_min - 20)
                x_max = min(w, x_max + 20)
                y_max = min(h, y_max + 20)
                
                # Extract the hand region
                hand_region = frame[y_min:y_max, x_min:x_max]
                cv2.imshow("Hand Region", hand_region)
                
                if hand_region.size > 0:
                    print("Hand region shape:", hand_region.shape)
                    prediction = predict_image(hand_region, model, class_mapping)
                    print(f"Predicted gesture: {prediction}")
                    cv2.putText(clone, f"Gesture: {prediction}", (50, 50), cv2.FONT_HERSHEY_DUPLEX, 1, (255, 0, 0), 3)
                    
                    # Display the model predictions
                    predictions = model.predict(preprocess_image(hand_region))
                    print("Model predictions:", predictions)
                    
                    # Evaluate the model on the current frame
                    predicted_class_index = np.argmax(predictions)
                    predicted_class_label = class_mapping[predicted_class_index]
                    print(f"Predicted class label: {predicted_class_label}")
                    
                # Draw hand landmarks on the image
                mp_drawing.draw_landmarks(clone, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        cv2.imshow('frame', clone)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break

cap.release() 
cv2.destroyAllWindows()
hands.close()

Hand region shape: (81, 65, 3)
Preprocessed image shape: (1, 224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Prediction output: [[1.96138740e-06 2.10917897e-05 5.09517463e-07 9.88699019e-01
  3.85507519e-08 2.41475864e-05 3.14856094e-04 5.87702820e-09
  2.05376200e-05 6.23715096e-07 3.63407715e-04 1.05537344e-02
  1.00294894e-13 1.04152526e-15 5.76816532e-08 4.95497263e-12
  4.45399051e-10 2.41502537e-13 2.15282111e-12 2.11048540e-12]]
Predicted gesture: three
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Model predictions: [[1.96138740e-06 2.10917897e-05 5.09517463e-07 9.88699019e-01
  3.85507519e-08 2.41475864e-05 3.14856094e-04 5.87702820e-09
  2.05376200e-05 6.23715096e-07 3.63407715e-04 1.05537344e-02
  1.00294894e-13 1.04152526e-15 5.76816532e-08 4.95497263e-12
  4.45399051e-10 2.41502537e-13 2.15282111e-12 2.11048540e-12]]
Predicted class label: three
Hand region shape: (82, 63, 3)
Preprocessed image shape: (1, 224, 22