# In [2]:
import cv2
import mediapipe as mp
import numpy as np
import pyttsx3
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import load_model

# Load the trained Keras classifier from disk. The file must already exist;
# this script does no training of its own.
model = load_model('sign_language_model.h5')  # Ensure you have a trained model
# Rebuild a LabelEncoder by assigning its classes_ directly from the saved
# array, so predicted class indices can be mapped back to labels.
# NOTE(review): presumably classes.npy was saved from the encoder used at
# training time, in the same order as the model's outputs -- confirm.
label_encoder = LabelEncoder()
label_encoder.classes_ = np.load('classes.npy')  # Load class labels

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils  # helper used to draw landmarks on frames
# Configured for at most one hand, with a 0.5 minimum detection confidence.
# static_image_mode=False is the setting MediaPipe documents for video streams.
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1, min_detection_confidence=0.5)

# Initialize text-to-speech engine (a single module-level instance used by speak()).
engine = pyttsx3.init()

def speak(text):
    """Speak *text* aloud using the module-level pyttsx3 ``engine``.

    The text is queued with ``engine.say`` and then ``engine.runAndWait`` is
    called, so this function does not return until the engine has finished
    processing its queue.
    """
    engine.say(text)
    engine.runAndWait()

def extract_hand_features(hand_landmarks):
    """Flatten one hand's landmarks into a single feature row.

    Args:
        hand_landmarks: Object exposing an iterable ``landmark`` attribute
            whose items each carry ``x``, ``y`` and ``z`` attributes.

    Returns:
        A NumPy array of shape ``(1, 3 * number_of_landmarks)`` laid out as
        ``x0, y0, z0, x1, y1, z1, ...`` in landmark order.
    """
    # One (x, y, z) triple per landmark; the row-major reshape below lays the
    # triples out back to back in a single row.
    coords = [
        (point.x, point.y, point.z)
        for point in hand_landmarks.landmark
    ]
    return np.array(coords).reshape(1, -1)

# Main loop: read webcam frames, detect a hand, classify the sign, overlay the
# predicted label on the frame and speak it. Press 'q' in the window to quit.
cap = cv2.VideoCapture(0)

# Label most recently spoken. speak() blocks until the utterance finishes, so
# speaking on every frame would freeze the video and repeat the same word
# endlessly; a label is only spoken when it differs from this one.
last_spoken = None

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame could be read (camera unplugged / stream ended).
            break

        # The frame is converted from OpenCV's BGR order to RGB for detection
        # only; drawing and display use the original BGR frame, which avoids a
        # second colour conversion per frame.
        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        image = frame

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                features = extract_hand_features(hand_landmarks)
                # verbose=0 stops Keras from printing a progress bar per frame.
                prediction = model.predict(features, verbose=0)
                class_index = np.argmax(prediction)
                # str() so putText/speak also work if the stored class labels
                # are not plain strings (e.g. integers).
                sign_text = str(label_encoder.inverse_transform([class_index])[0])

                cv2.putText(image, sign_text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
                if sign_text != last_spoken:
                    speak(sign_text)
                    last_spoken = sign_text
        else:
            # Hand left the frame: forget the last label so the same sign is
            # spoken again when it is shown next time.
            last_spoken = None

        cv2.imshow('Sign Language to Speech', image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if an error occurred
    # inside the loop.
    cap.release()
    cv2.destroyAllWindows()


# Output when this cell was run:
# ModuleNotFoundError: No module named 'mediapipe.python._framework_bindings'