In [9]:

import pickle
import cv2
import mediapipe as mp
import numpy as np
import time

# --- Load the Model and Class Map ---
try:
    with open('model.p', 'rb') as f:
        load_data = pickle.load(f)
        model = load_data['model']
        class_map = load_data['class_map']
except FileNotFoundError:
    print("Error: model.p not found. Please run the training script first.")
    exit()

# Create a dictionary to map class indices to class names
labels_dict = {v: k for k, v in class_map.items()}
print(f"Labels loaded: {labels_dict}")

# --- Initialize Webcam and MediaPipe ---
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)

# --- Variables for Sentence Logic ---
sentence = ""
last_prediction = None
prediction_stable_for = 0
last_char_time = time.time()
PREDICTION_THRESHOLD = 20  # Frames the gesture must be stable
COOLDOWN_PERIOD = 2      # Seconds to wait after adding a character

# --- Main Loop ---
while True:
    ret, frame = cap.read()
    if not ret:
        print("Error: Failed to capture frame.")
        break
    
    H, W, _ = frame.shape
    frame = cv2.flip(frame, 1)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)
    
    # Create a white box at the bottom for the sentence
    cv2.rectangle(frame, (0, H - 60), (W, H), (255, 255, 255), -1)
    
    # --- Hand Landmark Processing ---
    if results.multi_hand_landmarks:
        all_landmarks = []
        for hand_landmarks in results.multi_hand_landmarks[:2]: # Process up to 2 hands
            mp_drawing.draw_landmarks(
                frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())

            data_aux = []
            x_ = [lm.x for lm in hand_landmarks.landmark]
            y_ = [lm.y for lm in hand_landmarks.landmark]
            for i in range(len(hand_landmarks.landmark)):
                data_aux.append(hand_landmarks.landmark[i].x - min(x_))
                data_aux.append(hand_landmarks.landmark[i].y - min(y_))
            all_landmarks.extend(data_aux)

        # Pad with zeros if only one hand is detected
        if len(all_landmarks) < 84:
            all_landmarks.extend([0] * (84 - len(all_landmarks)))

        # --- Prediction and Logic for Space ---
        prediction = model.predict([np.asarray(all_landmarks[:84])])
        predicted_class_name = labels_dict[int(prediction[0])]

        # Display the gesture name the model sees
        cv2.putText(frame, f'Gesture: {predicted_class_name}', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3, cv2.LINE_AA)
        
        # This is the key part: Map the word "space" to a real space character " "
        if predicted_class_name == "space":
            current_char = " "
        else:
            current_char = predicted_class_name
        
        # --- Stability and Cooldown Logic ---
        if current_char == last_prediction:
            prediction_stable_for += 1
        else:
            prediction_stable_for = 1
            last_prediction = current_char
            
        if prediction_stable_for >= PREDICTION_THRESHOLD and (time.time() - last_char_time) > COOLDOWN_PERIOD:
            sentence += current_char
            last_char_time = time.time()
            prediction_stable_for = 0
            
    # Display the final sentence
    cv2.putText(frame, sentence, (20, H - 20), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 0), 2, cv2.LINE_AA)

    cv2.imshow('Sign Language Recognition', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# --- Cleanup ---
cap.release()
cv2.destroyAllWindows()

Labels loaded: {0: '1', 1: 'A', 2: 'B', 3: 'C', 4: 'D', 5: 'E', 6: 'F', 7: 'G', 8: 'H', 9: 'I', 10: 'J', 11: 'K', 12: 'L', 13: 'M', 14: 'N', 15: 'O', 16: 'P', 17: 'Q', 18: 'R', 19: 'space'}


In [3]:
# 2_train_model_named.py
# MODIFIED: Reads named folders and saves a class map with the model.

import os
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

DATA_DIR = './data'

data = []
labels = []

# MODIFIED: Create a mapping from folder name (word) to a number (label)
# Sort the directory names to ensure consistent mapping
class_names = sorted(os.listdir(DATA_DIR))
class_map = {name: i for i, name in enumerate(class_names)}
print(f"Class mapping created: {class_map}")

# Load the data and assign numeric labels based on the folder name
for class_name, class_label in class_map.items():
    class_dir = os.path.join(DATA_DIR, class_name)
    if not os.path.isdir(class_dir):
        continue
    
    for file_path in os.listdir(class_dir):
        try:
            with open(os.path.join(class_dir, file_path), 'rb') as f:
                data_dict = pickle.load(f)
                if len(data_dict['data']) == 84:
                    data.append(data_dict['data'])
                    labels.append(class_label) # Use the numeric label
                else:
                    print(f"Skipping file {file_path} due to incorrect feature count.")
        except Exception as e:
            print(f"Error loading file {file_path}: {e}")

if not data or not labels:
    print("No data loaded. Please run the data collection script first.")
    exit()
    
X = np.asarray(data)
y = np.asarray(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, stratify=y)

model = RandomForestClassifier()
print("Training model...")
model.fit(X_train, y_train)
print("Training complete.")

y_predict = model.predict(X_test)
score = accuracy_score(y_predict, y_test)
print(f'{score * 100:.2f}% accuracy achieved on the test set!')

# MODIFIED: Save both the model and the class map together
save_data = {'model': model, 'class_map': class_map}
with open('model.p', 'wb') as f:
    pickle.dump(save_data, f)

print("Model and class map saved as model.p")


Class mapping created: {'1': 0, 'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7, 'H': 8, 'I': 9, 'J': 10, 'K': 11, 'L': 12, 'M': 13, 'N': 14, 'O': 15, 'P': 16, 'Q': 17, 'R': 18, 'space': 19}
Error loading file .ipynb_checkpoints: [Errno 13] Permission denied: './data\\M\\.ipynb_checkpoints'
Training model...
Training complete.
99.86% accuracy achieved on the test set!
Model and class map saved as model.p


In [5]:
# 3_predict_realtime_fixed.py
# FIXED: Now robustly handles cases with more than 2 hands detected.
# MODIFIED: Removed the space added after each word.

import pickle
import cv2
import mediapipe as mp
import numpy as np
import time

# MODIFIED: Correctly load both the model and the class map
try:
    with open('model.p', 'rb') as f:
        load_data = pickle.load(f)
        model = load_data['model']
        class_map = load_data['class_map']
except FileNotFoundError:
    print("Error: model.p not found. Please run the training script first.")
    exit()

# Automatically create the labels dictionary from the loaded class map
labels_dict = {v: k for k, v in class_map.items()}
print(f"Labels loaded automatically: {labels_dict}")

# Start webcam capture
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

# Initialize MediaPipe Hands for two hands
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)


# Variables for the sentence formation and gesture stability logic
sentence = ""
last_prediction = None
prediction_stable_for = 0
last_char_time = time.time()
PREDICTION_THRESHOLD = 20  # Number of frames a gesture must be stable
COOLDOWN_PERIOD = 2      # Seconds to wait after adding a character

while True:
    ret, frame = cap.read()
    if not ret:
        print("Error: Failed to capture frame.")
        break
    
    H, W, _ = frame.shape
    frame = cv2.flip(frame, 1)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame_rgb)
    
    # Create a white box at the bottom for displaying the sentence
    cv2.rectangle(frame, (0, H - 60), (W, H), (255, 255, 255), -1)
    
    current_prediction = None
    
    if results.multi_hand_landmarks:
        all_landmarks = []
        # --- ⬇️ THIS IS THE MAIN FIX ⬇️ ---
        # By slicing [:2], we guarantee this loop runs at most twice,
        # preventing the feature count from ever exceeding 84.
        for hand_landmarks in results.multi_hand_landmarks[:2]:
            mp_drawing.draw_landmarks(
                frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())

            data_aux = []
            x_ = [landmark.x for landmark in hand_landmarks.landmark]
            y_ = [landmark.y for landmark in hand_landmarks.landmark]
            for i in range(len(hand_landmarks.landmark)):
                data_aux.append(hand_landmarks.landmark[i].x - min(x_))
                data_aux.append(hand_landmarks.landmark[i].y - min(y_))
            all_landmarks.extend(data_aux)

        # Pad if only one hand is detected
        if len(all_landmarks) < 84:
            all_landmarks.extend([0] * (84 - len(all_landmarks)))

        # Make a prediction with the model
        prediction = model.predict([np.asarray(all_landmarks)])
        current_prediction = labels_dict[int(prediction[0])]

        # Display the current predicted gesture
        cv2.putText(frame, f'Gesture: {current_prediction}', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 255, 0), 3, cv2.LINE_AA)
        
        # Implement the delay mechanism for stable predictions
        if current_prediction == last_prediction:
            prediction_stable_for += 1
        else:
            prediction_stable_for = 1
            last_prediction = current_prediction
            
        # Check if the gesture is stable and cooldown has passed
        if prediction_stable_for >= PREDICTION_THRESHOLD and (time.time() - last_char_time) > COOLDOWN_PERIOD:
            # --- ⬇️ THIS IS THE CHANGE ⬇️ ---
            # Removed the " " to stop adding a space after each word.
            sentence += current_prediction
            last_char_time = time.time()
            prediction_stable_for = 0 # Reset after adding character
            
    # Display the sentence being formed
    cv2.putText(frame, sentence, (20, H - 20), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 0), 2, cv2.LINE_AA)

    # Show the final frame
    cv2.imshow('Sign Language Recognition', frame)

    # Allow exiting the application by pressing 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Clean up
cap.release()
cv2.destroyAllWindows()

Labels loaded automatically: {0: '1', 1: 'A', 2: 'B', 3: 'C', 4: 'D', 5: 'E', 6: 'F', 7: 'G', 8: 'H', 9: 'I', 10: 'J', 11: 'K', 12: 'L', 13: 'M', 14: 'N', 15: 'O', 16: 'P', 17: 'Q', 18: 'R', 19: 'space'}
