# In [None]:
# predict_live_cpu.py
# RUN THIS IN YOUR "asl_cpu_env"
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
import mediapipe as mp # Uses the OLD solutions API
from collections import deque
import time

# --- 1. DEFINE CONSTANTS ---
# File the trained landmark classifier is loaded from.
ASL_MODEL_PATH = 'asl_landmark_model.keras'

# Labels looked up by the classifier's predicted index: the letters A-Z
# followed by three non-letter labels.
class_names = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') + ['del', 'nothing', 'space']

# All-zero (1, 63) float32 row fed to the model when no hand is detected
# (presumably 21 landmarks x 3 coordinates -- confirm against training code).
ZERO_VECTOR_NP = np.zeros((1, 63), dtype=np.float32)

# --- 2. LOAD YOUR CLASSIFIER MODEL ---
# Load the landmark classifier from ASL_MODEL_PATH. Nothing below can work
# without it, so any loading failure terminates the script.
try:
    asl_model = load_model(ASL_MODEL_PATH)
except Exception as e:
    # SystemExit with a string argument writes the message to stderr and ends
    # the process with exit status 1. The bare `exit()` helper used previously
    # is injected by the `site` module for interactive use and, called with no
    # argument, reports success (status 0) even though loading failed.
    raise SystemExit(f"Error loading ASL model: {e}")
print("ASL Landmark model loaded successfully!")

# --- 3. INITIALIZE MEDIAPIPE (OLD API) ---
# Short handles for the legacy "solutions" modules: landmark drawing helpers
# and the hand-tracking solution.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# One hand at most, processed as a video stream rather than as independent
# still images; 0.7 is the detection threshold, 0.5 the tracking threshold.
hands = mp_hands.Hands(
    max_num_hands=1,
    static_image_mode=False,
    min_tracking_confidence=0.5,
    min_detection_confidence=0.7,
)
print("MediaPipe (Old API) Hand Detector created.")

# --- 4. SET UP WEBCAM ---
print("Opening camera...")
# Open capture device 0 through OpenCV's V4L2 backend.
cap = cv2.VideoCapture(0, cv2.CAP_V4L2)
if not cap.isOpened():
    # Verify the device before configuring it. SystemExit with a string
    # argument prints the message to stderr and exits with status 1, whereas
    # the bare `exit()` helper reported success (status 0) on this failure.
    raise SystemExit("Error: Could not open webcam.")

# Request MJPG-encoded frames from the camera. This is best-effort: the
# result of set() is deliberately not checked.
cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))
print("Camera opened successfully.")

# --- 5. PREDICTION & SMOOTHING VARIABLES ---
SMOOTHING_WINDOW = 10        # number of recent per-frame predictions voted over
CONFIDENCE_THRESHOLD = 0.6   # probability a class must exceed to be displayed
PENDING_LABEL = "..."        # shown while the window fills or confidence is low

predictions_deque = deque(maxlen=SMOOTHING_WINDOW)  # last N predicted indices
current_label = ""


def _landmarks_to_model_input(hand_landmarks):
    """Return wrist-relative landmark offsets as a single float32 row.

    Every landmark contributes (x, y, z) minus the wrist's (x, y, z), in
    landmark order, flattened to shape (1, 3 * number_of_landmarks).
    NOTE: per the original author's comment this must match the
    normalization applied when the model was trained.
    """
    wrist = hand_landmarks.landmark[mp_hands.HandLandmark.WRIST]
    offsets = [
        (lm.x - wrist.x, lm.y - wrist.y, lm.z - wrist.z)
        for lm in hand_landmarks.landmark
    ]
    return np.array(offsets, dtype=np.float32).reshape(1, -1)


def _smoothed_label(recent_indices, probabilities):
    """Return the text to overlay for the current frame.

    recent_indices: bounded deque of recently predicted class indices.
    probabilities:  class probabilities of the *current* frame only.

    Until the deque is full the pending label is returned. Afterwards the most
    frequent index in the window is chosen and its class name is shown when
    the current frame's probability for that class exceeds the threshold. The
    'nothing' class is shown even below the threshold.
    """
    if len(recent_indices) < recent_indices.maxlen:
        return PENDING_LABEL

    majority = int(np.bincount(recent_indices).argmax())
    name = class_names[majority]
    if probabilities[majority] > CONFIDENCE_THRESHOLD or name == 'nothing':
        return name
    return PENDING_LABEL


# The loop is wrapped in try/finally so that the camera, the MediaPipe graph
# and the preview window are released even when an exception (including
# Ctrl+C) interrupts it.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be read; stop.
            break

        # Flip horizontally so the preview acts like a mirror.
        frame = cv2.flip(frame, 1)

        # --- 6. MEDIAPIPE HAND DETECTION ---
        # OpenCV delivers BGR; the frame is converted to RGB for MediaPipe.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb_frame)

        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]

            # --- 7. NORMALIZE LANDMARKS (Same as in training) ---
            model_input = _landmarks_to_model_input(hand_landmarks)

            # Draw skeleton on the frame
            mp_drawing.draw_landmarks(
                frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
        else:
            # No hand in view: classify the all-zero placeholder instead.
            model_input = ZERO_VECTOR_NP

        # --- 8. MAKE A PREDICTION (ALWAYS) ---
        # Call the model directly instead of predict(): for a single sample
        # evaluated inside a loop this avoids predict()'s per-call batching
        # overhead. training=False keeps layers in inference mode.
        probabilities = np.asarray(asl_model(model_input, training=False))[0]

        # --- 9. SMOOTH THE PREDICTION ---
        predictions_deque.append(int(np.argmax(probabilities)))
        current_label = _smoothed_label(predictions_deque, probabilities)

        cv2.putText(
            frame, current_label, (20, 40),
            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2
        )
        cv2.imshow('ASL CPU Prediction - Press "q" to quit', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # --- 10. CLEAN UP ---
    print("Cleaning up and closing...")
    hands.close()
    cap.release()
    cv2.destroyAllWindows()