In [5]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

# --- NEW: IMPORT THE CORRECT PREPROCESSOR ---
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

# --- 1. LOAD YOUR TRAINED MODEL ---
# The prediction loop further down feeds this model frames that were passed
# through mobilenet_v2.preprocess_input, so the file must hold the matching model.
try:
    model = load_model('model1.keras') # Make sure this is the new MobileNetV2 model
    print("Model loaded successfully!")
except Exception as e:
    print(f"Error loading model: {e}")
    print("Make sure 'model1.keras' is in the correct path.")
    # Raise instead of calling exit(): inside a Jupyter/IPython kernel exit()
    # asks the kernel to shut down rather than raising at this point, and in a
    # plain script it would report success (status 0) despite the failure.
    raise SystemExit(1) from e

# --- 2. DEFINE YOUR CLASS NAMES ---
# Position i in this list is the label used for index i of the model's output
# vector: the 26 letters A-Z first, then the three special classes.
class_names = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
class_names += ['del', 'nothing', 'space']

# --- 3. SET UP WEBCAM (WITH WSL FIXES) ---
print("Opening camera...")
# Open device 0 through the V4L2 backend and request MJPG-encoded frames.
cap = cv2.VideoCapture(0, cv2.CAP_V4L2)
cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))

if not cap.isOpened():
    print("Error: Could not open webcam.")
    cap.release()
    # Raise instead of calling exit(): inside a Jupyter/IPython kernel exit()
    # asks the kernel to shut down rather than raising at this point, and in a
    # plain script it would report success (status 0) despite the failure.
    raise SystemExit(1)
print("Camera opened successfully.")

# --- 4. DEFINE THE REGION OF INTEREST (ROI) ---
# Pixel bounds of the box that is cropped out of each frame for prediction.
# Rows span ROI_TOP..ROI_BOTTOM and columns span ROI_RIGHT..ROI_LEFT.
# Note that ROI_RIGHT holds the *smaller* x value; presumably the names refer
# to the user's side of the horizontally flipped preview -- verify before renaming.
ROI_TOP, ROI_BOTTOM = 50, 350
ROI_RIGHT, ROI_LEFT = 350, 650

# Size handed to cv2.resize for the cropped ROI -- match your training
IMG_SIZE = (200, 200)

# --- 5. PERFORMANCE OPTIMIZATION VARS ---
frame_counter = 0    # frames read so far; inference runs on every 5th one
current_label = ""   # most recent prediction text, redrawn on every frame
last_error = ""      # last reported prediction error, to avoid repeating it

# try/finally guarantees the camera and the preview window are released even
# if the loop is interrupted or an unexpected error escapes.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Error: Can't receive frame. Exiting.")
            break

        frame = cv2.flip(frame, 1)
        frame_counter += 1

        # --- 6. PRE-PROCESS AND PREDICT (ONLY EVERY 5 FRAMES) ---
        # This runs BEFORE any overlay is drawn on the frame; otherwise the
        # green ROI border ends up inside the crop that is fed to the model.
        if frame_counter % 5 == 0:
            try:
                roi = frame[ROI_TOP:ROI_BOTTOM, ROI_RIGHT:ROI_LEFT]
                img = cv2.resize(roi, IMG_SIZE)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # model input is RGB

                # Add the batch axis first, then apply the MobileNetV2
                # preprocessing (pixel values [0, 255] -> [-1, 1]).
                img_batch = np.expand_dims(img, axis=0)
                img_preprocessed = preprocess_input(img_batch)

                # --- 7. MAKE A PREDICTION ---
                prediction = model.predict(img_preprocessed, verbose=0)

                pred_index = np.argmax(prediction[0])
                pred_label = class_names[pred_index]
                confidence = prediction[0][pred_index] * 100

                current_label = f"{pred_label} ({confidence:.2f}%)"

            except Exception as e:
                # Keep the preview running and keep the previous label, but
                # report each distinct problem once instead of hiding it.
                error_text = f"Prediction error: {e}"
                if error_text != last_error:
                    print(error_text)
                    last_error = error_text

        # Overlay the ROI box only after the crop above has been taken.
        cv2.rectangle(frame, (ROI_LEFT, ROI_TOP), (ROI_RIGHT, ROI_BOTTOM), (0, 255, 0), 2)

        # --- 8. DISPLAY THE PREDICTION ON *EVERY* FRAME ---
        cv2.putText(
            frame,
            current_label,
            (ROI_RIGHT - 20, ROI_TOP - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 255, 0),
            2
        )

        cv2.imshow('ASL Prediction - Press "q" to quit', frame)

        # --- 9. SET UP EXIT KEY ---
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # --- 10. CLEAN UP ---
    print("Cleaning up and closing...")
    cap.release()
    cv2.destroyAllWindows()

Model loaded successfully!
Opening camera...
Camera opened successfully.
Cleaning up and closing...


## Photo testing


In [None]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

# --- 1. SET THE PATH TO YOUR IMAGE ---
# !!! IMPORTANT: Change this to the path of the image you want to test
IMAGE_PATH = 'asl_alphabet_test/A_test.jpg'
# For example: 'asl_alphabet_test/A/A_test.jpg' inside your project folder

# --- 2. LOAD YOUR TRAINED MODEL ---
try:
    model = load_model('best_asl_model.keras')
    print("Model loaded successfully!")
except Exception as e:
    print(f"Error loading model: {e}")
    # Raise instead of calling exit(): inside a Jupyter/IPython kernel exit()
    # asks the kernel to shut down rather than raising at this point, and in a
    # plain script it would report success (status 0) despite the failure.
    raise SystemExit(1) from e

# --- 3. DEFINE YOUR CLASS NAMES ---
# Label lookup for the model's output index: letters A-Z, then the three
# special classes, in that order.
class_names = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + ['del', 'nothing', 'space']

# --- 4. DEFINE IMAGE SIZE ---
IMG_SIZE = (200, 200) # Must match your model's input size

# --- 5. LOAD AND PRE-PROCESS THE IMAGE ---
# cv2.imread signals a missing/unreadable file by returning None, not by raising.
image_bgr = cv2.imread(IMAGE_PATH)

if image_bgr is None:
    print(f"Error: Could not load image from {IMAGE_PATH}")
    # Raise instead of calling exit(): inside a Jupyter/IPython kernel exit()
    # asks the kernel to shut down rather than raising at this point, and in a
    # plain script it would report success (status 0) despite the failure.
    raise SystemExit(1)

# RGB copy for the model. The BGR original is kept separately because
# cv2.imshow interprets its input as BGR; showing the RGB copy would swap
# the red and blue channels on screen.
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

try:
    # Resize the image to match the model's expected input size
    img_resized = cv2.resize(image, IMG_SIZE)

    # Rescale the image (0-255 -> 0-1)
    img_rescaled = img_resized / 255.0

    # Expand dimensions to create a "batch" of 1
    # Model expects shape (1, 200, 200, 3)
    img_batch = np.expand_dims(img_rescaled, axis=0)

    # --- 6. MAKE A PREDICTION ---
    prediction = model.predict(img_batch, verbose=0)

    # Index of the highest score, its class name, and its score as a percentage
    pred_index = np.argmax(prediction[0])
    pred_label = class_names[pred_index]
    confidence = prediction[0][pred_index] * 100

    text = f"{pred_label} ({confidence:.2f}%)"
    print(f"The model predicts: {text}")

    # --- 7. DISPLAY THE PREDICTION ON THE IMAGE ---

    # Build the display image from the BGR original so colours look correct
    display_image = cv2.resize(image_bgr, (500, 500), interpolation=cv2.INTER_AREA)

    # Put the prediction text on the display image
    cv2.putText(
        display_image,
        text,
        (20, 40), # Position text at the top-left
        cv2.FONT_HERSHEY_SIMPLEX,
        1.0,
        (0, 255, 0), # Green text
        2
    )

    # Show the image in a new window
    cv2.imshow(f"Prediction: {text}", display_image)

    print("Press any key to close the image.")
    cv2.waitKey(0) # Wait indefinitely for a key press

except Exception as e:
    print(f"An error occurred during processing: {e}")
finally:
    # Close any window that was opened, even if an error occurred above.
    cv2.destroyAllWindows()

Model loaded successfully!
The model predicts: A (100.00%)
Press any key to close the image.


In [1]:
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

# --- 1. LOAD YOUR ASL MODEL ---
try:
    asl_model = load_model('model1.keras')
    print("ASL model loaded successfully!")
except Exception as e:
    print(f"Error loading ASL model: {e}")
    # Raise instead of calling exit(): inside a Jupyter/IPython kernel exit()
    # asks the kernel to shut down rather than raising at this point, and in a
    # plain script it would report success (status 0) despite the failure.
    raise SystemExit(1) from e

# --- 2. DEFINE CONSTANTS ---
# Label lookup for the model's output index: letters A-Z, then the three
# special classes, in that order.
class_names = [*"ABCDEFGHIJKLMNOPQRSTUVWXYZ", 'del', 'nothing', 'space']

# !!! IMPORTANT: Use the size you trained with (128,128) or (200,200)
# NOTE(review): the ROI-based webcam cell loads the same 'model1.keras' but
# resizes to (200, 200) -- confirm which size this model was trained with.
IMG_SIZE = (128, 128)

# --- 3. SET UP WEBCAM ---
print("Opening camera...")
# Open device 0 through the V4L2 backend and request MJPG-encoded frames.
cap = cv2.VideoCapture(0, cv2.CAP_V4L2)
cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))
if not cap.isOpened():
    print("Error: Could not open webcam.")
    cap.release()
    # Raise instead of calling exit(): inside a Jupyter/IPython kernel exit()
    # asks the kernel to shut down rather than raising at this point, and in a
    # plain script it would report success (status 0) despite the failure.
    raise SystemExit(1)
print("Camera opened successfully.")

# --- 4. PREDICTION VARIABLES ---
current_label = ""   # text overlaid on the frame
frame_counter = 0    # counts frames in which a hand-sized blob was found
last_error = ""      # last reported prediction error, to avoid repeating it

# --- 5. DEFINE SKIN COLOR RANGE (in HSV) ---
# This is a common range for many skin tones.
# You MAY need to adjust these values for your specific lighting.
lower_skin = np.array([0, 48, 80], dtype=np.uint8)
upper_skin = np.array([20, 255, 255], dtype=np.uint8)

# 3x3 structuring element shared by erode and dilate; built once, not per frame.
morph_kernel = np.ones((3, 3), np.uint8)

# try/finally guarantees the camera and the preview window are released even
# if the loop is interrupted or an unexpected error escapes.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1) # Flip for selfie view

        # --- 6. FIND THE HAND (Skin-Tone Detection) ---

        # Mask of the pixels whose HSV value lies inside the skin-tone range
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        skin_mask = cv2.inRange(hsv, lower_skin, upper_skin)

        # Clean up the mask: erode removes small noise, dilate closes gaps
        skin_mask = cv2.erode(skin_mask, morph_kernel, iterations=1)
        skin_mask = cv2.dilate(skin_mask, morph_kernel, iterations=2)

        # Only the largest outer outline is used below, so the nested contour
        # hierarchy (RETR_TREE) is not needed.
        contours, _ = cv2.findContours(skin_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # We assume the *largest* contour is the hand
        if contours:
            c = max(contours, key=cv2.contourArea)

            # Only proceed if the area is a reasonable size (not just noise)
            if cv2.contourArea(c) > 1000:
                # Bounding box (x, y, width, height) of the hand
                x, y, w, h = cv2.boundingRect(c)

                # --- 7. CROP AND PREDICT ---
                # This runs BEFORE the box is drawn; otherwise the green
                # border ends up inside the crop that is fed to the model.
                frame_counter += 1
                if frame_counter % 5 == 0:
                    try:
                        hand_roi = frame[y:y+h, x:x+w]

                        # Pre-process this cropped image for *your* model
                        img_for_model = cv2.resize(hand_roi, IMG_SIZE)
                        img_for_model = cv2.cvtColor(img_for_model, cv2.COLOR_BGR2RGB) # BGR -> RGB
                        # NOTE(review): the ROI-based webcam cell feeds the same
                        # 'model1.keras' through mobilenet_v2.preprocess_input
                        # instead of /255 -- confirm which one matches training.
                        img_rescaled = img_for_model / 255.0
                        img_batch = np.expand_dims(img_rescaled, axis=0)

                        # Make a prediction
                        prediction = asl_model.predict(img_batch, verbose=0)
                        pred_index = np.argmax(prediction[0])
                        confidence = prediction[0][pred_index]

                        if confidence > 0.5: # 50% confidence threshold
                            pred_label = class_names[pred_index]
                            current_label = f"{pred_label} ({confidence*100:.2f}%)"
                        else:
                            current_label = "..."

                    except Exception as e:
                        current_label = "..."
                        # Keep the preview running, but report each distinct
                        # problem once instead of hiding it.
                        error_text = f"Prediction error: {e}"
                        if error_text != last_error:
                            print(error_text)
                            last_error = error_text

                # Draw the dynamic bounding box only after the crop was taken
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            else:
                current_label = "" # Largest contour is too small
        else:
            current_label = "" # No contours found

        # --- 8. DISPLAY THE PREDICTION ---
        cv2.putText(
            frame, current_label, (20, 40),
            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2
        )
        cv2.imshow('ASL Prediction (Skin-Tone Detection) - Press "q" to quit', frame)

        # --- 9. SET UP EXIT KEY ---
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # --- 10. CLEAN UP ---
    print("Cleaning up and closing...")
    cap.release()
    cv2.destroyAllWindows()

2025-11-11 11:25:37.129835: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-11 11:25:37.212056: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-11 11:25:38.477505: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
I0000 00:00:1762860339.194712  760053 gpu_device.cc:2020] Created de

ASL model loaded successfully!
Opening camera...
Camera opened successfully.


qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in ""
2025-11-11 11:25:51.982726: I external/local_xla/xla/service/service.cc:163] XLA service 0x704a580045e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-11-11 11:25:51.982761: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Laptop GPU, Compute Capability 8.9
2025-11-11 11:25:51.992158: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-11-11 11:25:52.056408: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91500
I0000 00:00:1762860353.373500  760189 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Cleaning up and closing...
