In [9]:
import cv2
import numpy as np
import torch
from torchvision import transforms, models
from cvzone.HandTrackingModule import HandDetector

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Define the model architecture (MobileNetV2 in this case)
model = models.mobilenet_v2(pretrained=False)
model.classifier[1] = torch.nn.Linear(model.last_channel, 24)  # Use 24 classes as your saved model was trained with 24 classes
model = model.to(device)

# Load the trained model weights
model_path = "/home/mostafabakr/Desktop/Project X/modelsasl_image_model.pth"
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()  # Set the model to evaluation mode

# Class names (ensure you have 24 classes in the list corresponding to your dataset)
class_names = [
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y'
]

# Hand detection setup
detector = HandDetector(maxHands=1)

# Webcam setup
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Could not open webcam.")
    exit()

# Preprocessing transformation for the hand images
img_size = 224
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

while True:
    success, frame = cap.read()
    if not success:
        print("Error: Could not read frame from webcam.")
        break

    # Detect hand
    hands, frame = detector.findHands(frame)
    if hands:
        # Extract bounding box for the first detected hand
        hand = hands[0]
        x, y, w, h = hand['bbox']

        # Crop and preprocess hand image
        try:
            hand_image = frame[max(0, y - 20):min(frame.shape[0], y + h + 20),
                               max(0, x - 20):min(frame.shape[1], x + w + 20)]

            if hand_image.size != 0:
                # Center the hand in a padded square image
                padded_image = np.ones((img_size, img_size, 3), np.uint8) * 255
                h_crop, w_crop = hand_image.shape[:2]
                scale = img_size / max(h_crop, w_crop)
                resized = cv2.resize(hand_image, (int(w_crop * scale), int(h_crop * scale)))
                start_x = (img_size - resized.shape[1]) // 2
                start_y = (img_size - resized.shape[0]) // 2
                padded_image[start_y:start_y + resized.shape[0], start_x:start_x + resized.shape[1]] = resized

                # Transform and predict
                input_image = transform(padded_image).unsqueeze(0).to(device)
                with torch.no_grad():
                    outputs = model(input_image)
                    _, predicted = torch.max(outputs, 1)
                    predicted_index = predicted.item()

                # Validate prediction index
                if 0 <= predicted_index < len(class_names):
                    label = class_names[predicted_index]
                else:
                    label = "Unknown"

                # Draw bounding box and prediction
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        except Exception as e:
            print("Error processing hand image:", e)

    # Display the frame
    cv2.imshow("Hand Detection", frame)

    # Exit loop on 'q' key
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()


Using device: cuda


  model.load_state_dict(torch.load(model_path, map_location=device))
