In [None]:
import os
import mediapipe as mp
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt
import pickle
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [3]:
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils 
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(
    static_image_mode=True,       # For static images
    max_num_hands=2,               # Maximum number of hands to detect
    min_detection_confidence=0.5,  # Lower confidence threshold for detection
    min_tracking_confidence=0.5    # Lower tracking confidence
)

In [None]:
data_dir = 'data/Train'
data = []
labels = []
target_size = (384, 384)  # Target size for processing

valid_extensions = (".jpg", ".jpeg", ".png")

for class_dir in sorted(os.listdir(data_dir)):
    class_path = os.path.join(data_dir, class_dir)
    
    if not os.path.isdir(class_path):
        continue

    for img_file in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_file)
        
        if not img_file.lower().endswith(valid_extensions):
            continue

        # Read and preprocess image
        img = cv2.imread(img_path)
        if img is None:
            print(f"Unable to read: {img_path}")
            continue
            
        # Convert to RGB and resize
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_rgb = cv2.resize(img_rgb, target_size, interpolation=cv2.INTER_AREA)
        
        # Improve contrast using CLAHE
        lab = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
        limg = cv2.merge([clahe.apply(l), a, b])
        img_processed = cv2.cvtColor(limg, cv2.COLOR_LAB2RGB)

        # Detect hands
        results = hands.process(img_processed)
        
        if results.multi_hand_landmarks:
            print(f"Hand detected in: {img_path}")
            data_aux = []
            
            # Extract landmarks for all detected hands
            for hand_landmarks in results.multi_hand_landmarks:
                # Store normalized landmark coordinates
                for landmark in hand_landmarks.landmark:
                    data_aux.extend([landmark.x, landmark.y])
                
            data.append(data_aux)
            labels.append(class_dir)
        else:
            print(f"No hand detected in: {img_path}")

# Save the data
with open('data.pickle', 'wb') as f:
    pickle.dump({'data': data, 'labels': labels}, f)

print(f"✅ Saved {len(data)} samples to data.pickle")

# Release MediaPipe resources
hands.close()

✅ Hand detected in: data/Train\a\0.jpg
✅ Hand detected in: data/Train\a\1.jpg
✅ Hand detected in: data/Train\a\10.jpg
✅ Hand detected in: data/Train\a\100.jpg
✅ Hand detected in: data/Train\a\101.jpg
✅ Hand detected in: data/Train\a\102.jpg
✅ Hand detected in: data/Train\a\103.jpg
✅ Hand detected in: data/Train\a\104.jpg
✅ Hand detected in: data/Train\a\105.jpg
✅ Hand detected in: data/Train\a\106.jpg
✅ Hand detected in: data/Train\a\107.jpg
✅ Hand detected in: data/Train\a\108.jpg
✅ Hand detected in: data/Train\a\109.jpg
✅ Hand detected in: data/Train\a\11.jpg
✅ Hand detected in: data/Train\a\110.jpg
✅ Hand detected in: data/Train\a\111.jpg
✅ Hand detected in: data/Train\a\112.jpg
✅ Hand detected in: data/Train\a\113.jpg
✅ Hand detected in: data/Train\a\114.jpg
✅ Hand detected in: data/Train\a\115.jpg
✅ Hand detected in: data/Train\a\116.jpg
✅ Hand detected in: data/Train\a\117.jpg
✅ Hand detected in: data/Train\a\118.jpg
✅ Hand detected in: data/Train\a\119.jpg
✅ Hand detected in: da

In [4]:
# visualize landmarks
data_dir = 'data'



for class_dir in sorted(os.listdir(data_dir)):
    class_path = os.path.join(data_dir, class_dir)
    
    if not os.path.isdir(class_path):
        continue

    for img_file in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_file)
        
        if not img_file.lower().endswith(valid_extensions):
            continue

        # Read and preprocess image
        img = cv2.imread(img_path)
        if img is None:
            print(f"⚠️ Unable to read: {img_path}")
            continue
            
        # Convert to RGB and resize
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # img_rgb = cv2.resize(img_rgb, target_size, interpolation=cv2.INTER_AREA)
        
        # Improve contrast using CLAHE
        lab = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
        limg = cv2.merge([clahe.apply(l), a, b])
        img_processed = cv2.cvtColor(limg, cv2.COLOR_LAB2RGB)

        # Detect hands
        results = hands.process(img_processed)
        
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    img_rgb, # img to draw
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style()

                )

            
        plt.figure()
        plt.title(i)
        plt.imshow(img_processed)
plt.show()

In [5]:
# Check maximum feature length
max_length = max(len(sample) for sample in data)

# ✅ Fix inconsistent data: Pad shorter samples
fixed_data = np.array([sample + [0] * (max_length - len(sample)) for sample in data], dtype=np.float32)

# ✅ Ensure labels match the new dataset
fixed_labels = labels  # Labels remain unchanged



# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(fixed_data, fixed_labels, test_size=0.15, random_state=22, shuffle=True)

# Train the model
model = RandomForestClassifier(random_state=22)
model.fit(X_train, y_train)

# Make predictions
pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, pred)
print(f"✅ Model Accuracy: {accuracy:.2f}")


✅ Model Accuracy: 1.00


In [6]:
# save model
f = open('model.p', 'wb')
pickle.dump({'model':model},f)
f.close() 

In [7]:
# load model
model_dict = pickle.load(open('model.p','rb'))
model = model_dict['model']

In [None]:
cap = cv2.VideoCapture(1)  # Open webcam

with mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("❌ Camera error")
            break

        H, W, _ = frame.shape
        frame = cv2.flip(frame, 1)  # Flip horizontally for natural interaction
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        data_aux = []
        x_, y_ = [], []

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                for lm in hand_landmarks.landmark[:21]:  # 21 landmarks
                    x, y, z = lm.x, lm.y, lm.z  # ✅ Extract x, y, z
                    data_aux.extend([x, y, z])
                    x_.append(x)
                    y_.append(y)

            # Ensure correct feature size
            if len(data_aux) == model.n_features_in_:
                prediction = model.predict([data_aux])[0]

                # Bounding Box
                x1, y1 = int(min(x_) * W) - 20, int(min(y_) * H) - 20
                x2, y2 = int(max(x_) * W) + 20, int(max(y_) * H) + 20
                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 99, 173), 4)
                cv2.putText(frame, prediction, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 3)

        cv2.imshow("Hand Sign Recognition", frame)

        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

cap.release()
cv2.destroyAllWindows()

NameError: name 'model' is not defined

: 