In [None]:
import cv2
import mediapipe as mp
import os
import json
import time

# MediaPipe hand-tracking setup: one shared detector instance reused by every
# capture call in this notebook.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    min_tracking_confidence=0.5,
    min_detection_confidence=0.5,
)

def collect_gesture_data(gesture_name, num_sequences, sequence_length, capture_duration, num_landmarks=21):
    """Record hand-landmark sequences from the default webcam and save them as JSON.

    For each of ``num_sequences`` recordings, frames are read from camera 0 for
    ``capture_duration`` seconds. Every frame in which a hand is detected
    contributes one entry: a list of ``[x, y, z]`` landmark coordinates for the
    first detected hand. The recording is then trimmed or zero-padded to exactly
    ``sequence_length`` entries and written to
    ``gestures_dataset/<gesture_name>/seq_<n>.json``.

    Args:
        gesture_name: Label of the gesture; used as the output sub-directory name.
        num_sequences: Number of recordings to capture.
        sequence_length: Number of entries every saved sequence must contain.
        capture_duration: Length of each recording window, in seconds.
        num_landmarks: Landmarks per zero-padding frame, so padding frames have
            the same shape as real frames (21 by default).

    Raises:
        RuntimeError: If the default camera cannot be opened.

    Notes:
        Pressing ``q`` or a failed camera read stops the whole collection; the
        interrupted (partial) sequence is not saved. The camera and the preview
        window are always released, even when an error occurs.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError('Could not open the default camera (index 0)')

    output_dir = os.path.join('gestures_dataset', gesture_name)
    os.makedirs(output_dir, exist_ok=True)

    try:
        for sequence_num in range(num_sequences):
            print(f'Collecting sequence {sequence_num} for gesture {gesture_name}')
            sequence = []
            aborted = False
            start_time = time.time()

            while True:
                ret, frame = cap.read()
                if not ret:
                    # Without frames every remaining sequence would be pure padding.
                    print('Camera read failed; stopping collection')
                    aborted = True
                    break

                # MediaPipe expects RGB input; OpenCV delivers BGR.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = hands.process(frame_rgb)

                if results.multi_hand_landmarks:
                    # Draw every detected hand for visual feedback...
                    for hand_landmarks in results.multi_hand_landmarks:
                        mp.solutions.drawing_utils.draw_landmarks(
                            frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                    # ...but record only the first one, so each video frame adds
                    # exactly one entry and two hands are never interleaved.
                    first_hand = results.multi_hand_landmarks[0]
                    sequence.append([[lm.x, lm.y, lm.z] for lm in first_hand.landmark])

                cv2.imshow('Collecting Gestures', frame)
                if time.time() - start_time > capture_duration:
                    break
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    print('Collection stopped by user')
                    aborted = True
                    break

            if aborted:
                break

            # Trim or pad to a consistent length; padding uses full zero frames
            # so every entry has the same (num_landmarks, 3) shape.
            sequence = sequence[:sequence_length]
            missing = sequence_length - len(sequence)
            sequence.extend([[0, 0, 0] for _ in range(num_landmarks)] for _ in range(missing))

            sequence_path = os.path.join(output_dir, f'seq_{sequence_num}.json')
            with open(sequence_path, 'w') as f:
                json.dump(sequence, f)

            print(f'Saved sequence {sequence_num} for gesture {gesture_name}')
    finally:
        # Release the device and close the preview even if capture failed.
        cap.release()
        cv2.destroyAllWindows()

# Record both swipe directions with identical capture settings:
# 30 recordings per gesture, each a 2-second window trimmed/padded to 30 entries.
for gesture in ('swipe_left', 'swipe_right'):
    collect_gesture_data(gesture, num_sequences=30, sequence_length=30, capture_duration=2)


In [1]:
import cv2
import mediapipe as mp
import os
import json
import time

In [None]:
import numpy as np

def _fit_length(items, target_length, make_filler):
    """Return a new list with exactly ``target_length`` entries.

    Extra entries are dropped from the end; missing entries are produced by
    calling ``make_filler()`` once per slot so fillers are never shared objects.
    """
    fitted = list(items[:target_length])
    fitted.extend(make_filler() for _ in range(target_length - len(fitted)))
    return fitted


def _normalize_landmark(landmark, num_coordinates):
    """Return ``landmark`` as a list of ``num_coordinates`` values (zeros if malformed)."""
    if not isinstance(landmark, list):
        # Not a coordinate list: the data structure is broken, substitute zeros.
        return [0] * num_coordinates
    return _fit_length(landmark, num_coordinates, lambda: 0)


def _normalize_frame(frame, num_landmarks, num_coordinates):
    """Return ``frame`` as ``num_landmarks`` landmarks of ``num_coordinates`` values each."""
    if not isinstance(frame, list):
        frame = []
    landmarks = [_normalize_landmark(landmark, num_coordinates) for landmark in frame[:num_landmarks]]
    return _fit_length(landmarks, num_landmarks, lambda: [0] * num_coordinates)


def load_data(gesture_names, sequence_length, num_landmarks=21, num_coordinates=3):
    """Load the saved gesture sequences into fixed-shape arrays.

    Reads every ``*.json`` file in ``gestures_dataset/<gesture_name>/`` for each
    name in ``gesture_names``. Files are visited in sorted order so the result
    is deterministic, and non-JSON directory entries are ignored. Each sequence
    is trimmed or zero-padded to ``sequence_length`` frames, each frame to
    ``num_landmarks`` landmarks, and each landmark to ``num_coordinates`` values;
    malformed frames or landmarks are replaced by zeros.

    Args:
        gesture_names: Gesture labels; a gesture's position in this sequence is
            its integer class index.
        sequence_length: Number of frames per returned sequence.
        num_landmarks: Number of landmarks per frame.
        num_coordinates: Number of coordinates per landmark.

    Returns:
        A tuple ``(sequences, labels)`` where ``sequences`` is a float32 array of
        shape ``(num_files, sequence_length, num_landmarks, num_coordinates)``
        and ``labels`` is an integer array of shape ``(num_files,)``.

    Raises:
        FileNotFoundError: If a gesture directory does not exist.
    """
    sequences = []
    labels = []

    def zero_frame():
        return [[0] * num_coordinates for _ in range(num_landmarks)]

    for class_index, gesture_name in enumerate(gesture_names):
        data_path = os.path.join('gestures_dataset', gesture_name)

        # Sorted for a reproducible sample order; stray entries such as
        # .DS_Store or checkpoint directories are skipped.
        sequence_files = sorted(
            name for name in os.listdir(data_path)
            if name.lower().endswith('.json') and os.path.isfile(os.path.join(data_path, name))
        )

        for sequence_file in sequence_files:
            with open(os.path.join(data_path, sequence_file), 'r') as f:
                sequence = json.load(f)

            frames = [_normalize_frame(frame, num_landmarks, num_coordinates) for frame in sequence]
            sequences.append(_fit_length(frames, sequence_length, zero_frame))
            labels.append(class_index)

    # Pre-allocating keeps the 4-D shape even when no files were found.
    sequences_np = np.zeros((len(sequences), sequence_length, num_landmarks, num_coordinates), dtype=np.float32)
    if sequences:
        sequences_np[:] = np.array(sequences, dtype=np.float32)

    return sequences_np, np.array(labels, dtype=int)

# Class order is fixed by this list: index 0 -> 'swipe_left', index 1 -> 'swipe_right'.
gesture_names = [
    'swipe_left',
    'swipe_right',
]
sequences, labels = load_data(gesture_names=gesture_names, sequence_length=30)


In [None]:
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# One-hot encode the labels. The ndim guard keeps this cell safe to re-run:
# encoding an already one-hot array a second time would add a spurious axis.
if labels.ndim == 1:
    labels = to_categorical(labels, num_classes=len(gesture_names))

# Flatten each frame's (landmarks, coordinates) grid into one feature vector so
# the data matches the (sequence_length, num_features) arguments passed to
# create_model below: (N, 30, 21, 3) -> (N, 30, 63) with the default settings.
# NOTE(review): create_model is not defined in the visible cells; this assumes
# it builds a model whose input is (sequence_length, num_features) - confirm.
num_frames = sequences.shape[1]
num_features = int(np.prod(sequences.shape[2:]))
features = sequences.reshape(len(sequences), num_frames, num_features)

# Split the data (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Build the model with dimensions derived from the data instead of hard-coded values
model = create_model(sequence_length=num_frames, num_features=num_features)

# Train the model; the held-out split doubles as the validation set
model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test))

# Save the model for later use
model.save('swipe_gesture_model.h5')
