In [2]:
import cv2
import mediapipe as mp
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

In [5]:
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [33]:
def extract_landmarks(image):
    results_hands = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    
    
    hand_landmarks = results_hands.multi_hand_landmarks
    
    
    return hand_landmarks



def preprocess_landmarks(hand_landmarks):
    
    hand_features = []
    if hand_landmarks:
        for hand_lm in hand_landmarks:
            hand_feature = np.array([landmark.x for landmark in hand_lm.landmark] +
                                    [landmark.y for landmark in hand_lm.landmark])
            hand_features.append(hand_feature)
    
    
    return hand_features

def draw_styled_landmarks(image, results):
    
    
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 
    
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 
    image.flags.writeable = False                  
    results = model.process(image)                 
    image.flags.writeable = True                   
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 
    return image, results



def capture_landmarks(word, num_samples=10):  
    landmarks_data = {'word': [], 'hand_landmarks': []}
    
    cap = cv2.VideoCapture(0)
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        for i in range(1,num_samples,1):
            ret, frame = cap.read()
            hand_landmarks = extract_landmarks(frame)

            
            image, results = mediapipe_detection(frame, holistic)
            draw_styled_landmarks(image, results)
            cv2.imshow('OpenCV Feed', image)

            
            if cv2.waitKey(1) & 0xFF == ord('k'):
                hand_features = preprocess_landmarks(hand_landmarks)
                if hand_features:
                    i=i+1
                    landmarks_data['word'].append(word)
                    landmarks_data['hand_landmarks'].append(hand_features)
                    print(f"Landmarks captured for {word}.")

            
            elif cv2.waitKey(1) & 0xFF == ord('q'):
                break

        cap.release()
        cv2.destroyAllWindows()

    return landmarks_data



In [34]:
word = input("Enter the word for which you want to capture landmarks: ")
landmarks_data = capture_landmarks(word)

In [28]:

X = np.concatenate(landmarks_data['hand_landmarks'])
y = np.repeat(landmarks_data['word'], len(landmarks_data['hand_landmarks']))

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)


print("Shape of X before reshaping:", X.shape)
print("Shape of y_encoded:", y_encoded.shape)


X = X.reshape(-1, 42)


print("Shape of X after reshaping:", X.shape)


X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


model = Sequential()
model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(len(np.unique(y)), activation='softmax'))


model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


model_checkpoint = ModelCheckpoint('gesture_recognition_model.h5', save_best_only=True, monitor='val_loss', mode='min', verbose=1)


model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, callbacks=[model_checkpoint])

print("Model trained and saved.")

Shape of X before reshaping: (10, 42)
Shape of y_encoded: (100,)
Shape of X after reshaping: (10, 42)


ValueError: Found input variables with inconsistent numbers of samples: [10, 100]