In [2]:
import cv2
from matplotlib import pyplot as plt
import numpy as np
import mediapipe as mp
import time
import os
import json


from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, InputLayer, Dropout 
from tensorflow.keras.callbacks import TensorBoard


In [4]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities
mp_face_mesh = mp.solutions.face_mesh 

In [5]:
def extract_keypoints(results) : 
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(63)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(63)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)

    return np.concatenate([face, pose, lh, rh])
    

In [6]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    return image, results
    

In [7]:
x = []
actions = np.array(['hello', 'thanks', 'iloveyou', 'book'])
data_path = os.path.join('MP_DATA')
no_sequences = 30
sequence_length = 30

In [117]:
y = []

import os
import cv2
import numpy as np
import json
import mediapipe as mp

# Mediapipe setup
mp_holistic = mp.solutions.holistic.Holistic(
    static_image_mode=False,
    model_complexity=1,
    enable_segmentation=False,
    refine_face_landmarks=True
)
mp_drawing = mp.solutions.drawing_utils

# Function to extract keypoints from a Mediapipe result
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] 
                     for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)
    lh = np.array([[res.x, res.y, res.z] 
                   for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(63)
    rh = np.array([[res.x, res.y, res.z] 
                   for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(63)
    face = np.array([[res.x, res.y, res.z] 
                     for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)
    
    return np.concatenate([face, pose, lh, rh])

# Function for Mediapipe detection
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

# Root directory containing gloss subfolders like 'book'
root_dir = 'DATA_COLLECTION_version_3'
gloss = 'book'
x = []

with open('WLASL_v0.3.json', 'r') as f:
    data = json.load(f)

for item in data:
    if item['gloss'] != gloss:
        continue

    for video in item['instances']:
        video_id = video['video_id']
        frame_start = video['frame_start']
        frame_end = video['frame_end']

        video_path = os.path.join(root_dir, gloss, f"{video_id}.mp4")

        if not os.path.exists(video_path):
            print(f"❌ Video not found: {video_path}")
            continue

        cap = cv2.VideoCapture(video_path)
        count = 0
        vid = []

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print(f"⚠️  Failed to read frame from {video_id}")
                break
            
            #print('frame : {}'.format(count))
            count += 1

            # Only process frames in desired range and limit to 30 total
            if frame_start <= count <= frame_end and (count - frame_start) < 30:
                image, results = mediapipe_detection(frame, mp_holistic)
                keypoints = extract_keypoints(results)
                keypoints = keypoints[:1662]  # Truncate to 1662 keypoints
                vid.append(keypoints)

        cap.release()

        # Post-processing: pad or truncate to 30 frames
        if len(vid) < 30:
            padding = [np.zeros(1662) for _ in range(30 - len(vid))]
            vid.extend(padding)
        else:
            vid = vid[:30]

        # Convert to numpy array and append to dataset
        print('vid len : {} , vid[0] len : {}'.format(len(vid),len(vid[0])))
        vid_array = np.array(vid)
        x.append(vid_array)
        y.append(label_map['book'])

# Final shape check
X = np.array(x)
print("✅ Final shape of X:", X.shape)  # Expected: (num_videos, 30, 1662)

# Optional: release Mediapipe model
mp_holistic.close()


⚠️  Failed to read frame from 69241
vid len : 30 , vid[0] len : 1662
❌ Video not found: DATA_COLLECTION_version_3\book\65225.mp4
⚠️  Failed to read frame from 68011
vid len : 30 , vid[0] len : 1662
❌ Video not found: DATA_COLLECTION_version_3\book\68208.mp4
⚠️  Failed to read frame from 68012
vid len : 30 , vid[0] len : 1662
⚠️  Failed to read frame from 70212
vid len : 30 , vid[0] len : 1662
⚠️  Failed to read frame from 70266
vid len : 30 , vid[0] len : 1662
❌ Video not found: DATA_COLLECTION_version_3\book\07085.mp4
❌ Video not found: DATA_COLLECTION_version_3\book\07086.mp4
❌ Video not found: DATA_COLLECTION_version_3\book\07087.mp4
⚠️  Failed to read frame from 07069
vid len : 30 , vid[0] len : 1662
❌ Video not found: DATA_COLLECTION_version_3\book\07088.mp4
❌ Video not found: DATA_COLLECTION_version_3\book\07089.mp4
❌ Video not found: DATA_COLLECTION_version_3\book\07090.mp4
❌ Video not found: DATA_COLLECTION_version_3\book\07091.mp4
❌ Video not found: DATA_COLLECTION_version_3\b

In [8]:
label_map = {label : num for num, label in enumerate(actions)}

In [9]:
X.shape, y

NameError: name 'X' is not defined

In [121]:
videos, labels = [], []
for lab in y : 
    labels.append(lab)


for action in actions[:-1] : 
    for sequence in range(no_sequences) :
        window = []
        for frame in range(sequence_length) :
            res = np.load(os.path.join(data_path, action, str(sequence), "{}.npy".format(frame))) 
            window.append(res)
        videos.append(window)
        labels.append(label_map[action])

XX = np.array(videos)
cx = np.concatenate((X, XX), axis=0)
X = cx
X.shape

(101, 30, 1662)

In [10]:
log_dir = os.path.join('logs')
tb_callback = TensorBoard(log_dir = log_dir)

In [11]:

model = Sequential()
model.add(InputLayer(shape=(30, 1662)))  # Input Layer

# LSTM Layers with tanh activation (default) and Dropout
model.add(LSTM(64, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(64, return_sequences=False))  # Final LSTM layer

# Fully Connected Layers
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [124]:
Y = to_categorical(labels).astype(int)
X.shape, Y.shape

((101, 30, 1662), (101, 4))

In [125]:
# Train on the new class
new_model.fit(X, Y, epochs=800, batch_size=16)

Epoch 1/800
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.1130 - loss: 8.6671
Epoch 2/800
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.1387 - loss: 7.6111
Epoch 3/800
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0938 - loss: 6.3798
Epoch 4/800
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.0960 - loss: 4.7653
Epoch 5/800
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.1900 - loss: 3.1944
Epoch 6/800
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.3023 - loss: 2.3282
Epoch 7/800
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.6002 - loss: 1.5186
Epoch 8/800
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.5750 - loss: 1.6883
Epoch 9/800
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x234a5a34800>

In [126]:
new_model.save('hattay.keras')

In [15]:
new_model.load_weights('hattay.keras')

NameError: name 'new_model' is not defined

In [12]:

colors = [(245,117,16), (117,245,16), (16,117,245), (255, 0, 255)]  # Added magenta for 'book'

def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        color = colors[num % len(colors)]  # Safe even if colors < len(actions)
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), color, -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), 
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
    return output_frame  # <- this is important



In [13]:


# 1. New detection variables
sequence = []
sentence = []
threshold = 0.8

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        #print(results)
        
        # Draw landmarks
        #draw_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
#         sequence.insert(0,keypoints)
#         sequence = sequence[:30]
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = new_model.predict(np.expand_dims(sequence, axis=0))[0]
            #print(actions[np.argmax(res)])
            print(res.shape)
            
        #3. Viz logic
            if res[np.argmax(res)] > threshold: 
                if len(sentence) > 0: 
                    if actions[np.argmax(res)] != sentence[-1]:
                        sentence.append(actions[np.argmax(res)])
                else:
                    sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

NameError: name 'new_model' is not defined