In [5]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from collections import deque
import time

# Ensure TensorFlow uses mixed precision for faster computation on supported GPUs
try:
    from tensorflow.keras import mixed_precision
    mixed_precision.set_global_policy('mixed_float16')
    print("Mixed precision enabled.")
except ImportError:
    print("Mixed precision not available.")

# Define actions
actions = np.array(['hello', 'father', 'mother', 'deaf', 'no', 'love',
                   "help", "please", "more", "thankyou"])

# Build the model architecture
model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(64, kernel_size=3, activation='relu', padding='same', input_shape=(30, 1662)),
    tf.keras.layers.MaxPooling1D(pool_size=2),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Conv1D(128, kernel_size=3, activation='relu', padding='same'),
    tf.keras.layers.MaxPooling1D(pool_size=2),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(256, return_sequences=True)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(512, return_sequences=True)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.LSTM(256, return_sequences=True),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.LSTM(128, return_sequences=False),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dropout(0.3),

    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(actions.shape[0], activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Load the weights
model.load_weights('./models/LSTM_refined2.h5')

# Initialize Mediapipe
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

# Function to detect and process the image
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
    image.flags.writeable = False                   # Improve performance
    results = model.process(image)                  # Make prediction
    image.flags.writeable = True                    # Restore image
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # Convert back to BGR
    return image, results

# Function to extract keypoints
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33 * 4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468 * 3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21 * 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21 * 3)
    return np.concatenate([pose, face, lh, rh])

# Initialize variables
sequence = deque(maxlen=30)
sentence = []
threshold = 0.8
previous_action = None

# Initialize video capture
cap = cv2.VideoCapture('./uploads/uploaded_video.mp4')

if not cap.isOpened():
    print("Error: Could not open video.")
    exit()

# Set target FPS
target_fps = 5
video_fps = cap.get(cv2.CAP_PROP_FPS)
frame_interval = max(int(video_fps / target_fps), 1)
frame_count = 0

# Verify GPU usage
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Define TensorFlow prediction function with @tf.function for optimization
@tf.function
def predict_action(input_sequence):
    return model(input_sequence, training=False)

# Open file to write predictions
with open('predictions.txt', 'w') as f:
    with mp_holistic.Holistic(min_detection_confidence=0.8, min_tracking_confidence=0.8, static_image_mode=False) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % frame_interval != 0:
                frame_count += 1
                continue

            frame = cv2.resize(frame, (640, 360))
            image, results = mediapipe_detection(frame, holistic)

            # Extract keypoints
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)

            if len(sequence) == 30:
                # Convert sequence to numpy array and expand dimensions for batch
                input_sequence = np.expand_dims(np.array(sequence), axis=0).astype(np.float16)  # Use float16 if mixed precision

                # Make prediction
                res = predict_action(input_sequence)[0].numpy()

                # Decode prediction
                action = actions[np.argmax(res)]
                confidence = res[np.argmax(res)]

                if confidence > threshold and action != previous_action:
                    current_frame = int(cap.get(cv2.CAP_PROP_POS_FRAMES))
                    f.write(f"Frame {current_frame}: {action}, Confidence: {confidence}\n")
                    previous_action = action

                    if not sentence or action != sentence[-1]:
                        sentence.append(action)

                if len(sentence) > 5:
                    sentence = sentence[-5:]

            frame_count += 1

cap.release()
cv2.destroyAllWindows()


Mixed precision enabled.
Num GPUs Available:  0
