In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
import mediapipe as mp

In [2]:
import os

# Root directory expected to contain one sub-folder per category.
BASE_DIR = r"../Datasets/Video_DATASET"

# Category folder name -> integer class label (labels follow the listing order).
dataset_categories = {
    name: index
    for index, name in enumerate(
        ["Normal", "Limping", "Slouch", "No_arm_swing", "Circumduction"]
    )
}

# Report, one line per category, whether its folder exists under BASE_DIR.
for category, label in dataset_categories.items():
    folder_path = os.path.join(BASE_DIR, category)
    if not os.path.exists(folder_path):
        print(f"❌ Folder not found: {folder_path}")
        continue
    print(f"✅ Found folder: {folder_path}")


✅ Found folder: ../Datasets/Video_DATASET\Normal
✅ Found folder: ../Datasets/Video_DATASET\Limping
✅ Found folder: ../Datasets/Video_DATASET\Slouch
✅ Found folder: ../Datasets/Video_DATASET\No_arm_swing
✅ Found folder: ../Datasets/Video_DATASET\Circumduction


In [None]:
# Initialize MediaPipe Pose
# `mp_pose` is a short alias for the MediaPipe pose solution module.
# `pose` is a module-level estimator created with the default constructor arguments.
mp_pose = mp.solutions.pose
pose = mp_pose.Pose()

# Function to extract pose keypoints from a video
def extract_keypoints_from_video(video_path, max_frames=100):
    """Run MediaPipe Pose over the first frames of a video and collect landmarks.

    Args:
        video_path: Path of the video file to open with OpenCV.
        max_frames: Maximum number of frames to decode from the start of the
            video. Frames in which no pose is detected still count towards
            this limit but contribute no row.

    Returns:
        A NumPy array with one row per frame in which a pose was detected.
        Each row holds (x, y, z, visibility) for every landmark, in the order
        MediaPipe reports them (132 values for its 33 pose landmarks).
        Returns None when the video cannot be opened or when no pose was
        detected in any processed frame.
    """
    keypoints_sequence = []
    cap = cv2.VideoCapture(video_path)

    try:
        if not cap.isOpened():
            print(f"❌ Error: Could not open {video_path}")
            return None

        # Use a fresh estimator per video. In its default (video-stream) mode
        # MediaPipe Pose tracks landmarks from frame to frame, so sharing one
        # instance across unrelated videos would let the previous video's
        # tracking state influence the first frames of this one. The context
        # manager also releases the estimator's resources when done.
        with mp_pose.Pose() as video_pose:
            frame_count = 0
            # Check the limit before reading so no extra frame is decoded.
            while frame_count < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV decodes to BGR; MediaPipe expects RGB input.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = video_pose.process(frame_rgb)

                if results.pose_landmarks:
                    row = []  # No frame number included
                    for landmark in results.pose_landmarks.landmark:
                        # Store normalized keypoints (x, y, z, visibility)
                        row.extend([landmark.x, landmark.y, landmark.z, landmark.visibility])
                    keypoints_sequence.append(row)

                frame_count += 1
    finally:
        # Always free the capture handle, even if decoding or inference raises.
        cap.release()

    if keypoints_sequence:
        return np.array(keypoints_sequence)  # Shape: (frames, 132)
    return None

# Function to process all videos in a dataset category
def process_videos(video_folder, label, max_videos=None):
    """Extract per-frame keypoints from every file in one category folder.

    Args:
        video_folder: Directory whose entries are passed, one by one, to
            extract_keypoints_from_video.
        label: Class label attached to every frame extracted from this folder;
            also shown in the progress-bar description.
        max_videos: Optional cap on the number of files processed. None means
            no limit; 0 means no files are processed.

    Returns:
        A tuple (data, labels) of equal-length lists: `data` holds one
        keypoint row per detected frame and `labels` holds `label` repeated
        once per row. Files for which extraction returns None are skipped.
    """
    # os.listdir returns entries in arbitrary order; sort so that the files
    # picked by max_videos and the order of the output rows are reproducible.
    video_files = sorted(os.listdir(video_folder))

    # Compare against None so that max_videos=0 is honoured as "process nothing"
    # instead of being treated as "no limit".
    if max_videos is not None:
        video_files = video_files[:max_videos]

    data = []
    labels = []
    for video_file in tqdm(video_files, desc=f"Processing {label}"):
        video_path = os.path.join(video_folder, video_file)
        keypoints = extract_keypoints_from_video(video_path)

        if keypoints is None:
            continue

        # One row and one label per frame in which a pose was detected.
        data.extend(keypoints)
        labels.extend([label] * len(keypoints))

    return data, labels

# Process every category folder and save the collected keypoints as one CSV.
# BASE_DIR and dataset_categories are the values defined in the dataset-check
# cell, so the folders verified there are exactly the ones processed here.
OUTPUT_CSV = "../Datasets/CSV_DATASET/gait_keypoints.csv"

all_data = []
all_labels = []

for category, label in dataset_categories.items():
    folder_path = os.path.join(BASE_DIR, category)
    # isdir (not exists) so a stray file with a category name is reported
    # as missing instead of crashing os.listdir inside process_videos.
    if os.path.isdir(folder_path):
        data, labels = process_videos(folder_path, label)
        all_data.extend(data)
        all_labels.extend(labels)
    else:
        print(f"⚠️ Warning: Folder {folder_path} not found.")

# Convert to DataFrame and save as CSV (excluding "Frame" column)
if all_data:
    # Four values per landmark for 33 landmarks, matching the row layout
    # produced by extract_keypoints_from_video.
    feature_columns = [f"K{i}_{c}" for i in range(33) for c in ("x", "y", "z", "visibility")]
    columns = feature_columns + ["label"]
    df = pd.DataFrame(all_data, columns=feature_columns)
    df["label"] = all_labels  # Add label column separately

    # Create the output directory up front so a missing folder does not
    # discard the results of the (slow) extraction step.
    os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
    df.to_csv(OUTPUT_CSV, index=False)
    print("📂 Keypoints saved to gait_keypoints.csv")
else:
    print("⚠️ Warning: No keypoints were extracted; CSV not written.")

Processing 0: 100%|██████████| 5/5 [00:13<00:00,  2.67s/it]
Processing 1: 100%|██████████| 7/7 [00:19<00:00,  2.81s/it]
Processing 2: 100%|██████████| 4/4 [00:10<00:00,  2.75s/it]
Processing 3: 100%|██████████| 3/3 [00:09<00:00,  3.23s/it]
Processing 4:  50%|█████     | 3/6 [00:08<00:08,  2.74s/it]