## 4. Feature Engineering

### `04_feature_engineering.ipynb`

In [None]:
# Import necessary libraries
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from ultralytics import YOLO
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
%matplotlib inline

# Input datasets and the feature output folder, all relative to the notebook.
FALL_DATA_DIR = "../data/le2i"
WORKOUT_VIDEOS_DIR = "../data/workout_videos"
OUTPUT_DIR = "../outputs/features"

# Create the output folder up front; an already-existing folder is not an error.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Report which of the configured directories are present on disk.
fall_dir_present = os.path.exists(FALL_DATA_DIR)
workout_dir_present = os.path.exists(WORKOUT_VIDEOS_DIR)
output_dir_present = os.path.exists(OUTPUT_DIR)

print("Checking data directories:")
print(f"FALL_DATA_DIR exists: {fall_dir_present}")
print(f"WORKOUT_VIDEOS_DIR exists: {workout_dir_present}")
print(f"OUTPUT_DIR exists: {output_dir_present}")

# Initialize YOLO11 models
# The ultralytics package exposes a single `YOLO` class (imported at the top of
# this cell); the YOLO11 variant is selected by the weights file name. There is
# no `YOLO11` name, so calling it raised NameError and no model was ever loaded.
print("Loading YOLO11 models...")
try:
    yolo_model = YOLO('yolo11n.pt')  # For person detection
    yolo_pose_model = YOLO('yolo11n-pose.pt')  # For pose estimation
    print("Models loaded successfully")
except Exception as e:
    # Best-effort: report the failure and let the rest of the cell run.
    print(f"Error loading models: {e}")
    print("Make sure ultralytics is installed: pip install ultralytics")

def _flatten_pose_keypoints(pose_keypoints, frame_width, frame_height):
    """Flatten one pose into ``[x, y, z, conf, ...]`` with normalized x/y.

    Args:
        pose_keypoints: Rows of ``(x, y, conf)`` in pixel units, exposing
            ``.tolist()`` (one entry of the pose model's ``keypoints.data``).
        frame_width: Frame width in pixels, used to normalize x.
        frame_height: Frame height in pixels, used to normalize y.

    Returns:
        A flat list holding four floats per keypoint. The z slot is always 0.0
        because the pose output carries no depth value.
    """
    flat = []
    for x, y, conf in pose_keypoints.tolist():
        flat.extend([
            float(x) / frame_width,
            float(y) / frame_height,
            0.0,  # Placeholder z-coordinate
            float(conf),
        ])
    return flat


def _compute_pose_features(valid_keypoints):
    """Summarize a sequence of flattened poses into scalar features.

    Args:
        valid_keypoints: List of flat ``[x, y, z, conf, ...]`` lists, one per
            sampled frame in which a pose was found, in temporal order.

    Returns:
        Dict of features. Velocity keys are present only when at least two
        poses exist; head-movement keys likewise need two nose positions.
    """
    features = {}

    # Every keypoint occupies 4 slots, so x values sit at 0, 4, 8, ... and
    # y values at 1, 5, 9, ...
    xs_per_frame = [kp[0::4] for kp in valid_keypoints]
    ys_per_frame = [kp[1::4] for kp in valid_keypoints]

    # 1. Keypoint velocities: per-keypoint displacement between consecutive
    #    frames that had a pose (frames without a pose are skipped, so the
    #    time gap between neighbours is not necessarily uniform).
    velocities = []
    for i in range(1, len(valid_keypoints)):
        frame_velocities = [
            np.sqrt((x_now - x_prev) ** 2 + (y_now - y_prev) ** 2)
            for x_now, x_prev, y_now, y_prev in zip(
                xs_per_frame[i], xs_per_frame[i - 1],
                ys_per_frame[i], ys_per_frame[i - 1],
            )
        ]
        if frame_velocities:
            velocities.append(frame_velocities)

    if velocities:
        features['max_velocity'] = np.max([np.max(v) for v in velocities])
        features['mean_velocity'] = np.mean([np.mean(v) for v in velocities])
        # Spread of the per-frame spreads; undefined for a single transition.
        features['std_velocity'] = (
            np.std([np.std(v) for v in velocities]) if len(velocities) > 1 else 0
        )

    # 2. Pose height variation (vertical extent of the keypoints per frame)
    heights = [max(ys) - min(ys) for ys in ys_per_frame if ys]
    if heights:
        features['max_height'] = np.max(heights)
        features['min_height'] = np.min(heights)
        features['height_range'] = np.max(heights) - np.min(heights)

    # 3. Pose width variation (horizontal extent of the keypoints per frame)
    widths = [max(xs) - min(xs) for xs in xs_per_frame if xs]
    if widths:
        features['max_width'] = np.max(widths)
        features['min_width'] = np.min(widths)
        features['width_range'] = np.max(widths) - np.min(widths)

    # 4. Vertical position change (for falls); the nose is keypoint index 0,
    #    so its y value is element 1 of the flat list.
    nose_positions = [kp[1] for kp in valid_keypoints if len(kp) >= 4]
    if len(nose_positions) >= 2:
        features['head_y_change'] = nose_positions[-1] - nose_positions[0]
        features['max_head_y_change'] = np.max(nose_positions) - np.min(nose_positions)

    return features


# Function to extract features from a video
def extract_features_from_video(video_path, num_frames=16):
    """Extract pose and motion features from a video using YOLO11.

    Samples ``num_frames`` evenly spaced frames, runs the person detector and
    the pose model on each, and summarizes the highest-confidence pose per
    frame into scalar features.

    Args:
        video_path: Path of the video file, as accepted by ``cv2.VideoCapture``.
        num_frames: Number of evenly spaced frames to sample.

    Returns:
        ``(valid_keypoints, features)`` where ``valid_keypoints`` is a list of
        flat ``[x, y, z, conf, ...]`` lists (one per frame with a pose) and
        ``features`` is a dict of scalar features. Returns ``(None, None)``
        when the video cannot be opened, has invalid metadata, contains no
        detectable pose, or any error occurs during processing.
    """
    try:
        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                print(f"Error: Could not open video {video_path}")
                return None, None

            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

            print(f"Video info: {frame_width}x{frame_height}, {frame_count} frames")

            # Broken metadata would otherwise produce negative frame indices
            # or a division by zero during normalization.
            if frame_count <= 0 or frame_width <= 0 or frame_height <= 0:
                print(f"Error: Invalid video metadata for {video_path}")
                return None, None

            # Calculate frame indices to sample
            indices = np.linspace(0, frame_count - 1, num_frames, dtype=int)

            keypoint_sequence = []  # Flat keypoints per sampled frame, or None
            person_detected = []

            for idx in indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
                ret, frame = cap.read()

                if not ret:
                    # If frame reading fails, add placeholder
                    keypoint_sequence.append(None)
                    person_detected.append(False)
                    continue

                # Person detection with YOLO11 (class 0 only)
                person_results = yolo_model(frame, classes=[0], conf=0.5)
                person_detected.append(len(person_results[0].boxes) > 0)

                # Pose detection with YOLO11 pose
                pose_result = yolo_pose_model(frame)[0]
                if len(pose_result.keypoints.data) == 0:
                    keypoint_sequence.append(None)
                    continue

                # With several poses in the frame, keep the most confident one.
                best_pose_idx = 0
                if len(pose_result.boxes) > 1:
                    confidences = [box.conf.item() for box in pose_result.boxes]
                    best_pose_idx = int(np.argmax(confidences))

                keypoint_sequence.append(
                    _flatten_pose_keypoints(
                        pose_result.keypoints.data[best_pose_idx],
                        frame_width,
                        frame_height,
                    )
                )
        finally:
            # Release the capture handle even when a frame fails mid-loop.
            cap.release()

        # Statistics
        is_pose_detected = [kp is not None for kp in keypoint_sequence]
        pose_detection_rate = sum(is_pose_detected) / len(is_pose_detected) if is_pose_detected else 0
        person_detection_rate = sum(person_detected) / len(person_detected) if person_detected else 0

        print(f"Video: {os.path.basename(video_path)}")
        print(f"Person detection rate: {person_detection_rate:.2f}")
        print(f"Pose detection rate: {pose_detection_rate:.2f}")

        # If no poses detected, return None
        valid_keypoints = [kp for kp in keypoint_sequence if kp is not None]
        if not valid_keypoints:
            print("No poses detected in this video")
            return None, None

        features = _compute_pose_features(valid_keypoints)

        print(f"Successfully extracted {len(features)} features")

        # Return both raw keypoints and extracted features
        return valid_keypoints, features

    except Exception as e:
        # Best-effort per video: report and let the caller move on.
        print(f"Error processing video {video_path}: {e}")
        return None, None

# Process videos and extract features
def process_videos_and_extract_features(videos, label, max_count=100):
    """Extract features from up to ``max_count`` videos of one category.

    Args:
        videos: Sequence of video paths (must support ``len`` and slicing).
        label: Category name stored in each feature row's ``label`` column.
        max_count: Maximum number of videos taken from the start of ``videos``.

    Returns:
        ``(df, all_features)`` where ``df`` is a DataFrame with one row per
        successfully processed video (feature columns plus ``label`` and
        ``video_path``) and ``all_features`` is a list of
        ``(keypoints, label)`` tuples. Returns ``(None, None)`` when no video
        yields features.
    """
    feature_dicts = []
    all_features = []

    print(f"Found {len(videos)} {label} videos, processing up to {max_count}")

    # Loop-invariant denominator for the progress message.
    total = min(len(videos), max_count)

    for i, video_path in enumerate(videos[:max_count]):
        print(f"\nProcessing video {i+1}/{total}: {video_path}")
        keypoints, features = extract_features_from_video(str(video_path))

        # A None or empty feature dict means the video produced nothing usable.
        if not features:
            continue

        # Add label and video path to features
        features['label'] = label
        features['video_path'] = str(video_path)
        feature_dicts.append(features)

        # Store keypoints sequence
        if keypoints:
            all_features.append((keypoints, label))

    # Create DataFrame from feature dictionaries
    if feature_dicts:
        df = pd.DataFrame(feature_dicts)
        print(f"Successfully extracted features from {len(feature_dicts)} {label} videos")
        return df, all_features

    print(f"No features could be extracted from {label} videos")
    return None, None

# Result holders; each stays None if its category yields no features
combined_df = fall_df = workout_df = None

# Recursively collect the input videos of both categories
print("Finding videos...")
fall_videos = [*Path(FALL_DATA_DIR).rglob("*.avi")]
workout_videos = [*Path(WORKOUT_VIDEOS_DIR).rglob("*.mp4")]

print(f"Found {len(fall_videos)} fall videos")
print(f"Found {len(workout_videos)} workout videos")

# Run feature extraction on the fall videos
print("\nExtracting features from fall videos...")
fall_df, fall_features = process_videos_and_extract_features(
    fall_videos, "fall"
)

# Run feature extraction on the workout videos
print("\nExtracting features from workout videos...")
workout_df, workout_features = process_videos_and_extract_features(
    workout_videos, "workout"
)

# Gather the per-category DataFrames that were actually produced
print("\nPreparing to save features to CSV:")
dfs_to_combine = []

# Fall category
if fall_df is None:
    print("No fall features extracted successfully")
else:
    dfs_to_combine.append(fall_df)
    print(f"Fall features: {len(fall_df)} rows")

# Workout category
if workout_df is None:
    print("No workout features extracted successfully")
else:
    dfs_to_combine.append(workout_df)
    print(f"Workout features: {len(workout_df)} rows")

# Save even if only some categories have data
# Everything below runs only when at least one category produced a DataFrame:
# the rows are concatenated, written to CSV, and then explored with a
# correlation heatmap, a 2-component PCA scatter plot and a random-forest
# feature-importance ranking.
if dfs_to_combine:
    # ignore_index=True renumbers the rows 0..n-1 across both categories.
    combined_df = pd.concat(dfs_to_combine, ignore_index=True)
    csv_path = os.path.join(OUTPUT_DIR, "video_features.csv")
    combined_df.to_csv(csv_path, index=False)
    print(f"Features saved to {csv_path} with {len(combined_df)} total rows")
    
    # Display statistics
    print("\nFeature Statistics:")
    print(combined_df.groupby('label').size())
    
    # Display feature correlation matrix
    # NOTE: one try block wraps all three analyses, so an exception in an
    # earlier step (e.g. the heatmap) also skips the later ones (PCA, forest).
    try:
        # Only numeric columns are kept; the string columns `label` and
        # `video_path` added during extraction are excluded here.
        numeric_df = combined_df.select_dtypes(include=[np.number])
        plt.figure(figsize=(12, 10))
        correlation = numeric_df.corr()
        plt.imshow(correlation, cmap='coolwarm', interpolation='none')
        plt.colorbar()
        plt.xticks(range(len(correlation.columns)), correlation.columns, rotation=90)
        plt.yticks(range(len(correlation.columns)), correlation.columns)
        plt.title('Feature Correlation Matrix')
        plt.tight_layout()
        plt.show()
        
        # Visualize features with PCA
        # `label` is not part of numeric_df, so this drop is a safeguard only
        # (errors='ignore' makes it a no-op when the column is absent).
        features_for_pca = numeric_df.drop(columns=['label'], errors='ignore')
        
        # Handle missing values
        # Features absent for a video (e.g. velocity features when only one
        # pose frame was found) appear as NaN after concatenation; use 0.
        features_for_pca = features_for_pca.fillna(0)
        
        # Standardize features
        scaler = StandardScaler()
        scaled_features = scaler.fit_transform(features_for_pca)
        
        # Apply PCA
        pca = PCA(n_components=2)
        pca_result = pca.fit_transform(scaled_features)
        
        # Create DataFrame for plotting
        pca_df = pd.DataFrame({
            'PCA1': pca_result[:, 0],
            'PCA2': pca_result[:, 1],
            'label': combined_df['label']
        })
        
        # Plot PCA results
        # One scatter series per category so each gets its own color and
        # legend entry; categories with no rows are skipped.
        plt.figure(figsize=(10, 8))
        for label, color in zip(['fall', 'workout'], ['red', 'green']):
            subset = pca_df[pca_df['label'] == label]
            if not subset.empty:
                plt.scatter(subset['PCA1'], subset['PCA2'], c=color, label=label, alpha=0.7)
        
        plt.title('PCA of Video Features')
        plt.xlabel(f'PCA1 ({pca.explained_variance_ratio_[0]:.2%} variance)')
        plt.ylabel(f'PCA2 ({pca.explained_variance_ratio_[1]:.2%} variance)')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.show()
        
        print(f"Total variance explained by 2 components: {sum(pca.explained_variance_ratio_):.2%}")
    
        # Feature importance analysis
        # Create feature set for binary classification (workout vs. non-workout)
        # X is built before `is_workout` is added, so the target column is
        # not among the predictors.
        X = combined_df.select_dtypes(include=[np.number]).fillna(0)
        combined_df['is_workout'] = combined_df['label'] == 'workout'
        y = combined_df['is_workout']
        
        # Train a simple Random Forest to get feature importance
        # The forest is fitted on all rows with no held-out split; the
        # importances are exploratory, not a measure of model quality.
        from sklearn.ensemble import RandomForestClassifier
        
        rf = RandomForestClassifier(n_estimators=100, random_state=42)
        rf.fit(X, y)
        
        # Plot feature importance
        feature_importance = pd.DataFrame({
            'feature': X.columns,
            'importance': rf.feature_importances_
        }).sort_values('importance', ascending=False)
        
        plt.figure(figsize=(12, 6))
        plt.bar(feature_importance['feature'], feature_importance['importance'])
        plt.title('Feature Importance for Workout vs. Non-Workout Classification')
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()
        
        print("Top 5 most important features:")
        print(feature_importance.head(5))
    except Exception as e:
        # The CSV has already been written at this point; only the optional
        # plots/analysis are lost.
        print(f"Error during visualization or analysis: {e}")
        print("Continuing with basic results")
else:
    print("No features were successfully extracted from any videos. CSV not created.")
    
print("\nFeature extraction process complete.")

Checking data directories:
FALL_DATA_DIR exists: True
WORKOUT_VIDEOS_DIR exists: True
OUTPUT_DIR exists: True
Loading YOLO11 models...
Error loading models: name 'YOLO11' is not defined
Make sure YOLO11 is installed: pip install yolo11
Finding videos...
Found 190 fall videos
Found 590 workout videos

Extracting features from fall videos...
Found 190 fall videos, processing up to 100

Processing video 1/100: ..\data\le2i\Coffee_room_01\Coffee_room_01\Videos\video (1).avi
Video info: 320x240, 157 frames

0: 480x640 (no detections), 86.8ms
Speed: 2.4ms preprocess, 86.8ms inference, 1.1ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 77.4ms
Speed: 2.2ms preprocess, 77.4ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 60.1ms
Speed: 2.2ms preprocess, 60.1ms inference, 0.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 55.1ms
Speed: 2.1ms preprocess, 55.1ms inference, 1.1ms postprocess per