In [3]:
import cv2
import mediapipe as mp
from feat import Detector
import numpy as np
import tempfile
import csv

In [2]:
# Load video
video_path = '1100011002.avi'  # replace with your file
cap = cv2.VideoCapture(video_path)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Get target frames: start, middle, end
target_indices = [0, frame_count // 2, frame_count - 1]
frames = {}

for idx in target_indices:
    cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
    ret, frame = cap.read()
    if ret:
        frames[idx] = frame
    else:
        print(f"⚠️ Failed to read frame at index {idx}")

cap.release()

# Initialize Py-Feat detector
feat_detector = Detector(
    face_model="retinaface",
    landmark_model="mobilefacenet",
    au_model='svm', # or 'xgb' for XGBoost model
    emotion_model="resmasknet",
    facepose_model="img2pose"
)

# Initialize MediaPipe Holistic
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(static_image_mode=True)

# Store results
results_dict = {}

# Helper function to extract landmarks
def extract_landmarks(landmarks, label):
    if landmarks is None:
        print(f"\n{label} landmarks: Not detected")
        return None
    points = [(lm.x, lm.y, lm.z) for lm in landmarks.landmark]
    print(f"\n{label} landmarks:")
    for i, pt in enumerate(points):
        print(f"{i}: x={pt[0]:.4f}, y={pt[1]:.4f}, z={pt[2]:.4f}")
    return points

# Process each selected frame
for idx, frame in frames.items():
    print(f"\n=== Frame at index {idx} ===")

    # ----- Py-Feat: Extract Action Units -----
    
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
        temp_filename = tmp.name
        cv2.imwrite(temp_filename, frame)
        feats = feat_detector.detect_image(temp_filename)
        
    au = feats.aus.iloc[0].to_dict()
    print("\nAction Units (AUs):")
    for k, v in au.items():
        print(f"{k}: {v:.4f}")

    # ----- MediaPipe Holistic: Extract Landmarks -----
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = holistic.process(rgb_frame)

    left_hand = extract_landmarks(results.left_hand_landmarks, "Left Hand")
    right_hand = extract_landmarks(results.right_hand_landmarks, "Right Hand")
    body = extract_landmarks(results.pose_landmarks, "Body")

    # Store results
    results_dict[idx] = {
        'action_units': au,
        'left_hand': left_hand,
        'right_hand': right_hand,
        'body': body
    }

# Clean up
holistic.close()

# ✅ Optional: Print structured summary of all results
print("\n\n=== Summary of All Results ===")
for idx, data in results_dict.items():
    print(f"\n--- Frame Index: {idx} ---")
    print("Action Units:", data['action_units'])
    print("Left Hand Landmarks:", data['left_hand'])
    print("Right Hand Landmarks:", data['right_hand'])
    print("Body Landmarks:", data['body'])


=== Frame at index 0 ===


100%|██████████| 1/1 [00:00<00:00,  1.02it/s]



Action Units (AUs):
AU01: 1.0000
AU02: 0.0000
AU04: 0.0000
AU05: 0.0000
AU06: 0.0000
AU07: 0.0000
AU09: 0.0000
AU10: 0.0000
AU11: 0.0000
AU12: 0.0000
AU14: 0.0000
AU15: 1.0000
AU17: 0.0000
AU20: 0.0000
AU23: 1.0000
AU24: 0.0000
AU25: 1.0000
AU26: 0.0000
AU28: 1.0000
AU43: 0.0000

Left Hand landmarks: Not detected

Right Hand landmarks: Not detected

Body landmarks:
0: x=0.5819, y=0.7569, z=-0.8294
1: x=0.5994, y=0.7077, z=-0.7791
2: x=0.6146, y=0.7094, z=-0.7791
3: x=0.6275, y=0.7119, z=-0.7794
4: x=0.5556, y=0.7061, z=-0.7910
5: x=0.5390, y=0.7073, z=-0.7905
6: x=0.5241, y=0.7095, z=-0.7908
7: x=0.6497, y=0.7389, z=-0.4171
8: x=0.5011, y=0.7386, z=-0.4530
9: x=0.6078, y=0.8205, z=-0.6896
10: x=0.5536, y=0.8184, z=-0.6997
11: x=0.7483, y=0.9371, z=-0.1809
12: x=0.4070, y=1.0024, z=-0.3035
13: x=0.8722, y=1.0004, z=-0.2140
14: x=0.3723, y=1.3269, z=-0.4563
15: x=0.8527, y=1.3641, z=-0.5323
16: x=0.4312, y=1.4576, z=-0.9956
17: x=0.8763, y=1.4480, z=-0.6295
18: x=0.4345, y=1.5261, z=-1.

100%|██████████| 1/1 [00:00<00:00,  1.08it/s]



Action Units (AUs):
AU01: 0.0000
AU02: 0.0000
AU04: 0.0000
AU05: 0.0000
AU06: 0.0000
AU07: 0.0000
AU09: 0.0000
AU10: 0.0000
AU11: 0.0000
AU12: 0.0000
AU14: 0.0000
AU15: 0.0000
AU17: 0.0000
AU20: 0.0000
AU23: 1.0000
AU24: 0.0000
AU25: 1.0000
AU26: 1.0000
AU28: 1.0000
AU43: 0.0000

Left Hand landmarks: Not detected

Right Hand landmarks: Not detected

Body landmarks:
0: x=0.5685, y=0.7530, z=-0.8903
1: x=0.5884, y=0.7026, z=-0.8405
2: x=0.6045, y=0.7021, z=-0.8407
3: x=0.6187, y=0.7025, z=-0.8409
4: x=0.5429, y=0.7072, z=-0.8476
5: x=0.5268, y=0.7099, z=-0.8471
6: x=0.5133, y=0.7132, z=-0.8474
7: x=0.6450, y=0.7276, z=-0.4831
8: x=0.4942, y=0.7393, z=-0.5019
9: x=0.5978, y=0.8170, z=-0.7529
10: x=0.5452, y=0.8160, z=-0.7585
11: x=0.7508, y=0.9346, z=-0.2552
12: x=0.3995, y=0.9891, z=-0.3234
13: x=0.8235, y=1.2303, z=-0.2625
14: x=0.3648, y=1.3053, z=-0.4044
15: x=0.8070, y=1.4703, z=-0.6074
16: x=0.4029, y=1.4702, z=-0.9404
17: x=0.8291, y=1.5615, z=-0.7176
18: x=0.3977, y=1.5478, z=-1.

100%|██████████| 1/1 [00:01<00:00,  1.07s/it]


Action Units (AUs):
AU01: 0.0000
AU02: 0.0000
AU04: 0.0000
AU05: 0.0000
AU06: 0.0000
AU07: 0.0000
AU09: 0.0000
AU10: 0.0000
AU11: 0.0000
AU12: 0.0000
AU14: 0.0000
AU15: 0.0000
AU17: 0.0000
AU20: 0.0000
AU23: 1.0000
AU24: 0.0000
AU25: 1.0000
AU26: 0.0000
AU28: 1.0000
AU43: 0.0000

Left Hand landmarks: Not detected

Right Hand landmarks: Not detected

Body landmarks:
0: x=0.5547, y=0.7530, z=-0.9011
1: x=0.5752, y=0.7017, z=-0.8509
2: x=0.5906, y=0.6999, z=-0.8510
3: x=0.6060, y=0.6992, z=-0.8511
4: x=0.5280, y=0.7109, z=-0.8577
5: x=0.5138, y=0.7149, z=-0.8571
6: x=0.5016, y=0.7193, z=-0.8574
7: x=0.6334, y=0.7255, z=-0.4886
8: x=0.4882, y=0.7463, z=-0.5049
9: x=0.5899, y=0.8157, z=-0.7616
10: x=0.5339, y=0.8194, z=-0.7665
11: x=0.7548, y=0.9432, z=-0.2435
12: x=0.3981, y=0.9953, z=-0.3433
13: x=0.8222, y=1.2512, z=-0.2617
14: x=0.3594, y=1.3247, z=-0.4744
15: x=0.8137, y=1.4692, z=-0.6169
16: x=0.4247, y=1.4666, z=-1.0385
17: x=0.8394, y=1.5503, z=-0.7243
18: x=0.4286, y=1.5410, z=-1.




In [7]:
import cv2
import csv
import os
import tempfile
from feat.detector import Detector  # ✅ Correct import for Detector

# Initialize the detector
detector = Detector(
    face_model="retinaface",
    landmark_model="mobilefacenet",
    au_model='svm', # or 'xgb' for XGBoost model
    facepose_model="img2pose"
)

# Path to your video
video_path = '1100011002.avi'

# Open the video
cap = cv2.VideoCapture(video_path)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Get indices of 3 target frames: start, middle, end
target_indices = [0, frame_count // 2, frame_count - 1]
all_features = []

for idx in target_indices:
    cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
    ret, frame = cap.read()
    if not ret:
        print(f"⚠️ Could not read frame {idx}")
        continue

    # Save frame to temp file
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
        temp_filename = tmp.name
        cv2.imwrite(temp_filename, frame)

    # Extract features from the saved image
    result = detector.detect_image(temp_filename)

    # Filter AU features only
    au_features = result.loc[:, result.columns.str.startswith('AU')]
    all_features.append(au_features.iloc[0].tolist())

    os.remove(temp_filename)

cap.release()

# Generate column names like 1_AU01, 2_AU01, etc.
column_names = []
for i, _ in enumerate(all_features, start=1):
    au_cols = au_features.columns
    column_names.extend([f"{i}_{col}" for col in au_cols])

# Flatten all features into one row
flattened_features = [val for sublist in all_features for val in sublist]

# Save to CSV
with open('output_features.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(column_names)
    writer.writerow(flattened_features)

print("✅ Features extracted using Detector and saved to output_features.csv")


100%|██████████| 1/1 [00:00<00:00,  1.17it/s]
100%|██████████| 1/1 [00:00<00:00,  1.18it/s]
100%|██████████| 1/1 [00:01<00:00,  1.01s/it]

✅ Features extracted using Detector and saved to output_features.csv





In [None]:
import cv2
import csv
import os
import tempfile
from feat.detector import Detector
import mediapipe as mp

# Initialize Py-Feat Detector
detector = Detector(
    face_model="retinaface",
    landmark_model="mobilefacenet",
    au_model='svm', # or 'xgb' for XGBoost model
    facepose_model="img2pose"
)

# Initialize MediaPipe Holistic
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(static_image_mode=True)

# Path to your video
video_path = '1100011002.avi'
cap = cv2.VideoCapture(video_path)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Select 3 frames: start, middle, end
target_indices = [0, frame_count // 2, frame_count - 1]
all_feat_features = []
all_mp_features = []
mp_column_names = []
feat_column_names = []

for frame_idx, idx in enumerate(target_indices, start=1):
    cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
    ret, frame = cap.read()
    if not ret:
        print(f"⚠️ Could not read frame {idx}")
        continue

    # --- Py-Feat AU Extraction ---
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
        temp_filename = tmp.name
        cv2.imwrite(temp_filename, frame)

    feat_result = detector.detect_image(temp_filename)
    au_features = feat_result.loc[:, feat_result.columns.str.startswith('AU')]
    au_values = au_features.iloc[0].tolist()
    all_feat_features.extend(au_values)
    if not feat_column_names:
        feat_column_names = [f"{frame_idx}_{col}" for col in au_features.columns]

    os.remove(temp_filename)

    # --- MediaPipe Landmark Extraction ---
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = holistic.process(frame_rgb)

    def extract_landmarks(landmarks, label, lm_values, mp_column_names, frame_idx, add_columns):
        count = 21 if label in ["LHAND", "RHAND"] else 33
        if landmarks:
            for i, lm in enumerate(landmarks.landmark):
                lm_values.extend([lm.x, lm.y, lm.z])
                if add_columns:
                    mp_column_names.extend([
                        f"{frame_idx}_{label}_{i}_x",
                        f"{frame_idx}_{label}_{i}_y",
                        f"{frame_idx}_{label}_{i}_z"
                    ])
        else:
            for i in range(count):
                lm_values.extend([0.0, 0.0, 0.0])
                if add_columns:
                    mp_column_names.extend([
                        f"{frame_idx}_{label}_{i}_x",
                        f"{frame_idx}_{label}_{i}_y",
                        f"{frame_idx}_{label}_{i}_z"
                    ])

    lm_values = []
    extract_landmarks(results.left_hand_landmarks, "LHAND", lm_values, mp_column_names, frame_idx)
    extract_landmarks(results.right_hand_landmarks, "RHAND", lm_values, mp_column_names, frame_idx)
    extract_landmarks(results.pose_landmarks, "POSE", lm_values, mp_column_names, frame_idx)


    all_mp_features.extend(lm_values)

cap.release()
holistic.close()

# Combine headers and values
column_names = feat_column_names + mp_column_names
all_features = all_feat_features + all_mp_features

# Write to CSV
with open('combined_features.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(column_names)
    writer.writerow(all_features)

print("✅ Combined AU and body/hand landmark features saved to combined_features.csv")


100%|██████████| 1/1 [00:00<00:00,  1.06it/s]
100%|██████████| 1/1 [00:00<00:00,  1.12it/s]
100%|██████████| 1/1 [00:01<00:00,  1.02s/it]

✅ Combined AU and body/hand landmark features saved to combined_features.csv





In [None]:
import cv2
import csv
from feat import Detector
import mediapipe as mp
import os

# Initialize detectors
detector = Detector(
    face_model="retinaface",
    landmark_model="mobilefacenet",
    au_model='svm', # or 'xgb' for XGBoost model
    facepose_model="img2pose"
)
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(static_image_mode=True)

# Get all AU columns
au_columns = [f"{i}_{col}" for i in range(1, 4) for col in detector.au_columns]

# Custom function to extract MediaPipe landmarks
def extract_landmarks(landmarks, label, lm_values, mp_column_names, frame_idx, add_columns):
    count = 21 if label in ["LHAND", "RHAND"] else 33
    if landmarks:
        for i, lm in enumerate(landmarks.landmark):
            lm_values.extend([lm.x, lm.y, lm.z])
            if add_columns:
                mp_column_names.extend([
                    f"{frame_idx}_{label}_{i}_x",
                    f"{frame_idx}_{label}_{i}_y",
                    f"{frame_idx}_{label}_{i}_z"
                ])
    else:
        for i in range(count):
            lm_values.extend([0.0, 0.0, 0.0])
            if add_columns:
                mp_column_names.extend([
                    f"{frame_idx}_{label}_{i}_x",
                    f"{frame_idx}_{label}_{i}_y",
                    f"{frame_idx}_{label}_{i}_z"
                ])

# Directory and output
input_folder = "videos"
output_csv = "video_features.csv"

all_rows = []
mp_column_names = []  # Will only be filled once

for filename in os.listdir(input_folder):
    if filename.endswith(".avi"):
        video_path = os.path.join(input_folder, filename)
        cap = cv2.VideoCapture(video_path)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        target_indices = [0, frame_count // 2, frame_count - 1]

        row = []

        for frame_idx, frame_no in enumerate(target_indices, start=1):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
            ret, frame = cap.read()

            if not ret:
                print(f"Failed to read frame {frame_no} from {filename}")
                row.extend([0.0] * (len(detector.au_columns) + 3 * (33 + 21 + 21)))  # Pad with zeros
                continue

            # Py-Feat AU detection
            feat_result = detector.detect_image(frame)
            if len(feat_result) > 0:
                au_data = feat_result.loc[0, detector.au_columns].tolist()
            else:
                au_data = [0.0] * len(detector.au_columns)

            # MediaPipe landmark detection
            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = holistic.process(image_rgb)

            lm_values = []
            add_columns = (not mp_column_names)  # Only true once

            extract_landmarks(results.left_hand_landmarks, "LHAND", lm_values, mp_column_names, frame_idx, add_columns)
            extract_landmarks(results.right_hand_landmarks, "RHAND", lm_values, mp_column_names, frame_idx, add_columns)
            extract_landmarks(results.pose_landmarks, "POSE", lm_values, mp_column_names, frame_idx, add_columns)

            row.extend([*au_data, *lm_values])

        cap.release()
        all_rows.append([filename] + row)

# Write CSV
with open(output_csv, "w", newline="") as f:
    writer = csv.writer(f)
    header = ["video"] + au_columns + mp_column_names
    writer.writerow(header)
    writer.writerows(all_rows)

print(f"Features extracted and saved to {output_csv}")


AttributeError: 'Detector' object has no attribute 'au_columns'