In [1]:
import cv2
import mediapipe as mp
import pandas as pd
import numpy as np
from tqdm import tqdm
import os

# Load the per-frame index table (one row per frame: Subject, Trial, Sweep, Label,
# Timestamp and the RGB / depth image paths — see the preview in the next cell).
# NOTE(review): this is an absolute, machine-specific path; modify it (or point it at a
# configurable data directory) before running on another machine.
df = pd.read_csv("/Users/suryanshpatel/Projects/Directed Readings/Technical/src/data/Finaldata_combined.csv")

In [5]:
# Display the loaded frame index (bare last expression -> rich DataFrame repr).
df


Unnamed: 0,Subject,Trial,Sweep,Label,Timestamp,Full_Path_RGB,Full_Path_Depth
0,2,2,10,0,03-32-54-8112,/Users/suryanshpatel/Projects/Directed Reading...,/Users/suryanshpatel/Projects/Directed Reading...
1,2,2,10,0,03-32-55-3457,/Users/suryanshpatel/Projects/Directed Reading...,/Users/suryanshpatel/Projects/Directed Reading...
2,2,2,10,0,03-32-55-4113,/Users/suryanshpatel/Projects/Directed Reading...,/Users/suryanshpatel/Projects/Directed Reading...
3,2,2,10,0,03-32-55-4463,/Users/suryanshpatel/Projects/Directed Reading...,/Users/suryanshpatel/Projects/Directed Reading...
4,2,2,10,0,03-32-54-5795,/Users/suryanshpatel/Projects/Directed Reading...,/Users/suryanshpatel/Projects/Directed Reading...
...,...,...,...,...,...,...,...
54380,19,1,23,4,01-59-57-6801,/Users/suryanshpatel/Projects/Directed Reading...,/Users/suryanshpatel/Projects/Directed Reading...
54381,19,1,23,4,01-59-57-6480,/Users/suryanshpatel/Projects/Directed Reading...,/Users/suryanshpatel/Projects/Directed Reading...
54382,19,1,23,4,01-59-57-5799,/Users/suryanshpatel/Projects/Directed Reading...,/Users/suryanshpatel/Projects/Directed Reading...
54383,19,1,23,4,01-59-57-6125,/Users/suryanshpatel/Projects/Directed Reading...,/Users/suryanshpatel/Projects/Directed Reading...


In [None]:
# Initialize MediaPipe Face Mesh (legacy `mp.solutions` API). This cell extracts
# landmark coordinates only; no blendshape/expression features are computed here.
mp_face_mesh = mp.solutions.face_mesh
mp_face_detection = mp.solutions.face_detection  # unused in this cell; kept for any cell that references it
mp_drawing = mp.solutions.drawing_utils  # unused in this cell; kept for any cell that references it

# Container for all features: one flat list per frame in which a face was detected
all_features = []

# Bookkeeping so that dropped frames are visible instead of silently disappearing
skipped_unreadable = 0  # RGB or depth image could not be read
skipped_no_face = 0     # Face Mesh returned no landmarks
skipped_error = 0       # any other exception while processing the frame

# The context manager closes the Face Mesh graph when the loop finishes.
# `face_mesh` stays bound afterwards but is closed; re-run this cell to get a fresh one.
with mp_face_mesh.FaceMesh(static_image_mode=True) as face_mesh:
    for _, row in tqdm(df.iterrows(), total=len(df)):
        rgb_path = row['Full_Path_RGB']
        depth_path = row['Full_Path_Depth']
        label = row['Label']

        try:
            # Read RGB image; cv2.imread returns None (it does not raise) when the
            # file is missing or cannot be decoded
            image = cv2.imread(rgb_path)
            if image is None:
                skipped_unreadable += 1
                print(f"Skipping {rgb_path} due to error: could not read RGB image")
                continue
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Process with MediaPipe
            results = face_mesh.process(image_rgb)
            if not results.multi_face_landmarks:
                # No face found: nothing to extract for this frame
                skipped_no_face += 1
                continue

            # Read depth image only once a face was found (saves I/O on dropped frames).
            # NOTE(review): IMREAD_GRAYSCALE yields an 8-bit single-channel image; if the
            # depth maps are stored as 16-bit, cv2.IMREAD_UNCHANGED would keep full
            # precision — confirm the file format before changing.
            depth_image = cv2.imread(depth_path, cv2.IMREAD_GRAYSCALE)
            if depth_image is None:
                skipped_unreadable += 1
                print(f"Skipping {rgb_path} due to error: could not read depth image {depth_path}")
                continue

            # Flatten the first detected face into [x0, y0, z0, x1, y1, z1, ...]
            landmarks = results.multi_face_landmarks[0]
            xyz = []
            for lm in landmarks.landmark:
                xyz.extend([lm.x, lm.y, lm.z])

            # Expression features placeholder (MediaPipe expression API can be added if needed)
            expression_features = []  # e.g., use dummy or separate logic if required

            # Depth features: global statistics over the whole depth frame
            depth_mean = np.mean(depth_image)
            depth_std = np.std(depth_image)

            # Combine all features; the label is the last element of each row
            features = xyz + expression_features + [depth_mean, depth_std, label]
            all_features.append(features)
        except Exception as e:
            # Best-effort extraction: report the frame and keep going
            skipped_error += 1
            print(f"Skipping {rgb_path} due to error: {e}")
            continue

# Build column names dynamically
# 468 landmarks x 3 coords + 2 depth stats + label = 1407 columns, matching the recorded output shape
num_landmarks = 468
xyz_cols = [f"{axis}{i}" for i in range(num_landmarks) for axis in ['x', 'y', 'z']]
expression_cols = []  # Add if you use blendshapes
depth_cols = ['depth_mean', 'depth_std']
columns = xyz_cols + expression_cols + depth_cols + ['Label']

# Create DataFrame
features_df = pd.DataFrame(all_features, columns=columns)

# Save to CSV for training
features_df.to_csv("pain_features.csv", index=False)
print(
    f"Kept {len(all_features)} of {len(df)} frames "
    f"(no face: {skipped_no_face}, unreadable: {skipped_unreadable}, errors: {skipped_error})"
)
print("Saved feature DataFrame with shape:", features_df.shape)


I0000 00:00:1744060618.441747  777683 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M2
W0000 00:00:1744060618.444579  785212 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
  0%|          | 0/54385 [00:00<?, ?it/s]W0000 00:00:1744060618.448721  785214 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
100%|██████████| 54385/54385 [09:50<00:00, 92.14it/s] 


Saved feature DataFrame with shape: (7898, 1407)


In [6]:
# (rows kept, columns) of the extracted face-mesh feature table.
features_df.shape

(7898, 1407)

Sequences

In [None]:
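# NOTE: this whole cell is commented out (disabled). It builds `sequences_df` by grouping on
# (Subject, Trial, Sweep, Label); a later cell in this notebook still reads `sequences_df`,
# so that cell relies on kernel state unless this code is restored or `sequences_df` is
# defined elsewhere first.
# import pandas as pd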


# sequence_length = 8
# sequences = []

# # Sort the DataFrame for consistency
# df_sorted = df.sort_values(by=['Subject', 'Trial', 'Sweep', 'Label', 'Timestamp'])

# # Group by folder-defining columns
# grouped = df_sorted.groupby(['Subject', 'Trial', 'Sweep', 'Label'])

# # Iterate over each group to create sequences
# for _, group in grouped:
#     group = group.reset_index(drop=True)
#     if len(group) >= sequence_length:
#         for i in range(len(group) - sequence_length + 1):
#             sequence = group.iloc[i:i+sequence_length]
#             sequences.append({
#                 'Subject': sequence['Subject'].iloc[0],
#                 'Trial': sequence['Trial'].iloc[0],
#                 'Sweep': sequence['Sweep'].iloc[0],
#                 'Label': sequence['Label'].iloc[0],
#                 'Timestamps': sequence['Timestamp'].tolist(),
#                 'RGB_Paths': sequence['Full_Path_RGB'].tolist(),
#                 'Depth_Paths': sequence['Full_Path_Depth'].tolist()
#             })

# # Convert to DataFrame
# sequences_df = pd.DataFrame(sequences)

# # Preview
# print(sequences_df.head())


   Subject  Trial  Sweep  Label  \
0        1      1      1      0   
1        1      1      1      0   
2        1      1      1      0   
3        1      1      1      0   
4        1      1      1      0   

                                          Timestamps  \
0  [11-51-02-0121, 11-51-02-0782, 11-51-02-1118, ...   
1  [11-51-02-0782, 11-51-02-1118, 11-51-02-1451, ...   
2  [11-51-02-1118, 11-51-02-1451, 11-51-02-2123, ...   
3  [11-51-02-1451, 11-51-02-2123, 11-51-02-2441, ...   
4  [11-51-02-2123, 11-51-02-2441, 11-51-02-2778, ...   

                                           RGB_Paths  \
0  [/Users/suryanshpatel/Projects/Directed Readin...   
1  [/Users/suryanshpatel/Projects/Directed Readin...   
2  [/Users/suryanshpatel/Projects/Directed Readin...   
3  [/Users/suryanshpatel/Projects/Directed Readin...   
4  [/Users/suryanshpatel/Projects/Directed Readin...   

                                         Depth_Paths  
0  [/Users/suryanshpatel/Projects/Directed Readin...  
1  [/

In [10]:
# Persist the sequence table and report its shape.
# NOTE(review): the only cell above that builds `sequences_df` is commented out, so on a
# fresh kernel (Restart & Run All) this cell presumably fails with NameError; if it is run
# after the later sequence-building cell instead, it writes that cell's differently shaped
# frame to this path — confirm which table `sequences.csv` is meant to hold.
# List-valued columns are written as their Python repr strings.
sequences_df.to_csv("/Users/suryanshpatel/Projects/Directed Readings/Technical/src/data/sequences.csv", index=False)
print("Saved sequences DataFrame with shape:", sequences_df.shape)

Saved sequences DataFrame with shape: (32531, 7)


In [None]:
# sequences_df[sequences_df['Label'] == 1]

Unnamed: 0,Subject,Trial,Sweep,Label,Timestamps,RGB_Paths,Depth_Paths
62,1,1,2,1,"[11-51-32-2733, 11-51-32-3396, 11-51-32-3732, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...
63,1,1,2,1,"[11-51-32-3396, 11-51-32-3732, 11-51-32-4400, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...
64,1,1,2,1,"[11-51-32-3732, 11-51-32-4400, 11-51-32-4727, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...
65,1,1,2,1,"[11-51-32-4400, 11-51-32-4727, 11-51-32-5074, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...
66,1,1,2,1,"[11-51-32-4727, 11-51-32-5074, 11-51-32-5397, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...
...,...,...,...,...,...,...,...
32526,20,2,40,1,"[04-38-57-9162, 04-38-57-9833, 04-38-58-0492, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...
32527,20,2,40,1,"[04-38-57-9833, 04-38-58-0492, 04-38-58-0836, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...
32528,20,2,40,1,"[04-38-58-0492, 04-38-58-0836, 04-38-58-2159, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...
32529,20,2,40,1,"[04-38-58-0836, 04-38-58-2159, 04-38-58-2490, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...


In [13]:
import pandas as pd

# Sliding-window parameters
sequence_length = 8  # frames per window
sequence_stride = 1  # you can make it >1 to skip steps
sequences = []

# Order frames chronologically inside every (Subject, Trial) recording.
# NOTE(review): Timestamp is compared as a string here; this assumes the fixed-width
# format sorts chronologically — confirm.
df_sorted = df.sort_values(by=['Subject', 'Trial', 'Timestamp']).reset_index(drop=True)

# One group per (Subject, Trial); Sweep and Label are deliberately not part of the key,
# so a window may span a sweep/label change (and whatever time gap separates sweeps).
grouped = df_sorted.groupby(['Subject', 'Trial'])

for (subject, trial), group in grouped:
    group = group.reset_index(drop=True)
    n_frames = len(group)

    # Recordings shorter than one window contribute nothing
    if n_frames < sequence_length:
        continue

    last_start = n_frames - sequence_length
    for start in range(0, last_start + 1, sequence_stride):
        window = group.iloc[start:start + sequence_length]
        timestamps = window['Timestamp']
        labels = window['Label']

        record = {
            'Subject': subject,
            'Trial': trial,
            'Start_Timestamp': timestamps.iloc[0],
            'End_Timestamp': timestamps.iloc[-1],
            # The window is labelled by its final frame (a majority vote is an alternative)
            'Label': labels.iloc[-1],
            'Timestamps': timestamps.tolist(),
            'RGB_Paths': window['Full_Path_RGB'].tolist(),
            'Depth_Paths': window['Full_Path_Depth'].tolist(),
            # Per-frame labels, kept so mixed-label windows can be identified later
            'Labels': labels.tolist(),
        }
        sequences.append(record)

# Create the final sequence DataFrame
sequences_df = pd.DataFrame(sequences)

# Save to disk, then preview the first rows
sequences_df.to_csv("/Users/suryanshpatel/Projects/Directed Readings/Technical/src/data/sequences2.csv", index=False)
sequences_df.head()

Unnamed: 0,Subject,Trial,Start_Timestamp,End_Timestamp,Label,Timestamps,RGB_Paths,Depth_Paths,Labels
0,1,1,11-51-02-0121,11-51-02-3452,0,"[11-51-02-0121, 11-51-02-0782, 11-51-02-1118, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...,"[0, 0, 0, 0, 0, 0, 0, 0]"
1,1,1,11-51-02-0782,11-51-02-3779,0,"[11-51-02-0782, 11-51-02-1118, 11-51-02-1451, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...,"[0, 0, 0, 0, 0, 0, 0, 0]"
2,1,1,11-51-02-1118,11-51-02-4121,0,"[11-51-02-1118, 11-51-02-1451, 11-51-02-2123, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...,"[0, 0, 0, 0, 0, 0, 0, 0]"
3,1,1,11-51-02-1451,11-51-02-4451,0,"[11-51-02-1451, 11-51-02-2123, 11-51-02-2441, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...,"[0, 0, 0, 0, 0, 0, 0, 0]"
4,1,1,11-51-02-2123,11-51-02-4781,0,"[11-51-02-2123, 11-51-02-2441, 11-51-02-2778, ...",[/Users/suryanshpatel/Projects/Directed Readin...,[/Users/suryanshpatel/Projects/Directed Readin...,"[0, 0, 0, 0, 0, 0, 0, 0]"


In [None]:
# Count the distinct per-frame label patterns across all windows.
# `Labels` holds Python lists when `sequences_df` comes straight from the builder cell,
# and lists are unhashable, so `value_counts()` raises TypeError on them. Converting each
# entry to a tuple makes it hashable; the distinct count is otherwise the same number
# that `len(value_counts())` would give.
print(sequences_df['Labels'].map(tuple).nunique())  # count of labels combinations

61


In [25]:
import mediapipe as mp
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm

# Mediapipe setup: a single module-level Pose detector, used by extract_pose_landmarks below.
# static_image_mode=True is passed so the detector is configured for independent still images.
# NOTE(review): the detector is never closed in the visible cells; call pose_detector.close()
# once extraction is finished if resources matter.
mp_pose = mp.solutions.pose
pose_detector = mp_pose.Pose(static_image_mode=True)

def extract_pose_landmarks(image_path):
    """Run the module-level MediaPipe Pose detector on one image.

    Parameters
    ----------
    image_path : str
        Path of the image file, passed to ``cv2.imread``.

    Returns
    -------
    numpy.ndarray or None
        1-D array laid out as ``[x, y, z, visibility]`` for each detected pose
        landmark, in landmark order. ``None`` when the image cannot be read,
        when no pose is detected, or when processing raises (the error is
        printed, not propagated).
    """
    try:
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None for a missing/undecodable file instead of raising;
            # report it directly rather than relying on cvtColor to fail on None.
            print(f"Error with image {image_path}: could not read image")
            return None

        # MediaPipe expects RGB, OpenCV loads BGR
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = pose_detector.process(image_rgb)

        if results.pose_landmarks:
            # Extract and flatten x, y, z, visibility for each landmark
            landmarks = results.pose_landmarks.landmark
            return np.array([[lmk.x, lmk.y, lmk.z, lmk.visibility] for lmk in landmarks]).flatten()
        else:
            return None  # No detection
    except Exception as e:
        # Best-effort: a single bad frame must not abort a multi-hour extraction run
        print(f"Error with image {image_path}: {e}")
        return None

# Process each sequence
sequence_landmarks = []

# Per-image cache of extraction results (vector or None). The same image path can appear
# in several sequences, so without a cache each image is pushed through pose inference
# once per sequence that contains it. The detector is created with static_image_mode=True,
# so a frame's result does not depend on which frames were processed before it and
# reusing the cached vector gives the same output.
landmark_cache = {}

for _, row in tqdm(sequences_df.iterrows(), total=len(sequences_df)):
    subject = row['Subject']
    trial = row['Trial']
    start_ts = row['Start_Timestamp']
    end_ts = row['End_Timestamp']
    label = row['Label']
    paths = row['RGB_Paths']

    landmarks_seq = []
    for path in paths:
        if path not in landmark_cache:
            landmark_cache[path] = extract_pose_landmarks(path)
        landmark_vector = landmark_cache[path]

        if landmark_vector is not None:
            landmarks_seq.append(landmark_vector)
        else:
            # If any frame fails, skip the whole sequence (optional)
            landmarks_seq = None
            break

    # None (a frame failed) and [] (no paths) are both skipped
    if landmarks_seq:
        sequence_landmarks.append({
            'Subject': subject,
            'Trial': trial,
            'Start_Timestamp': start_ts,
            'End_Timestamp': end_ts,
            'Label': label,
            # One row per frame, one column per flattened landmark value
            'Landmark_Sequence': np.stack(landmarks_seq)
        })

# Create final DataFrame for training
landmarks_df = pd.DataFrame(sequence_landmarks)
landmarks_df.head()
# landmarks_df.to_csv("/Users/suryanshpatel/Projects/Directed Readings/Technical/src/data/landmarks_sequences.csv", index=False)

I0000 00:00:1745259575.209133  928701 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
  0%|          | 0/54105 [00:00<?, ?it/s]W0000 00:00:1745259575.313866 1041961 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1745259575.332114 1041964 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1745259575.359379 1041958 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.
100%|██████████| 54105/54105 [3:41:32<00:00,  4.07it/s]  


Unnamed: 0,Subject,Trial,Start_Timestamp,End_Timestamp,Label,Landmark_Sequence
0,1,1,11-51-02-0121,11-51-02-3452,0,"[[0.5301783680915833, 0.39306363463401794, -0...."
1,1,1,11-51-02-0782,11-51-02-3779,0,"[[0.5285842418670654, 0.3886697292327881, -0.4..."
2,1,1,11-51-02-1118,11-51-02-4121,0,"[[0.5316091179847717, 0.39035293459892273, -0...."
3,1,1,11-51-02-1451,11-51-02-4451,0,"[[0.5293503999710083, 0.3902014195919037, -0.4..."
4,1,1,11-51-02-2123,11-51-02-4781,0,"[[0.5324769020080566, 0.39163902401924133, -0...."


In [26]:
# `Landmark_Sequence` holds one 2-D numpy array per row. Writing the frame directly makes
# pandas store str(array), and numpy abbreviates arrays with more than 1000 elements using
# "..." (default print threshold) — presumably the case here (8 frames x 132 values per
# frame = 1056; TODO confirm), which would make the saved landmarks unrecoverable.
# Serialise each array as a JSON nested list instead; read it back with
# np.array(json.loads(cell)). `landmarks_df` itself is left untouched.
import json

landmarks_csv = landmarks_df.assign(
    Landmark_Sequence=landmarks_df['Landmark_Sequence'].map(
        lambda seq: json.dumps(np.asarray(seq).tolist())
    )
)
landmarks_csv.to_csv("/Users/suryanshpatel/Projects/Directed Readings/Technical/src/data/landmarks_sequences.csv", index=False)

In [27]:
# Preview the first rows of the per-sequence pose-landmark table.
landmarks_df.head()

Unnamed: 0,Subject,Trial,Start_Timestamp,End_Timestamp,Label,Landmark_Sequence
0,1,1,11-51-02-0121,11-51-02-3452,0,"[[0.5301783680915833, 0.39306363463401794, -0...."
1,1,1,11-51-02-0782,11-51-02-3779,0,"[[0.5285842418670654, 0.3886697292327881, -0.4..."
2,1,1,11-51-02-1118,11-51-02-4121,0,"[[0.5316091179847717, 0.39035293459892273, -0...."
3,1,1,11-51-02-1451,11-51-02-4451,0,"[[0.5293503999710083, 0.3902014195919037, -0.4..."
4,1,1,11-51-02-2123,11-51-02-4781,0,"[[0.5324769020080566, 0.39163902401924133, -0...."


In [None]:
# Inspect the landmark value stored for the first sequence.
# NOTE(review): the recorded output below is a single float, while the extraction cell
# stores an np.stack(...) array per row; the output looks stale — re-run to confirm.
landmarks_df['Landmark_Sequence'].iloc[0]

0.5301783680915833

In [36]:
# (number of sequences kept, number of columns).
landmarks_df.shape

(54105, 6)

In [37]:
# Preview the first rows again (same frame as displayed earlier).
landmarks_df.head()

Unnamed: 0,Subject,Trial,Start_Timestamp,End_Timestamp,Label,Landmark_Sequence
0,1,1,11-51-02-0121,11-51-02-3452,0,"[[0.5301783680915833, 0.39306363463401794, -0...."
1,1,1,11-51-02-0782,11-51-02-3779,0,"[[0.5285842418670654, 0.3886697292327881, -0.4..."
2,1,1,11-51-02-1118,11-51-02-4121,0,"[[0.5316091179847717, 0.39035293459892273, -0...."
3,1,1,11-51-02-1451,11-51-02-4451,0,"[[0.5293503999710083, 0.3902014195919037, -0.4..."
4,1,1,11-51-02-2123,11-51-02-4781,0,"[[0.5324769020080566, 0.39163902401924133, -0...."


In [44]:
# Shape of the column Series: one entry per sequence. This is not the shape of the
# arrays stored inside each entry.
landmarks_df['Landmark_Sequence'].shape

(54105,)