In [None]:
import pandas as pd

# Sampling rate
sampling_rate = 2000  # Hz

# === Function to load and process behavioral data ===
def load_behavioral(session_name, subject_id):
    # Load CSV
    filename = f"data/Behavioral_data/{session_name}Subject{subject_id}.csv"
    df = pd.read_csv(filename)
    
    # Filter out "Rest" blocks
    df = df[df['n_back'] != 'Rest']
    
    # Clean n_back
    df['n_back'] = df['n_back'].str.strip().str.lower()
    
    # Map to 1_back / 3_back
    def map_nback(text):
        if 'one back' in text:
            return '1_back'
        elif 'three back' in text:
            return '3_back'
        else:
            return None
    
    df['n_back_task'] = df['n_back'].apply(map_nback)
    
    # Correctness
    df['correct'] = df['Response'] == df['Correct_Response']
    
    # Group by TrialNumber (block) and n_back
    trial_summary = df.groupby(['TrialNumber', 'n_back_task']).agg(
        accuracy=('correct', 'mean'),
        mean_rt=('Response_Time', 'mean')
    ).reset_index()
    
    # Add session label
    trial_summary['session'] = session_name.lower()
    
    return trial_summary

# === Main loop over participants ===
participants = ['3F', '4F', '6M', '8M', '11F']
all_data = []

for pid in participants:
    subject_num = pid[:-1]
    gender = pid[-1]
    
    # Load behavioral summaries
    calming_behav = load_behavioral('Calming', subject_num)
    vexing_behav = load_behavioral('Vexing', subject_num)
    
    # Combine sessions — preserves session order (Calming first, then Vexing)
    behavior_df = pd.concat([calming_behav, vexing_behav], ignore_index=True)
    
    # Load EDA and TRIGGERS_BLOCK (block-level triggers!)
    eda_path = f"data/Biopac_data/EDA/Subject{subject_num}{gender}_EDA.csv"
    triggers_path = f"data/Biopac_data/Timing/Subject{subject_num}{gender}_Triggers_block.csv"
    
    eda_df = pd.read_csv(eda_path, header=None, names=['EDA'])
    triggers_df = pd.read_csv(triggers_path)
    
    print(f"\nParticipant {pid} → triggers_df shape: {triggers_df.shape} (should be ~8 rows, wide format)")

    # === Loop through behavioral rows — IN ORIGINAL ORDER ===
    for idx, behav_row in behavior_df.sort_values(['session', 'TrialNumber']).iterrows():
        session_label = behav_row['session']
        nback_label = behav_row['n_back_task']
        trial_number = behav_row['TrialNumber']
        
        found = False
        
        # Search for first unused matching trigger
        for t_idx, t_row in triggers_df.iterrows():
            start_col = f"{session_label}_{nback_label}_start"
            end_col = f"{session_label}_{nback_label}_end"
            
            if pd.notna(t_row[start_col]) and pd.notna(t_row[end_col]):
                # Process this EDA slice
                start_time = t_row[start_col]
                end_time = t_row[end_col]
                
                start_idx = int(start_time * sampling_rate)
                end_idx = int(end_time * sampling_rate)
                
                eda_slice = eda_df['EDA'].iloc[start_idx:end_idx]
                mean_eda = eda_slice.mean()
                
                # Mark this trigger as used
                triggers_df.at[t_idx, start_col] = None
                triggers_df.at[t_idx, end_col] = None
                
                # Append row
                all_data.append({
                    'participant': pid,
                    'trial': len(all_data) + 1,
                    'condition': f"{session_label.capitalize()} {nback_label.replace('_', '-').capitalize()}",
                    'mean_eda': 0 if pd.isna(mean_eda) else mean_eda,
                    'accuracy': 0 if pd.isna(behav_row['accuracy']) else behav_row['accuracy'],
                    'mean_rt': 0 if pd.isna(behav_row['mean_rt']) else behav_row['mean_rt']
                })
                
                found = True
                break
        
        if not found:
            print(f"WARNING: No matching trigger found for {session_label} {nback_label} TrialNumber {trial_number}")

    # Participant progress summary
    trials_processed = len(all_data) // len(participants)
    print(f"Participant {pid}: {trials_processed} trials processed (expected: 32)")

# Combine all participants into one DataFrame
combined_df = pd.DataFrame(all_data)

# Optional: save to CSV
combined_df.to_csv('df.csv', index=False)

# Preview result
print(f"\nFinal combined_df shape: {combined_df.shape}")
print(combined_df.head())


ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [8]:
import pandas as pd

# Sampling rates
EDA_SAMPLING_RATE = 2000      # Hz
OHB_SAMPLING_RATE = 7.6294    # Hz

# === Function to load and process behavioral data ===
def load_behavioral(session_name, subject_id):
    filename = f"data/Behavioral_data/{session_name}Subject{subject_id}.csv"
    df = pd.read_csv(filename)
    df = df[df['n_back'] != 'Rest']
    df['n_back'] = df['n_back'].str.strip().str.lower()

    def map_nback(text):
        if 'one back' in text:
            return '1_back'
        elif 'three back' in text:
            return '3_back'
        return None
    df['n_back_task'] = df['n_back'].apply(map_nback)

    df['correct'] = df['Response'] == df['Correct_Response']
    summary = (
        df.groupby(['TrialNumber', 'n_back_task'])
          .agg(accuracy=('correct', 'mean'), mean_rt=('Response_Time', 'mean'))
          .reset_index()
    )
    summary['session'] = session_name.lower()
    return summary

# === Main processing ===
participants = ['3F', '4F', '6M', '8M', '11F']
all_data = []

for pid in participants:
    subj, gender = pid[:-1], pid[-1]
    calming = load_behavioral('Calming', subj)
    vexing = load_behavioral('Vexing', subj)
    behav_df = pd.concat([calming, vexing], ignore_index=True)

    # Load EDA data and triggers (first 8 cols)
    eda = pd.read_csv(
        f"data/Biopac_data/EDA/Subject{subj}{gender}_EDA.csv",
        header=None, names=['EDA']
    )
    trig_eda = pd.read_csv(
        f"data/Biopac_data/Timing/Subject{subj}{gender}_Triggers_block.csv"
    ).iloc[:, :8]

    # Load OHb raw data as a single series and its triggers (first 8 cols)
    raw_ohb = pd.read_csv(
        f"data/fNIRS_data/ohb/Subject{subj}{gender}_ohb.csv",
        header=None
    )
    # Flatten to 1D and create a Series
    ohb = pd.Series(raw_ohb.values.flatten(), name='ohb')
    trig_ohb = pd.read_csv(
        f"data/fNIRS_data/Subject{subj}{gender}_Triggers.csv"
    ).iloc[:, :8]

    print(f"\nParticipant {pid}: EDA triggers {trig_eda.shape}, OHb triggers {trig_ohb.shape}")

    # Iterate through trials in original order
    for _, behav in behav_df.sort_values(['session', 'TrialNumber']).iterrows():
        sess = behav['session']
        task = behav['n_back_task']
        start_col = f"{sess}_{task}_start"
        end_col   = f"{sess}_{task}_end"
        found = False

        # Check each trigger row index
        for idx in trig_eda.index:
            e_start = trig_eda.at[idx, start_col]
            e_end   = trig_eda.at[idx, end_col]
            if pd.notna(e_start) and pd.notna(e_end):
                # EDA slice and mean
                s_e = int(e_start * EDA_SAMPLING_RATE)
                e_e = int(e_end   * EDA_SAMPLING_RATE)
                mean_eda = eda['EDA'].iloc[s_e:e_e].mean()
                trig_eda.at[idx, start_col] = None
                trig_eda.at[idx, end_col]   = None

                # OHb slice and mean using its own triggers
                o_start = trig_ohb.at[idx, start_col]
                o_end   = trig_ohb.at[idx, end_col]
                s_o = int(o_start * OHB_SAMPLING_RATE)
                e_o = int(o_end   * OHB_SAMPLING_RATE)
                mean_ohb = ohb.iloc[s_o:e_o].mean()
                trig_ohb.at[idx, start_col] = None
                trig_ohb.at[idx, end_col]   = None

                all_data.append({
                    'participant': pid,
                    'condition': f"{sess.capitalize()} {task.replace('_','-').capitalize()}",
                    'mean_eda': mean_eda,
                    'mean_ohb': 0 if pd.isna(mean_ohb) else mean_ohb,
                    'accuracy': behav['accuracy'],
                    'mean_rt': behav['mean_rt']
                })
                found = True
                break

        if not found:
            print(f"WARNING: No trigger for {sess} {task} trial {behav['TrialNumber']}")

    processed = len(all_data) // len(participants)
    print(f"Participant {pid} processed {processed} trials (expected 32)")

# Save with full precision
df_combined = pd.DataFrame(all_data)
df_combined.to_csv('data/df.csv', index=False, float_format='%.18e')

print(f"\nFinal combined shape: {df_combined.shape}")
print(df_combined.head())





Participant 3F: EDA triggers (8, 8), OHb triggers (8, 8)
Participant 3F processed 6 trials (expected 32)

Participant 4F: EDA triggers (8, 8), OHb triggers (8, 8)
Participant 4F processed 12 trials (expected 32)

Participant 6M: EDA triggers (8, 8), OHb triggers (8, 8)
Participant 6M processed 19 trials (expected 32)

Participant 8M: EDA triggers (8, 8), OHb triggers (8, 8)
Participant 8M processed 25 trials (expected 32)

Participant 11F: EDA triggers (8, 8), OHb triggers (8, 8)
Participant 11F processed 32 trials (expected 32)

Final combined shape: (160, 6)
  participant       condition  mean_eda  mean_ohb  accuracy     mean_rt
0          3F  Calming 1-back  7.537035  0.008164  0.863636  564.409091
1          3F  Calming 3-back  7.053329  0.003207  0.681818  488.136364
2          3F  Calming 3-back  6.492472 -0.000443  0.727273  492.272727
3          3F  Calming 1-back  6.135949 -0.000339  0.954545  308.954545
4          3F  Calming 3-back  5.944514  0.000300  0.681818  656.863636


In [5]:
import pandas as pd
import os
import glob
from datetime import datetime, timedelta
import re

def parse_time_to_seconds(time_str):
    """Convert time string (HH:MM:SS or HH:MM:SS.mmm) to seconds"""
    try:
        # Handle microseconds if present
        if '.' in time_str:
            time_part, microsec_part = time_str.split('.')
            microseconds = int(microsec_part.ljust(6, '0')[:6])  # Pad or truncate to 6 digits
        else:
            time_part = time_str
            microseconds = 0
        
        # Parse time components
        time_parts = time_part.split(':')
        hours = int(time_parts[0])
        minutes = int(time_parts[1])
        seconds = int(time_parts[2])
        
        total_seconds = hours * 3600 + minutes * 60 + seconds + microseconds / 1000000
        return total_seconds
    except Exception as e:
        print(f"Error parsing time '{time_str}': {e}")
        return 0

def inspect_file_structure(filepath, num_lines=10):
    """Inspect the structure of a file to understand its format"""
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = [f.readline().strip() for _ in range(num_lines)]
        
        print(f"\nInspecting {os.path.basename(filepath)}:")
        for i, line in enumerate(lines):
            if line:  # Only print non-empty lines
                print(f"Line {i+1}: {line}")
        print("-" * 50)
        
        return lines
    except Exception as e:
        print(f"Error inspecting {filepath}: {e}")
        return []

def read_detailed_file(filepath):
    """Read a detailed.txt file and return DataFrame with robust parsing"""
    try:
        # Based on the file structure analysis, we know:
        # Line 1: "Video analysis detailed log"
        # Line 2: (empty)
        # Line 3: "Face Model: General"
        # Line 4: "Frame rate: 23.98"
        # Line 5: (empty)
        # Line 6: Header row with column names
        # Line 7+: Data rows
        
        # So we need to skip the first 5 rows and use row 6 as header (index 5, or skiprows=5)
        print(f"  Reading with skiprows=5, header=0...")
        df = pd.read_csv(filepath, sep='\t', skiprows=5, header=0)
        
        if not df.empty and len(df.columns) > 1:
            print(f"  Success! Shape: {df.shape}")
            print(f"  Columns: {list(df.columns)}")
            
            # Convert Video Time column to seconds
            if 'Video Time' in df.columns:
                print(f"  Converting Video Time to seconds...")
                df['Time_seconds'] = df['Video Time'].apply(parse_time_to_seconds)
                df['Time'] = df['Video Time']  # Keep original for reference
            
            # Handle FIT_FAILED and FIND_FAILED values
            # Replace these with NaN for numeric columns
            for col in df.columns:
                if col not in ['Video Time', 'Time']:
                    df[col] = df[col].replace(['FIT_FAILED', 'FIND_FAILED'], pd.NA)
                    # Try to convert to numeric, keeping non-numeric as is
                    df[col] = pd.to_numeric(df[col], errors='ignore')
            
            return df
        else:
            print(f"  Failed to read properly - shape: {df.shape}")
            return pd.DataFrame()
            
    except Exception as e:
        print(f"  Error reading {filepath}: {e}")
        # Fallback to the original robust approach
        return read_detailed_file_fallback(filepath)

def read_detailed_file_fallback(filepath):
    """Fallback method for reading detailed files"""
    try:
        # First, inspect the file structure
        lines = inspect_file_structure(filepath)
        
        # Try different parsing approaches
        approaches = [
            # Skip initial lines that might be metadata
            {'sep': '\t', 'skiprows': 5, 'header': 0},
            {'sep': '\t', 'skiprows': 4, 'header': 0},
            {'sep': '\t', 'skiprows': 6, 'header': 0},
            {'sep': '\t', 'skiprows': 3, 'header': 0},
            # Standard tab-separated with different header positions
            {'sep': '\t', 'header': 5},
            {'sep': '\t', 'header': 4},
            {'sep': '\t', 'header': 6},
        ]
        
        for i, params in enumerate(approaches):
            try:
                print(f"  Trying fallback approach {i+1}: {params}")
                df = pd.read_csv(filepath, **params)
                
                if not df.empty and len(df.columns) > 5:  # Expect at least 6 columns
                    print(f"  Success! Shape: {df.shape}")
                    print(f"  Columns: {list(df.columns)}")
                    
                    # Convert Time column to seconds if it exists
                    time_columns = [col for col in df.columns if 'time' in col.lower()]
                    if time_columns:
                        time_col = time_columns[0]
                        print(f"  Found time column: {time_col}")
                        df['Time_seconds'] = df[time_col].apply(parse_time_to_seconds)
                        df['Time'] = df[time_col]
                    
                    return df
                    
            except Exception as e:
                print(f"  Fallback approach {i+1} failed: {e}")
                continue
        
        print("  All fallback approaches failed.")
        return pd.DataFrame()
        
    except Exception as e:
        print(f"Fallback error reading {filepath}: {e}")
        return pd.DataFrame()

def manual_parse_file(filepath):
    """Manually parse file when standard methods fail"""
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        
        # Find the actual data start (look for lines with consistent field counts)
        data_lines = []
        header_line = None
        
        for i, line in enumerate(lines):
            line = line.strip()
            if not line:
                continue
            
            # Try different separators
            for sep in ['\t', ',', ' ']:
                fields = line.split(sep)
                if len(fields) > 5:  # Assuming at least 5 fields for meaningful data
                    if header_line is None:
                        # This might be the header
                        header_line = fields
                        header_idx = i
                        break
                    elif len(fields) == len(header_line):
                        # This is a data line with matching field count
                        data_lines.append(fields)
                        break
        
        if header_line and data_lines:
            df = pd.DataFrame(data_lines, columns=header_line)
            print(f"  Manual parsing successful! Shape: {df.shape}")
            return df
        else:
            print("  Manual parsing failed - could not identify data structure")
            return pd.DataFrame()
            
    except Exception as e:
        print(f"Manual parsing error: {e}")
        return pd.DataFrame()

def get_video_duration(df):
    """Get the duration of a video from its DataFrame"""
    if df.empty or 'Time_seconds' not in df.columns:
        return 0
    return df['Time_seconds'].max()

def process_subject_data(subject_folder):
    """Process all video files for a single subject"""
    subject_name = os.path.basename(subject_folder)
    print(f"Processing {subject_name}...")
    
    # Find all detailed.txt files
    detailed_files = glob.glob(os.path.join(subject_folder, "*detailed.txt"))
    detailed_files.sort()  # Ensure proper order
    
    if not detailed_files:
        print(f"No detailed.txt files found for {subject_name}")
        return pd.DataFrame()
    
    all_dataframes = []
    cumulative_time = 0
    
    for i, filepath in enumerate(detailed_files):
        print(f"  Processing file {i+1}: {os.path.basename(filepath)}")
        
        # Read the file
        df = read_detailed_file(filepath)
        
        if df.empty:
            continue
        
        # Add subject identifier
        df['Subject'] = subject_name
        df['Video_Number'] = i + 1
        df['Original_Time'] = df['Time'] if 'Time' in df.columns else None
        
        # Adjust time for sequential videos (videos 2, 3, 4, etc.)
        if i > 0 and 'Time_seconds' in df.columns:
            df['Time_seconds_adjusted'] = df['Time_seconds'] + cumulative_time
        else:
            df['Time_seconds_adjusted'] = df['Time_seconds'] if 'Time_seconds' in df.columns else 0
        
        # Update cumulative time for next video
        if 'Time_seconds' in df.columns:
            video_duration = get_video_duration(df)
            cumulative_time += video_duration
        
        all_dataframes.append(df)
    
    if all_dataframes:
        concatenated_df = pd.concat(all_dataframes, ignore_index=True)
        print(f"  Concatenated {len(all_dataframes)} files for {subject_name}")
        return concatenated_df
    else:
        return pd.DataFrame()

def extract_all_face_reader_data(base_path="data/Face_reader_data"):
    """Extract and concatenate all Face Reader data"""
    
    # Define subjects and their expected video counts
    subjects_info = {
        'subject_3F': 4,
        'subject_4F': 4,
        'subject_8M': 3,
        'subject_11F': 4
    }
    
    all_subjects_data = []
    
    for subject_name, expected_videos in subjects_info.items():
        subject_folder = os.path.join(base_path, subject_name)
        
        if not os.path.exists(subject_folder):
            print(f"Warning: Folder {subject_folder} not found")
            continue
        
        # Process subject data
        subject_df = process_subject_data(subject_folder)
        
        if not subject_df.empty:
            all_subjects_data.append(subject_df)
            print(f"Successfully processed {subject_name} - {len(subject_df)} rows")
        else:
            print(f"No data extracted for {subject_name}")
    
    # Concatenate all subjects
    if all_subjects_data:
        final_df = pd.concat(all_subjects_data, ignore_index=True)
        print(f"\nFinal concatenated dataset: {len(final_df)} rows, {len(final_df.columns)} columns")
        print(f"Subjects included: {final_df['Subject'].unique().tolist()}")
        
        # Display column information
        print(f"\nColumns in final dataset:")
        for col in final_df.columns:
            print(f"  - {col}")
        
        return final_df
    else:
        print("No data was successfully extracted from any subject")
        return pd.DataFrame()

# Additional utility functions for data analysis

def get_onset_time(subject_folder):
    """Get the onset time of the first video for a subject (if available)"""
    # Look for onset information in state or detailed files
    onset_files = glob.glob(os.path.join(subject_folder, "*1_video*"))
    
    # This is a placeholder - you may need to modify based on actual file format
    # that contains the unix timestamp onset information
    return None

def align_with_biopac_time(df, subject_onset_times=None):
    """Align Face Reader data with Biopac data using onset times"""
    if subject_onset_times is None:
        print("No onset times provided - returning data with relative timestamps")
        return df
    
    # Add absolute timestamps based on onset times
    for subject in df['Subject'].unique():
        if subject in subject_onset_times:
            mask = df['Subject'] == subject
            onset_time = subject_onset_times[subject]
            # Convert relative time to absolute time
            df.loc[mask, 'Absolute_Time'] = onset_time + df.loc[mask, 'Time_seconds_adjusted']
    
    return df

def diagnose_files(base_path="data/Face_reader_data"):
    """Diagnose file structures to understand the format"""
    subjects_info = {
        'subject_3F': 4,
        'subject_4F': 4,
        'subject_8M': 3,
        'subject_11F': 4
    }
    
    print("=== FILE STRUCTURE DIAGNOSIS ===")
    
    for subject_name in subjects_info.keys():
        subject_folder = os.path.join(base_path, subject_name)
        
        if not os.path.exists(subject_folder):
            print(f"Folder {subject_folder} not found")
            continue
        
        print(f"\n--- {subject_name} ---")
        detailed_files = glob.glob(os.path.join(subject_folder, "*detailed.txt"))
        detailed_files.sort()
        
        if detailed_files:
            # Just inspect the first file for each subject
            inspect_file_structure(detailed_files[0], num_lines=15)
        else:
            print(f"No detailed.txt files found in {subject_folder}")

# Main execution
if __name__ == "__main__":
    # Extract all data
    face_reader_df = extract_all_face_reader_data()
    
    # Display sample of the data
    if not face_reader_df.empty:
        print("\nSample of extracted data:")
        print(face_reader_df.head(10))
        
        print(f"\nColumns in the dataset:")
        for col in face_reader_df.columns:
            print(f"  - {col}")
        
        print("\nData summary by subject:")
        if 'Time_seconds_adjusted' in face_reader_df.columns:
            summary = face_reader_df.groupby(['Subject', 'Video_Number']).agg({
                'Time_seconds_adjusted': ['min', 'max', 'count']
            }).round(2)
            print(summary)
        
        # Check for failed readings
        print("\nChecking for failed facial recognition data:")
        for col in ['Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted']:
            if col in face_reader_df.columns:
                failed_count = face_reader_df[col].isna().sum()
                total_count = len(face_reader_df)
                print(f"  {col}: {failed_count}/{total_count} ({failed_count/total_count*100:.1f}%) failed readings")
        
        # Save to CSV
        output_filename = "concatenated_face_reader_data.csv"
        face_reader_df.to_csv(output_filename, index=False)
        print(f"\nData saved to {output_filename}")
        
        # Show some actual emotion data
        print("\nSample emotion data (first 5 rows with valid readings):")
        emotion_cols = ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
        available_emotion_cols = [col for col in emotion_cols if col in face_reader_df.columns]
        if available_emotion_cols:
            # Find first few rows without NaN values
            valid_data = face_reader_df.dropna(subset=[col for col in available_emotion_cols if col != 'Video Time'])
            if not valid_data.empty:
                print(valid_data[available_emotion_cols].head())
            else:
                print("No rows found without failed readings")
    else:
        print("\nNo data was extracted. Please check the file format.")
    
    # Example of how to use the alignment function (you'll need to provide onset times)
    # onset_times = {
    #     'subject_3F': 1640995200,  # Example unix timestamp
    #     'subject_4F': 1640995300,
    #     'subject_8M': 1640995400,
    #     'subject_11F': 1640995500
    # }
    # aligned_df = align_with_biopac_time(face_reader_df, onset_times)

Processing subject_3F...
  Processing file 1: Subject_3F_Analysis_1_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (23111, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...
  Processing file 2: Subject_3F_Analysis_2_video_detailed.txt
  Reading with skiprows=5, header=0...


  df[col] = pd.to_numeric(df[col], errors='ignore')
  df[col] = pd.to_numeric(df[col], errors='ignore')


  Success! Shape: (23111, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...
  Processing file 3: Subject_3F_Analysis_3_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (23111, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...


  df[col] = pd.to_numeric(df[col], errors='ignore')
  df[col] = pd.to_numeric(df[col], errors='ignore')
  df[col] = pd.to_numeric(df[col], errors='ignore')


  Processing file 4: Subject_3F_Analysis_4_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (2428, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...
  Concatenated 4 files for subject_3F
Successfully processed subject_3F - 71761 rows
Processing subject_4F...
  Processing file 1: Subject_4F_Analysis_1_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (23111, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...
  Processing file 2: Subject_4F_Analysis_2_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (23111, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...
  Processing file 3: Sub

  df[col] = pd.to_numeric(df[col], errors='ignore')
  df[col] = pd.to_numeric(df[col], errors='ignore')


  Success! Shape: (23111, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...
  Processing file 4: Subject_4F_Analysis_4_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (18046, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...


  df[col] = pd.to_numeric(df[col], errors='ignore')
  df[col] = pd.to_numeric(df[col], errors='ignore')


  Concatenated 4 files for subject_4F
Successfully processed subject_4F - 87379 rows
Processing subject_8M...
  Processing file 1: Subject_8M_Analysis_1_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (23111, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...
  Processing file 2: Subject_8M_Analysis_2_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (23111, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...


  df[col] = pd.to_numeric(df[col], errors='ignore')
  df[col] = pd.to_numeric(df[col], errors='ignore')


  Processing file 3: Subject_8M_Analysis_3_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (21560, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...
  Concatenated 3 files for subject_8M
Successfully processed subject_8M - 67782 rows
Processing subject_11F...
  Processing file 1: Subject_11F_Analysis_1_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (23111, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...


  df[col] = pd.to_numeric(df[col], errors='ignore')
  df[col] = pd.to_numeric(df[col], errors='ignore')


  Processing file 2: Subject_11F_Analysis_2_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (23111, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...
  Processing file 3: Subject_11F_Analysis_3_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (23111, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...


  df[col] = pd.to_numeric(df[col], errors='ignore')
  df[col] = pd.to_numeric(df[col], errors='ignore')


  Processing file 4: Subject_11F_Analysis_4_video_detailed.txt
  Reading with skiprows=5, header=0...
  Success! Shape: (47, 10)
  Columns: ['Video Time', 'Neutral', 'Happy', 'Sad', 'Angry', 'Surprised', 'Scared', 'Disgusted', 'Valence', 'Arousal']
  Converting Video Time to seconds...
  Concatenated 4 files for subject_11F
Successfully processed subject_11F - 69380 rows

Final concatenated dataset: 296302 rows, 16 columns
Subjects included: ['subject_3F', 'subject_4F', 'subject_8M', 'subject_11F']

Columns in final dataset:
  - Video Time
  - Neutral
  - Happy
  - Sad
  - Angry
  - Surprised
  - Scared
  - Disgusted
  - Valence
  - Arousal
  - Time_seconds
  - Time
  - Subject
  - Video_Number
  - Original_Time
  - Time_seconds_adjusted

Sample of extracted data:
     Video Time   Neutral     Happy       Sad     Angry  Surprised    Scared  \
0  00:00:00.000  0.725320  0.001752  0.313713  0.015713   0.005477  0.005976   
1  00:00:00.041  0.711566  0.002049  0.318334  0.018173   0.00644

In [6]:
face_reader = pd.read_csv('concatenated_face_reader_data.csv')
face_reader

Unnamed: 0,Video Time,Neutral,Happy,Sad,Angry,Surprised,Scared,Disgusted,Valence,Arousal,Time_seconds,Time,Subject,Video_Number,Original_Time,Time_seconds_adjusted
0,00:00:00.000,0.725320,0.001752,0.313713,0.015713,0.005477,0.005976,0.024165,-0.311961,0.464322,0.000,00:00:00.000,subject_3F,1,00:00:00.000,0.000
1,00:00:00.041,0.711566,0.002049,0.318334,0.018173,0.006442,0.012076,0.030354,-0.316285,0.464322,0.041,00:00:00.041,subject_3F,1,00:00:00.041,0.041
2,00:00:00.083,0.699748,0.002356,0.322387,0.020403,0.007285,0.017145,0.035767,-0.320031,0.463365,0.083,00:00:00.083,subject_3F,1,00:00:00.083,0.083
3,00:00:00.125,0.690467,0.002739,0.325771,0.022579,0.008102,0.021219,0.040750,-0.323032,0.461983,0.125,00:00:00.125,subject_3F,1,00:00:00.125,0.125
4,00:00:00.166,0.684551,0.003211,0.326954,0.024626,0.008869,0.025046,0.044649,-0.323743,0.460842,0.166,00:00:00.166,subject_3F,1,00:00:00.166,0.166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
296297,00:00:01.751,,,,,,,,,,1.751,00:00:01.751,subject_11F,4,00:00:01.751,2893.388
296298,00:00:01.793,,,,,,,,,,1.793,00:00:01.793,subject_11F,4,00:00:01.793,2893.430
296299,00:00:01.835,0.547326,0.227016,0.091208,0.065439,0.176164,0.057305,0.134598,0.092418,0.560882,1.835,00:00:01.835,subject_11F,4,00:00:01.835,2893.472
296300,00:00:01.876,,,,,,,,,,1.876,00:00:01.876,subject_11F,4,00:00:01.876,2893.513
