In [1]:
import pandas as pd

sampling_rate = 2000  # Biopac EDA sampling rate in Hz

def load_behavioral(session_name, subject_id):
    # Construct path rooted at data/
    filename = f"data/Behavioral_data/{session_name}Subject{subject_id}.csv"
    df = pd.read_csv(filename)
    
    # Clean and normalize n_back
    df['n_back'] = df['n_back'].str.strip().str.lower()
    
    def map_nback(text):
        if 'one back' in text:
            return '1_back'
        elif 'three back' in text:
            return '3_back'
        else:
            return None
    
    df['n_back_task'] = df['n_back'].apply(map_nback)
    
    # Calculate accuracy by trial and n_back_task
    # Check correctness by comparing Response and Correct_Response
    df['correct'] = df['Response'] == df['Correct_Response']
    trial_summary = df.groupby(['TrialNumber', 'n_back_task']).agg(
        accuracy=('correct', 'mean'),
        mean_rt=('Response_Time', 'mean')
    ).reset_index()
    
    trial_summary['session'] = session_name.lower()
    
    return trial_summary

participants = ['3F', '4F', '6M', '8M', '11F']
all_data = []

for pid in participants:
    subject_num = pid[:-1]
    gender = pid[-1]
    
    calming_behav = load_behavioral('Calming', subject_num)
    vexing_behav = load_behavioral('Vexing', subject_num)
    
    behavior_df = pd.concat([calming_behav, vexing_behav], ignore_index=True)
    
    eda_path = f"data/Biopac_data/EDA/Subject{subject_num}{gender}_EDA.csv"
    triggers_path = f"data/Biopac_data/Timing/Subject{subject_num}{gender}_Triggers_trial.csv"
    
    eda_df = pd.read_csv(eda_path, header = None, names = ['EDA'])
    triggers_df = pd.read_csv(triggers_path)
    
    for idx, row in triggers_df.iterrows():
        for condition_prefix in ['calming', 'vexing']:
            for nback in ['1_back', '3_back']:
                start_col = f"{condition_prefix}_{nback}_start"
                end_col = f"{condition_prefix}_{nback}_end"
                
                if start_col in triggers_df.columns and end_col in triggers_df.columns:
                    start_time = row[start_col]
                    end_time = row[end_col]
                    
                    if pd.isna(start_time) or pd.isna(end_time):
                        continue
                    
                    start_idx = int(start_time * sampling_rate)
                    end_idx = int(end_time * sampling_rate)
                    
                    eda_slice = eda_df['EDA'].iloc[start_idx:end_idx]
                    mean_eda = eda_slice.mean()
                    
                    session_label = condition_prefix
                    nback_label = nback
                    
                    behav_match = behavior_df[
                        (behavior_df['session'] == session_label) & 
                        (behavior_df['n_back_task'] == nback_label) & 
                        (behavior_df['TrialNumber'] == (idx + 1))
                    ]
                    
                    accuracy = behav_match['accuracy'].values[0] if not behav_match.empty else None
                    mean_rt = behav_match['mean_rt'].values[0] if not behav_match.empty else None
                    
                    all_data.append({
                        'participant': pid,
                        'trial': idx + 1,
                        'condition': f"{condition_prefix.capitalize()} {nback.replace('_', '-').capitalize()}",
                        'mean_eda': mean_eda,
                        'accuracy': accuracy,
                        'mean_rt': mean_rt
                    })

combined_df = pd.DataFrame(all_data)

# combined_df.to_csv('df.csv')

In [10]:
import pandas as pd

# Sampling rate
sampling_rate = 2000  # Hz

# === Function to load and process behavioral data ===
def load_behavioral(session_name, subject_id):
    # Load CSV
    filename = f"data/Behavioral_data/{session_name}Subject{subject_id}.csv"
    df = pd.read_csv(filename)
    
    # Filter out "Rest" blocks
    df = df[df['n_back'] != 'Rest']
    
    # Clean n_back
    df['n_back'] = df['n_back'].str.strip().str.lower()
    
    # Map to 1_back / 3_back
    def map_nback(text):
        if 'one back' in text:
            return '1_back'
        elif 'three back' in text:
            return '3_back'
        else:
            return None
    
    df['n_back_task'] = df['n_back'].apply(map_nback)
    
    # Correctness
    df['correct'] = df['Response'] == df['Correct_Response']
    
    # Group by TrialNumber (block) and n_back
    trial_summary = df.groupby(['TrialNumber', 'n_back_task']).agg(
        accuracy=('correct', 'mean'),
        mean_rt=('Response_Time', 'mean')
    ).reset_index()
    
    # Add session label
    trial_summary['session'] = session_name.lower()
    
    return trial_summary

# === Main loop over participants ===
participants = ['3F', '4F', '6M', '8M', '11F']
all_data = []

for pid in participants:
    subject_num = pid[:-1]
    gender = pid[-1]
    
    # Load behavioral summaries
    calming_behav = load_behavioral('Calming', subject_num)
    vexing_behav = load_behavioral('Vexing', subject_num)
    
    # Combine sessions
    behavior_df = pd.concat([calming_behav, vexing_behav], ignore_index=True)
    
    # Load EDA and TRIGGERS_BLOCK (block-level triggers!)
    eda_path = f"data/Biopac_data/EDA/Subject{subject_num}{gender}_EDA.csv"
    triggers_path = f"data/Biopac_data/Timing/Subject{subject_num}{gender}_Triggers_block.csv"
    
    eda_df = pd.read_csv(eda_path, header=None, names=['EDA'])
    triggers_df = pd.read_csv(triggers_path)
    
    print(f"\nParticipant {pid} → triggers_df shape: {triggers_df.shape} (should be ~8 rows, wide format)")

    # === NEW: Initialize block counters per session/condition ===
    block_counter = {
        'calming': {
            '1_back': 1,
            '3_back': 1
        },
        'vexing': {
            '1_back': 1,
            '3_back': 1
        }
    }

    # Global trial_counter for output
    trial_counter_global = 1
    
    # Loop over triggers rows — EACH row contains 4 BLOCKS (1-back, 3-back, calming/vexing)
    for idx, row in triggers_df.iterrows():
        for condition_prefix in ['calming', 'vexing']:
            for nback in ['1_back', '3_back']:
                start_col = f"{condition_prefix}_{nback}_start"
                end_col = f"{condition_prefix}_{nback}_end"
                
                if pd.notna(row[start_col]) and pd.notna(row[end_col]):
                    # Process this block
                    start_time = row[start_col]
                    end_time = row[end_col]
                    
                    start_idx = int(start_time * sampling_rate)
                    end_idx = int(end_time * sampling_rate)
                    
                    eda_slice = eda_df['EDA'].iloc[start_idx:end_idx]
                    mean_eda = eda_slice.mean()
                    
                    session_label = condition_prefix
                    nback_label = nback
                    
                    # === NEW: Use block_counter to match behavioral block ===
                    block_num = block_counter[session_label][nback_label]
                    
                    behav_match = behavior_df[
                        (behavior_df['session'] == session_label) &
                        (behavior_df['n_back_task'] == nback_label)
                    ].sort_values('TrialNumber')
                    
                    # Get correct row
                    if block_num <= len(behav_match):
                        behav_row = behav_match.iloc[block_num - 1]
                        accuracy = behav_row['accuracy']
                        mean_rt = behav_row['mean_rt']
                    else:
                        print(f"WARNING: Participant {pid} → no behavioral match for {session_label} {nback_label} block {block_num}")
                        accuracy = 0
                        mean_rt = 0
                    
                    # === Set NA → 0 ===
                    if pd.isna(accuracy):
                        accuracy = 0
                    if pd.isna(mean_rt):
                        mean_rt = 0
                    if pd.isna(mean_eda):
                        mean_eda = 0
                    
                    # Append row
                    all_data.append({
                        'participant': pid,
                        'trial': trial_counter_global,
                        'condition': f"{condition_prefix.capitalize()} {nback.replace('_', '-').capitalize()}",
                        'mean_eda': mean_eda,
                        'accuracy': accuracy,
                        'mean_rt': mean_rt
                    })
                    
                    # Increment counters
                    block_counter[session_label][nback_label] += 1
                    trial_counter_global += 1
    
    # Participant progress summary
    print(f"Participant {pid}: {trial_counter_global - 1} trials processed (expected: 32)")

# Combine all participants into one DataFrame
combined_df = pd.DataFrame(all_data)

# Optional: save to CSV
# combined_df.to_csv('df.csv', index=False)

# Preview result
print(f"\nFinal combined_df shape: {combined_df.shape}")
print(combined_df.head())



Participant 3F → triggers_df shape: (8, 8) (should be ~8 rows, wide format)
Participant 3F: 32 trials processed (expected: 32)

Participant 4F → triggers_df shape: (8, 8) (should be ~8 rows, wide format)
Participant 4F: 32 trials processed (expected: 32)

Participant 6M → triggers_df shape: (8, 8) (should be ~8 rows, wide format)
Participant 6M: 32 trials processed (expected: 32)

Participant 8M → triggers_df shape: (8, 8) (should be ~8 rows, wide format)
Participant 8M: 32 trials processed (expected: 32)

Participant 11F → triggers_df shape: (8, 8) (should be ~8 rows, wide format)
Participant 11F: 32 trials processed (expected: 32)

Final combined_df shape: (160, 6)
  participant  trial       condition  mean_eda  accuracy     mean_rt
0          3F      1  Calming 1-back  7.537035  0.863636  564.409091
1          3F      2  Calming 3-back  7.053329  0.681818  488.136364
2          3F      3   Vexing 1-back  6.525453  0.909091  292.227273
3          3F      4   Vexing 3-back  6.014120 

In [11]:
combined_df

Unnamed: 0,participant,trial,condition,mean_eda,accuracy,mean_rt
0,3F,1,Calming 1-back,7.537035,0.863636,564.409091
1,3F,2,Calming 3-back,7.053329,0.681818,488.136364
2,3F,3,Vexing 1-back,6.525453,0.909091,292.227273
3,3F,4,Vexing 3-back,6.014120,0.727273,617.045455
4,3F,5,Calming 1-back,6.135949,0.954545,308.954545
...,...,...,...,...,...,...
155,11F,28,Vexing 3-back,6.541623,0.727273,215.181818
156,11F,29,Calming 1-back,7.934698,0.909091,294.590909
157,11F,30,Calming 3-back,7.691966,0.727273,519.636364
158,11F,31,Vexing 1-back,6.379597,0.818182,267.545455


In [15]:
import pandas as pd

# Sampling rate
sampling_rate = 2000  # Hz

# === Function to load and process behavioral data ===
def load_behavioral(session_name, subject_id):
    # Load CSV
    filename = f"data/Behavioral_data/{session_name}Subject{subject_id}.csv"
    df = pd.read_csv(filename)
    
    # Filter out "Rest" blocks
    df = df[df['n_back'] != 'Rest']
    
    # Clean n_back
    df['n_back'] = df['n_back'].str.strip().str.lower()
    
    # Map to 1_back / 3_back
    def map_nback(text):
        if 'one back' in text:
            return '1_back'
        elif 'three back' in text:
            return '3_back'
        else:
            return None
    
    df['n_back_task'] = df['n_back'].apply(map_nback)
    
    # Correctness
    df['correct'] = df['Response'] == df['Correct_Response']
    
    # Group by TrialNumber (block) and n_back
    trial_summary = df.groupby(['TrialNumber', 'n_back_task']).agg(
        accuracy=('correct', 'mean'),
        mean_rt=('Response_Time', 'mean')
    ).reset_index()
    
    # Add session label
    trial_summary['session'] = session_name.lower()
    
    return trial_summary

# === Main loop over participants ===
participants = ['3F', '4F', '6M', '8M', '11F']
all_data = []

for pid in participants:
    subject_num = pid[:-1]
    gender = pid[-1]
    
    # Load behavioral summaries
    calming_behav = load_behavioral('Calming', subject_num)
    vexing_behav = load_behavioral('Vexing', subject_num)
    
    # Combine sessions — preserves session order (Calming first, then Vexing)
    behavior_df = pd.concat([calming_behav, vexing_behav], ignore_index=True)
    
    # Load EDA and TRIGGERS_BLOCK (block-level triggers!)
    eda_path = f"data/Biopac_data/EDA/Subject{subject_num}{gender}_EDA.csv"
    triggers_path = f"data/Biopac_data/Timing/Subject{subject_num}{gender}_Triggers_block.csv"
    
    eda_df = pd.read_csv(eda_path, header=None, names=['EDA'])
    triggers_df = pd.read_csv(triggers_path)
    
    print(f"\nParticipant {pid} → triggers_df shape: {triggers_df.shape} (should be ~8 rows, wide format)")

    # === Loop through behavioral rows — IN ORIGINAL ORDER ===
    for idx, behav_row in behavior_df.sort_values(['session', 'TrialNumber']).iterrows():
        session_label = behav_row['session']
        nback_label = behav_row['n_back_task']
        trial_number = behav_row['TrialNumber']
        
        found = False
        
        # Search for first unused matching trigger
        for t_idx, t_row in triggers_df.iterrows():
            start_col = f"{session_label}_{nback_label}_start"
            end_col = f"{session_label}_{nback_label}_end"
            
            if pd.notna(t_row[start_col]) and pd.notna(t_row[end_col]):
                # Process this EDA slice
                start_time = t_row[start_col]
                end_time = t_row[end_col]
                
                start_idx = int(start_time * sampling_rate)
                end_idx = int(end_time * sampling_rate)
                
                eda_slice = eda_df['EDA'].iloc[start_idx:end_idx]
                mean_eda = eda_slice.mean()
                
                # Mark this trigger as used
                triggers_df.at[t_idx, start_col] = None
                triggers_df.at[t_idx, end_col] = None
                
                # Append row
                all_data.append({
                    'participant': pid,
                    'trial': len(all_data) + 1,
                    'condition': f"{session_label.capitalize()} {nback_label.replace('_', '-').capitalize()}",
                    'mean_eda': 0 if pd.isna(mean_eda) else mean_eda,
                    'accuracy': 0 if pd.isna(behav_row['accuracy']) else behav_row['accuracy'],
                    'mean_rt': 0 if pd.isna(behav_row['mean_rt']) else behav_row['mean_rt']
                })
                
                found = True
                break
        
        if not found:
            print(f"WARNING: No matching trigger found for {session_label} {nback_label} TrialNumber {trial_number}")

    # Participant progress summary
    trials_processed = len(all_data) // len(participants)
    print(f"Participant {pid}: {trials_processed} trials processed (expected: 32)")

# Combine all participants into one DataFrame
combined_df = pd.DataFrame(all_data)

# Optional: save to CSV
combined_df.to_csv('df.csv', index=False)

# Preview result
print(f"\nFinal combined_df shape: {combined_df.shape}")
print(combined_df.head())



Participant 3F → triggers_df shape: (8, 8) (should be ~8 rows, wide format)
Participant 3F: 6 trials processed (expected: 32)

Participant 4F → triggers_df shape: (8, 8) (should be ~8 rows, wide format)
Participant 4F: 12 trials processed (expected: 32)

Participant 6M → triggers_df shape: (8, 8) (should be ~8 rows, wide format)
Participant 6M: 19 trials processed (expected: 32)

Participant 8M → triggers_df shape: (8, 8) (should be ~8 rows, wide format)
Participant 8M: 25 trials processed (expected: 32)

Participant 11F → triggers_df shape: (8, 8) (should be ~8 rows, wide format)
Participant 11F: 32 trials processed (expected: 32)

Final combined_df shape: (160, 6)
  participant  trial       condition  mean_eda  accuracy     mean_rt
0          3F      1  Calming 1-back  7.537035  0.863636  564.409091
1          3F      2  Calming 3-back  7.053329  0.681818  488.136364
2          3F      3  Calming 3-back  6.492472  0.727273  492.272727
3          3F      4  Calming 1-back  6.135949  