# PCIbex results parser + eye-tracking metrics

This notebook parses the PCIbex results file (`results_dev.csv`) into readable event and trial tables, extracts key self-paced reading timings (r1..), question/choice info, collects EyeTracker calibration events, and processes the separate eye-tracking AOI data (`et-Gabor001.csv`) to compute standard metrics per trial and AOI (left/right).

Outputs:
- `parsed_events.csv`: all parsed events with normalized columns
- `parsed_trials_with_et.csv`: one row per trial with RTs, metadata, choice, and AOI metrics (when matched)
- Quick validation summary at the end

In [32]:
# Imports & paths
import pandas as pd
import numpy as np
from pathlib import Path
import re

ROOT = Path(r"c:\\Users\\parti\\Projects\\pcibex-hun")
RESULTS_PATH = ROOT / "results_dev.csv"
# Prefer root experiment_data.csv; fallback to chunk_includes if needed
EXP_CANDIDATES = [ROOT / "experiment_data.csv", ROOT / "chunk_includes" / "experiment_data.csv"]
for _p in EXP_CANDIDATES:
    if _p.exists():
        EXP_PATH = _p
        break
else:
    EXP_PATH = EXP_CANDIDATES[0]
ET_PATH = ROOT / "et-Gabor001.csv"


def read_pcibex_results(path):
    # Read raw lines; handle comment header blocks to build a DataFrame
    rows = []
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.rstrip('\n')
            if not line or line.startswith('#'):
                continue
            parts = [p.strip() for p in line.split(',')]
            rows.append(parts)

    maxlen = max(len(r) for r in rows) if rows else 0
    df = pd.DataFrame([r + ['']*(maxlen-len(r)) for r in rows])

    base_cols = [
        'Results reception time', 'MD5', 'Controller', 'Order', 'Inner', 'Label',
        'Group', 'PennElementType', 'PennElementName', 'Parameter', 'Value', 'EventTime',
        'prolific_id', 'Comments'
    ]
    colnames = []
    for i in range(maxlen):
        colnames.append(base_cols[i] if i < len(base_cols) else f'Extra{i-len(base_cols)+1}')
    df.columns = colnames

    # Normalize
    df['timestamp'] = pd.to_datetime(df['Results reception time'], errors='coerce')
    df['EventTime'] = pd.to_numeric(df['EventTime'], errors='coerce')
    for c in ['Label','PennElementType','PennElementName','Parameter','Value','prolific_id','Comments']:
        if c in df.columns:
            df[c] = df[c].astype(str).replace({'nan':'', 'None':'', 'NaN':''})

    # Map extras to extended schema
    trailing_map = ['ID','item','condition','group','image_left','image_right','participant_id','Comments2']
    extra_cols = [c for c in df.columns if c.startswith('Extra')]
    for idx, name in enumerate(extra_cols[:len(trailing_map)]):
        df[trailing_map[idx]] = df[name]

    # Numerics
    for c in ['Order','Inner','Group','ID','item']:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors='coerce')

    # Participant ID extraction (strict): only from TextInput Final for prolific/participant/id
    df['participant_id'] = ''
    mask_textinput = df['PennElementType'].str.lower().eq('textinput')
    mask_final = df['Parameter'].str.lower().eq('final')
    mask_name = df['PennElementName'].str.contains('prolific|participant|id', case=False, na=False)
    pid_rows = df[mask_textinput & mask_final & mask_name & (df['Value']!='')]
    if not pid_rows.empty:
        # Use the last reported ID per MD5
        id_map = pid_rows.sort_values('EventTime').groupby('MD5')['Value'].last()
        df['participant_id'] = df['MD5'].map(id_map).fillna('')

    # Fallback 1: Extended schema column if present
    if 'participant_id' in df.columns:
        df['participant_id'] = df['participant_id'].astype(str)

    # Fallback 2: prolific_id column when it looks like a real ID (not literal placeholder)
    valid_prol = df['prolific_id'].notna() & (df['prolific_id']!='') & (~df['prolific_id'].str.fullmatch(r'prolific_id', case=False, na=False))
    df.loc[df['participant_id'].eq('') & valid_prol, 'participant_id'] = df.loc[df['participant_id'].eq('') & valid_prol, 'prolific_id']

    # Fallback 3: ET filename pattern et-<ID>.csv
    if (df['participant_id'].eq('').all()):
        m = re.search(r"et-([^./\\]+)", str(ET_PATH.name))
        if m:
            df['participant_id'] = m.group(1)

    # Forward/back fill per MD5 to ensure all rows carry the id
    df['participant_id'] = df.groupby('MD5')['participant_id'].ffill().bfill()

    return df

events_raw = read_pcibex_results(RESULTS_PATH)
print(f"Parsed raw rows: {len(events_raw)} | columns: {len(events_raw.columns)}")
print('Using EXP_PATH =', EXP_PATH)
print('Participant ids found:', events_raw['participant_id'].dropna().unique().tolist())
display(events_raw.head(10))

Parsed raw rows: 642 | columns: 29
Using EXP_PATH = c:\Users\parti\Projects\pcibex-hun\chunk_includes\experiment_data.csv
Participant ids found: ['Gabor001']


Unnamed: 0,Results reception time,MD5,Controller,Order,Inner,Label,Group,PennElementType,PennElementName,Parameter,...,Extra6,Extra7,timestamp,ID,item,condition,group,image_left,image_right,participant_id
0,1758169182,a3fbd08eb2e85cb6cad0ac3c680949dd,PennController,0,0,welcome,,PennController,0,_Trial_,...,,,NaT,,,,,,,Gabor001
1,1758169182,a3fbd08eb2e85cb6cad0ac3c680949dd,PennController,0,0,welcome,,PennController,0,_Header_,...,,,NaT,,,,,,,Gabor001
2,1758169182,a3fbd08eb2e85cb6cad0ac3c680949dd,PennController,0,0,welcome,,PennController,0,_Header_,...,,,NaT,,,,,,,Gabor001
3,1758169182,a3fbd08eb2e85cb6cad0ac3c680949dd,PennController,0,0,welcome,,Html,consent_form,,...,,,NaT,,,,,,,Gabor001
4,1758169182,a3fbd08eb2e85cb6cad0ac3c680949dd,PennController,0,0,welcome,,PennController,0,_Trial_,...,,,NaT,,,,,,,Gabor001
5,1758169182,a3fbd08eb2e85cb6cad0ac3c680949dd,PennController,1,0,calibration,,PennController,1,_Trial_,...,,,NaT,,,,,,,Gabor001
6,1758169182,a3fbd08eb2e85cb6cad0ac3c680949dd,PennController,1,0,calibration,,PennController,1,_Header_,...,,,NaT,,,,,,,Gabor001
7,1758169182,a3fbd08eb2e85cb6cad0ac3c680949dd,PennController,1,0,calibration,,PennController,1,_Header_,...,,,NaT,,,,,,,Gabor001
8,1758169182,a3fbd08eb2e85cb6cad0ac3c680949dd,PennController,1,0,calibration,,EyeTracker,tracker,calibration,...,,,NaT,,,,,,,Gabor001
9,1758169182,a3fbd08eb2e85cb6cad0ac3c680949dd,PennController,1,0,calibration,,PennController,1,_Trial_,...,,,NaT,,,,,,,Gabor001


In [33]:
# Build events and trials
ev = events_raw.copy()

# Sort by participant/time for deterministic grouping
ev = ev.sort_values(['MD5','EventTime']).reset_index(drop=True)

# Trial segmentation using _Trial_ Start/End markers if present; otherwise use changes in Label within experiment/practice blocks
trial_start_mask = (ev['Parameter'].eq('_Trial_') & ev['Value'].eq('Start'))
ev['trial_start_flag'] = trial_start_mask

# Fallback: mark start when Label changes within each MD5
label_change = ev.groupby('MD5', sort=False)['Label'].transform(lambda s: s.ne(s.shift(1)))
ev['trial_start_flag'] = ev['trial_start_flag'] | label_change.fillna(False)

# Create trial_id per MD5 starting at 0
ev['trial_id'] = ev.groupby('MD5', sort=False)['trial_start_flag'].cumsum() - 1

# Per-trial metadata: last non-null observed values
# Helper: last non-null
import math

def last_non_null_series(s):
    s2 = s.dropna()
    s2 = s2[s2.astype(str)!='']
    return s2.iloc[-1] if len(s2) else np.nan

trial_meta_agg = (ev.groupby(['MD5','trial_id'], sort=False).apply(lambda g: pd.Series({
        'Label': last_non_null_series(g['Label']),
        'Order': pd.to_numeric(last_non_null_series(g['Order']), errors='coerce'),
        'ID': pd.to_numeric(last_non_null_series(g['ID']), errors='coerce'),
        'item': pd.to_numeric(last_non_null_series(g['item']), errors='coerce'),
        'condition': last_non_null_series(g['condition']),
        'group': pd.to_numeric(last_non_null_series(g['group']), errors='coerce'),
        'image_left_obs': last_non_null_series(g['image_left']),
        'image_right_obs': last_non_null_series(g['image_right']),
        'participant_id': last_non_null_series(g['participant_id']),
        'trial_start_time': g.loc[g['Parameter'].eq('_Trial_') & g['Value'].eq('Start'), 'EventTime'].min()
    }))
    .reset_index()
)

# If trial_start_time is NaN, use first EventTime in trial
fallback_start = ev.groupby(['MD5','trial_id'], sort=False)['EventTime'].min().reset_index().rename(columns={'EventTime':'fallback_start'})
trial_meta_agg = trial_meta_agg.merge(fallback_start, on=['MD5','trial_id'], how='left')
trial_meta_agg['trial_start_time'] = trial_meta_agg['trial_start_time'].fillna(trial_meta_agg['fallback_start'])
trial_meta_agg = trial_meta_agg.drop(columns=['fallback_start'])

# Practice detection and item normalization early
trial_meta_agg['is_practice'] = trial_meta_agg['Label'].str.contains('practice', case=False, na=False)
trial_meta_agg['item'] = np.where(trial_meta_agg['is_practice'], 0, trial_meta_agg['item'].fillna(trial_meta_agg['ID']))
trial_meta_agg['item'] = pd.to_numeric(trial_meta_agg['item'], errors='coerce')

# Keypress events for SPR: consider Key/Press events
is_key = ev['PennElementType'].str.contains('Key', na=False) | ev['Parameter'].str.contains('Key', na=False) | ev['Value'].str.contains('Pressed', na=False)
keys = ev[is_key].copy().sort_values(['MD5','trial_id','EventTime'])
# Inter-key deltas
keys['prev_time'] = keys.groupby(['MD5','trial_id'], sort=False)['EventTime'].shift(1)
keys['rt'] = keys['EventTime'] - keys['prev_time']
# First key latency r1
first_key = keys.groupby(['MD5','trial_id'], sort=False).first().reset_index()
first_key = first_key.merge(trial_meta_agg[['MD5','trial_id','trial_start_time']], on=['MD5','trial_id'], how='left')
first_key['r1'] = first_key['EventTime'] - first_key['trial_start_time']
# Subsequent regions r2.. by cumcount
keys['key_idx'] = keys.groupby(['MD5','trial_id'], sort=False).cumcount() + 1
rts_wide = keys.pivot_table(index=['MD5','trial_id'], columns='key_idx', values='rt', aggfunc='first')
rts_wide = rts_wide.add_prefix('r').rename(columns={'r1':'r2'})
rts_wide = rts_wide.reset_index()

# Selector choice
sel = ev[(ev['PennElementType'].str.contains('Selector', na=False)) & (ev['Parameter'].str.contains('Selection', na=False))].copy()
choice = sel.sort_values(['MD5','trial_id','EventTime']).groupby(['MD5','trial_id'], sort=False).last().reset_index()[['MD5','trial_id','Value','EventTime']]
choice = choice.rename(columns={'Value':'choice', 'EventTime':'choice_time'})

# Question onset approximated as last event before first Selector in trial
first_sel = sel.sort_values(['MD5','trial_id','EventTime']).groupby(['MD5','trial_id'], sort=False).first().reset_index()[['MD5','trial_id','EventTime']]
first_sel = first_sel.rename(columns={'EventTime':'first_sel_time'})
ev_sorted = ev.sort_values(['MD5','trial_id','EventTime'])
prev_to_sel = ev_sorted.merge(first_sel, on=['MD5','trial_id'], how='left')
prev_to_sel = prev_to_sel[prev_to_sel['EventTime'] <= prev_to_sel['first_sel_time']]
q_onset = prev_to_sel.groupby(['MD5','trial_id'], sort=False)['EventTime'].max().reset_index().rename(columns={'EventTime':'question_onset'})

# EyeTracker calibration events
cal = ev[(ev['PennElementType'].str.contains('EyeTracker', na=False)) & (ev['Parameter'].str.contains('Calibration', na=False))]
calib = cal.groupby(['MD5'], sort=False)[['Value','EventTime']].last().reset_index().rename(columns={'Value':'calibration_value','EventTime':'calibration_time'})

# Planned metadata from experiment_data.csv
exp = pd.read_csv(EXP_PATH)
exp_cols = {c.lower(): c for c in exp.columns}
left_col = exp_cols.get('left') or exp_cols.get('image_left') or 'left'
right_col = exp_cols.get('right') or exp_cols.get('image_right') or 'right'
exp = exp.rename(columns={left_col:'left_planned', right_col:'right_planned'})
exp['item'] = pd.to_numeric(exp['item'], errors='coerce')

# Merge planned metadata onto trials (one-to-one by item)
trials = trial_meta_agg.merge(exp[['item','condition','group','left_planned','right_planned']], on='item', how='left', suffixes=('','_planned'))

# Prefer planned condition/group for non-practice; keep observed for practice
trials['condition_final'] = np.where(trials['is_practice'], trials['condition'], trials['condition_planned'].where(trials['condition_planned'].notna(), trials['condition']))
trials['group_final'] = np.where(trials['is_practice'], trials['group'], trials['group_planned'].where(trials['group_planned'].notna(), trials['group']))

# Observed sides already taken as last non-null during trial
trials['left_obs'] = trials['image_left_obs']
trials['right_obs'] = trials['image_right_obs']

# Merge RTs
trials = trials.merge(first_key[['MD5','trial_id','r1']], on=['MD5','trial_id'], how='left')
trials = trials.merge(rts_wide, on=['MD5','trial_id'], how='left')

# Merge choice and compute decision times
trials = trials.merge(choice, on=['MD5','trial_id'], how='left')
trials = trials.merge(q_onset, on=['MD5','trial_id'], how='left')
trials['decision_time_ms'] = trials['choice_time'] - trials['question_onset']

# Viewing window
end_times = ev_sorted.groupby(['MD5','trial_id'], sort=False)['EventTime'].max().reset_index().rename(columns={'EventTime':'trial_end_time'})
trials = trials.merge(end_times, on=['MD5','trial_id'], how='left')
trials['viewing_window_ms'] = trials['trial_end_time'] - trials['trial_start_time']

# Attach calibration per participant
trials = trials.merge(calib[['MD5','calibration_value','calibration_time']], on='MD5', how='left')

# Reorder columns
front_cols = ['participant_id','MD5','trial_id','Label','is_practice','item','condition_final','group_final','left_planned','right_planned','left_obs','right_obs','choice','decision_time_ms','r1']
rt_cols = [c for c in trials.columns if c.startswith('r') and c not in ['right_planned','r1']]
other_cols = [c for c in trials.columns if c not in front_cols + rt_cols]
trials = trials[front_cols + rt_cols + [c for c in other_cols if c not in front_cols + rt_cols]]

print('Trials shape (before ET merge):', trials.shape)
display(trials.head(10))

Trials shape (before ET merge): (148, 38)


  trial_meta_agg = (ev.groupby(['MD5','trial_id'], sort=False).apply(lambda g: pd.Series({


Unnamed: 0,participant_id,MD5,trial_id,Label,is_practice,item,condition_final,group_final,left_planned,right_planned,...,image_right_obs,trial_start_time,condition_planned,group_planned,choice_time,question_onset,trial_end_time,viewing_window_ms,calibration_value,calibration_time
0,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,0,welcome,False,,,,,,...,,1758167696392,,,,,1758167755113,58721,,
1,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,1,calibration,False,,,,,,...,,1758167755122,,,,,1758167820176,65054,,
2,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,2,check_preloaded,False,,,,,,...,,1758167820180,,,,,1758167821189,1009,,
3,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,3,participant_data,False,,,,,,...,,1758167821192,,,,,1758167840219,19027,,
4,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,4,instructions,False,,,,,,...,,1758167840223,,,,,1758167884108,43885,,
5,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,5,practice,True,0.0,,,,,...,participant_id,1758167884112,,,1758168000000.0,1758168000000.0,1758167903255,19143,,
6,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,6,practice,True,0.0,,,,,...,participant_id,1758167903257,,,1758168000000.0,1758168000000.0,1758167922780,19523,,
7,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,7,practice,True,0.0,,,,,...,participant_id,1758167922784,,,1758168000000.0,1758168000000.0,1758167959931,37147,,
8,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,8,start,False,,,,,,...,,1758167959935,,,,,1758167964034,4099,,
9,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,9,experiment,False,24.0,self-directed,c,építész,mérnök,...,participant_id,1758167964037,self-directed,c,1758168000000.0,1758168000000.0,1758168031007,66970,,


In [34]:
# Eye-tracking AOI metrics (left/right)
et = pd.read_csv(ET_PATH)
et.columns = [c.strip() for c in et.columns]

# Try to standardize column names across potential schemas
# We expect: trial, times (ms), and AOI flags for left/right in some form
trial_col = 'trial' if 'trial' in et.columns else et.columns[0]
time_col = 'times' if 'times' in et.columns else [c for c in et.columns if 'time' in c.lower()][0]
# Candidate AOI columns
left_cands = [c for c in et.columns if 'left' in c.lower()]
right_cands = [c for c in et.columns if 'right' in c.lower()]

# Pick one left and one right by preference

def pick_one(cands, prefs):
    for p in prefs:
        for c in cands:
            if c.lower() == p:
                return c
    return cands[0] if cands else None

left_col_et = pick_one(left_cands, ['_left_canvas', 'left', '_left_canvas_practice'])
right_col_et = pick_one(right_cands, ['_right_canvas', 'right', '_right_canvas_practice'])

# Clean: drop rows where both AOIs == 0 or both NaN
et[time_col] = pd.to_numeric(et[time_col], errors='coerce')
for c in [left_col_et, right_col_et]:
    et[c] = pd.to_numeric(et[c], errors='coerce')
clean = et.loc[~((et[left_col_et].fillna(0) == 0) & (et[right_col_et].fillna(0) == 0))].copy()

# Compute metrics per trial and AOI

def aoi_metrics(df, aoi_col):
    d = df.sort_values(time_col).copy()
    d['on'] = d[aoi_col].fillna(0).astype(int)
    # Fixation count: transitions 0->1
    d['prev_on'] = d.groupby(trial_col)['on'].shift(1).fillna(0).astype(int)
    d['start_fix'] = (d['prev_on'] == 0) & (d['on'] == 1)
    fix_count = d.groupby(trial_col)['start_fix'].sum()
    # Dwell time: sum of durations where on==1 (delta times)
    d['next_time'] = d.groupby(trial_col)[time_col].shift(-1)
    d['dur'] = (d['next_time'] - d[time_col]).clip(lower=0)
    dwell = d.loc[d['on'] == 1].groupby(trial_col)['dur'].sum()
    # Time to first fixation: first time where on==1 minus first time in trial
    trial_start = d.groupby(trial_col)[time_col].min()
    first_on_time = d[d['on'] == 1].groupby(trial_col)[time_col].min()
    tff = first_on_time - trial_start
    # Revisits via number of runs of consecutive 1s
    d['run_id'] = d.groupby(trial_col)['start_fix'].cumsum()
    run_counts = d[d['on'] == 1].groupby(trial_col)['run_id'].nunique()
    revisits = run_counts - 1
    # Average fixation duration
    avg_fix = dwell / run_counts
    out = pd.DataFrame({
        'fixation_count': fix_count,
        'dwell_ms': dwell,
        'tff_ms': tff,
        'revisit_count': revisits,
        'avg_fix_ms': avg_fix,
    })
    return out.reset_index()

metrics_left = aoi_metrics(clean, left_col_et)
metrics_right = aoi_metrics(clean, right_col_et)
metrics_left = metrics_left.add_prefix('left_'); metrics_left = metrics_left.rename(columns={f'left_{trial_col}': 'trial'})
metrics_right = metrics_right.add_prefix('right_'); metrics_right = metrics_right.rename(columns={f'right_{trial_col}': 'trial'})

et_metrics = metrics_left.merge(metrics_right, on='trial', how='outer')
# Ensure trial is numeric and aligns with trials.trial_id (both are ordinal identifiers within participant)
et_metrics['trial'] = pd.to_numeric(et_metrics['trial'], errors='coerce')
display(et_metrics.head(10))

Unnamed: 0,trial,left_fixation_count,left_dwell_ms,left_tff_ms,left_revisit_count,left_avg_fix_ms,right_fixation_count,right_dwell_ms,right_tff_ms,right_revisit_count,right_avg_fix_ms
0,12,3,629.0,0,2,209.666667,3,1691.0,481,2,563.666667
1,13,10,3235.0,490,9,323.5,10,5327.0,0,9,532.7
2,14,7,527.0,73,6,75.285714,8,2776.0,0,7,347.0
3,15,5,814.0,336,4,162.8,5,1364.0,0,4,272.8
4,16,2,1258.0,490,1,629.0,3,1699.0,0,2,566.333333
5,17,6,1879.0,70,5,313.166667,7,1384.0,0,6,197.714286
6,18,3,191.0,0,2,63.666667,3,2763.0,76,2,921.0
7,19,9,1143.0,105,8,127.0,9,2098.0,0,8,233.111111
8,20,1,2149.0,33,0,2149.0,1,33.0,0,0,33.0
9,21,3,1364.0,0,2,454.666667,3,744.0,84,2,248.0


In [35]:
# Merge AOI metrics, save outputs, preview & validate
# First attempt: merge by trial_id
trials_et = trials.merge(et_metrics, left_on='trial_id', right_on='trial', how='left')
trials_et = trials_et.drop(columns=['trial'])

# If very few ET rows merged, fallback to merging by Order
et_cols = [c for c in et_metrics.columns if c != 'trial']
merged_count = trials_et[et_cols].notna().any(axis=1).sum()
if merged_count < (0.4 * len(trials_et)) and 'Order' in trials.columns:
    trials_et_alt = trials.merge(et_metrics, left_on='Order', right_on='trial', how='left')
    trials_et_alt = trials_et_alt.drop(columns=['trial'])
    merged_count_alt = trials_et_alt[et_cols].notna().any(axis=1).sum()
    if merged_count_alt > merged_count:
        trials_et = trials_et_alt

# Deduplicate to one row per (MD5, trial_id)
trials_et = trials_et.sort_values(['MD5','trial_id','trial_end_time']).drop_duplicates(['MD5','trial_id'], keep='last')

# Enforce single non-practice group per participant by majority vote
if 'group_final' in trials_et.columns:
    def mode_or_first(s):
        vc = s.dropna().value_counts()
        return vc.index[0] if len(vc) else np.nan
    majority_group = (trials_et[~trials_et['is_practice']]
        .groupby('participant_id', dropna=False)['group_final']
        .apply(mode_or_first)
        .rename('majority_group'))
    trials_et = trials_et.merge(majority_group, on='participant_id', how='left')
    mask_keep = trials_et['is_practice'] | (trials_et['group_final'] == trials_et['majority_group']) | trials_et['majority_group'].isna()
    trials_et = trials_et[mask_keep].drop(columns=['majority_group'])

# Save outputs
events_out = ROOT / 'parsed_events.csv'
trials_out = ROOT / 'parsed_trials_with_et.csv'
events_raw.to_csv(events_out, index=False)
trials_et.to_csv(trials_out, index=False)
print('Saved:', events_out)
print('Saved:', trials_out)

# Preview participant rows
pid = trials_et['participant_id'].dropna().astype(str).unique()
print('Participants in data:', pid)
display(trials_et.head(15))

# Quick checks
print('Missing counts:')
for col in ['item','condition_final','group_final','choice']:
    miss = trials_et[col].isna().sum()
    print(f'  {col}: {miss}')
print('Unique groups per participant (non-practice):')
gp = (trials_et[~trials_et['is_practice']].groupby('participant_id', dropna=False)['group_final'].nunique())
print(gp)
if (gp>1).any():
    print('WARNING: Some participants have >1 group; check item->planned group mapping.')

Saved: c:\Users\parti\Projects\pcibex-hun\parsed_events.csv
Saved: c:\Users\parti\Projects\pcibex-hun\parsed_trials_with_et.csv
Participants in data: ['Gabor001']


Unnamed: 0,participant_id,MD5,trial_id,Label,is_practice,item,condition_final,group_final,left_planned,right_planned,...,left_fixation_count,left_dwell_ms,left_tff_ms,left_revisit_count,left_avg_fix_ms,right_fixation_count,right_dwell_ms,right_tff_ms,right_revisit_count,right_avg_fix_ms
5,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,5,practice,True,0.0,,,,,...,,,,,,,,,,
6,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,6,practice,True,0.0,,,,,...,,,,,,,,,,
7,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,7,practice,True,0.0,,,,,...,,,,,,,,,,
9,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,9,experiment,False,24.0,other-directed-x,c,mérnök,építész,...,,,,,,,,,,
10,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,10,experiment,False,24.0,other-directed-x,c,mérnök,építész,...,,,,,,,,,,
11,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,11,experiment,False,34.0,attention-check,c,elefánt,hangya,...,,,,,,,,,,
14,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,14,experiment,False,33.0,attention-check,c,burgonya,paradicsom,...,7.0,527.0,73.0,6.0,75.285714,8.0,2776.0,0.0,7.0,347.0
16,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,16,experiment,False,13.0,contrastive,c,13a,13b,...,2.0,1258.0,490.0,1.0,629.0,3.0,1699.0,0.0,2.0,566.333333
19,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,19,experiment,False,1.0,contrastive,c,1a,1b,...,9.0,1143.0,105.0,8.0,127.0,9.0,2098.0,0.0,8.0,233.111111
22,Gabor001,a3fbd08eb2e85cb6cad0ac3c680949dd,22,experiment,False,27.0,other-directed-x,c,kórboncnok,katona,...,8.0,2888.0,0.0,7.0,361.0,7.0,2200.0,40.0,6.0,314.285714


Missing counts:
  item: 0
  condition_final: 3
  group_final: 3
  choice: 0
Unique groups per participant (non-practice):
participant_id
Gabor001    1
Name: group_final, dtype: int64
