In [4]:
import pandas as pd

filename = "results_dev.csv"

columns = []
data_rows = []

# Step 1: Parse the file and identify column headers
with open(filename, encoding="utf-8") as f:
    for line in f:
        line = line.strip()

        # Skip empty lines
        if not line:
            continue

        # Detect column header line
        if line.startswith("#") and "Columns below this comment are as follows" in line:
            next_line = next(f).strip()
            while next_line.startswith("#"):
                colname = next_line.strip("# ").replace(".", "").replace(",", "").strip()
                if colname:
                    columns.append(colname)
                next_line = next(f).strip()
        elif not line.startswith("#"):
            # Only start collecting rows once headers are set
            if columns:
                fields = line.split(",")
                if len(fields) >= len(columns):
                    row = dict(zip(columns, fields))
                    data_rows.append(row)

# Step 2: Convert to dataframe
df_raw = pd.DataFrame(data_rows)
df_raw

Unnamed: 0,1 Results reception time,2 MD5 hash of participant's IP address,3 Controller name,4 Order number of item,5 Inner element number,6 Label,7 Latin Square Group,8 PennElementType,9 PennElementName,10 Parameter,11 Value,12 EventTime,13 prolific_id,14 Comments
0,1753593201,4c435816b733b899c8179a3e0022201d,PennController,0,0,welcome,,PennController,0,_Header_,Start,1753592901249,prolific_id,
1,1753593201,4c435816b733b899c8179a3e0022201d,PennController,0,0,welcome,,PennController,0,_Header_,End,1753592901249,prolific_id,
2,1753593201,4c435816b733b899c8179a3e0022201d,PennController,0,0,welcome,,Html,consent_form,,checked,1753592915919,prolific_id,checkbox
3,1753593201,4c435816b733b899c8179a3e0022201d,PennController,0,0,welcome,,PennController,0,_Trial_,End,1753592915919,prolific_id,
4,1753593201,4c435816b733b899c8179a3e0022201d,PennController,1,0,calibration,,PennController,1,_Trial_,Start,1753592915926,prolific_id,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140,1753593201,4c435816b733b899c8179a3e0022201d,PennController,11,0,experiment,,Key,r7,PressedKey,,1753593195075,prolific_id,undefined
141,1753593201,4c435816b733b899c8179a3e0022201d,PennController,11,0,experiment,,Key,ready,PressedKey,,1753593197316,prolific_id,undefined
142,1753593201,4c435816b733b899c8179a3e0022201d,PennController,11,0,experiment,,Selector,choice_selector,Selection,left_canvas,1753593200773,prolific_id,undefined
143,1753593201,4c435816b733b899c8179a3e0022201d,PennController,11,0,experiment,,EyeTracker,tracker,Filename,httpsfarmpcibexnetrDigcCS/861f317e-f7f9-0129-c...,1753593201297,prolific_id,undefined


In [None]:
# Step 3: Clean up and extract relevant fields
df_raw["EventTime"] = pd.to_numeric(df_raw.get("EventTime", 0), errors="coerce")
df_raw["participant_id"] = df_raw["participant_id"].replace({"undefined": None})
df_raw["ItemID"] = df_raw["item"]
df_raw["Selected"] = df_raw["Value"]

# Capture Selector events
selectors = df_raw[df_raw["PennElementType"] == "Selector"]
selectors = selectors[selectors["Parameter"] == "choice_selector"]

# Find response time anchors
key_ready = df_raw[df_raw["Parameter"] == "ready"]
key_r0 = df_raw[df_raw["Parameter"] == "r0"]

# Merge timing data
merged = selectors.merge(
    key_ready[["participant_id", "item", "EventTime"]],
    on=["participant_id", "item"],
    suffixes=("", "_ready"))

merged = merged.merge(
    key_r0[["participant_id", "item", "EventTime"]],
    on=["participant_id", "item"],
    suffixes=("", "_start"))

merged["ResponseTime"] = merged["EventTime_ready"] - merged["EventTime_start"]

# Final frame
final_df = merged[[
    "participant_id", "item", "condition", "image_left", "image_right", "Selected", "ResponseTime"
]].rename(columns={
    "participant_id": "Participant",
    "item": "ItemID",
    "condition": "Condition",
    "image_left": "Image_Left",
    "image_right": "Image_Right"
})

print(final_df.head())