In [2]:
import pandas as pd
import numpy as np

In [3]:
# Toy clickstream: one (decision_id, event_type, timestamp) record per event,
# listed in chronological order within each decision.
event_rows = [
    ("D1", "view",          "2024-01-01 10:00:00"),
    ("D1", "scroll",        "2024-01-01 10:00:20"),
    ("D1", "option_change", "2024-01-01 10:01:10"),
    ("D2", "view",          "2024-01-01 11:00:00"),
    ("D2", "exit",          "2024-01-01 11:00:15"),
    ("D3", "view",          "2024-01-01 12:00:00"),
    ("D3", "scroll",        "2024-01-01 12:00:10"),
    ("D3", "hover",         "2024-01-01 12:00:30"),
    ("D3", "exit",          "2024-01-01 12:01:20"),
]

events = pd.DataFrame(
    event_rows,
    columns=["decision_id", "event_type", "event_time"],
)
# Parse the timestamp strings so later cells can do datetime arithmetic.
events["event_time"] = pd.to_datetime(events["event_time"])

events

Unnamed: 0,decision_id,event_type,event_time
0,D1,view,2024-01-01 10:00:00
1,D1,scroll,2024-01-01 10:00:20
2,D1,option_change,2024-01-01 10:01:10
3,D2,view,2024-01-01 11:00:00
4,D2,exit,2024-01-01 11:00:15
5,D3,view,2024-01-01 12:00:00
6,D3,scroll,2024-01-01 12:00:10
7,D3,hover,2024-01-01 12:00:30
8,D3,exit,2024-01-01 12:01:20


In [4]:
# Collapse the event log to one row per decision:
#   total_time_sec    - elapsed whole seconds between the first and last event
#   option_actions    - number of "option_change" / "backtrack" events
#   attention_actions - number of "scroll" / "hover" events
#
# Timedelta.seconds is only the seconds *component* (0-86399) and silently
# drops whole days, so a decision spanning more than 24h would be
# under-reported. total_seconds() returns the full elapsed duration; int()
# keeps the column integer-valued as before.
decision_level = (
    events
    .groupby("decision_id")
    .agg(
        total_time_sec=(
            "event_time",
            lambda x: int((x.max() - x.min()).total_seconds()),
        ),
        option_actions=("event_type", lambda x: x.isin(["option_change","backtrack"]).sum()),
        attention_actions=("event_type", lambda x: x.isin(["scroll","hover"]).sum())
    )
    .reset_index()
)

decision_level

Unnamed: 0,decision_id,total_time_sec,option_actions,attention_actions
0,D1,70,1,1
1,D2,15,0,0
2,D3,80,0,2


In [5]:
# A decision is "deeply engaged" only when all three signals are present:
# at least 45 seconds elapsed, one or more option actions, and one or more
# attention actions. Stored as a 0/1 integer flag.
long_enough = decision_level["total_time_sec"].ge(45)
touched_options = decision_level["option_actions"].ge(1)
paid_attention = decision_level["attention_actions"].ge(1)

decision_level["deep_engagement"] = (
    long_enough & touched_options & paid_attention
).astype(int)

decision_level

Unnamed: 0,decision_id,total_time_sec,option_actions,attention_actions,deep_engagement
0,D1,70,1,1,1
1,D2,15,0,0,0
2,D3,80,0,2,0


In [6]:
# The earliest evaluative event of a decision marks where hesitation begins.
# Decisions with no evaluative event do not appear in the result at all.
evaluative_types = ["option_change", "backtrack", "compare", "hover"]
is_evaluative = events["event_type"].isin(evaluative_types)
eval_events = events.loc[is_evaluative]

hesitation_start = (
    eval_events
    .groupby("decision_id", as_index=False)
    .agg(hesitation_start_time=("event_time", "min"))
)

hesitation_start

Unnamed: 0,decision_id,hesitation_start_time
0,D1,2024-01-01 10:01:10
1,D3,2024-01-01 12:00:30


In [7]:
# Attach the hesitation start time to every decision. The left join keeps
# decisions that never had an evaluative event; they get NaT.
decision_level = pd.merge(
    decision_level,
    hesitation_start,
    how="left",
    on="decision_id",
)

decision_level

Unnamed: 0,decision_id,total_time_sec,option_actions,attention_actions,deep_engagement,hesitation_start_time
0,D1,70,1,1,1,2024-01-01 10:01:10
1,D2,15,0,0,0,NaT
2,D3,80,0,2,0,2024-01-01 12:00:30


In [8]:
# Get last event time per decision (proxy for exit time)
last_event = (
    events
    .groupby("decision_id")["event_time"]
    .max()
    .reset_index()
    .rename(columns={"event_time": "exit_time"})
)

# Explicit left join: every decision row is kept regardless of the merge
# default (inner), mirroring how hesitation_start_time was attached.
decision_level = decision_level.merge(last_event, on="decision_id", how="left")

# Time from hesitation start to exit, in seconds.
# .dt.seconds is only the seconds *component* of each timedelta (0-86399) and
# drops whole days; .dt.total_seconds() returns the full elapsed duration.
# Rows without a hesitation start (NaT) stay NaN.
decision_level["time_from_hesitation"] = (
    decision_level["exit_time"] - decision_level["hesitation_start_time"]
).dt.total_seconds()

decision_level

Unnamed: 0,decision_id,total_time_sec,option_actions,attention_actions,deep_engagement,hesitation_start_time,exit_time,time_from_hesitation
0,D1,70,1,1,1,2024-01-01 10:01:10,2024-01-01 10:01:10,0.0
1,D2,15,0,0,0,NaT,2024-01-01 11:00:15,
2,D3,80,0,2,0,2024-01-01 12:00:30,2024-01-01 12:01:20,50.0


In [9]:
# Flag decisions that lingered 30+ seconds after hesitation began AND had two
# or more option actions. A missing (NaN) hesitation time compares as False,
# so those decisions are never flagged.
lingered = decision_level["time_from_hesitation"].ge(30)
kept_switching = decision_level["option_actions"].ge(2)

decision_level["excessive_hesitation"] = (lingered & kept_switching).astype(int)

decision_level

Unnamed: 0,decision_id,total_time_sec,option_actions,attention_actions,deep_engagement,hesitation_start_time,exit_time,time_from_hesitation,excessive_hesitation
0,D1,70,1,1,1,2024-01-01 10:01:10,2024-01-01 10:01:10,0.0,0
1,D2,15,0,0,0,NaT,2024-01-01 11:00:15,,0
2,D3,80,0,2,0,2024-01-01 12:00:30,2024-01-01 12:01:20,50.0,0


In [10]:
# Simulate purchase events (none in our toy data)
is_purchase = events["event_type"].eq("purchase")
purchase_events = events.loc[is_purchase]

# One row per decision that has at least one purchase event.
purchased = purchase_events[["decision_id"]].drop_duplicates()

# 0/1 flag: does this decision appear among the purchasing decisions?
made_purchase = decision_level["decision_id"].isin(purchased["decision_id"])
decision_level["purchased"] = made_purchase.astype(int)

In [11]:
# Deeply engaged decisions that nevertheless ended without a purchase.
no_purchase = decision_level["purchased"].eq(0)
was_engaged = decision_level["deep_engagement"].eq(1)

decision_level["abandoned_after_engagement"] = (no_purchase & was_engaged).astype(int)

decision_level

Unnamed: 0,decision_id,total_time_sec,option_actions,attention_actions,deep_engagement,hesitation_start_time,exit_time,time_from_hesitation,excessive_hesitation,purchased,abandoned_after_engagement
0,D1,70,1,1,1,2024-01-01 10:01:10,2024-01-01 10:01:10,0.0,0,0,1
1,D2,15,0,0,0,NaT,2024-01-01 11:00:15,,0,0,0
2,D3,80,0,2,0,2024-01-01 12:00:30,2024-01-01 12:01:20,50.0,0,0,0


In [12]:
# Raw (un-normalized) inputs to the cognitive-load score.

def _count_events(event_types):
    """Count events of the given types per decision, aligned to decision_level rows."""
    return (
        events["event_type"]
        .isin(event_types)
        .groupby(events["decision_id"])
        .sum()
        # fill_value=0: a decision missing from the event log gets 0, not NaN.
        .reindex(decision_level["decision_id"], fill_value=0)
        .to_numpy()
    )


# T: seconds between the first evaluative action and exit. Decisions without
# any evaluative action have no hesitation window (NaN); count that as 0 s so
# the NaN does not propagate into cognitive_load downstream.
decision_level["T_raw"] = decision_level["time_from_hesitation"].fillna(0)

# O: option actions (option_change + backtrack).
decision_level["O_raw"] = decision_level["option_actions"]

# B: backtrack events only. Previously this was a copy of option_actions, which
# made B identical to O and gave option actions a combined 0.50 weight.
# NOTE(review): assumes "B" stands for backtracks -- confirm with the metric owner.
decision_level["B_raw"] = _count_events(["backtrack"])

# C: help clicks and errors.
decision_level["C_raw"] = _count_events(["help_click", "error"])

In [13]:
def normalize(series, upper_quantile=0.95, eps=1e-9):
    """Winsorize the upper tail of a numeric Series and min-max scale it.

    Values above the ``upper_quantile`` quantile are clipped down to that
    quantile, then the clipped values are shifted and scaled by their range.

    Parameters
    ----------
    series : pd.Series
        Numeric values to scale. NaN entries are ignored when computing the
        quantile, minimum and maximum, and remain NaN in the result.
    upper_quantile : float, default 0.95
        Quantile (0-1) used as the clipping ceiling; 1.0 disables clipping.
    eps : float, default 1e-9
        Added to the denominator so a constant series yields 0 instead of a
        division by zero. As a side effect the largest value maps to just
        under 1.

    Returns
    -------
    pd.Series
        Scaled values in approximately [0, 1], same index as ``series``.
    """
    upper = series.quantile(upper_quantile)
    clipped = series.clip(upper=upper)
    low = clipped.min()
    span = clipped.max() - low
    return (clipped - low) / (span + eps)

In [14]:
# Scale each raw component (T, O, B, C) with normalize(), writing the result
# to the matching "<component>_norm" column.
for component in ("T", "O", "B", "C"):
    decision_level[f"{component}_norm"] = normalize(decision_level[f"{component}_raw"])

In [15]:
# Cognitive load is a weighted blend of the four normalized components.
# A NaN in any component makes that decision's score NaN.
component_weights = {
    "T_norm": 0.35,
    "O_norm": 0.25,
    "B_norm": 0.25,
    "C_norm": 0.15,
}

decision_level["cognitive_load"] = sum(
    weight * decision_level[column]
    for column, weight in component_weights.items()
)

In [16]:
# Final reporting view: identifiers, behavioural counts, hesitation timing,
# the three 0/1 flags and the composite score. Copied so later edits to
# analytics_table do not touch decision_level.
analytics_columns = [
    "decision_id",
    "total_time_sec",
    "option_actions",
    "attention_actions",
    "hesitation_start_time",
    "time_from_hesitation",
    "deep_engagement",
    "excessive_hesitation",
    "abandoned_after_engagement",
    "cognitive_load",
]

analytics_table = decision_level.loc[:, analytics_columns].copy()