In [None]:
!pip install psycopg2 dotenv sqlalchemy pandas numpy==1.26.4 bokeh

In [None]:
# installs, imports, dotenv…
import os

from dotenv import load_dotenv
from sqlalchemy import create_engine, inspect
from sqlalchemy.engine import URL

# Load variables from a local .env file (if present) into the process
# environment so the DB_* settings below can be read with os.getenv.
load_dotenv()

# build URL from the locally‑forwarded port
user     = os.getenv("DB_USER")
pw       = os.getenv("DB_PASSWORD")
host     = os.getenv("DB_HOST")
port     = os.getenv("DB_PORT")
db       = os.getenv("DB_NAME")

# URL.create assembles the connection URL from its parts, so characters such
# as "@", ":" or "/" in the credentials are escaped correctly and unset
# settings are omitted instead of being interpolated as the text "None".
engine   = create_engine(
    URL.create(
        "postgresql",
        username=user,
        password=pw,
        host=host,
        port=int(port) if port else None,
        database=db,
    )
)

In [None]:
from bokeh.transform import factor_cmap
from bokeh.palettes import Category10
from bokeh.transform import factor_cmap, factor_mark
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, HoverTool, Segment, Text, Div
from bokeh.plotting import figure, show
from bokeh.io import output_notebook


# Direct Bokeh's show() output to the notebook's output cells.
output_notebook()

In [None]:
import pandas as pd

# inspect tables: list the table names reachable through the engine as a
# sanity check before querying them
inspector = inspect(engine)
print("Tables:", inspector.get_table_names())

In [None]:
# Load the whole interaction_logs table into a DataFrame and preview it.
ilogs = pd.read_sql("SELECT * FROM interaction_logs;", engine)
ilogs.head()

In [None]:
# Load the whole survey_responses table into a DataFrame and preview it.
surveys = pd.read_sql("SELECT * FROM survey_responses;", engine)
surveys.head()

In [None]:
# Load the whole text_snapshots table into a DataFrame and preview it.
snapshots = pd.read_sql("SELECT * FROM text_snapshots;", engine)
snapshots.head()

In [None]:
# Pick one participant and keep only their interaction-log rows for a first look.
participant_id = "5f48002958dd331635f1d767"
p_ilogs = ilogs.loc[ilogs["participant_id"] == participant_id]
p_ilogs.head()

In [None]:
# Distinct raw event_type values logged for the selected participant.
p_ilogs["event_type"].unique()

In [None]:
# Event families across all participants: the part of each distinct
# event_type that precedes the first ":".
{raw_type.split(":")[0] for raw_type in ilogs["event_type"].unique()}

In [None]:
def add_extra_col(participant_id):
    """Return one participant's interaction logs with two derived columns.

    The rows are selected from the notebook-level ``ilogs`` DataFrame and
    copied, so ``ilogs`` itself is never modified.

    Parameters
    ----------
    participant_id
        Value compared for equality against ``ilogs["participant_id"]``.

    Returns
    -------
    pandas.DataFrame
        The matching rows (possibly none) with two extra columns:

        * ``cat_event_type`` -- coarse category derived from ``event_type``:
          one of "paste", "cut/copy", "keystroke", "api_call",
          "browser_event", "button_press", "text_selection", or "null" when
          nothing matches or the value is not a string.
        * ``minutes_from_stage_start`` -- ``time_from_stage_start`` divided
          by 60000 (presumably milliseconds to minutes; confirm the unit
          against the logging code).
    """
    local = ilogs.loc[ilogs["participant_id"] == participant_id].copy()

    # Ordered (substring, label) rules. The first match wins, so the specific
    # keystroke rules must stay ahead of the generic "keystroke" rule.
    rules = (
        ("keystroke:paste", "paste"),
        ("keystroke:copy", "cut/copy"),
        ("keystroke:cut", "cut/copy"),
        ("keystroke", "keystroke"),
        ("api_call", "api_call"),
        ("browser", "browser_event"),
        ("button", "button_press"),
        ("text_selection", "text_selection"),
    )

    def label_event(event_type):
        # Missing values (None / NaN) cannot be substring-searched.
        if not isinstance(event_type, str):
            return "null"
        for needle, label in rules:
            if needle in event_type:
                return label
        return "null"

    # Map over the single column instead of DataFrame.apply(axis=1): on an
    # empty selection apply() returns a DataFrame, which cannot be assigned to
    # one column, whereas Series.map simply returns an empty Series.
    local["cat_event_type"] = local["event_type"].map(label_event)

    local["minutes_from_stage_start"] = local["time_from_stage_start"] / 60000

    return local

In [None]:
def make_whole_timeline(participant_id):
    """Show a scatter timeline of every logged event for one participant.

    Rows are taken from the notebook-level ``ilogs`` DataFrame. Each event is
    drawn at (``created_at``, ``time_from_stage_start``) and coloured by its
    ``stage`` ("Outline", "Draft" or "Revision"); hovering a marker shows its
    ``event_type``.

    Parameters
    ----------
    participant_id
        Value compared for equality against ``ilogs["participant_id"]``;
        also shown in the plot title.

    Returns
    -------
    None
        The figure is displayed with ``show``.
    """
    local = ilogs.loc[ilogs["participant_id"] == participant_id].copy()

    stage_factors = ["Outline", "Draft", "Revision"]
    num_stages = len(stage_factors)

    # figure() picks a linear x-axis for plain data ranges, on which datetime
    # values are rendered as raw epoch numbers. Request a datetime axis when
    # the column really holds timestamps; otherwise keep Bokeh's default.
    created_is_datetime = pd.api.types.is_datetime64_any_dtype(local["created_at"])

    source = ColumnDataSource(data=local)

    p = figure(
        width=800,
        height=400,
        title=f"High-level Timeline for Participant {participant_id}",
        x_axis_label="Timestamp",
        y_axis_label="Time from Stage Start",
        x_axis_type="datetime" if created_is_datetime else "auto",
    )

    p.scatter(
        x="created_at",
        y="time_from_stage_start",
        color=factor_cmap('stage', f'Category10_{num_stages}', stage_factors),
        source=source,
        size=12,
        fill_alpha=.4,
        legend_field="stage"
    )

    hover = HoverTool(tooltips=[('event_type', '@event_type')])
    p.add_tools(hover)

    show(p)

In [None]:
def make_stages_timeline(participant_id):
    """Show one event strip per writing stage for a single participant.

    Events come from ``add_extra_col(participant_id)``. For each of the
    stages "Outline", "Draft" and "Revision" an 800x150 figure is drawn in
    which every event is a marker at y=1, positioned by
    ``minutes_from_stage_start`` and coloured by ``cat_event_type``.
    Categories listed in ``annotation_map`` additionally get a grey stem
    ending in an icon; the icon key is printed before the plots are shown.

    Parameters
    ----------
    participant_id
        Identifier forwarded to ``add_extra_col`` and shown in the heading.

    Returns
    -------
    None
        The heading and the stacked figures are displayed with ``show``.
    """
    local = add_extra_col(participant_id)

    stages = ["Outline", "Draft", "Revision"]
    event_factors = list(local["cat_event_type"].unique())
    # Always hand factor_cmap the full 10-colour palette rather than the name
    # "Category10_<n>": a palette longer than the factor list is fine, while
    # a named variant is not guaranteed to exist for every n (e.g. when a
    # participant has fewer than three event categories).
    event_palette = Category10[10]

    # Every stage strip shows the same window: minutes 0-16 on x, and a fixed
    # 0-2 band on y (markers sit at y=1, stems reach up or down from there).
    shared_x_range = (0,16)
    shared_y_range = (0,2)

    # Per-category stem length (signed offset from y=1) and icon drawn at the
    # end of the stem. Distinct offsets keep the icons of different
    # categories from overlapping.
    annotation_map = {
        "browser_event": {"offset": 0.9, "icon": "⌕"},
        "api_call": {"offset": -0.3, "icon": "⚡︎"},
        "button_press": {"offset": 0.3, "icon": "▶"},
        "paste": {"offset": -0.6, "icon": "⤵"},
        "cut/copy": {"offset": 0.6, "icon": "✂"},
    }

    # Text key for the icons, printed above the plots.
    for key, val in annotation_map.items():
        print(f'{val["icon"]} - {key}')

    def make_stage_plot(stage, x_range, show_legend=False):
        """Build the strip figure for one stage and return it."""
        stage_data = local.loc[local["stage"] == stage].copy()
        stage_data["y"] = 1

        source = ColumnDataSource(data=stage_data)

        p = figure(
            width=800,
            height=150,
            x_range=x_range,
            y_range=shared_y_range,
            title=f"Stage: {stage}",
            x_axis_label="Minutes from Stage Start",
            y_axis_label=""
        )

        event_markers = p.scatter(
            x="minutes_from_stage_start",
            y="y",
            color=factor_cmap('cat_event_type', event_palette, event_factors),
            source=source,
            size=12,
            fill_alpha=.5,
            line_color=None,
            legend_field="cat_event_type"
        )

        # Add annotations
        for event_type, props in annotation_map.items():
            event_rows = stage_data[stage_data["cat_event_type"] == event_type]
            if event_rows.empty:
                continue

            x = event_rows["minutes_from_stage_start"]
            y0 = event_rows["y"]
            y1 = y0 + props["offset"]

            annotation_source = ColumnDataSource(data=dict(
                x=x,
                x2=x,
                y0=y0,
                y1=y1,
                icon=[props["icon"]] * len(x),
                icon_y=y1,
            ))

            # Add line (stem)
            p.segment(x0="x", y0="y0", x1="x2", y1="y1", source=annotation_source,
                      line_width=1, line_color="grey")

            # Add icon at end
            p.text(x="x", y="icon_y", text="icon", source=annotation_source,
                   text_align="center", text_baseline="middle", text_font_size="10pt")

        # Restrict the tooltip to the event markers: the stem and icon glyphs
        # use a source without an event_type column.
        hover = HoverTool(renderers=[event_markers],
                          tooltips=[('event_type', '@event_type')])
        p.add_tools(hover)

        if show_legend:
            p.add_layout(p.legend[0], 'above')  # Moves legend out of main plot area
        else:
            p.legend.visible = False

        p.yaxis.visible = False         # hides the axis (line, ticks, and labels)

        return p

    plots = [make_stage_plot(stage, shared_x_range, show_legend=False) for stage in stages]
    header = Div(text=f"<h2>Event Timeline for Participant {participant_id}</h2>", width=800)
    # Put the heading above the strips so the output names its participant.
    show(column(header, *plots))

In [None]:
# Participant whose timelines are rendered by the cells below.
pid = "5f48002958dd331635f1d767"

In [None]:
# High-level view: every event of the participant, coloured by stage.
make_whole_timeline(pid)

In [None]:
# Per-stage view: one strip per stage, coloured by event category.
make_stages_timeline(pid)

In [None]:
# Partial text snapshots for the participant selected in `pid`, i.e. the same
# participant the timeline cells above are drawn for.
local = snapshots.loc[(snapshots["participant_id"] == pid) & (snapshots["type"] == "partial")]

# Lift the column-width limit so the snapshot text is shown untruncated.
with pd.option_context('display.max_colwidth', None):
    display(local)