In [1]:
import pandas as pd
import numpy as np

# --- KEYPRESS FEATURE EXTRACTION ---
def extract_keystroke_features(path):
    """Compute key hold durations from a raw keyboard event log.

    The CSV's first row is treated as a header and discarded; the columns
    are renamed to ``event``, ``time`` and ``readable_time``. Each
    ``"release"`` row is paired with the most recent ``"press"`` row that
    has not yet been consumed, and their time difference becomes one
    ``HoldTime`` sample. A release with no pending press is ignored.

    NOTE(review): rows are paired purely by order -- no key identity is
    read here -- so overlapping key presses are not attributed per key.

    Args:
        path: Anything ``pandas.read_csv`` accepts (a file path or a
            file-like object).

    Returns:
        A DataFrame with a single float column ``HoldTime``, expressed in
        the same units as the ``time`` column. The column is present even
        when no press/release pair was found, so a CSV written from the
        result always carries a header.
    """
    df = pd.read_csv(path, header=0, names=["event", "time", "readable_time"])

    hold_times = []
    press_time = None

    # Walk only the two columns that are used; this avoids building a
    # Series object for every row the way iterrows() does.
    for event, raw_time in zip(df["event"], df["time"]):
        t = float(raw_time)

        if event == "press":
            press_time = t  # a later press replaces an unreleased one
        elif event == "release" and press_time is not None:
            hold_times.append(t - press_time)
            press_time = None  # wait for the next press

    # Explicit column + dtype keeps the schema stable for empty results.
    return pd.DataFrame({"HoldTime": hold_times}, dtype=float)




# --- MOUSE MOVEMENT & CLICK FEATURE EXTRACTION ---
def extract_mouse_features(path):
    """Derive motion and click features from a raw mouse event log.

    The CSV is read with its own header row; the columns ``event_type``,
    ``timestamp``, ``x`` and ``y`` are used.

    * ``"move"`` rows: for every move after the first, the Euclidean
      distance to the previous move position is divided by the elapsed
      time to give ``Speed``; ``Acceleration`` is the change in speed over
      that same interval (``0.0`` for the first speed sample, which has no
      predecessor).
    * ``"press"`` / ``"release"`` rows: each release is paired with the
      most recent unconsumed press and the time difference is recorded as
      ``ClickDuration``. A release with no pending press is ignored.

    Args:
        path: Anything ``pandas.read_csv`` accepts (a file path or a
            file-like object).

    Returns:
        A tuple ``(move_df, click_df)``. ``move_df`` has float columns
        ``Speed`` and ``Acceleration``; ``click_df`` has a float column
        ``ClickDuration``. The columns are present even when no matching
        events were found, so CSVs written from them always have a header.
    """
    df = pd.read_csv(path)  # keep the file's own headers

    motion_rows = []
    click_durations = []
    last_x = last_y = last_t = last_speed = None
    click_start_time = None

    # Walk only the needed columns; this avoids building a Series object
    # for every row the way iterrows() does.
    columns = zip(df["event_type"], df["timestamp"], df["x"], df["y"])
    for event, raw_t, raw_x, raw_y in columns:
        t = float(raw_t)

        if event == "move":
            x, y = float(raw_x), float(raw_y)
            if last_x is not None:
                dist = ((x - last_x) ** 2 + (y - last_y) ** 2) ** 0.5
                # The small epsilon guards against division by zero when
                # two moves share the same timestamp.
                dt = t - last_t + 1e-6
                speed = dist / dt
                if last_speed is not None:
                    acc = (speed - last_speed) / dt
                else:
                    acc = 0.0  # float placeholder keeps the column dtype float
                motion_rows.append((speed, acc))
                last_speed = speed
            last_x, last_y, last_t = x, y, t

        elif event == "press":
            click_start_time = t

        elif event == "release" and click_start_time is not None:
            click_durations.append(t - click_start_time)
            click_start_time = None

    # Explicit columns + dtype keep the schema stable for empty results.
    move_df = pd.DataFrame(
        motion_rows, columns=["Speed", "Acceleration"], dtype=float
    )
    click_df = pd.DataFrame({"ClickDuration": click_durations}, dtype=float)
    return move_df, click_df




# --- MAIN EXECUTION ---
if __name__ == "__main__":
    # Locations of the raw recordings, relative to the working directory.
    keyboard_file = "../data/keyboard_raw.csv"
    mouse_file = "../data/mouse_raw.csv"

    # Turn the raw event logs into feature tables.
    kf = extract_keystroke_features(keyboard_file)
    mf_speed_accel, mf_click = extract_mouse_features(mouse_file)

    # One entry per feature table: (heading to print, table, output CSV).
    outputs = (
        ("✅ Keyboard Features:", kf, "../data/keystroke_features.csv"),
        (
            "\n✅ Mouse Motion Features:",
            mf_speed_accel,
            "../data/mouse_motion_features.csv",
        ),
        (
            "\n✅ Mouse Click Features:",
            mf_click,
            "../data/mouse_click_features.csv",
        ),
    )

    # Persist every table first ...
    for _heading, table, csv_path in outputs:
        table.to_csv(csv_path, index=False)

    # ... then show a short preview of each one.
    for heading, table, _csv_path in outputs:
        print(heading)
        print(table.head())


✅ Keyboard Features:
   HoldTime
0  0.123012
1  0.116312
2  0.112349
3  0.119937
4  0.120452

✅ Mouse Motion Features:
         Speed   Acceleration
0   441.163551       0.000000
1   635.262745   35984.089135
2   861.390836   18642.313332
3  3066.740532  566325.660459
4  1016.797357 -174537.187755

✅ Mouse Click Features:
   ClickDuration
0       0.098504
1       0.116682
2       0.084002
3       0.082947
4       0.083062
