In [1]:
import os
import pandas as pd
from collections import defaultdict

# Where the mouse .log files are read from, and where the summary CSV goes
folder_path = r"C:\Users\karun\OneDrive\Documents\RIK\data\TWOS-dataset\mouse_ano"
output_path = r"C:\Users\karun\OneDrive\Documents\RIK\outputs\twos_mouse_summary.csv"


def _new_day_counters():
    """Return an empty metric table whose counters start at 0."""
    return defaultdict(int)


# Accumulators, both keyed by (user, date):
#   unique_positions -> set of distinct "x_y" position strings seen that day
#   daily_activity   -> metric name -> event count for that day
unique_positions = defaultdict(set)
daily_activity = defaultdict(_new_day_counters)

# Process each .log file
#
# Each line is split on ';'. Field 0 is the timestamp (double quotes are
# stripped), field 1 the event type, fields 2 and 3 are combined into a
# position string for "Mouse Moved" events, and field 4 may carry a user id.
# Counts are accumulated per (user, calendar date).

# Event type -> name of the per-day counter it increments.
# RESTART and REFRESH both count as the start of a session.
EVENT_METRICS = {
    "RESTART": "session_count_per_day",
    "REFRESH": "session_count_per_day",
    "Mouse Moved": "mouse_moves_per_day",
    "Left Click": "left_clicks_per_day",
    "Right Click": "right_clicks_per_day",
    "Double Click": "double_clicks_per_day",
    "Drag": "drag_events_per_day",
}

for file in os.listdir(folder_path):
    if not file.endswith(".log"):
        continue

    file_path = os.path.join(folder_path, file)

    # Fallback user id: the file name up to its first dot. It is the same for
    # every line of the file, so compute it once instead of once per line.
    default_user = os.path.splitext(file)[0].split('.')[0]

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                parts = line.strip().split(';')
                if len(parts) < 2:
                    continue

                timestamp = parts[0].replace('"', '')
                event_type = parts[1].strip()

                # Parse timestamp; lines without a usable timestamp are skipped.
                # `except Exception` (not a bare `except`) keeps the
                # best-effort skip while still letting KeyboardInterrupt /
                # SystemExit stop a long run.
                try:
                    dt = pd.to_datetime(timestamp, errors='coerce')
                    if pd.isnull(dt):
                        continue
                    date_only = dt.date()
                except Exception:
                    continue

                # Infer user: field 4 wins when the line has at least 6
                # fields, or exactly 5 fields with field 4 starting with
                # "User"; otherwise fall back to the file name.
                user = default_user
                if len(parts) >= 6 or (
                    len(parts) >= 5 and parts[4].strip().startswith("User")
                ):
                    user = parts[4].strip()

                key = (user, date_only)

                # Unrecognised event types are ignored and create no entry.
                metric = EVENT_METRICS.get(event_type)
                if metric is None:
                    continue

                daily_activity[key][metric] += 1

                # Mouse moves additionally record the position, when present.
                if event_type == "Mouse Moved" and len(parts) >= 4:
                    position = f"{parts[2].strip()}_{parts[3].strip()}"
                    unique_positions[key].add(position)

    except Exception as e:
        # Best effort: report the failing file and carry on with the next one.
        print(f"Error processing {file_path}: {e}")

# Compile final daily summaries: one row per (user, date) with every metric
# counted for that pair plus the number of distinct positions seen that day.
records = [
    {
        "user": user,
        "date_only": date,
        **metrics,
        # .get avoids inserting empty sets into the accumulator while reading.
        "unique_positions_per_day": len(unique_positions.get((user, date), ())),
    }
    for (user, date), metrics in daily_activity.items()
]

# Final DataFrame
if records:
    df_summary = pd.DataFrame(records)
    # A metric that never occurred for a given (user, day) is absent from that
    # record and would surface as NaN (turning the column into floats).
    # An absent count means zero events, so store 0 and keep integer columns.
    count_columns = [
        col for col in df_summary.columns if col not in ("user", "date_only")
    ]
    df_summary[count_columns] = df_summary[count_columns].fillna(0).astype(int)
else:
    # No events parsed: build an empty frame that still has the key columns,
    # otherwise sort_values below raises KeyError on the missing columns.
    df_summary = pd.DataFrame(
        columns=["user", "date_only", "unique_positions_per_day"]
    )

df_summary = df_summary.sort_values(by=["user", "date_only"])

# Create the destination folder if needed so the export cannot fail at the
# very end of the run just because the directory is missing.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
df_summary.to_csv(output_path, index=False)

print("Mouse data processed and saved.")
print(f"Output path: {output_path}")
print(f"Rows: {df_summary.shape[0]}, Columns: {df_summary.shape[1]}")


Mouse data processed and saved.
Output path: C:\Users\karun\OneDrive\Documents\RIK\outputs\twos_mouse_summary.csv
Rows: 91, Columns: 5
