In [2]:
import os
import pandas as pd
from datetime import datetime
from collections import defaultdict

folder_path = r"C:/Users/karun/OneDrive/Documents/RIK/data/TWOS-dataset/keystroke_ano"
output_path = r"C:/Users/karun/OneDrive/Documents/RIK/outputs/twos_keystroke_summary.csv"

# Column order of the summary CSV. Passing it to the DataFrame constructor
# keeps the header present even when no input file yields any rows.
SUMMARY_COLUMNS = [
    'user',
    'date_only',
    'key_presses_per_day',
    'key_releases_per_day',
    'unique_keys_used_per_day',
    'session_count_per_day',
    'avg_keys_per_session',
]


def parse_keystroke_line(line: str, default_user: str):
    """Parse one comma-separated log line into an event record.

    Fields are read positionally: timestamp, event type, optional key,
    optional user. Surrounding double quotes are stripped from every field.

    Args:
        line: Raw text line from a keystroke log file.
        default_user: User name to use when the line has no fourth field.

    Returns:
        A dict with keys 'user', 'timestamp', 'date_only', 'event' and 'key',
        or None when the line has fewer than two fields or its first field
        cannot be parsed as a timestamp.
    """
    # NOTE(review): a plain split mis-parses a quoted "," key (the key and user
    # come out empty); consider csv.reader if the data can contain one.
    parts = [p.strip('"') for p in line.strip().split(',')]
    if len(parts) < 2:
        return None

    try:
        timestamp = pd.to_datetime(parts[0], errors='coerce')
    except (ValueError, TypeError, OverflowError):
        # Unparseable timestamps are skipped on purpose (best-effort parsing).
        return None
    if pd.isna(timestamp):
        return None

    return {
        'user': parts[3] if len(parts) > 3 else default_user,
        'timestamp': timestamp,
        'date_only': timestamp.date(),
        'event': parts[1].lower(),
        'key': parts[2] if len(parts) > 2 else None,
    }


def summarize_keystroke_events(events: list) -> list:
    """Aggregate event records into one summary row per (user, day).

    Args:
        events: Non-empty list of dicts as returned by parse_keystroke_line.

    Returns:
        A list of dicts keyed by SUMMARY_COLUMNS, ordered by the sorted
        (user, date_only) group keys.
    """
    rows = []
    df = pd.DataFrame(events)
    for (user, date), group in df.groupby(['user', 'date_only']):
        key_press = int((group['event'] == 'press').sum())
        key_release = int((group['event'] == 'release').sum())
        unique_keys = int(group['key'].nunique())
        # Count restarts within this user/day only. Counting over the whole
        # file would give every day of that file the same, inflated value.
        session_count = int((group['event'] == 'restart').sum())
        total_keys = key_press + key_release

        # A day without any 'restart' event is reported as a single session,
        # so its average is simply the day's total.
        if session_count > 0:
            avg_keys_per_session = round(total_keys / session_count, 2)
        else:
            avg_keys_per_session = total_keys

        rows.append({
            'user': user,
            'date_only': date,
            'key_presses_per_day': key_press,
            'key_releases_per_day': key_release,
            'unique_keys_used_per_day': unique_keys,
            'session_count_per_day': session_count if session_count > 0 else 1,
            'avg_keys_per_session': avg_keys_per_session,
        })
    return rows


def build_keystroke_summary(folder: str) -> list:
    """Read every file in *folder* and return the per-user, per-day summary rows.

    A file that cannot be read or processed is reported on stdout and skipped;
    the remaining files are still processed.

    Args:
        folder: Directory containing the keystroke log files.

    Returns:
        A list of summary-row dicts (see summarize_keystroke_events).

    Raises:
        OSError: If *folder* itself cannot be listed.
    """
    summary = []
    # Sorted so the row order of the output is reproducible across runs.
    for file in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, file)
        # Lines without a user field fall back to the file name up to its first dot.
        default_user = file.split('.')[0]
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                parsed = (parse_keystroke_line(line, default_user) for line in f)
                events = [event for event in parsed if event is not None]
            if events:
                summary.extend(summarize_keystroke_events(events))
        except Exception as e:
            # Per-file boundary: report the failure and continue with the next file.
            print(f"Error processing {file_path}: {e}")
    return summary


# True both when run as a script and inside a notebook cell; the guard only
# keeps the hard-coded paths from being touched when the helpers are imported.
if __name__ == "__main__":
    summary_data = build_keystroke_summary(folder_path)

    # Save results
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    summary_df = pd.DataFrame(summary_data, columns=SUMMARY_COLUMNS)
    summary_df.to_csv(output_path, index=False)
    print(f"Keystroke summary saved to: {output_path}")
    print(f"Total Rows: {summary_df.shape[0]}, Columns: {summary_df.shape[1]}")


Keystroke summary saved to: C:/Users/karun/OneDrive/Documents/RIK/outputs/twos_keystroke_summary.csv
Total Rows: 3572, Columns: 7
