# Feature Extraction Pipeline

Extract ML-ready features from canonical events.


In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

def _is_headshot(payload):
    """Return True when a KILL event payload flags the kill as a headshot.

    Missing payloads (``None`` or a float NaN, which is how pandas represents
    nulls in object columns) are treated as "not a headshot" instead of
    raising ``AttributeError`` on ``.get``.
    """
    if payload is None or (isinstance(payload, float) and np.isnan(payload)):
        return False
    return bool(payload.get("headshot", False))


def extract_team_features(events_df):
    """Build per-(match, team, round) kill features from an events frame.

    Args:
        events_df: DataFrame with at least the columns ``event_type``,
            ``event_id``, ``match_id``, ``team``, ``round`` and ``payload``.
            Non-null payloads must support ``.get("headshot", default)``.

    Returns:
        DataFrame with one row per (match_id, team, round) that has at least
        one KILL event, and the columns ``match_id``, ``team``, ``round``,
        ``kills``, ``headshots`` and ``avg_kills_last_5``.
    """
    kills = events_df[events_df["event_type"] == "KILL"]

    # groupby sorts by its keys by default, so within each (match_id, team)
    # the rows come out ordered by round -- the rolling window relies on that.
    features = (
        kills.groupby(["match_id", "team", "round"])
        .agg(
            kills=("event_id", "count"),
            headshots=(
                "payload",
                lambda payloads: sum(map(_is_headshot, payloads)),
            ),
        )
        .reset_index()
    )

    # Rolling mean over up to 5 rounds, *including* the current round.
    # transform() returns a result aligned to features.index; the previous
    # groupby().rolling() + reset_index(0, drop=True) dropped only one of the
    # two group levels and left a MultiIndex that could not be aligned.
    features["avg_kills_last_5"] = features.groupby(["match_id", "team"])[
        "kills"
    ].transform(lambda s: s.rolling(5, min_periods=1).mean())

    return features


# In a notebook kernel or a script run __name__ is "__main__", so this cell
# still loads, extracts and saves (and still defines `events_df` / `features`
# as globals); the guard only keeps the helpers importable without disk I/O.
if __name__ == "__main__":
    # Load events
    events_df = pd.read_parquet("../data/events.parquet")

    # Extract team-level features (including rolling averages)
    features = extract_team_features(events_df)

    # Save features; parents=True so a missing ../data does not abort the run
    output_dir = Path("../data/features")
    output_dir.mkdir(parents=True, exist_ok=True)
    features.to_parquet(output_dir / "team_features.parquet")

    print(f"Extracted {len(features)} feature vectors")
