<h1>Parsing</h1>

In [1]:
import os
import pandas as pd
import numpy as np
from demoparser2 import DemoParser
from tqdm import tqdm

In [2]:
def euclidean_distance(x1, y1, x2, y2):
    """Return the straight-line distance between (x1, y1) and (x2, y2).

    Only the two planar coordinates are used; the result is computed with
    plain arithmetic and ``np.sqrt``.
    """
    delta_x = x2 - x1
    delta_y = y2 - y1
    squared_length = delta_x**2 + delta_y**2
    return np.sqrt(squared_length)

In [3]:
def map_weapon_group(weapon_name):
    """Map a raw weapon name to a coarse weapon group.

    Matching is a case-insensitive substring test, tried group by group in the
    order of the table below; the first group with a matching token wins.

    Args:
        weapon_name: Weapon identifier as a string. Any non-string value
            (None, NaN, ...) is treated as unknown.

    Returns:
        One of "pistol", "sniper", "rifle", "smg", "lmg", "shotgun", "melee",
        "grenade" or "unknown".
    """
    if not isinstance(weapon_name, str):
        return "unknown"

    # Order matters: "sniper" is tested before "rifle" because the rifle token
    # "sg" is a substring of "ssg" (e.g. "ssg08"), which would otherwise be
    # filed as a rifle.
    group_tokens = (
        ("pistol", ("deagle", "glock", "usp", "p250", "tec9", "cz75", "five", "revolver")),
        ("sniper", ("awp", "ssg", "scout")),
        ("rifle", ("ak47", "m4a", "galil", "famas", "aug", "sg", "scar", "bizon")),
        ("smg", ("ump", "mac", "mp7", "mp9", "mp5")),
        ("lmg", ("m249", "negev")),
        ("shotgun", ("nova", "xm", "mag", "sawedoff")),
        ("melee", ("knife", "zeus")),
        # "grenade" is one of the groups the notebook's one-hot encoder is
        # fitted on, so the mapper needs to be able to produce it.
        ("grenade", ("grenade", "molotov", "inferno", "flashbang", "decoy")),
    )

    lowered = weapon_name.lower()
    for group, tokens in group_tokens:
        if any(token in lowered for token in tokens):
            return group
    return "unknown"

In [4]:
# Steam IDs whose kills the parsing loop labels as cheater samples (label 1);
# every other non-blacklisted attacker is labelled 0.
cheater_ids = {
    76561199418416731,
}
# Steam IDs whose kills the parsing loop skips entirely (no CSV is written).
blacklist_ids = {
    76561198186059819,
    76561199100764445,
    76561198966647937,
    76561198055577933,
    76561198000262946,
    76561198108693836,
    76561198857014968,
    76561198159777385,
    76561197971438801,
    76561199860940176,
    76561198905866395,
    76561198882862591,
    76561198073420344,
    76561199126463159,
    76561198387190894,
    76561198203272974,
    76561199591446144,
    76561199033190948,
}
# Folder scanned for .dem files (paths are relative to the notebook's working directory).
input_dir = "../data/demonstration/"
# Root folder for the per-kill CSV windows; created up front if it does not exist.
output_dir = "../data/demonstration/parsed"
os.makedirs(output_dir, exist_ok=True)

In [11]:
# Number of ticks of attacker history kept before each kill. The window also
# includes the kill tick itself, so every saved CSV has WINDOW_TICKS + 1 rows.
WINDOW_TICKS = 300


def _is_missing(value):
    """Return True when an event field has no usable value (None/NaN or falsy)."""
    # pd.isna has to run first: NaN is truthy, so `not value` alone lets it through.
    return bool(pd.isna(value)) or not value


def _build_attacker_window(ticks_df, attacker_id, start_tick, end_tick):
    """Return one row per tick in [start_tick, end_tick] for one player.

    Rows are taken from ``ticks_df`` (one sample per tick is kept). Ticks with
    no sample are forward-filled from the previous sample, so ticks before the
    player's first sample in the range stay NaN. Returns None when the player
    has no samples in the range at all.
    """
    in_range = ticks_df["tick"].between(start_tick, end_tick)
    is_attacker = ticks_df["steamid"] == attacker_id
    samples = ticks_df[in_range & is_attacker].drop_duplicates(subset="tick")
    if samples.empty:
        return None

    # Both bounds are inclusive, matching the `between` filter above, so the
    # last row is the kill tick itself.
    every_tick = list(range(start_tick, end_tick + 1))
    return (
        samples.set_index("tick")
        .reindex(every_tick)
        .ffill()
        .rename_axis("tick")
        .reset_index()
    )


# For every .dem file: read the player_death events and the per-tick player
# state, then write one CSV per kill holding the attacker's state over the
# WINDOW_TICKS ticks leading up to (and including) the kill.
for filename in tqdm(os.listdir(input_dir), desc="Parsing demos"):
    if not filename.endswith('.dem'):
        continue
    demo_path = os.path.join(input_dir, filename)
    demo_base = os.path.splitext(filename)[0]

    try:
        parser = DemoParser(demo_path)
        events = parser.parse_event("player_death", player=["X", "Y", "Z", "pitch", "yaw", "steamid"])
        ticks_df = parser.parse_ticks(["tick", "steamid", "X", "Y", "Z", "pitch", "yaw"])

        for _, event in events.iterrows():
            attacker = event.get("attacker_steamid")
            victim = event.get("user_steamid")

            # Deaths without an attacker or victim id carry no usable sample.
            # Without the NaN check, int(NaN) would raise below and the outer
            # handler would abandon the remaining kills of this demo.
            if _is_missing(attacker) or _is_missing(victim):
                continue

            attacker_int = int(attacker)
            if attacker_int in blacklist_ids:
                continue
            label = 1 if attacker_int in cheater_ids else 0

            tick = int(event["tick"])
            start_tick, end_tick = tick - WINDOW_TICKS, tick
            attacker_window = _build_attacker_window(ticks_df, attacker_int, start_tick, end_tick)
            if attacker_window is None:
                continue

            attacker_window["steamid"] = attacker_int
            attacker_window["label"] = label

            # A missing weapon may not be a string; calling .lower() on it
            # would raise and abort the rest of this demo.
            raw_weapon = event.get("weapon", "unknown")
            weapon = raw_weapon.lower() if isinstance(raw_weapon, str) else "unknown"
            attacker_window["weapon_name"] = weapon
            attacker_window["weapon_type"] = map_weapon_group(weapon)

            # Planar (X/Y) attacker-to-victim distance, stored on every row.
            attacker_window["kill_distance"] = euclidean_distance(
                event.get("attacker_X", 0),
                event.get("attacker_Y", 0),
                event.get("user_X", 0),
                event.get("user_Y", 0)
            )

            # Change in view angles and position over the final tick of the
            # window (the kill tick versus the tick before it).
            if attacker_window.shape[0] >= 2:
                motion = attacker_window[["pitch", "yaw", "X", "Y", "Z"]]
                step = motion.iloc[-1] - motion.iloc[-2]
                pitch_delta = step["pitch"]
                # NOTE(review): raw difference; if yaw wraps around (e.g. at
                # +/-180 degrees) this shows a spurious ~360 degree jump.
                # Confirm the angle range and unwrap if needed.
                yaw_delta = step["yaw"]
                speed = np.sqrt(step["X"]**2 + step["Y"]**2 + step["Z"]**2)
            else:
                pitch_delta = yaw_delta = speed = 0

            attacker_window["pitch_delta_at_kill"] = pitch_delta
            attacker_window["yaw_delta_at_kill"] = yaw_delta
            attacker_window["player_speed"] = speed

            subfolder = "cheater" if label == 1 else "legit"
            user_dir = os.path.join(output_dir, subfolder, f"user_{attacker}")
            os.makedirs(user_dir, exist_ok=True)

            csv_name = f"{demo_base}_kill_{start_tick}_to_{end_tick}.csv"
            csv_path = os.path.join(user_dir, csv_name)
            attacker_window.to_csv(csv_path, index=False)
            print(f"Saved {csv_path}")

    except Exception as e:
        # Best effort: a demo that fails to parse is reported and skipped so
        # the remaining demos are still processed.
        print(f"Failed to parse {filename}: {e}")

Parsing demos:  17%|█████                         | 1/6 [00:01<00:05,  1.04s/it]

Saved ../data/demonstration/parsed/legit/user_76561198173700433/ava-legit-firstkill_kill_6325_to_6625.csv
Saved ../data/demonstration/parsed/legit/user_76561198173700433/ava-legit-firstkill_kill_7573_to_7873.csv
Saved ../data/demonstration/parsed/legit/user_76561198173700433/ava-legit-firstkill_kill_11203_to_11503.csv
Saved ../data/demonstration/parsed/legit/user_76561198173700433/ava-legit-firstkill_kill_11966_to_12266.csv
Saved ../data/demonstration/parsed/legit/user_76561198173700433/ava-legit-firstkill_kill_12535_to_12835.csv
Saved ../data/demonstration/parsed/legit/user_76561198173700433/ava-legit-firstkill_kill_19428_to_19728.csv
Saved ../data/demonstration/parsed/legit/user_76561198173700433/ava-legit-firstkill_kill_26419_to_26719.csv
Saved ../data/demonstration/parsed/legit/user_76561198173700433/ava-legit-firstkill_kill_27044_to_27344.csv
Saved ../data/demonstration/parsed/legit/user_76561198173700433/ava-legit-firstkill_kill_27788_to_28088.csv
Saved ../data/demonstration/pars

Parsing demos: 100%|██████████████████████████████| 6/6 [00:01<00:00,  4.27it/s]

Saved ../data/demonstration/parsed/cheater/user_76561199418416731/kyousuke-round2-kill3_kill_5426_to_5726.csv
Saved ../data/demonstration/parsed/cheater/user_76561199418416731/kyousuke-round2-kill3_kill_5582_to_5882.csv
Saved ../data/demonstration/parsed/cheater/user_76561199418416731/kyousuke-round2-kill3_kill_6041_to_6341.csv
Saved ../data/demonstration/parsed/cheater/user_76561199418416731/kyousuke-round2-kill3_kill_6519_to_6819.csv
Saved ../data/demonstration/parsed/cheater/user_76561199418416731/kyousuke-round2-kill3_kill_11021_to_11321.csv
Saved ../data/demonstration/parsed/cheater/user_76561199418416731/kyousuke-round2-kill3_kill_16221_to_16521.csv
Saved ../data/demonstration/parsed/cheater/user_76561199418416731/kyousuke-round2-kill3_kill_19325_to_19625.csv
Saved ../data/demonstration/parsed/cheater/user_76561199418416731/kyousuke-round2-kill3_kill_19435_to_19735.csv
Saved ../data/demonstration/parsed/cheater/user_76561199418416731/kyousuke-round2-kill3_kill_19654_to_19954.csv





<h1>Feature Engineering</h1>

In [7]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder

In [8]:
# Every weapon group the encoder knows about.
WEAPON_GROUPS = [
    'pistol', 'rifle', 'sniper', 'smg', 'lmg',
    'shotgun', 'melee', 'grenade', 'unknown',
]

# Dense (non-sparse) one-hot encoder; handle_unknown="ignore" makes transform
# tolerate values it was not fitted on instead of raising.
weapon_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
# fit() takes a 2-D (n_samples, n_features) input, hence the single-column shape.
weapon_encoder.fit(np.array(WEAPON_GROUPS)[:, np.newaxis])

In [9]:
def engineer_features(df):
    """Derive per-tick aim and movement features from one kill-window frame.

    Reads the columns ``tick``, ``pitch``, ``yaw``, ``X``, ``Y``, ``Z``,
    ``player_speed``, ``pitch_delta_at_kill`` and ``yaw_delta_at_kill``.
    ``steamid``, ``name``, ``weapon_name`` and ``weapon_type`` are removed from
    the result when present; ``weapon_type`` is first one-hot encoded into
    ``weapon_<group>`` columns with the module-level ``weapon_encoder``.

    The input frame is left untouched (a copy is modified). Rows containing
    any NaN are dropped at the end; this removes the leading rows of the
    window, where the diffs and the 10-row ``flip_rate`` window are undefined.

    Args:
        df: DataFrame holding one kill window, one row per tick.

    Returns:
        A new DataFrame with the remaining input columns plus the engineered
        feature columns.
    """
    df = df.copy()
    if 'steamid' in df.columns:
        df = df.drop(columns=['steamid'])

    # Per-row steps reused by several features below.
    # NOTE(review): angle steps are raw differences; if yaw wraps around
    # (e.g. at +/-180 degrees) a wrap shows up as a ~360 degree step.
    tick_step = df['tick'].diff()
    pitch_step = df['pitch'].diff()
    yaw_step = df['yaw'].diff()

    # First, second and third finite differences of the view angles per tick.
    df['pitch_velocity'] = pitch_step / tick_step
    df['yaw_velocity'] = yaw_step / tick_step
    pitch_velocity_step = df['pitch_velocity'].diff()
    yaw_velocity_step = df['yaw_velocity'].diff()
    df['pitch_acceleration'] = pitch_velocity_step / tick_step
    df['yaw_acceleration'] = yaw_velocity_step / tick_step
    df['pitch_jerk'] = df['pitch_acceleration'].diff() / tick_step
    df['yaw_jerk'] = df['yaw_acceleration'].diff() / tick_step

    df['snap_magnitude'] = np.sqrt(df['pitch_delta_at_kill']**2 + df['yaw_delta_at_kill']**2)
    df['speed_rolling_std'] = df['player_speed'].rolling(window=10, min_periods=1).std()
    df['speed_rolling_mean'] = df['player_speed'].rolling(window=10, min_periods=1).mean()
    df['position_delta'] = np.sqrt(df['X'].diff()**2 + df['Y'].diff()**2 + df['Z'].diff()**2)
    df['position_jumpiness'] = df['position_delta'].rolling(window=10, min_periods=1).std()
    df['cumulative_pitch'] = df['pitch'].cumsum()
    df['cumulative_yaw'] = df['yaw'].cumsum()
    df['angle_magnitude'] = np.sqrt(pitch_step**2 + yaw_step**2)

    # Sign of the change in angular velocity, and how often the yaw sign flips.
    df['yaw_change_sign'] = np.sign(yaw_velocity_step)
    df['pitch_change_sign'] = np.sign(pitch_velocity_step)
    df['direction_flips'] = (df['yaw_change_sign'].diff().abs() > 0).astype(int)
    # No min_periods here, so the first 9 rows are NaN and are removed by the
    # final dropna().
    df['flip_rate'] = df['direction_flips'].rolling(window=10).sum()

    df['yaw_rolling_std'] = df['yaw'].rolling(window=10, min_periods=1).std()
    df['pitch_rolling_std'] = df['pitch'].rolling(window=10, min_periods=1).std()
    df['yaw_rolling_mean'] = df['yaw'].rolling(window=10, min_periods=1).mean()
    df['pitch_rolling_mean'] = df['pitch'].rolling(window=10, min_periods=1).mean()

    # A "peak" is a row where angular velocity rose into it and falls after it.
    df['pitch_peaks'] = ((pitch_velocity_step.shift(-1) < 0) &
                         (pitch_velocity_step > 0)).astype(int)
    df['yaw_peaks'] = ((yaw_velocity_step.shift(-1) < 0) &
                       (yaw_velocity_step > 0)).astype(int)

    # Whole-window summary statistics, repeated on every row.
    for col in ['pitch', 'yaw', 'pitch_velocity', 'yaw_velocity', 'angle_magnitude']:
        series = df[col]
        col_min, col_max = series.min(), series.max()
        df[f'{col}_mean'] = series.mean()
        df[f'{col}_std'] = series.std()
        df[f'{col}_min'] = col_min
        df[f'{col}_max'] = col_max
        df[f'{col}_range'] = col_max - col_min
        df[f'{col}_skew'] = series.skew()
        df[f'{col}_kurtosis'] = series.kurt()

    if 'weapon_type' in df.columns:
        # The encoder was fitted on a plain, unnamed array; passing a named
        # DataFrame here makes scikit-learn warn about mismatched feature
        # names on every call, so hand it the underlying array instead.
        encoded_weapons = weapon_encoder.transform(df[['weapon_type']].to_numpy())
        df = df.reset_index(drop=True)
        encoded_df = pd.DataFrame(
            encoded_weapons,
            columns=[f'weapon_{cls}' for cls in weapon_encoder.categories_[0]],
            index=df.index,
        )
        df = pd.concat([df, encoded_df], axis=1)

    df = df.drop(columns=[col for col in ['name', 'weapon_name', 'weapon_type'] if col in df.columns])
    return df.dropna()

In [12]:
# Read every per-kill CSV under <base_input>/<category>/<user folder>/ and
# write its engineered features to <base_output>/<category>/.
base_input = "../data/demonstration/parsed"
base_output = "../data/demonstration/features"
os.makedirs(base_output, exist_ok=True)

for category in ["cheater", "legit"]:
    input_path = os.path.join(base_input, category)
    output_path = os.path.join(base_output, category)
    os.makedirs(output_path, exist_ok=True)

    # The parsing step only creates a category folder once it has saved a kill
    # for it, so a dataset with no kills of one category has no such folder.
    if not os.path.isdir(input_path):
        print(f" Skipping {category}: {input_path} not found")
        continue

    # Output is flattened across user folders, so remember the names written
    # in this run to avoid one user's file silently overwriting another's.
    written_names = set()

    # Sorted listings keep the processing order (and therefore the collision
    # handling below) deterministic across runs and platforms.
    for user_folder in sorted(os.listdir(input_path)):
        user_dir = os.path.join(input_path, user_folder)
        if not os.path.isdir(user_dir):
            continue
        for file in sorted(os.listdir(user_dir)):
            if not file.endswith(".csv"):
                continue
            try:
                fpath = os.path.join(user_dir, file)
                df = pd.read_csv(fpath)
                df_features = engineer_features(df)
                outname = f"engineered_{file}"
                if outname in written_names:
                    # Same demo and kill window from a different user folder:
                    # disambiguate instead of overwriting the earlier file.
                    outname = f"engineered_{user_folder}_{file}"
                written_names.add(outname)
                df_features.to_csv(os.path.join(output_path, outname), index=False)
                print(f" Processed: {outname}")
            except Exception as e:
                # Best effort: report the failing file and continue with the rest.
                print(f" Error in {file}: {e}")



 Processed: engineered_kyousuke-round2-kill3_kill_11021_to_11321.csv
 Processed: engineered_kyousuke-round2-kill3_kill_16221_to_16521.csv
 Processed: engineered_kyousuke-round2-kill3_kill_6519_to_6819.csv
 Processed: engineered_kyousuke-round2-kill3_kill_5426_to_5726.csv
 Processed: engineered_kyousuke-round2-kill3_kill_47006_to_47306.csv
 Processed: engineered_kyousuke-round2-kill3_kill_43550_to_43850.csv
 Processed: engineered_kyousuke-round2-kill3_kill_19654_to_19954.csv




 Processed: engineered_kyousuke-round2-kill3_kill_5582_to_5882.csv
 Processed: engineered_kyousuke-round2-kill3_kill_6041_to_6341.csv
 Processed: engineered_kyousuke-round2-kill3_kill_23547_to_23847.csv
 Processed: engineered_kyousuke-round2-kill3_kill_19435_to_19735.csv
 Processed: engineered_kyousuke-round2-kill3_kill_19325_to_19625.csv
 Processed: engineered_ava-legit-firstkill_kill_91928_to_92228.csv
 Processed: engineered_ava-legit-firstkill_kill_92701_to_93001.csv




 Processed: engineered_ava-legit-firstkill_kill_6325_to_6625.csv
 Processed: engineered_ava-legit-firstkill_kill_45088_to_45388.csv
 Processed: engineered_ava-legit-firstkill_kill_149963_to_150263.csv
 Processed: engineered_ava-legit-firstkill_kill_26419_to_26719.csv
 Processed: engineered_ava-legit-firstkill_kill_91447_to_91747.csv
 Processed: engineered_ava-legit-firstkill_kill_11203_to_11503.csv
 Processed: engineered_ava-legit-firstkill_kill_27044_to_27344.csv




 Processed: engineered_ava-legit-firstkill_kill_126852_to_127152.csv
 Processed: engineered_ava-legit-firstkill_kill_7573_to_7873.csv
 Processed: engineered_ava-legit-firstkill_kill_84408_to_84708.csv
 Processed: engineered_ava-legit-firstkill_kill_71079_to_71379.csv
 Processed: engineered_ava-legit-firstkill_kill_12535_to_12835.csv
 Processed: engineered_ava-legit-firstkill_kill_135168_to_135468.csv
 Processed: engineered_ava-legit-firstkill_kill_86581_to_86881.csv
 Processed: engineered_ava-legit-firstkill_kill_137062_to_137362.csv
 Processed: engineered_ava-legit-firstkill_kill_62942_to_63242.csv
 Processed: engineered_ava-legit-firstkill_kill_27788_to_28088.csv
 Processed: engineered_ava-legit-firstkill_kill_19428_to_19728.csv
 Processed: engineered_ava-legit-firstkill_kill_11966_to_12266.csv


