In [30]:
import glob
import pandas as pd
import numpy as np
from pathlib import Path

# Collect the per-run telemetry CSVs in sorted path order; when the pattern
# matches nothing, fall back to the single combined file.
run_paths = sorted(glob.glob("../data/telemetry_run*.csv"))
files = run_paths if run_paths else ["../data/telemetry.csv"]

# Read each file and stack the rows into one frame with a fresh 0..n-1 index.
dfs = list(map(pd.read_csv, files))
df = pd.concat(dfs, ignore_index=True)

# Schema guard: the column set must match exactly (nothing missing, nothing extra).
EXPECTED_COLUMNS = {
    "timestamp", "x", "y", "vx", "vy", "action", "ping_ms", "cheat_flag",
}
assert set(df.columns) == EXPECTED_COLUMNS


In [31]:
# Feature engineering: derive speed, acceleration magnitude and trailing
# rolling statistics of speed, then build the model inputs X and labels y.
df = df.copy()

# Speed: Euclidean norm of the velocity vector (vx, vy).
df["speed"] = np.sqrt(df["vx"]**2 + df["vy"]**2)

# Acceleration as the row-to-row difference of velocity (Δvx, Δvy).
# diff() yields NaN on the first row (no predecessor) and fillna(0.0) turns it
# into 0.0, so no separate first-row assignment is needed; the previous
# explicit write through df.index[0] raised IndexError on an empty frame.
# NOTE(review): diff() and the rolling windows below run over the frame in row
# order; when several run files were concatenated, the first rows of each later
# run are computed against the tail of the previous run — confirm this is intended.
df["ax"] = df["vx"].diff().fillna(0.0)
df["ay"] = df["vy"].diff().fillna(0.0)
df["accel_mag"] = np.sqrt(df["ax"]**2 + df["ay"]**2)

# Trailing rolling window (size=5) of speed — same as agent.
# min_periods=1 lets the first rows use a shorter window; the standard
# deviation of a single sample is NaN, which is replaced by 0.0.
W = 5
df["speed_roll_mean"] = df["speed"].rolling(W, min_periods=1).mean()
df["speed_roll_std"]  = df["speed"].rolling(W, min_periods=1).std().fillna(0.0)

# Column order here defines the column order of X.
FEATURES = ["speed","accel_mag","speed_roll_mean","speed_roll_std","ping_ms","action"]
X = df[FEATURES].values
y = df["cheat_flag"].values.astype(int)


In [32]:
import numpy as np

# Blocked split by row position: the leading 80% of rows train, the rest test.
split = int(len(df) * 0.80)
train_rows, test_rows = slice(None, split), slice(split, None)
X_train, y_train = X[train_rows], y[train_rows]
X_test, y_test = X[test_rows], y[test_rows]

# Show the per-class row counts of each part, then require that the training
# part contains both classes.
for label, labels in (("Train counts:", y_train), ("Test  counts:", y_test)):
    print(label, np.bincount(labels))
assert np.unique(y_train).size == 2, "Adjust split or generate more data"


Train counts: [218  22]
Test  counts: [49 11]


In [33]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Label the columns of X with the same sequence that was used to slice X out of
# df (FEATURES), instead of a second hand-typed copy of the list: if the two
# ever drifted apart, columns would be silently mislabelled and the exported
# feature order would be wrong.
feature_order = list(FEATURES)

# X is a NumPy array at this point; wrap it so the columns carry names.
X_df = pd.DataFrame(X, columns=feature_order)

# Stratified random 80/20 split. This rebinds X_train/X_test/y_train/y_test
# produced by the blocked split in the previous cell.
# NOTE(review): rows are shuffled individually while speed_roll_mean/std are
# computed from neighbouring rows, so adjacent rows can land on both sides of
# the split — confirm this evaluation setup is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X_df, y, test_size=0.2, random_state=42, stratify=y
)

# From here on FEATURES is the pandas Index of the training columns.
FEATURES = X_train.columns

# Standardise the features, then fit a class-weighted logistic regression.
logreg = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(max_iter=1000, class_weight="balanced", random_state=42))
])
logreg.fit(X_train, y_train)

# Fitted stages, kept under their own names for later cells.
scaler = logreg.named_steps["scaler"]
clf    = logreg.named_steps["clf"]

# Probability of class 1 on the held-out rows, and labels at a 0.5 cut-off.
proba_test = logreg.predict_proba(X_test)[:,1]
pred_test  = (proba_test >= 0.5).astype(int)


In [34]:
import numpy as np
from sklearn.metrics import precision_recall_fscore_support

# Sweep 36 evenly spaced cut-offs in [0.2, 0.9] and keep the largest
# (F1, precision, recall, threshold) tuple; tuples compare left to right, so
# ties on F1 are broken by precision, then recall, then the higher threshold.
# NOTE(review): the cut-off is selected on proba_test / y_test, i.e. on the
# same rows the reported scores come from — confirm this is acceptable.
thresholds = np.linspace(0.2, 0.9, 36)
best = None
for thr in thresholds:
    pred = (proba_test >= thr).astype(int)
    p, r, f, _ = precision_recall_fscore_support(y_test, pred, average='binary')
    cand = (f, p, r, thr)
    best = cand if best is None else max(best, cand)
print("Best by F1 -> F1=%.3f, P=%.3f, R=%.3f at thr=%.2f" % best)


Best by F1 -> F1=1.000, P=1.000, R=1.000 at thr=0.88


In [35]:
import json, os, time
import numpy as np
import pandas as pd

# Pull the two fitted stages back out of the pipeline by their step names.
fitted_steps = logreg.named_steps
scaler, clf = fitted_steps["scaler"], fitted_steps["clf"]

# 1) Normalize types to JSON-safe Python builtins
def to_list(x):
    """Convert NumPy / pandas containers and scalars to plain Python builtins.

    Parameters
    ----------
    x : object
        A NumPy array or scalar, a pandas Index or Series, a list or tuple
        (possibly nested, possibly holding NumPy scalars), or any other value.

    Returns
    -------
    object
        * NumPy arrays, pandas Index/Series -> (nested) list via ``.tolist()``.
        * NumPy scalars -> the matching Python scalar.
        * list / tuple -> a new list whose items were converted the same way
          (tuples, including nested ones, become lists).
        * anything else -> returned unchanged, assumed to be a builtin already.
    """
    # NumPy scalars (np.int64, np.float32, ...) are not all accepted by the
    # json module, so they are unwrapped here just like arrays.
    if isinstance(x, (np.ndarray, np.generic)):
        return x.tolist()
    if isinstance(x, (pd.Index, pd.Series)):
        return x.tolist()
    if isinstance(x, (list, tuple)):
        # Recurse so NumPy values hidden inside a plain list are converted too.
        return [to_list(item) for item in x]
    return x  # assume already a builtin

# Convert everything that goes into the export to plain Python values.
FEATURES_SAFE = to_list(FEATURES)
MEAN_SAFE     = to_list(scaler.mean_)
SCALE_SAFE    = to_list(scaler.scale_)
COEF_SAFE     = to_list(clf.coef_[0])
INTERCEPT_SAFE= float(np.asarray(clf.intercept_[0]))

# The consumer pairs features, scaler statistics and coefficients by position,
# so refuse to write a file in which their lengths disagree (e.g. after cells
# were re-run out of order and FEATURES no longer matches the fitted model).
assert len(FEATURES_SAFE) == len(MEAN_SAFE) == len(SCALE_SAFE) == len(COEF_SAFE), (
    "feature / scaler / coefficient lengths disagree: "
    f"{len(FEATURES_SAFE)}, {len(MEAN_SAFE)}, {len(SCALE_SAFE)}, {len(COEF_SAFE)}"
)

# Hand-set decision threshold written into the export.
# NOTE(review): the threshold sweep earlier in the notebook stores its result
# in `best`, which is not read here — confirm 0.70 is the intended value.
DECISION_THRESHOLD = 0.70

export = {
  "schema_version": 1,
  "type": "logistic_regression",
  "features": FEATURES_SAFE,             # order matters!
  "scaler_mean": MEAN_SAFE,
  "scaler_scale": SCALE_SAFE,
  "coef": COEF_SAFE,
  "intercept": INTERCEPT_SAFE,
  "decision_threshold": DECISION_THRESHOLD,
  "notes": "trained on concatenated runs; trailing window=5; accel=diff(v)"
}

# 2) Write with a timestamped filename
os.makedirs("../models", exist_ok=True)
ts = time.strftime("%Y%m%d_%H%M%S")
out_path = f"../models/logreg_export_{ts}.json"
# Explicit encoding so the file does not depend on the platform default.
with open(out_path, "w", encoding="utf-8") as f:
    json.dump(export, f, indent=2)

print("Wrote", out_path)


Wrote ../models/logreg_export_20251014_213425.json
