In [1]:
# Notebook setup: ensure we run from repo root (so relative paths like configs/ and runs/ work)
import os
import sys
from pathlib import Path

def _find_repo_root(start: Path | None = None) -> Path:
    p = (start or Path.cwd()).resolve()
    for cand in [p] + list(p.parents):
        if (cand / "pyproject.toml").exists() and (cand / "src").exists():
            return cand
    # Fallback: if executed from notebooks/, go one level up
    if p.name.lower() == "notebooks" and (p.parent / "src").exists():
        return p.parent
    return p

# Resolve the repo root once and make it the working directory, so relative
# paths used by later cells resolve against it.
REPO_ROOT = _find_repo_root()
os.chdir(REPO_ROOT)

# Put <repo>/src at the front of sys.path (only once, and only if it exists)
# so packages under src/ can be imported.
src_path = REPO_ROOT / "src"
if str(src_path) not in sys.path and src_path.exists():
    sys.path.insert(0, str(src_path))

print("Repo root:", REPO_ROOT)

# Note:
# This notebook assumes configs/ and runs/ are relative to the repo root.


Repo root: C:\Users\Martín\Desktop\inkswarm-core\usul-inkswarm-detectlab


# FeatureLab — login_attempt


This notebook builds **login_attempt** features for an existing run_id, then inspects the resulting feature table.


In [11]:
from pathlib import Path
import pandas as pd

from inkswarm_detectlab.config import load_config
from inkswarm_detectlab.features import build_login_features_for_run
from inkswarm_detectlab.io.tables import read_auto

# Resolve the config file: use configs/skynet_smoke.yaml when it exists,
# otherwise fall back to the nested configs/configs/ location.
primary_cfg = Path("configs/skynet_smoke.yaml")
nested_cfg = Path("configs") / "configs" / "skynet_smoke.yaml"
cfg_path = primary_cfg if primary_cfg.exists() else nested_cfg
cfg = load_config(cfg_path)

# The run id is pinned here; change it to inspect another run.
# (Alternative left disabled by the author: take it from cfg.run.run_id.)
run_id = "RUN_2026"
RUN_ID = run_id  # upper-case alias read by the visibility-helpers cell

# Build the login_attempt features for this run.
# NOTE(review): force=True is passed on every execution; the original note
# described the build as idempotent unless forced, so this presumably
# rebuilds each time — confirm that is intended.
build_login_features_for_run(cfg, run_id=run_id, force=True)

# Load the resulting feature table from the run's features directory.
features_path = Path(cfg.paths.runs_dir) / run_id / "features" / "login_attempt" / "features"
feat_df = read_auto(features_path)
feat_df.head()


  df[rname] = (df[name] / denom).fillna(0.0)
  df[ps] = sums[payment_col]
  df[pm] = (df[ps] / denom).fillna(0.0)
  df[cnt_name] = sums["_one"]
  df[name] = sums[oc]
  df[rname] = (df[name] / denom).fillna(0.0)
  df[name] = sums[oc]
  df[rname] = (df[name] / denom).fillna(0.0)
  df[name] = sums[oc]
  df[rname] = (df[name] / denom).fillna(0.0)
  df[name] = sums[oc]
  df[rname] = (df[name] / denom).fillna(0.0)
  df[ps] = sums[payment_col]
  df[pm] = (df[ps] / denom).fillna(0.0)
  df[cnt_name] = sums["_one"]
  df[name] = sums[oc]
  df[rname] = (df[name] / denom).fillna(0.0)
  df[name] = sums[oc]
  df[rname] = (df[name] / denom).fillna(0.0)
  df[name] = sums[oc]
  df[rname] = (df[name] / denom).fillna(0.0)
  df[name] = sums[oc]
  df[rname] = (df[name] / denom).fillna(0.0)
  df[ps] = sums[payment_col]
  df[pm] = (df[ps] / denom).fillna(0.0)
  df[cnt_name] = sums["_one"]
  df[name] = sums[oc]
  df[rname] = (df[name] / denom).fillna(0.0)
  df[name] = sums[oc]
  df[rname] = (df[name] / denom).

Unnamed: 0,cross__checkout_attempt__device_1h__adverse_cnt,cross__checkout_attempt__device_1h__adverse_rate,cross__checkout_attempt__device_1h__event_cnt,cross__checkout_attempt__device_1h__failure_cnt,cross__checkout_attempt__device_1h__failure_rate,cross__checkout_attempt__device_1h__payment_value_mean,cross__checkout_attempt__device_1h__payment_value_sum,cross__checkout_attempt__device_1h__review_cnt,cross__checkout_attempt__device_1h__review_rate,cross__checkout_attempt__device_1h__success_cnt,...,user_7d__lockout_rate,user_7d__success_cnt,user_7d__success_rate,user_7d__support_contacted_cnt,user_7d__support_cost_usd_sum,user_7d__support_handle_seconds_sum,user_7d__support_wait_seconds_sum,user_7d__uniq_device_fingerprint_hash_cnt,user_7d__uniq_ip_hash_cnt,user_id
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,user_00035
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,user_00101
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,user_00028
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,user_00035
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.25,0.0,0.0,0.0,0.0,0,0,user_00061


In [12]:
# Basic sanity: mean of every column whose name starts with "label_", highest
# first (read as label prevalence, assuming these are 0/1 indicator columns).
label_cols = list(filter(lambda col: col.startswith("label_"), feat_df.columns))
label_prevalence = feat_df.loc[:, label_cols].mean()
label_prevalence.sort_values(ascending=False)


label_benign           0.939675
label_replicators      0.044624
label_the_chameleon    0.030208
label_the_mule         0.025801
dtype: float64

In [13]:
# Summarise a small sample of features: the first five columns containing
# "__attempt_cnt" followed by the first five columns ending in "failure_rate".
attempt_cnt_cols = [name for name in feat_df.columns if "__attempt_cnt" in name]
failure_rate_cols = [name for name in feat_df.columns if name.endswith("failure_rate")]
cols = attempt_cnt_cols[:5] + failure_rate_cols[:5]
feat_df[cols].describe().transpose()


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
device_1h__attempt_cnt,10891.0,0.890276,1.192488,0.0,0.0,1.0,1.0,10.0
device_24h__attempt_cnt,10891.0,17.359747,14.702718,0.0,7.0,14.0,23.0,70.0
device_6h__attempt_cnt,10891.0,5.018456,4.716804,0.0,2.0,4.0,7.0,27.0
device_7d__attempt_cnt,10891.0,65.047746,69.096827,0.0,18.0,44.0,87.0,423.0
ip_1h__attempt_cnt,10891.0,0.898816,1.1972,0.0,0.0,1.0,1.0,10.0
cross__checkout_attempt__device_1h__failure_rate,10891.0,9.2e-05,0.009582,0.0,0.0,0.0,0.0,1.0
cross__checkout_attempt__device_24h__failure_rate,10891.0,0.004231,0.060305,0.0,0.0,0.0,0.0,1.0
cross__checkout_attempt__device_6h__failure_rate,10891.0,0.001607,0.038595,0.0,0.0,0.0,0.0,1.0
cross__checkout_attempt__device_7d__failure_rate,10891.0,0.010449,0.081828,0.0,0.0,0.0,0.0,1.0
cross__checkout_attempt__ip_1h__failure_rate,10891.0,9.2e-05,0.009582,0.0,0.0,0.0,0.0,1.0


# Notebook visibility helpers

Use these helpers to locate the feature cache and tail the logs for the run.


In [14]:
from pathlib import Path
from inkswarm_detectlab.ui.notebook_tools import find_run_dir, print_run_tree, tail_text

# RUN_ID is inherited from the feature-build cell above in this notebook.
# To inspect a different run, assign it here first, e.g.:
#   RUN_ID = "RR2_MVP_ZIP_A_0002"

# Search from the parent directory when the kernel is sitting in notebooks/,
# otherwise from the current directory.
if Path.cwd().name == "notebooks":
    ROOT = Path("..").resolve()
else:
    ROOT = Path(".").resolve()

run_dir = find_run_dir(ROOT, RUN_ID)
print_run_tree(run_dir)

# Print the last 200 lines of the FeatureLab log for this run.
print("\n--- featurelab.log (tail) ---\n")
featurelab_log = run_dir / "share/logs/featurelab.log"
print(tail_text(featurelab_log, n_lines=200))


- share/logs: C:\Users\Martín\Desktop\inkswarm-core\usul-inkswarm-detectlab\runs\RUN_2026\share\logs 
- share/reports: C:\Users\Martín\Desktop\inkswarm-core\usul-inkswarm-detectlab\runs\RUN_2026\share\reports 
- models: C:\Users\Martín\Desktop\inkswarm-core\usul-inkswarm-detectlab\runs\RUN_2026\models 
- reports: C:\Users\Martín\Desktop\inkswarm-core\usul-inkswarm-detectlab\runs\RUN_2026\reports 
- logs: C:\Users\Martín\Desktop\inkswarm-core\usul-inkswarm-detectlab\runs\RUN_2026\logs 

--- featurelab.log (tail) ---

<missing: C:\Users\Martín\Desktop\inkswarm-core\usul-inkswarm-detectlab\runs\RUN_2026\share\logs\featurelab.log>
