In [1]:
import pandas as pd
p='artifacts/results_stream.parquet'
df = pd.read_parquet(p)
df['final_score'] = df['baseline_score']
df['decision'] = df['final_score'].apply(lambda s: 'block' if s>=0.9 else ('review' if s>=0.6 else 'allow'))
df.to_parquet('artifacts/results_stream_baseline_only.parquet', index=False)
df.to_csv('artifacts/results_stream_baseline_only.csv', index=False)
print("Wrote baseline-only outputs:", len(df))
print(df['decision'].value_counts())


Wrote baseline-only outputs: 5000000
allow    5000000
Name: decision, dtype: int64


In [2]:
import pandas as pd 
df = pd.read_parquet('artifacts/results_stream_baseline_only.parquet') 
print("rows:", len(df)) 
print("baseline_score quantiles:", df['baseline_score'].quantile([0,0.01,0.1,0.5,0.9,0.99,1]).to_dict()) 
print("max baseline_score:", df['baseline_score'].max()) 
print("count baseline>=0.9:", int((df['baseline_score'] >= 0.9).sum())) 
print("count baseline>=0.6:", int((df['baseline_score'] >= 0.6).sum()))

rows: 5000000
baseline_score quantiles: {0.0: 0.0, 0.01: 0.0, 0.1: 0.0, 0.5: 0.0435471546350291, 0.9: 0.0451618229854689, 0.99: 0.046326626111371, 1.0: 0.0655737704918032}
max baseline_score: 0.0655737704918032
count baseline>=0.9: 0
count baseline>=0.6: 0


In [3]:
import pandas as pd, duckdb 
df = pd.read_parquet('artifacts/results_stream_baseline_only.parquet') 
k_alert = 500_000 
sorted_scores = df['baseline_score'].sort_values(ascending=False).reset_index(drop=True) 
thr_alert = float(sorted_scores.iloc[k_alert-1]) 
df['decision'] = df['baseline_score'].apply(lambda s: 'review' 
if s >= thr_alert else 'allow') 
out_pq = 'artifacts/results_stream_alerts_500k_review_only.parquet' 
out_csv = 'artifacts/results_stream_alerts_500k_review_only.csv'
df.to_parquet(out_pq, index=False) 
df.to_csv(out_csv, index=False) 
print("Wrote review-only alerts at threshold:", thr_alert) 
q = f""" SELECT COUNT(*) AS tp FROM ( SELECT transaction_id FROM '{out_pq}' ORDER BY baseline_score DESC LIMIT 500000 ) r JOIN 'financial_fraud_detection_dataset.csv' o USING (transaction_id) WHERE o.is_fraud = TRUE; """ 
print("TP in top500k:", duckdb.query(q).fetchdf()['tp'].iloc[0]) 

Wrote review-only alerts at threshold: 0.0451618229854689
TP in top500k: 30849


In [4]:
import pandas as pd, duckdb 
dfp = 'artifacts/results_stream_baseline_only.parquet' 
df = pd.read_parquet(dfp) 
ks = [50,100,500,1000,5000,10000,50000,100000,500000] 
print("k, threshold, TP, precision") 
for k in ks: 
    thr = float(df['baseline_score'].sort_values(ascending=False).iloc[k-1]) 
    tp = duckdb.query(f""" SELECT COUNT(*) AS tp FROM ( SELECT transaction_id FROM '{dfp}' ORDER BY baseline_score DESC LIMIT {k} ) r JOIN 'financial_fraud_detection_dataset.csv' o USING(transaction_id) WHERE o.is_fraud = TRUE """).fetchdf()['tp'].iloc[0] 
    print(f"{k}, {thr:.8f}, {tp}, {tp/k:.6f}")

k, threshold, TP, precision
50, 0.06557377, 48, 0.960000
100, 0.06557377, 5, 0.050000
500, 0.06557377, 144, 0.288000
1000, 0.06557377, 298, 0.298000
5000, 0.04632663, 1729, 0.345800
10000, 0.04632663, 1954, 0.195400
50000, 0.04632663, 6509, 0.130180
100000, 0.04632663, 12099, 0.120990
500000, 0.04516182, 30849, 0.061698


In [5]:
import pandas as pd 
k_alert = 500_000 
df = pd.read_parquet('artifacts/results_stream_baseline_only.parquet') 
df_sorted = df.sort_values(['baseline_score','transaction_id'], ascending=[False, True]).reset_index(drop=True) 
thr_alert = float(df_sorted['baseline_score'].iloc[k_alert-1]) 
df['decision'] = df['baseline_score'].apply(lambda s: 'review' 
if s >= thr_alert else 'allow') 
df.to_parquet('artifacts/results_stream_alerts_500k_review_only_deterministic.parquet', index=False) 
df.to_csv('artifacts/results_stream_alerts_500k_review_only_deterministic.csv', index=False) 
print("Wrote deterministic review-only top500k at thr:", thr_alert)

Wrote deterministic review-only top500k at thr: 0.0451618229854689


In [6]:
import pandas as pd 
df = pd.read_parquet('artifacts/results_stream_baseline_only.parquet')
df_sorted = df.sort_values(['baseline_score','transaction_id'], ascending=[False, True]).reset_index(drop=True) 
k_alert = 500000 
k_block = 50 
thr_block = float(df_sorted['baseline_score'].iloc[k_block-1]) 
thr_alert = float(df_sorted['baseline_score'].iloc[k_alert-1]) 
def decide(s): 
    if s >= thr_block: 
        return 'block' 
    elif s >= thr_alert: 
        return 'review' 
    else: 
        return 'allow' 
    df['decision'] = df['baseline_score'].apply(decide) 
    df.to_parquet('artifacts/results_stream_alerts_500k_block50.parquet', index=False) 
    df.to_csv('artifacts/results_stream_alerts_500k_block50.csv', index=False) 
    print("Wrote block50 + review500k; thresholds:", thr_block, thr_alert) 
    print(df['decision'].value_counts())

In [7]:
# --- Guard: force baseline combine if anomaly median is suspiciously high ---
import os

# configurable via env var (default 0.6)
_guard_th = os.getenv('ANOM_MEDIAN_GUARD', '0.6')
try:
    guard_threshold = float(_guard_th)
except Exception:
    guard_threshold = 0.6

# attempt to compute anomaly median from the DataFrame used for recombine.
# common names: df_all, df_results, results_df — try a few fallbacks
anom_median = None
for candidate_df_name in ('df_all', 'results_df', 'df_results', 'df'):
    try:
        candidate = globals().get(candidate_df_name)
        if candidate is not None and 'anomaly_score' in candidate.columns:
            anom_median = float(candidate['anomaly_score'].median())
            break
    except Exception:
        anom_median = None

# As a last resort, try to read a recent artifacts file (non-blocking)
if anom_median is None:
    try:
        import pandas as pd
        if os.path.exists('artifacts/results_stream.parquet'):
            tmp = pd.read_parquet('artifacts/results_stream.parquet', columns=['anomaly_score'])
            anom_median = float(tmp['anomaly_score'].median())
        elif os.path.exists('artifacts/results_stream.csv'):
            tmp = pd.read_csv('artifacts/results_stream.csv', usecols=['anomaly_score'])
            anom_median = float(tmp['anomaly_score'].median())
    except Exception:
        anom_median = None

if anom_median is not None and anom_median > guard_threshold:
    print(f"[guard] anomaly median={anom_median:.3f} > {guard_threshold} — forcing combine_mode='baseline'")
    # modify args if present, otherwise set local variable used by recombine
    try:
        args.combine_mode = 'baseline'
    except Exception:
        try:
            chosen_combine_mode = 'baseline'
        except Exception:
            pass
    # write an artifact flag so ops/monitoring can detect guard triggers
    try:
        with open('artifacts/guard_triggered.txt', 'w') as fh:
            fh.write(f'anom_median={anom_median:.6f}, threshold={guard_threshold}\n')
    except Exception:
        pass
# ------------------------------------------------------------------

[guard] anomaly median=0.922 > 0.6 — forcing combine_mode='baseline'


In [1]:
import pandas as pd 
df = pd.read_csv('financial_fraud_detection_dataset.csv', nrows=5) 
print("columns:", df.columns.tolist()) 
print(df.dtypes.to_dict()) 
print("\nfirst rows:\n", df.head(5).to_string(index=False))

columns: ['transaction_id', 'timestamp', 'sender_account', 'receiver_account', 'amount', 'transaction_type', 'merchant_category', 'location', 'device_used', 'is_fraud', 'fraud_type', 'time_since_last_transaction', 'spending_deviation_score', 'velocity_score', 'geo_anomaly_score', 'payment_channel', 'ip_address', 'device_hash']
{'transaction_id': dtype('O'), 'timestamp': dtype('O'), 'sender_account': dtype('O'), 'receiver_account': dtype('O'), 'amount': dtype('float64'), 'transaction_type': dtype('O'), 'merchant_category': dtype('O'), 'location': dtype('O'), 'device_used': dtype('O'), 'is_fraud': dtype('bool'), 'fraud_type': dtype('float64'), 'time_since_last_transaction': dtype('float64'), 'spending_deviation_score': dtype('float64'), 'velocity_score': dtype('int64'), 'geo_anomaly_score': dtype('float64'), 'payment_channel': dtype('O'), 'ip_address': dtype('O'), 'device_hash': dtype('O')}

first rows:
 transaction_id                  timestamp sender_account receiver_account  amount tr

In [2]:
import pandas as pd 
df = pd.read_csv('financial_fraud_detection_dataset.csv', nrows=1000) 
cols = [c for c in df.select_dtypes(include=['number']).columns if c != 'transaction_id'] 
print(','.join(cols))

amount,fraud_type,time_since_last_transaction,spending_deviation_score,velocity_score,geo_anomaly_score


In [3]:
import json, pandas as pd
m = json.load(open('artifacts/level2_isof/manifest.json'))
print("manifest p1,p99:", m.get('p1'), m.get('p99'))
raw = pd.read_parquet('artifacts/level2_isof/raw_scores.parquet')
print("raw metric summary:\n", raw.iloc[:,1].describe())
anom = pd.read_parquet('artifacts/level2_isof/anomaly_scores.parquet')
print("anomaly_score quantiles:\n", anom['anomaly_score'].quantile([0.0,0.01,0.5,0.99,1.0]).to_dict())
print("fraction anomaly_score > 0.5:", (anom['anomaly_score'] > 0.5).mean())

manifest p1,p99: 0.42115231571845724 0.6269080576836324
raw metric summary:
 count    2000.000000
mean        0.488622
std         0.045754
min         0.415034
25%         0.454074
50%         0.481693
75%         0.514196
max         0.698280
Name: raw_isof_score, dtype: float64
anomaly_score quantiles:
 {0.0: 0.0, 0.01: 3.5829746331111693e-06, 0.5: 0.29423407960382386, 0.99: 0.9999413239835215, 1.0: 1.0}
fraction anomaly_score > 0.5: 0.1955


In [4]:
import pandas as pd, os
p='artifacts/results_stream_recombined.parquet'
print("exists:", os.path.exists(p))
if os.path.exists(p):
    df=pd.read_parquet(p)
    print("rows:", len(df))
    print("columns:", df.columns.tolist())
    if 'final_score' in df.columns and 'baseline_score' in df.columns:
        print("final==baseline all?:", (df['final_score']==df['baseline_score']).all())
    if 'anomaly_score' in df.columns:
        print("anomaly_score median:", float(df['anomaly_score'].median()))

exists: True
rows: 5000000
columns: ['transaction_id', 'baseline_score', 'anomaly_score_x', 'llm_adjustment', 'final_score', 'decision', 'anomaly_score_y', 'llm_evidence_ids', 'topk_evidence_ids', 'llm_adjustment_valid', 'llm_adjustment_clamped']
final==baseline all?: True


In [5]:
import json, pandas as pd
m = json.load(open('artifacts/level2_isof_recomputed/manifest.json'))
print("manifest p1,p99:", m.get('p1'), m.get('p99'))
raw = pd.read_parquet('artifacts/level2_isof_recomputed/raw_scores.parquet')
print("raw metric quantiles:\n", raw.iloc[:,1].quantile([0.0,0.01,0.1,0.5,0.9,0.99,1.0]).to_dict())
anom = pd.read_parquet('artifacts/level2_isof_recomputed/anomaly_scores.parquet')
print("anomaly_score quantiles:\n", anom['anomaly_score'].quantile([0.0,0.01,0.5,0.9,0.99,1.0]).to_dict())
print("fraction anomaly_score > 0.95:", (anom['anomaly_score'] > 0.95).mean())
print("median anomaly_score:", float(anom['anomaly_score'].median()))

manifest p1,p99: 0.39893067927202036 0.6375998816089603
raw metric quantiles:
 {0.0: 0.3887290595292977, 0.01: 0.39893067927202036, 0.1: 0.4121032179585345, 0.5: 0.4557627288236442, 0.9: 0.5629627460461307, 0.99: 0.6375998816089603, 1.0: 0.757819854954771}
anomaly_score quantiles:
 {0.0: 0.0, 0.01: 6.478815355932801e-08, 0.5: 0.23812058277796372, 0.9: 0.6872778941228411, 0.99: 0.9999964522586751, 1.0: 1.0}
fraction anomaly_score > 0.95: 0.01445
median anomaly_score: 0.23812058277796372


In [8]:
# Use the already defined variable p for the recombined file
before = pd.read_parquet('artifacts/results_stream.parquet')
after = pd.read_parquet(p)
# join on transaction_id (assumes same index/rows)
df = before.merge(after[['transaction_id','final_score']], on='transaction_id', suffixes=('_before','_after'))
df['delta'] = df['final_score_after'] - df['baseline_score']
print("rows:", len(df))
print("rows where final != baseline:", (df['final_score_after'] != df['baseline_score']).sum())
print("fraction influenced:", (df['final_score_after'] != df['baseline_score']).mean())
print("top positive deltas:\n", df.sort_values('delta', ascending=False).head(10)[['transaction_id','baseline_score','final_score_after','delta']].to_string(index=False))
print("top negative deltas:\n", df.sort_values('delta').head(10)[['transaction_id','baseline_score','final_score_after','delta']].to_string(index=False))

rows: 5000000
rows where final != baseline: 0
fraction influenced: 0.0
top positive deltas:
 transaction_id  baseline_score  final_score_after  delta
       T100000        0.000000           0.000000    0.0
      T3433331        0.043547           0.043547    0.0
      T3433338        0.043547           0.043547    0.0
      T3433337        0.043547           0.043547    0.0
      T3433336        0.043547           0.043547    0.0
      T3433335        0.043547           0.043547    0.0
      T3433334        0.043547           0.043547    0.0
      T3433333        0.043547           0.043547    0.0
      T3433332        0.043547           0.043547    0.0
      T3433330        0.043547           0.043547    0.0
top negative deltas:
 transaction_id  baseline_score  final_score_after  delta
       T100000        0.000000           0.000000    0.0
      T3433337        0.043547           0.043547    0.0
      T3433336        0.043547           0.043547    0.0
      T3433335        0.043547

In [9]:
# filename: scripts/diag_level2_merge.py
# Run: PYTHONPATH=. python scripts/diag_level2_merge.py
import pandas as pd
from pathlib import Path
import json
p_res = Path('artifacts/results_stream.parquet')
# find likely anomaly artifact (adjust path if you used a different name)
p_anom_candidates = list(Path('artifacts').glob('**/anomaly_scores*.parquet'))
p_anom = p_anom_candidates[0] if p_anom_candidates else Path('artifacts/level2_isof/anomaly_scores.parquet')

print("results file:", p_res.exists(), p_res)
print("anomaly candidate picks:", [str(p) for p in p_anom_candidates])
print("using anomaly:", p_anom, "exists:", p_anom.exists())

res = pd.read_parquet(p_res)
an = pd.read_parquet(p_anom)

print("\nresults columns:", res.columns.tolist())
print("anomaly columns:", an.columns.tolist())
print("\nSample transaction_ids (results):", res['transaction_id'].head(5).tolist())
print("Sample transaction_ids (anom):", an['transaction_id'].head(5).tolist())
print("dtypes:", res['transaction_id'].dtype, an['transaction_id'].dtype)

merged = res.merge(an, on='transaction_id', how='left', suffixes=('','_anom'))
print("\nMerged shape:", merged.shape)
print("Count anomaly_score notnull after merge:", merged.get('anomaly_score', merged.get('anomaly_score_anom')).notna().sum(), "of", len(merged))
print("fraction anomaly_score null:", merged.get('anomaly_score', merged.get('anomaly_score_anom')).isna().mean())

# gate diagnostics (use gate 0.20)
gate = 0.20
mask_base_low = merged['baseline_score'] < gate
print(f"baseline < {gate} count:", mask_base_low.sum())
print("baseline<gate & anomaly present:", ((mask_base_low) & merged.get('anomaly_score', merged.get('anomaly_score_anom')).notna()).sum())

print("\nTop merged rows where baseline<gate but anomaly is null (show a few):")
print(merged.loc[(mask_base_low)&merged.get('anomaly_score', merged.get('anomaly_score_anom')).isna()].head(10).to_string(index=False))

results file: True artifacts/results_stream.parquet
anomaly candidate picks: ['artifacts/level2_isof/anomaly_scores.parquet', 'artifacts/level2_isof_v20260124124228/anomaly_scores.parquet', 'artifacts/level2_isof_recomputed/anomaly_scores.parquet']
using anomaly: artifacts/level2_isof/anomaly_scores.parquet exists: True

results columns: ['transaction_id', 'baseline_score', 'anomaly_score', 'llm_adjustment', 'final_score', 'decision']
anomaly columns: ['transaction_id', 'anomaly_score']

Sample transaction_ids (results): ['T100000', 'T100001', 'T100002', 'T100003', 'T100004']
Sample transaction_ids (anom): ['T100000', 'T100001', 'T100002', 'T100003', 'T100004']
dtypes: object object

Merged shape: (5000000, 7)
Count anomaly_score notnull after merge: 5000000 of 5000000
fraction anomaly_score null: 0.0
baseline < 0.2 count: 5000000
baseline<gate & anomaly present: 5000000

Top merged rows where baseline<gate but anomaly is null (show a few):
Empty DataFrame
Columns: [transaction_id, bas

In [10]:
import pandas as pd
from pathlib import Path

res = pd.read_parquet('artifacts/results_stream.parquet')
anom = pd.read_parquet('artifacts/level2_isof/anomaly_scores.parquet')

print("results: rows", len(res))
print("results.anomaly_score dtype:", res['anomaly_score'].dtype)
print("results.anomaly_score not-null:", res['anomaly_score'].notna().sum(), "null:", res['anomaly_score'].isna().sum())
try:
    print("results.anom quantiles:", res['anomaly_score'].quantile([0,0.01,0.1,0.5,0.9,0.99,1]).to_dict())
except Exception as e:
    print("results.anom quantile error:", e)

print("\nartifact: rows", len(anom))
print("artifact.anomaly_score dtype:", anom['anomaly_score'].dtype)
print("artifact.anomaly_score not-null:", anom['anomaly_score'].notna().sum(), "null:", anom['anomaly_score'].isna().sum())
try:
    print("artifact.anom quantiles:", anom['anomaly_score'].quantile([0,0.01,0.1,0.5,0.9,0.99,1]).to_dict())
except Exception as e:
    print("artifact.anom quantile error:", e)

# sample a few raw values from artifact
print("\nartifact sample anomaly_score values (first 20):")
print(anom['anomaly_score'].head(20).tolist())

results: rows 5000000
results.anomaly_score dtype: float64
results.anomaly_score not-null: 5000000 null: 0
results.anom quantiles: {0.0: 0.0, 0.01: 0.002883526452322168, 0.1: 0.01916182418789604, 0.5: 0.9215508206289067, 0.9: 0.9607781988833756, 0.99: 1.0, 1.0: 1.0}

artifact: rows 2000
artifact.anomaly_score dtype: float64
artifact.anomaly_score not-null: 2000 null: 0
artifact.anom quantiles: {0.0: 0.0, 0.01: 3.5829746331111693e-06, 0.1: 0.07250616112361015, 0.5: 0.29423407960382386, 0.9: 0.6431644716512526, 0.99: 0.9999413239835215, 1.0: 1.0}

artifact sample anomaly_score values (first 20):
[0.21454713382763085, 0.3619142161417679, 1.0, 0.6451698734372677, 0.08393591256526844, 0.4089534658326943, 0.7547174108721049, 0.040989094916549805, 0.4640156722499847, 0.17752572933277636, 0.7125686910253216, 0.24199506296472278, 0.1226680351794046, 0.05140764511096533, 0.8462056704484046, 0.37846051169538103, 0.722816857637804, 0.49290286487415946, 0.08902687798943476, 0.6430994289986957]


In [11]:
import pandas as pd
before = pd.read_parquet('artifacts/results_stream.parquet')
after = pd.read_parquet('artifacts/results_stream_recombined_test.parquet')
print("rows:", len(before))
print("rows where final != baseline:", (after['final_score'] != before['baseline_score']).sum())
print("fraction influenced:", ((after['final_score'] != before['baseline_score']).mean()))


rows: 5000000
rows where final != baseline: 0
fraction influenced: 0.0


In [12]:
import pandas as pd
before = pd.read_parquet('artifacts/results_stream.parquet')
after = pd.read_parquet('artifacts/results_stream_recombined_test.parquet')
print("rows:", len(before))
print("rows where final != baseline:", (after['final_score'] != before['baseline_score']).sum())
print("fraction influenced:", ((after['final_score'] != before['baseline_score']).mean()))


rows: 5000000
rows where final != baseline: 0
fraction influenced: 0.0
