In [5]:
import duckdb
q1 = """
SELECT SUM(CASE WHEN o.is_fraud THEN 1 ELSE 0 END) AS tp
FROM (SELECT transaction_id FROM '/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream.parquet' ORDER BY baseline_score DESC LIMIT 100) r
JOIN '/workspaces/fraud-detection-duckdb-llm/financial_fraud_detection_dataset.csv' o USING (transaction_id);
"""
q2 = """
SELECT SUM(CASE WHEN o.is_fraud THEN 1 ELSE 0 END) AS tp
FROM (SELECT transaction_id FROM '/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream.parquet' ORDER BY final_score DESC LIMIT 100) r
JOIN '/workspaces/fraud-detection-duckdb-llm/financial_fraud_detection_dataset.csv' o USING (transaction_id);
"""
print('Baseline top100 frauds:', duckdb.query(q1).fetchdf())
print('Final top100 frauds:', duckdb.query(q2).fetchdf())

Baseline top100 frauds:      tp
0  93.0
Final top100 frauds:     tp
0  1.0


In [8]:
import duckdb 
q = """ SELECT r.transaction_id, r.baseline_score, r.anomaly_score, r.final_score, o.is_fraud FROM '/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream.parquet' r JOIN '/workspaces/fraud-detection-duckdb-llm/financial_fraud_detection_dataset.csv' o USING (transaction_id) ORDER BY r.final_score DESC LIMIT 50; """ 
print(duckdb.query(q).df()) 

   transaction_id  baseline_score  anomaly_score  final_score  is_fraud
0         T156071        0.000000            1.0          1.0     False
1         T156146        0.000000            1.0          1.0     False
2         T156697        0.000000            1.0          1.0     False
3         T156784        0.000000            1.0          1.0     False
4         T156918        0.000000            1.0          1.0     False
5         T157400        0.000000            1.0          1.0     False
6         T157629        0.000000            1.0          1.0     False
7         T157752        0.000000            1.0          1.0     False
8         T157860        0.000000            1.0          1.0     False
9         T158165        0.000000            1.0          1.0     False
10        T158180        0.000000            1.0          1.0     False
11        T158233        0.000000            1.0          1.0     False
12        T158244        0.000000            1.0          1.0   

In [10]:
import pandas as pd
df = pd.read_parquet('/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream.parquet') 
for col in ['baseline_score','anomaly_score','final_score']: 
    print(col, df[col].quantile([0,0.01,0.1,0.5,0.9,0.95,0.99,1]).to_dict()) 

baseline_score {0.0: 0.0, 0.01: 0.0, 0.1: 0.0, 0.5: 0.0435471546350291, 0.9: 0.0451618229854689, 0.95: 0.0452653485952133, 0.99: 0.046326626111371, 1.0: 0.0655737704918032}
anomaly_score {0.0: 0.0, 0.01: 0.002883526452322168, 0.1: 0.01916182418789604, 0.5: 0.9215508206289067, 0.9: 0.9607781988833756, 0.95: 0.9728427744390743, 0.99: 1.0, 1.0: 1.0}
final_score {0.0: 0.0, 0.01: 0.009363344075304085, 0.1: 0.0435471546350291, 0.5: 0.9215508206289067, 0.9: 0.9607781988833756, 0.95: 0.9728427744390743, 0.99: 1.0, 1.0: 1.0}


In [4]:
import pandas as pd
p = '/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream.parquet'
df = pd.read_parquet(p)
##print("rows:", len(df))
##print(df.head(10).to_string(index=False))
##print("\nDecision counts:\n", df['decision'].value_counts())
out = '/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream.csv'
df.to_csv(out, index=False)
print("Wrote", out, "rows:", len(df))

Wrote /workspaces/fraud-detection-duckdb-llm/artifacts/results_stream.csv rows: 5000000


In [5]:
import pandas as pd, numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

##res = pd.read_parquet('artifacts/results_stream.parquet')
orig = pd.read_csv('/workspaces/fraud-detection-duckdb-llm/financial_fraud_detection_dataset.csv', usecols=['transaction_id','is_fraud'])
m = orig.merge(df, on='transaction_id', how='left')
y = m['is_fraud'].astype(str).str.upper().map({'TRUE':1,'FALSE':0}).fillna(0).astype(int)
print("rows:", len(m), "positives:", y.sum())
print("Baseline AUC:", roc_auc_score(y, m['baseline_score']))
print("Combined AUC:", roc_auc_score(y, m['final_score']))
print("Baseline PR-AUC:", average_precision_score(y, m['baseline_score']))
print("Combined PR-AUC:", average_precision_score(y, m['final_score']))
for k in (50,100,500):
    topb = m.sort_values('baseline_score', ascending=False).head(k)['is_fraud'].astype(str).str.upper().map({'TRUE':1,'FALSE':0}).sum()
    topf = m.sort_values('final_score', ascending=False).head(k)['is_fraud'].astype(str).str.upper().map({'TRUE':1,'FALSE':0}).sum()
    print(f"Precision@{k}: baseline={(topb/k):.3f}, final={(topf/k):.3f}")

rows: 5000000 positives: 179553
Baseline AUC: 0.618114701100321
Combined AUC: 0.519599362990919
Baseline PR-AUC: 0.05420722358447542
Combined PR-AUC: 0.03767990075434456
Precision@50: baseline=0.520, final=0.100
Precision@100: baseline=0.520, final=0.050
Precision@500: baseline=0.554, final=0.032


In [8]:
import duckdb
q = """
SELECT
  (SELECT SUM(CASE WHEN o.is_fraud ILIKE 'TRUE' THEN 1 ELSE 0 END) FROM (SELECT transaction_id FROM '/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream.parquet' ORDER BY baseline_score DESC LIMIT 100) r JOIN '/workspaces/fraud-detection-duckdb-llm/financial_fraud_detection_dataset.csv' o USING(transaction_id)) AS baseline_top100_tp,
  (SELECT SUM(CASE WHEN o.is_fraud ILIKE 'TRUE' THEN 1 ELSE 0 END) FROM (SELECT transaction_id FROM '/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream.parquet' ORDER BY final_score DESC LIMIT 100) r JOIN '/workspaces/fraud-detection-duckdb-llm/financial_fraud_detection_dataset.csv' o USING(transaction_id)) AS final_top100_tp
"""
print(duckdb.query(q).fetchdf())

BinderException: Binder Error: No function matches the given name and argument types '~~*(BOOLEAN, STRING_LITERAL)'. You might need to add explicit type casts.
	Candidate functions:
	~~*(VARCHAR, VARCHAR) -> BOOLEAN


In [1]:
import duckdb

q = """
SELECT
  (SELECT SUM(CASE WHEN o.is_fraud = TRUE THEN 1 ELSE 0 END)
     FROM (SELECT transaction_id FROM 'artifacts/results_stream.parquet' ORDER BY baseline_score DESC LIMIT 100) r
     JOIN 'financial_fraud_detection_dataset.csv' o USING (transaction_id)
  ) AS baseline_top100_tp,
  (SELECT SUM(CASE WHEN o.is_fraud = TRUE THEN 1 ELSE 0 END)
     FROM (SELECT transaction_id FROM 'artifacts/results_stream.parquet' ORDER BY final_score DESC LIMIT 100) r
     JOIN 'financial_fraud_detection_dataset.csv' o USING (transaction_id)
  ) AS final_top100_tp;
"""

print(duckdb.query(q).fetchdf())

   baseline_top100_tp  final_top100_tp
0                93.0              4.0


In [2]:
import pandas as pd 
p='/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream.parquet'
df = pd.read_parquet(p) 
df['final_score'] = df['baseline_score'] 
df['decision'] = df['final_score'].apply(lambda s: 'block' if s>=0.9 else ('review' if s>=0.6 else 'allow')) 
df.to_parquet('/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream_baseline_only.parquet', index=False) 
df.to_csv('/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream_baseline_only.csv', index=False) 
print("Wrote /workspaces/fraud-detection-duckdb-llm/artifacts/results_stream_baseline_only.{parquet,csv}, rows:", len(df)) 
print(df['decision'].value_counts())

Wrote /workspaces/fraud-detection-duckdb-llm/artifacts/results_stream_baseline_only.{parquet,csv}, rows: 5000000
allow    5000000
Name: decision, dtype: int64


In [3]:
import duckdb 
q = """ SELECT (SELECT COUNT() FROM (SELECT transaction_id FROM '/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream.parquet' ORDER BY baseline_score DESC LIMIT 100) r JOIN '/workspaces/fraud-detection-duckdb-llm/financial_fraud_detection_dataset.csv' o USING (transaction_id) WHERE o.is_fraud = TRUE) AS baseline_top100_tp, (SELECT COUNT() FROM (SELECT transaction_id FROM '/workspaces/fraud-detection-duckdb-llm/artifacts/results_stream.parquet' ORDER BY final_score DESC LIMIT 100) r JOIN '/workspaces/fraud-detection-duckdb-llm/financial_fraud_detection_dataset.csv' o USING (transaction_id) WHERE o.is_fraud = TRUE) AS final_top100_tp; """ 
print(duckdb.query(q).fetchdf())

   baseline_top100_tp  final_top100_tp
0                   3                4


In [4]:
import duckdb

# compute TP counts and precision@k for several ks and overlap between top lists
ks = [50,100,500,1000]
queries = []
for k in ks:
    queries.append(f"""
    SELECT
      {k} AS k,
      (SELECT COUNT(*) FROM (SELECT transaction_id FROM 'artifacts/results_stream.parquet' ORDER BY baseline_score DESC LIMIT {k}) r
         JOIN 'financial_fraud_detection_dataset.csv' o USING (transaction_id) WHERE o.is_fraud = TRUE) AS baseline_tp,
      (SELECT COUNT(*) FROM (SELECT transaction_id FROM 'artifacts/results_stream.parquet' ORDER BY final_score DESC LIMIT {k}) r
         JOIN 'financial_fraud_detection_dataset.csv' o USING (transaction_id) WHERE o.is_fraud = TRUE) AS final_tp,
      (SELECT COUNT(*) FROM
         (SELECT transaction_id FROM 'artifacts/results_stream.parquet' ORDER BY baseline_score DESC LIMIT {k}) b
         JOIN (SELECT transaction_id FROM 'artifacts/results_stream.parquet' ORDER BY final_score DESC LIMIT {k}) f USING (transaction_id)
      ) AS overlap_topk
    """)
q = " UNION ALL ".join(queries) + ";"
print(duckdb.query(q).fetchdf())

      k  baseline_tp  final_tp  overlap_topk
0    50            3         2             0
1   100           93         4             0
2   500          483         0             0
3  1000          725        31             3


In [10]:
import pandas as pd, json, os
p = 'artifacts/results_stream_sample_recalibrated.parquet'
if os.path.exists(p):
    df = pd.read_parquet(p)
    print("rows:", len(df))
    print("anomaly quantiles:", df['anomaly_score'].quantile([0.01,0.1,0.5,0.9,0.99]).to_dict())
    print("baseline quantiles:", df['baseline_score'].quantile([0.01,0.1,0.5,0.9,0.99]).to_dict())
    print("final quantiles:", df['final_score'].quantile([0.01,0.1,0.5,0.9,0.99]).to_dict())
else:
    print("No sample recalibrated output found:", p)

mfile = 'artifacts/manifest_ae_anomaly_e40c79565705.sample.updated.json'
if os.path.exists(mfile):
    print("\nmanifest:", json.load(open(mfile)))
else:
    print("\nNo sample manifest found:", mfile)

No sample recalibrated output found: artifacts/results_stream_sample_recalibrated.parquet

No sample manifest found: artifacts/manifest_ae_anomaly_e40c79565705.sample.updated.json


In [12]:
import os, joblib, json, pprint, traceback
p = 'artifacts/ae_anomaly_e40c79565705.pkl'
print("artifact exists:", os.path.exists(p))
try:
    a = joblib.load(p)
    print("Loaded artifact type:", type(a))
    print("Top-level keys:", list(a.keys()))
    # show relevant fields if present
    for k in ('model_file','model_dir','scaler','features','manifest','iso'):
        if k in a:
            print(f"\n--- {k} ---")
            v = a[k]
            if isinstance(v, (str,)):
                print(v, "exists:", os.path.exists(v))
            else:
                try:
                    pprint.pprint(type(v))
                except Exception:
                    print("type:", type(v))
    # print manifest summary if present
    if 'manifest' in a:
        print("\nmanifest snippet:")
        pprint.pprint({kk:a['manifest'].get(kk) for kk in ['p1_mse','p99_mse'] if kk in a['manifest']})
except Exception as e:
    print("Failed to load/inspect anomaly artifact:", e)
    traceback.print_exc()

artifact exists: True
Loaded artifact type: <class 'dict'>
Top-level keys: ['scaler', 'features', 'manifest', 'encoder', 'model_file']

--- model_file ---
artifacts/ae_anomaly_e40c79565705.keras exists: True

--- scaler ---
<class 'sklearn.preprocessing._data.StandardScaler'>

--- features ---
<class 'list'>

--- manifest ---
<class 'dict'>

manifest snippet:
{'p1_mse': 0.04913485795259476, 'p99_mse': 1.0583398342132568}


In [13]:
import os, joblib, traceback
try:
    an = joblib.load('artifacts/ae_anomaly_e40c79565705.pkl')
    model_path = an.get('model_file') or an.get('model_dir')
    print("Attempting to load TF model from:", model_path)
    import tensorflow as tf
    m = tf.keras.models.load_model(model_path, compile=False)
    print("Loaded TF model; summary (first 3 lines):")
    s = []
    m.summary(print_fn=lambda x: s.append(x))
    print("\n".join(s[:3]))
except Exception as e:
    print("TF model load/predict failed:", e)
    traceback.print_exc()

Attempting to load TF model from: artifacts/ae_anomaly_e40c79565705.keras


2026-01-18 11:50:05.605766: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2026-01-18 11:50:10.292743: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2026-01-18 11:50:12.229292: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1768737015.790291   29853 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768737016.665672   29853 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1768737023.887482   29853 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

Loaded TF model; summary (first 3 lines):


Model: "tabular_autoencoder"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ input_layer (InputLayer)        │ (None, 279)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 558)            │       156,240 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 279)            │       155,961 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ bottleneck (Dense)              │ (None, 8)              │         2,240 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_2 (Dense)                 │ (None, 279)            │         2,511 │
├─────────────────────────────────┼────────────

In [1]:
import duckdb
q = """
SELECT
  (SELECT COUNT(*) FROM (SELECT transaction_id FROM 'artifacts/results_stream_recalibrated.parquet' ORDER BY baseline_score DESC LIMIT 100) r
   JOIN 'financial_fraud_detection_dataset.csv' o USING (transaction_id) WHERE o.is_fraud = TRUE) AS baseline_top100_tp,
  (SELECT COUNT(*) FROM (SELECT transaction_id FROM 'artifacts/results_stream_recalibrated.parquet' ORDER BY final_score DESC LIMIT 100) r
   JOIN 'financial_fraud_detection_dataset.csv' o USING (transaction_id) WHERE o.is_fraud = TRUE) AS final_top100_tp;
"""
print(duckdb.query(q).fetchdf())

IOException: IO Error: No files found that match the pattern "artifacts/results_stream_recalibrated.parquet"

In [1]:
import pandas as pd, json, numpy as np
errors = pd.read_parquet('artifacts/ae_recon_errors_sample.parquet')['recon_error'] 
if os.path.exists('artifacts/ae_recon_errors_sample.parquet'):
    print('Test')
else: 
    None 
print("errors available:", errors is not None)

FileNotFoundError: [Errno 2] No such file or directory: 'artifacts/ae_recon_errors_sample.parquet'

In [1]:
import pandas as pd, json
df = pd.read_parquet('artifacts/results_stream.parquet').head(1000)  # or sample
out = []
for _, r in df.iterrows():
    tx = str(r['transaction_id'])
    # Build 3 short evidence items (replace with real retrieval in prod)
    topk = [
        {"id": "E1", "text": f"amount={r.get('amount','?')}; baseline={r.get('baseline_score'):.4f}" if 'amount' in r else f"baseline={r.get('baseline_score'):.4f}"},
        {"id": "E2", "text": f"merchant_risk={r.get('merchant_risk','?')}"},
        {"id": "E3", "text": "recent device mismatch" }
    ]
    out.append({"transaction_id": tx, "top_k_evidence": topk})
# write as parquet with JSON strings (the Level-3 runner supports object lists)
pd.DataFrame(out).to_parquet('artifacts/topk_evidence.parquet', index=False)
print("wrote artifacts/topk_evidence.parquet with", len(out), "rows")

wrote artifacts/topk_evidence.parquet with 1000 rows
