In [8]:
import sys, platform
print("Python:", sys.version)
print("Platform:", platform.platform())

Python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
Platform: Linux-6.6.105+-x86_64-with-glibc2.35


In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_curve, f1_score, precision_score, recall_score, confusion_matrix
import lightgbm as lgb
import tensorflow as tf
from tensorflow.keras.models import load_model
import joblib


In [10]:
from google.colab import drive
drive.mount('/content/drive')

csv_path = "/content/drive/MyDrive/financial-fraud-dataset-train.csv"

# AE artefact'ları (daha önce kaydetmiştin)
ae_model_path = "/content/drive/MyDrive/improved_autoencoder.h5"      # adını sen nasıl kaydettiysen ona göre düzelt
scaler_path   = "/content/drive/MyDrive/ae_scaler.pkl"       # adını sen nasıl kaydettiysen ona göre düzelt


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
AE_FEATURES = [
    "amount_ngn",
    "spending_deviation_score",
    "velocity_score",
    "user_avg_txn_amt",
    "user_std_txn_amt",
    "txn_hour",
    "is_night_txn",
    "user_txn_frequency_24h",
    "txn_count_last_1h",
    "avg_gap_between_txns",
    "device_seen_count",
    "is_device_shared",
    "new_device_transaction",
    "geospatial_velocity_anomaly"
]


In [12]:
LGBM_BASE_FEATURES = [
    "amount_ngn",
    "user_avg_txn_amt",
    "user_std_txn_amt",
    "user_txn_frequency_24h",
    "txn_count_last_1h",
    "txn_count_last_24h",
    "total_amount_last_1h",
    "avg_gap_between_txns",
    "txn_hour",
    "is_weekend",
    "is_night_txn",
    "device_seen_count",
    "is_device_shared",
    "ip_seen_count",
    "is_ip_shared",
    "new_device_transaction",
    "geospatial_velocity_anomaly",
]


In [13]:
LGBM_HYBRID_FEATURES = LGBM_BASE_FEATURES + ["anomaly_score"]


In [14]:
df = pd.read_csv(csv_path)
print("Dataset shape:", df.shape)

required = set(AE_FEATURES + LGBM_BASE_FEATURES + ["is_fraud", "timestamp"])
missing = sorted([c for c in required if c not in df.columns])
print("Missing:", missing[:20], "..." if len(missing) > 20 else "")
assert len(missing) == 0, f"Missing columns: {missing}"


Dataset shape: (5000000, 45)
Missing: [] 


In [15]:
df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
df = df.dropna(subset=["timestamp"]).sort_values("timestamp").reset_index(drop=True)

split_ratio = 0.8
split_idx = int(len(df) * split_ratio)

df_train = df.iloc[:split_idx].copy()
df_test  = df.iloc[split_idx:].copy()

print("Train:", df_train.shape, "Test:", df_test.shape)
print("Train fraud rate:", df_train["is_fraud"].mean())
print("Test fraud rate:", df_test["is_fraud"].mean())


Train: (4000000, 45) Test: (1000000, 45)
Train fraud rate: 0.03586475
Test fraud rate: 0.036094


In [17]:
ae_model = load_model(ae_model_path, compile=False)
scaler = joblib.load(scaler_path)

print("AE loaded.")


AE loaded.


In [18]:
def compute_anomaly_score(df_part: pd.DataFrame, batch_size: int = 4096) -> np.ndarray:
    X = df_part[AE_FEATURES].copy()
    X_scaled = scaler.transform(X)

    recon = ae_model.predict(X_scaled, batch_size=batch_size, verbose=0)
    mse = np.mean(np.square(X_scaled - recon), axis=1)
    return mse

df_train["anomaly_score"] = compute_anomaly_score(df_train)
df_test["anomaly_score"]  = compute_anomaly_score(df_test)

print("anomaly_score added.")
print(df_train["anomaly_score"].describe())


anomaly_score added.
count    4.000000e+06
mean     2.915279e-01
std      3.008645e-01
min      2.632969e-03
25%      1.262492e-01
50%      2.207375e-01
75%      3.760874e-01
max      2.362180e+01
Name: anomaly_score, dtype: float64


In [19]:
y_train = df_train["is_fraud"].astype(int).values
y_test  = df_test["is_fraud"].astype(int).values

X_train_base = df_train[LGBM_BASE_FEATURES].copy()
X_test_base  = df_test[LGBM_BASE_FEATURES].copy()

X_train_hyb = df_train[LGBM_HYBRID_FEATURES].copy()
X_test_hyb  = df_test[LGBM_HYBRID_FEATURES].copy()

print("Base:", X_train_base.shape, X_test_base.shape)
print("Hybrid:", X_train_hyb.shape, X_test_hyb.shape)


Base: (4000000, 17) (1000000, 17)
Hybrid: (4000000, 18) (1000000, 18)


In [20]:
neg = (y_train == 0).sum()
pos = (y_train == 1).sum()
scale_pos_weight = neg / max(pos, 1)
print("neg:", neg, "pos:", pos, "scale_pos_weight:", scale_pos_weight)

params = {
    "objective": "binary",
    "metric": "auc",
    "learning_rate": 0.05,
    "num_leaves": 64,
    "max_depth": -1,
    "min_data_in_leaf": 200,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.8,
    "bagging_freq": 1,
    "lambda_l2": 1.0,
    #"scale_pos_weight": scale_pos_weight,
    "verbosity": -1,
    "n_jobs": -1,
}


neg: 3856541 pos: 143459 scale_pos_weight: 26.882530897329552


In [21]:
dtrain = lgb.Dataset(X_train_base, label=y_train)
dvalid = lgb.Dataset(X_test_base, label=y_test, reference=dtrain)

baseline_model = lgb.train(
    params,
    dtrain,
    num_boost_round=2000,
    valid_sets=[dtrain, dvalid],
    valid_names=["train", "test"],
    callbacks=[lgb.early_stopping(stopping_rounds=50), lgb.log_evaluation(50)]
)


Training until validation scores don't improve for 50 rounds
[50]	train's auc: 0.642826	test's auc: 0.593888
Early stopping, best iteration is:
[3]	train's auc: 0.604888	test's auc: 0.595375


In [22]:
dtrain_h = lgb.Dataset(X_train_hyb, label=y_train)
dvalid_h = lgb.Dataset(X_test_hyb, label=y_test, reference=dtrain_h)

hybrid_model = lgb.train(
    params,
    dtrain_h,
    num_boost_round=2000,
    valid_sets=[dtrain_h, dvalid_h],
    valid_names=["train", "test"],
    callbacks=[lgb.early_stopping(stopping_rounds=50), lgb.log_evaluation(50)]
)


Training until validation scores don't improve for 50 rounds
[50]	train's auc: 0.64387	test's auc: 0.5953
Early stopping, best iteration is:
[10]	train's auc: 0.613797	test's auc: 0.596117


In [23]:
def evaluate(model, X_test, y_test, name="model"):
    proba = model.predict(X_test, num_iteration=model.best_iteration)

    auc = roc_auc_score(y_test, proba)
    ap  = average_precision_score(y_test, proba)

    # Threshold seçimi: F1 maksimum
    precision, recall, thresholds = precision_recall_curve(y_test, proba)
    f1s = (2 * precision * recall) / (precision + recall + 1e-12)
    best_idx = np.argmax(f1s)
    best_thr = thresholds[max(best_idx - 1, 0)] if len(thresholds) else 0.5  # güvenli
    y_pred = (proba >= best_thr).astype(int)

    f1 = f1_score(y_test, y_pred)
    p  = precision_score(y_test, y_pred, zero_division=0)
    r  = recall_score(y_test, y_pred, zero_division=0)
    cm = confusion_matrix(y_test, y_pred)

    print(f"\n=== {name} ===")
    print("ROC AUC:", auc)
    print("PR AUC :", ap)
    print("Best thr (F1):", best_thr)
    print("F1:", f1, "Precision:", p, "Recall:", r)
    print("Confusion matrix:\n", cm)

    return {"auc": auc, "ap": ap, "best_thr": float(best_thr), "f1": f1, "precision": p, "recall": r}

baseline_metrics = evaluate(baseline_model, X_test_base, y_test, "Baseline (Behavior only)")
hybrid_metrics   = evaluate(hybrid_model, X_test_hyb, y_test, "Hybrid (Behavior + anomaly_score)")



=== Baseline (Behavior only) ===
ROC AUC: 0.5953746139754432
PR AUC : 0.04438384948171939
Best thr (F1): 0.06805729959275923
F1: 0.08418586963368742 Precision: 0.04394314329137602 Recall: 0.999722945641935
Confusion matrix:
 [[178838 785068]
 [    10  36084]]

=== Hybrid (Behavior + anomaly_score) ===
ROC AUC: 0.5961172594518097
PR AUC : 0.044638200470589445
Best thr (F1): 0.20798234799114368
F1: 0.08419957567500126 Precision: 0.043958571101789974 Recall: 0.9956225411425722
Confusion matrix:
 [[182345 781561]
 [   158  35936]]


In [25]:
#####################################
RISK_FEATURES = [
    "merchant_fraud_rate",
    "channel_risk_score",
    "persona_fraud_risk",
    "location_fraud_risk"
]

LGBM_BEHAVIOR_RISK = LGBM_BASE_FEATURES + RISK_FEATURES
LGBM_FULL = LGBM_BASE_FEATURES + RISK_FEATURES + ["anomaly_score"]

X_train_risk = df_train[LGBM_BEHAVIOR_RISK].copy()
X_test_risk  = df_test[LGBM_BEHAVIOR_RISK].copy()

X_train_full = df_train[LGBM_FULL].copy()
X_test_full  = df_test[LGBM_FULL].copy()


In [26]:
dtrain_r = lgb.Dataset(X_train_risk, label=y_train)
dvalid_r = lgb.Dataset(X_test_risk, label=y_test)

risk_model = lgb.train(
    params,
    dtrain_r,
    num_boost_round=2000,
    valid_sets=[dtrain_r, dvalid_r],
    valid_names=["train", "test"],
    callbacks=[lgb.early_stopping(50), lgb.log_evaluation(50)]
)


Training until validation scores don't improve for 50 rounds
[50]	train's auc: 0.648386	test's auc: 0.590851
Early stopping, best iteration is:
[1]	train's auc: 0.597756	test's auc: 0.593508


In [27]:
dtrain_f = lgb.Dataset(X_train_full, label=y_train)
dvalid_f = lgb.Dataset(X_test_full, label=y_test)

full_model = lgb.train(
    params,
    dtrain_f,
    num_boost_round=2000,
    valid_sets=[dtrain_f, dvalid_f],
    valid_names=["train", "test"],
    callbacks=[lgb.early_stopping(50), lgb.log_evaluation(50)]
)


Training until validation scores don't improve for 50 rounds
[50]	train's auc: 0.650015	test's auc: 0.5946
[100]	train's auc: 0.675858	test's auc: 0.594122
Early stopping, best iteration is:
[50]	train's auc: 0.650015	test's auc: 0.5946


In [28]:
risk_metrics = evaluate(risk_model, X_test_risk, y_test, "Behavior + Risk")
full_metrics = evaluate(full_model, X_test_full, y_test, "Full (Behavior + Risk + AE)")



=== Behavior + Risk ===
ROC AUC: 0.5935081699675997
PR AUC : 0.04399865054577965
Best thr (F1): 0.056499968586668455
F1: 0.08416217669265202 Precision: 0.043934468637586085 Recall: 0.997534216213221
Confusion matrix:
 [[180395 783511]
 [    89  36005]]

=== Full (Behavior + Risk + AE) ===
ROC AUC: 0.5945996743772317
PR AUC : 0.04428976778700337
Best thr (F1): 0.34581698720596604
F1: 0.08417620325439336 Precision: 0.04393755486107138 Recall: 0.9998891782567739
Confusion matrix:
 [[178603 785303]
 [     4  36090]]


In [24]:
import pandas as pd

imp = pd.DataFrame({
    "feature": X_train_hyb.columns,
    "importance": hybrid_model.feature_importance(importance_type="gain")
}).sort_values("importance", ascending=False)

imp.head(20)


Unnamed: 0,feature,importance
15,new_device_transaction,7814353.0
1,user_avg_txn_amt,19446.09
13,ip_seen_count,19357.28
17,anomaly_score,18765.37
2,user_std_txn_amt,17591.02
0,amount_ngn,12966.48
7,avg_gap_between_txns,12520.72
8,txn_hour,9814.391
6,total_amount_last_1h,8893.408
11,device_seen_count,2489.449


In [None]:
# LightGBM model
joblib.dump(hybrid_model, "/content/lgbm_model.pkl")

# training metrics (tez için sakla)
report = {
    "baseline": baseline_metrics,
    "hybrid": hybrid_metrics,
    "lgbm_base_features": LGBM_BASE_FEATURES,
    "lgbm_hybrid_features": LGBM_HYBRID_FEATURES,
    "ae_features": AE_FEATURES
}
joblib.dump(report, "/content/training_report.pkl")

print("Saved: lgbm_model.pkl, training_report.pkl")


In [None]:
from google.colab import files
files.download("/content/lgbm_model.pkl")
files.download("/content/training_report.pkl")
