In [19]:
import json, joblib, numpy as np, pandas as pd
from sklearn.calibration import CalibratedClassifierCV
from sklearn.externals.array_api_compat.numpy import full_like
from sklearn.metrics import roc_auc_score, precision_recall_curve, f1_score, auc
import shap

In [20]:
model  = joblib.load('models/xgb_day6.joblib')
full = pd.read_parquet('data/processed/train_full_with_day6.parquet')
feature_cols = [c for c in full.columns if c not in ('TransactionID', 'isFraud', 'dt', 'TransactionDT')]

In [26]:
# get booster feature names (model's training features)
booster = model.get_booster()
trained_feats = booster.feature_names
print("Model expects", len(trained_feats), "features. Sample:", trained_feats[:10])


Model expects 485 features. Sample: ['TransactionAmt', 'ProductCD', 'card1', 'card2', 'card3', 'card4', 'card5', 'card6', 'addr1', 'addr2']


In [21]:
# After this, every feature should be a numeric dtype
bad = [c for c in feature_cols if full[c].dtype not in ('int8','int16','int32','int64','float16','float32','float64')]
if bad:
    print("Non-numeric columns still present:", bad)
else:
    print("All feature columns numeric. Good to go.")

Non-numeric columns still present: ['ProductCD', 'card4', 'card6', 'P_emaildomain', 'R_emaildomain', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'id_12', 'id_15', 'id_16', 'id_23', 'id_27', 'id_28', 'id_29', 'id_30', 'id_31', 'id_33', 'id_34', 'id_35', 'id_36', 'id_37', 'id_38', 'DeviceType', 'DeviceInfo', 'P_email_provider', 'R_email_provider']


In [22]:
# 1) normalize dtypes for all feature columns on the master 'full' DataFrame
#    This ensures train/val/test use the exact same integer encodings.
feat = feature_cols  # from previous cell

for c in feat:
    # If object -> convert to categorical and then to codes
    if full[c].dtype == 'object':
        full[c] = full[c].astype('category')
    # If it's categorical, convert to integer codes (preserves mapping since full is master)
    if str(full[c].dtype) == 'category':
        full[c] = full[c].cat.codes.astype('int32')
    # If it's boolean, convert to int
    if full[c].dtype == 'bool':
        full[c] = full[c].astype('int8')

In [23]:
n = len(full)
te = int(0.70*n)
ve = int(0.85*n)

X_train = full.iloc[:te][feature_cols]
y_train = full.iloc[:te]['isFraud'].astype(int)
X_val = full.iloc[te:ve][feature_cols]
y_val = full.iloc[te:ve]['isFraud'].astype(int)
X_test = full.iloc[ve:][feature_cols]
y_test = full.iloc[ve:]['isFraud'].astype(int)

In [24]:
# 3 quick sanity checks
print("Dtypes sample:", {c: str(full[c].dtype) for c in feat[:10]})
print("Splits shapes:", X_train.shape, X_val.shape, X_test.shape)
print("Any NaNs in features:", X_train.isna().any().any(), X_val.isna().any().any(), X_test.isna().any().any())

Dtypes sample: {'TransactionAmt': 'float32', 'ProductCD': 'int32', 'card1': 'int32', 'card2': 'float32', 'card3': 'float32', 'card4': 'int32', 'card5': 'float32', 'card6': 'int32', 'addr1': 'float32', 'addr2': 'float32'}
Splits shapes: (413378, 488) (88581, 488) (88581, 488)
Any NaNs in features: True True True


In [25]:
# applying Platt scaling (sigmoid) to turn raw XGB scores into better-calibrated probabilities.
# cv='prefit' means we fit a small logistic on model scores using val set.

# tree models often output overconfident scores. Calibrated probabilities are actionable for thresholds, SLAs, and precision-at-k business decisions.
from sklearn.calibration import CalibratedClassifierCV, FrozenEstimator
frozen = FrozenEstimator(model)
cal = CalibratedClassifierCV(frozen, method='sigmoid', cv="prefit")
cal.fit(X_val, y_val)   # now X_val has only numeric dtypes



ValueError: feature_names mismatch: ['TransactionAmt', 'ProductCD', 'card1', 'card2', 'card3', 'card4', 'card5', 'card6', 'addr1', 'addr2', 'dist1', 'dist2', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'D10', 'D11', 'D12', 'D13', 'D14', 'D15', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'V29', 'V30', 'V31', 'V32', 'V33', 'V34', 'V35', 'V36', 'V37', 'V38', 'V39', 'V40', 'V41', 'V42', 'V43', 'V44', 'V45', 'V46', 'V47', 'V48', 'V49', 'V50', 'V51', 'V52', 'V53', 'V54', 'V55', 'V56', 'V57', 'V58', 'V59', 'V60', 'V61', 'V62', 'V63', 'V64', 'V65', 'V66', 'V67', 'V68', 'V69', 'V70', 'V71', 'V72', 'V73', 'V74', 'V75', 'V76', 'V77', 'V78', 'V79', 'V80', 'V81', 'V82', 'V83', 'V84', 'V85', 'V86', 'V87', 'V88', 'V89', 'V90', 'V91', 'V92', 'V93', 'V94', 'V95', 'V96', 'V97', 'V98', 'V99', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V140', 'V141', 'V142', 'V143', 'V144', 'V145', 'V146', 'V147', 'V148', 'V149', 'V150', 'V151', 'V152', 'V153', 'V154', 'V155', 'V156', 'V157', 'V158', 'V159', 'V160', 'V161', 'V162', 'V163', 'V164', 'V165', 'V166', 'V167', 'V168', 'V169', 'V170', 'V171', 'V172', 'V173', 'V174', 'V175', 'V176', 'V177', 'V178', 'V179', 'V180', 'V181', 'V182', 'V183', 'V184', 'V185', 'V186', 'V187', 'V188', 'V189', 'V190', 'V191', 'V192', 'V193', 'V194', 'V195', 'V196', 'V197', 'V198', 'V199', 'V200', 'V201', 'V202', 'V203', 'V204', 'V205', 'V206', 'V207', 'V208', 'V209', 'V210', 'V211', 'V212', 'V213', 'V214', 'V215', 'V216', 'V217', 'V218', 'V219', 'V220', 'V221', 'V222', 'V223', 'V224', 'V225', 'V226', 'V227', 'V228', 'V229', 'V230', 'V231', 'V232', 'V233', 'V234', 'V235', 'V236', 'V237', 'V238', 'V239', 'V240', 'V241', 'V242', 'V243', 'V244', 'V245', 'V246', 'V247', 'V248', 'V249', 'V250', 'V251', 'V252', 'V253', 'V254', 'V255', 'V256', 'V257', 'V258', 'V259', 'V260', 'V261', 'V262', 'V263', 'V264', 'V265', 'V266', 'V267', 'V268', 'V269', 'V270', 'V271', 'V272', 'V273', 'V274', 'V275', 'V276', 'V277', 'V278', 'V279', 'V280', 'V281', 'V282', 'V283', 'V284', 'V285', 'V286', 'V287', 'V288', 'V289', 'V290', 'V291', 'V292', 'V293', 'V294', 'V295', 'V296', 'V297', 'V298', 'V299', 'V300', 'V301', 'V302', 'V303', 'V304', 'V305', 'V306', 'V307', 'V308', 'V309', 'V310', 'V311', 'V312', 'V313', 'V314', 'V315', 'V316', 'V317', 'V318', 'V319', 'V320', 'V321', 'V322', 'V323', 'V324', 'V325', 'V326', 'V327', 'V328', 'V329', 'V330', 'V331', 'V332', 'V333', 'V334', 'V335', 'V336', 'V337', 'V338', 'V339', 'id_01', 'id_02', 'id_03', 'id_04', 'id_05', 'id_06', 'id_07', 'id_08', 'id_09', 'id_10', 'id_11', 'id_12', 'id_13', 'id_14', 'id_15', 'id_16', 'id_17', 'id_18', 'id_19', 'id_20', 'id_21', 'id_22', 'id_23', 'id_24', 'id_25', 'id_26', 'id_27', 'id_28', 'id_29', 'id_30', 'id_31', 'id_32', 'id_33', 'id_34', 'id_35', 'id_36', 'id_37', 'id_38', 'DeviceType', 'P_email_provider', 'R_email_provider', 'email_domain_mismatch', 'P_emaildomain_freq', 'R_emaildomain_freq', 'P_email_provider_freq', 'R_email_provider_freq', 'P_email_rare', 'R_email_rare', 'device_freq', 'is_rare_device', 'device_type_freq', 'addr1_freq', 'addr2_freq', 'id_01_freq', 'id_02_freq', 'id_03_freq', 'id_04_freq', 'id_05_freq', 'id_06_freq', 'id_07_freq', 'id_08_freq', 'id_09_freq', 'id_10_freq', 'id_11_freq', 'id_12_freq', 'id_13_freq', 'id_14_freq', 'id_15_freq', 'id_16_freq', 'id_17_freq', 'id_18_freq', 'id_19_freq', 'id_20_freq', 'id_21_freq', 'id_22_freq', 'id_23_freq', 'id_24_freq', 'id_25_freq', 'id_26_freq', 'id_27_freq', 'id_28_freq', 'id_29_freq', 'id_30_freq', 'id_31_freq', 'id_32_freq', 'id_33_freq', 'id_34_freq', 'id_35_freq', 'id_36_freq', 'id_37_freq', 'id_38_freq', 'id_17_rare', 'id_31_rare', 'id_33_rare', 'card1_freq', 'card6_freq'] ['TransactionAmt', 'ProductCD', 'card1', 'card2', 'card3', 'card4', 'card5', 'card6', 'addr1', 'addr2', 'dist1', 'dist2', 'P_emaildomain', 'R_emaildomain', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7', 'D8', 'D9', 'D10', 'D11', 'D12', 'D13', 'D14', 'D15', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28', 'V29', 'V30', 'V31', 'V32', 'V33', 'V34', 'V35', 'V36', 'V37', 'V38', 'V39', 'V40', 'V41', 'V42', 'V43', 'V44', 'V45', 'V46', 'V47', 'V48', 'V49', 'V50', 'V51', 'V52', 'V53', 'V54', 'V55', 'V56', 'V57', 'V58', 'V59', 'V60', 'V61', 'V62', 'V63', 'V64', 'V65', 'V66', 'V67', 'V68', 'V69', 'V70', 'V71', 'V72', 'V73', 'V74', 'V75', 'V76', 'V77', 'V78', 'V79', 'V80', 'V81', 'V82', 'V83', 'V84', 'V85', 'V86', 'V87', 'V88', 'V89', 'V90', 'V91', 'V92', 'V93', 'V94', 'V95', 'V96', 'V97', 'V98', 'V99', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V120', 'V121', 'V122', 'V123', 'V124', 'V125', 'V126', 'V127', 'V128', 'V129', 'V130', 'V131', 'V132', 'V133', 'V134', 'V135', 'V136', 'V137', 'V138', 'V139', 'V140', 'V141', 'V142', 'V143', 'V144', 'V145', 'V146', 'V147', 'V148', 'V149', 'V150', 'V151', 'V152', 'V153', 'V154', 'V155', 'V156', 'V157', 'V158', 'V159', 'V160', 'V161', 'V162', 'V163', 'V164', 'V165', 'V166', 'V167', 'V168', 'V169', 'V170', 'V171', 'V172', 'V173', 'V174', 'V175', 'V176', 'V177', 'V178', 'V179', 'V180', 'V181', 'V182', 'V183', 'V184', 'V185', 'V186', 'V187', 'V188', 'V189', 'V190', 'V191', 'V192', 'V193', 'V194', 'V195', 'V196', 'V197', 'V198', 'V199', 'V200', 'V201', 'V202', 'V203', 'V204', 'V205', 'V206', 'V207', 'V208', 'V209', 'V210', 'V211', 'V212', 'V213', 'V214', 'V215', 'V216', 'V217', 'V218', 'V219', 'V220', 'V221', 'V222', 'V223', 'V224', 'V225', 'V226', 'V227', 'V228', 'V229', 'V230', 'V231', 'V232', 'V233', 'V234', 'V235', 'V236', 'V237', 'V238', 'V239', 'V240', 'V241', 'V242', 'V243', 'V244', 'V245', 'V246', 'V247', 'V248', 'V249', 'V250', 'V251', 'V252', 'V253', 'V254', 'V255', 'V256', 'V257', 'V258', 'V259', 'V260', 'V261', 'V262', 'V263', 'V264', 'V265', 'V266', 'V267', 'V268', 'V269', 'V270', 'V271', 'V272', 'V273', 'V274', 'V275', 'V276', 'V277', 'V278', 'V279', 'V280', 'V281', 'V282', 'V283', 'V284', 'V285', 'V286', 'V287', 'V288', 'V289', 'V290', 'V291', 'V292', 'V293', 'V294', 'V295', 'V296', 'V297', 'V298', 'V299', 'V300', 'V301', 'V302', 'V303', 'V304', 'V305', 'V306', 'V307', 'V308', 'V309', 'V310', 'V311', 'V312', 'V313', 'V314', 'V315', 'V316', 'V317', 'V318', 'V319', 'V320', 'V321', 'V322', 'V323', 'V324', 'V325', 'V326', 'V327', 'V328', 'V329', 'V330', 'V331', 'V332', 'V333', 'V334', 'V335', 'V336', 'V337', 'V338', 'V339', 'id_01', 'id_02', 'id_03', 'id_04', 'id_05', 'id_06', 'id_07', 'id_08', 'id_09', 'id_10', 'id_11', 'id_12', 'id_13', 'id_14', 'id_15', 'id_16', 'id_17', 'id_18', 'id_19', 'id_20', 'id_21', 'id_22', 'id_23', 'id_24', 'id_25', 'id_26', 'id_27', 'id_28', 'id_29', 'id_30', 'id_31', 'id_32', 'id_33', 'id_34', 'id_35', 'id_36', 'id_37', 'id_38', 'DeviceType', 'DeviceInfo', 'P_email_provider', 'R_email_provider', 'email_domain_mismatch', 'P_emaildomain_freq', 'R_emaildomain_freq', 'P_email_provider_freq', 'R_email_provider_freq', 'P_email_rare', 'R_email_rare', 'device_freq', 'is_rare_device', 'device_type_freq', 'addr1_freq', 'addr2_freq', 'id_01_freq', 'id_02_freq', 'id_03_freq', 'id_04_freq', 'id_05_freq', 'id_06_freq', 'id_07_freq', 'id_08_freq', 'id_09_freq', 'id_10_freq', 'id_11_freq', 'id_12_freq', 'id_13_freq', 'id_14_freq', 'id_15_freq', 'id_16_freq', 'id_17_freq', 'id_18_freq', 'id_19_freq', 'id_20_freq', 'id_21_freq', 'id_22_freq', 'id_23_freq', 'id_24_freq', 'id_25_freq', 'id_26_freq', 'id_27_freq', 'id_28_freq', 'id_29_freq', 'id_30_freq', 'id_31_freq', 'id_32_freq', 'id_33_freq', 'id_34_freq', 'id_35_freq', 'id_36_freq', 'id_37_freq', 'id_38_freq', 'id_17_rare', 'id_31_rare', 'id_33_rare', 'card1_freq', 'card6_freq']
training data did not have the following fields: R_emaildomain, DeviceInfo, P_emaildomain

In [None]:
# ---- calibration (Platt) ----
cal = CalibratedClassifierCV(model, method='sigmoid', cv='prefit')
cal.fit(X_val, y_val)
joblib.dump(cal, "models/xgb_day7_calibrated.joblib")

# ---- threshold search on val ----
probs_val = cal.predict_proba(X_val)[:,1]
ths = np.linspace(0.01,0.99,200)
f1s = [f1_score(y_val, probs_val>t) for t in ths]
best_f1_t = float(ths[np.argmax(f1s)])
# precision@k (top k fraction)
def top_k_capture(y_true, probs, k=0.01):
    kN = max(1,int(len(probs)*k))
    idx = np.argsort(probs)[::-1][:kN]
    return y_true.iloc[idx].mean()
top1_val = top_k_capture(y_val, probs_val, 0.01)

# ---- test metrics at best_f1_t ----
probs_test = cal.predict_proba(X_test)[:,1]
roc_test = roc_auc_score(y_test, probs_test)
prec, rec, _ = precision_recall_curve(y_test, probs_test)
pr_test = auc(rec, prec)
pred_test = (probs_test > best_f1_t).astype(int)
from sklearn.metrics import classification_report
cr = classification_report(y_test, pred_test, digits=4)
top1_test = top_k_capture(y_test, probs_test, 0.01)

# ---- stability per month ----
if 'dt' in full.columns:
    full['month'] = full['dt'].dt.to_period('M')
    months = sorted(full['month'].unique())
    month_stats=[]
    for m in months:
        mask = full['month']==m
        if mask.sum()<50: continue
        p = cal.predict_proba(full.loc[mask, feature_cols])[:,1]
        month_stats.append((str(m), float(roc_auc_score(full.loc[mask,'isFraud'], p))))
else:
    month_stats=[]

# ---- SHAP (sample) ----
sample_idx = X_test.sample(2000, random_state=42).index
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test.loc[sample_idx])
# save a small summary (top features by mean|shap|)
shap_mean = np.abs(shap_values).mean(axis=0)
top_shap = list(pd.Series(shap_mean, index=feature_cols).sort_values(ascending=False).head(10).index)

# ---- save thresholds + summary ----
out = {
  "best_f1_threshold": best_f1_t,
  "top1_val_capture": float(top1_val),
  "roc_test": float(roc_test),
  "pr_test": float(pr_test),
  "top1_test_capture": float(top1_test),
  "top_shap_features": top_shap,
  "month_roc": month_stats
}
with open("models/day7_summary.json","w") as f: json.dump(out,f,indent=2)
print("SAVED: models/xgb_day7_calibrated.joblib, models/day7_summary.json")

# ---- print deliverables ----
print("\n--- DELIVERABLES ---")
print(f"Best F1 threshold (val): {best_f1_t:.4f}")
print(f"ROC test: {roc_test:.4f}  PR-AUC test: {pr_test:.4f}")
print("Top-1% test capture:", round(top1_test,4))
print("\nClassification report at best threshold:\n", cr)
print("Top SHAP features:", top_shap)
print("Month-wise ROC (sample):", month_stats[:10])


In [27]:
import joblib, numpy as np, pandas as pd
from sklearn.calibration import CalibratedClassifierCV, FrozenEstimator

# load model & data
model = joblib.load("models/xgb_day6.joblib")
full = pd.read_parquet("data/processed/train_full_with_day6.parquet")

# get booster feature names (model's training features)
booster = model.get_booster()
trained_feats = booster.feature_names
print("Model expects", len(trained_feats), "features. Sample:", trained_feats[:10])

# pick same positional splits
n=len(full); te=int(0.70*n); ve=int(0.85*n)

# create X_val_raw from full (all candidate features)
# we’ll force alignment to trained_feats below
X_val_raw = full.iloc[te:ve].copy()
y_val = full.iloc[te:ve]['isFraud'].astype(int)

# 1) Add missing features (that model expects but are not in X_val_raw)
miss = [c for c in trained_feats if c not in X_val_raw.columns]
if miss:
    print("Adding missing columns (filled -999):", miss[:20], "… total", len(miss))
    for c in miss:
        X_val_raw[c] = -999

# 2) Drop unexpected extras (not needed but harmless)
extra = [c for c in X_val_raw.columns if c not in trained_feats]
if extra:
    print("Dropping extra columns not in model (sample):", extra[:20], "… total", len(extra))
    X_val_raw = X_val_raw.drop(columns=extra)

# 3) Reorder exactly as model.feature_names
X_val_aligned = X_val_raw[trained_feats].copy()

# 4) Ensure numeric dtypes: convert object/categorical/bool -> numeric codes or ints
for c in X_val_aligned.columns:
    if X_val_aligned[c].dtype == 'object':
        X_val_aligned[c] = X_val_aligned[c].astype('category').cat.codes
    if str(X_val_aligned[c].dtype).startswith('category'):
        X_val_aligned[c] = X_val_aligned[c].cat.codes
    if X_val_aligned[c].dtype == 'bool':
        X_val_aligned[c] = X_val_aligned[c].astype('int8')

# 5) Fill NaNs (match training fill strategy)
X_val_aligned = X_val_aligned.fillna(-999)

# quick sanity
print("Final X_val shape:", X_val_aligned.shape)
non_numeric = [c for c in X_val_aligned.columns if X_val_aligned[c].dtype not in (np.int8,np.int16,np.int32,np.int64,np.float16,np.float32,np.float64)]
print("Non-numeric left:", non_numeric)

# 6) Calibrate using FrozenEstimator (no refit risk)
frozen = FrozenEstimator(model)
cal = CalibratedClassifierCV(frozen, method='sigmoid', cv="prefit")
cal.fit(X_val_aligned, y_val)   # should run without feature mismatch now

joblib.dump(cal, "models/xgb_day7_calibrated.joblib")
print("Calibrated model saved to models/xgb_day7_calibrated.joblib")


Model expects 485 features. Sample: ['TransactionAmt', 'ProductCD', 'card1', 'card2', 'card3', 'card4', 'card5', 'card6', 'addr1', 'addr2']
Dropping extra columns not in model (sample): ['TransactionID', 'isFraud', 'TransactionDT', 'P_emaildomain', 'R_emaildomain', 'DeviceInfo', 'dt'] … total 7
Final X_val shape: (88581, 485)
Non-numeric left: []




Calibrated model saved to models/xgb_day7_calibrated.joblib


  grad = np.asarray([-g @ F, -g.sum()], dtype=np.float64)
  grad = np.asarray([-g @ F, -g.sum()], dtype=np.float64)
  grad = np.asarray([-g @ F, -g.sum()], dtype=np.float64)


In [28]:
# probs on val (from calibrated model you just saved)
probs_val = cal.predict_proba(X_val_aligned)[:,1]

# distributions & trouble flags
import numpy as np, pandas as pd
print("probs min/median/max:", probs_val.min(), np.median(probs_val), probs_val.max())
print("unique probs count:", np.unique(np.round(probs_val,6)).size)
print("counts of extreme probs:", ((probs_val==0).sum(), (probs_val==1).sum()))
print("val pos count:", y_val.sum(), " / ", len(y_val))


probs min/median/max: 0.00554740928962251 0.006881584425518473 0.8089915802762615
unique probs count: 24078
counts of extreme probs: (np.int64(0), np.int64(0))
val pos count: 3042  /  88581


In [29]:
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, f1_score, classification_report
roc_val = roc_auc_score(y_val, probs_val)
prec, rec, ths = precision_recall_curve(y_val, probs_val); pr_val = auc(rec, prec)
# best F1 on val
f1s = [( (prec_i*rec_i*2)/(prec_i+rec_i+1e-9) ) for prec_i,rec_i in zip(prec,rec)]
best_idx = np.argmax(f1s)
best_th = ths[best_idx-1] if best_idx>0 else 0.5
print("ROC-val, PR-val:", roc_val, pr_val, "best-F1-th:", best_th)


ROC-val, PR-val: 0.9201276837409074 0.5910724257294672 best-F1-th: 0.43383615224333966


In [31]:
# --- align + encode + fill for X_test, then evaluate ---
import numpy as np
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc

# trained_feats, full, ve already exist from earlier cells
X_test_raw = full.iloc[ve:].copy()
y_test = full.iloc[ve:]['isFraud'].astype(int)

# 1) add missing cols model expects
miss = [c for c in trained_feats if c not in X_test_raw.columns]
for c in miss:
    X_test_raw[c] = -999

# 2) drop extras
extra = [c for c in X_test_raw.columns if c not in trained_feats]
if extra:
    X_test_raw = X_test_raw.drop(columns=extra)

# 3) reorder
X_test_raw = X_test_raw[trained_feats].copy()

# 4) Convert object/categorical/bool -> numeric codes BEFORE fillna
for c in X_test_raw.columns:
    if X_test_raw[c].dtype == 'object':
        X_test_raw[c] = X_test_raw[c].astype('category')
    # If categorical (including those you may have created earlier), convert to codes
    if str(X_test_raw[c].dtype).startswith('category'):
        # keep -1 for missing categories (cat.codes uses -1)
        X_test_raw[c] = X_test_raw[c].cat.codes.astype('int32')
    if X_test_raw[c].dtype == 'bool':
        X_test_raw[c] = X_test_raw[c].astype('int8')

# 5) Now safe to fill NaNs with sentinel
X_test_aligned = X_test_raw.fillna(-999)

# sanity check
non_numeric = [c for c in X_test_aligned.columns if X_test_aligned[c].dtype not in (np.int8,np.int16,np.int32,np.int64,np.float16,np.float32,np.float64)]
print("Non-numeric after conversion (should be empty):", non_numeric)
print("X_test_aligned shape:", X_test_aligned.shape)

# 6) predict & metrics
probs_test = cal.predict_proba(X_test_aligned)[:,1]
roc_test = roc_auc_score(y_test, probs_test)
prec, rec, _ = precision_recall_curve(y_test, probs_test)
pr_test = auc(rec, prec)
print("ROC-test:", round(roc_test,4), "PR-test:", round(pr_test,4))


Non-numeric after conversion (should be empty): []
X_test_aligned shape: (88581, 485)
ROC-test: 0.8899 PR-test: 0.5159


In [33]:
from sklearn.metrics import classification_report, roc_auc_score, precision_recall_curve, average_precision_score
import numpy as np

# probs_val, probs_test, y_val, y_test, best_th already exist

# Metrics
roc_val = roc_auc_score(y_val, probs_val)
roc_test = roc_auc_score(y_test, probs_test)

# PR-AUC (robust)
pr_val = average_precision_score(y_val, probs_val)
pr_test = average_precision_score(y_test, probs_test)

print("ROC-val:", round(roc_val,4), " ROC-test:", round(roc_test,4))
print("PR-val (average_precision):", round(pr_val,4), " PR-test:", round(pr_test,4))

# Classification report at chosen threshold
yhat_test = (probs_test >= best_th).astype(int)
print("\nTest classification report (th={:.4f}):".format(best_th))
print(classification_report(y_test, yhat_test, digits=4))

# Top-1% fraud capture
k = max(1, int(0.01 * len(probs_test)))
topk_idx = np.argsort(probs_test)[-k:]
# y_test is a pandas Series (aligned); get values at indices
top1_frac = y_test.iloc[topk_idx].sum() / (y_test.sum() + 1e-9)
print("Top-1% fraud capture:", round(top1_frac,4))


ROC-val: 0.9201  ROC-test: 0.8899
PR-val (average_precision): 0.5911  PR-test: 0.516

Test classification report (th=0.4338):
              precision    recall  f1-score   support

           0     0.9798    0.9909    0.9853     85498
           1     0.6332    0.4340    0.5150      3083

    accuracy                         0.9716     88581
   macro avg     0.8065    0.7125    0.7502     88581
weighted avg     0.9678    0.9716    0.9690     88581

Top-1% fraud capture: 0.2562
