In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import (
    KFold,
    StratifiedKFold,
    cross_val_score,
    cross_validate,
    train_test_split,
)
from sklearn.preprocessing import StandardScaler

# Build a reproducible synthetic fraud-detection dataset.
# NOTE: the sequence of np.random calls below determines the generated
# values, so the draws must stay in this order.
np.random.seed(42)
n_samples = 1000

# Four raw features, one independent draw per sample.
transaction_amount = np.random.exponential(100, n_samples)
time_of_day = np.random.uniform(low=0, high=24, size=n_samples)
location_risk = np.random.uniform(low=0, high=1, size=n_samples)
merchant_category = np.random.uniform(low=0, high=10, size=n_samples)

# Boolean risk flags that feed the fraud score.
is_large_amount = transaction_amount > 200
is_late_hour = time_of_day > 22
is_risky_location = location_risk > 0.7

# Weighted sum of the flags plus Gaussian noise. Despite its name this is an
# unbounded score, not a calibrated probability. merchant_category does not
# contribute to it.
fraud_probability = (
    0.1 * is_large_amount
    + 0.15 * is_late_hour
    + 0.2 * is_risky_location
    + 0.05 * np.random.randn(n_samples)
)

# A sample is labelled fraud (1) when its score exceeds 0.3, otherwise 0.
fraud = np.greater(fraud_probability, 0.3).astype(int)

# Assemble the features first, then append the label as the last column.
df = pd.DataFrame({
    'transaction_amount': transaction_amount,
    'time_of_day': time_of_day,
    'location_risk': location_risk,
    'merchant_category': merchant_category,
})
df['fraud'] = fraud

# Feature matrix (everything except the label) and target vector.
y = df['fraud']
X = df.drop(columns=['fraud'])

# Hold out the test set BEFORE fitting any preprocessing, so that no statistic
# computed from the test rows can leak into training. Stratifying on y keeps
# the class ratio of the binary target the same in both partitions.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Learn the scaling parameters (mean / std) from the training rows only, then
# apply that same fitted transform to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled view of the full feature matrix, kept so the existing X_scaled name
# remains available; it is produced with the train-only statistics.
X_scaled = scaler.transform(X)

# Random forest with a fixed seed for reproducibility; depth is capped at 5.
model = RandomForestClassifier(n_estimators=100, random_state=42, max_depth=5)
model.fit(X_train, y_train)

# Plain (non-stratified) 5-fold cross-validation on the training split,
# scored by accuracy. Folds are shuffled with a fixed seed.
cv = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(
    estimator=model,
    X=X_train,
    y=y_train,
    scoring='accuracy',
    cv=cv,
)

# Summary statistics over the five fold scores (np.std uses ddof=0, the same
# default as ndarray.std).
cv_mean_score = np.mean(cv_scores)
cv_std_score = np.std(cv_scores)

for label, value in (
    ("5-Fold Accuracy CV:", cv_scores),
    ("Mean Accuracy:", cv_mean_score),
    ("Std Accuracy:", cv_std_score),
):
    print(label, value)

# Stratified 5-fold cross-validation on the training split, scored by
# accuracy. Same shuffle seed as the plain K-fold run.
stratified_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
stratified_cv_scores = cross_val_score(
    estimator=model,
    X=X_train,
    y=y_train,
    scoring='accuracy',
    cv=stratified_cv,
)

# Mean and (ddof=0) standard deviation across the five folds.
stratified_cv_mean = np.mean(stratified_cv_scores)
stratified_cv_std = np.std(stratified_cv_scores)

for label, value in (
    ("Stratified CV:", stratified_cv_scores),
    ("Mean Stratified Accuracy:", stratified_cv_mean),
    ("Std Stratified Accuracy:", stratified_cv_std),
):
    print(label, value)

# F1 and ROC AUC CV
# Both metrics are evaluated in a single cross_validate pass over the
# stratified folds, so the model is fitted once per fold rather than once per
# fold per metric. The per-fold scores are exposed under 'test_<scorer name>'.
cv_metric_results = cross_validate(
    model,
    X_train,
    y_train,
    cv=stratified_cv,
    scoring=['f1', 'roc_auc'],
)

cv_f1_scores = cv_metric_results['test_f1']
cv_f1_mean = cv_f1_scores.mean()

cv_roc_auc_scores = cv_metric_results['test_roc_auc']
cv_roc_auc_mean = cv_roc_auc_scores.mean()

print("F1 Scores:", cv_f1_scores)
print("Mean F1:", cv_f1_mean)
print("ROC AUC Scores:", cv_roc_auc_scores)
print("Mean ROC AUC:", cv_roc_auc_mean)
