# 🧑‍💻 Local Fraud Detection Exploration Notebook

This version runs **completely locally** using scikit-learn + XGBoost.
- Generate 1000-row synthetic fraud dataset
- Train/test split
- Train XGBoost model
- Evaluate with a classification report (precision, recall, F1, accuracy) and a confusion matrix
- Predict fraud score for sample transactions

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import XGBClassifier

# Seed NumPy's global RNG so the np.random.* draws in the cells below are
# reproducible from run to run.
np.random.seed(42)

## Generate Synthetic Dataset

In [None]:
# Number of synthetic transactions to simulate.
n = 1000

# Raw feature columns. The random draws are made in the order listed here, so
# the order must stay fixed for the generated values to be reproducible under
# a given NumPy seed.
data = dict(
    txn_id=np.arange(1, n + 1),
    amount=np.round(np.random.exponential(scale=100, size=n), 2),
    time_delta=np.round(np.random.exponential(scale=60, size=n), 2),
    device_score=np.round(np.random.uniform(0, 1, n), 3),
    geo_distance=np.round(np.random.exponential(scale=50, size=n), 2),
    num_prev_txns=np.random.poisson(lam=3, size=n),
    is_night=np.random.choice([0, 1], size=n, p=[0.7, 0.3]),
)
df = pd.DataFrame(data)

# Rule-based risk indicators (0/1). Each one that fires adds a fixed weight to
# the per-row fraud probability.
high_amount = df["amount"].gt(500).astype(int)
long_distance = df["geo_distance"].gt(100).astype(int)
night_time = df["is_night"].eq(1).astype(int)
weak_device = df["device_score"].lt(0.3).astype(int)

# The weights sum to 0.7, so the clip below is only a safeguard that keeps the
# value a valid probability if the weights are ever changed.
fraud_prob = 0.2 * high_amount + 0.2 * long_distance + 0.2 * night_time + 0.1 * weak_device
fraud_prob = fraud_prob.clip(lower=0, upper=1)

# Randomness: a row is labelled fraud when a uniform draw falls below its
# rule-based probability.
df["fraud_label"] = fraud_prob.gt(np.random.rand(n)).astype(int)

df.head()

## Train/Test Split

In [None]:
# Features are every column except the target and the row identifier.
X = df.drop(columns=["fraud_label", "txn_id"])
y = df["fraud_label"]

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# test rows influence the mean/std used for preprocessing (data leakage).
# `stratify=y` keeps the fraud/non-fraud ratio the same in both folds.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Learn the scaling statistics from the training fold only, then apply the
# same transformation to the held-out fold.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any other cell referring to `X_scaled` still works; it is the
# full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

X_train.shape, X_test.shape

## Train XGBoost Model

In [None]:
# Hyperparameters for the gradient-boosted tree classifier, collected in one
# place so they are easy to tweak.
# NOTE(review): `use_label_encoder` appears to be deprecated/ignored in recent
# XGBoost releases - confirm against the installed version before removing it.
xgb_params = {
    "n_estimators": 200,
    "max_depth": 5,
    "learning_rate": 0.1,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "random_state": 42,
    "use_label_encoder": False,
    "eval_metric": "logloss",
}
model = XGBClassifier(**xgb_params)

# Fit on the training fold, then predict hard class labels for the held-out
# fold.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Per-class precision/recall/F1 followed by the raw confusion matrix.
report_text = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print(report_text)
print(conf_matrix)

## Predict Sample Transaction

In [None]:
# Take the first held-out row, sliced (not indexed) so it stays 2-D as
# predict_proba is given a batch of one.
sample = X_test[:1]

# Row 0 is the single sample; column 1 is read as the fraud probability.
class_probs = model.predict_proba(sample)
prob = class_probs[0][1]
print("Fraud probability:", prob)