<a href="https://colab.research.google.com/github/paviayyala/AIML-Lab/blob/main/FRAUD_DETECTION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# -------------------------------
# FRAUD DETECTION - INLINE DATASET
# MODEL: RANDOM FOREST
# -------------------------------

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# -------------------------------
# 1. Create Inline Fraud Dataset
# -------------------------------
# The sample is written one transaction per tuple, with the fields in the
# order listed in _COLUMNS:
#   amount         -> transaction amount
#   txn_count_24h  -> number of transactions in the last 24 hours
#   avg_amount_7d  -> average transaction amount over the last 7 days
#   is_night       -> 1 if the transaction happened at night
#   is_foreign     -> 1 if it is a foreign transaction
#   fraud (label)  -> 1 = fraud, 0 = legit
#
# NOTE: in this toy sample is_night and is_foreign hold exactly the same value
# as the fraud label on every row, so either flag alone separates the classes.

_COLUMNS = (
    "amount", "txn_count_24h", "avg_amount_7d",
    "is_night", "is_foreign", "fraud",
)

_ROWS = [
    (120,   2,  110, 0, 0, 0),
    (1500, 15,  900, 1, 1, 1),
    (50,    1,   45, 0, 0, 0),
    (5000, 20, 3000, 1, 1, 1),
    (70,    2,   60, 0, 0, 0),
    (9000, 25, 5000, 1, 1, 1),
    (60,    1,   55, 0, 0, 0),
    (40,    1,   35, 0, 0, 0),
    (8000, 22, 4500, 1, 1, 1),
    (55,    2,   50, 0, 0, 0),
    (200,   3,  180, 0, 0, 0),
    (1800, 14,  850, 1, 1, 1),
    (65,    2,   60, 0, 0, 0),
    (6000, 18, 2800, 1, 1, 1),
    (90,    3,   80, 0, 0, 0),
    (7200, 21, 4800, 1, 1, 1),
    (45,    1,   40, 0, 0, 0),
    (30,    1,   30, 0, 0, 0),
    (9500, 30, 5200, 1, 1, 1),
    (85,    3,   75, 0, 0, 0),
]

# Transpose the records into a column-oriented mapping {column name: values}.
data = {name: list(values) for name, values in zip(_COLUMNS, zip(*_ROWS))}

df = pd.DataFrame(data)

# Preview the first rows, then show how many rows fall into each class.
print(
    "Dataset:",
    df.head(),
    "\nFraud Distribution:",
    df["fraud"].value_counts(),
    sep="\n",
)

# -------------------------------
# 2. Split Features & Labels
# -------------------------------
# Every column except "fraud" is a feature; "fraud" itself is the target.
X, y = df.drop(columns="fraud"), df["fraud"]

# Hold out 30% of the rows for evaluation. The split is stratified on y and
# uses a fixed random_state so that re-running the cell gives the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# -------------------------------
# 3. Train Random Forest Model
# -------------------------------
# Build the forest and fit it on the training split in one expression;
# fit() hands back the estimator itself, so `model` is the fitted classifier.
model = RandomForestClassifier(
    random_state=42,          # fixed seed for repeatable runs
    class_weight="balanced",  # class-weighting mode passed to the forest
    max_depth=6,              # upper bound on the depth of each tree
    n_estimators=200,         # number of trees in the forest
).fit(X_train, y_train)

# -------------------------------
# 4. Evaluate Model
# -------------------------------
# Column 1 of predict_proba is kept as the score fed to ROC-AUC; the hard
# class predictions feed the classification report and confusion matrix.
y_prob = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

# Each heading is printed on its own line, followed by the metric below it.
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

print("ROC-AUC Score:", roc_auc_score(y_test, y_prob))

# -------------------------------
# 5. Feature Importance
# -------------------------------
# Pair every feature column with the importance reported by the fitted model,
# then order the table from the highest importance to the lowest.
importance_df = pd.DataFrame(
    {"Feature": X.columns, "Importance": model.feature_importances_}
)
importance_df = importance_df.sort_values(by="Importance", ascending=False)

print("\nFeature Importance:", importance_df, sep="\n")


Dataset:
   amount  txn_count_24h  avg_amount_7d  is_night  is_foreign  fraud
0     120              2            110         0           0      0
1    1500             15            900         1           1      1
2      50              1             45         0           0      0
3    5000             20           3000         1           1      1
4      70              2             60         0           0      0

Fraud Distribution:
fraud
0    12
1     8
Name: count, dtype: int64

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      1.00      1.00         2

    accuracy                           1.00         6
   macro avg       1.00      1.00      1.00         6
weighted avg       1.00      1.00      1.00         6

Confusion Matrix:
[[4 0]
 [0 2]]
ROC-AUC Score: 1.0

Feature Importance:
         Feature  Importance
4     is_foreign       0.320
3       is_night       0.2