In [1]:
#phase1

In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

In [3]:
import streamlit as st
import numpy as np
import joblib


In [4]:
import os

# Location of the credit-card CSV. Set the CREDITCARD_CSV environment variable
# to run the notebook on another machine; when it is unset, the original
# hard-coded location is used, so existing behaviour is unchanged.
DATA_PATH = os.environ.get(
    "CREDITCARD_CSV",
    r"C:\Users\Nandhakumar\Downloads\creditcard.csv\creditcard.csv",
)
df = pd.read_csv(DATA_PATH)


In [5]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(284807, 31)

In [6]:
# Number of rows for each value of the 'Class' label column.
df['Class'].value_counts()


Class
0    284315
1       492
Name: count, dtype: int64

In [7]:
# Same label distribution, expressed as a percentage of all rows.
df['Class'].value_counts(normalize=True) * 100

Class
0    99.827251
1     0.172749
Name: proportion, dtype: float64

In [8]:
# Split the frame into the label column (y) and every remaining column (X).
y = df['Class']
X = df.drop(columns='Class')

In [9]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the label
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits so no test-set information leaks in.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
# Display the fitted scaler object.
scaler

In [12]:
# Baseline model: logistic regression with 'balanced' class weights and a
# raised iteration cap (max_iter=1000).
lr = LogisticRegression(class_weight='balanced', max_iter=1000)

lr.fit(X_train_scaled, y_train)


In [13]:
# Class-1 probabilities (second predict_proba column) and hard labels from the
# logistic regression on the held-out split.
y_prob_lr = lr.predict_proba(X_test_scaled)[:, 1]
y_pred_lr = lr.predict(X_test_scaled)

In [14]:
# Logistic-regression evaluation on the held-out split: confusion matrix and
# per-class report from the hard labels, ROC-AUC from the class-1 probabilities.
print(confusion_matrix(y_test, y_pred_lr))
print(classification_report(y_test, y_pred_lr))
print("ROC-AUC:", roc_auc_score(y_test, y_prob_lr))


[[55478  1386]
 [    8    90]]
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     56864
           1       0.06      0.92      0.11        98

    accuracy                           0.98     56962
   macro avg       0.53      0.95      0.55     56962
weighted avg       1.00      0.98      0.99     56962

ROC-AUC: 0.9720834996210077


In [15]:
# Random forest configured to favour catching fraud:
#   - 300 trees, each capped at depth 12 to limit overfitting
#   - class 1 (fraud) weighted 5x relative to class 0
#   - fixed seed for repeatable results; n_jobs=-1 trains in parallel
rf = RandomForestClassifier(
    class_weight={0: 1, 1: 5},
    max_depth=12,
    n_estimators=300,
    n_jobs=-1,
    random_state=42,
)

rf.fit(X_train_scaled, y_train)


In [16]:
# Cut-off applied to the class-1 probability: anything at or above it is
# labelled 1. It is lower than the usual 0.5, so more rows get flagged.
threshold = 0.3

y_prob_rf = rf.predict_proba(X_test_scaled)[:, 1]
y_pred_rf = (y_prob_rf >= threshold).astype(int)

In [17]:
# Random-forest evaluation on the held-out split. The confusion matrix and the
# report use the thresholded labels (y_pred_rf); ROC-AUC uses the raw class-1
# probabilities and therefore does not depend on the chosen threshold.
print(confusion_matrix(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))
print("ROC-AUC:", roc_auc_score(y_test, y_prob_rf))


[[56853    11]
 [   14    84]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.88      0.86      0.87        98

    accuracy                           1.00     56962
   macro avg       0.94      0.93      0.94     56962
weighted avg       1.00      1.00      1.00     56962

ROC-AUC: 0.9763946272093531


In [18]:
def generate_risk_score(model, transaction):
    """Return a 0-100 risk score for the first row of ``transaction``.

    The score is the second column of ``model.predict_proba`` (the class-1
    probability) for row 0, multiplied by 100 and rounded to two decimals.
    """
    first_row_probs = model.predict_proba(transaction)[0]
    return round(first_row_probs[1] * 100, 2)


In [19]:
# Score the first held-out transaction with the random forest and map the
# 0-100 score to an action: above 80 block, above 50 flag, otherwise approve.
sample = X_test_scaled[0][None, :]  # keep a 2-D (1, n_features) shape
score = generate_risk_score(rf, sample)

print(
    "Transaction Blocked" if score > 80
    else "Transaction Flagged" if score > 50
    else "Transaction Approved"
)


Transaction Approved


In [20]:
# Persist the trained random forest and the fitted scaler as files in the
# current working directory so they can be reloaded later.
import joblib
joblib.dump(rf, "fraud_model.pkl")
joblib.dump(scaler, "scaler.pkl")

['scaler.pkl']

In [21]:
# NOTE(review): this writes the same `rf` object to the same file as the
# previous cell, so it only overwrites fraud_model.pkl with identical content.
joblib.dump(rf, "fraud_model.pkl")

['fraud_model.pkl']

In [22]:
import numpy as np
import joblib


In [23]:
# Reload the persisted model and scaler from the working directory. Note that
# `scaler` is rebound here to the object read from disk.
# joblib files are pickle-based: only load files that come from a trusted source.
ml_model = joblib.load("fraud_model.pkl")
scaler = joblib.load("scaler.pkl")


In [24]:
def prepare_ml_input_30(amount):
    """Build a single-row, 30-column feature matrix carrying only ``amount``.

    Every column is zero except the last one, which holds ``amount``.
    Returns a float array of shape (1, 30).
    """
    features = np.zeros((1, 30))
    features[0, -1] = amount
    return features

def analyze_transaction(
    sender_wallet,
    receiver_wallet,
    amount,
    wallet_balance,
    tx_hour,
    previous_destinations,
    recent_tx_count,
    last_known_location,
    current_location
):
    """Score a transaction by blending rule-based checks with the ML model.

    Five heuristic rules each add a fixed weight to ``rule_risk`` and a
    human-readable entry to ``reasons``. The ML score is the class-1
    probability from the module-level ``ml_model``. Its input comes from
    ``prepare_ml_input_30`` (only ``amount`` is filled in, every other
    feature is zero) and is passed through the module-level ``scaler``.
    The final score is ``0.6 * ml_risk + 0.4 * rule_risk``.

    Args:
        sender_wallet: Accepted for interface completeness; not used by the
            current rules.
        receiver_wallet: Destination wallet; flagged when it is not a key of
            ``previous_destinations``.
        amount: Transaction amount.
        wallet_balance: Sender balance used for the drain ratio. A balance of
            zero is handled explicitly instead of raising ZeroDivisionError.
        tx_hour: Hour of day; 0-5 and 22-23 count as late night.
        previous_destinations: Mapping keyed by previously used destination
            wallets. Its values are compared against ``amount`` (presumably
            past transfer amounts - confirm with the caller). ``None`` is
            treated as an empty history.
        recent_tx_count: Recent transaction count; 5 or more is flagged.
        last_known_location: Accepted but not used by the current rules.
        current_location: Accepted but not used by the current rules.

    Returns:
        dict with ``ml_risk``, ``rule_risk`` and ``final_risk`` (each rounded
        to two decimals), ``decision`` ("BLOCK" at >= 0.7, "VERIFY" at >= 0.4,
        otherwise "ALLOW") and the list of triggered ``reasons``.
    """
    # A missing history behaves like an empty one (every destination is new).
    if previous_destinations is None:
        previous_destinations = {}

    rule_risk = 0
    reasons = []

    # Rule 1: late-night transaction (hours 0-5 or 22-23).
    if tx_hour in range(0, 6) or tx_hour in range(22, 24):
        rule_risk += 0.15
        reasons.append("Late night transaction")

    # Rule 2: share of the wallet balance being moved.
    if wallet_balance == 0:
        # Nothing to divide by: spending from an empty wallet counts as a
        # full drain, while a non-positive amount drains nothing.
        drain_ratio = float("inf") if amount > 0 else 0.0
    else:
        drain_ratio = amount / wallet_balance

    if drain_ratio > 0.8:
        rule_risk += 0.30
        reasons.append("Wallet drain > 80%")
    elif drain_ratio >= 0.4:
        rule_risk += 0.20
        reasons.append("Wallet drain 40%–80%")

    # Rule 3: destination wallet never used before.
    if receiver_wallet not in previous_destinations:
        rule_risk += 0.20
        reasons.append("New destination wallet")

    # Rule 4: many recent transactions.
    if recent_tx_count >= 5:
        rule_risk += 0.15
        reasons.append("High transaction frequency")

    # Rule 5: amount above 30 that also exceeds every value in the history.
    max_past = max(previous_destinations.values()) if previous_destinations else 0
    if amount > 30 and amount > max_past:
        rule_risk += 0.15
        reasons.append("First-time high amount")

    # ML part: class-1 probability for an input that carries only the amount.
    ml_input = prepare_ml_input_30(amount)
    ml_input_scaled = scaler.transform(ml_input)
    ml_risk = ml_model.predict_proba(ml_input_scaled)[0][1]

    # Hybrid risk: the model contributes 60%, the rules 40%.
    final_risk = (0.6 * ml_risk) + (0.4 * rule_risk)

    # Decision thresholds.
    if final_risk >= 0.7:
        decision = "BLOCK"
    elif final_risk >= 0.4:
        decision = "VERIFY"
    else:
        decision = "ALLOW"

    return {
        "ml_risk": round(ml_risk, 2),
        "rule_risk": round(rule_risk, 2),
        "final_risk": round(final_risk, 2),
        "decision": decision,
        "reasons": reasons
    }
