In [4]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import time
import warnings
import os

warnings.filterwarnings('ignore')

# Ensure Faker is installed (Fix for ModuleNotFoundError)
try:
    from faker import Faker
except ModuleNotFoundError:
    print("⚠ Faker module is missing. Installing...")
    os.system("pip install faker")
    from faker import Faker

# Initialize Faker for synthetic data
fake = Faker()

# Updated dataset path (Your Provided Path)
dataset_path = r"C:\Users\hp\Downloads\archive (10)\PS_20174392719_1491204439457_log.csv"

# Verify dataset existence
if os.path.exists(dataset_path):
    try:
        data = pd.read_csv(dataset_path)
        print("✅ Dataset loaded successfully!")
    except Exception as e:
        print(f"❌ Error loading dataset: {e}")
        data = pd.DataFrame(columns=['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest'])  # Empty fallback DataFrame
else:
    print(f"❌ File not found: {dataset_path}. Please verify the path.")
    data = pd.DataFrame(columns=['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest'])  # Empty fallback DataFrame

# Data preprocessing
features = ['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']
X = data[features].fillna(0)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train Isolation Forest model
model = IsolationForest(contamination=0.01, random_state=42)
model.fit(X_scaled)

# Function to generate synthetic UPI transactions
def generate_transaction():
    return {
        'amount': np.random.uniform(10, 10000),
        'oldbalanceOrg': np.random.uniform(0, 50000),
        'newbalanceOrig': np.random.uniform(0, 50000),
        'oldbalanceDest': np.random.uniform(0, 100000),
        'newbalanceDest': np.random.uniform(0, 100000)
    }

# Simulate real-time transaction stream
def simulate_realtime_stream(n_transactions=5):
    print("🔹 Starting real-time UPI fraud detection simulation...")
    for i in range(n_transactions):
        transaction = generate_transaction()
        transaction_df = pd.DataFrame([transaction])
        
        # Scale the transaction data
        transaction_scaled = scaler.transform(transaction_df)
        
        # Predict fraud (1: normal, -1: anomaly)
        prediction = model.predict(transaction_scaled)
        
        # Output result
        status = "🚨 Fraud" if prediction[0] == -1 else "✅ Normal"
        print(f"📢 Transaction {i+1}: Amount=₹{transaction['amount']:.2f}, Status={status}")
        
        # Simulate real-time delay
        time.sleep(1)

# Run the simulation
simulate_realtime_stream(n_transactions=5)

# Evaluate model on test data
if "isFraud" in data.columns:
    y_pred = model.predict(X_scaled)
    y_pred = [1 if x == -1 else 0 for x in y_pred]  # Convert to 0/1
    y_true = data['isFraud']

    from sklearn.metrics import classification_report
    print("\n📊 Model Performance on Training Data:")
    print(classification_report(y_true, y_pred))
else:
    print("\n⚠ No fraud labels found in dataset. Evaluation skipped.")


✅ Dataset loaded successfully!
🔹 Starting real-time UPI fraud detection simulation...
📢 Transaction 1: Amount=₹7168.38, Status=✅ Normal
📢 Transaction 2: Amount=₹8653.63, Status=✅ Normal
📢 Transaction 3: Amount=₹6186.32, Status=✅ Normal
📢 Transaction 4: Amount=₹5135.52, Status=✅ Normal
📢 Transaction 5: Amount=₹4594.13, Status=✅ Normal

📊 Model Performance on Training Data:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99   6354407
           1       0.01      0.04      0.01      8213

    accuracy                           0.99   6362620
   macro avg       0.50      0.52      0.50   6362620
weighted avg       1.00      0.99      0.99   6362620



In [5]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE  # Oversampling for fraud cases
import time
import warnings
import os

warnings.filterwarnings('ignore')

# Ensure Faker is installed
try:
    from faker import Faker
except ModuleNotFoundError:
    print("⚠ Faker module is missing. Installing...")
    os.system("pip install faker")
    from faker import Faker

# Initialize Faker for synthetic data
fake = Faker()

# Updated dataset path (Your Provided Path)
dataset_path = r"C:\Users\hp\Downloads\archive (10)\PS_20174392719_1491204439457_log.csv"

# Verify dataset existence
if os.path.exists(dataset_path):
    try:
        data = pd.read_csv(dataset_path)
        print("✅ Dataset loaded successfully!")
    except Exception as e:
        print(f"❌ Error loading dataset: {e}")
        data = pd.DataFrame(columns=['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest'])  # Empty fallback DataFrame
else:
    print(f"❌ File not found: {dataset_path}. Please verify the path.")
    data = pd.DataFrame(columns=['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest'])  # Empty fallback DataFrame

# Feature Engineering
data['transaction_freq'] = data.groupby('nameOrig')['amount'].transform('count')  # Frequency per account
data['previous_fraud'] = data.groupby('nameOrig')['isFraud'].transform('sum')  # Previous fraud cases per account

# Data preprocessing
features = ['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest', 'transaction_freq', 'previous_fraud']
data.fillna(0, inplace=True)
X = data[features]
y = data['isFraud'] if 'isFraud' in data.columns else pd.Series(np.zeros(X.shape[0]))  # Default to no fraud if missing

# Apply SMOTE to balance fraud cases
smote = SMOTE(sampling_strategy=0.1, random_state=42)  # Increase fraud samples
X_resampled, y_resampled = smote.fit_resample(X, y)

# Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_resampled)

# Train Optimized Isolation Forest model
model = IsolationForest(contamination=0.03, random_state=42)
model.fit(X_scaled)

# Function to generate synthetic UPI transactions
def generate_transaction():
    return {
        'amount': np.random.uniform(10, 10000),
        'oldbalanceOrg': np.random.uniform(0, 50000),
        'newbalanceOrig': np.random.uniform(0, 50000),
        'oldbalanceDest': np.random.uniform(0, 100000),
        'newbalanceDest': np.random.uniform(0, 100000),
        'transaction_freq': np.random.randint(1, 100),  # Simulated frequency
        'previous_fraud': np.random.randint(0, 10)  # Simulated past fraud cases
    }

# Simulate real-time transaction stream
def simulate_realtime_stream(n_transactions=5):
    print("🔹 Starting real-time UPI fraud detection simulation...")
    for i in range(n_transactions):
        transaction = generate_transaction()
        transaction_df = pd.DataFrame([transaction])
        
        # Scale the transaction data
        transaction_scaled = scaler.transform(transaction_df)
        
        # Predict fraud (1: normal, -1: anomaly)
        prediction = model.predict(transaction_scaled)
        
        # Output result
        status = "🚨 Fraud" if prediction[0] == -1 else "✅ Normal"
        print(f"📢 Transaction {i+1}: Amount=₹{transaction['amount']:.2f}, Status={status}")
        
        # Simulate real-time delay
        time.sleep(1)

# Run the simulation
simulate_realtime_stream(n_transactions=5)

# Evaluate model on test data
if "isFraud" in data.columns:
    y_pred = model.predict(X_scaled)
    y_pred = [1 if x == -1 else 0 for x in y_pred]  # Convert to 0/1
    from sklearn.metrics import classification_report
    print("\n📊 Model Performance on Training Data:")
    print(classification_report(y_resampled, y_pred))
else:
    print("\n⚠ No fraud labels found in dataset. Evaluation skipped.")


✅ Dataset loaded successfully!
🔹 Starting real-time UPI fraud detection simulation...
📢 Transaction 1: Amount=₹8455.97, Status=✅ Normal
📢 Transaction 2: Amount=₹8134.43, Status=✅ Normal
📢 Transaction 3: Amount=₹9124.72, Status=✅ Normal
📢 Transaction 4: Amount=₹7106.54, Status=✅ Normal
📢 Transaction 5: Amount=₹8406.65, Status=✅ Normal

📊 Model Performance on Training Data:
              precision    recall  f1-score   support

           0       0.92      0.98      0.95   6354407
           1       0.42      0.14      0.21    635440

    accuracy                           0.90   6989847
   macro avg       0.67      0.56      0.58   6989847
weighted avg       0.87      0.90      0.88   6989847



In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE  # Oversampling for fraud cases
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
import time
import warnings
import os

warnings.filterwarnings('ignore')

# Ensure Faker is installed
try:
    from faker import Faker
except ModuleNotFoundError:
    print("⚠ Faker module is missing. Installing...")
    os.system("pip install faker")
    from faker import Faker

# Initialize Faker for synthetic data
fake = Faker()

# Updated dataset path (Your Provided Path)
dataset_path = r"C:\Users\hp\Downloads\archive (10)\PS_20174392719_1491204439457_log.csv"

# Verify dataset existence
if os.path.exists(dataset_path):
    try:
        data = pd.read_csv(dataset_path)
        print("✅ Dataset loaded successfully!")
    except Exception as e:
        print(f"❌ Error loading dataset: {e}")
        data = pd.DataFrame(columns=['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest'])  # Empty fallback DataFrame
else:
    print(f"❌ File not found: {dataset_path}. Please verify the path.")
    data = pd.DataFrame(columns=['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest'])  # Empty fallback DataFrame

# Advanced Feature Engineering
data['transaction_time'] = pd.to_datetime(data['step'], unit='s')  # Simulated transaction timestamps
data['account_age'] = data.groupby('nameOrig')['step'].transform('min')  # Account age based on first transaction
data['transaction_network'] = data.groupby('nameOrig')['amount'].transform('sum')  # Total transaction amount per account

# Data preprocessing
features = ['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest', 'account_age', 'transaction_network']
data.fillna(0, inplace=True)
X = data[features]
y = data['isFraud'] if 'isFraud' in data.columns else pd.Series(np.zeros(X.shape[0]))  # Default to no fraud if missing

# Apply SMOTE to balance fraud cases
smote = SMOTE(sampling_strategy=0.1, random_state=42)  # Increase fraud samples
X_resampled, y_resampled = smote.fit_resample(X, y)

# Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_resampled)

# Train Hybrid Fraud Models
models = {
    "Isolation Forest": IsolationForest(contamination=0.05, random_state=42),
    "Logistic Regression": LogisticRegression(),
    "XGBoost": XGBClassifier(),
    "Autoencoder (Neural Network)": MLPClassifier(hidden_layer_sizes=(64, 32, 16), max_iter=500)
}

for model_name, model in models.items():
    print(f"\n🔹 Training {model_name}...")
    model.fit(X_scaled, y_resampled)

# Function to generate synthetic UPI transactions
def generate_transaction():
    return {
        'amount': np.random.uniform(10, 10000),
        'oldbalanceOrg': np.random.uniform(0, 50000),
        'newbalanceOrig': np.random.uniform(0, 50000),
        'oldbalanceDest': np.random.uniform(0, 100000),
        'newbalanceDest': np.random.uniform(0, 100000),
        'account_age': np.random.uniform(1, 500),
        'transaction_network': np.random.uniform(1000, 500000)
    }

# Simulate real-time transaction stream
def simulate_realtime_stream(n_transactions=5):
    print("🔹 Starting real-time UPI fraud detection simulation...")
    for i in range(n_transactions):
        transaction = generate_transaction()
        transaction_df = pd.DataFrame([transaction])
        
        # Scale the transaction data
        transaction_scaled = scaler.transform(transaction_df)
        
        # Predict fraud using multiple models
        predictions = {model_name: model.predict(transaction_scaled)[0] for model_name, model in models.items()}
        
        # Output result
        results = {k: "🚨 Fraud" if v == -1 else "✅ Normal" for k, v in predictions.items()}
        print(f"\n📢 Transaction {i+1}: Amount=₹{transaction['amount']:.2f}")
        for model_name, result in results.items():
            print(f"🔹 {model_name}: {result}")
        
        # Simulate real-time delay
        time.sleep(1)

# Run the simulation
simulate_realtime_stream(n_transactions=5)

# Evaluate models on test data
if "isFraud" in data.columns:
    for model_name, model in models.items():
        y_pred = model.predict(X_scaled)
        y_pred = [1 if x == -1 else 0 for x in y_pred]  # Convert to 0/1
        
        from sklearn.metrics import classification_report
        print(f"\n📊 {model_name} - Model Performance on Training Data:")
        print(classification_report(y_resampled, y_pred))
else:
    print("\n⚠ No fraud labels found in dataset. Evaluation skipped.")
