In [None]:
# Import Libraries
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import joblib
import warnings
warnings.filterwarnings('ignore')

# Import the new classifiers for the hybrid model
from xgboost import XGBClassifier
from sklearn.ensemble import VotingClassifier

# Plot theme: switch matplotlib to its 'dark_background' style, then configure
# seaborn's "darkgrid" style with the colour overrides listed below.
plt.style.use('dark_background')

dark_theme_overrides = dict(
    [
        ('axes.facecolor', '#111111'),
        ('figure.facecolor', '#111111'),
        ('axes.edgecolor', '#444444'),
        ('grid.color', '#333333'),
        ('text.color', 'white'),
        ('xtick.color', 'white'),
        ('ytick.color', 'white'),
        ('axes.labelcolor', 'white'),
        ('axes.grid', True),
    ]
)
sns.set_style("darkgrid", rc=dark_theme_overrides)

# Load the transactions and derive the engineered columns in one chain:
#   balanceDiffOrig = oldbalanceOrg  - newbalanceOrig
#   balanceDiffDest = oldbalanceDest - newbalanceDest
# The 'step' column is dropped afterwards.
df = (
    pd.read_csv("dataset/Fraud.csv")
    .assign(
        balanceDiffOrig=lambda frame: frame['oldbalanceOrg'] - frame['newbalanceOrig'],
        balanceDiffDest=lambda frame: frame['oldbalanceDest'] - frame['newbalanceDest'],
    )
    .drop(columns='step')
)

# Modeling frame: everything except the name columns and 'isFlaggedFraud'.
df_modeling = df.drop(columns=['nameOrig', 'nameDest', 'isFlaggedFraud'])

# Column groups consumed by the preprocessing step further down.
cat = ['type']
num = [
    'amount',
    'oldbalanceOrg',
    'newbalanceOrig',
    'oldbalanceDest',
    'newbalanceDest',
    'balanceDiffOrig',
    'balanceDiffDest',
]

# Target is 'isFraud'; every other remaining column is a feature.
y = df_modeling['isFraud']
x = df_modeling.drop(columns='isFraud')

# 70/30 split, stratified on the target so both parts keep the same class ratio.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# --- Base estimators for the hybrid (voting) classifier ---

# 1. Logistic Regression with balanced class weights.
lr = LogisticRegression(class_weight='balanced', max_iter=1000, random_state=42)

# 2. XGBoost. scale_pos_weight is the number of label-0 training rows divided
#    by the number of label-1 training rows, used to offset the class imbalance.
train_class_counts = y_train.value_counts()
scale_pos_weight = train_class_counts[0] / train_class_counts[1]
xgb = XGBClassifier(scale_pos_weight=scale_pos_weight, random_state=42)

# Hybrid model: a soft-voting ensemble, i.e. the final decision is made from
# the predicted probabilities of the base estimators rather than their labels.
base_estimators = [('lr', lr), ('xgb', xgb)]
voting_clf = VotingClassifier(estimators=base_estimators, voting='soft')

# --- Preprocessing + hybrid classifier pipeline ---
# Numerical columns (`num`) are standardized; categorical columns (`cat`) are
# one-hot encoded. Columns not listed in either group are dropped by the
# ColumnTransformer (its default `remainder='drop'`).
#
# handle_unknown='ignore': the fitted pipeline is persisted with joblib and
# reused for inference, so a category that was not present in the training
# split must not raise. Such a value is encoded as all zeros instead. The
# encoding of categories seen during fit is unchanged.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num),
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat),
    ]
)

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', voting_clf),
])

# Local import: accuracy_score is not part of this cell's import list.
from sklearn.metrics import accuracy_score

# Train the hybrid model (fits the preprocessor and the voting classifier).
pipeline.fit(x_train, y_train)

# Predict once on the held-out split and report per-class metrics.
ypred = pipeline.predict(x_test)
print(classification_report(y_test, ypred))

# Accuracy on the test split. This is the same value pipeline.score() returns
# for a classifier, but computed from the predictions already made above, so
# the preprocessing and every base model are not run over the test set again.
# NOTE: the script treats the classes as imbalanced (class_weight /
# scale_pos_weight), so read this number together with the report above.
print(f"Model Score: {accuracy_score(y_test, ypred)}")

# Persist the fitted pipeline (preprocessing + ensemble) for later reuse.
joblib.dump(pipeline, 'hybrid_fraud_detection_model.pkl')

In [None]:
# Import Libraries
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import joblib
import warnings
warnings.filterwarnings('ignore')

# Import the classifiers for the hybrid model
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
import lightgbm as lgb
from sklearn.ensemble import VotingClassifier

# Plot theme: switch matplotlib to its 'dark_background' style, then configure
# seaborn's "darkgrid" style with the colour overrides listed below.
plt.style.use('dark_background')

dark_theme_overrides = dict(
    [
        ('axes.facecolor', '#111111'),
        ('figure.facecolor', '#111111'),
        ('axes.edgecolor', '#444444'),
        ('grid.color', '#333333'),
        ('text.color', 'white'),
        ('xtick.color', 'white'),
        ('ytick.color', 'white'),
        ('axes.labelcolor', 'white'),
        ('axes.grid', True),
    ]
)
sns.set_style("darkgrid", rc=dark_theme_overrides)

# Load the transactions and derive the engineered columns in one chain:
#   balanceDiffOrig = oldbalanceOrg  - newbalanceOrig
#   balanceDiffDest = oldbalanceDest - newbalanceDest
# The 'step' column is dropped afterwards.
df = (
    pd.read_csv("dataset/Fraud.csv")
    .assign(
        balanceDiffOrig=lambda frame: frame['oldbalanceOrg'] - frame['newbalanceOrig'],
        balanceDiffDest=lambda frame: frame['oldbalanceDest'] - frame['newbalanceDest'],
    )
    .drop(columns='step')
)

# Modeling frame: everything except the name columns and 'isFlaggedFraud'.
df_modeling = df.drop(columns=['nameOrig', 'nameDest', 'isFlaggedFraud'])

# Column groups consumed by the preprocessing step further down.
cat = ['type']
num = [
    'amount',
    'oldbalanceOrg',
    'newbalanceOrig',
    'oldbalanceDest',
    'newbalanceDest',
    'balanceDiffOrig',
    'balanceDiffDest',
]

# Target is 'isFraud'; every other remaining column is a feature.
y = df_modeling['isFraud']
x = df_modeling.drop(columns='isFraud')

# 70/30 split, stratified on the target so both parts keep the same class ratio.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# --- Base estimators for the three-model hybrid (voting) classifier ---

# 1. Logistic Regression with balanced class weights.
lr = LogisticRegression(class_weight='balanced', max_iter=1000, random_state=42)

# 2. XGBoost. scale_pos_weight is the number of label-0 training rows divided
#    by the number of label-1 training rows.
train_class_counts = y_train.value_counts()
scale_pos_weight = train_class_counts[0] / train_class_counts[1]
xgb = XGBClassifier(scale_pos_weight=scale_pos_weight, random_state=42)

# 3. LightGBM with balanced class weights.
lgbm = lgb.LGBMClassifier(class_weight='balanced', random_state=42)

# Soft-voting ensemble over all three base estimators.
base_estimators = [('lr', lr), ('xgb', xgb), ('lgbm', lgbm)]
voting_clf = VotingClassifier(estimators=base_estimators, voting='soft')

# --- Preprocessing + enhanced hybrid classifier pipeline ---
# Numerical columns (`num`) are standardized; categorical columns (`cat`) are
# one-hot encoded. Columns not listed in either group are dropped by the
# ColumnTransformer (its default `remainder='drop'`).
#
# handle_unknown='ignore': the fitted pipeline is persisted with joblib and
# reused for inference, so a category that was not present in the training
# split must not raise. Such a value is encoded as all zeros instead. The
# encoding of categories seen during fit is unchanged.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), num),
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat),
    ]
)

pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', voting_clf),
])

# Local import: accuracy_score is not part of this cell's import list.
from sklearn.metrics import accuracy_score

# Train the hybrid model (fits the preprocessor and the voting classifier).
pipeline.fit(x_train, y_train)

# Predict once on the held-out split and report per-class metrics.
ypred = pipeline.predict(x_test)
print("--- Hybrid Model (LR + XGB + LGBM) Results ---")
print(classification_report(y_test, ypred))

# Accuracy on the test split. This is the same value pipeline.score() returns
# for a classifier, but computed from the predictions already made above, so
# the preprocessing and all three base models are not run over the test set
# again.
# NOTE: the script treats the classes as imbalanced (class_weight /
# scale_pos_weight), so read this number together with the report above.
print(f"Model Score: {accuracy_score(y_test, ypred)}")

# Persist the fitted pipeline (preprocessing + ensemble) for later reuse.
joblib.dump(pipeline, 'advanced_hybrid_fraud_model.pkl')