<a href="https://colab.research.google.com/github/sanjanavb/sanjana-git/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Environment setup: shell commands executed through IPython's `!` escape.
# Quietly install Optuna and CatBoost.
!pip install optuna --quiet
!pip install catboost --quiet
# Remove numpy and catboost, then reinstall both with --upgrade.
# NOTE(review): presumably a workaround for a numpy/catboost binary mismatch
# on the hosted runtime — confirm it is still needed. The catboost install
# above is undone by this uninstall.
!pip uninstall -y numpy catboost
!pip install --upgrade numpy catboost

import os
import pandas as pd


import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import optuna
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from sklearn.feature_selection import RFE

# ---- Data loading and leakage-free preprocessing ----
# Order matters: split first, then fit every data-dependent transform
# (scaler, SMOTE, feature selector) on the training partition only, so that
# the held-out test set never influences training.
df = pd.read_csv("creditcard_2023.csv")

# Drop 'Time' column
# NOTE(review): this raises KeyError if the CSV has no 'Time' column —
# confirm the schema of creditcard_2023.csv.
df = df.drop(columns=["Time"])

# Features & Labels
X = df.drop(columns=["Class"])
y = df["Class"]

# Train-Test Split (stratified so both partitions keep the original class ratio).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Work on copies so the column assignment below does not write into views of X.
X_train = X_train.copy()
X_test = X_test.copy()

# Standardize 'Amount' using statistics learned from the training rows only.
scaler = StandardScaler()
X_train["Amount"] = scaler.fit_transform(X_train[["Amount"]]).ravel()
X_test["Amount"] = scaler.transform(X_test[["Amount"]]).ravel()

# Handle class imbalance using SMOTE — on the training partition only.
# Oversampling before the split would place synthetic neighbours of test rows
# into the training set and inflate every reported metric.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
X_train, y_train = X_resampled, y_resampled

# Recursive feature elimination down to 15 features, ranked by a random forest.
# n_jobs=-1 only parallelizes tree building; RFE refits the forest once per
# eliminated feature, so this is the dominant cost of the cell.
rf_selector = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
rfe = RFE(estimator=rf_selector, n_features_to_select=15)
X_train_selected = rfe.fit_transform(X_train, y_train)
X_test_selected = rfe.transform(X_test)


# ✅ LightGBM Model
# Baseline gradient-boosted classifier trained on the RFE-selected features;
# the classification report is computed on the held-out test split.
lgbm_model = LGBMClassifier(learning_rate=0.05, n_estimators=500).fit(
    X_train_selected, y_train
)
y_pred_lgbm = lgbm_model.predict(X_test_selected)
print(
    "🔹 LightGBM Results:",
    classification_report(y_test, y_pred_lgbm),
    sep="\n",
)

# ✅ CatBoost Model
# Same protocol as above with CatBoost; verbose=0 silences per-iteration logs.
cat_model = CatBoostClassifier(
    depth=6,
    iterations=500,
    learning_rate=0.05,
    verbose=0,
).fit(X_train_selected, y_train)
y_pred_cat = cat_model.predict(X_test_selected)
print(
    "🔹 CatBoost Results:",
    classification_report(y_test, y_pred_cat),
    sep="\n",
)

# ✅ Autoencoder for Anomaly Detection
# Reconstruction-error detector: the network learns to reproduce non-fraud
# rows, and a row is flagged as fraud when its reconstruction error exceeds
# the 95th percentile of the errors seen on non-fraud training rows.
X_train_ae = np.asarray(X_train, dtype="float32")
X_test_ae = np.asarray(X_test, dtype="float32")
# Train on the normal class only; if fraud rows are included the network
# learns to reconstruct them too and the error stops separating the classes.
X_train_normal = X_train_ae[np.asarray(y_train) == 0]

input_dim = X_train_ae.shape[1]
encoding_dim = 16
input_layer = Input(shape=(input_dim,))
encoded = Dense(encoding_dim, activation="relu")(input_layer)
encoded = Dense(8, activation="relu")(encoded)
decoded = Dense(encoding_dim, activation="relu")(encoded)
# Linear output: the inputs are not confined to [0, 1] (e.g. 'Amount' is
# standardized and takes negative values), which a sigmoid cannot reproduce.
decoded = Dense(input_dim, activation="linear")(decoded)

autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer="adam", loss="mse")
# Monitor validation loss on a slice of the training data, not the test set,
# so the test set stays untouched until final evaluation.
autoencoder.fit(
    X_train_normal,
    X_train_normal,
    epochs=10,
    batch_size=32,
    shuffle=True,
    validation_split=0.1,
)

# Predict Fraud using Autoencoder
# Per-row mean squared reconstruction error.
mse_train = np.mean(np.square(X_train_normal - autoencoder.predict(X_train_normal)), axis=1)
mse_test = np.mean(np.square(X_test_ae - autoencoder.predict(X_test_ae)), axis=1)
# Threshold calibrated on non-fraud training rows only.
threshold = np.percentile(mse_train, 95)
y_pred_autoencoder = (mse_test > threshold).astype(int)

print("🔹 Autoencoder Results:")
print(classification_report(y_test, y_pred_autoencoder))

def objective(trial):
    """Optuna objective: validation ROC-AUC of a LightGBM classifier.

    Samples ``n_estimators``, ``learning_rate``, ``max_depth`` and
    ``num_leaves`` from the trial, fits a model on part of the training data
    and scores it on a validation slice held out from that same training data.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        ROC-AUC computed from predicted positive-class probabilities on the
        validation slice (higher is better).
    """
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "max_depth": trial.suggest_int("max_depth", 3, 15),
        "num_leaves": trial.suggest_int("num_leaves", 20, 100)
    }
    # Tune against a validation split of the training data; scoring trials on
    # the test set would make the final test metrics optimistically biased.
    # The fixed random_state gives every trial the same split.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train_selected, y_train, test_size=0.2, random_state=42, stratify=y_train
    )
    model = LGBMClassifier(**params)
    model.fit(X_fit, y_fit)
    # ROC-AUC is a ranking metric: feed it the positive-class probability,
    # not hard 0/1 labels.
    val_scores = model.predict_proba(X_val)[:, 1]
    return roc_auc_score(y_val, val_scores)

# Run Optuna Optimization
# A seeded sampler makes the sequence of suggested hyperparameters repeatable
# across runs.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=10)
print("Best Hyperparameters:", study.best_params)

# ========== 🔹 STEP 5: SAVE BEST MODEL ==========
# Refit on the full training set with the tuned hyperparameters and persist
# that model; the earlier baseline `lgbm_model` was trained before tuning and
# is not the "best" model.
best_lgbm_model = LGBMClassifier(**study.best_params)
best_lgbm_model.fit(X_train_selected, y_train)
# The saved model expects the RFE-selected feature columns as input.
joblib.dump(best_lgbm_model, "fraud_detection_lgbm.pkl")
print("✅ Best Model Saved: fraud_detection_lgbm.pkl")


Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: catboost 1.2.7
Uninstalling catboost-1.2.7:
  Successfully uninstalled catboost-1.2.7
Collecting numpy
  Using cached numpy-2.2.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting catboost
  Using cached catboost-1.2.7-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Collecting numpy
  Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Using cached catboost-1.2.7-cp311-cp311-manylinux2014_x86_64.whl (98.7 MB)
Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
Installing collected packages: numpy, catboost
Successfully installed catboost-1.2.7 numpy-1.26.4


FileNotFoundError: [Errno 2] No such file or directory: 'creditcard_2023.csv'