<a href="https://colab.research.google.com/github/moridin04/CCADMACL-Research/blob/main/fraud_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -*- coding: utf-8 -*-
"""Fraud_Detection_Program_FINAL.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1J94F9wWFUPZsZqWEwFvx67fh0TM9zAsQ

# **Exploring Anomaly Detection Techniques for Fraudulent Credit Card Transactions**

# **1. Environment Setup**

**1.1 Tools and Libraries Installation**
"""

!pip install lime
!pip install scikit-learn

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import lime
import lime.lime_tabular

"""# 2. **Importing Libraries**

**2.1 Essential Libraries for Data Analysis**
"""

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import average_precision_score, precision_recall_curve

"""**2.2 Libraries for Machine Learning and Visualization**"""

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import IsolationForest
from sklearn.ensemble import VotingClassifier
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.neighbors import LocalOutlierFactor

"""#3. **Loading Data**

**3.1 Loading the Kaggle Credit Card Fraud Dataset**
"""

# Load the Kaggle credit-card transactions file from the working directory.
cfd = pd.read_csv('creditcard.csv')

"""#4. **Exploration of Data**

### **Glimpse of the Dataset**

**4.1 Displaying the First Few Rows**
"""

# Bare expression: renders the frame when executed as a notebook cell.
cfd

"""**4.1.2 Dataset Information (Shape, Columns, Null Values, Data Types)**"""

# Structural overview: dimensions, column labels, missing values and dtypes.
for overview_line in (
    f"Shape:  {cfd.shape}\n",
    f"Columns:  {cfd.columns}\n",
    f"Null Values:  \n{cfd.isnull().sum()}\n",
    f"Data Types: \n{cfd.dtypes}",
):
    print(overview_line)

"""**4.2 Summary Statistics for Numerical Features**"""

summary_statistics = cfd.describe(include='all')
print(summary_statistics)

"""**4.2.1 Class Distribution (Fraud vs. Non-Fraud)**"""

# Count of rows per label value, printed under a heading.
class_distribution = cfd['Class'].value_counts()
print("Class Distribution:", class_distribution, sep="\n")

# Bar chart of the same counts, one bar per label value.
plt.figure(figsize=(8, 6))
class_ax = sns.countplot(
    data=cfd,
    x='Class',
    hue='Class',
    palette=['blue', 'red'],
    legend=False,
)
class_ax.set_title('Class Distribution (Fraud vs. Non-Fraud)')
class_ax.set_xlabel('Class (0: Non-Fraud, 1: Fraud)')
class_ax.set_ylabel('Number of Transactions')
plt.show()

"""### **Distribution of Independent Variable**

**4.3 Distribution of Amount**
"""

# Violin plot of the raw transaction amounts.
plt.figure(figsize=(8, 4))
amount_ax = sns.violinplot(x=cfd["Amount"], color="blue")  # Use a valid color name
amount_ax.set_title("Distribution of Transaction Amounts")
amount_ax.set_xlabel("Amount")
plt.show()

"""**4.4 Distribution of Time**"""

# Histogram of the Time column (no KDE overlay).
plt.figure(figsize=(8, 6))
time_ax = sns.histplot(cfd['Time'], kde=False, color="blue")
time_ax.set_title('Distribution of Transaction Time')
time_ax.set_xlabel('Time')
time_ax.set_ylabel('Frequency')
plt.show()

"""**4.5 Histograms for Key Features (V1-V28, Amount, Time)**"""

# One histogram per numeric column; the figure created by DataFrame.hist
# becomes the current figure, which then receives the title and layout.
cfd.hist(bins=30, figsize=(20, 15), edgecolor='black')
hist_figure = plt.gcf()
hist_figure.suptitle("Histograms of All Numerical Features", fontsize=16)
hist_figure.tight_layout()
plt.show()

"""# **5. Pre-processing of Data**

**5.1 Checking of Null Values**
"""

print("\nChecking for Null Values:")
# Bare expression: shown as the cell result in a notebook.
cfd.isnull().sum()

"""**5.2 Checking of Outliers**"""

# Tukey fences on Amount: anything beyond 1.5 * IQR from the quartiles.
Q1, Q3 = cfd['Amount'].quantile(0.25), cfd['Amount'].quantile(0.75)
IQR = Q3 - Q1
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR

below_fence = cfd['Amount'].lt(lower_fence)
above_fence = cfd['Amount'].gt(upper_fence)
outliers = cfd.loc[below_fence | above_fence]
print(f"Number of outliers in 'Amount': {len(outliers)}")

"""**5.3 Checking of Duplicate Transactions**"""

# Tally duplicated vs. unique rows once and report it in both formats.
duplicate_counts = cfd.duplicated().value_counts()
print(duplicate_counts)
print("Duplicate Counts before removal:\n", duplicate_counts)

# Keep only the first occurrence of every fully identical row.
cfd = cfd[~cfd.duplicated(keep='first')]

duplicate_counts = cfd.duplicated().value_counts()
print("\nDuplicate Counts after removal:\n", duplicate_counts)

"""**5.4 Feature Selection/Reduction**

**5.4.1 Correlation Matrix for Numerical Features**
"""

# Pairwise correlation between all columns of the frame.
correlation_matrix = cfd.corr()

"""**5.4.2 Heatmap Visualization**"""

# Large annotated heatmap so every coefficient stays readable.
plt.figure(figsize=(28, 28))
heatmap_ax = sns.heatmap(
    data=correlation_matrix,
    cmap='coolwarm',
    annot=True,
    fmt=".2f",
)
heatmap_ax.set_title('Correlation Matrix of Numerical Features')
plt.show()

"""**5.4.3 Dropping Irrelevant Features**"""

# Absolute pairwise correlation above which the later of two columns is dropped.
threshold = 0.8
# The label column must never be removed: the modelling code later in this
# script selects it by name, so dropping it here would break that step.
target_column = 'Class'
correlation_matrix = cfd.corr()

# Work on predictors only, in absolute value.
feature_corr = correlation_matrix.drop(
    index=target_column, columns=target_column, errors='ignore'
).abs()

# Strict lower triangle: each column is compared only with the columns that
# precede it, so of every highly correlated pair the later column is flagged.
lower_triangle = np.tril(np.ones(feature_corr.shape, dtype=bool), k=-1)
has_correlated_predecessor = (
    feature_corr.where(lower_triangle).gt(threshold).any(axis=1)
)
high_corr_features = set(feature_corr.index[has_correlated_predecessor])

print("Highly correlated features to drop:", high_corr_features)

# Pass a sorted list (not a set) so the drop is deterministic and list-like.
cfd = cfd.drop(columns=sorted(high_corr_features))
print(f"Updated dataset shape: {cfd.shape}")

"""### **Application of Standard Scaler**

**5.5 Feature Scaling**

**5.5.1 Standardization (Z-Score Scaling)**
"""

# Standardize `Amount` in place with a StandardScaler (z-score scaling).
# The fitted `scaler` is kept because later cells reuse it to transform the
# hand-built sample transactions.
# NOTE(review): the scaler is fitted on the whole dataset here, before the
# train/test split performed later in this script, so test rows influence the
# fitted statistics — confirm this is acceptable.
scaler = StandardScaler()
cfd[['Amount']] = scaler.fit_transform(cfd[['Amount']])

# Bare expression: displays summary statistics of the scaled column when run
# as a notebook cell.
cfd['Amount'].describe()

"""**5.5.2 Normalization (Min-Max Scaling)**"""

# Take a handle on the Time column, then min-max scale it.
# NOTE(review): `time` is read again later in the script as the source of the
# pre-normalization min/max; whether it still holds the raw values after the
# assignment below depends on pandas copy/view behavior — confirm.
time = cfd['Time']
cfd['Time'] = (time - time.min()) / (time.max() - time.min())

# Bare expression: notebook display of the frame.
cfd

# Shuffle all rows (frac=1 keeps every row) with a fixed seed.
cfd = cfd.sample(frac=1, random_state=1)
cfd

"""**5.6 Train, Test, and Validation**"""

# Label vector and feature matrix (every column except the label).
y = cfd['Class']
x = cfd.loc[:, cfd.columns != 'Class']

"""**5.7 Splitting the Dataset into Training and Testing Sets**"""

# 80/20 split, stratified on the label, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

"""# **6. Machine Learning**

### **Isolation Forest**
"""

# Observed fraud share in the training labels. Kept for reference only: the
# model below uses a fixed contamination of 0.02.
fraud_ratio = y_train.mean()
if_model = IsolationForest(contamination=0.02, random_state=101)
if_model.fit(x_train)

# Balanced evaluation subset: every fraud row of the test split plus an
# equally sized random sample of non-fraud rows.
fraud_test = x_test[y_test == 1]
non_fraud_test = x_test[y_test == 0].sample(len(fraud_test), random_state=42)
x_test_balanced = pd.concat([fraud_test, non_fraud_test])
y_test_balanced = np.concatenate([np.ones(len(fraud_test)), np.zeros(len(non_fraud_test))])

# Hard labels: IsolationForest.predict returns -1 for outliers, mapped to 1.
if_y_pred = (if_model.predict(x_test_balanced) == -1).astype(int)
# Continuous anomaly score for ranking metrics. decision_function is lower
# for more abnormal rows, so it is negated to make "higher = more fraud-like".
if_anomaly_scores = -if_model.decision_function(x_test_balanced)

print(classification_report(y_test_balanced, if_y_pred))
# ROC-AUC and AUPRC are ranking metrics: feed them the scores, not the
# thresholded 0/1 labels, which collapse the curve to a single point.
if_roc_auc = roc_auc_score(y_test_balanced, if_anomaly_scores)
print("ROC AUC Score:", if_roc_auc)
if_auprc = average_precision_score(y_test_balanced, if_anomaly_scores)
print("AUPRC for Isolation Forest:", if_auprc)

# Weighted-average summary metrics for the comparison table.
report_dict = classification_report(y_test_balanced, if_y_pred, output_dict=True)

if_accuracy = report_dict['accuracy']
if_precision = report_dict['weighted avg']['precision']
if_recall = report_dict['weighted avg']['recall']
if_f1_score = report_dict['weighted avg']['f1-score']

"""### **Autoencoders**"""

# Training set for the autoencoder: all non-fraud rows plus a 10% sample of
# the fraud rows from the training split.
y_train_fraud = y_train[y_train == 1].sample(frac=0.1, random_state=42)  # Increase to 10%
x_train_fraud = x_train.loc[y_train_fraud.index]
x_train_auto = pd.concat([x_train[y_train == 0], x_train_fraud])

# Symmetric-ish dense autoencoder with an 8-unit bottleneck.
input_dim = x_train_auto.shape[1]
input_layer = Input(shape=(input_dim,))
encoded = Dense(64, activation='relu')(input_layer)
encoded = Dense(32, activation='relu')(encoded)
encoded = Dense(16, activation='relu')(encoded)
encoded = Dense(8, activation='relu')(encoded)

decoded = Dense(16, activation='relu')(encoded)
decoded = Dense(32, activation='relu')(decoded)
# Linear output: the inputs include a z-scored Amount and other features that
# are not confined to (0, 1), which a sigmoid output could never reconstruct.
decoded = Dense(input_dim, activation='linear')(decoded)

autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# Stop when validation loss stalls for 5 epochs and keep the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
autoencoder.fit(x_train_auto, x_train_auto, epochs=50, batch_size=256, shuffle=True, validation_split=0.2, callbacks=[early_stopping])

# Per-row reconstruction error on the test split is the anomaly score.
reconstructed = autoencoder.predict(x_test)
mse = np.mean(np.power(x_test - reconstructed, 2), axis=1)
# Rows above the 80th percentile of test error are flagged, i.e. roughly 20%
# of the test rows by construction. `threshold` is read again by
# ae_predict_fraud later in the script.
threshold = np.percentile(mse, 80)

y_test_pred = (mse > threshold).astype(int)
print(classification_report(y_test, y_test_pred))
# Ranking metrics use the continuous reconstruction error, not the 0/1 labels.
ae_roc_auc = roc_auc_score(y_test, mse)
print("ROC AUC Score:", ae_roc_auc)
ae_auprc = average_precision_score(y_test, mse)
# Report the AUPRC value itself (previously the ROC-AUC was printed here).
print("AUPRC for Autoencoder:", ae_auprc)

# Weighted-average summary metrics for the comparison table.
report_dict2 = classification_report(y_test, y_test_pred, output_dict=True)

ae_accuracy = report_dict2['accuracy']
ae_precision = report_dict2['weighted avg']['precision']
ae_recall = report_dict2['weighted avg']['recall']
ae_f1_score = report_dict2['weighted avg']['f1-score']

"""### **Local Outlier Factor**"""

# Novelty-mode LOF is fitted on non-fraud training rows only.
x_train_normal = x_train[y_train == 0]

lof_model = LocalOutlierFactor(n_neighbors=50, contamination=0.01, novelty=True)
lof_model.fit(x_train_normal)

# decision_function: lower values are more abnormal. Rows below the 2nd
# percentile of the test scores are flagged as fraud.
lof_scores = lof_model.decision_function(x_test)
lof_threshold = np.percentile(lof_scores, 2)
y_test_pred_lof = (lof_scores < lof_threshold).astype(int)

print(classification_report(y_test, y_test_pred_lof))
# Ranking metrics take a continuous score where higher means more fraud-like,
# hence the negated decision function rather than the thresholded labels.
lof_roc_auc = roc_auc_score(y_test, -lof_scores)
print("ROC AUC Score:", lof_roc_auc)
lof_auprc = average_precision_score(y_test, -lof_scores)
print("AUPRC for LOF:", lof_auprc)

# Weighted-average summary metrics for the comparison table.
report_dict3 = classification_report(y_test, y_test_pred_lof, output_dict=True)

lof_accuracy = report_dict3['accuracy']
lof_precision = report_dict3['weighted avg']['precision']
lof_recall = report_dict3['weighted avg']['recall']
lof_f1_score = report_dict3['weighted avg']['f1-score']

"""# **7. Evaluation of Model Performance**

**7.1 Creation of Metrics-Data**
"""

# Comparison table assembled from the metrics computed for each model above.
metrics_data = {
    'Model': ['Isolation Forest', 'Autoencoders', 'Local Outlier Factor'],
    'Accuracy': [if_accuracy, ae_accuracy, lof_accuracy],
    'Precision': [if_precision, ae_precision, lof_precision],
    'Recall': [if_recall, ae_recall, lof_recall],
    'F1-score': [if_f1_score, ae_f1_score, lof_f1_score],
    'AUC-ROC': [if_roc_auc, ae_roc_auc, lof_roc_auc],
    'AUPRC': [if_auprc, ae_auprc, lof_auprc]
}

metrics_df = pd.DataFrame(metrics_data)

metrics_df.head()

# x_test is a slice of the frame whose Amount and Time columns were scaled
# before the split, so it must NOT be transformed again here: a second pass
# would feed the models values on a scale they were never trained on and
# would also corrupt x_test for every later cell.

# Isolation Forest predictions on the full test split. A separate name is used
# so `if_y_pred` (predictions for the balanced subset) stays aligned with
# `y_test_balanced` for the confusion-matrix and kappa cells further down.
if_y_pred_full = (if_model.predict(x_test) == -1).astype(int)

# Autoencoder predictions: top 10% of reconstruction errors are flagged.
mse = np.mean(np.power(x_test - autoencoder.predict(x_test), 2), axis=1)
best_threshold = np.percentile(mse, 90)  # Set fraud threshold
ae_y_pred = (mse > best_threshold).astype(int)

# Local Outlier Factor predictions: lowest 5% of decision scores are flagged.
lof_scores = lof_model.decision_function(x_test)
lof_threshold = np.percentile(lof_scores, 5)  # Set fraud threshold
lof_y_pred = (lof_scores < lof_threshold).astype(int)

# Weighted vote. With these weights a single model contributes at most 0.4,
# which does not exceed the strict 0.4 cut-off, so in practice at least two
# models have to agree for a row to be flagged.
weights = [0.4, 0.4, 0.2]  # Adjust based on model performance
ensemble_preds = np.average([if_y_pred_full, ae_y_pred, lof_y_pred], axis=0, weights=weights)
final_preds = (ensemble_preds > 0.4).astype(int)  # Lower threshold to improve fraud detection

# Evaluation
print("Evaluation for Ensemble Model (Voting)")
print(classification_report(y_test, final_preds))
print(f"AUC-ROC Score: {roc_auc_score(y_test, final_preds):.4f}")
print(f"AUPRC Score: {average_precision_score(y_test, final_preds):.4f}")

"""**7.2 Selection of Best Performing Model**"""

# Build the comparison table from the metrics computed earlier in this run
# instead of pasted-in literals, so the charts always match the models that
# were actually trained.
metrics_data = {
    'Model': ['Isolation Forest', 'Autoencoders', 'Local Outlier Factor'],
    'Accuracy': [if_accuracy, ae_accuracy, lof_accuracy],
    'Precision': [if_precision, ae_precision, lof_precision],
    'Recall': [if_recall, ae_recall, lof_recall],
    'F1-score': [if_f1_score, ae_f1_score, lof_f1_score],
    'AUC-ROC': [if_roc_auc, ae_roc_auc, lof_roc_auc],
    'AUPRC': [if_auprc, ae_auprc, lof_auprc]
}

metrics_df = pd.DataFrame(metrics_data)

sns.set(style="whitegrid")

# Bar chart of AUPRC per model. `hue` is set to the x variable with the legend
# suppressed, matching how the class-distribution plot applies its palette.
plt.figure(figsize=(12, 6))
sns.barplot(x='Model', y='AUPRC', data=metrics_df, hue='Model', palette='viridis', legend=False)
plt.title('Model Comparison - AUPRC')
plt.ylabel('AUPRC')
plt.xlabel('Model')
plt.show()

# One line per metric across the three models.
plt.figure(figsize=(12, 6))
for metric in ['Accuracy', 'Precision', 'Recall', 'F1-score', 'AUC-ROC', 'AUPRC']:
    sns.lineplot(x='Model', y=metric, data=metrics_df, label=metric, marker='o')

plt.title('Model Comparison - All Metrics')
plt.ylabel('Score')
plt.xlabel('Model')
plt.legend(title='Metrics', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()

"""**7.3 LIME Analysis**"""

# Shared LIME explainer: perturbation statistics come from the training
# features, and the two output columns are labelled Non-Fraud / Fraud.
lime_feature_names = list(x_train.columns)
explainer = lime.lime_tabular.LimeTabularExplainer(
    training_data=x_train.values,
    mode='classification',
    feature_names=lime_feature_names,
    class_names=['Non-Fraud', 'Fraud'],
    verbose=True,
)

"""**7.3.1 LIME Analysis for Isolation Forest**"""

def iforest_predict_proba(X):
    """Return pseudo-probabilities [P(non-fraud), P(fraud)] from the Isolation Forest.

    Args:
        X: 2-D array or DataFrame of feature rows, columns ordered like
            ``x_train``.

    Returns:
        Array of shape (n_rows, 2); column 1 is the fraud score, matching the
        explainer's ``class_names`` order.
    """
    if isinstance(X, np.ndarray):
        # LIME passes raw arrays; restore the column names the model was fitted with.
        X = pd.DataFrame(X, columns=x_train.columns)

    # score_samples is the negated anomaly score (lower = more abnormal), so
    # negating it gives "higher = more fraud-like". Using it directly keeps
    # the mapping independent of the batch being scored and avoids the
    # division by zero that batch-wise min-max scaling hits on constant scores.
    fraud_prob = np.clip(-if_model.score_samples(X), 0.0, 1.0)
    return np.column_stack([1.0 - fraud_prob, fraud_prob])

# Explain one randomly chosen test row with the Isolation Forest wrapper.
idx = np.random.randint(len(x_test))
exp = explainer.explain_instance(x_test.iloc[idx].values, iforest_predict_proba)

exp.show_in_notebook()

# Explain the first test row five times and pool the (feature, weight) pairs,
# then average the weight per feature.
feature_importances = []
for _ in range(5):
    exp = explainer.explain_instance(x_test.iloc[0].values, iforest_predict_proba)
    feature_importances += exp.as_list()

feature_importance_df = pd.DataFrame(
    feature_importances, columns=['feature', 'importance']
)
avg_feature_importance_df = feature_importance_df.groupby(
    'feature', as_index=False
)['importance'].mean()
display(avg_feature_importance_df.style.hide(axis='index'))

"""**7.3.2 LIME Analysis for Autoencoders**"""

def autoencoder_predict_proba(X):
    """Return pseudo-probabilities [P(non-fraud), P(fraud)] from the autoencoder.

    The fraud score is the per-row reconstruction error, min-max scaled within
    the batch being scored.

    Args:
        X: 2-D array or DataFrame of feature rows.

    Returns:
        Array of shape (n_rows, 2); column 1 is the fraud score.
    """
    X = np.asarray(X, dtype=float)
    if X.shape[0] == 0:
        # Nothing to score; min()/max() below would fail on an empty batch.
        return np.empty((0, 2))

    reconstructed = autoencoder.predict(X)
    mse = np.mean(np.square(X - reconstructed), axis=1)

    lowest = mse.min()
    spread = mse.max() - lowest
    if spread > 0:
        probs = (mse - lowest) / spread
    else:
        # All rows reconstruct equally well (e.g. a single-row batch): avoid
        # 0/0 and report no relative anomaly.
        probs = np.zeros_like(mse)
    return np.column_stack([1 - probs, probs])

# Explain one randomly chosen test row with the autoencoder wrapper.
idx = np.random.randint(0, x_test.shape[0])
exp2 = explainer.explain_instance(x_test.iloc[idx].values, autoencoder_predict_proba)

exp2.show_in_notebook()

# Explain the first test row five times and pool the (feature, weight) pairs.
feature_importances = []
for _ in range(5):
    exp2 = explainer.explain_instance(x_test.iloc[0].values, autoencoder_predict_proba)
    # Collect from exp2 — the autoencoder explanation just computed — not from
    # the Isolation Forest explanation left over in `exp`.
    feature_importances.extend(exp2.as_list())

# Average weight per feature across the five runs.
feature_importance_df = pd.DataFrame(feature_importances, columns=['feature', 'importance'])
avg_feature_importance_df = feature_importance_df.groupby('feature')['importance'].mean().reset_index()
display(avg_feature_importance_df.style.hide(axis='index'))

"""**7.3.3 LIME Analysis for Local Outlier Factor**"""

def lof_predict_proba(X):
    """Return hard pseudo-probabilities [P(non-fraud), P(fraud)] from the LOF model.

    Args:
        X: 2-D array or DataFrame of feature rows, columns ordered like
            ``x_train``.

    Returns:
        Array of shape (n_rows, 2); column 1 is 1.0 for rows the model labels
        as outliers and 0.0 for inliers.
    """
    if isinstance(X, np.ndarray):
        # LIME passes raw arrays; restore the column names the model was fitted with.
        X = pd.DataFrame(X, columns=x_train.columns)
    # Public novelty-mode API: +1 for inliers, -1 for outliers.
    labels = lof_model.predict(X)
    # Map -1 (outlier) -> 1.0 fraud and +1 (inlier) -> 0.0 fraud, so the
    # "Fraud" column really carries the outlier signal.
    fraud_prob = (1 - labels) / 2
    return np.column_stack([1 - fraud_prob, fraud_prob])

# Explain one randomly chosen test row with the LOF wrapper.
idx = np.random.randint(0, x_test.shape[0])
exp3 = explainer.explain_instance(x_test.iloc[idx].values, lof_predict_proba)

exp3.show_in_notebook()

# Explain the first test row five times and pool the (feature, weight) pairs.
feature_importances = []
for _ in range(5):
    exp3 = explainer.explain_instance(x_test.iloc[0].values, lof_predict_proba)
    # Collect from exp3 — the LOF explanation just computed — not from the
    # Isolation Forest explanation left over in `exp`.
    feature_importances.extend(exp3.as_list())

# Average weight per feature across the five runs.
feature_importance_df = pd.DataFrame(feature_importances, columns=['feature', 'importance'])
avg_feature_importance_df = feature_importance_df.groupby('feature')['importance'].mean().reset_index()
display(avg_feature_importance_df.style.hide(axis='index'))

"""**7.4 Confusion Matrix for Each Model**"""

# Isolation Forest is evaluated on the balanced subset. Its predictions are
# recomputed here because `if_y_pred` may have been reassigned to full-test
# predictions by the ensemble cell, which no longer line up with
# `y_test_balanced` (different length).
if_balanced_pred = (if_model.predict(x_test_balanced) == -1).astype(int)

if_cm = confusion_matrix(y_test_balanced, if_balanced_pred)
ae_cm = confusion_matrix(y_test, y_test_pred)
lof_cm = confusion_matrix(y_test, y_test_pred_lof)

class_labels = ['Non-Fraud', 'Fraud']
# (matrix, model name, whether to draw the colour bar) for each heatmap.
for cm, model_name, show_cbar in (
    (if_cm, 'Isolation Forest', True),
    (ae_cm, 'Autoencoders', False),
    (lof_cm, 'Local Outlier Factor', False),
):
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=show_cbar, xticklabels=class_labels, yticklabels=class_labels)
    plt.title(f'Confusion Matrix - {model_name}')
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()

"""**7.5 Cohen's Kappa**"""

# Recompute the Isolation Forest labels for the balanced subset: `if_y_pred`
# may have been reassigned to full-test predictions by the ensemble cell and
# would then not match `y_test_balanced` in length.
if_balanced_pred = (if_model.predict(x_test_balanced) == -1).astype(int)

if_kappa = cohen_kappa_score(y_test_balanced, if_balanced_pred)
print(f"Cohen's Kappa for Isolation Forest: {if_kappa}")

ae_kappa = cohen_kappa_score(y_test, y_test_pred)
print(f"Cohen's Kappa for Autoencoders: {ae_kappa}")

lof_kappa = cohen_kappa_score(y_test, y_test_pred_lof)
print(f"Cohen's Kappa for Local Outlier Factor: {lof_kappa}")

# Append the agreement scores to the comparison table (same model order).
metrics_data['Cohen\'s Kappa'] = [if_kappa, ae_kappa, lof_kappa]
metrics_df = pd.DataFrame(metrics_data)

metrics_df

"""# **8. Detection of Fraud**

**8.1 Defining Input Parameters (Time, Amount, V1-V28)**

Enter transaction time: 100000

Enter transaction amount: 5000.00

Enter value for V1: -5.64

Enter value for V2: -7.27

Enter value for V3: -4.83

Enter value for V4: -5.68

Enter value for V5: -1.14

Enter value for V6: -2.62

Enter value for V7: -4.36

Enter value for V8: -7.32

Enter value for V9: -1.34

Enter value for V10: -0.02

Enter value for V11: 0.28

Enter value for V12: -0.23

Enter value for V13: -0.64

Enter value for V14: 0.10

Enter value for V15: 0.17

Enter value for V16: 0.13

Enter value for V17: -0.01

Enter value for V18: 0.01

Enter value for V19: -0.11

Enter value for V20: 0.07

Enter value for V21: 0.13

Enter value for V22: -0.19

Enter value for V23: 0.13

Enter value for V24: -0.02

Enter value for V25: 0.13

Enter value for V26: -0.19

Enter value for V27: 0.13

Enter value for V28: -0.02

Expected Output: Fraudulent

**8.2 Preprocessing Input Data**

**8.3 Function for Fraud Prediction**
"""

def predict_fraud(input_data):
    """Label the first row of ``input_data`` using the Isolation Forest.

    A negative decision-function value for that row yields "Fraudulent";
    anything else yields "Non-Fraudulent".
    """
    first_row_score = if_model.decision_function(input_data)[0]
    if first_row_score < 0:  # Adjusted threshold
        return "Fraudulent"
    return "Non-Fraudulent"

"""**8.4 Prediction using Isolation Forest**"""

def fraudulent_data():
    """Build a one-row DataFrame for the hand-crafted "fraudulent" sample.

    Returns:
        DataFrame with columns ``Time``, ``Amount``, ``V1`` .. ``V28`` holding
        the raw (unscaled) sample values.

    Raises:
        ValueError: if the hard-coded V list does not hold exactly 28 values.
    """
    txn_time = 100000
    amount = 5000.00
    v_values = [-5.64, -7.27, -4.83, -5.68, -1.14, -2.62, -4.36, -7.32, -1.34, -0.02, 0.28, -0.23, -0.64, 0.10, 0.17, 0.13, -0.01, 0.01, -0.11, 0.07, 0.13, -0.19, 0.13, -0.02, 0.13, -0.19, 0.13, -0.02]
    if len(v_values) != 28:
        # Fail loudly instead of printing and silently returning None, which
        # would only surface as a confusing error in the caller.
        raise ValueError("Error: v_values does not contain 28 elements")
    columns = ['Time', 'Amount'] + [f'V{i}' for i in range(1, 29)]
    return pd.DataFrame([[txn_time, amount] + v_values], columns=columns)

input_data = fraudulent_data()
input_data[['Amount']] = scaler.transform(input_data[['Amount']])  # Apply same scaling
# Normalize Time with the min/max of `time`, the handle taken on the Time
# column before it was rescaled (the non-fraud sample below does the same).
# `cfd['Time']` is already min-max scaled at this point, so its min/max are
# 0 and 1 and using them would leave the sample's Time unnormalized.
input_data['Time'] = (input_data['Time'] - time.min()) / (time.max() - time.min())
input_data = input_data[x_train.columns]  # Ensure correct feature order

print("Transaction Prediction:", predict_fraud(input_data))

def non_fraudulent_data():
    """Build a one-row DataFrame for the hand-crafted "non-fraudulent" sample.

    Returns:
        DataFrame with columns ``Time``, ``Amount``, ``V1`` .. ``V28`` holding
        the raw (unscaled) sample values.

    Raises:
        ValueError: if the hard-coded V list does not hold exactly 28 values.
    """
    txn_time = 50000
    amount = 50.00
    v_values = [-1.36, -0.07, 2.54, 1.38, -0.34, 0.46, 0.24, 0.10, 0.36, -0.02, 0.28, -0.23, -0.64, 0.10, 0.17, 0.13, -0.01, 0.01, -0.11, 0.07, 0.13, -0.19, 0.13, -0.02, 0.13, -0.19, 0.13, -0.02]
    if len(v_values) != 28:
        # Fail loudly instead of printing and silently returning None, which
        # would only surface as a confusing error in the caller.
        raise ValueError("Error: v_values does not contain 28 elements")
    columns = ['Time', 'Amount'] + [f'V{i}' for i in range(1, 29)]
    return pd.DataFrame([[txn_time, amount] + v_values], columns=columns)

# Preprocess the non-fraud sample the same way as the dataset, then score it
# with the Isolation Forest.
input_data2 = non_fraudulent_data()
input_data2[['Amount']] = scaler.transform(input_data2[['Amount']])  # Apply same scaling
time_floor = time.min()
time_span = time.max() - time_floor
input_data2['Time'] = (input_data2['Time'] - time_floor) / time_span
input_data2 = input_data2.loc[:, x_train.columns]  # Ensure correct feature order

if_label = predict_fraud(input_data2)
print("Transaction Prediction:", if_label)

"""**8.5 Prediction using Autoencoders**"""

def ae_predict_fraud(input_data):
    """Label the first row of ``input_data`` using the autoencoder.

    The row is "Fraudulent" when its reconstruction error exceeds the
    module-level ``threshold``; otherwise "Non-Fraudulent".

    Args:
        input_data: preprocessed feature rows (DataFrame or 2-D array) with
            the same column order as the training features.
    """
    # Work on a plain array so the first row is selected by position; a
    # label-based `[0]` on a pandas result fails when the index lacks 0.
    features = np.asarray(input_data, dtype=float)
    reconstructed = autoencoder.predict(features)
    mse = np.mean(np.square(features - reconstructed), axis=1)
    return "Fraudulent" if mse[0] > threshold else "Non-Fraudulent"

def fraudulent_data():
    """Build a one-row DataFrame for the hand-crafted "fraudulent" sample.

    Returns:
        DataFrame with columns ``Time``, ``Amount``, ``V1`` .. ``V28`` holding
        the raw (unscaled) sample values.

    Raises:
        ValueError: if the hard-coded V list does not hold exactly 28 values.
    """
    txn_time = 100000
    amount = 5000.00
    v_values = [-5.64, -7.27, -4.83, -5.68, -1.14, -2.62, -4.36, -7.32, -1.34, -0.02, 0.28, -0.23, -0.64, 0.10, 0.17, 0.13, -0.01, 0.01, -0.11, 0.07, 0.13, -0.19, 0.13, -0.02, 0.13, -0.19, 0.13, -0.02]
    if len(v_values) != 28:
        # Fail loudly instead of printing and silently returning None, which
        # would only surface as a confusing error in the caller.
        raise ValueError("Error: v_values does not contain 28 elements")
    columns = ['Time', 'Amount'] + [f'V{i}' for i in range(1, 29)]
    return pd.DataFrame([[txn_time, amount] + v_values], columns=columns)

input_data3 = fraudulent_data()
input_data3[['Amount']] = scaler.transform(input_data3[['Amount']])  # Apply same scaling
# Normalize Time with the min/max of `time`, the handle taken on the Time
# column before it was rescaled. `cfd['Time']` is already min-max scaled at
# this point, so its min/max are 0 and 1 and using them would leave the
# sample's Time unnormalized.
input_data3['Time'] = (input_data3['Time'] - time.min()) / (time.max() - time.min())
input_data3 = input_data3[x_train.columns]  # Ensure correct feature order

print("Transaction Prediction:", ae_predict_fraud(input_data3))

def non_fraudulent_data():
    """Build a one-row DataFrame for the hand-crafted "non-fraudulent" sample.

    Returns:
        DataFrame with columns ``Time``, ``Amount``, ``V1`` .. ``V28`` holding
        the raw (unscaled) sample values.

    Raises:
        ValueError: if the hard-coded V list does not hold exactly 28 values.
    """
    txn_time = 50000
    amount = 50.00
    v_values = [-1.36, -0.07, 2.54, 1.38, -0.34, 0.46, 0.24, 0.10, 0.36, -0.02, 0.28, -0.23, -0.64, 0.10, 0.17, 0.13, -0.01, 0.01, -0.11, 0.07, 0.13, -0.19, 0.13, -0.02, 0.13, -0.19, 0.13, -0.02]
    if len(v_values) != 28:
        # Fail loudly instead of printing and silently returning None, which
        # would only surface as a confusing error in the caller.
        raise ValueError("Error: v_values does not contain 28 elements")
    columns = ['Time', 'Amount'] + [f'V{i}' for i in range(1, 29)]
    return pd.DataFrame([[txn_time, amount] + v_values], columns=columns)

input_data4 = non_fraudulent_data()
input_data4[['Amount']] = scaler.transform(input_data4[['Amount']])  # Apply same scaling
# Normalize Time with the min/max of `time`, the handle taken on the Time
# column before it was rescaled. `cfd['Time']` is already min-max scaled at
# this point, so its min/max are 0 and 1 and using them would leave the
# sample's Time unnormalized.
input_data4['Time'] = (input_data4['Time'] - time.min()) / (time.max() - time.min())
input_data4 = input_data4[x_train.columns]  # Ensure correct feature order

print("Transaction Prediction:", ae_predict_fraud(input_data4))

"""**8.6 Prediction using Local Outlier Factor**"""

def lof_predict_fraud(input_data):
    """Label the first row of ``input_data`` using the LOF model.

    The row is "Fraudulent" when its decision-function value falls below the
    module-level ``lof_threshold``; otherwise "Non-Fraudulent".

    Args:
        input_data: already preprocessed feature rows (Amount scaled, Time
            normalized, training column order), as every caller in this
            script prepares them. The frame is not modified.
    """
    # No scaling here: callers have already applied it, and repeating it would
    # score the row on the wrong scale and mutate the caller's DataFrame.
    lof_score = lof_model.decision_function(input_data)
    return "Fraudulent" if lof_score[0] < lof_threshold else "Non-Fraudulent"

def fraudulent_data():
    """Build a one-row DataFrame for the hand-crafted "fraudulent" sample.

    Returns:
        DataFrame with columns ``Time``, ``Amount``, ``V1`` .. ``V28`` holding
        the raw (unscaled) sample values.

    Raises:
        ValueError: if the hard-coded V list does not hold exactly 28 values.
    """
    txn_time = 100000
    amount = 5000.00
    v_values = [-5.64, -7.27, -4.83, -5.68, -1.14, -2.62, -4.36, -7.32, -1.34, -0.02, 0.28, -0.23, -0.64, 0.10, 0.17, 0.13, -0.01, 0.01, -0.11, 0.07, 0.13, -0.19, 0.13, -0.02, 0.13, -0.19, 0.13, -0.02]
    if len(v_values) != 28:
        # Fail loudly instead of printing and silently returning None, which
        # would only surface as a confusing error in the caller.
        raise ValueError("Error: v_values does not contain 28 elements")
    columns = ['Time', 'Amount'] + [f'V{i}' for i in range(1, 29)]
    return pd.DataFrame([[txn_time, amount] + v_values], columns=columns)

input_data5 = fraudulent_data()
input_data5[['Amount']] = scaler.transform(input_data5[['Amount']])  # Apply same scaling
# Normalize Time with the min/max of `time`, the handle taken on the Time
# column before it was rescaled. `cfd['Time']` is already min-max scaled at
# this point, so its min/max are 0 and 1 and using them would leave the
# sample's Time unnormalized.
input_data5['Time'] = (input_data5['Time'] - time.min()) / (time.max() - time.min())
input_data5 = input_data5[x_train.columns]  # Ensure correct feature order

print("Transaction Prediction:", lof_predict_fraud(input_data5))

def non_fraudulent_data():
    """Build a one-row DataFrame for the hand-crafted "non-fraudulent" sample.

    Returns:
        DataFrame with columns ``Time``, ``Amount``, ``V1`` .. ``V28`` holding
        the raw (unscaled) sample values.

    Raises:
        ValueError: if the hard-coded V list does not hold exactly 28 values.
    """
    txn_time = 50000
    amount = 50.00
    v_values = [-1.36, -0.07, 2.54, 1.38, -0.34, 0.46, 0.24, 0.10, 0.36, -0.02, 0.28, -0.23, -0.64, 0.10, 0.17, 0.13, -0.01, 0.01, -0.11, 0.07, 0.13, -0.19, 0.13, -0.02, 0.13, -0.19, 0.13, -0.02]
    if len(v_values) != 28:
        # Fail loudly instead of printing and silently returning None, which
        # would only surface as a confusing error in the caller.
        raise ValueError("Error: v_values does not contain 28 elements")
    columns = ['Time', 'Amount'] + [f'V{i}' for i in range(1, 29)]
    return pd.DataFrame([[txn_time, amount] + v_values], columns=columns)

input_data6 = non_fraudulent_data()
input_data6[['Amount']] = scaler.transform(input_data6[['Amount']])  # Apply same scaling
# Normalize Time with the min/max of `time`, the handle taken on the Time
# column before it was rescaled. `cfd['Time']` is already min-max scaled at
# this point, so its min/max are 0 and 1 and using them would leave the
# sample's Time unnormalized.
input_data6['Time'] = (input_data6['Time'] - time.min()) / (time.max() - time.min())
input_data6 = input_data6[x_train.columns]  # Ensure correct feature order

print("Transaction Prediction:", lof_predict_fraud(input_data6))