In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.feature_selection import SelectKBest, f_classif
from imblearn.over_sampling import SMOTE
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
df = pd.read_csv("frauddetection.csv")

In [4]:
X = df.drop(['Class'], axis=1)
y = df['Class']


In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [6]:
# Handle imbalanced data using SMOTE
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)


In [7]:
selector = SelectKBest(f_classif, k=20)
X_train_smote = selector.fit_transform(X_train_smote, y_train_smote)
X_test = selector.transform(X_test)


In [8]:
scaler = StandardScaler()
X_train_smote = scaler.fit_transform(X_train_smote)
X_test = scaler.transform(X_test)

In [9]:
def create_model():
    model = Sequential([
        Dense(64, input_shape=(X_train_smote.shape[1],), activation='relu'),
        Dropout(0.5),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


In [10]:
# Hyperparameters
batch_size = 32
epochs = 10

model = create_model()

# Implement early stopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)


In [11]:
model.fit(X_train_smote, y_train_smote, epochs=epochs, batch_size=batch_size, validation_split=0.2, callbacks=[early_stopping])


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7e4f0fd4b190>

In [12]:
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype("int32")




In [13]:
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Optimized Neural Network Model Evaluation")
print("Accuracy: {:.4f}".format(accuracy))
print("Precision: {:.4f}".format(precision))
print("Recall: {:.4f}".format(recall))
print("F1 Score: {:.4f}".format(f1))
print("Confusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Optimized Neural Network Model Evaluation
Accuracy: 0.9930
Precision: 0.1845
Recall: 0.8980
F1 Score: 0.3061
Confusion Matrix:
 [[56475   389]
 [   10    88]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      1.00     56864
           1       0.18      0.90      0.31        98

    accuracy                           0.99     56962
   macro avg       0.59      0.95      0.65     56962
weighted avg       1.00      0.99      1.00     56962



In [14]:
threshold = 0.3
y_pred_adjusted = (y_pred_prob > threshold).astype("int32")


accuracy_adj = accuracy_score(y_test, y_pred_adjusted)
precision_adj = precision_score(y_test, y_pred_adjusted)
recall_adj = recall_score(y_test, y_pred_adjusted)
f1_adj = f1_score(y_test, y_pred_adjusted)
conf_matrix_adj = confusion_matrix(y_test, y_pred_adjusted)

print("\nEvaluation with Adjusted Threshold")
print("Accuracy: {:.4f}".format(accuracy_adj))
print("Precision: {:.4f}".format(precision_adj))
print("Recall: {:.4f}".format(recall_adj))
print("F1 Score: {:.4f}".format(f1_adj))
print("Confusion Matrix:\n", conf_matrix_adj)
print("\nClassification Report:\n", classification_report(y_test, y_pred_adjusted))



Evaluation with Adjusted Threshold
Accuracy: 0.9907
Precision: 0.1447
Recall: 0.8980
F1 Score: 0.2493
Confusion Matrix:
 [[56344   520]
 [   10    88]]

Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      1.00     56864
           1       0.14      0.90      0.25        98

    accuracy                           0.99     56962
   macro avg       0.57      0.94      0.62     56962
weighted avg       1.00      0.99      0.99     56962

