<a href="https://colab.research.google.com/github/lavanya9739/credit-card-fraud-detection/blob/main/Untitled12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.utils import resample
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, Flatten, BatchNormalization


In [2]:
file_path = '/content/output_file.csv'  # Replace with your file path
data = pd.read_csv(file_path)


In [3]:
data_cleaned = data.drop(columns=["Account Number", "Card Number", "Transaction Time", "Transaction Date",
                                  "Merchant Number", "Approval Code"])

In [4]:
label_encoder = LabelEncoder()
categorical_cols = ["Transaction Type", "Currency Code", "Transaction Country", "Transaction City", "Fraud Label"]
for col in categorical_cols:
    data_cleaned[col] = label_encoder.fit_transform(data_cleaned[col])

In [5]:
fraud = data_cleaned[data_cleaned["Fraud Label"] == 1]
not_fraud = data_cleaned[data_cleaned["Fraud Label"] == 0]
fraud_resampled = resample(fraud, replace=True, n_samples=len(not_fraud), random_state=42)
balanced_data = pd.concat([not_fraud, fraud_resampled])

In [6]:
X = balanced_data.drop(columns=["Fraud Label"])
y = balanced_data["Fraud Label"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [8]:
X_train_cnn = X_train_scaled.reshape(X_train_scaled.shape[0], X_train_scaled.shape[1], 1)
X_test_cnn = X_test_scaled.reshape(X_test_scaled.shape[0], X_test_scaled.shape[1], 1)


In [9]:
cnn_model = Sequential([
    Conv1D(32, kernel_size=2, activation='relu', input_shape=(X_train_cnn.shape[1], 1)),
    BatchNormalization(),
    Dropout(0.2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [11]:
cnn_history = cnn_model.fit(X_train_cnn, y_train, epochs=50, batch_size=8, validation_split=0.2, verbose=1)


Epoch 1/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 80ms/step - accuracy: 0.5861 - loss: 0.8132 - val_accuracy: 0.5294 - val_loss: 0.6899
Epoch 2/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.5781 - loss: 0.6414 - val_accuracy: 0.5294 - val_loss: 0.7074
Epoch 3/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - accuracy: 0.7019 - loss: 0.5482 - val_accuracy: 0.5294 - val_loss: 0.7181
Epoch 4/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.6786 - loss: 0.5902 - val_accuracy: 0.5294 - val_loss: 0.7226
Epoch 5/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.6524 - loss: 0.6014 - val_accuracy: 0.5294 - val_loss: 0.7229
Epoch 6/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.7473 - loss: 0.5378 - val_accuracy: 0.5294 - val_loss: 0.7221
Epoch 7/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━

In [12]:
cnn_eval = cnn_model.evaluate(X_test_cnn, y_test, verbose=1)
y_pred_cnn = (cnn_model.predict(X_test_cnn) > 0.5).astype("int32")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 0.4545 - loss: 1.0121
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 256ms/step


In [13]:
cnn_metrics = {
    "Accuracy": accuracy_score(y_test, y_pred_cnn),
    "Precision": precision_score(y_test, y_pred_cnn),
    "Recall": recall_score(y_test, y_pred_cnn),
    "F1 Score": f1_score(y_test, y_pred_cnn),
    "AUC": roc_auc_score(y_test, cnn_model.predict(X_test_cnn))
}


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step


In [14]:
print("CNN Evaluation Metrics:")
for metric, value in cnn_metrics.items():
    print(f"{metric}: {value:.4f}")

CNN Evaluation Metrics:
Accuracy: 0.4545
Precision: 0.4444
Recall: 0.3636
F1 Score: 0.4000
AUC: 0.5041


In [15]:
y_pred_cnn = (cnn_model.predict(X_test_cnn) > 0.5).astype("int32")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step


In [16]:
predictions = pd.DataFrame({
    "Actual": y_test.values,
    "Predicted": y_pred_cnn.flatten()
})
print(predictions)

    Actual  Predicted
0        1          1
1        0          1
2        0          1
3        1          0
4        1          0
5        1          1
6        0          1
7        0          0
8        1          0
9        0          0
10       0          0
11       0          0
12       0          0
13       1          0
14       0          0
15       1          0
16       1          1
17       1          1
18       1          0
19       0          1
20       0          1
21       1          0
