<a href="https://colab.research.google.com/github/lavanya9739/credit-card-fraud-detection/blob/main/Copy_of_Untitled12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.utils import resample
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, Flatten, BatchNormalization


In [5]:
from lightgbm import LGBMClassifier


In [10]:
import joblib  # For saving the scaler
from tensorflow.keras.models import save_model  # For saving the CNN model


In [12]:
file_path = '/content/output_file.csv'  # Replace with your file path
data = pd.read_csv(file_path)


In [13]:
data_cleaned = data.drop(columns=["Account Number", "Card Number", "Transaction Time", "Transaction Date",
                                  "Merchant Number", "Approval Code"])

In [14]:
label_encoder = LabelEncoder()
categorical_cols = ["Transaction Type", "Currency Code", "Transaction Country", "Transaction City", "Fraud Label"]
for col in categorical_cols:
    data_cleaned[col] = label_encoder.fit_transform(data_cleaned[col])

In [15]:
fraud = data_cleaned[data_cleaned["Fraud Label"] == 1]
not_fraud = data_cleaned[data_cleaned["Fraud Label"] == 0]
fraud_resampled = resample(fraud, replace=True, n_samples=len(not_fraud), random_state=42)
balanced_data = pd.concat([not_fraud, fraud_resampled])

In [16]:
X = balanced_data.drop(columns=["Fraud Label"])
y = balanced_data["Fraud Label"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [17]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [18]:
X_train_cnn = X_train_scaled.reshape(X_train_scaled.shape[0], X_train_scaled.shape[1], 1)
X_test_cnn = X_test_scaled.reshape(X_test_scaled.shape[0], X_test_scaled.shape[1], 1)


In [19]:
cnn_model = Sequential([
    Conv1D(32, kernel_size=2, activation='relu', input_shape=(X_train_cnn.shape[1], 1)),
    BatchNormalization(),
    Dropout(0.2),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [20]:
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [21]:
cnn_history = cnn_model.fit(X_train_cnn, y_train, epochs=50, batch_size=8, validation_split=0.2, verbose=1)


Epoch 1/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step - accuracy: 0.4182 - loss: 0.9139 - val_accuracy: 0.3529 - val_loss: 0.7094
Epoch 2/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5893 - loss: 0.7147 - val_accuracy: 0.4118 - val_loss: 0.7148
Epoch 3/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5892 - loss: 0.7025 - val_accuracy: 0.4706 - val_loss: 0.7214
Epoch 4/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7427 - loss: 0.5073 - val_accuracy: 0.4706 - val_loss: 0.7290
Epoch 5/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6762 - loss: 0.5747 - val_accuracy: 0.4118 - val_loss: 0.7362
Epoch 6/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6935 - loss: 0.5234 - val_accuracy: 0.3529 - val_loss: 0.7369
Epoch 7/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [22]:
cnn_eval = cnn_model.evaluate(X_test_cnn, y_test, verbose=1)
y_pred_cnn = (cnn_model.predict(X_test_cnn) > 0.5).astype("int32")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.5000 - loss: 0.9333
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step


In [23]:
cnn_metrics = {
    "Accuracy": accuracy_score(y_test, y_pred_cnn),
    "Precision": precision_score(y_test, y_pred_cnn),
    "Recall": recall_score(y_test, y_pred_cnn),
    "F1 Score": f1_score(y_test, y_pred_cnn),
    "AUC": roc_auc_score(y_test, cnn_model.predict(X_test_cnn))
}


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


In [24]:
print("CNN Evaluation Metrics:")
for metric, value in cnn_metrics.items():
    print(f"{metric}: {value:.4f}")

CNN Evaluation Metrics:
Accuracy: 0.5000
Precision: 0.5000
Recall: 0.5455
F1 Score: 0.5217
AUC: 0.6033


In [25]:
y_pred_cnn = (cnn_model.predict(X_test_cnn) > 0.5).astype("int32")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


In [26]:
predictions = pd.DataFrame({
    "Actual": y_test.values,
    "Predicted": y_pred_cnn.flatten()
})
print(predictions)

    Actual  Predicted
0        1          1
1        0          1
2        0          1
3        1          0
4        1          0
5        1          1
6        0          1
7        0          0
8        1          0
9        0          0
10       0          0
11       0          0
12       0          0
13       1          1
14       0          1
15       1          1
16       1          1
17       1          1
18       1          0
19       0          1
20       0          1
21       1          0


In [27]:
lgbm_model = LGBMClassifier(boosting_type='gbdt',
                            max_depth=7,
                            n_estimators=100,
                            learning_rate=0.1,
                            random_state=42)
lgbm_model.fit(X_train, y_train)

[LightGBM] [Info] Number of positive: 42, number of negative: 42
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001219 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 128
[LightGBM] [Info] Number of data points in the train set: 84, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


In [28]:
y_pred_lgbm = lgbm_model.predict(X_test)
y_pred_lgbm_proba = lgbm_model.predict_proba(X_test)[:, 1]

In [29]:
lgbm_metrics = {
    "Accuracy": accuracy_score(y_test, y_pred_lgbm),
    "Precision": precision_score(y_test, y_pred_lgbm),
    "Recall": recall_score(y_test, y_pred_lgbm),
    "F1 Score": f1_score(y_test, y_pred_lgbm),
    "AUC": roc_auc_score(y_test, y_pred_lgbm_proba)
}


In [30]:

print("\nLightGBM Evaluation Metrics:")
for metric, value in lgbm_metrics.items():
    print(f"{metric}: {value:.4f}")


LightGBM Evaluation Metrics:
Accuracy: 0.5000
Precision: 0.5000
Recall: 0.3636
F1 Score: 0.4211
AUC: 0.5207
