### CREDIT CARD FRAUD DETECTION WITH LIGHTGBM & DNN-TENSORFLOW

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import *
from sklearn.metrics import *
from sklearn.preprocessing import *
from lightgbm import *

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
dataset = pd.read_csv('creditcard.csv')

In [4]:
dataset.shape

(284807, 31)

In [5]:
dataset.Class.value_counts()

0    284315
1       492
Name: Class, dtype: int64

In [6]:
dataset.isnull().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

In [7]:
X = dataset.drop(columns=['Class'])
y = dataset['Class']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=1,stratify=y)

In [9]:
y.value_counts()

0    284315
1       492
Name: Class, dtype: int64

### LIGHTGBM

In [20]:
lgbm = LGBMClassifier(n_estimators=1, max_depth=3, learning_rate=0.02, reg_lambda=0.3).fit(X_train,y_train)
train = lgbm.predict(X_train)
test = lgbm.predict(X_test)

In [21]:
print('Accuracy:{:.4f}, Precision:{:.4f}, Recall:{:.4f}, F1:{:.4f}'.format(
        accuracy_score(y_test,test),
        precision_score(y_test,test),
        recall_score(y_test,test),
        f1_score(y_test,test)))

Accuracy:0.9994, Precision:0.8667, Recall:0.7959, F1:0.8298


In [22]:
print(classification_report(y_test,test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.87      0.80      0.83        98

    accuracy                           1.00     56962
   macro avg       0.93      0.90      0.91     56962
weighted avg       1.00      1.00      1.00     56962



In [23]:
print(confusion_matrix(y_test,test))

[[56852    12]
 [   20    78]]


In [19]:
# for estimator in (x*0.1 for x in range(1,100)):
#     lgbm = LGBMClassifier(n_estimators=1, max_depth=3, learning_rate=0.02, reg_lambda=estimator).fit(X_train,y_train)
#     train = lgbm.predict(X_train)
#     test = lgbm.predict(X_test)
#     print('Estimator:{}, Accuracy:{:.4f}, Precision:{:.4f}, Recall:{:.4f}, F1:{:.4f}'.format(estimator,
#         accuracy_score(y_test,test),
#         precision_score(y_test,test),
#         recall_score(y_test,test),
#         f1_score(y_test,test)))

### DNN

In [24]:
model = (tf.keras.models.Sequential())
model.add(tf.keras.layers.Dense(100,activation='selu',kernel_initializer='lecun_normal',input_shape=(X_train.shape[-1],)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(150,activation='selu',kernel_initializer='lecun_normal'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(80,activation='selu',kernel_initializer='lecun_normal'))
model.add(tf.keras.layers.Dense(1,activation='sigmoid'))

In [25]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               3100      
                                                                 
 batch_normalization (BatchN  (None, 100)              400       
 ormalization)                                                   
                                                                 
 dense_1 (Dense)             (None, 150)               15150     
                                                                 
 batch_normalization_1 (Batc  (None, 150)              600       
 hNormalization)                                                 
                                                                 
 dense_2 (Dense)             (None, 80)                12080     
                                                                 
 dense_3 (Dense)             (None, 1)                 8

In [26]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
             optimizer=tf.keras.optimizers.Adam(lr=0.003),
             metrics=([tf.keras.metrics.BinaryAccuracy(name='acc')],
                     [tf.keras.metrics.Precision(name='prc')],
                     [tf.keras.metrics.Recall(name='rcl')]))

In [27]:
history = model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=10,batch_size=16,verbose=1,shuffle=False)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [28]:
predictions = model.predict(X_test,batch_size=16,verbose=1)



In [32]:
predicted = np.where(predictions>0.5,1,0)

In [33]:
print(classification_report(y_test,predicted))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.00      0.00      0.00        98

    accuracy                           1.00     56962
   macro avg       0.50      0.50      0.50     56962
weighted avg       1.00      1.00      1.00     56962



In [34]:
print(confusion_matrix(y_test,predicted))

[[56792    72]
 [   98     0]]
