# Training Pipeline (LightGBM)
Machine Learning-Based Credit Card Transaction Fraud Detection

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import log_loss,accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from datetime import datetime
import lightgbm as lgbm

In [None]:
# Load the card-transaction records from the CSV file into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='../Dataset/card-transaction-dataset.csv',
)

In [None]:
# Count how many rows fall into each value of the 'Class' column.
df.loc[:, 'Class'].value_counts()

In [None]:
# Encode the target column into integer class labels.
label_encoder = LabelEncoder()
y = df['Class']
Y = label_encoder.fit_transform(y)

# Show a 100-element window of the encoded labels as a quick sanity check.
print(Y[slice(500, 600)])

In [None]:
# Build the feature matrix: every column except the 'Time' column and the
# 'Class' target.
X = df.drop(columns=['Time', 'Class'])

# Call head() so the first rows are printed; printing the bare attribute
# `X.head` would show the bound-method repr instead of a preview.
print(X.head())

In [None]:
# Snapshot the feature column labels as a NumPy array before X is replaced
# by its scaled version.
feature_names = np.array(X.columns, copy=True)

In [None]:
# Standardise every feature (StandardScaler) and replace X with the result.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split below, so test-set statistics influence the scaling --
# confirm this is acceptable.
scaler = StandardScaler()
X = scaler.fit(X).transform(X)

In [None]:
# Split dataset into train/test chunks (80% / 20%, fixed seed for
# reproducibility).
# stratify=Y keeps the class proportions identical in both partitions, which
# matters when one class is rare (as fraud labels typically are): an
# unstratified split can leave the test set with a skewed share of positives.
X_train,X_test,Y_train,Y_test = train_test_split(
    X,Y,test_size=0.2,random_state=6,stratify=Y)

In [None]:
# Train a LightGBM booster on the training partition.
d_train = lgbm.Dataset(data=X_train, label=Y_train)

# Booster configuration: binary objective, log-loss metric, GBDT boosting.
lgbm_params = {
    'learning_rate': 0.05,
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': ['binary_logloss'],
    'num_leaves': 80,
    'max_depth': 10,
}

# Wall-clock timing around the 50 boosting rounds.
start = datetime.now()
classifier = lgbm.train(
    params=lgbm_params,
    train_set=d_train,
    num_boost_round=50,
)
stop = datetime.now()

train_time = stop - start
print("Training duration :",train_time)

In [None]:
# Let's make predictions
# Keep the raw scores returned by the booster in their own variable: with the
# 'binary' objective these are positive-class probabilities, and log loss is
# only meaningful on probabilities, not on thresholded 0/1 labels.
Y_proba_lgbm = classifier.predict(X_test)

# Threshold at 0.5 to obtain hard class labels (vectorised, no Python loop).
Y_pred_lgbm = (Y_proba_lgbm >= .5).astype(int)


print("///Metrics")
# Log loss is evaluated on the probabilities, accuracy on the hard labels.
print("Log loss:",log_loss(Y_test,Y_proba_lgbm))
print("Accuracy:",accuracy_score(Y_test,Y_pred_lgbm))

In [None]:
# Create the confusion matrix from the true and predicted test labels.
cf_lgbm = confusion_matrix(Y_test,Y_pred_lgbm)
print(cf_lgbm)

# With no `average` argument this yields one precision / recall / F1 value
# per class; the fourth return value (support) is not needed here.
precision,recall,fscore,_ = precision_recall_fscore_support(Y_test,Y_pred_lgbm)
print("Precision :",precision)
print("Recall :",recall)
print("F1-score :",fscore)

# fmt='d' renders the cell counts as plain integers; seaborn's default
# annotation format ('.2g') would show large counts in scientific notation.
sns.heatmap(cf_lgbm,annot=True,fmt='d')