## 0. Mengimport Modul dan Membaca Data

In [1]:
# Basic Packages
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Classifier or Clustering algorithm
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans
from sklearn.cluster import Birch
from sklearn.mixture import GaussianMixture
# Metrics
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix

In [2]:
# Data untuk direct machine learning
df = pd.read_csv('BankSim 8000.csv')
X = pd.get_dummies(df.drop(['Unnamed: 0', 'step', 'customer', 'merchant','fraud'], axis=1), columns=['category'])
y = df['fraud']

# Data untuk model AAVGA
Xe = pd.read_csv('Embedding AAVGA without ImbalanceSampler.csv').drop(['Unnamed: 0'], axis=1)
ye = df['fraud']

## 1. Hasil dengan model AAVGA

In [3]:
classifier = MLPClassifier(random_state=42)
clf_output = classifier.fit(Xe, ye)
pred = classifier.predict(Xe)
print(np.round(accuracy_score(ye, pred), 3))
print(np.round(roc_auc_score(ye, pred), 3))
print(np.round(average_precision_score(ye, pred), 3))
print(confusion_matrix(ye, pred))

0.989
0.545
0.101
[[7900    0]
 [  91    9]]


In [4]:
classifier = SVC(random_state=42)
clf_output = classifier.fit(Xe, ye)
pred = classifier.predict(Xe)
print(np.round(accuracy_score(ye, pred), 3))
print(np.round(roc_auc_score(ye, pred), 3))
print(np.round(average_precision_score(ye, pred), 3))
print(confusion_matrix(ye, pred))

0.988
0.5
0.012
[[7900    0]
 [ 100    0]]


In [5]:
classifier = LogisticRegression(random_state=42)
clf_output = classifier.fit(Xe, ye)
pred = classifier.predict(Xe)
print(np.round(accuracy_score(ye, pred), 3))
print(np.round(roc_auc_score(ye, pred), 3))
print(np.round(average_precision_score(ye, pred), 3))
print(confusion_matrix(ye, pred))

0.988
0.5
0.012
[[7900    0]
 [ 100    0]]


In [6]:
classifier = KNeighborsClassifier()
clf_output = classifier.fit(Xe, ye)
pred = classifier.predict(Xe)
print(np.round(accuracy_score(ye, pred), 3))
print(np.round(roc_auc_score(ye, pred), 3))
print(np.round(average_precision_score(ye, pred), 3))
print(confusion_matrix(ye, pred))

0.993
0.794
0.482
[[7886   14]
 [  41   59]]


In [7]:
classifier = DecisionTreeClassifier(random_state=42)
clf_output = classifier.fit(Xe, ye)
pred = classifier.predict(Xe)
print(np.round(accuracy_score(ye, pred), 3))
print(np.round(roc_auc_score(ye, pred), 3))
print(np.round(average_precision_score(ye, pred), 3))
print(confusion_matrix(ye, pred))

1.0
0.995
0.99
[[7900    0]
 [   1   99]]


In [8]:
classifier = xgb.XGBClassifier(random_state=42)
clf_output = classifier.fit(Xe, ye)
pred = classifier.predict(Xe)
print(np.round(accuracy_score(ye, pred), 3))
print(np.round(roc_auc_score(ye, pred), 3))
print(np.round(average_precision_score(ye, pred), 3))
print(confusion_matrix(ye, pred))

1.0
1.0
1.0
[[7900    0]
 [   0  100]]


In [9]:
classifier = KMeans(n_clusters=2)
clf_output = classifier.fit(Xe, ye)
pred = classifier.predict(Xe)
print(np.round(accuracy_score(ye, pred), 3))
print(np.round(roc_auc_score(ye, pred), 3))
print(np.round(average_precision_score(ye, pred), 3))
print(confusion_matrix(ye, pred))

0.863
0.931
0.084
[[6806 1094]
 [   0  100]]


In [10]:
classifier = GaussianMixture(n_components=2)
clf_output = classifier.fit(Xe, ye)
pred = classifier.predict(Xe)
print(np.round(accuracy_score(ye, pred), 3))
print(np.round(roc_auc_score(ye, pred), 3))
print(np.round(average_precision_score(ye, pred), 3))
print(confusion_matrix(ye, pred))

0.863
0.931
0.083
[[6802 1098]
 [   0  100]]


In [11]:
classifier = Birch(threshold=0.03, n_clusters=2)
clf_output = classifier.fit(Xe, ye)
pred = classifier.predict(Xe)
print(np.round(accuracy_score(ye, pred), 3))
print(np.round(roc_auc_score(ye, pred), 3))
print(np.round(average_precision_score(ye, pred), 3))
print(confusion_matrix(ye, pred))

0.987
0.5
0.012
[[7895    5]
 [ 100    0]]


## 2. Hasil direct ML

In [12]:
classifier = MLPClassifier(random_state=42)
clf_output = classifier.fit(X, y)
pred = classifier.predict(X)
print(np.round(accuracy_score(y, pred), 3))
print(np.round(roc_auc_score(y, pred), 3))
print(np.round(average_precision_score(y, pred), 3))
print(confusion_matrix(y, pred))

0.991
0.847
0.437
[[7857   43]
 [  30   70]]


In [13]:
classifier = SVC(random_state=42)
clf_output = classifier.fit(X, y)
pred = classifier.predict(X)
print(np.round(accuracy_score(y, pred), 3))
print(np.round(roc_auc_score(y, pred), 3))
print(np.round(average_precision_score(y, pred), 3))
print(confusion_matrix(y, pred))

0.993
0.755
0.455
[[7893    7]
 [  49   51]]


In [14]:
classifier = LogisticRegression(random_state=42)
clf_output = classifier.fit(X, y)
pred = classifier.predict(X)
print(np.round(accuracy_score(y, pred), 3))
print(np.round(roc_auc_score(y, pred), 3))
print(np.round(average_precision_score(y, pred), 3))
print(confusion_matrix(y, pred))

0.993
0.774
0.486
[[7892    8]
 [  45   55]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [15]:
classifier = KNeighborsClassifier()
clf_output = classifier.fit(X, y)
pred = classifier.predict(X)
print(np.round(accuracy_score(y, pred), 3))
print(np.round(roc_auc_score(y, pred), 3))
print(np.round(average_precision_score(y, pred), 3))
print(confusion_matrix(y, pred))

0.993
0.759
0.449
[[7891    9]
 [  48   52]]


In [24]:
classifier = DecisionTreeClassifier(random_state=42)
clf_output = classifier.fit(X, y)
pred = classifier.predict(X)
print(np.round(accuracy_score(y, pred), 3))
print(np.round(roc_auc_score(y, pred), 3))
print(np.round(average_precision_score(y, pred), 3))
print(confusion_matrix(y, pred))

1.0
1.0
1.0
[[7900    0]
 [   0  100]]


In [17]:
classifier = xgb.XGBClassifier(random_state=42)
clf_output = classifier.fit(X, y)
pred = classifier.predict(X)
print(np.round(accuracy_score(y, pred), 3))
print(np.round(roc_auc_score(y, pred), 3))
print(np.round(average_precision_score(y, pred), 3))
print(confusion_matrix(y, pred))

0.999
0.965
0.921
[[7899    1]
 [   7   93]]


In [18]:
classifier = KMeans(n_clusters=2)
clf_output = classifier.fit(X, y)
pred = classifier.predict(X)
print(np.round(accuracy_score(y, pred), 3))
print(np.round(roc_auc_score(y, pred), 3))
print(np.round(average_precision_score(y, pred), 3))
print(confusion_matrix(y, pred))

0.988
0.515
0.042
[[7900    0]
 [  97    3]]


In [18]:
classifier = KMeans(n_clusters=2)
clf_output = classifier.fit(X, y)
pred = classifier.predict(X)
print(np.round(accuracy_score(y, pred), 3))
print(np.round(roc_auc_score(y, pred), 3))
print(np.round(average_precision_score(y, pred), 3))
print(confusion_matrix(y, pred))

0.988
0.515
0.042
[[7900    0]
 [  97    3]]


In [20]:
classifier = Birch(threshold=0.03, n_clusters=2)
clf_output = classifier.fit(X, y)
pred = classifier.predict(X)
print(np.round(accuracy_score(y, pred), 3))
print(np.round(roc_auc_score(y, pred), 3))
print(np.round(average_precision_score(y, pred), 3))
print(confusion_matrix(y, pred))

0.988
0.515
0.042
[[7900    0]
 [  97    3]]
