In [2]:
# Importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Importing the datset
dataset = pd.read_csv('creditcard.csv')
x = dataset.iloc[:,1:30].values
y = dataset.iloc[:,30].values

In [4]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 32)

In [5]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
x_train = sc_x.fit_transform(x_train)
x_test = sc_x.transform(x_test)

In [6]:
# Getting classifier ready
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()

In [6]:
# Fitting Feature Selector (Best 18 features)
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
sfs1 = sfs(classifier, k_features=18, verbose = 2)
sfs1 = sfs1.fit(x_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:    2.8s finished

[2020-04-29 21:34:35] Features: 1/18 -- score: 0.998793039127477[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:    6.0s finished

[2020-04-29 21:34:41] Features: 2/18 -- score: 0.9988325396651232[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:    6.8s finished

[2020-04-29 21:34:48] Features: 3/18 -- score: 0.9988149838706137[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   

In [7]:
# Showing Features
feat_cols = list(sfs1.k_feature_idx_)
print(feat_cols) # [2, 3, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 25, 28]

[2, 3, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 25, 28]


In [32]:
# Fitting in the model
classifier.fit(x_train[:,[2, 3, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 25, 28]],y_train)
y_pred = classifier.predict(x_test[:,[2, 3, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 25, 28]])

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[56494,   356],
       [   17,    95]], dtype=int64)

### Ensemble Method : Bagging

In [33]:
from sklearn.ensemble import BaggingClassifier
bagging = BaggingClassifier(base_estimator = GaussianNB(), n_jobs = -1, random_state = 0, n_estimators = 100,
                            bootstrap = False, max_samples = 0.6, max_features = 0.6, verbose = 2)
bagging.fit(x_train[:,[2, 3, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 25, 28]],y_train)
y_pred = bagging.predict(x_test[:,[2, 3, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 25, 28]])

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.6s remaining:   14.1s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    4.9s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    4.9s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remaining:    3.0s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.0s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.1s finished


array([[56585,   265],
       [   17,    95]], dtype=int64)

### Ensemble Method : ADA Boost

In [7]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
ada_boost = AdaBoostClassifier(base_estimator = GaussianNB(), n_estimators = 100, algorithm = 'SAMME.R')
ada_boost.fit(x_train[:,[2, 3, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 25, 28]],y_train)

from sklearn.metrics import confusion_matrix
y_pred = ada_boost.predict(x_test[:,[2, 3, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 25, 28]])
cm = confusion_matrix(y_test, y_pred)
cm

array([[54927,  1923],
       [   46,    66]], dtype=int64)

### Applying ADA Boost on Bagging Classifier

In [8]:
from sklearn.ensemble import BaggingClassifier
bagging = BaggingClassifier(base_estimator = GaussianNB(), n_jobs = -1, random_state = 0, n_estimators = 100,
                            bootstrap = False, max_samples = 0.6, max_features = 0.6, verbose = 2)

from sklearn.ensemble import AdaBoostClassifier
ada_boost = AdaBoostClassifier(base_estimator = bagging, n_estimators = 100, algorithm = 'SAMME.R')
ada_boost.fit(x_train[:,[2, 3, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 25, 28]],y_train)

from sklearn.metrics import confusion_matrix
y_pred = ada_boost.predict(x_test[:,[2, 3, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 25, 28]])
cm = confusion_matrix(y_test, y_pred)
cm

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    5.8s remaining:   17.6s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    6.1s remaining:    1.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    6.2s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.5s remaining:   13.7s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.2s remaining:    1.0s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.3s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    5.1s remaining:   15.6s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.5s remaining:    1.0s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.5s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent wo

[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.2s remaining:    1.0s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.2s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.6s remaining:   14.0s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.5s remaining:    1.0s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.5s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    5.2s remaining:   15.7s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.6s remaining:    1.0s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.7s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.3s remaining:   13.1s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.4s remainin

[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.9s remaining:   14.8s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.5s remaining:    1.0s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.5s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    5.1s remaining:   15.4s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.5s remaining:    1.0s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.6s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.6s remaining:   14.0s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.3s remaining:    1.0s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.4s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent wo

[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.4s remaining:    1.0s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.5s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    5.2s remaining:   15.8s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    6.1s remaining:    1.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    6.6s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    5.2s remaining:   15.7s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.9s remaining:    1.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.9s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.6s remaining:   14.1s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.0s remainin

[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.7s remaining:   14.3s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    4.9s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    4.9s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.7s remaining:   14.4s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.1s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.1s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.6s remaining:   14.0s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    4.8s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    4.8s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.8s remainin

[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.7s remaining:   14.4s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.0s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.1s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.6s remaining:   14.0s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.0s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.0s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.6s remaining:   13.9s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.1s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.2s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.7s remainin

[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.6s remaining:   14.0s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    4.9s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    4.9s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.5s remaining:   13.8s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.0s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.1s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.7s remaining:   14.4s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    4.9s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.0s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.8s remainin

[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.8s remaining:   14.6s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.0s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.0s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.5s remaining:   13.6s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    4.8s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    4.8s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.8s remaining:   14.6s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    5.0s remaining:    0.9s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    5.0s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    4.5s remainin

[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    1.0s remaining:    3.1s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.0s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.1s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    1.0s remaining:    3.2s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.0s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.0s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    1.0s remaining:    3.1s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.1s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.1s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    1.0s remainin

[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remaining:    2.9s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.0s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.1s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remaining:    2.9s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.0s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.0s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remaining:    3.0s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.0s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.0s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remainin

[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remaining:    3.0s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.0s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.1s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remaining:    2.9s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.0s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.1s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remaining:    2.8s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.1s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.1s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remainin

[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remaining:    2.9s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.1s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.1s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remaining:    2.9s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.0s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.1s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remaining:    2.9s
[Parallel(n_jobs=12)]: Done  10 out of  12 | elapsed:    1.1s remaining:    0.1s
[Parallel(n_jobs=12)]: Done  12 out of  12 | elapsed:    1.1s finished
[Parallel(n_jobs=12)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=12)]: Done   3 out of  12 | elapsed:    0.9s remainin

array([[54991,  1859],
       [   42,    70]], dtype=int64)

In [10]:
cm

array([[54991,  1859],
       [   42,    70]], dtype=int64)