In [1]:
import numpy as np
import pandas as pd

In [2]:
from google.colab import drive

In [4]:
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
data  = pd.read_csv("/content/drive/My Drive/Deep Learning/creditcard.csv")

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from collections import Counter

# Original dataset
x = data.drop('Class', axis=1).values
y = data['Class'].values
scaler = MinMaxScaler()
scaler.fit(x)
x = scaler.transform(x)
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=0)

print('Sampled train dataset shape %s' % Counter(ytrain))
print('Sampled validation dataset shape %s' % Counter(ytest))

Sampled train dataset shape Counter({0: 227454, 1: 391})
Sampled validation dataset shape Counter({0: 56861, 1: 101})


In [9]:
import torch

bs =100

#creating torch dataset and loader using original dataset. 
#to use resampled dataset, replace ex. xtrain with xtrain_over etc.
train_ds = torch.utils.data.TensorDataset(torch.tensor(xtrain).float(), torch.tensor(ytrain).float())
valid_ds = torch.utils.data.TensorDataset(torch.tensor(xtest).float(), torch.tensor(ytest).float())

train_dl = torch.utils.data.DataLoader(train_ds, batch_size=bs)
valid_dl = torch.utils.data.DataLoader(valid_ds, batch_size=bs)


In [10]:
#network class 2-hidden layer model
class Classifier(torch.nn.Module):
    def __init__(self, n_input=10, n_hidden = 20, n_output = 1,drop_prob=0.5):
        super().__init__()
        self.extractor1 = torch.nn.Linear(n_input, n_hidden)
        self.extractor2 = torch.nn.Linear(n_hidden, n_hidden)
        self.relu = torch.nn.ReLU()
        self.drop_out = torch.nn.Dropout(drop_prob)
        self.classifier = torch.nn.Linear(n_hidden, n_output)

    def forward(self, xb):
        x = self.relu(self.extractor1(xb))
        x = self.relu(self.extractor2(x))
        x = self.drop_out(x)
        return self.classifier(x).squeeze()

In [11]:
def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    return loss.item(), len(xb)

In [12]:
#training the network
def train(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print(epoch, val_loss)

## **SGD Optimizer**

In [13]:
#network setting
n_input = xtrain.shape[1]
n_output = 1
n_hidden = 15

model = Classifier(n_input=n_input,n_hidden=n_hidden,n_output=n_output,drop_prob=0.2)

lr = 0.001

#for orignal dataset, I use pos_weight.
pos_weight = torch.tensor([5])
opt = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
loss_func = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

n_epoch = 200

In [14]:
train(n_epoch,model,loss_func,opt,train_dl,valid_dl)
model.eval()

0 0.04767648543390339
1 0.04698426333979026
2 0.04617386487997381
3 0.04530260065999299
4 0.04416965013509873
5 0.04280117842718375
6 0.040858868309784385
7 0.03829832972613636
8 0.03551580954672629
9 0.0329761027580929
10 0.030442962878180664
11 0.028063188695068332
12 0.025926954272382962
13 0.024147200849551288
14 0.022663339682841337
15 0.021387844870019938
16 0.020280538461459535
17 0.01928858037371332
18 0.018332927306053355
19 0.01753875197289306
20 0.016763873888428798
21 0.016038949297289176
22 0.01537617374636925
23 0.014840396827639497
24 0.014352141504105428
25 0.01376061488499419
26 0.013355276332199581
27 0.013090116663226648
28 0.012833250318067166
29 0.012545347633299708
30 0.012452679449926956
31 0.01232564876780285
32 0.012119109895251707
33 0.012082000394206397
34 0.011950621133601002
35 0.011907963315043365
36 0.011803857742935209
37 0.01164326720366817
38 0.011596404041378641
39 0.011455302346470683
40 0.011373100145541486
41 0.011474861171426945
42 0.0113562848583

Classifier(
  (extractor1): Linear(in_features=30, out_features=15, bias=True)
  (extractor2): Linear(in_features=15, out_features=15, bias=True)
  (relu): ReLU()
  (drop_out): Dropout(p=0.2, inplace=False)
  (classifier): Linear(in_features=15, out_features=1, bias=True)
)

In [18]:
from sklearn import metrics
ypred = model(torch.tensor(xtest).float()).detach().numpy()

ypred [ypred>=0.5] =1.0
ypred [ypred<0.5] =0.0
print('Accuracy score: {}'.format(metrics.accuracy_score(ytest, ypred)))



Accuracy score: 0.9993679997191109


## **Adam Optimizer**

In [20]:
#network setting
n_input = xtrain.shape[1]
n_output = 1
n_hidden = 15

model = Classifier(n_input=n_input,n_hidden=n_hidden,n_output=n_output,drop_prob=0.2)

lr = 0.001

#for orignal dataset, I use pos_weight.
pos_weight = torch.tensor([5])
opt = torch.optim.Adam(model.parameters(), lr=lr)
loss_func = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

n_epoch = 200

In [21]:
train(n_epoch,model,loss_func,opt,train_dl,valid_dl)
model.eval()

0 0.03662987152521263
1 0.02021260144687296
2 0.015076272979464112
3 0.013930225812321532
4 0.013237360732705171
5 0.012510836359147397
6 0.012706423001131378
7 0.01202770463282336
8 0.01173016331146173
9 0.011450087165358442
10 0.011302948844006662
11 0.011379850824401183
12 0.011520987088905584
13 0.011213159758370856
14 0.010953839029804922
15 0.011256639363325983
16 0.01118905194579839
17 0.011021117039799095
18 0.011410597640127688
19 0.010908506795602792
20 0.010863967755535393
21 0.01087764661972398
22 0.010851942810280167
23 0.010884245584629833
24 0.011616970355056776
25 0.010667660189423906
26 0.010935514817206048
27 0.010758664649199636
28 0.0106092322493461
29 0.010738325311108741
30 0.011677314331279179
31 0.01068805483918364
32 0.010679457417932362
33 0.010837597549059821
34 0.010728327871443235
35 0.010825119003069622
36 0.010586002631651041
37 0.0110795744159198
38 0.010857426464417414
39 0.010733470387744333
40 0.01055711125981166
41 0.010597754461082003
42 0.010843133

Classifier(
  (extractor1): Linear(in_features=30, out_features=15, bias=True)
  (extractor2): Linear(in_features=15, out_features=15, bias=True)
  (relu): ReLU()
  (drop_out): Dropout(p=0.2, inplace=False)
  (classifier): Linear(in_features=15, out_features=1, bias=True)
)

In [22]:
from sklearn import metrics
ypred = model(torch.tensor(xtest).float()).detach().numpy()

ypred [ypred>=0.5] =1.0
ypred [ypred<0.5] =0.0
print('Accuracy score: {}'.format(metrics.accuracy_score(ytest, ypred)))

Accuracy score: 0.999420666409185


## **Adadelta Optimizer**

In [24]:
#network setting
n_input = xtrain.shape[1]
n_output = 1
n_hidden = 15

model = Classifier(n_input=n_input,n_hidden=n_hidden,n_output=n_output,drop_prob=0.2)

lr = 0.001

#for orignal dataset, I use pos_weight.
pos_weight = torch.tensor([5])
opt = torch.optim.Adadelta(model.parameters(), lr=lr)
loss_func = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

n_epoch = 200

In [25]:
train(n_epoch,model,loss_func,opt,train_dl,valid_dl)
model.eval()

0 0.7054230957142419
1 0.2845288438599605
2 0.09200966939142273
3 0.06185265763205514
4 0.052121967718759626
5 0.04885232249429651
6 0.047719709172641886
7 0.04724647675913826
8 0.047064826455931116
9 0.047019943317049784
10 0.0470503481146793
11 0.047118375471369225
12 0.04721022915026879
13 0.047320854219890435
14 0.0474264008025873
15 0.04753349471436971
16 0.04763944248549593
17 0.04774206306609485
18 0.04783716202919852
19 0.04792985050262432
20 0.04802050806220576
21 0.048097096198278295
22 0.048178109290584276
23 0.048252546557101254
24 0.048313107722742034
25 0.048371700265258
26 0.04842555623997824
27 0.04847926571747928
28 0.04851866364234961
29 0.04856836294185644
30 0.048600940864809435
31 0.04863269112527183
32 0.04866613262607458
33 0.04869306297796841
34 0.04871537384232211
35 0.04873455274821976
36 0.04875660230654834
37 0.04878337112861681
38 0.04880970797280061
39 0.04880934083025101
40 0.04881823959980993
41 0.04882403169602967
42 0.04883696848149013
43 0.04885063167

Classifier(
  (extractor1): Linear(in_features=30, out_features=15, bias=True)
  (extractor2): Linear(in_features=15, out_features=15, bias=True)
  (relu): ReLU()
  (drop_out): Dropout(p=0.2, inplace=False)
  (classifier): Linear(in_features=15, out_features=1, bias=True)
)

In [26]:
from sklearn import metrics
ypred = model(torch.tensor(xtest).float()).detach().numpy()

ypred [ypred>=0.5] =1.0
ypred [ypred<0.5] =0.0
print('Accuracy score: {}'.format(metrics.accuracy_score(ytest, ypred)))

Accuracy score: 0.9982268881008391


## **RMSprop Optimizer**

In [27]:
#network setting
n_input = xtrain.shape[1]
n_output = 1
n_hidden = 15

model = Classifier(n_input=n_input,n_hidden=n_hidden,n_output=n_output,drop_prob=0.2)

lr = 0.001

#for orignal dataset, I use pos_weight.
pos_weight = torch.tensor([5])
opt = torch.optim.RMSprop(model.parameters(), lr=lr)
loss_func = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

n_epoch = 200

In [28]:
train(n_epoch,model,loss_func,opt,train_dl,valid_dl)
model.eval()

0 0.03496223439070151
1 0.013979871606656076
2 0.011933266703498275
3 0.011239341047338687
4 0.011133184239051274
5 0.010929863881706886
6 0.010783810782130485
7 0.010751321248073637
8 0.010691884435692413
9 0.010708625443484861
10 0.010541012739448508
11 0.010764422175900592
12 0.010744914971521734
13 0.010706968073505102
14 0.010520363501651555
15 0.010980050398253408
16 0.010756790727825915
17 0.010843587952522367
18 0.010396848590688281
19 0.01042071140703407
20 0.010633501436743365
21 0.010383688508532442
22 0.010328687332602443
23 0.010548790703902418
24 0.010616510880416017
25 0.010510644784142455
26 0.010556447321001557
27 0.011011127048109956
28 0.01055728723234518
29 0.010650470361661245
30 0.010466485411878746
31 0.010365644323355027
32 0.010443524542407016
33 0.010275810057730286
34 0.010264244164493078
35 0.010113414968118774
36 0.010117366277271613
37 0.010137995488401044
38 0.01010131217719658
39 0.010630002302154523
40 0.010605573032376131
41 0.010501500919412422
42 0.0

Classifier(
  (extractor1): Linear(in_features=30, out_features=15, bias=True)
  (extractor2): Linear(in_features=15, out_features=15, bias=True)
  (relu): ReLU()
  (drop_out): Dropout(p=0.2, inplace=False)
  (classifier): Linear(in_features=15, out_features=1, bias=True)
)

In [29]:
from sklearn import metrics
ypred = model(torch.tensor(xtest).float()).detach().numpy()

ypred [ypred>=0.5] =1.0
ypred [ypred<0.5] =0.0
print('Accuracy score: {}'.format(metrics.accuracy_score(ytest, ypred)))

Accuracy score: 0.9994382219725431
