In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
import torch.utils.data
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms
import torch.optim as optim
import matplotlib
import matplotlib.pyplot as plt
import random
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

In [2]:
# Package requirement

print('numpy version: ', np.__version__)
print('pandas version: ', pd.__version__)
print('pytorch version: ', torch.__version__)
print('torchvision version: ', torchvision.__version__)
print('matplotlib version: ', matplotlib.__version__)
print('sklearn version: ', sklearn.__version__)

numpy version:  1.18.5
pandas version:  1.0.5
pytorch version:  1.7.0
torchvision version:  0.8.1
matplotlib version:  3.2.2
sklearn version:  0.23.1


In [3]:
data = pd.read_csv("data/creditcard.csv")

In [4]:
num_fraud = data[data["Class"] == 1]["Class"].count()
num_total = data["Class"].count()
print("Fraction of fraud is ", num_fraud/num_total)
print("Number of fraud transaction: ", num_fraud)

Fraction of fraud is  0.001727485630620034
Number of fraud transaction:  492


In [5]:
# Set random seed for reproductibility

seed = 42
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [6]:
std_scaler = StandardScaler()
rob_scaler = RobustScaler()

data['scaled_amount'] = rob_scaler.fit_transform(data['Amount'].values.reshape(-1,1))
data['scaled_time'] = rob_scaler.fit_transform(data['Time'].values.reshape(-1,1))

data.drop(['Time','Amount'], axis=1, inplace=True)
data

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V22,V23,V24,V25,V26,V27,V28,Class,scaled_amount,scaled_time
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,0,1.783274,-0.994983
1,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,0,-0.269825,-0.994983
2,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,0,4.983721,-0.994972
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,0,1.418291,-0.994972
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,0,0.670579,-0.994960
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,4.356170,...,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0,-0.296653,1.034951
284803,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,-0.975926,...,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,0,0.038986,1.034963
284804,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,-0.484782,...,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,0,0.641096,1.034975
284805,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,-0.399126,...,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,0,-0.167680,1.034975


In [7]:
ytrain = np.array(data['Class']).reshape(-1,1)
ytrain

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]], dtype=int64)

In [8]:
data.drop('Class', axis=1, inplace=True)
data

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,scaled_amount,scaled_time
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,1.783274,-0.994983
1,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,-0.269825,-0.994983
2,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,4.983721,-0.994972
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,1.418291,-0.994972
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,0.670579,-0.994960
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,4.356170,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,-0.296653,1.034951
284803,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,-0.975926,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,0.038986,1.034963
284804,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,-0.484782,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,0.641096,1.034975
284805,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,-0.399126,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,-0.167680,1.034975


In [9]:
xtrain = np.array(data)

scaler = MinMaxScaler()
xtrain = scaler.fit_transform(xtrain)

xtrain

array([[9.35192337e-01, 7.66490419e-01, 8.81364903e-01, ...,
        3.12696634e-01, 5.82379309e-03, 0.00000000e+00],
       [9.78541955e-01, 7.70066651e-01, 8.40298490e-01, ...,
        3.13422663e-01, 1.04705276e-04, 0.00000000e+00],
       [9.35217023e-01, 7.53117667e-01, 8.68140819e-01, ...,
        3.11911316e-01, 1.47389219e-02, 5.78730497e-06],
       ...,
       [9.90904812e-01, 7.64079694e-01, 7.81101998e-01, ...,
        3.12584864e-01, 2.64215395e-03, 9.99976851e-01],
       [9.54208999e-01, 7.72855742e-01, 8.49587129e-01, ...,
        3.15245157e-01, 3.89238944e-04, 9.99976851e-01],
       [9.49231759e-01, 7.65256401e-01, 8.49601462e-01, ...,
        3.13400843e-01, 8.44648509e-03, 1.00000000e+00]])

In [10]:
df_x = pd.DataFrame(xtrain)
df_x

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,0.935192,0.766490,0.881365,0.313023,0.763439,0.267669,0.266815,0.786444,0.475312,0.510600,...,0.561184,0.522992,0.663793,0.391253,0.585122,0.394557,0.418976,0.312697,0.005824,0.000000
1,0.978542,0.770067,0.840298,0.271796,0.766120,0.262192,0.264875,0.786298,0.453981,0.505267,...,0.557840,0.480237,0.666938,0.336440,0.587290,0.446013,0.416345,0.313423,0.000105,0.000000
2,0.935217,0.753118,0.868141,0.268766,0.762329,0.281122,0.270177,0.788042,0.410603,0.513018,...,0.565477,0.546030,0.678939,0.289354,0.559515,0.402727,0.415489,0.311911,0.014739,0.000006
3,0.941878,0.765304,0.868484,0.213661,0.765647,0.275559,0.266803,0.789434,0.414999,0.507585,...,0.559734,0.510277,0.662607,0.223826,0.614245,0.389197,0.417669,0.314371,0.004807,0.000006
4,0.938617,0.776520,0.864251,0.269796,0.762975,0.263984,0.268968,0.782484,0.490950,0.524303,...,0.561327,0.547271,0.663392,0.401270,0.566343,0.507497,0.420561,0.317490,0.002724,0.000012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,0.756448,0.873531,0.666991,0.160317,0.729603,0.236810,0.235393,0.863749,0.528729,0.598850,...,0.564920,0.515249,0.680500,0.313600,0.658558,0.466291,0.433929,0.329840,0.000030,0.999965
284803,0.945845,0.766677,0.872678,0.219189,0.771561,0.273661,0.265504,0.788548,0.482925,0.488530,...,0.564933,0.553153,0.665619,0.245298,0.543855,0.360884,0.417775,0.312038,0.000965,0.999971
284804,0.990905,0.764080,0.781102,0.227202,0.783425,0.293496,0.263547,0.792985,0.477677,0.498692,...,0.565220,0.537005,0.664877,0.468492,0.592824,0.411177,0.416593,0.312585,0.002642,0.999977
284805,0.954209,0.772856,0.849587,0.282508,0.763172,0.269291,0.261175,0.792671,0.476287,0.500464,...,0.565755,0.547353,0.663008,0.398836,0.545958,0.514746,0.418520,0.315245,0.000389,0.999977


In [11]:
# Transfer the numpy array of X and Y to tensor

X = torch.from_numpy(xtrain).float()
Y = torch.from_numpy(ytrain).float()
X.shape

torch.Size([284807, 30])

In [12]:
# Create training dataset set and training data loader

train_dataset = torch.utils.data.TensorDataset(X, Y)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 32, shuffle = True)

In [19]:
# Autoencoder model class
# Input layer shape: 30, hidden layers shape 18-9-18, output layer shape: 30

class RAE(nn.Module):
    def __init__(self):
        super(RAE, self).__init__()

        self.enc1 = nn.Linear(in_features=xtrain.shape[1], out_features=18)
        self.enc2 = nn.Linear(in_features=18, out_features=9)
        #self.enc3 = nn.Linear(in_features=12, out_features=5)
        
        #self.dec3 = nn.Linear(in_features=5, out_features=12)
        self.dec2 = nn.Linear(in_features=9, out_features=18)
        self.dec1 = nn.Linear(in_features=18, out_features=xtrain.shape[1])

    def forward(self, x):

        x = F.relu(self.enc1(x))
        x = F.relu(self.enc2(x))
        #x = F.relu(self.enc3(x))

        #x = F.relu(self.dec3(x))
        x = F.relu(self.dec2(x))
        x = F.relu(self.dec1(x)) #sigmoid

        return x

In [25]:
# Necessary function

def training(model, epoch, train_loader):
    train_loss = []
    val_loss = []
    for e in range(epoch):
        running_loss = 0
        for data in train_loader:
            input = data[0]
            input = input.view(-1, 30)
            output = model(input)
            loss = criterion(input, output)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            running_loss += loss.item()

        ave_loss = running_loss / len(train_loader)
        train_loss.append(ave_loss)
        print('Train loss at {}/{} epoch is {}'.format(e+1, epoch, ave_loss))


# Shrinkage function based on the original paper

# L1 shrinkage
def shrinkage(S, lamda):
    for i in range(np.shape(S)[0]):
        for j in range(np.shape(S)[1]):
            if S[i, j] > lamda:
                S[i, j] = S[i, j] - lamda
            if S[i, j] < -lamda:
                S[i, j] = S[i, j] + lamda
            if S[i, j] <= lamda and S[i, j] >= -lamda:
                S[i, j] = 0
    return S

# L21 shrinkage
def l21shrinkage(S, lamda):
    for j in range(np.shape(S)[1]):
        e = np.sqrt(np.sum(S[:, j]**2))
        if e > lamda:
            for i in range(np.shape(S)[0]):
                S[i, j] = S[i, j] - lamda * S[i, j]/e

        if e <= lamda:
            for i in range(np.shape(S)[0]):
                S[i, j] = 0
    return S

In [53]:
# initialization
lambda_value = 5e-08
S = np.zeros((len(xtrain), 30))
output = np.zeros((len(xtrain), 30))
epsilon = 1e-9
X = np.array(xtrain).reshape((len(xtrain), 30))
# y = np.array(ytrain_1)
LS = X

j = 0

while j < 5:
    # Define the LD matrix for this step
    LD = X - S

    # Autoencoder step on LD substracted from S
    LD_tensor = torch.from_numpy(LD).float()
    # y_tensor = torch.from_numpy(y).float()
    train_dataset = torch.utils.data.TensorDataset(LD_tensor)#, y_tensor)
    # Create data loader and use batch size = 32
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = 32, shuffle = False)

    # Initiate the model and use MSE loss, learning rate = 0.001 and epochs = 10
    model = RAE()
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.1)
    training(model, 10, train_loader)
    
    optimizer.param_groups[0]['lr'] = 0.01
    training(model, 10, train_loader)
    
    optimizer.param_groups[0]['lr'] = 0.001
    training(model, 10, train_loader)

    # Autoencoder of the step trained, so extraction of the output.
    n = 0
    output = np.zeros((xtrain.shape[0], 30))
    for data in train_loader:
        input = data[0].view(-1, 30)
        output[n:n + input.shape[0]] = model(input).detach().numpy()

        n += input.shape[0]

    # Update LD as the reconstruction by the autoencoder
    LD = output

    # Compute S and apply shrinkage.
    S = X - LD
    S = l21shrinkage(S, lamda=lambda_value)
    #S = shrinkage(S, lamda=lambda_value)

    # Increment j for the next step
    j += 1

    # Check for convergence condition
    print('Iteration {} '.format(j))
    """
    c1 = np.linalg.norm(X - S - LD) / np.linalg.norm(X)
    c2 = np.linalg.norm(LS - S - LD) / np.linalg.norm(X)
    print("c1 = ", c1)
    print("c2 = ", c2)
    print("--------------------------------------------------------")
    if c1 < epsilon or c2 < epsilon:
        print("The training proess has converged and stopped")
        break
    """
    # Update LS for convergence checking in next iteration
    LS = S + LD

print('Training for lambda value {} completed'.format(lambda_value))
print('------------------------------------------------------------------------')

Train loss at 1/10 epoch is 0.11860634565751829
Train loss at 2/10 epoch is 0.11850322237236822
Train loss at 3/10 epoch is 0.11850322248955521
Train loss at 4/10 epoch is 0.1185032225113185
Train loss at 5/10 epoch is 0.11850322249374046
Train loss at 6/10 epoch is 0.11850322251382965
Train loss at 7/10 epoch is 0.11850322249374046
Train loss at 8/10 epoch is 0.11850322251382965
Train loss at 9/10 epoch is 0.11850322249374046
Train loss at 10/10 epoch is 0.11850322251382965
Train loss at 1/10 epoch is 0.11840154605090732
Train loss at 2/10 epoch is 0.11840242648838531
Train loss at 3/10 epoch is 0.11840242649759286
Train loss at 4/10 epoch is 0.11840242649591876
Train loss at 5/10 epoch is 0.11840242649424466
Train loss at 6/10 epoch is 0.11840242649591876
Train loss at 7/10 epoch is 0.11840242649424466
Train loss at 8/10 epoch is 0.11840242649591876
Train loss at 9/10 epoch is 0.11840242649424466
Train loss at 10/10 epoch is 0.11840242649591876
Train loss at 1/10 epoch is 0.118388217

In [34]:
for i in train_loader:
    print(i)

[tensor([[9.3519e-01, 7.6649e-01, 8.8136e-01,  ..., 3.1270e-01, 5.8238e-03,
         0.0000e+00],
        [9.7854e-01, 7.7007e-01, 8.4030e-01,  ..., 3.1342e-01, 1.0471e-04,
         0.0000e+00],
        [9.3522e-01, 7.5312e-01, 8.6814e-01,  ..., 3.1191e-01, 1.4739e-02,
         5.7873e-06],
        ...,
        [9.4826e-01, 7.6974e-01, 8.7822e-01,  ..., 3.0070e-01, 7.1270e-04,
         4.5141e-04],
        [9.1796e-01, 7.7356e-01, 8.3805e-01,  ..., 3.0436e-01, 3.1139e-03,
         4.5720e-04],
        [9.4716e-01, 7.7158e-01, 8.5975e-01,  ..., 3.1552e-01, 1.2654e-03,
         4.6298e-04]])]
[tensor([[9.0720e-01, 7.7707e-01, 8.3007e-01,  ..., 3.0451e-01, 1.5550e-03,
         4.6298e-04],
        [9.7717e-01, 7.6953e-01, 8.4390e-01,  ..., 3.1392e-01, 2.6756e-03,
         4.6877e-04],
        [8.9727e-01, 7.8149e-01, 8.2011e-01,  ..., 3.0387e-01, 3.0750e-03,
         4.7456e-04],
        ...,
        [9.7978e-01, 7.6903e-01, 8.3264e-01,  ..., 3.1317e-01, 5.0212e-05,
         9.9542e-04],


[tensor([[9.4565e-01, 7.7768e-01, 8.6764e-01,  ..., 3.1451e-01, 3.4642e-05,
         1.6736e-01],
        [9.7774e-01, 7.6534e-01, 8.4747e-01,  ..., 3.1297e-01, 1.1677e-04,
         1.6738e-01],
        [9.0535e-01, 7.6783e-01, 8.5868e-01,  ..., 3.0612e-01, 6.0885e-03,
         1.6738e-01],
        ...,
        [9.7614e-01, 7.6808e-01, 8.4581e-01,  ..., 3.1339e-01, 1.4791e-03,
         1.6801e-01],
        [9.4076e-01, 7.7925e-01, 8.6321e-01,  ..., 3.1907e-01, 3.5810e-05,
         1.6801e-01],
        [9.5247e-01, 7.7948e-01, 8.5993e-01,  ..., 3.1514e-01, 1.7477e-04,
         1.6801e-01]])]
[tensor([[9.8112e-01, 7.6019e-01, 8.5504e-01,  ..., 3.1360e-01, 4.9433e-04,
         1.6801e-01],
        [9.7947e-01, 7.7297e-01, 8.3089e-01,  ..., 3.1298e-01, 1.0490e-03,
         1.6802e-01],
        [8.8677e-01, 8.0060e-01, 7.4489e-01,  ..., 2.9592e-01, 3.5028e-03,
         1.6802e-01],
        ...,
        [9.2585e-01, 7.4095e-01, 8.6159e-01,  ..., 3.1601e-01, 2.8994e-03,
         1.6868e-01],


[tensor([[9.5517e-01, 7.8276e-01, 8.4409e-01,  ..., 3.1428e-01, 3.8924e-05,
         2.1210e-01],
        [9.2796e-01, 7.4799e-01, 8.5121e-01,  ..., 3.1618e-01, 3.4564e-02,
         2.1211e-01],
        [9.7654e-01, 7.7221e-01, 8.5173e-01,  ..., 3.1360e-01, 1.2642e-03,
         2.1211e-01],
        ...,
        [9.5535e-01, 7.6876e-01, 8.6087e-01,  ..., 3.0614e-01, 3.3241e-03,
         2.1245e-01],
        [9.4351e-01, 7.6929e-01, 8.6982e-01,  ..., 3.1276e-01, 9.6726e-04,
         2.1245e-01],
        [9.7772e-01, 7.6544e-01, 8.5886e-01,  ..., 3.1378e-01, 1.1191e-03,
         2.1245e-01]])]
[tensor([[9.8086e-01, 7.5092e-01, 8.4958e-01,  ..., 3.1345e-01, 2.8290e-03,
         2.1246e-01],
        [9.7911e-01, 7.5766e-01, 8.4864e-01,  ..., 3.1347e-01, 2.7243e-03,
         2.1246e-01],
        [9.7975e-01, 7.5908e-01, 8.5608e-01,  ..., 3.1359e-01, 1.5531e-03,
         2.1246e-01],
        ...,
        [9.8206e-01, 7.4955e-01, 8.6546e-01,  ..., 3.1374e-01, 6.2278e-04,
         2.1282e-01],


[tensor([[9.8203e-01, 7.5257e-01, 8.5467e-01,  ..., 3.1388e-01, 2.3354e-03,
         2.5017e-01],
        [9.8022e-01, 7.7003e-01, 8.4281e-01,  ..., 3.1344e-01, 1.7477e-04,
         2.5017e-01],
        [9.7785e-01, 7.6468e-01, 8.5862e-01,  ..., 3.1377e-01, 3.5771e-04,
         2.5017e-01],
        ...,
        [9.7306e-01, 7.5957e-01, 8.4710e-01,  ..., 3.1339e-01, 5.2975e-03,
         2.5047e-01],
        [9.4685e-01, 7.8082e-01, 8.3935e-01,  ..., 3.1311e-01, 2.9582e-05,
         2.5047e-01],
        [9.7819e-01, 7.5745e-01, 8.4523e-01,  ..., 3.1362e-01, 3.8535e-03,
         2.5047e-01]])]
[tensor([[9.3997e-01, 7.7136e-01, 8.6000e-01,  ..., 3.1646e-01, 7.7809e-04,
         2.5047e-01],
        [9.5247e-01, 7.4275e-01, 8.2426e-01,  ..., 3.1609e-01, 3.1132e-02,
         2.5047e-01],
        [9.8006e-01, 7.5093e-01, 8.5765e-01,  ..., 3.1349e-01, 1.3234e-03,
         2.5047e-01],
        ...,
        [9.7887e-01, 7.7369e-01, 8.2943e-01,  ..., 3.1399e-01, 2.9582e-05,
         2.5082e-01],


[tensor([[9.4481e-01, 7.7338e-01, 8.7358e-01,  ..., 3.1006e-01, 3.8924e-05,
         2.9567e-01],
        [9.4073e-01, 7.7718e-01, 8.6741e-01,  ..., 3.1334e-01, 3.6332e-03,
         2.9567e-01],
        [9.5532e-01, 7.6748e-01, 8.5956e-01,  ..., 3.0917e-01, 6.6171e-04,
         2.9567e-01],
        ...,
        [9.4671e-01, 7.7839e-01, 8.5940e-01,  ..., 3.1509e-01, 1.0042e-04,
         2.9587e-01],
        [9.7769e-01, 7.7045e-01, 8.4879e-01,  ..., 3.1356e-01, 3.8924e-05,
         2.9587e-01],
        [9.3810e-01, 7.6591e-01, 8.8443e-01,  ..., 3.1504e-01, 1.9878e-03,
         2.9588e-01]])]
[tensor([[9.2373e-01, 7.8227e-01, 8.4985e-01,  ..., 3.1880e-01, 4.0675e-04,
         2.9588e-01],
        [9.8268e-01, 7.5656e-01, 8.4665e-01,  ..., 3.1328e-01, 1.9462e-04,
         2.9588e-01],
        [9.8208e-01, 7.5557e-01, 8.5116e-01,  ..., 3.1348e-01, 5.4493e-04,
         2.9588e-01],
        ...,
        [9.8292e-01, 7.5971e-01, 8.1972e-01,  ..., 3.1347e-01, 5.8386e-04,
         2.9615e-01],


[tensor([[9.4958e-01, 7.7510e-01, 8.6114e-01,  ..., 3.0453e-01, 2.7169e-03,
         3.3973e-01],
        [9.7004e-01, 7.5423e-01, 8.5236e-01,  ..., 3.1389e-01, 9.0700e-03,
         3.3974e-01],
        [9.7920e-01, 7.6351e-01, 8.2752e-01,  ..., 3.1361e-01, 1.1603e-03,
         3.3974e-01],
        ...,
        [9.8019e-01, 7.5835e-01, 8.4228e-01,  ..., 3.1339e-01, 2.0190e-03,
         3.4006e-01],
        [9.7804e-01, 7.6912e-01, 8.4428e-01,  ..., 3.1344e-01, 3.6822e-04,
         3.4007e-01],
        [9.8192e-01, 7.6052e-01, 8.4111e-01,  ..., 3.1339e-01, 1.3619e-03,
         3.4007e-01]])]
[tensor([[9.8141e-01, 7.5818e-01, 8.3461e-01,  ..., 3.1289e-01, 1.3619e-03,
         3.4007e-01],
        [9.8244e-01, 7.5896e-01, 8.1656e-01,  ..., 3.1340e-01, 1.3744e-03,
         3.4007e-01],
        [9.8136e-01, 7.6143e-01, 8.4403e-01,  ..., 3.1354e-01, 1.5433e-03,
         3.4007e-01],
        ...,
        [9.8457e-01, 7.5370e-01, 8.4050e-01,  ..., 3.1343e-01, 1.4791e-03,
         3.4033e-01],


[tensor([[9.7492e-01, 7.5962e-01, 8.5735e-01,  ..., 3.1384e-01, 4.5455e-03,
         3.8248e-01],
        [9.4665e-01, 7.8159e-01, 8.5901e-01,  ..., 3.1422e-01, 2.9450e-03,
         3.8248e-01],
        [9.4415e-01, 7.7255e-01, 8.7476e-01,  ..., 3.1222e-01, 1.6757e-03,
         3.8248e-01],
        ...,
        [9.6077e-01, 7.7388e-01, 8.5438e-01,  ..., 3.1734e-01, 2.1369e-04,
         3.8277e-01],
        [9.3637e-01, 7.8381e-01, 8.4252e-01,  ..., 3.1319e-01, 3.6666e-04,
         3.8277e-01],
        [9.3939e-01, 7.5007e-01, 8.7380e-01,  ..., 3.1497e-01, 5.3715e-03,
         3.8278e-01]])]
[tensor([[9.3110e-01, 7.6689e-01, 8.7826e-01,  ..., 3.1463e-01, 0.0000e+00,
         3.8279e-01],
        [9.7850e-01, 7.6033e-01, 8.4775e-01,  ..., 3.1363e-01, 1.9423e-03,
         3.8280e-01],
        [9.7767e-01, 7.6359e-01, 8.4112e-01,  ..., 3.1367e-01, 2.5873e-03,
         3.8280e-01],
        ...,
        [9.4369e-01, 7.7745e-01, 8.6176e-01,  ..., 3.1473e-01, 7.3566e-05,
         3.8315e-01],


[tensor([[8.8906e-01, 8.0241e-01, 8.2433e-01,  ..., 3.1513e-01, 8.6800e-05,
         4.2250e-01],
        [9.7798e-01, 7.6990e-01, 8.4252e-01,  ..., 3.1350e-01, 3.1061e-04,
         4.2250e-01],
        [9.7549e-01, 7.5542e-01, 8.6408e-01,  ..., 3.1405e-01, 3.8924e-03,
         4.2251e-01],
        ...,
        [9.7872e-01, 7.6718e-01, 8.4956e-01,  ..., 3.1369e-01, 1.3892e-03,
         4.2279e-01],
        [9.5928e-01, 7.7341e-01, 8.4618e-01,  ..., 3.1325e-01, 4.7915e-04,
         4.2279e-01],
        [9.3153e-01, 7.8691e-01, 8.6973e-01,  ..., 3.1178e-01, 4.6709e-06,
         4.2279e-01]])]
[tensor([[9.7867e-01, 7.6904e-01, 8.4386e-01,  ..., 3.1350e-01, 3.8535e-05,
         4.2279e-01],
        [9.7352e-01, 7.5833e-01, 8.5457e-01,  ..., 3.1339e-01, 4.0489e-03,
         4.2279e-01],
        [9.7935e-01, 7.6426e-01, 8.0465e-01,  ..., 3.1305e-01, 3.0750e-03,
         4.2279e-01],
        ...,
        [9.7720e-01, 7.6876e-01, 8.3914e-01,  ..., 3.1322e-01, 1.6698e-03,
         4.2305e-01],


[tensor([[9.8257e-01, 7.6291e-01, 8.3318e-01,  ..., 3.1298e-01, 3.8924e-05,
         4.5735e-01],
        [9.4939e-01, 7.7880e-01, 8.5895e-01,  ..., 3.1491e-01, 1.9423e-04,
         4.5736e-01],
        [9.7462e-01, 7.5971e-01, 8.5307e-01,  ..., 3.1345e-01, 4.1275e-03,
         4.5736e-01],
        ...,
        [9.5058e-01, 7.7083e-01, 8.6608e-01,  ..., 3.1438e-01, 8.5633e-04,
         4.5763e-01],
        [9.4436e-01, 7.7552e-01, 8.6937e-01,  ..., 3.1484e-01, 7.7069e-05,
         4.5763e-01],
        [9.5357e-01, 7.7969e-01, 8.5414e-01,  ..., 3.1498e-01, 1.0042e-04,
         4.5763e-01]])]
[tensor([[9.4254e-01, 7.6532e-01, 8.6136e-01,  ..., 3.1458e-01, 5.0406e-04,
         4.5763e-01],
        [9.7180e-01, 7.5300e-01, 8.4796e-01,  ..., 3.1462e-01, 1.0276e-02,
         4.5763e-01],
        [9.4859e-01, 7.7679e-01, 8.6294e-01,  ..., 3.1545e-01, 9.7310e-05,
         4.5764e-01],
        ...,
        [9.7788e-01, 7.6515e-01, 8.5113e-01,  ..., 3.1321e-01, 3.8924e-05,
         4.5792e-01],


[tensor([[9.9834e-01, 7.5484e-01, 8.0666e-01,  ..., 3.1169e-01, 5.8386e-04,
         5.0650e-01],
        [9.5387e-01, 7.8809e-01, 8.3802e-01,  ..., 3.1737e-01, 1.3324e-03,
         5.0651e-01],
        [9.2465e-01, 7.8316e-01, 8.3720e-01,  ..., 2.9368e-01, 2.6468e-05,
         5.0651e-01],
        ...,
        [9.4761e-01, 7.7653e-01, 8.1711e-01,  ..., 3.1211e-01, 3.2307e-03,
         5.0693e-01],
        [9.0865e-01, 7.9525e-01, 8.1256e-01,  ..., 3.1992e-01, 3.8924e-05,
         5.0694e-01],
        [9.8446e-01, 7.4951e-01, 7.8199e-01,  ..., 3.1217e-01, 1.5336e-02,
         5.0694e-01]])]
[tensor([[9.5882e-01, 7.7598e-01, 8.3946e-01,  ..., 3.1482e-01, 2.7792e-04,
         5.0694e-01],
        [9.9295e-01, 7.6688e-01, 8.2212e-01,  ..., 3.1246e-01, 3.5032e-04,
         5.0695e-01],
        [9.3359e-01, 7.4415e-01, 8.2536e-01,  ..., 3.1664e-01, 2.8602e-02,
         5.0696e-01],
        ...,
        [9.5843e-01, 7.7559e-01, 8.4495e-01,  ..., 3.1750e-01, 2.1369e-04,
         5.0754e-01],


[tensor([[9.9305e-01, 7.6813e-01, 8.0598e-01,  ..., 3.1222e-01, 3.8535e-05,
         6.5579e-01],
        [9.4882e-01, 7.8060e-01, 8.3471e-01,  ..., 3.1752e-01, 7.7069e-05,
         6.5580e-01],
        [9.5106e-01, 7.8373e-01, 8.5389e-01,  ..., 3.1620e-01, 2.0046e-04,
         6.5581e-01],
        ...,
        [9.4856e-01, 7.4982e-01, 8.4034e-01,  ..., 3.1810e-01, 1.8035e-02,
         6.5643e-01],
        [9.5846e-01, 7.7441e-01, 8.3897e-01,  ..., 3.1476e-01, 1.3857e-04,
         6.5643e-01],
        [8.6774e-01, 8.1982e-01, 7.6467e-01,  ..., 3.2393e-01, 3.8145e-05,
         6.5643e-01]])]
[tensor([[9.9322e-01, 7.6381e-01, 7.8972e-01,  ..., 3.1190e-01, 3.8924e-05,
         6.5643e-01],
        [9.9181e-01, 7.6346e-01, 8.2470e-01,  ..., 3.1245e-01, 1.0284e-03,
         6.5644e-01],
        [9.8885e-01, 7.5351e-01, 8.1712e-01,  ..., 3.1207e-01, 3.2716e-03,
         6.5644e-01],
        ...,
        [9.9322e-01, 7.6381e-01, 7.8972e-01,  ..., 3.1190e-01, 3.8924e-05,
         6.5692e-01],


[tensor([[9.4852e-01, 7.5831e-01, 8.7584e-01,  ..., 3.1751e-01, 1.0042e-02,
         7.1347e-01],
        [9.4244e-01, 7.7835e-01, 8.3217e-01,  ..., 3.1187e-01, 1.1288e-03,
         7.1347e-01],
        [9.9185e-01, 7.6241e-01, 8.3122e-01,  ..., 3.1191e-01, 4.4762e-04,
         7.1348e-01],
        ...,
        [9.9316e-01, 7.6608e-01, 8.1867e-01,  ..., 3.1187e-01, 7.7069e-05,
         7.1383e-01],
        [9.5234e-01, 7.8426e-01, 8.1629e-01,  ..., 3.1953e-01, 2.2187e-04,
         7.1383e-01],
        [9.2101e-01, 7.7886e-01, 8.1498e-01,  ..., 3.1221e-01, 3.5028e-03,
         7.1384e-01]])]
[tensor([[9.8871e-01, 7.6012e-01, 8.1303e-01,  ..., 3.1165e-01, 2.7247e-03,
         7.1384e-01],
        [9.9122e-01, 7.6587e-01, 8.3611e-01,  ..., 3.1268e-01, 4.1415e-04,
         7.1385e-01],
        [9.9356e-01, 7.6769e-01, 8.1562e-01,  ..., 3.1198e-01, 3.8924e-05,
         7.1385e-01],
        ...,
        [9.5170e-01, 7.8075e-01, 8.2221e-01,  ..., 3.1612e-01, 3.8924e-05,
         7.1416e-01],


[tensor([[9.7334e-01, 7.4403e-01, 8.1593e-01,  ..., 3.1454e-01, 2.3354e-02,
         7.5680e-01],
        [9.4799e-01, 7.6611e-01, 8.4684e-01,  ..., 3.1748e-01, 1.3312e-03,
         7.5681e-01],
        [9.8899e-01, 7.4064e-01, 8.4367e-01,  ..., 3.1312e-01, 7.1857e-03,
         7.5681e-01],
        ...,
        [9.9009e-01, 7.7419e-01, 8.2101e-01,  ..., 3.1208e-01, 1.1810e-03,
         7.5716e-01],
        [9.5606e-01, 7.8275e-01, 8.5145e-01,  ..., 3.1411e-01, 2.0513e-04,
         7.5716e-01],
        [9.8721e-01, 7.6707e-01, 7.8159e-01,  ..., 3.1389e-01, 7.9794e-03,
         7.5717e-01]])]
[tensor([[9.9072e-01, 7.6357e-01, 8.3416e-01,  ..., 3.1219e-01, 3.8924e-05,
         7.5718e-01],
        [9.4793e-01, 7.8225e-01, 8.3864e-01,  ..., 3.1477e-01, 7.1114e-04,
         7.5718e-01],
        [9.7405e-01, 7.4689e-01, 8.1360e-01,  ..., 3.1423e-01, 2.1817e-02,
         7.5719e-01],
        ...,
        [7.0777e-01, 5.5304e-01, 8.5785e-01,  ..., 3.2266e-01, 2.3354e-02,
         7.5744e-01],


[tensor([[9.9348e-01, 7.6607e-01, 8.1308e-01,  ..., 3.1187e-01, 3.0828e-04,
         8.0243e-01],
        [9.0960e-01, 7.4266e-01, 8.3129e-01,  ..., 3.1111e-01, 3.2152e-02,
         8.0244e-01],
        [9.2456e-01, 7.4679e-01, 8.5246e-01,  ..., 3.0711e-01, 7.7848e-04,
         8.0244e-01],
        ...,
        [9.9480e-01, 7.6648e-01, 7.9282e-01,  ..., 3.1120e-01, 7.3566e-05,
         8.0275e-01],
        [9.3456e-01, 7.6932e-01, 8.5579e-01,  ..., 3.1384e-01, 1.1439e-02,
         8.0276e-01],
        [9.3172e-01, 7.6692e-01, 8.4722e-01,  ..., 3.0683e-01, 2.7208e-03,
         8.0276e-01]])]
[tensor([[9.3375e-01, 7.6618e-01, 8.5826e-01,  ..., 3.1249e-01, 5.1769e-03,
         8.0277e-01],
        [9.5191e-01, 7.6346e-01, 8.2829e-01,  ..., 3.1839e-01, 2.5460e-03,
         8.0277e-01],
        [9.5234e-01, 7.7596e-01, 8.3681e-01,  ..., 3.1586e-01, 6.9285e-05,
         8.0277e-01],
        ...,
        [9.5587e-01, 7.6284e-01, 8.1001e-01,  ..., 3.1903e-01, 1.3919e-02,
         8.0310e-01],


[tensor([[9.4275e-01, 7.5334e-01, 8.1597e-01,  ..., 3.1516e-01, 1.2751e-02,
         8.3697e-01],
        [9.9321e-01, 7.7172e-01, 8.0646e-01,  ..., 3.1188e-01, 3.2540e-04,
         8.3697e-01],
        [9.5140e-01, 7.8894e-01, 7.8705e-01,  ..., 3.1344e-01, 2.9582e-05,
         8.3697e-01],
        ...,
        [9.4449e-01, 7.4912e-01, 7.8375e-01,  ..., 3.1338e-01, 3.5937e-02,
         8.3734e-01],
        [9.9486e-01, 7.5664e-01, 8.2620e-01,  ..., 3.1216e-01, 4.7098e-04,
         8.3734e-01],
        [9.9198e-01, 7.6189e-01, 8.3045e-01,  ..., 3.1182e-01, 2.3315e-04,
         8.3735e-01]])]
[tensor([[9.9039e-01, 7.6348e-01, 8.1443e-01,  ..., 3.1186e-01, 3.1521e-03,
         8.3735e-01],
        [9.8728e-01, 7.4802e-01, 8.1243e-01,  ..., 3.1221e-01, 9.7310e-03,
         8.3735e-01],
        [9.8831e-01, 7.6258e-01, 8.0542e-01,  ..., 3.1294e-01, 4.6039e-03,
         8.3737e-01],
        ...,
        [9.4324e-01, 7.7918e-01, 8.3312e-01,  ..., 3.1603e-01, 7.8751e-03,
         8.3773e-01],


[tensor([[9.9281e-01, 7.6917e-01, 8.1165e-01,  ..., 3.1261e-01, 5.0212e-05,
         8.8083e-01],
        [9.9124e-01, 7.6817e-01, 8.0677e-01,  ..., 3.1174e-01, 1.8508e-03,
         8.8085e-01],
        [9.8472e-01, 7.6231e-01, 7.9525e-01,  ..., 3.1352e-01, 9.3141e-03,
         8.8085e-01],
        ...,
        [9.8969e-01, 7.5014e-01, 8.4803e-01,  ..., 3.1280e-01, 4.0831e-03,
         8.8115e-01],
        [9.4076e-01, 7.7804e-01, 8.2294e-01,  ..., 3.0652e-01, 1.1989e-04,
         8.8115e-01],
        [9.6903e-01, 7.1970e-01, 7.9377e-01,  ..., 3.1514e-01, 3.2501e-02,
         8.8116e-01]])]
[tensor([[9.5305e-01, 7.7775e-01, 8.3559e-01,  ..., 3.1377e-01, 4.2777e-04,
         8.8116e-01],
        [9.9154e-01, 7.6538e-01, 7.8363e-01,  ..., 3.1187e-01, 1.6870e-03,
         8.8116e-01],
        [9.5860e-01, 7.7712e-01, 8.4290e-01,  ..., 3.1519e-01, 4.8577e-04,
         8.8116e-01],
        ...,
        [9.5611e-01, 7.7793e-01, 8.4422e-01,  ..., 3.1180e-01, 1.1638e-04,
         8.8149e-01],


[tensor([[9.8742e-01, 7.5964e-01, 7.8179e-01,  ..., 3.1309e-01, 6.2878e-03,
         9.2625e-01],
        [9.8442e-01, 7.4583e-01, 8.1348e-01,  ..., 3.1321e-01, 1.3748e-02,
         9.2625e-01],
        [9.4691e-01, 7.7976e-01, 8.6178e-01,  ..., 3.1748e-01, 1.1677e-04,
         9.2625e-01],
        ...,
        [9.4345e-01, 7.6480e-01, 8.4616e-01,  ..., 3.1342e-01, 6.2278e-04,
         9.2661e-01],
        [9.8835e-01, 7.6044e-01, 8.0653e-01,  ..., 3.1280e-01, 3.6989e-03,
         9.2661e-01],
        [9.8812e-01, 7.5726e-01, 8.3758e-01,  ..., 3.1267e-01, 3.8535e-03,
         9.2662e-01]])]
[tensor([[9.9267e-01, 7.6944e-01, 8.0803e-01,  ..., 3.1157e-01, 3.8924e-05,
         9.2662e-01],
        [9.9154e-01, 7.6473e-01, 7.8148e-01,  ..., 3.1252e-01, 1.8936e-03,
         9.2663e-01],
        [9.9020e-01, 7.5213e-01, 8.1042e-01,  ..., 3.1244e-01, 7.5512e-03,
         9.2663e-01],
        ...,
        [9.4289e-01, 7.8618e-01, 8.1745e-01,  ..., 3.2360e-01, 1.4986e-04,
         9.2694e-01],


[tensor([[9.8874e-01, 7.5880e-01, 8.2140e-01,  ..., 3.1274e-01, 5.7997e-03,
         9.7133e-01],
        [9.9243e-01, 7.6650e-01, 7.9798e-01,  ..., 3.1154e-01, 1.9462e-03,
         9.7134e-01],
        [9.9352e-01, 7.6789e-01, 8.1846e-01,  ..., 3.1195e-01, 7.7069e-05,
         9.7134e-01],
        ...,
        [9.9204e-01, 7.5521e-01, 8.3246e-01,  ..., 3.1237e-01, 7.7809e-04,
         9.7180e-01],
        [9.9370e-01, 7.5745e-01, 8.3151e-01,  ..., 3.1235e-01, 3.3046e-04,
         9.7180e-01],
        [9.2617e-01, 7.4871e-01, 8.4223e-01,  ..., 3.1149e-01, 1.6814e-02,
         9.7180e-01]])]
[tensor([[9.9190e-01, 7.5937e-01, 8.0852e-01,  ..., 3.1204e-01, 2.9426e-04,
         9.7181e-01],
        [9.9283e-01, 7.6579e-01, 8.2085e-01,  ..., 3.1222e-01, 5.0406e-04,
         9.7181e-01],
        [9.5986e-01, 7.7817e-01, 8.3194e-01,  ..., 3.1469e-01, 1.7477e-04,
         9.7182e-01],
        ...,
        [9.4815e-01, 7.8159e-01, 8.4141e-01,  ..., 3.0854e-01, 1.9073e-05,
         9.7222e-01],


In [51]:
# Generate the reconstruction output using the trained model

with torch.no_grad():
    n = 0
    output = np.zeros((xtrain.shape[0], 30))
    for data in train_loader:
        input = data[0].view(-1, 30)
        output[n:n + input.shape[0]] = model(input).detach().numpy()

        n += input.shape[0]
            
reconstruction = output


# 1:
recon_error = np.sum(np.abs(xtrain - reconstruction), axis=1)
# 2:
#recon_error = np.sqrt(np.sum((xtrain - reconstruction)**2, axis=1))

ind_err = np.argsort(-recon_error)  # ordering from largest to smallest
ind_err2 = np.argsort(recon_error)  # ordering from smallest to largest

# Metrics

ypred = np.zeros(ytrain.shape, dtype=int)
ypred[ind_err[0:492]] = 1


f1 = f1_score(ytrain, ypred)
precision = precision_score(ytrain, ypred)
recall = recall_score(ytrain, ypred)
accuracy = accuracy_score(ytrain, ypred)


tp = ytrain[ind_err[:num_fraud]].sum()
fp = num_fraud - tp
tn = len(ytrain[ind_err2[:(num_total-num_fraud)]])-ytrain[ind_err2[:(num_total-num_fraud)]].sum()
fn = len(ytrain[ind_err2[:(num_total-num_fraud)]])-tn


accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * precision * recall / (precision + recall)


print("F1 score: ", f1)
print("precision score: ", precision)
print("recall score: ", recall)
print("accuracy score: ", accuracy)


F1 score:  0.39634146341463417
precision score:  0.39634146341463417
recall score:  0.39634146341463417
accuracy score:  0.9979143771044954


In [46]:
X

array([[9.35192337e-01, 7.66490419e-01, 8.81364903e-01, ...,
        3.12696634e-01, 5.82379309e-03, 0.00000000e+00],
       [9.78541955e-01, 7.70066651e-01, 8.40298490e-01, ...,
        3.13422663e-01, 1.04705276e-04, 0.00000000e+00],
       [9.35217023e-01, 7.53117667e-01, 8.68140819e-01, ...,
        3.11911316e-01, 1.47389219e-02, 5.78730497e-06],
       ...,
       [9.90904812e-01, 7.64079694e-01, 7.81101998e-01, ...,
        3.12584864e-01, 2.64215395e-03, 9.99976851e-01],
       [9.54208999e-01, 7.72855742e-01, 8.49587129e-01, ...,
        3.15245157e-01, 3.89238944e-04, 9.99976851e-01],
       [9.49231759e-01, 7.65256401e-01, 8.49601462e-01, ...,
        3.13400843e-01, 8.44648509e-03, 1.00000000e+00]])

In [44]:
xtrain.shape

(284807, 30)