In [None]:
import torch
import numpy as np
import pandas as pd 
import sklearn

from torch import nn
from torch.utils.data import \
    Dataset as DS, \
    DataLoader as DL
from pandas import read_csv
from ipdb import set_trace

import utils


### load data

In [None]:
# Load the train/test split through the project helper. The four names bound
# here feed the dataset, training and evaluation cells further down.
# NOTE(review): later cells treat X_train / X_test as DataFrames (``.card1``,
# ``.values``) -- confirm that is what load_standard_data returns.
X_train, X_test, Y_train, Y_test = utils.load_standard_data(data_type="standard")

In [None]:
# Quick look at the split: how many training rows have a positive vs. a
# negative `card1` value, and how many test labels equal 1 vs. 0.
n_card1_positive = len(X_train[X_train.card1 > 0])
n_card1_negative = len(X_train[X_train.card1 < 0])
print(n_card1_positive, n_card1_negative)

n_test_label_one = len(Y_test[Y_test == 1])
n_test_label_zero = len(Y_test[Y_test == 0])
print(n_test_label_one, n_test_label_zero)

### model

In [None]:
class AlexNet(torch.nn.Module):
    """AlexNet-style network built from 1-D convolutions.

    The convolutional stack expects a single input channel, so inputs are
    shaped ``(batch, 1, length)``. By the layer arithmetic below, ``length``
    must be at least 63 for the last max-pool to receive a valid input.

    The network returns raw scores (no final activation is applied here).

    Args:
        num_classes: Number of output units of the last linear layer.
        dropout: Drop probability used by both dropout layers of the classifier.
    """

    def __init__(self, num_classes: int = 2, dropout: float = 0.5) -> None:
        super().__init__()
        # NOTE: the layer order (and therefore the state_dict keys such as
        # "features.0.weight") must stay stable so saved checkpoints keep loading.
        self.features = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=2),
            nn.Conv1d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=2),
            nn.Conv1d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=2),
        )
        # Fixes the temporal size at 6 so the classifier input is always 256 * 6.
        self.avgpool = nn.AdaptiveAvgPool1d(6)
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch.

        Args:
            x: Tensor of shape ``(batch, 1, length)``.

        Returns:
            Raw scores of shape ``(batch,)`` when the network has a single
            output unit, otherwise ``(batch, num_classes)``.
        """
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # (batch, 256, 6) -> (batch, 1536)
        x = self.classifier(x)
        # Drop only a singleton class axis. Flattening from dim 0 (the previous
        # behaviour) gave the same result for one output unit, but for several
        # units it merged the batch and class axes into one interleaved vector.
        if x.shape[-1] == 1:
            x = x.squeeze(-1)
        return x


### dataset

In [None]:
class Fraud_Dataset(DS):
    """Map-style dataset that pairs each feature row with its label.

    Both inputs are converted once, at construction time, to ``float32``
    NumPy arrays; indexing then returns plain array slices.

    Args:
        X: Feature table; its ``.values`` become the samples.
        y: Labels aligned row-by-row with ``X``; its ``.values`` become the targets.
    """

    def __init__(self, X: pd.DataFrame, y: pd.DataFrame):
        features = X.values
        labels = y.values
        self.X = features.astype(np.float32)
        self.Y = labels.astype(np.float32)

    def __len__(self):
        """Number of samples, taken from the label array."""
        return self.Y.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        sample = self.X[idx]
        target = self.Y[idx]
        return sample, target


### training configuration

In [None]:
# Hyperparameters read by the training and evaluation cells below.
dropout = 0.5  # drop probability passed to AlexNet
lr = 0.0002  # learning rate passed to the Adam optimizer
batch_size = 100  # mini-batch size passed to the training DataLoader
num_classes = 1  # one output per sample; training applies a sigmoid and BCELoss to it
epochs = 100  # passes over the training set; a checkpoint is written when the epoch index is a multiple of 5

### training

In [None]:
# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_ds = Fraud_Dataset(X_train, Y_train)
train_dl = DL(train_ds, batch_size=batch_size, shuffle=True)

# Move the model to its device *before* building the optimizer, so the
# optimizer is created over the parameters that are actually trained.
AN = AlexNet(num_classes=num_classes, dropout=dropout).to(device)
AN_opt = torch.optim.Adam(AN.parameters(), lr=lr)

sigmoid = nn.Sigmoid() # final activation
bce = nn.BCELoss() # binary cross-entropy


for e in range(epochs):

    # Per-batch losses of the current epoch. Collected in a plain list and
    # turned into a DataFrame once: DataFrame.append was removed in pandas 2.0
    # and re-copies the whole frame on every call.
    epoch_losses = []

    for X, y in train_dl:
        X = X.to(device).unsqueeze(1) # add a channel dimension
        y = y.to(device)

        AN_opt.zero_grad()

        y_logits = AN(X)  # raw scores; the sigmoid is applied outside the model
        loss = bce(sigmoid(y_logits), y)
        loss.backward()
        AN_opt.step()

        epoch_losses.append(loss.item())

    loss_df = pd.DataFrame({"loss": epoch_losses})

    # Checkpoint the weights and this epoch's loss curve every fifth epoch.
    if e % 5 == 0:
        torch.save(AN.state_dict(), f"{utils._data_pth_}/models/AN_epochs_{e}.pth")
        loss_df.to_csv(f"{utils._data_pth_}/models/AN_epochs_{e}_loss.csv")


### evaluation

In [None]:
import Evaluation as eva
import metrics
from sklearn.metrics import classification_report, roc_auc_score, roc_curve, confusion_matrix
_data_type_ = ""
_eval_epoch_ = 60  # which saved checkpoint to evaluate (one is written every 5 epochs)

# Release the training data before the full-test-set forward pass. Each name is
# popped independently, so a missing X_train no longer prevents Y_train from
# being released (and nothing is raised when the cell is re-run).
for _name in ("X_train", "Y_train"):
    globals().pop(_name, None)

# Evaluation runs on the CPU: the whole test set goes through the model in one
# forward pass and the result is converted straight to NumPy.
input_X = torch.Tensor(X_test.values).unsqueeze(1)  # add a channel dimension
AN = AlexNet(num_classes=num_classes, dropout=dropout)

# Map the checkpoint to the CPU to match where the model lives.
_model_state_ = torch.load(
    f'{utils._data_pth_}/models/{_data_type_}AN_epochs_{_eval_epoch_}.pth',
    map_location="cpu",
)
AN.load_state_dict(_model_state_)
AN.eval()  # switch dropout off, otherwise the predictions are stochastic

with torch.no_grad():
    # The network returns raw scores; training applies the sigmoid outside the
    # model, so the same activation is needed here to obtain probabilities.
    y_pred_probs = torch.sigmoid(AN(input_X)).numpy()

metrics.roc_pr_curve(Y_test, y_pred_probs)

# Hard 0/1 predictions at the 0.5 probability threshold, kept in a separate
# array so y_pred_probs still holds the probabilities afterwards.
y_pred = (y_pred_probs >= 0.5).astype(y_pred_probs.dtype)
metrics.conf_matrix(Y_test, y_pred)



In [None]:
# Leftover interactive post-mortem debugger call, disabled so that
# "Restart Kernel -> Run All" cannot block on a debugger prompt.
# Run `%debug` by hand right after a failing cell when it is needed.