In [1]:
import torch
import numpy as np
import pandas as pd 

from torch import nn
from torch.utils.data import \
    Dataset as DS, \
    DataLoader as DL
from pandas import read_csv
from ipdb import set_trace

import utils


### load data

In [None]:
# Load the processed, joined training CSV; column 0 of the file becomes the
# DataFrame index rather than a data column.
train_data = pd.read_csv(
    f"{utils._data_pth_}/processed/train_joined.csv",
    index_col=0,
)

### model

In [None]:
class AlexNet(torch.nn.Module):
    """AlexNet-style classifier built from 1-D convolutions.

    The network is a convolutional feature extractor (``features``), an
    adaptive average pool that fixes the temporal length at 6, and a
    three-layer fully connected head (``classifier``).  It expects
    single-channel input of shape ``(batch, 1, length)``.

    Args:
        num_classes: Number of outputs produced by the last linear layer.
        dropout: Drop probability used by both dropout layers in the head.
    """

    def __init__(self, num_classes: int = 2, dropout: float = 0.5) -> None:
        super().__init__()
        # Layer order (and therefore the Sequential indices) is kept stable so
        # previously saved state_dicts keep loading.
        self.features = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=2),
            nn.Conv1d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=2),
            nn.Conv1d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv1d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(kernel_size=3, stride=2),
        )
        # Forces a fixed length of 6 so the first Linear layer always sees
        # 256 * 6 features regardless of the input sequence length.
        self.avgpool = nn.AdaptiveAvgPool1d(6)
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Compute raw (pre-activation) scores for a batch.

        Args:
            x: Tensor of shape ``(batch, 1, length)``; ``length`` must be large
                enough to survive the strided convolution and the three
                un-padded max-pool stages.

        Returns:
            A ``(batch,)`` tensor when the head has a single output, otherwise
            a ``(batch, num_classes)`` tensor.  No sigmoid/softmax is applied.
        """
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        # Only a singleton class axis is dropped.  Flattening from dim 0 (the
        # previous behaviour) also merged the batch axis into the class axis
        # whenever num_classes > 1, scrambling per-sample outputs.
        if x.size(1) == 1:
            x = x.squeeze(1)
        return x


### dataset

In [None]:
class Fraud_Dataset(DS):
    """Map-style dataset backed by a DataFrame whose first column is the target.

    Column 0 is stored as ``Y`` and every remaining column as ``X``; both are
    kept as float32 NumPy arrays so the DataLoader collates them into float
    tensors.
    """

    def __init__(self, df):
        """Split ``df`` positionally into features (columns 1..) and target (column 0)."""
        feature_block = df.iloc[:, 1:]
        self.X = feature_block.values.astype(np.float32)
        target_column = df.iloc[:, 0]
        self.Y = target_column.values.astype(np.float32)

    def __len__(self):
        """Number of samples, i.e. the number of target entries."""
        return self.Y.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        sample = self.X[idx]
        label = self.Y[idx]
        return sample, label


### training configuration

In [None]:
# Hyperparameters read by the model constructor and the training loop below.

# -- model --
num_classes = 1   # width of the network's final linear layer
dropout = 0.5     # drop probability handed to AlexNet

# -- optimisation --
batch_size = 10   # samples per DataLoader batch
epochs = 10       # full passes over the training set
lr = 2e-4         # learning rate handed to Adam

### training

In [None]:
# Wrap the training table in a Dataset and serve it in shuffled mini-batches.
train_ds = Fraud_Dataset(train_data)
train_dl = DL(train_ds, batch_size=batch_size, shuffle=True)

# Choose the device once and place the model on it *before* building the
# optimizer, so the optimizer is constructed over the final parameters.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
AN = AlexNet(num_classes=num_classes, dropout=dropout).to(device)
AN_opt = torch.optim.Adam(AN.parameters(), lr=lr)

# NOTE(review): sigmoid + BCELoss is kept as-is; nn.BCEWithLogitsLoss fuses the
# two and is the numerically safer option if the loss ever turns into NaN/inf.
sigmoid = nn.Sigmoid() # final activation
bce = nn.BCELoss() # binary cross-entropy

# Per-batch losses are gathered in a plain list and converted to a DataFrame
# only when a checkpoint is written: DataFrame.append copied the whole frame
# on every call and no longer exists in pandas >= 2.0.
loss_records = []
loss_df = pd.DataFrame({"loss": []})  # stays defined even when epochs == 0

for e in range(epochs):
    for i, (X, y) in enumerate(train_dl):
        X = X.to(device).unsqueeze(1)  # add a channel dimension
        y = y.to(device)

        AN_opt.zero_grad()

        y_logits = AN(X)               # raw scores, one per sample
        y_probs = sigmoid(y_logits)    # BCELoss expects probabilities
        loss = bce(y_probs, y)
        loss.backward()
        AN_opt.step()

        loss_records.append(loss.item())

    # End of epoch: checkpoint the weights together with the loss history
    # accumulated so far (all epochs up to and including this one).
    loss_df = pd.DataFrame({"loss": loss_records})
    torch.save(AN.state_dict(), f"{utils._data_pth_}/models/AN_epochs_{e}.pth")
    loss_df.to_csv(f"{utils._data_pth_}/models/AN_epochs_{e}_loss.csv")


In [None]:
# Post-mortem debugger, left disabled so "Restart Kernel -> Run All" completes
# unattended.  Uncomment and run this cell by hand right after another cell
# has raised an exception to inspect the failing frame.
# %debug