In [None]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_val_score, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from utils import graph_overfit

import numpy as np
import pandas as pd
import dill as pkl
import os
import matplotlib.pyplot as plt
import tqdm

import torch.nn as nn
import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

# Seed PyTorch and NumPy's global generator with the same fixed value.
torch.manual_seed(73)
np.random.seed(73)

# Use the first CUDA device when PyTorch reports one, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code. Only point this at files you trust.
def _load_from_data_dir(filename):
    """Load a single file from the local 'data' directory, permitting pickled contents."""
    return np.load(os.path.join('data', filename), allow_pickle=True)


def _as_tensors(features, labels):
    """Convert a feature array to a float32 tensor and a label container to a LongTensor."""
    feature_tensor = torch.tensor(features.astype(np.float32))
    label_tensor = torch.tensor(labels.values).type(torch.LongTensor)
    return feature_tensor, label_tensor


# Train / test splits as stored on disk.
X_train = _load_from_data_dir('X_train_transformed.npy')
y_train = _load_from_data_dir('y_train.pkl')
X_test = _load_from_data_dir('X_test_transformed.npy')
y_test = _load_from_data_dir('y_test.pkl')

# Pre-computed folds; each fold's index is kept as a plain array of row ids.
X_folds = _load_from_data_dir('X_folds_tuple.npy')
y_folds = _load_from_data_dir('y_folds_tuple.npy')
fold_ids = [np.array(fold.index) for fold in X_folds]

# Labels are shifted down by one before being turned into tensors
# (presumably 1-based on disk -> 0-based class indices; confirm against the data).
y_train -= 1
X_tensor, y_tensor = _as_tensors(X_train, y_train)
train_data = TensorDataset(X_tensor, y_tensor)

y_test -= 1
X_test_tensor, y_test_tensor = _as_tensors(X_test, y_test)
test_data = TensorDataset(X_test_tensor, y_test_tensor)

# Both loaders reshuffle on every pass and load in the main process.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=0)
test_loader = DataLoader(test_data, batch_size=16, shuffle=True, num_workers=0)

In [None]:
def custom_cv_folds(fold_ids):
    """Yield leave-one-fold-out ``(train_ids, validation_ids)`` index pairs.

    Args:
        fold_ids: sequence of array-likes, one per fold, each holding the row
            ids that belong to that fold. Folds may have different lengths.

    Yields:
        Tuples ``(train_ids, validation_ids)`` where ``validation_ids`` is the
        n-th fold and ``train_ids`` is the concatenation of all other folds,
        in their original order.

    Raises:
        ValueError: when exactly one fold is given (raised on first iteration),
            because no training ids would remain. An empty ``fold_ids`` simply
            yields nothing.
    """
    # Keep the folds in a plain list. Packing them into an object ndarray turns
    # equal-length folds into a 2-D object array, and the concatenated ids then
    # come out with dtype=object instead of an integer dtype.
    folds = [np.asarray(ids) for ids in fold_ids]
    if len(folds) == 1:
        raise ValueError("custom_cv_folds needs at least two folds to build a train/validation split")

    for held_out, validation_ids in enumerate(folds):
        remaining = folds[:held_out] + folds[held_out + 1:]
        yield np.concatenate(remaining), validation_ids

In [None]:
def plot_training(train_vals, test_vals, base_vals):
    """Plot loss, accuracy and revenue curves for train, test and baseline.

    Args:
        train_vals: three sequences (loss, accuracy, revenue) with one scalar
            per recorded step for the training set.
        test_vals: same layout, for the test set.
        base_vals: same layout, for the baseline classifier.

    Each scalar may be a Python number or a 0-dim tensor. Values are converted
    with ``float()`` before plotting, because matplotlib cannot convert tensors
    that live on a CUDA device or that require grad.
    """
    def _as_floats(series):
        # float() works for Python numbers and for 0-dim tensors on any device.
        return [float(value) for value in series]

    titles = ['Loss', 'Accuracy', 'Revenue']
    fig, axs = plt.subplots(3, 1, figsize=(10, 15))
    for ax, title, train, test, base in zip(axs, titles, train_vals, test_vals, base_vals):
        ax.plot(_as_floats(train), label='train')
        ax.plot(_as_floats(test), label='test')
        ax.plot(_as_floats(base), label='base')
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.legend()

    plt.show()

In [None]:
def calculate_revenue(predictions, targets, cost_matrix=None):
    """Sum the revenue earned by a batch of predictions.

    Args:
        predictions: 2-D tensor of per-class scores; the predicted class of
            each row is its ``argmax`` along dim 1.
        targets: 1-D integer tensor of true class indices.
        cost_matrix: optional revenue matrix indexed as
            ``[true class, predicted class]``. When omitted, the notebook's
            default 4x4 matrix is used.
            NOTE(review): the default is not symmetric (entry [3][2] is -2
            while [2][3] is -5) - confirm that this is intended.

    Returns:
        0-dim tensor with the summed revenue, located on ``targets.device``.
    """
    if cost_matrix is None:
        # Built per call instead of as a default argument: a tensor default is
        # created once at definition time, is shared between calls, and was
        # pinned to whatever the global `device` was at that moment.
        cost_matrix = torch.tensor([
            [5, -5, -5, 2],
            [-5, 10, 2, -5],
            [-5, 2, 10, -5],
            [2, -5, -2, 5]
        ])

    # Put the matrix and both index tensors on one device so the lookup works
    # even when predictions and targets were produced on different devices.
    cost_matrix = torch.as_tensor(cost_matrix).to(targets.device)
    winners = predictions.argmax(dim=1).to(targets.device)
    return torch.sum(cost_matrix[targets, winners])

In [None]:
class CostSensitiveRegularizedLoss(nn.Module):
    """Cross-entropy plus a cost-sensitive regulariser.

    The regulariser is the expected cost of the predicted class distribution:
    for each sample, the row of the (negated) revenue matrix selected by the
    true label is weighted by the softmax probabilities of the outputs.
    Because probabilities lie in [0, 1] and sum to one, the term is bounded by
    the smallest and largest matrix entries. Multiplying the matrix with raw
    outputs or log-probabilities instead is unbounded below and lets the
    optimiser drive the loss towards minus infinity.

    Args:
        cost_matrix: revenue matrix indexed as ``[true class, predicted class]``;
            it is negated internally so that higher revenue means lower loss.
        lambd: weight of the cost term relative to the cross-entropy.
        reduction: ``'mean'``, ``'sum'`` or ``'none'``; applied to both the
            cross-entropy and the cost term.

    Raises:
        ValueError: if ``reduction`` is not one of the supported values.
    """

    _REDUCTIONS = ('mean', 'sum', 'none')

    def __init__(self, cost_matrix, lambd, reduction='mean'):
        super(CostSensitiveRegularizedLoss, self).__init__()
        if reduction not in self._REDUCTIONS:
            raise ValueError(
                f"reduction must be one of {self._REDUCTIONS}, got {reduction!r}"
            )
        # Revenue -> cost: minimising the loss should maximise revenue.
        self.cost_matrix = (-1.)*cost_matrix
        # Use the same reduction for both terms so they stay on the same scale.
        self.base_loss = torch.nn.CrossEntropyLoss(reduction=reduction)
        self.lambd = lambd
        self.reduction = reduction

    def forward(self, outputs, labels):
        """Return the combined loss for a batch.

        Args:
            outputs: 2-D tensor of per-class scores (logits or log-probabilities;
                softmax maps both to the same probabilities).
            labels: 1-D integer tensor of true class indices.

        Returns:
            A 0-dim tensor for ``'mean'``/``'sum'``, or a per-sample tensor for
            ``'none'``.
        """
        base_l = self.base_loss(outputs, labels)
        # Normalise to probabilities so the expected cost stays bounded.
        probabilities = torch.softmax(outputs.float(), dim=-1)
        cost_l = (self.cost_matrix[labels]*probabilities).sum(dim=-1)
        if self.reduction == 'mean':
            cost_l = cost_l.mean()
        elif self.reduction == 'sum':
            cost_l = cost_l.sum()

        return base_l + self.lambd * cost_l

In [None]:
class nn_classifier(nn.Module):
    """Feed-forward classifier producing log-probabilities over 4 classes.

    Layout: Linear(n_features, 128) -> SELU -> Linear(128, 64) -> AlphaDropout
    -> SELU -> Linear(64, 32) -> AlphaDropout -> Linear(32, 4) -> LogSoftmax.

    Args:
        n_features: width of the input feature vector.
    """

    def __init__(self, n_features=99):
        super().__init__()

        # Input projection and its activation.
        self.input_layer = nn.Linear(n_features, 128)
        self.input_activation = nn.SELU()

        # NOTE(review): there is no activation between the 64->32 and 32->4
        # Linear layers; in eval mode (dropout off) the two compose into a
        # single linear map. Confirm whether a SELU was intended there.
        hidden_stack = [
            nn.Linear(128, 64),
            nn.AlphaDropout(p=0.1),
            nn.SELU(),
            nn.Linear(64, 32),
            nn.AlphaDropout(p=0.1),
            nn.Linear(32, 4),
        ]
        self.hidden_layers = nn.Sequential(*hidden_stack)

        # Normalise the class scores along dim 1.
        self.output_layer = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Map a 2-D batch of feature rows to per-class log-probabilities."""
        projected = self.input_layer(x)
        activated = self.input_activation(projected)
        scores = self.hidden_layers(activated)
        return self.output_layer(scores)

In [None]:
class BaselineClassifier(nn.Module):
    """Constant classifier that always predicts the most frequent label.

    Args:
        y: tensor of training labels; its mode becomes the fixed prediction.
        n_classes: number of output columns (defaults to 4).
    """

    def __init__(self, y, n_classes=4):
        super(BaselineClassifier, self).__init__()
        self.prediction = torch.mode(y)[0]
        self.n_classes = n_classes

    def forward(self, x):
        """Return a one-hot score matrix with one row per row of ``x``.

        The output is allocated on ``x.device`` so it can be compared with
        targets that live on the same device as the inputs.
        """
        output = torch.zeros((len(x), self.n_classes), device=x.device)
        # int() turns the stored 0-dim tensor into a plain column index, which
        # works regardless of the device the label tensor came from.
        output[:, int(self.prediction)] = 1.
        return output

In [None]:
def train(model: nn.Module, data_loader: DataLoader, optimizer: torch.optim.Optimizer, loss_function: nn.modules.loss, device: torch.device):
    """Run one optimisation pass over ``data_loader`` and collect metrics.

    Args:
        model: network to optimise; it is switched to training mode.
        data_loader: yields ``(inputs, targets)`` batches.
        optimizer: optimiser stepping the model's parameters.
        loss_function: callable ``(outputs, targets) -> scalar loss``.
        device: device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy, revenue)`` of 0-dim tensors:
        ``loss`` is the SUM of the per-batch loss values (not averaged over
        batches), ``accuracy`` is the fraction of correctly classified samples
        and ``revenue`` is the sum of ``calculate_revenue`` over all batches.
        With an empty loader all three are zero.
    """
    model.train()
    # Tensor accumulators keep the return types stable even for an empty loader.
    running_loss = torch.zeros((), device=device)
    correct = torch.zeros((), dtype=torch.long, device=device)
    revenue = torch.zeros((), dtype=torch.long, device=device)
    total = 0

    for data in data_loader:
        inp, target = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()
        output = model(inp)

        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()

        # Bookkeeping only: nothing below should be tracked by autograd.
        with torch.no_grad():
            # Detach before accumulating. Adding the live loss would chain every
            # batch's autograd history onto the running total for the epoch.
            running_loss = running_loss + loss.detach()

            winners = output.argmax(dim=1)
            correct = correct + torch.sum(winners == target)
            total += len(target)
            revenue = revenue + calculate_revenue(output, target)

    # max(..., 1) avoids a division by zero when the loader yielded no samples.
    accuracy = correct.float() / float(max(total, 1))
    out_loss = running_loss

    return out_loss, accuracy, revenue


def evaluate(model: nn.Module, data_loader: DataLoader, loss_function: nn.modules.loss, device: torch.device):
    """Score ``model`` on ``data_loader`` without updating any parameters.

    Args:
        model: network to score; it is switched to evaluation mode (the
            previous mode is not restored).
        data_loader: yields ``(inputs, targets)`` batches.
        loss_function: callable ``(outputs, targets) -> scalar loss``.
        device: device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy, revenue)`` of 0-dim tensors:
        ``loss`` is the SUM of the per-batch loss values (not averaged over
        batches), ``accuracy`` is the fraction of correctly classified samples
        and ``revenue`` is the sum of ``calculate_revenue`` over all batches.
        With an empty loader all three are zero.
    """
    model.eval()

    # Tensor accumulators: with plain ints an empty loader crashed on
    # `correct.float()` and the caller's `.detach()` calls.
    running_loss = torch.zeros((), device=device)
    correct = torch.zeros((), dtype=torch.long, device=device)
    revenue = torch.zeros((), dtype=torch.long, device=device)
    total = 0

    with torch.no_grad():
        for data in data_loader:
            inp, target = data[0].to(device), data[1].to(device)
            output = model(inp)
            loss = loss_function(output, target)

            running_loss = running_loss + loss

            winners = output.argmax(dim=1)
            correct = correct + torch.sum(winners == target)
            total += len(target)
            revenue = revenue + calculate_revenue(output, target)

    # max(..., 1) avoids a division by zero when the loader yielded no samples.
    accuracy = correct.float() / float(max(total, 1))
    out_loss = running_loss

    return out_loss, accuracy, revenue

In [None]:
# Revenue per (true class, predicted class) pair, written once and reused for
# both the NumPy and the torch representation.
_REVENUE_ROWS = [
    [5, -5, -5, 2],
    [-5, 10, 2, -5],
    [-5, 2, 10, -5],
    [2, -5, -2, 5],
]

cost_matrix_np = np.array(_REVENUE_ROWS)
cost_matrix = torch.tensor(_REVENUE_ROWS, device=device)

scaler = MinMaxScaler()

# No rescaling is applied at the moment: the "transformed" matrix is the raw
# one. The two disabled alternatives are kept below for reference.
cost_matrix_transformed = cost_matrix
#cost_matrix_transformed = torch.Tensor(scaler.fit_transform(cost_matrix_np))
#cost_matrix_transformed = torch.nn.functional.normalize(torch.Tensor(cost_matrix))

In [None]:
model = nn_classifier(n_features=X_train.shape[1])
baseline_clf = BaselineClassifier(y_tensor)
model.to(device=device)

# Revenue matrix indexed as [true class, predicted class], as floats so the
# loss can multiply it with the model outputs.
cost_matrix = torch.tensor([
        [5, -5, -5, 2],
        [-5, 10, 2, -5],
        [-5, 2, 10, -5],
        [2, -5, -2, 5]
        ], dtype=torch.float, device=device)

# Cross-entropy plus the cost-sensitive term with weight 2.
# Swap in nn.CrossEntropyLoss() to train without the cost term.
loss_function = CostSensitiveRegularizedLoss(cost_matrix, 2)
lr = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

epochs = 1000
train_losses, train_accs, train_revs = [], [], []
test_losses, test_accs, test_revs = [], [], []
base_losses, base_accs, base_revs = [], [], []

for update in range(epochs):
    # The histories hold plain Python floats: matplotlib cannot convert
    # tensors that live on a CUDA device, and floats keep no tensor memory alive.
    train_loss, train_acc, train_rev = train(model, train_loader, optimizer, loss_function, device)
    train_losses.append(float(train_loss))
    train_accs.append(float(train_acc))
    train_revs.append(float(train_rev))

    test_loss, test_acc, test_rev = evaluate(model, test_loader, loss_function, device)
    test_losses.append(float(test_loss))
    test_accs.append(float(test_acc))
    test_revs.append(float(test_rev))

    # The baseline has no trainable parameters; it is re-scored every epoch so
    # its curve has the same length as the others.
    baseline_loss, baseline_acc, baseline_rev = evaluate(baseline_clf, test_loader, loss_function, device)
    base_losses.append(float(baseline_loss))
    base_accs.append(float(baseline_acc))
    base_revs.append(float(baseline_rev))

    if update % 50 == 0:
        print(f'############# Epoch:{update} ###############')
        print(f'Training Loss: {train_loss:.2f}\tTraining accuracy: {train_acc:.2f}\tTraining Revenue: {train_rev:.2f}')
        print(f'Test Loss: {test_loss:.2f}\tTest accuracy: {test_acc:.2f}\tTest Revenue: {test_rev:.2f}')

plot_training((train_losses, train_accs, train_revs), (test_losses, test_accs, test_revs), (base_losses, base_accs, base_revs))

############# Epoch:0 ###############
Training Loss: 33.88	Training accuracy: 0.32	Training Revenue: 1445.00
Test Loss: -87.91	Test accuracy: 0.44	Test Revenue: 1427.00
############# Epoch:50 ###############
Training Loss: -22215906.00	Training accuracy: 0.42	Training Revenue: 3090.00
Test Loss: -29221276.00	Test accuracy: 0.41	Test Revenue: 1165.00


KeyboardInterrupt: 

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=e2905743-bdaf-45dd-a896-9824e6125426' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>