# Environment setup

In [1]:
import os
import time

import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

import lib.data
import lib.models
import random

# Algorithm setup

In [2]:
# Experiment configuration: everything a reader might tune lives in this cell.

# one test accuracy (in percent) is appended per seed by the training cell
test_accs = []
# each seed drives one independent train/test split and training run
seeds = range(20)

# data parameters
data_name = "vehicle"
# NOTE: expanduser only expands a leading "~", so this stays the relative path "data"
data_path = os.path.expanduser("data")

# label noise parameters
flip_labels = False  # when True, training labels are passed through lib.data.noisify
eta = 0.0  # forwarded to lib.data.noisify; presumably the label-flip rate -- TODO confirm

# network architecture (forwarded to lib.models.LegendreLink)
dimh = 2  # dim_hidden
depth = 4
blocks = 2  # n_blocks

# optimiser parameters
lr = 0.01
gamma_cycle = 4  # learning rate is decayed once every `gamma_cycle` epochs
weight_decay = 0.01  # coefficient of the explicit L2 penalty added to the loss
num_epochs = 240
batch_size_train = 64
batch_size_test = 64
categorical_loss = torch.nn.NLLLoss(reduction="mean")
# models are built after this call, so their parameters are float64
torch.set_default_dtype(torch.float64)

# training options
train_kwargs = {"batch_size": batch_size_train, "shuffle": True}
# Evaluation does not depend on sample order, so the test loader is not
# shuffled; this keeps the order of stored predictions aligned with the data.
test_kwargs = {"batch_size": batch_size_test, "shuffle": False}
cuda = torch.cuda.is_available()

# load data and retrieve input feature dimension
X, Y = lib.data.load_libsvm(data_path, data_name)
d = X.shape[1]
# NOTE(review): using max(Y) as the class count assumes labels run 1..K (or are
# remapped downstream before reaching NLLLoss) -- confirm against lib.data.
K = np.max(Y)

In [3]:
# Train and evaluate one model per seed, appending each test accuracy (in
# percent) to `test_accs`.

# cuDNN flags are process-wide, so they are set once. Autotuning
# (benchmark=True) may pick different kernels from run to run, which works
# against the deterministic setting, so it is switched off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Extra DataLoader options used only on GPU. They are merged into per-run
# copies so the module-level train_kwargs/test_kwargs are never mutated.
cuda_kwargs = {"num_workers": 4, "pin_memory": True} if cuda else {}

for seed in seeds:
    # seed every RNG in scope so each run is repeatable
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
        X, Y, test_size=0.2, random_state=seed
    )

    # initialise the inner model and the link applied to its output
    inner_model = lib.models.MultinomialLogisticRegression(
        input_dim=d, output_dim=K
    )
    link_model = lib.models.LegendreLink(
        n_blocks=blocks, K=K, dim_hidden=dimh, depth=depth
    )

    if cuda:
        inner_model = inner_model.cuda()
        link_model = link_model.cuda()

    if flip_labels:
        Ytrain = lib.data.noisify(Ytrain, K=K, eta=eta, random_state=seed)

    data_train = lib.data.LibSVMDataset(Xtrain, Ytrain)
    data_test = lib.data.LibSVMDataset(Xtest, Ytest)
    train_loader = DataLoader(data_train, **{**train_kwargs, **cuda_kwargs})
    test_loader = DataLoader(data_test, **{**test_kwargs, **cuda_kwargs})

    model_parameters = list(inner_model.parameters()) + list(link_model.parameters())
    opt = torch.optim.Adam(model_parameters, lr=lr)
    # the scheduler is stepped once per batch, so a step_size of
    # gamma_cycle * len(train_loader) decays the rate every gamma_cycle epochs
    sch = torch.optim.lr_scheduler.StepLR(
        opt, step_size=gamma_cycle * len(train_loader), gamma=0.95
    )

    for epoch in range(1, num_epochs + 1):
        # optimization step
        inner_model.train()
        link_model.train()
        for X_train, Y_train in train_loader:
            if cuda:
                X_train, Y_train = X_train.cuda(), Y_train.cuda()
            X_train = X_train.type(torch.double)
            Y_train = Y_train.flatten().type(torch.long)

            opt.zero_grad()
            logp_pred = link_model(inner_model(X_train), context=None)
            loss_train = categorical_loss(logp_pred, Y_train)

            # L2 regularisation on all parameters; summing the per-parameter
            # terms directly keeps the penalty on the parameters' own device
            # and dtype
            regulariser = sum(torch.sum(p**2) for p in model_parameters)
            regularised_loss = loss_train + 0.5 * weight_decay * regulariser

            regularised_loss.backward()
            opt.step()
            sch.step()

    # evaluation on the held-out split
    inner_model.eval()
    link_model.eval()
    y_true_batches, logp_pred_batches, y_pred_batches = [], [], []
    with torch.no_grad():
        for X_test, Y_test in test_loader:  # testing data
            if cuda:
                X_test, Y_test = X_test.cuda(), Y_test.cuda()
            X_test = X_test.type(torch.double)
            Y_test = Y_test.flatten().type(torch.long)
            logp_pred = link_model(inner_model(X_test), context=None)
            Y_pred = logp_pred.argmax(dim=-1)

            # move results to the CPU per batch so they can be concatenated
            y_true_batches.append(Y_test.cpu())
            logp_pred_batches.append(logp_pred.cpu())
            y_pred_batches.append(Y_pred.flatten().cpu())

        y_true_test = torch.cat(y_true_batches)
        logp_pred_test = torch.cat(logp_pred_batches)
        y_pred_test = torch.cat(y_pred_batches)
        prop_correct_test = (
            torch.sum(y_pred_test == y_true_test) / y_pred_test.shape[0]
        )
    print(f"pct correct {100 * prop_correct_test:2.2f}")
    test_accs.append(100 * prop_correct_test.item())

pct correct 77.65
pct correct 78.24
pct correct 81.76
pct correct 80.59
pct correct 77.65
pct correct 73.53
pct correct 78.82
pct correct 80.59
pct correct 75.29
pct correct 71.18
pct correct 78.82
pct correct 76.47
pct correct 76.47
pct correct 77.06
pct correct 75.88
pct correct 75.88
pct correct 75.29
pct correct 71.18
pct correct 80.59
pct correct 75.29


# Accuracy statistics

In [4]:
# average test accuracy (in percent) across all seeds
np.asarray(test_accs).mean()

76.91176470588235

In [5]:
# standard error of the mean accuracy: sample std (ddof=1) divided by sqrt(n)
np.asarray(test_accs).std(ddof=1) / np.sqrt(len(test_accs))

0.6526287150171274