## Uncertainty estimation for regression

In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F

from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn import metrics

from alpaca.utils.datasets.builder import build_dataset
from alpaca.utils.ue_metrics import get_uq_metrics, ndcg, uq_ll
from alpaca.ue.masks import BasicBernoulliMask, DecorrelationMask, LeverageScoreMask
from alpaca.utils import model_builder
import alpaca.nn as ann

from nuq import NuqRegressor

In [2]:
class MLP(nn.Module):
    """Fully connected regressor with a feature backbone and a scalar head.

    The backbone narrows the input through three linear layers of widths
    ``4 * base_size``, ``2 * base_size`` and ``base_size``, each followed by a
    CELU activation; the second and third linear layers are additionally
    followed by an ``ann.Dropout`` layer placed before the activation.

    Args:
        input_size: number of input features.
        base_size: width of the last backbone layer (the embedding size).
        dropout_rate: forwarded unchanged to every ``ann.Dropout``.
        dropout_mask: forwarded unchanged to every ``ann.Dropout``.
    """

    def __init__(self, input_size, base_size=64, dropout_rate=0., dropout_mask=None):
        super().__init__()
        wide, middle, narrow = 4 * base_size, 2 * base_size, 1 * base_size

        # Modules are instantiated in network order, so parameter
        # initialisation and the Sequential indices match a literal
        # nn.Sequential(...) declaration.
        layers = [
            nn.Linear(input_size, wide),
            nn.CELU(),
            nn.Linear(wide, middle),
            ann.Dropout(dropout_rate, dropout_mask),
            nn.CELU(),
            nn.Linear(middle, narrow),
            ann.Dropout(dropout_rate, dropout_mask),
            nn.CELU(),
        ]
        self.backbone = nn.Sequential(*layers)
        self.final_layer = nn.Linear(base_size, 1)

    def forward(self, x):
        """Return the scalar prediction for each row of ``x``."""
        features = self.backbone(x)
        return self.final_layer(features)

    def get_embeddings(self, x):
        """Return the backbone output, i.e. the input of the final layer."""
        features = self.backbone(x)
        return features

In [3]:
def train_model(model, train_loader, n_epochs=100, lr=1e-3):
    """Fit ``model`` to the batches of ``train_loader`` with MSE loss and Adam.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to predictions.
        train_loader: iterable yielding ``(x_batch, y_batch)`` pairs.
        n_epochs: number of full passes over ``train_loader`` (default 100,
            the value that used to be hard-coded).
        lr: Adam learning rate (default 1e-3, which is Adam's own default).

    Returns:
        The same ``model`` object, trained in place and left in train mode.
    """
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    model.train()
    for _ in range(n_epochs):
        for x_batch, y_batch in train_loader:
            predictions = model(x_batch)
            # MSELoss broadcasts mismatched shapes: an (N,) target against
            # (N, 1) predictions yields an (N, N) loss with only a warning.
            # Align the target when it holds the same number of elements.
            if (y_batch.shape != predictions.shape
                    and y_batch.numel() == predictions.numel()):
                y_batch = y_batch.reshape(predictions.shape)
            loss = criterion(predictions, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    return model

In [4]:
def estimate_uncertainty_quality(dataset_name, base_size=64):
    """Train an MLP on a dataset and print how well NUQ uncertainty tracks its errors.

    The function builds the dataset with a 10% validation split, trains an
    ``MLP`` on the training part, fits a ``NuqRegressor`` on the training
    embeddings produced by the MLP backbone, and then scores the epistemic
    uncertainty predicted for the validation embeddings against the network's
    absolute validation errors.

    Args:
        dataset_name: name forwarded to ``build_dataset``.
        base_size: width parameter forwarded to ``MLP``.

    Returns:
        None. All results are printed.
    """
    dataset = build_dataset(dataset_name, val_split=0.1)
    x_train, y_train = dataset.dataset('train')
    x_val, y_val = dataset.dataset('val')
    print("number of features in the dataset: ", x_train.shape[1])

    # Convert once and reuse the tensors for both the loader and inference.
    x_train, y_train = torch.FloatTensor(x_train), torch.FloatTensor(y_train)
    x_val, y_val = torch.FloatTensor(x_val), torch.FloatTensor(y_val)

    # Shuffle so mini-batches do not follow the storage order of the dataset.
    train_loader = DataLoader(
        TensorDataset(x_train, y_train), batch_size=512, shuffle=True
    )

    model = MLP(
        input_size=x_train.shape[1],
        base_size=base_size,
        dropout_rate=0.1,
        dropout_mask=BasicBernoulliMask,
    )
    model = train_model(model, train_loader)
    model.eval()

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        train_embeddings = model.get_embeddings(x_train).cpu().numpy()
        val_embeddings = model.get_embeddings(x_val).cpu().numpy()

    regressor = NuqRegressor()
    print("embedding size: ", train_embeddings.shape[1])
    # Embeddings are numpy arrays, so pass the targets as numpy as well.
    regressor.fit(train_embeddings, y_train.reshape(-1).numpy())

    uncertainty = regressor.predict_uncertainty(val_embeddings, infinity=100)
    with torch.no_grad():
        net_predictions = model(x_val).cpu().numpy()

    targets_flat = y_val.numpy().reshape(-1)
    predictions_flat = net_predictions.reshape(-1)
    # Uncertainty quality is measured against per-sample errors, not against
    # the raw predictions (alpaca's signature is
    # get_uq_metrics(estimations, errors, ...) -- verify against the installed
    # alpaca version).
    errors = np.abs(predictions_flat - targets_flat)

    print(uncertainty["aleatoric"].mean(), uncertainty["epistemic"].mean(), uncertainty["total"].mean())
    # Local names avoid shadowing the module-level `ndcg` import.
    acc_score, ndcg_score, ll_score = get_uq_metrics(uncertainty["epistemic"], errors)
    print("====================================================================================")
    print(dataset_name)
    print("Mean squared error: ", metrics.mean_squared_error(targets_flat, predictions_flat))
    print("Uncertainty quality by \naccuracy: ", acc_score, "\nndcg: ", ndcg_score, "\nlog-likelihood: ", ll_score)
    print("====================================================================================")

In [5]:
# Candidate dataset names (presumably those understood by build_dataset --
# verify against the installed alpaca version). Only "boston_housing" is
# evaluated below; the list itself is not iterated in this cell.
datasets = ["red_wine", "kin8nm", "ccpp", "boston_housing", "naval_propulsion"]
# NOTE(review): the recorded run of this cell ended with
# "ValueError: Root finding did not converge. Need more data."
estimate_uncertainty_quality("boston_housing")

number of features in the dataset:  13
embedding size:  64


  t_opt = np.power(2 * N * np.sqrt(np.pi) * f, -2.0 / 5)
  time = np.power((2 * const * K0 / (N * f)), (2.0 / (3.0 + 2.0 * s)))


ValueError: Root finding did not converge. Need more data.