In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F

from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import r2_score 

from alpaca.ue import MCDUE
from alpaca.utils.datasets.builder import build_dataset
from alpaca.utils.ue_metrics import ndcg
from alpaca.ue.masks import BasicBernoulliMask, DecorrelationMask, LeverageScoreMask
import alpaca.nn as ann

## Prepare the dataset

In [2]:
# Load dataset
dataset = build_dataset('kin8nm', val_split=1_000)
x_train, y_train = dataset.dataset('train')
x_val, y_val = dataset.dataset('val')
x_train.shape, y_val.shape
train_ds = TensorDataset(torch.FloatTensor(x_train), torch.FloatTensor(y_train))
val_ds = TensorDataset(torch.FloatTensor(x_val), torch.FloatTensor(y_val))
train_loader = DataLoader(train_ds, batch_size=512)
val_loader = DataLoader(val_ds, batch_size=512)

## Define the model

In [3]:
class MLP(nn.Module):
    def __init__(self, layer_sizes, activation=F.celu, dropout_rate=0., dropout_mask=None):
        super().__init__()
        self.layer_sizes = layer_sizes
        self.activation = activation
        
        self.fcs = nn.ModuleList(
            [
                nn.Sequential(
                    *[
                        nn.Linear(layer_sizes[i], layer_sizes[i + 1]),
                        ann.Dropout(dropout_rate, dropout_mask)
                        if i < len(layer_sizes) - 2 and i != 0
                        else nn.Sequential(),
                    ]
                )
                for i, layer in enumerate(layer_sizes[:-1])
            ]
        )
        
    def forward(self, x, dropout_rate=0, dropout_mask=None):
        for layer_num, fc in enumerate(self.fcs):
            x = fc(x)
            x = self.activation(x)
        return x

In [4]:
# Train model
layers = [8, 256, 128, 64, 1]
model = MLP(layers)

## Train the model

In [5]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

model.train()
for epochs in range(10):
    for x_batch, y_batch in train_loader: # Train for one epoch
        predictions = model(x_batch)
        loss = criterion(predictions, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
print('Train loss on last batch', loss.item())

Train loss on last batch 0.014912816695868969


In [6]:
# Check model effectiveness 
model.eval()
x_batch, y_batch = next(iter(val_loader))
predictions = model(x_batch).detach().cpu().numpy()
print('R2:', r2_score(predictions, y_batch))

R2: 0.6662407050850632


## Estimate uncertainty

In [7]:
# Calculate uncertainty estimation
estimator = MCDUE(model)
predictions, estimations = estimator(x_batch)

Uncertainty estimation with MCDUE_regression approach: 100%|██████████| 25/25 [00:00<00:00, 2777.91it/s]


In [8]:
# Calculate NDCG score for the uncertainty
errors = np.abs(estimations - y_batch.reshape((-1)).numpy()) 
score = ndcg(np.array(errors), estimations)
print("Quality score is ", score)

Quality score is  0.9205884137596084


## Change dropout rates/mask

In [9]:
from alpaca.utils import model_builder
model = model_builder.build_model(model, dropout_rate=0.2, dropout_mask=LeverageScoreMask())

TypeError: 'float' object is not subscriptable

In [None]:
model

In [None]:
# Calculate uncertainty estimation
estimator = MCDUE(model)
predictions, estimations = estimator(x_batch)

In [None]:
# Calculate NDCG score for the uncertainty
errors = np.abs(estimations - y_batch.reshape((-1)).numpy()) 
score = ndcg(np.array(errors), estimations)
print("Quality score is ", score)