In [2]:
from sklearn.metrics import accuracy_score
from scipy.special import softmax
import calibrator
import numpy as np
import pandas as pd
from alpaca.dataloader.builder import build_dataset

In [3]:
import sklearn
import math
from torch.nn import functional as f
import torch
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader

In [4]:
def compute_errors(n_bins, probs, labels, len_dataset, threshold):
    ece = calibrator.compute_ece(n_bins, probs, labels, len_dataset)
    sce = calibrator.compute_sce(n_bins, probs, labels)
    ace = calibrator.compute_ace(n_bins, probs, labels)
    tace = calibrator.compute_tace(threshold, probs, labels, n_bins)
    errors = {
        'ECE' : ece,
        'SCE' : sce,
        'ACE' : ace,
        'TACE' : tace
    }
    for error in errors.items():
        print(error[0], '=', error[1])

In [5]:
mnist = build_dataset('mnist', val_size=10_000)

In [6]:
X_train, y_train = mnist.dataset('train')
X_val, y_val = mnist.dataset('val')
X_cal = X_train[48000:][:]
X_train = X_train[0:48000][:]
y_cal = y_train[48000:][:]
y_train = y_train[0:48000][:]

x_shape = (-1, 1, 28, 28)

train_ds = TensorDataset(torch.FloatTensor(X_train.reshape(x_shape)), torch.LongTensor(y_train))
val_ds = TensorDataset(torch.FloatTensor(X_val.reshape(x_shape)), torch.LongTensor(y_val))
train_loader = DataLoader(train_ds, batch_size=512)
val_loader = DataLoader(val_ds, batch_size=512)
cal_ds = TensorDataset(torch.FloatTensor(X_cal.reshape(x_shape)), torch.LongTensor(y_cal))
cal_loader = DataLoader(cal_ds, batch_size=512)
X_val.shape

(10000, 784)

In [7]:
class Net(nn.Module):   
    def __init__(self):
        super(Net, self).__init__()

        self.cnn_layers = nn.Sequential(
            nn.Conv2d(1, 4, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(4, 4, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.linear_layers = nn.Sequential(
            nn.Linear(4 * 7 * 7, 10)
        )
  
    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x

In [8]:
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

In [9]:
for epoch in range(5):
    for x_batch, y_batch in train_loader: # Train for one epoch
        print('.', end='')
        prediction = model(x_batch)
        optimizer.zero_grad()
        loss = criterion(prediction, y_batch)
        loss.backward()
        optimizer.step()
    print('\nTrain loss on last batch', loss.item())

# Check accuracy
x_batch, y_batch = next(iter(val_loader))


class_preds = f.softmax(model(x_batch), dim=-1).detach().numpy()
predictions = np.argmax(class_preds, axis=-1)
print('Accuracy', accuracy_score(predictions, y_batch))

..............................................................................................
Train loss on last batch 0.5957837700843811
..............................................................................................
Train loss on last batch 0.31751400232315063
..............................................................................................
Train loss on last batch 0.22566795349121094
..............................................................................................
Train loss on last batch 0.1808311939239502
..............................................................................................
Train loss on last batch 0.15155142545700073
Accuracy 0.95703125


In [10]:
logits_list = []
labels_list = []
for x_batch, y_batch in cal_loader:
    logits_list.append(model(x_batch))
    labels_list.append(y_batch)
logits = torch.cat(logits_list)
labels = torch.cat(labels_list)
logits.detach_()


tensor([[ 2.0402, -2.3747, -1.5386,  ..., -6.1627,  0.7528, -3.5082],
        [-1.1144, -4.0823, -1.4041,  ..., -8.5825, -2.7105, -7.1008],
        [-2.4544, -6.2429, -3.4529,  ...,  6.3505, -0.6553,  1.9902],
        ...,
        [-1.6984, -7.1889,  1.6950,  ..., -3.6523, -3.9744, -2.1117],
        [-3.9930, -7.3645, -2.3506,  ...,  0.0897, -0.2456, -1.5334],
        [-2.1843, -5.8997, -1.6733,  ...,  0.6538,  1.5230,  1.6767]])

In [13]:
calibr = calibrator.ModelWithTempScaling(model)

In [14]:
calibr.scaling(logits, labels)

ModelWithTempScaling(
  (model): Net(
    (cnn_layers): Sequential(
      (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (4): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (5): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (6): ReLU(inplace=True)
      (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (linear_layers): Sequential(
      (0): Linear(in_features=196, out_features=10, bias=True)
    )
  )
)

In [15]:
val_logits_list = []
val_labels_list = []
for x_batch, y_batch in val_loader:
    val_logits_list.append(model(x_batch))
    val_labels_list.append(y_batch)
val_logits = torch.cat(val_logits_list)
val_labels = torch.cat(val_labels_list)
val_logits.detach_()

tensor([[ 8.4618, -6.9845, -2.5562,  ..., -2.8854,  1.0569, -1.4189],
        [-4.6299, -8.1617, -2.4772,  ..., -5.5162, -3.4626, -0.8133],
        [-1.7167, -6.0005, -1.3019,  ..., -2.3598,  0.5954, -3.3539],
        ...,
        [-4.9065,  1.0178, -0.0791,  ..., -1.3204,  5.3247, -1.1474],
        [-1.9918, -3.9923,  5.4205,  ..., -3.6292, -0.3483, -2.3259],
        [-3.9200, -2.2164, -1.2872,  ...,  8.2665, -0.5424, -0.2890]])

In [16]:
probs = f.softmax(val_logits, dim=-1)

In [17]:
compute_errors(n_bins=15, probs=probs.numpy(), labels=val_labels.numpy(),
               len_dataset=np.shape(probs)[0], threshold=0.9)

  return np.array(bins), np.array(true_labels_for_bins)


ECE = tensor([0.0310])
SCE = tensor([0.0037])
ACE = tensor(0.0335)
TACE = tensor(0.0187)


In [18]:
print(calibr.temperature)

Parameter containing:
tensor([0.6377], requires_grad=True)


In [19]:
temp_scaling_logits = torch.true_divide(val_logits, calibr.temperature)
temp_scaling_probs = f.softmax(temp_scaling_logits, dim=1)

In [20]:
compute_errors(n_bins=15, probs=temp_scaling_probs.detach().numpy(), labels=val_labels.numpy(),
               len_dataset=np.shape(probs)[0], threshold=0.9)

ECE = tensor([0.0087])
SCE = tensor([0.0015])
ACE = tensor(0.0172)
TACE = tensor(0.0066)


In [23]:
calibr = calibrator.ModelWithVectScaling(model, 10).float()
labels.dtype

torch.int64

In [25]:
calibr.scaling(logits, labels, lr=0.01, max_iter=50)

ModelWithVectScaling(
  (model): Net(
    (cnn_layers): Sequential(
      (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (4): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (5): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (6): ReLU(inplace=True)
      (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (linear_layers): Sequential(
      (0): Linear(in_features=196, out_features=10, bias=True)
    )
  )
)

In [26]:
vect_scaling_logits = calibr.scaling_logits(val_logits)
vect_scaling_probs = f.softmax(vect_scaling_logits, dim=1)

In [27]:
compute_errors(n_bins=15, probs=vect_scaling_probs.detach().numpy(), labels=val_labels.numpy(),
               len_dataset=np.shape(probs)[0], threshold=0.9)

ECE = tensor([0.0192])
SCE = tensor([0.0026])
ACE = tensor(0.0241)
TACE = tensor(0.0123)


In [28]:
calibr.W_and_b

Parameter containing:
tensor([ 1.1565,  1.1151,  1.1211,  1.1463,  1.0053,  1.0451,  1.1684,  1.1099,
         1.1534,  1.1622,  0.0301,  0.0187,  0.0047, -0.0012, -0.0503, -0.0283,
         0.0143,  0.0076, -0.0110,  0.0154], requires_grad=True)

In [31]:
calibr = calibrator.ModelWithMatrScaling(model, 10).float()
calibr.scaling(logits, labels, lr=0.0001, max_iter=1000)

ModelWithMatrScaling(
  (model): Net(
    (cnn_layers): Sequential(
      (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (4): Conv2d(4, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (5): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (6): ReLU(inplace=True)
      (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (linear_layers): Sequential(
      (0): Linear(in_features=196, out_features=10, bias=True)
    )
  )
)

In [32]:
matr_scaling_logits = calibr.scaling_logits(val_logits)
matr_scaling_probs = f.softmax(matr_scaling_logits, dim=1)

In [33]:
compute_errors(n_bins=15, probs=matr_scaling_probs.detach().numpy(), labels=val_labels.numpy(),
               len_dataset=np.shape(probs)[0], threshold=0.9)

ECE = tensor([0.0068])
SCE = tensor([0.0015])
ACE = tensor(0.0166)
TACE = tensor(0.0054)


In [34]:
hist_binning_probs = calibrator.multiclass_histogram_binning(15, logits.numpy(), labels.numpy(), val_logits)

In [35]:
compute_errors(n_bins=15, probs=hist_binning_probs, labels=val_labels.numpy(),
               len_dataset=np.shape(probs)[0], threshold=0.9)

ECE = tensor([0.0083])
SCE = tensor([0.0021])
ACE = tensor(0.0172, dtype=torch.float64)
TACE = tensor(0.0133, dtype=torch.float64)
