# Evaluation of MLP log-likelihood

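Confirm that the eeyore (PyTorch) implementation of the MLP log-target (log-likelihood plus log-prior) and its gradient coincides with a manually coded implementation.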

In [1]:
## Import packages

import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.distributions import Normal
from torch.autograd import grad

from eeyore.models.mlp import Hyperparameters, MLP
from eeyore.data import XOR

## Compute MLP log-target using eeyore API version

In [2]:
## Load the XOR dataset in double precision and pull out its inputs and targets

xor = XOR(dtype=torch.float64)
data, labels = xor.data, xor.labels

In [3]:
## Set up a [2, 2, 1] MLP and attach an independent Normal(0, 100) prior to its 9 parameters

hparams = Hyperparameters([2, 2, 1])
model = MLP(hparams=hparams, dtype=torch.float64)

prior_loc = torch.zeros(9, dtype=torch.float64)
prior_scale = 100*torch.ones(9, dtype=torch.float64)
model.prior = Normal(prior_loc, prior_scale)

In [4]:
## Pin the parameter vector used for every evaluation in this notebook

theta0 = torch.tensor(
    [1.1, -2.9, -0.4, 0.8, 4.3, 9.2, 4.44, -3.4, 7.2],
    dtype=torch.float64,
)

# Work on detached copies so theta0 itself is never touched.
theta = theta0.detach().clone()
model.set_params(theta.detach().clone())

In [5]:
## Compute MLP log-target using eeyore API version
# theta, data and labels are handed to the model's log_target method;
# the bare name on the last line displays the result as the cell output.

lt_result01 = model.log_target(theta, data, labels)
lt_result01

tensor(-65.8127, dtype=torch.float64, grad_fn=<AddBackward0>)

In [6]:
## Confirm that log-target is the sum of log-lik and log-prior

# Evaluate each term once and reuse it, instead of calling log_lik and
# log_prior a second time just to form the sum.
ll_api = model.log_lik(data, labels)
lp_api = model.log_prior()

# Displayed as (log-lik, log-prior, log-lik + log-prior).
ll_api, lp_api, ll_api + lp_api

(tensor(-16.0859, dtype=torch.float64, grad_fn=<NegBackward>),
 tensor(-49.7268, dtype=torch.float64, grad_fn=<SumBackward0>),
 tensor(-65.8127, dtype=torch.float64, grad_fn=<AddBackward0>))

## Compute MLP log-target fully manually

In [7]:
def log_lik(theta, x, y):
    """Log-likelihood of a manually coded sigmoid MLP with layer sizes [2, 2, 1].

    Parameters
    ----------
    theta : tensor with 9 elements, laid out as
        [0:4] hidden weights (viewed as 2x2), [4:6] hidden biases,
        [6:8] output weights (viewed as 1x2), [8:9] output bias.
    x : input tensor whose last dimension is 2.
    y : target tensor with the same shape as the network output.

    Returns
    -------
    Scalar tensor: the negated, summed binary cross-entropy between the
    network output and y.
    """
    # Unpack the flat parameter vector layer by layer.
    hidden_weight, hidden_bias = theta[0:4].view(2, 2), theta[4:6].view(2)
    output_weight, output_bias = theta[6:8].view(1, 2), theta[8:9].view(1)

    # Forward pass: affine map followed by a sigmoid at each layer.
    hidden = torch.sigmoid(x @ hidden_weight.t() + hidden_bias)
    prob = torch.sigmoid(hidden @ output_weight.t() + output_bias)

    return -F.binary_cross_entropy(prob, y, reduction='sum')

In [8]:
def log_prior(theta):
    """Log-density of theta under 9 independent Normal(loc=0, scale=100) priors.

    Parameters
    ----------
    theta : 9-element parameter tensor.

    Returns
    -------
    Scalar tensor: the sum of the per-element log-probabilities.
    """
    loc = torch.zeros(9, dtype=torch.float64)
    scale = 100*torch.ones(9, dtype=torch.float64)
    return Normal(loc, scale).log_prob(theta).sum()

In [9]:
def log_target(theta, x, y):
    """Manually coded log-target: log_lik(theta, x, y) plus log_prior(theta)."""
    likelihood_term = log_lik(theta, x, y)
    prior_term = log_prior(theta)
    return likelihood_term + prior_term

In [10]:
# Evaluate the manually coded log_target with the same theta, data and labels
# that were passed to model.log_target, so the two values can be compared.
lt_result02 = log_target(theta, data, labels)
lt_result02

tensor(-65.8127, dtype=torch.float64)

## Print out values of both log-target implementations

In [11]:
# The eeyore and the manual log-target must agree; assert it rather than
# relying on a visual comparison of the printed numbers.
assert torch.allclose(lt_result01.detach(), lt_result02.detach())

# .item() already returns a plain Python float, so the legacy .data hop is not needed.
[lt.item() for lt in (lt_result01, lt_result02)]

[-65.81269034997256, -65.81269034997256]

## Compute grad of MLP log-target using eeyore API version

In [12]:
# Start from a fresh detached copy of theta0.
theta = theta0.clone().detach()

lt_val01 = model.log_target(theta, data, labels)

# grad_log_target is given the log-target value computed just above.
# NOTE(review): presumably it differentiates with respect to the model
# parameters — confirm against the eeyore implementation.
glt_result01 = model.grad_log_target(lt_val01)
glt_result01

tensor([-3.1125e-01, -3.1041e-01,  2.7002e-04,  1.5006e-04, -3.6975e-01,
        -3.4671e-04, -1.9098e+00, -1.9979e+00, -1.9992e+00],
       dtype=torch.float64, grad_fn=<CatBackward>)

## Compute grad of MLP log-target using backward pass

In [13]:
# Start from a fresh detached copy of theta0.
theta = theta0.clone().detach()

lt_val03 = model.log_target(theta, data, labels)
lt_val02 = lt_val03  # alias retained so any cell that refers to lt_val02 still finds it

# Reset gradients accumulated by an earlier backward pass. Before the first
# backward() call p.grad is None, so there is nothing to reset; guarding on
# that removes the need for an extra forward/backward pass (and a retained
# graph) whose only purpose was to allocate p.grad.
for p in model.parameters():
    if p.grad is not None:
        p.grad.zero_()

lt_val03.backward()

# Flatten the per-parameter gradients into one vector, in parameter order.
glt_result02 = torch.cat([p.grad.view(-1) for p in model.parameters()])
glt_result02

tensor([-3.1125e-01, -3.1041e-01,  2.7002e-04,  1.5006e-04, -3.6975e-01,
        -3.4671e-04, -1.9098e+00, -1.9979e+00, -1.9992e+00],
       dtype=torch.float64)

## Compute grad of MLP log-target calling grad() on manually coded log_target()

In [14]:
# Fresh leaf copy of theta0 that autograd tracks.
theta = theta0.clone().detach().requires_grad_(True)

lt_val04 = log_target(theta, data, labels)

# grad() returns a tuple with one entry per input; theta is the only input.
glt_result03 = grad(lt_val04, theta)[0]
glt_result03

tensor([-3.1125e-01, -3.1041e-01,  2.7002e-04,  1.5006e-04, -3.6975e-01,
        -3.4671e-04, -1.9098e+00, -1.9979e+00, -1.9992e+00],
       dtype=torch.float64)

## Compute grad of MLP log-target calling grad() on manually coded log-lik and log-prior

In [15]:
## Differentiate log-lik and log-prior separately, then add the two gradients

# Fresh leaf copy of theta0 that autograd tracks.
theta = theta0.clone().detach().requires_grad_(True)

ll_val = log_lik(theta, data, labels)
lp_val = log_prior(theta)

# grad() returns a tuple with one entry per input; theta is the only input.
gll_val = grad(ll_val, theta)[0]
glp_val = grad(lp_val, theta)[0]

glt_result04 = gll_val + glp_val

# Displayed as (grad log-lik, grad log-prior, their sum).
gll_val, glp_val, glt_result04

(tensor([-3.1114e-01, -3.1070e-01,  2.3002e-04,  2.3006e-04, -3.6932e-01,
          5.7329e-04, -1.9094e+00, -1.9983e+00, -1.9984e+00],
        dtype=torch.float64),
 tensor([-1.1000e-04,  2.9000e-04,  4.0000e-05, -8.0000e-05, -4.3000e-04,
         -9.2000e-04, -4.4400e-04,  3.4000e-04, -7.2000e-04],
        dtype=torch.float64),
 tensor([-3.1125e-01, -3.1041e-01,  2.7002e-04,  1.5006e-04, -3.6975e-01,
         -3.4671e-04, -1.9098e+00, -1.9979e+00, -1.9992e+00],
        dtype=torch.float64))

## Print out values of all grad log-target implementations

In [16]:
glt_results = [glt_result01, glt_result02, glt_result03, glt_result04]

# All four gradient computations must agree; assert it rather than relying
# on a visual comparison of the printed tensors.
assert all(
    torch.allclose(glt_results[0].detach(), other.detach())
    for other in glt_results[1:]
)

glt_results

[tensor([-3.1125e-01, -3.1041e-01,  2.7002e-04,  1.5006e-04, -3.6975e-01,
         -3.4671e-04, -1.9098e+00, -1.9979e+00, -1.9992e+00],
        dtype=torch.float64, grad_fn=<CatBackward>),
 tensor([-3.1125e-01, -3.1041e-01,  2.7002e-04,  1.5006e-04, -3.6975e-01,
         -3.4671e-04, -1.9098e+00, -1.9979e+00, -1.9992e+00],
        dtype=torch.float64),
 tensor([-3.1125e-01, -3.1041e-01,  2.7002e-04,  1.5006e-04, -3.6975e-01,
         -3.4671e-04, -1.9098e+00, -1.9979e+00, -1.9992e+00],
        dtype=torch.float64),
 tensor([-3.1125e-01, -3.1041e-01,  2.7002e-04,  1.5006e-04, -3.6975e-01,
         -3.4671e-04, -1.9098e+00, -1.9979e+00, -1.9992e+00],
        dtype=torch.float64)]