In [1]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import matplotlib.pyplot as plt

from check_shape import check_shape

from tqdm.notebook import tqdm

In [2]:
class EQcovariance(tf.keras.Model):
    """Exponentiated-quadratic (EQ) covariance function.

    Computes coeff**2 * exp(-0.5 * sum_d (x1_d - x2_d)**2 / scale_d**2),
    where the coefficient and the per-dimension lengthscales are stored as
    their logarithms in (optionally trainable) variables.
    """

    def __init__(self,
                 log_coeff,
                 log_scales,
                 dtype,
                 trainable=True,
                 name='eq_covariance',
                 **kwargs):
        """
        Arguments:
            log_coeff  : array-like, log of the coefficient; squeezed, so a
                         single-element input is stored as a scalar
            log_scales : array-like, log lengthscales; flattened to shape (D,)
            dtype      : dtype used for the parameters and computations
            trainable  : bool, whether both parameters are trainable
            name       : str, name passed to tf.keras.Model
        """

        super().__init__(name=name, dtype=dtype, **kwargs)

        # Convert parameters to tensors
        log_coeff = tf.convert_to_tensor(log_coeff, dtype=dtype)
        log_scales = tf.convert_to_tensor(log_scales, dtype=dtype)

        # Reshape parameter tensors
        log_coeff = tf.squeeze(log_coeff)
        log_scales = tf.reshape(log_scales, (-1,))

        # Set EQ parameters
        self.log_scales = tf.Variable(log_scales, trainable=trainable)
        self.log_coeff = tf.Variable(log_coeff, trainable=trainable)

    def call(self,
             x1,
             x2,
             diag=False,
             epsilon=None):
        """Evaluate the covariance between two sets of inputs.

        Arguments:
            x1      : tensor-like of shape (N, D)
            x2      : tensor-like of shape (M, D); for diag=True it must
                      broadcast elementwise against x1
            diag    : bool, if True compare x1 and x2 row by row and return a
                      vector instead of the full (N, M) matrix
            epsilon : optional jitter added to the diagonal

        Returns:
            Tensor of shape (N, M) if diag is False, otherwise the
            elementwise covariances with the last axis reduced.
        """

        # Convert to tensors
        x1 = tf.convert_to_tensor(x1, dtype=self.dtype)
        x2 = tf.convert_to_tensor(x2, dtype=self.dtype)

        # Squared lengthscales are used in BOTH modes, so that the diag=True
        # result matches the diagonal of the full covariance
        sq_scales = self.scales ** 2

        # If calculating full covariance, add dimensions to broadcast
        if not diag:

            x1 = x1[:, None, :]
            x2 = x2[None, :, :]

            sq_scales = sq_scales[None, None, :]

        # Compute quadratic, exponentiate and multiply by coefficient
        quad = - 0.5 * (x1 - x2) ** 2 / sq_scales
        quad = tf.reduce_sum(quad, axis=-1)
        eq_cov = self.coeff ** 2 * tf.exp(quad)

        # Add jitter for invertibility
        if epsilon is not None:

            if diag:
                # eq_cov already holds diagonal entries only
                eq_cov = eq_cov + epsilon

            else:
                # Dynamic shape so this also works when the static number of
                # rows is unknown inside a traced graph
                eq_cov = eq_cov + epsilon * tf.eye(
                    tf.shape(eq_cov)[0],
                    dtype=self.dtype
                )

        return eq_cov

    @property
    def scales(self):
        """Lengthscales, exp(log_scales)."""
        return tf.math.exp(self.log_scales)

    @property
    def coeff(self):
        """Coefficient, exp(log_coeff)."""
        return tf.math.exp(self.log_coeff)

In [3]:
# Notebook-level settings for a first EQ covariance: log-coefficient 0 and a
# single log-lengthscale 0, i.e. coefficient and lengthscale both exp(0) = 1.
log_coeff = [0.]
log_scales = [0.]
dtype = tf.float64  # this global is also read by later cells
trainable = True

# Build the covariance object from the settings above
covariance = EQcovariance(
    log_coeff=log_coeff,
    log_scales=log_scales,
    dtype=dtype,
    trainable=trainable
)

In [4]:
class GP(tf.keras.Model):
    """Zero-mean Gaussian process regression with Gaussian observation noise.

    Holds a covariance callable and a log-noise variable. Provides the
    posterior predictive and two training losses: the negative log marginal
    likelihood and the negative leave-one-out (LOO) predictive
    log-likelihood, each divided by the number of data points.
    """

    def __init__(self, log_noise, trainable_noise, covariance, dtype, name="gp", **kwargs):
        """
        Arguments:
            log_noise       : scalar, initial log of the noise standard deviation
            trainable_noise : bool, whether log_noise is trainable
            covariance      : callable, covariance(x1, x2) -> covariance matrix
            dtype           : dtype of the noise variable and computations
            name            : str, name passed to tf.keras.Model
        """

        super().__init__(name=name, dtype=dtype, **kwargs)

        self.log_noise = tf.Variable(log_noise, trainable=trainable_noise, dtype=dtype)

        self.covariance = covariance

    def posterior_predictive(self, x_train, y_train, x_pred):
        """Posterior predictive at x_pred given the training data.

        Arguments:
            x_train : tensor of shape (N, D)
            y_train : tensor of shape (N,)
            x_pred  : tensor of shape (M, D)

        Returns:
            mean : tensor of shape (M,)
            cov  : tensor of shape (M, M), includes the observation noise
            var  : tensor of shape (M,), the diagonal of cov
        """

        N = x_train.shape[0]
        Neye = tf.eye(N, dtype=self.dtype)

        # Size of the prediction set comes from the x_pred argument
        M = x_pred.shape[0]
        Meye = tf.eye(M, dtype=self.dtype)

        Ktt = self.covariance(x_train, x_train)
        Ktp = self.covariance(x_train, x_pred)
        Kpt = self.covariance(x_pred, x_train)
        Kpp = self.covariance(x_pred, x_pred)

        # Noisy training covariance, shared by the mean and covariance terms
        noise_var = self.noise**2.
        Ktt_noisy = Ktt + Neye * noise_var

        mean = Kpt @ tf.linalg.solve(Ktt_noisy, y_train[:, None])
        mean = mean[:, 0]

        cov = Kpp - Kpt @ tf.linalg.solve(Ktt_noisy, Ktp) + Meye * noise_var
        var = tf.linalg.diag_part(cov)

        return mean, cov, var

    def marg_lik_loss(self, x, y):
        """Negative log marginal likelihood of y, divided by the number of points.

        Arguments:
            x : tensor of shape (N, D)
            y : tensor of shape (N,)

        Returns:
            Scalar loss tensor.
        """

        N = x.shape[0]
        eye = tf.eye(N, dtype=self.dtype)

        loc = tf.zeros_like(y)
        cov = self.covariance(x, x)
        cov = cov + eye * self.noise**2

        # MultivariateNormalFullCovariance is deprecated; this is the
        # replacement named in its deprecation message
        normal = tfp.distributions.MultivariateNormalTriL(
            loc=loc,
            scale_tril=tf.linalg.cholesky(cov)
        )

        marg_lik = normal.log_prob(y)
        loss = - marg_lik / N

        return loss

    @tf.function
    def loo_loss(self, x, y):
        """Negative leave-one-out predictive log-likelihood, divided by N.

        For every n the covariance and precision are rolled so that one data
        point sits in the last row/column, that point is predicted from the
        remaining N - 1 points, and its Gaussian log-density is accumulated.

        Arguments:
            x : tensor of shape (N, D)
            y : tensor of shape (N,)

        Returns:
            Scalar loss tensor.
        """

        N = x.shape[0]

        # Compute stable inverse once
        cov, prec = self.full_covariance_and_precision(x)

        logpdf = tf.convert_to_tensor(0., dtype=self.dtype)

        for n in tf.range(N):

            loo_prec, D, B, C = self.loo_precision_and_covariance_submatrices(
                cov=cov,
                prec=prec,
                n=n
            )

            # Roll the targets by the same shift as the matrices so the
            # held-out target is the last entry
            y_roll = tf.roll(y, shift=n, axis=0)
            y_cond = y_roll[:-1, None]
            y_pred = y_roll[-1:, None]

            # Gaussian conditional of the held-out point given the rest
            loc = C @ loo_prec @ y_cond
            scale = (D - C @ loo_prec @ B) ** 0.5

            normal = tfp.distributions.Normal(loc=loc, scale=scale)
            logpdf = logpdf + normal.log_prob(y_pred)[0, 0]

        loss = - logpdf / N

        return loss

    def full_covariance_and_precision(self, x):
        """Noisy covariance of x and its inverse computed via Cholesky.

        Arguments:
            x : tensor of shape (N, D)

        Returns:
            cov  : tensor of shape (N, N), covariance plus noise**2 on the diagonal
            prec : tensor of shape (N, N), inverse of cov
        """

        N = x.shape[0]
        eye = tf.eye(N, dtype=self.dtype)

        cov = self.covariance(x, x)
        cov = cov + eye * self.noise**2

        # With cov = L L^T, chol_prec = L^{-1} and prec = L^{-T} L^{-1}
        chol_cov = tf.linalg.cholesky(cov)
        chol_prec = tf.linalg.triangular_solve(chol_cov, eye, lower=True)

        prec = tf.matmul(chol_prec, chol_prec, transpose_a=True)

        return cov, prec

    @tf.function
    def loo_precision_and_covariance_submatrices(self, cov, prec, n):
        """Precision of the retained points and covariance blocks for one LOO split.

        Arguments:
            cov  : tensor of shape (N, N)
            prec : tensor of shape (N, N), assumed to be the inverse of cov
            n    : integer shift applied with tf.roll along both axes

        Returns:
            loo_prec : tensor of shape (N-1, N-1), inverse of the top-left
                       block of the rolled covariance
            D        : tensor of shape (1, 1), bottom-right covariance entry
            B        : tensor of shape (N-1, 1), last column without the corner
            C        : tensor of shape (1, N-1), last row without the corner
        """

        permute = lambda mat: tf.roll(tf.roll(mat, shift=n, axis=0), shift=n, axis=1)

        # Permute covariance and precision so the leave-out point is bottom right
        cov = permute(cov)
        prec = permute(prec)

        B = cov[:-1, -1:]
        C = cov[-1:, :-1]
        D = cov[-1:, -1:]
        G = prec[-1:, :-1]
        E = prec[:-1, :-1]

        # Writing A for the top-left covariance block, cov @ prec = I gives
        # A E = I - B G, so A^{-1} = E (I - B G)^{-1}; the rank-one inverse is
        # expanded with the Sherman-Morrison formula
        loo_prec = E + tf.matmul(tf.matmul(E, B), G) / tf.reshape((1. - tf.matmul(G, B)), (-1,))

        return loo_prec, D, B, C

    @property
    def noise(self):
        """Noise standard deviation, exp(log_noise)."""
        return tf.exp(self.log_noise)

In [5]:
# Settings for a small check of the GP helpers: lengthscale exp(-2) and
# noise standard deviation exp(-8).
log_coeff = [0.]
log_scales = [-2.]
dtype = tf.float64
trainable = True
log_noise = -8.
trainable_noise = True

# These assignments rebind the notebook-level names `covariance` and `gp`
covariance = EQcovariance(
    log_coeff=log_coeff,
    log_scales=log_scales,
    dtype=dtype,
    trainable=trainable
)

gp = GP(
    log_noise=log_noise,
    trainable_noise=trainable_noise,
    covariance=covariance,
    dtype=dtype
)

# Noisy covariance and its inverse for 100 uniform random 1-D inputs
x = tf.random.uniform(shape=(100, 1), dtype=dtype)
cov, prec = gp.full_covariance_and_precision(x)

In [6]:
# Numerical check of the LOO precision: compare the first output of
# loo_precision_and_covariance_submatrices against a direct inverse of the
# rolled covariance with its last row and column removed.
x = tf.random.uniform(shape=(300, 1), dtype=dtype)
cov, prec = gp.full_covariance_and_precision(x)
n = 100

# Same double roll as the one used inside the GP method
permute = lambda mat, n: tf.roll(tf.roll(mat, shift=n, axis=0), shift=n, axis=1)

# Sum of squared differences divided by 300**2.
# NOTE(review): the compared matrices are 299 x 299, so 300**2 is not their
# exact entry count - confirm whether that is intended.
print(tf.reduce_sum((gp.loo_precision_and_covariance_submatrices(cov, prec, n)[0] \
                     - tf.linalg.inv(permute(cov, n)[:-1, :-1]))**2)/300**2)

tf.Tensor(9.868662073167221e-05, shape=(), dtype=float64)


In [7]:
# Smoke test of the marginal-likelihood loss, using the first input column as
# stand-in targets; the value is displayed as the cell output.
gp.marg_lik_loss(x, x[:, 0])

Instructions for updating:
`MultivariateNormalFullCovariance` is deprecated, use `MultivariateNormalTriL(loc=loc, scale_tril=tf.linalg.cholesky(covariance_matrix))` instead.


<tf.Tensor: shape=(), dtype=float64, numpy=-6.661863632667553>

In [8]:
# # Load pre-downloaded concrete dataset
# x = np.load('data/concrete/x.npy')
# y = np.load('data/concrete/y.npy')

# # Normalise inputs and outputs
# x = (x - x.mean(axis=0)) / x.var(axis=0) ** 0.5
# y = (y - y.mean()) / y.var() ** 0.5

# num_train = 500

# # Split into training and test sets
# x_train = x[:num_train]
# y_train = y[:num_train, 0]
# x_test = x[num_train:]
# y_test = y[num_train:, 0]

In [9]:
def generate_dataset(num_dim, gamma, noise, num_train, num_test, dtype):
    """Draw a synthetic regression dataset and split it into train and test.

    Inputs are uniform on [-5, 5) in each of num_dim dimensions. Targets are
    gamma times the sum over dimensions of softplus(x), plus Gaussian noise
    scaled by `noise`. Sampling uses TensorFlow's default float type; the
    results are cast to `dtype` afterwards.

    Arguments:
        num_dim   : int, input dimensionality
        gamma     : scalar multiplier of the noiseless targets
        noise     : scalar multiplier of the standard-normal noise
        num_train : int, number of training points
        num_test  : int, number of test points
        dtype     : dtype of the returned tensors

    Returns:
        x_train (num_train, num_dim), y_train (num_train,),
        x_test (num_test, num_dim), y_test (num_test,)
    """

    # Sample all inputs at once, training points first
    inputs = 10. * tf.random.uniform(shape=(num_train + num_test, num_dim)) - 5.

    # Noiseless targets kept as a column, then perturbed with Gaussian noise
    targets = gamma * tf.reduce_sum(tf.nn.softplus(inputs), axis=1, keepdims=True)
    targets = targets + noise * tf.random.normal(shape=targets.shape)

    # Cast once, then slice out the two splits
    inputs = tf.cast(inputs, dtype=dtype)
    targets = tf.cast(targets, dtype=dtype)[:, 0]

    return (
        inputs[:num_train],
        targets[:num_train],
        inputs[num_train:],
        targets[num_train:],
    )

In [10]:
@tf.function
def single_step(model, optimiser, x_train, y_train, x_test, y_test):
    """Run one optimisation step on the LOO loss and evaluate on test data.

    Arguments:
        model     : model exposing loo_loss, posterior_predictive and
                    trainable_variables
        optimiser : optimizer exposing apply_gradients
        x_train   : training inputs
        y_train   : training targets
        x_test    : test inputs
        y_test    : test targets

    Returns:
        loss    : LOO loss evaluated before the parameter update
        rmse    : root-mean-squared error of the predictive mean on the test set
        log_lik : joint predictive log-density of y_test divided by its length
    """

    with tf.GradientTape() as tape:
        loss = model.loo_loss(x_train, y_train)

    gradients = tape.gradient(loss, model.trainable_variables)
    optimiser.apply_gradients(zip(gradients, model.trainable_variables))

    # Evaluate the model passed in as an argument, not a notebook-level global
    mean, cov, _ = model.posterior_predictive(x_train, y_train, x_test)
    rmse = tf.reduce_mean((mean - y_test)**2)**0.5

    # MultivariateNormalFullCovariance is deprecated; this is the replacement
    # named in its deprecation message
    normal = tfp.distributions.MultivariateNormalTriL(
        loc=mean,
        scale_tril=tf.linalg.cholesky(cov),
    )
    log_lik = normal.log_prob(y_test) / y_test.shape[0]

    return loss, rmse, log_lik

In [11]:
# Fix TensorFlow's global seed before sampling the dataset
tf.random.set_seed(0)

# Synthetic dataset settings
num_dim = 4
gamma = 2
noise = 1e-1
num_train = 500
num_test = 1000

# `dtype` is not set in this cell; it is the notebook-level value assigned in
# an earlier cell
x_train, y_train, x_test, y_test = generate_dataset(
    num_dim=num_dim,
    gamma=gamma,
    noise=noise,
    num_train=num_train,
    num_test=num_test,
    dtype=dtype
)

In [12]:
# Optimisation settings
num_steps = 5000
learning_rate = 1e-2

# Model initialisation: one log-lengthscale per input dimension, all zero
log_coeff = [0.]
log_scales = x_train.shape[1] * [0.]
dtype = tf.float64
trainable = True
log_noise = 0.
trainable_noise = True

covariance = EQcovariance(
    log_coeff=log_coeff,
    log_scales=log_scales,
    dtype=dtype,
    trainable=trainable
)

gp = GP(
    log_noise=log_noise,
    trainable_noise=trainable_noise,
    covariance=covariance,
    dtype=dtype
)

# Initialise optimiser
optimiser = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    
# Set progress bar and suppress warnings
progress_bar = tqdm(range(1, num_steps+1), bar_format="{l_bar}{r_bar}")
tf.get_logger().setLevel('ERROR')

# Train model
for i in progress_bar:
        
    loss, rmse, log_lik = single_step(
        model=gp,
        optimiser=optimiser,
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test
    )

    # i % 1 == 0 holds for every i, so the description is refreshed each step
    if i % 1 == 0:
        
        # Only the first lengthscale is shown under "Scale"
        progress_bar.set_description(
            f"Loss {loss.numpy():.3f} "
            f"Test RMSE {rmse.numpy():.3f} "
            f"Test LL {log_lik.numpy():.3f} "
            f"Noise {gp.noise.numpy():.2f} "
            f"Coeff {gp.covariance.coeff.numpy():.2f} "
            f"Scale {gp.covariance.scales.numpy()[0]:.2f} "
        )

HBox(children=(FloatProgress(value=0.0, max=5000.0), HTML(value='')))

KeyboardInterrupt: 

In [13]:
# Synthetic (gamma = 2)
# Marglik : Loss 0.856 Test RMSE 0.350 Test LL -0.044 Noise 0.23 Coeff 19.02 Scale 14.93
# LOO     : Loss 0.041 Test RMSE 0.251 Test LL  0.536 Noise 0.09 Coeff 12.46 Scale 7.59

# Low-rank approximation

In [14]:
# tf.random.set_seed(0)

# x = tf.convert_to_tensor(np.random.rand(3))[:, None]
# u = tf.convert_to_tensor(np.random.rand(2))[:, None]
# sigma = 1e-1

# log_coeff = [0.]
# log_scales = [0.]
# dtype = tf.float64
# trainable = False

# covariance = EQcovariance(
#     log_coeff=log_coeff,
#     log_scales=log_scales,
#     dtype=dtype,
#     trainable=trainable
# )

# A = covariance(u, u)
# v1 = covariance(u, x[:1])
# v2 = covariance(u, x[-1:])

# v1T = covariance(x[:1], u)
# v2T = covariance(x[-1:], u)

# L1 = tf.linalg.cholesky(Kuu + tf.matmul(v1, v1, transpose_b=True))
# L2 = tf.linalg.cholesky(Kuu + tf.matmul(v1, v1, transpose_b=True) + tf.matmul(v2, v2, transpose_b=True))
# L2 = tfp.math.cholesky_update(L2, v2[:, 0], multiplier=-1.0)

# print(L1)
# print(L2)

In [27]:
class DTCGP(tf.keras.Model):
    """Sparse GP built on a set of inducing inputs, trained with a stochastic
    leave-one-out (LOO) loss.

    Only dtc_loo_loss is implemented; posterior_predictive and
    dtc_marg_lik_loss are placeholders that return None.
    """

    def __init__(self, inducing_inputs, log_noise, trainable_noise, covariance, dtype, name="gp", **kwargs):
        """
        Arguments:
            inducing_inputs : tensor of shape (M, D), stored as given
            log_noise       : scalar, initial log of the noise standard deviation
            trainable_noise : bool, whether log_noise is trainable
            covariance      : callable, covariance(x1, x2) -> covariance matrix
            dtype           : dtype of the noise variable and computations
            name            : str, name passed to tf.keras.Model
        """

        super().__init__(name=name, dtype=dtype, **kwargs)

        self.inducing_inputs = inducing_inputs
        self.log_noise = tf.Variable(log_noise, trainable=trainable_noise, dtype=dtype)

        self.covariance = covariance

    def posterior_predictive(self, x_train, y_train, x_pred):
        """Placeholder, not implemented yet (returns None)."""
        pass

    def dtc_marg_lik_loss(self, x, y):
        """Placeholder, not implemented yet (returns None)."""
        pass

    def dtc_loo_loss(self, x, y, batch_size):
        """Mini-batch estimate of the negative LOO predictive log-likelihood.

        Samples batch_size indices uniformly with replacement. For each one,
        the corresponding point is predicted from all other points and its
        Gaussian log-density is accumulated.

        Arguments:
            x          : tensor of shape (N, D)
            y          : tensor of shape (N,)
            batch_size : int, number of leave-one-out terms to evaluate

        Returns:
            Scalar loss tensor: minus the summed log-density over batch_size.
        """

        # Check shapes of input and output tensors
        check_shape([x, y], [("N", "D"), ("N",)])

        N = x.shape[0]

        # Compute matrix terms
        Kuu = self.covariance(self.inducing_inputs, self.inducing_inputs)
        Kux = self.covariance(self.inducing_inputs, x)

        # Compute cholesky of Nystrom matrix - updated in every step
        chol = tf.linalg.cholesky(Kuu + self.noise**-2 * tf.matmul(Kux, Kux, transpose_b=True))

        idx = tf.range(N)

        # Sample the batch with a TensorFlow op. A NumPy sampler would run
        # only once, while a tf.function is being traced, and the same batch
        # would then be reused on every call of the traced function.
        idx_batch = tf.random.uniform(
            shape=(batch_size,),
            minval=0,
            maxval=N,
            dtype=tf.int32
        )

        logpdf = tf.convert_to_tensor(0., dtype=self.dtype)

        for n in idx_batch:

            # l selects the held-out point, r selects the remaining points
            l_idx = idx == n
            r_idx = idx != n

            l = tf.boolean_mask(x, l_idx, axis=0)
            r = tf.boolean_mask(x, r_idx, axis=0)

            yl = tf.boolean_mask(y, l_idx)
            yr = tf.boolean_mask(y, r_idx)

            # Compute covariance submatrices associated with current LOO point
            Kll = self.covariance(l, l)
            Klr = self.covariance(l, r)
            Kru = self.covariance(r, self.inducing_inputs)

            # Downdate the Cholesky factor (multiplier=-1) with the held-out
            # point's column of Kux scaled by 1 / noise
            L = tfp.math.cholesky_update(chol, Kux[:, n]/self.noise, multiplier=-1.0)

            # Compute mean
            mean = tf.linalg.matvec(Kru, yr, transpose_a=True)
            mean = tf.linalg.cholesky_solve(L, mean[:, None])[:, 0]
            mean = tf.linalg.matvec(Kru, mean)
            mean = tf.linalg.matvec(Klr, mean)
            mean = self.noise**-2 * tf.linalg.matvec(Klr, yr) - self.noise**-4 * mean

            # Compute variance
            var = tf.linalg.matmul(Kru, Klr, transpose_a=True, transpose_b=True)
            var = tf.linalg.cholesky_solve(L, var)
            var = tf.linalg.matmul(Kru, var)
            var = tf.linalg.matmul(Klr, var)
            var = self.noise**-2 * tf.matmul(Klr, Klr, transpose_b=True) - self.noise**-4 * var
            var = Kll - var + self.noise**2

            # Compute log-probability contribution
            normal = tfp.distributions.Normal(loc=mean, scale=var[0]**0.5)
            logpdf = logpdf + normal.log_prob(yl)[0]

        loss = - logpdf / batch_size

        return loss

    @property
    def noise(self):
        """Noise standard deviation, exp(log_noise)."""
        return tf.exp(self.log_noise)

In [28]:
# Reset TensorFlow's global seed before re-sampling the dataset
tf.random.set_seed(0)

# Synthetic dataset settings
num_dim = 4
gamma = 2
noise = 1e-1
num_train = 500
num_test = 1000

# `dtype` is the notebook-level value assigned in an earlier cell
x_train, y_train, x_test, y_test = generate_dataset(
    num_dim=num_dim,
    gamma=gamma,
    noise=noise,
    num_train=num_train,
    num_test=num_test,
    dtype=dtype
)

In [30]:
@tf.function
def single_step(model, optimiser, x_train, y_train, x_test, y_test, batch_size):
    """Take one gradient step on the model's DTC leave-one-out loss.

    Arguments:
        model      : model exposing dtc_loo_loss and trainable_variables
        optimiser  : optimizer exposing apply_gradients
        x_train    : training inputs
        y_train    : training targets
        x_test     : accepted but not used here
        y_test     : accepted but not used here
        batch_size : number of leave-one-out terms per loss evaluation

    Returns:
        The loss evaluated before the parameter update.
    """

    params = model.trainable_variables

    # Record the forward pass so the loss can be differentiated
    with tf.GradientTape() as tape:
        loss = model.dtc_loo_loss(x_train, y_train, batch_size=batch_size)

    grads = tape.gradient(loss, params)
    optimiser.apply_gradients(zip(grads, params))

    return loss


# Optimisation settings
num_steps = 5000
learning_rate = 1e-3

# Model initialisation: one log-lengthscale per input dimension
log_coeff = [1.]
log_scales = x_train.shape[1] * [2.]
dtype = tf.float64
trainable_covariance = True
log_noise = -1.
trainable_noise = True
batch_size = 5  # number of leave-one-out terms per step

covariance = EQcovariance(
    log_coeff=log_coeff,
    log_scales=log_scales,
    dtype=dtype,
    trainable=trainable_covariance
)

# The first 100 training inputs are used as the inducing inputs
gp = DTCGP(
    inducing_inputs=x_train[:100, :],
    log_noise=log_noise,
    trainable_noise=trainable_noise,
    covariance=covariance,
    dtype=dtype
)

# Initialise optimiser
optimiser = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    
# Set progress bar and suppress warnings
progress_bar = tqdm(range(1, num_steps+1), bar_format="{l_bar}{r_bar}")
tf.get_logger().setLevel('ERROR')

# Train model
for i in progress_bar:
        
    # x_test and y_test are passed through, but this single_step ignores them
    loss = single_step(
        model=gp,
        optimiser=optimiser,
        x_train=x_train,
        y_train=y_train,
        x_test=x_test,
        y_test=y_test,
        batch_size=batch_size
    )
    
    # i % 1 == 0 holds for every i, so the description is refreshed each step
    if i % 1 == 0:
        
        # Only the first lengthscale is shown under "Scale"
        progress_bar.set_description(
            f"Loss {loss.numpy():.3f} "
            f"Noise {gp.noise.numpy():.2f} "
            f"Coeff {gp.covariance.coeff.numpy():.2f} "
            f"Scale {gp.covariance.scales.numpy()[0]:.2f} "
        )

HBox(children=(FloatProgress(value=0.0, max=5000.0), HTML(value='')))

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Users/stratis/repos/random-walks/venv-random-walks/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-30-4ec88f2516c5>", line 56, in <module>
    batch_size=batch_size
  File "/Users/stratis/repos/random-walks/venv-random-walks/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 780, in __call__
    result = self._call(*args, **kwds)
  File "/Users/stratis/repos/random-walks/venv-random-walks/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 807, in _call
    return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
  File "/Users/stratis/repos/random-walks/venv-random-walks/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 2829, in __call__
    return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
  File "/Users/stra

KeyboardInterrupt: 