In [59]:
import gpytorch
import torch
import matplotlib.pyplot as plt
from gpytorch_models import dfRBFKernel

import linear_operator
from linear_operator import to_linear_operator

# Load data

In [60]:
# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# overwrite if needed: # device = 'cpu'
print('Using device:', device)

region_name = "region_lower_byrd"

# Tensor paths are derived from region_name
path_to_training_tensor = f"data/real_data/{region_name}_train_tensor.pt"
path_to_test_tensor = f"data/real_data/{region_name}_test_tensor.pt"

# Load and transpose so that rows are points.
# map_location keeps the load working on a CPU-only machine even when the
# tensors were saved from a CUDA device.
# NOTE(review): weights_only=False unpickles arbitrary objects - only load
# trusted files, or switch to weights_only=True if the files hold plain tensors.
train = torch.load(path_to_training_tensor, map_location = device, weights_only = False).T
test = torch.load(path_to_test_tensor, map_location = device, weights_only = False).T

# train: columns 0-1 are used as inputs, columns 3-4 as targets
x_train = train[:, [0, 1]].to(device)
y_train = train[:, [3, 4]].to(device)

# test: same column selection as for the training tensor
x_test = test[:, [0, 1]].to(device)
y_test = test[:, [3, 4]].to(device)

# Rescale the inputs (train and test alike) by one constant factor
scalar = 25
x_train = x_train * scalar
x_test = x_test * scalar

Using device: cuda


In [61]:
kernel = dfRBFKernel().to(device)
# Remember that these will be squared
kernel.lengthscale = torch.tensor([0.69, 0.5], device = device)
print(kernel.lengthscale)

# Dense covariance blocks for every train/test pairing.
# to_dense() is the current name of the deprecated evaluate() alias and matches
# the to_dense() calls used in the rest of this notebook.
K_train_train = kernel(x_train, x_train).to_dense()
K_test_test = kernel(x_test, x_test).to_dense()
K_train_test = kernel(x_train, x_test).to_dense()

tensor([0.6900, 0.5000], device='cuda:0', grad_fn=<SoftplusBackward0>)


In [62]:
# Fixed-point printing with four decimals (no scientific notation)
torch.set_printoptions(sci_mode = False, precision = 4)
# Peek at the top-left 4 x 4 corner of the train/test cross-covariance
K_train_test[:4, :4]

tensor([[     0.0000,      0.0000,      0.0000,      0.0000],
        [     0.0000,      0.0000,      0.0000,      0.0000],
        [    -0.0000,      0.0000,     -0.0000,      0.0000],
        [     0.0000,     -0.0000,      0.0000,     -0.0000]], device='cuda:0',
       grad_fn=<SliceBackward0>)

In [63]:
# Symmetry checks for the two square covariance matrices.
# torch.allclose only returns a bool, which is not displayed mid-cell, so it is
# asserted here: an asymmetric matrix now fails loudly instead of passing silently.
assert torch.allclose(K_train_train, K_train_train.T, atol = 1e-4), "K_train_train is not symmetric"
# Largest absolute deviation from symmetry
print((K_train_train - K_train_train.T).abs().max().item())

assert torch.allclose(K_test_test, K_test_test.T, atol = 1e-4), "K_test_test is not symmetric"
print((K_test_test - K_test_test.T).abs().max().item())
# K_train_test is not meant to be symmetric

0.0
0.0


# Inference in torch

In [64]:
# Scale all three covariance blocks by the same output factor.
# NOTE: the blocks are overwritten in place, so re-running this cell without
# re-running the kernel cell applies the factor a second time.
output_scalar = 20
K_train_train, K_test_test, K_train_test = (
    block * output_scalar
    for block in (K_train_train, K_test_test, K_train_test)
)

In [65]:
# Diagonal jitter keeps the train covariance numerically positive definite
n_train_outputs = K_train_train.shape[0]
K_train_train_jittered = K_train_train + 1e-2 * torch.eye(n_train_outputs, device=device)

# Lower Cholesky factor: L @ L.T = K_train_train_jittered
L = torch.linalg.cholesky(K_train_train_jittered)

# Targets flattened row by row into a single column: shape (N * T, 1)
y_train_flat_interleaved = y_train.reshape(-1).unsqueeze(-1)

# alpha = (jittered K)^-1 y, obtained from the lower Cholesky factor
alpha = torch.cholesky_solve(y_train_flat_interleaved, L)

# Predictive mean: K_*^T @ alpha
K_test_train = K_train_test.mT  # shape (M*T, N*T)
predictive_mean = (K_test_train @ alpha).squeeze(-1)  # shape (M*T,)

# Predictive covariance: K_ss - v.T @ v, where L @ v = K_train_test
v = torch.linalg.solve_triangular(L, K_train_test, upper=False)
predictive_covariance = K_test_test - v.mT @ v

# Sanity prints: largest asymmetry and largest entry of the predictive covariance
covariance_asymmetry = predictive_covariance - predictive_covariance.T
print(covariance_asymmetry.max().item())
print(predictive_covariance.max().item())

# Wrap mean (one row per test point, one column per task) and covariance
predictive_distribution = gpytorch.distributions.MultitaskMultivariateNormal(
    mean = predictive_mean.reshape(-1, 2),
    covariance_matrix = predictive_covariance,
)

# Last expression of the cell: NLPD of the test targets under the posterior
gpytorch.metrics.negative_log_predictive_density(
    predictive_distribution,
    y_test.to(device),
)

0.0
80.00000762939453


tensor(9.5465, device='cuda:0', grad_fn=<DivBackward0>)

In [15]:
# Build the interleaved (2N x 2M) kernel matrix by hand from its four N x M blocks.
x1 = x_train
x2 = x_train

# Extract the chosen device
device = x1.device

N = x1.shape[0]
M = x2.shape[0]

# Extract both lengthscales
l1, l2 = kernel.lengthscale[0], kernel.lengthscale[1]

# STEP 1: Pairwise differences of shape [N, M, 2]
# Expand row_tensor [N, 2] -> [N, 1, 2] and column_tensor [M, 2] -> [1, M, 2]
# (the result already lives on x1's device, so no extra .to(device) is needed)
diff = x1[:, None, :] - x2[None, :, :]

# Extract the relative components (last axis of diff) for convenience, matching paper notation
r1 = diff[:, :, 0]
r2 = diff[:, :, 1]

# STEP 2: Block components (shape N × M each), before the exponential envelope
K_uu = to_linear_operator(((1 - (r2**2 / l2**2)) / l2**2).contiguous())
K_uv = to_linear_operator(((r1 * r2) / (l1**2 * l2**2)).contiguous())
# In this cell K_vu aliases K_uv BEFORE the envelope is applied
K_vu = K_uv
K_vv = to_linear_operator(((1 - (r1**2 / l1**2)) / l1**2).contiguous())

# STEP 3: RBF/SE envelope (elementwise) (shape N × M)
# NOTE: If column_tensor == row_tensor, the diagonal elements will be 1
exp_term = to_linear_operator(torch.exp(-0.5 * ((r1 / l1) ** 2 + (r2 / l2) ** 2)).contiguous())

# STEP 4: Combine and stack
# Final scaled components (each shape N × M)
K_uu = K_uu * exp_term
K_uv = K_uv * exp_term
K_vu = K_vu * exp_term
K_vv = K_vv * exp_term

### Step 1: Create indicators for the "interleaving" Kronecker product
# Each indicator is a 2 x 2 one-hot matrix selecting one (row, col) slot of the
# per-pair 2 x 2 block. They are allocated directly on the kernel's device and in
# the kernel blocks' dtype, so the two Kronecker factors cannot mismatch.
one_hot_slots = []
for row, col in ((0, 0), (0, 1), (1, 0), (1, 1)):
    slot = torch.zeros(2, 2, device = device, dtype = K_uu.dtype)
    slot[row, col] = 1.0
    one_hot_slots.append(slot)
K_uu_indicator, K_uv_indicator, K_vu_indicator, K_vv_indicator = one_hot_slots

K_uu_indicator_lo = to_linear_operator(K_uu_indicator)
K_uv_indicator_lo = to_linear_operator(K_uv_indicator)
K_vu_indicator_lo = to_linear_operator(K_vu_indicator)
K_vv_indicator_lo = to_linear_operator(K_vv_indicator)

# Step 2: Create Kronecker product linear operators
# NOTE: The order is important here, the Kronecker product is not commutative:
# (block, indicator) places block[i, j] at position (2i + row, 2j + col).
kron = linear_operator.operators.KroneckerProductLinearOperator
K_uu_expand = kron(K_uu, K_uu_indicator_lo)
K_uv_expand = kron(K_uv, K_uv_indicator_lo)
K_vu_expand = kron(K_vu, K_vu_indicator_lo)
K_vv_expand = kron(K_vv, K_vv_indicator_lo)

# The four expanded blocks occupy disjoint entries, so their sum interleaves them
K_interleaved = K_uu_expand + K_uv_expand + K_vu_expand + K_vv_expand

In [16]:
# Pairwise coordinate differences between x1 and x2, shape [N, M, 2]
diff = (x1.unsqueeze(1) - x2.unsqueeze(0)).to(device)

# Split the last axis into its two relative components (paper notation)
r1, r2 = diff.unbind(dim = -1)

In [17]:
# Symmetry check: compare the dense matrix against its TRANSPOSE.
# (Subtracting the matrix from itself is zero by construction and checks nothing.)
K_interleaved_dense = K_interleaved.to_dense()
(K_interleaved_dense - K_interleaved_dense.T).abs().max()

tensor(0., device='cuda:0', grad_fn=<MaxBackward1>)

In [18]:
# Show the dense N x M K_uv block (after multiplication with exp_term)
K_uv.to_dense()

tensor([[ 0.0000,  0.0011,  0.0002,  ..., -0.0006, -0.0014, -0.0019],
        [ 0.0011,  0.0000,  0.0096,  ..., -0.0088, -0.0079, -0.0054],
        [ 0.0002,  0.0096,  0.0000,  ...,  0.0021,  0.0051,  0.0071],
        ...,
        [-0.0006, -0.0088,  0.0021,  ...,  0.0000, -0.0028, -0.0061],
        [-0.0014, -0.0079,  0.0051,  ..., -0.0028,  0.0000, -0.0011],
        [-0.0019, -0.0054,  0.0071,  ..., -0.0061, -0.0011,  0.0000]],
       device='cuda:0', grad_fn=<MulBackward0>)

In [20]:
# Show the Kronecker-expanded K_uv block: with the [0, 1] indicator, entry
# K_uv[i, j] lands at position (2i, 2j + 1); all other entries are zero
K_uv_expand.to_dense()

tensor([[ 0.0000,  0.0000,  0.0000,  ..., -0.0014,  0.0000, -0.0019],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0011,  0.0000,  ..., -0.0079,  0.0000, -0.0054],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000, -0.0019,  0.0000,  ..., -0.0011,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
       device='cuda:0', grad_fn=<MatmulBackward>)

In [21]:
# Show the dense K_vu block, for comparison with K_uv
K_vu.to_dense()

tensor([[ 0.0000,  0.0011,  0.0002,  ..., -0.0006, -0.0014, -0.0019],
        [ 0.0011,  0.0000,  0.0096,  ..., -0.0088, -0.0079, -0.0054],
        [ 0.0002,  0.0096,  0.0000,  ...,  0.0021,  0.0051,  0.0071],
        ...,
        [-0.0006, -0.0088,  0.0021,  ...,  0.0000, -0.0028, -0.0061],
        [-0.0014, -0.0079,  0.0051,  ..., -0.0028,  0.0000, -0.0011],
        [-0.0019, -0.0054,  0.0071,  ..., -0.0061, -0.0011,  0.0000]],
       device='cuda:0', grad_fn=<MulBackward0>)

In [22]:
# Show the dense RBF/SE envelope; x1 and x2 are the same points here, so the diagonal is 1
exp_term.to_dense()

tensor([[1.0000, 0.1004, 0.0019,  ..., 0.0053, 0.0189, 0.0370],
        [0.1004, 1.0000, 0.3263,  ..., 0.2506, 0.4740, 0.6346],
        [0.0019, 0.3263, 1.0000,  ..., 0.0759, 0.1240, 0.1533],
        ...,
        [0.0053, 0.2506, 0.0759,  ..., 1.0000, 0.8995, 0.7572],
        [0.0189, 0.4740, 0.1240,  ..., 0.8995, 1.0000, 0.9600],
        [0.0370, 0.6346, 0.1533,  ..., 0.7572, 0.9600, 1.0000]],
       device='cuda:0', grad_fn=<ExpBackward0>)

In [23]:
# Show K_uv once more (same expression as in an earlier cell)
K_uv.to_dense()

tensor([[ 0.0000,  0.0011,  0.0002,  ..., -0.0006, -0.0014, -0.0019],
        [ 0.0011,  0.0000,  0.0096,  ..., -0.0088, -0.0079, -0.0054],
        [ 0.0002,  0.0096,  0.0000,  ...,  0.0021,  0.0051,  0.0071],
        ...,
        [-0.0006, -0.0088,  0.0021,  ...,  0.0000, -0.0028, -0.0061],
        [-0.0014, -0.0079,  0.0051,  ..., -0.0028,  0.0000, -0.0011],
        [-0.0019, -0.0054,  0.0071,  ..., -0.0061, -0.0011,  0.0000]],
       device='cuda:0', grad_fn=<MulBackward0>)

In [24]:
# Elementwise product of the coordinate differences: the numerator of K_uv
# before the lengthscale scaling and the exponential envelope are applied
r1 * r2

tensor([[   0.0000,   12.6979,  113.8843,  ..., -129.7339,  -85.5610,
          -61.5250],
        [  12.6979,    0.0000,   34.8503,  ...,  -41.7442,  -19.8875,
          -10.1077],
        [ 113.8843,   34.8503,    0.0000,  ...,   32.5165,   48.8842,
           54.8879],
        ...,
        [-129.7339,  -41.7442,   32.5165,  ...,    0.0000,   -3.6545,
           -9.5903],
        [ -85.5610,  -19.8875,   48.8842,  ...,   -3.6545,    0.0000,
           -1.4040],
        [ -61.5250,  -10.1077,   54.8879,  ...,   -9.5903,   -1.4040,
            0.0000]], device='cuda:0')

`K_uv` and `K_vu` are only symmetric to each other if `K_vu` is copied from `K_uv` after the multiplication with `exp_term`, as done in the next cell.

In [None]:
# Build the interleaved (2N x 2M) kernel matrix by hand from its four N x M blocks.
# In this variant K_vu is copied from K_uv AFTER the envelope multiplication.
x1 = x_train
x2 = x_train

# Extract the chosen device
device = x1.device

N = x1.shape[0]
M = x2.shape[0]

# Extract both lengthscales
l1, l2 = kernel.lengthscale[0], kernel.lengthscale[1]

# STEP 1: Pairwise differences of shape [N, M, 2]
# Expand row_tensor [N, 2] -> [N, 1, 2] and column_tensor [M, 2] -> [1, M, 2]
# (the result already lives on x1's device, so no extra .to(device) is needed)
diff = x1[:, None, :] - x2[None, :, :]

# Extract the relative components (last axis of diff) for convenience, matching paper notation
# NOTE: r1 and r2 are negative symmetric for like pairs
r1 = diff[:, :, 0]
r2 = diff[:, :, 1]

# STEP 2: Block matrix
# Block components (shape N × M each), before the exponential envelope
K_uu = to_linear_operator(((1 - (r2**2 / l2**2)) / l2**2).contiguous())
K_uv = to_linear_operator(((r1 * r2) / (l1**2 * l2**2)).contiguous())
K_vv = to_linear_operator(((1 - (r1**2 / l1**2)) / l1**2).contiguous())

# STEP 3: RBF/SE envelope (elementwise) (shape N × M)
# NOTE: If column_tensor == row_tensor, the diagonal elements will be 1
exp_term = to_linear_operator(torch.exp(-0.5 * ((r1 / l1) ** 2 + (r2 / l2) ** 2)).contiguous())

# STEP 4: Combine and stack
# Final scaled components (each shape N × M)
K_uu = K_uu * exp_term
K_uv = K_uv * exp_term
# K_vu is the very same operator as the already-scaled K_uv
K_vu = K_uv
K_vv = K_vv * exp_term

### Step 1: Create indicators for the "interleaving" Kronecker product
# Each indicator is a 2 x 2 one-hot matrix selecting one (row, col) slot of the
# per-pair 2 x 2 block. They are allocated directly on the kernel's device and in
# the kernel blocks' dtype, so the two Kronecker factors cannot mismatch.
one_hot_slots = []
for row, col in ((0, 0), (0, 1), (1, 0), (1, 1)):
    slot = torch.zeros(2, 2, device = device, dtype = K_uu.dtype)
    slot[row, col] = 1.0
    one_hot_slots.append(slot)
K_uu_indicator, K_uv_indicator, K_vu_indicator, K_vv_indicator = one_hot_slots

K_uu_indicator_lo = to_linear_operator(K_uu_indicator)
K_uv_indicator_lo = to_linear_operator(K_uv_indicator)
K_vu_indicator_lo = to_linear_operator(K_vu_indicator)
K_vv_indicator_lo = to_linear_operator(K_vv_indicator)

# Step 2: Create Kronecker product linear operators
# NOTE: The order is important here, the Kronecker product is not commutative:
# (block, indicator) places block[i, j] at position (2i + row, 2j + col).
kron = linear_operator.operators.KroneckerProductLinearOperator
K_uu_expand = kron(K_uu, K_uu_indicator_lo)
K_uv_expand = kron(K_uv, K_uv_indicator_lo)
K_vu_expand = kron(K_vu, K_vu_indicator_lo)
K_vv_expand = kron(K_vv, K_vv_indicator_lo)

# The four expanded blocks occupy disjoint entries, so their sum interleaves them
K_interleaved = K_uu_expand + K_uv_expand + K_vu_expand + K_vv_expand

In [None]:
# Displays the operator object itself (its type), not its values;
# use exp_term.to_dense() to inspect the entries
exp_term

<linear_operator.operators.dense_linear_operator.DenseLinearOperator at 0x7fd6edf77190>