## Hierarchical GP with Bilby

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import bilby
from bilby.core.utils import random
import json

# set up: seed bilby's random module and make sure the output directory exists
random.seed(123)
label = "GP"
outdir = "outdir"
bilby.utils.check_directory_exists_and_if_not_mkdir(outdir)

# load one CSV replicate of the synthetic data into a DataFrame
sample_dat_path = "../synthetic_data/N11000_AP10_noise0.5_seed1/Size500/Rep3.csv"
sample_dat = pd.read_csv(sample_dat_path)

# load the accompanying `_meta.json` file
val_dat_path = "../synthetic_data/N11000_AP10_noise0.5_seed1/N11000_AP10_noise0.5_seed1_meta.json"
# JSON is UTF-8 encoded; pass the encoding explicitly so decoding does not
# depend on the platform's locale default
with open(val_dat_path, 'r', encoding="utf-8") as f:
    val_dat = json.load(f)

from sklearn.model_selection import train_test_split

# design matrix and target: the target is taken from the last CSV column
X = sample_dat.iloc[:, :-1].values  # all columns except the last
y = sample_dat.iloc[:, -1].values   # last column

# hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=22)

In [1]:
# helper function to compute the RBF (squared-exponential) kernel matrix
def rbf_kernel(X, ell, sigma_gp):
    """Return the RBF Gram matrix of the rows of ``X``.

    Entry ``(i, j)`` is
    ``sigma_gp**2 * exp(-0.5 * ||(X[i] - X[j]) / ell||**2)``.

    Parameters
    ----------
    X : array_like, shape (N, D) or (N,)
        Input points, one per row. A 1-D input is treated as N scalar points.
    ell : float or array_like of shape (D,)
        Lengthscale. A scalar is shared by all dimensions; an array gives one
        lengthscale per column (broadcast against the columns of ``X``).
    sigma_gp : float
        Kernel amplitude; the diagonal of the result equals ``sigma_gp**2``.

    Returns
    -------
    numpy.ndarray, shape (N, N)
        Symmetric kernel matrix.
    """
    points = np.asarray(X, dtype=float)
    if points.ndim == 1:
        points = points[:, np.newaxis]

    # The lengthscale must divide the *difference* of the two points. Scaling
    # both points first is equivalent: (a - b) / ell == a / ell - b / ell.
    scaled = points / ell

    # Pairwise differences via broadcasting: shape (N, N, D).
    diff = scaled[:, np.newaxis, :] - scaled[np.newaxis, :, :]
    sq_dist = np.einsum("ijk,ijk->ij", diff, diff)

    return sigma_gp**2 * np.exp(-0.5 * sq_dist)
        

In [None]:
# custom likelihood: Gaussian process with a linear mean and an RBF covariance
class HierarchicalGPLikelihood(bilby.Likelihood):
    """Gaussian-process marginal likelihood with a linear mean function.

    The data are modelled as ``y ~ N(X @ beta, K + sigma_noise**2 * I)``, where
    ``K`` is the RBF kernel matrix of the rows of ``X`` with amplitude
    ``sigma_gp`` and one lengthscale ``ell{i}`` per column.

    Sampled parameters (``D = X.shape[1]``):

    * ``beta0 .. beta{D-1}``  -- coefficients of the linear mean
    * ``ell0 .. ell{D-1}``    -- per-column kernel lengthscales
    * ``sigma_gp``            -- kernel amplitude
    * ``sigma_noise``         -- standard deviation of the white-noise term
    * ``tau0 .. tau{D-1}``, ``lambda`` -- registered but not used by
      ``log_likelihood``; presumably scales of a hierarchical prior on the
      betas, to be wired in through the priors (TODO confirm).

    Parameters
    ----------
    X : array_like, shape (N, D)
        Design matrix, one observation per row.
    y : array_like
        Observed targets; assumed to have shape (N,) -- TODO confirm.
    """

    def __init__(self, X, y):
        # store data
        self.X = np.asarray(X)
        self.y = np.asarray(y)

        # define parameters; insertion order is beta*, tau*, ell*, then scalars
        n_features = self.X.shape[1]
        parameters = {}
        for prefix in ("beta", "tau", "ell"):
            for i in range(n_features):
                parameters[f"{prefix}{i}"] = None

        parameters["sigma_noise"] = None
        parameters["sigma_gp"] = None
        parameters["lambda"] = None

        super().__init__(parameters=parameters)

    def log_likelihood(self):
        """Return the log marginal likelihood of ``y`` at the current parameters.

        Evaluates ``log N(y | X @ beta, K + sigma_noise**2 * I)`` through a
        Cholesky factorisation. If the covariance is not positive definite, or
        the result is not finite, a very large negative finite number is
        returned so the sampler rejects the point instead of crashing.
        """
        n_obs, n_features = self.X.shape
        # stand-in for -inf that stays finite
        log_l_floor = np.nan_to_num(-np.inf)

        betas = np.array([self.parameters[f"beta{i}"] for i in range(n_features)])
        ells = np.array([self.parameters[f"ell{i}"] for i in range(n_features)])
        sigma_gp = self.parameters["sigma_gp"]
        sigma_noise = self.parameters["sigma_noise"]

        # residual of the data about the linear mean
        residual = self.y - self.X @ betas

        # Per-column lengthscales: divide each column by its lengthscale and
        # evaluate the kernel with unit lengthscale.
        kernel = rbf_kernel(self.X / ells, 1.0, sigma_gp)
        covariance = kernel + sigma_noise**2 * np.eye(n_obs)

        try:
            chol = np.linalg.cholesky(covariance)
        except np.linalg.LinAlgError:
            return log_l_floor

        # With covariance = L L^T: r^T C^-1 r = ||L^-1 r||^2 and
        # log|C| = 2 * sum(log(diag(L))).
        whitened = np.linalg.solve(chol, residual)
        log_det = 2.0 * np.sum(np.log(np.diag(chol)))
        log_l = -0.5 * (whitened @ whitened + log_det + n_obs * np.log(2.0 * np.pi))

        if not np.isfinite(log_l):
            return log_l_floor
        return float(log_l)


In [None]:
# model function

def model_function(X, **params):
    """Evaluate the linear model ``X @ beta``.

    The coefficient vector is assembled from the keyword arguments
    ``beta0``, ``beta1``, ... -- one per column of ``X``; any other keyword
    arguments are ignored.
    """
    coefficients = []
    for column in range(X.shape[1]):
        coefficients.append(params[f"beta{column}"])
    return X @ np.array(coefficients)

# make priors: every parameter the likelihood declares needs an entry here,
# otherwise it is left unsampled at None. The number of per-column parameters
# is taken from the design matrix rather than hard-coded.
# NOTE(review): the LogUniform ranges below are weakly-informative
# placeholders -- tune them to the scale of the data.
n_features = Xtrain.shape[1]
priors = dict()
for i in range(n_features):
    # uniform prior for each linear-mean coefficient
    priors[f"beta{i}"] = bilby.core.prior.Uniform(-5, 5, f"beta{i}")
    # positive scale parameters: log-uniform priors
    priors[f"tau{i}"] = bilby.core.prior.LogUniform(1e-2, 1e2, f"tau{i}")
    priors[f"ell{i}"] = bilby.core.prior.LogUniform(1e-2, 1e2, f"ell{i}")

priors["sigma_noise"] = bilby.core.prior.LogUniform(1e-3, 1e1, "sigma_noise")
priors["sigma_gp"] = bilby.core.prior.LogUniform(1e-3, 1e1, "sigma_gp")
priors["lambda"] = bilby.core.prior.LogUniform(1e-2, 1e2, "lambda")

# build the likelihood on the training split
likelihood = HierarchicalGPLikelihood(
    X=Xtrain,
    y=ytrain,
)

# run the sampler (dynesty is a nested sampler, not MCMC)
result = bilby.run_sampler(
    likelihood=likelihood,  # likelihood function
    priors=priors,  # prior distributions
    sampler="dynesty",
    nlive=250,
    outdir=outdir,
    label=label,
    # plot=True
)