## Hierarchical GP with Bilby

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import bilby
from bilby.core.utils import random
import json

# Set up: seed bilby's `random` utility so repeated runs draw the same numbers,
# and choose the label / output directory that are later handed to the sampler.
random.seed(123)
label = "GP"
outdir = "outdir"
bilby.utils.check_directory_exists_and_if_not_mkdir(outdir)

# Synthetic sample to fit, read from CSV.
# NOTE(review): going by the path this is replicate 3 of the size-500 samples — confirm.
sample_dat_path = "../synthetic_data/N11000_AP10_noise0.5_seed1/Size500/Rep3.csv"
sample_dat = pd.read_csv(sample_dat_path)

# Metadata JSON for the same synthetic data set; its 'beta' entry is read
# further down as the injected (true) coefficient values.
val_dat_path = "../synthetic_data/N11000_AP10_noise0.5_seed1/N11000_AP10_noise0.5_seed1_meta.json"
with open(val_dat_path, 'r') as f:
    val_dat = json.load(f)

from sklearn.model_selection import train_test_split

X = sample_dat.iloc[:, :-1].values  # predictors: every column except the last
y = sample_dat.iloc[:, -1].values   # response: the last column

# Hold out 20% of the rows for testing; random_state pins the split.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=22)

In [None]:
# Gaussian-noise likelihood for a linear model y = X @ beta
class HierarchicalGP(bilby.Likelihood):
    """Likelihood of ``y`` under a linear model with Gaussian noise.

    The model is ``y = X @ beta + noise`` where the noise has standard
    deviation ``sigma``. One sampled parameter ``beta{i}`` is declared per
    column of ``X``.

    Parameters
    ----------
    X : array-like
        Design matrix; converted with ``np.asarray`` and indexed as 2-D
        (its second axis sets the number of coefficients).
    y : array-like
        Observed responses; converted with ``np.asarray``.
    sigma : float or array-like
        Noise standard deviation used in the Gaussian log-density.
    """

    def __init__(self, X, y, sigma):
        # Keep the data on the instance as numpy arrays.
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        self.sigma = sigma

        # Declare one (initially unset) parameter per design-matrix column.
        n_coef = self.X.shape[1]
        names = (f"beta{i}" for i in range(n_coef))
        super().__init__(parameters=dict.fromkeys(names))

    def log_likelihood(self):
        """Return the summed Gaussian log-density of the residuals."""
        n_coef = self.X.shape[1]
        coef = np.array([self.parameters[f"beta{k}"] for k in range(n_coef)])
        prediction = self.X @ coef
        residual = self.y - prediction
        # Per-observation log N(residual | 0, sigma^2), then summed.
        per_point = -0.5 * (residual / self.sigma) ** 2 - 0.5 * np.log(2 * np.pi * self.sigma ** 2)
        return np.sum(per_point)

In [None]:
# model function

def model_function(X, **params):
    """Return the linear prediction ``X @ beta``.

    One coefficient is looked up per column of ``X`` under the keyword
    names ``beta0``, ``beta1``, ...; any other keywords are ignored.
    A missing ``beta{i}`` keyword raises ``KeyError``.
    """
    n_columns = X.shape[1]
    coefficients = np.array([params[f"beta{col}"] for col in range(n_columns)])
    prediction = X @ coefficients
    return prediction

# Priors: one Uniform(-5, 5) prior per regression coefficient. The count is
# taken from the training design matrix so the prior keys always match the
# beta{i} parameters that the likelihood reads (one per column of X).
n_features = Xtrain.shape[1]
priors = {
    f"beta{i}": bilby.core.prior.Uniform(-5, 5, f"beta{i}")
    for i in range(n_features)
}

# Gaussian-noise likelihood on the training split. HierarchicalGP is the
# likelihood class defined in this file (the previously referenced
# LinearRegressionLikelihood does not exist and raised NameError).
# NOTE(review): sigma=0.5 presumably mirrors the "noise0.5" in the data path — confirm.
likelihood = HierarchicalGP(
    X=Xtrain,
    y=ytrain,
    sigma=0.5,
)

# Injection parameters for validation: the true coefficient values stored
# under 'beta' in the metadata file.
injection_parameters = {
    f"beta{i}": beta for i, beta in enumerate(val_dat['beta'])
}

# Run the sampler. dynesty is a nested sampler (not MCMC); nlive is its
# number of live points.
result = bilby.run_sampler(
    likelihood=likelihood,  # likelihood object evaluated at each sample
    priors=priors,  # prior distributions, keyed by parameter name
    sampler="dynesty",
    nlive=250,
    injection_parameters=injection_parameters,  # true values, used for validation
    outdir=outdir,
    label=label,
    # Add plot=True to also have bilby produce a corner plot of the result.
)