In [2]:
from types import MethodType

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact, fixed, widgets

np.random.seed(2023)


In [3]:
class LinearModel:
  '''
    Linear model producing one output value (y) per row of covariates:
    y = w_1 * x_1 + ... + w_M * x_M + b
    M is the number of features, w holds the weights and b is the bias.
    Both w and b are None until encode() has been called.
  '''
  def __init__(self):
    # parameters are unset until a proposal is encoded
    self.w=self.b=None

  def encode(self,theta):
    '''
      Split a flat parameter vector into the weights and the bias stored on the model
      theta: Sequence of model parameters [w_1, w_2, ..., w_M, b]
    '''
    # everything except the last entry is a weight; the last entry is the bias
    weights=theta[:-1]
    self.w=weights
    self.b=theta[-1]

  def predict(self,x_in):
    '''
      Compute y for the given inputs using the currently encoded w and b
      x_in: Object with a .dot method (e.g. DataFrame or array) holding columns x_1, ..., x_M
    '''
    return x_in.dot(self.w) + self.b

  def evaluate_proposal(self,data,theta):
    '''
      Encode the proposed parameters (theta) into the model and return its prediction
      data: Inputs with columns x_1, x_2, ..., x_M (forwarded to predict)
      theta: Sequence of model parameters [w_1, w_2, ..., w_M, b]
    '''
    self.encode(theta)
    return self.predict(data)
      
  

In [None]:
#  defining the likelihood and prior distributions
def likelihood(self,theta,tausq,test=False):
  '''
    Function to compute the Gaussian log-likelihood of the observed targets given the model parameters
    self: Object exposing x_data / y_data, x_test / y_test and model.evaluate_proposal(data, theta)
    theta: List of model parameters [w_1, w_2, ..., w_M, b]
    tausq: Variance of the noise in the data
    test: If True, evaluate on (x_test, y_test) instead of (x_data, y_data)
    Output: [log_likelihood, y_pred, model_simulation, rmse] where
      y_pred is the noise-free model output,
      model_simulation is y_pred plus Gaussian noise with variance tausq,
      rmse is the root-mean-squared error between y_pred and the targets
  '''
  if test:
    x_data,y_data=self.x_test,self.y_test
  else:
    x_data,y_data=self.x_data,self.y_data
  # making the prediction using parameters theta; evaluate_proposal takes (data, theta) in that order
  y_pred=self.model.evaluate_proposal(x_data,theta)
  # np.random.normal expects a standard deviation, whereas tausq is a variance
  model_simulation=y_pred+np.random.normal(0,np.sqrt(tausq),size=y_pred.shape)
  residual=y_data-y_pred
  # rmse (for debugging) is computed inline: `self.rmse` is also used as storage for the
  # sampled RMSE values (None at construction, an array after sampling), so it is not
  # reliably callable from here
  rmse=np.sqrt(np.mean(np.square(residual)))
  # computing the log-likelihood
  log_likelihood=np.sum(-0.5*np.log(2*np.pi*tausq)-0.5*np.square(residual)/tausq)
  return [log_likelihood,y_pred,model_simulation,rmse]
#  defining the prior distribution
def prior(self,sigma_sqr,nu_1,nu_2,theta,tausq):
  '''
    Log of the prior density (up to an additive constant) over the model parameters and the noise variance
    sigma_sqr: Variance of the zero-mean Gaussian term applied to theta
    nu_1: First hyperparameter of the inverse-gamma style term on tausq (enters as -(1 + nu_1) * log(tausq))
    nu_2: Second hyperparameter of the inverse-gamma style term on tausq (enters as -nu_2 / tausq)
    theta: List of model parameters [w_1, w_2, ..., w_M, b]
    tausq: Variance of the noise in the data
    sigma_sqr, nu_1 and nu_2 are hyperparameters supplied by the caller;
    the number of parameters is read from self.theta_size
    Output: log prior value
  '''
  dim=self.theta_size # number of parameters in the model
  # Gaussian contribution: normalisation term minus the quadratic penalty on theta
  log_norm=-1 * (dim / 2) * np.log(sigma_sqr)
  quad_penalty=1/(2 * sigma_sqr) * np.sum(np.square(theta))
  log_gaussian=log_norm - quad_penalty
  # contribution of the noise variance
  log_inv_gamma=- (1 + nu_1) * np.log(tausq) - (nu_2 / tausq)
  return log_gaussian + log_inv_gamma


Applying MCMC sampling: this is the core of the method, playing a role analogous to the one gradient descent plays in optimisation-based training.

In [4]:
def MCMC_sampler(self):
  '''
    Run a random-walk Metropolis-Hastings sampler over the model parameters (theta)
    and the noise variance tau^2, which is sampled in log space as eta = log(tau^2).

    Reads from self: n_samples, n_burnin, theta_size, x_data, y_data, x_test,
    step_theta, step_eta, sigma_sqr, nu_1, nu_2, model, prior, likelihood.
    Writes to self: post_theta, post_tau, post_eta, rmse (all with burn-in removed).

    Every stored row i (parameters, predictions, simulations, rmse) describes the
    state of the chain after step i, so a rejected proposal repeats the previous row.

    Output: (results_df, pred_dict)
      results_df: DataFrame with columns w0..w{M-1}, b, tau, eta, rmse (one row per retained sample)
      pred_dict: dict with keys train_pred, train_sim, test_pred, test_sim
  '''
  n_samples=self.n_samples
  n_train=self.x_data.shape[0]
  n_test=self.x_test.shape[0]

  # storage for the chain
  post_theta=np.ones((n_samples, self.theta_size))
  post_tau=np.ones((n_samples,1))
  post_eta=np.ones((n_samples,1))
  # model output f(x) for every sample
  y_pred=np.zeros((n_samples,n_train))
  # simulated values f(x) + error for every sample
  y_sim=np.zeros((n_samples,n_train))
  # rmse of each sample
  rmse=np.zeros(n_samples)
  # same quantities on the test set
  y_test_pred=np.ones((n_samples,n_test))
  y_sim_test=np.ones((n_samples,n_test))
  rmse_test=np.zeros(n_samples)

  # initial values for the model parameters
  theta=np.random.randn(self.theta_size)
  # first prediction, used only to initialise the noise variance
  y_pred[0,]=self.model.evaluate_proposal(self.x_data,theta)
  # init eta - a gaussian random walk in log space of tau^2
  # NOTE(review): assumes y_data is 1-D with one entry per training row - confirm
  eta=np.log(np.var(y_pred[0,]-self.y_data))
  tausq=np.exp(eta) # converting back to tau^2 space

  # prior and likelihood of the initial state
  prior_val=self.prior(self.sigma_sqr,self.nu_1,self.nu_2,theta,tausq)
  [log_lik,y_pred[0,],y_sim[0,],rmse[0]]=self.likelihood(theta,tausq)
  [_,y_test_pred[0,],y_sim_test[0,],rmse_test[0]]=self.likelihood(theta,tausq,test=True)
  # record the initial state so that a rejection at step 1 copies real values
  post_theta[0,]=theta
  post_tau[0,]=tausq
  post_eta[0,]=eta

  n_accept=0 # number of accepted proposals
  for i in range(1,n_samples):
    # propose new theta and eta with a gaussian random walk
    # (step_theta / step_eta are passed as the scale, i.e. standard deviation, of the step)
    theta_proposal=theta+np.random.normal(0,self.step_theta,self.theta_size)
    eta_proposal=eta+np.random.normal(0,self.step_eta) # tau^2 is sampled in log space
    tausq_proposal=np.exp(eta_proposal)
    # prior of the proposal
    prior_proposal=self.prior(self.sigma_sqr,self.nu_1,self.nu_2,theta_proposal,tausq_proposal)
    # likelihood of the proposal on the training and the test data
    [lik_proposal,train_pred,train_sim,train_rmse]=self.likelihood(theta_proposal,tausq_proposal)
    [_,test_pred,test_sim,test_rmse]=self.likelihood(theta_proposal,tausq_proposal,test=True)

    # log of the Metropolis-Hastings acceptance ratio
    log_ratio=float(np.squeeze((lik_proposal-log_lik)+(prior_proposal-prior_val)))
    # uniform draw on [0, 1) to accept or reject the proposal
    u=np.random.uniform(0,1)
    # exp(min(0, r)) equals min(1, exp(r)) but cannot overflow; a NaN ratio is rejected
    accepted=(not np.isnan(log_ratio)) and u<np.exp(min(0.0,log_ratio))
    if accepted:
      # move the chain to the proposal
      theta=theta_proposal
      eta=eta_proposal
      prior_val=prior_proposal
      log_lik=lik_proposal
      n_accept+=1
      # store the new state
      post_theta[i,]=theta_proposal
      post_tau[i,]=tausq_proposal
      post_eta[i,]=eta_proposal
      y_pred[i,]=train_pred
      y_sim[i,]=train_sim
      rmse[i]=train_rmse
      y_test_pred[i,]=test_pred
      y_sim_test[i,]=test_sim
      rmse_test[i]=test_rmse
    else:
      # keep the current state: repeat the previous row everywhere
      post_theta[i,]=post_theta[i-1,]
      post_tau[i,]=post_tau[i-1,]
      post_eta[i,]=post_eta[i-1,]
      y_pred[i,]=y_pred[i-1,]
      y_sim[i,]=y_sim[i-1,]
      rmse[i]=rmse[i-1]
      y_test_pred[i,]=y_test_pred[i-1,]
      y_sim_test[i,]=y_sim_test[i-1,]
      rmse_test[i]=rmse_test[i-1]

  # acceptance rate over the proposals actually made (the initial state is not a proposal)
  n_proposals=max(n_samples-1,1)
  acceptance_rate=(n_accept/n_proposals)*100
  print(f'Acceptance rate: {acceptance_rate:.2f}%')

  # keep only the samples after burn-in
  self.post_theta=post_theta[self.n_burnin:,]
  self.post_tau=post_tau[self.n_burnin:,]
  self.post_eta=post_eta[self.n_burnin:,]
  self.rmse=rmse[self.n_burnin:]

  # split theta into w and b; explicit column indexing keeps every entry 1-D
  # even when a single sample is retained
  result_dict={f"w{j}":self.post_theta[:,j] for j in range(self.theta_size-1)}
  result_dict["b"]=self.post_theta[:,-1]
  result_dict["tau"]=self.post_tau[:,0]
  result_dict["eta"]=self.post_eta[:,0]
  result_dict["rmse"]=self.rmse

  # predictions after burn-in
  pred_dict={
    "train_pred":y_pred[self.n_burnin:,],
    "train_sim":y_sim[self.n_burnin:,],
    "test_pred":y_test_pred[self.n_burnin:,],
    "test_sim":y_sim_test[self.n_burnin:,],
  }

  results_df=pd.DataFrame.from_dict(result_dict)

  return results_df, pred_dict




In [5]:
class MCMC:
  '''
    Container for an MCMC run over a LinearModel: holds the data, the sampler and
    prior hyperparameters, the model, and the posterior samples produced by the sampler.
    The module-level functions likelihood, prior and MCMC_sampler are bound to each
    instance as methods in __init__.
  '''
  def __init__(self,n_samples,n_burnin,x_data,y_data,x_test,y_test):
    '''
      n_samples: number of MCMC samples
      n_burnin: number of burn-in samples
      x_data, y_data: training inputs and targets
      x_test, y_test: test inputs and targets
    '''
    self.n_samples=n_samples  # number of MCMC samples
    self.n_burnin=n_burnin # number of burn-in samples
    self.x_data = x_data # (N x M)
    self.y_data = y_data # (N x 1)
    self.x_test = x_test # (Nt x num_features)
    self.y_test = y_test # (Nt x 1)
    # MCMC sampler hyperparameters - step sizes of the Gaussian random walk
    self.step_theta=0.01
    self.step_eta=0.01
    # model hyperparameters
    # considered by looking at distribution of similar trained models - i.e distribution of weights and bias
    self.sigma_sqr=4
    self.nu_1=1
    self.nu_2=0.5
    # initialise linear model
    self.model=LinearModel()
    self.theta_size=x_data.shape[1]+1 # number of features + 1 for bias
    # storing the output
    self.post_theta=None
    self.post_tau=None
    self.post_eta=None
    # `self.rmse` is deliberately not initialised to None here: that instance attribute
    # would shadow the rmse() method below. MCMC_sampler assigns the retained per-sample
    # RMSE array to `self.rmse` when it finishes.

    # bind the module-level functions to this instance
    self.likelihood=MethodType(likelihood, self)
    self.prior=MethodType(prior, self)
    self.MCMC_sampler=MethodType(MCMC_sampler, self)
    # model.encode is already a bound method; wrapping it in MethodType again would
    # pass the model twice
    self.encode=self.model.encode

  def rmse(self,predictions,targets):
    '''
      Root-mean-squared error between predictions and targets
    '''
    return np.sqrt(((predictions - targets) ** 2).mean())



In [6]:
# This is an implementation of a linear model with MCMC sampling