## Bayesian Model

This notebook implements the Bayesian model using Pyro and fitting it with the filtered data containing averages for sentiment scores and ratings.

In [33]:
# Import necessary libraries
import torch
import pyro
import numpy as np
import pandas as pd
import pyro.distributions as dist
from pyro.nn import PyroModule, PyroSample
from pyro.infer import SVI, Trace_ELBO, Predictive
from pyro.optim import Adam
from pyro.infer.autoguide import AutoNormal

Load the filtered dataset from (``ca_restaurants_bayesian_dataset.csv``) and extract the features and target using Torch Tensor.

In [34]:
# Load the dataset
df = pd.read_csv("ca_restaurants_bayesian_dataset.csv")

# Extract the features and target
X_sentiment = torch.tensor(df["avg_sentiment_score"].values, dtype=torch.float32)
X_log_reviews = torch.tensor(df["log_review_count"].values, dtype=torch.float32)
y = torch.tensor(df["avg_rating"].values, dtype=torch.float32)

Define the Bayesian model in Pyro.

- Bayesian linear regression:
$r_d \sim \mathcal{N}\left( \alpha + \beta_s \cdot s_d + \beta_r \cdot \log(1 + \text{review\_count}_d), \sigma^2 \right)$
- $\alpha$ = overall mean rating
- $\beta_s$ = impact of sentiment on star rating
- $\beta_r$ = impact of log-popularity (more reviews ---> more reliable rating)

##### Priors
- $\alpha, \beta_s, \beta_r \sim \mathcal{N}\left(0,1\right)$
- $ \sigma \sim HalfCauchy\left(1\right)$

In [35]:
# Define the Bayesian model
def bayesian_model(X_sentiment, X_log_reviews, y=None):
    # Define priors for the model parameters
    alpha = pyro.sample("alpha", dist.Normal(0., 1. ))
    beta_sentiment = pyro.sample("beta_sentiment", dist.Normal(0., 1.))
    beta_log_reviews = pyro.sample("beta_reviews", dist.Normal(0., 1.))
    sigma = pyro.sample("sigma", dist.HalfCauchy(1.))

    # Define the linear model
    mean = alpha + beta_sentiment * X_sentiment + beta_log_reviews * X_log_reviews
    # Sample from the likelihood
    with pyro.plate("data", len(X_sentiment)):
        pyro.sample("obs", dist.Normal(mean, sigma), obs=y)

Define the guide (mean-field variational inference).

In [36]:
# Define the guide for mean-field variational inference
guide = AutoNormal(bayesian_model)

Fit the model with stochastic variational inference. Train the model with 5000 steps and print the loss.

In [37]:
# Clear the parameter store
pyro.clear_param_store()

# Define the Stochastic Variational Inference (SVI) object
svi = SVI(
    model=bayesian_model,
    guide=guide,
    optim=Adam({"lr": 0.01}),
    loss=Trace_ELBO()
)

# Training the model with 5000 steps
num_steps = 5000
for step in range(num_steps):
    # Perform a single step of optimization
    loss = svi.step(X_sentiment, X_log_reviews, y)
    # Print the loss every 500 steps
    if step % 500 == 0:
        print(f"Step {step} : loss = {loss}")

Step 0 : loss = 6428.512166008353
Step 500 : loss = 551.7367859482765
Step 1000 : loss = 197.11374771595
Step 1500 : loss = 85.43491965532303
Step 2000 : loss = 86.06524235010147
Step 2500 : loss = 83.23494756221771
Step 3000 : loss = 82.20709455013275
Step 3500 : loss = 83.53672397136688
Step 4000 : loss = 83.73305231332779
Step 4500 : loss = 89.42959153652191


Extract posterior samples (mean and 95% credible intervals) and report estimates.

In [38]:
# Extract the learned parameters
predictive = Predictive(
    bayesian_model,
    guide=guide,
    num_samples=1000,
    return_sites=["alpha", "beta_sentiment", "beta_reviews", "sigma"]
)

# Generate samples from the posterior predictive distribution
samples = predictive(X_sentiment, X_log_reviews)

for parameter in ["alpha", "beta_sentiment", "beta_reviews", "sigma"]:
    values = samples[parameter].detach().numpy()
    mean = values.mean()
    confidence_interval = (np.percentile(values, 2.5), np.percentile(values, 97.5))
    print(f"{parameter}: mean = {mean:.3f}, 95% confidence interval = {confidence_interval}")

alpha: mean = 1.950, 95% confidence interval = (np.float32(1.9319592), np.float32(1.9692227))
beta_sentiment: mean = 3.032, 95% confidence interval = (np.float32(3.003604), np.float32(3.0629349))
beta_reviews: mean = -0.024, 95% confidence interval = (np.float32(-0.028452465), np.float32(-0.020107083))
sigma: mean = 0.263, 95% confidence interval = (np.float32(0.24925363), np.float32(0.27711612))
