In [1]:
import numpy as np
from scipy.optimize import minimize
from scipy.stats import norm

In [2]:

# Generate synthetic data
def generate_data(n_samples, n_features, true_theta, true_thresholds, sigma=1.0):
    """
    Generate synthetic data for ordinal regression using a latent variable model.

    Parameters:
    n_samples : int
        Number of samples to generate.
    n_features : int
        Number of features.
    true_theta : ndarray
        True parameter vector for features.
    true_thresholds : ndarray
        True thresholds for ordinal categories.
    sigma : float
        Standard deviation of the latent variable.

    Returns:
    X : ndarray
        Feature matrix.
    y : ndarray
        Ordinal target vector.
    """
    X = np.random.randn(n_samples, n_features)  # Random feature matrix
    z = X @ true_theta + np.random.normal(0, sigma, size=n_samples)  # Latent variable
    thresholds = np.concatenate([[-np.inf], true_thresholds, [np.inf]])  # Include boundaries

    # Determine y based on thresholds
    y = np.digitize(z, thresholds) - 1  # Subtract 1 to make y start from 0

    return X, y

In [3]:
def ordinal_nll(X, y, theta, thresholds, sigma=1.0):
    """
    Compute the negative log-likelihood for ordinal regression (vectorized).

    Parameters:
    X : ndarray
        Feature matrix where each row represents a sample and each column represents a feature.
    y : ndarray
        Target vector where each element is the target ordinal value for the corresponding sample.
    theta : ndarray
        Parameter vector for features (weights).
    thresholds : ndarray
        Thresholds for ordinal categories (K-1 cutpoints).
    sigma : float
        Standard deviation of the latent variable.

    Returns:
    float
        The negative log-likelihood value.
    """
    # Compute the linear predictor
    linear_pred = X @ theta  # shape: [n_samples]

    # Prepend -inf and append +inf to thresholds for boundary conditions
    thresholds = np.concatenate([[-np.inf], thresholds, [np.inf]])
    
    # Compute probabilities for each category (vectorized)
    cdf_upper = norm.cdf((thresholds[y + 1] - linear_pred) / sigma)
    cdf_lower = norm.cdf((thresholds[y] - linear_pred) / sigma)
    prob_y = cdf_upper - cdf_lower

    # Avoid log(0) with numerical stability
    prob_y = np.clip(prob_y, 1e-15, 1 - 1e-15)
    
    # Compute negative log-likelihood
    return -np.sum(np.log(prob_y))

In [4]:
# True parameters for synthetic data
n_samples = 1000
n_features = 3
n_categories = 4  # K=4 ordinal categories
true_theta = np.array([1.0, -1.0, 0.5])  # True feature weights
true_thresholds = np.array([-1.0, 0.0, 1.0])  # True thresholds
sigma_true = 1.0  # True sigma

# Generate synthetic data
X, y = generate_data(n_samples, n_features, true_theta, true_thresholds, sigma=sigma_true)



In [5]:
# Initial parameters for optimization
theta_init = np.random.randn(n_features)  # Random initial weights
thresholds_init = np.linspace(-2, 2, n_categories - 1)  # Evenly spaced initial thresholds
params_init = np.concatenate([theta_init, thresholds_init])  # Combine parameters

def unpack_params(params, n_features):
    """Helper function to unpack weights and thresholds."""
    theta = params[:n_features]
    thresholds = params[n_features:]
    return theta, thresholds

# Wrapper for optimization function
def nll_wrapper(params, X, y, sigma):
    theta, thresholds = unpack_params(params, X.shape[1])
    return ordinal_nll(X, y, theta, thresholds, sigma=sigma)



In [6]:

# Optimize using scipy's minimize
sigma_fixed = sigma_true  # Fix sigma to true value
result = minimize(
    lambda params: nll_wrapper(params, X, y, sigma_fixed),
    params_init,
    method='BFGS'
)

# Extract results
theta_opt, thresholds_opt = unpack_params(result.x, n_features)

# Print results
print("Optimization Result:")
print("Success:", result.success)
print("Message:", result.message)
print("True Parameters (theta):", true_theta)
print("Estimated Parameters (theta):", theta_opt)
print("True Thresholds:", true_thresholds)
print("Estimated Thresholds:", thresholds_opt)
print("Function Value (Negative Log-Likelihood):", result.fun)

Optimization Result:
Success: True
Message: Optimization terminated successfully.
True Parameters (theta): [ 1.  -1.   0.5]
Estimated Parameters (theta): [ 1.02232015 -1.03886499  0.48176276]
True Thresholds: [-1.  0.  1.]
Estimated Thresholds: [-1.00456218 -0.06331369  1.0246352 ]
Function Value (Negative Log-Likelihood): 885.5286481795844


In [9]:
import numpy as np
import jax.numpy as jnp
from jax import random
import numpyro
import numpyro.distributions as dist
from numpyro.infer import MCMC, NUTS

In [12]:
# Bayesian model definition
def ordinal_regression_model(X, y=None, n_categories=4):
    """
    NumPyro model for Bayesian ordinal regression.

    Parameters:
    X : jnp.ndarray
        Feature matrix.
    y : jnp.ndarray or None
        Ordinal target vector.
    n_categories : int
        Number of ordinal categories.

    Returns:
    None
    """
    n_features = X.shape[1]

    # Priors for regression coefficients
    beta = numpyro.sample("beta", dist.Normal(jnp.zeros(n_features), 1.0))

    # Priors for thresholds (sorted constraints are needed)
    raw_thresholds = numpyro.sample("raw_thresholds", dist.Normal(0.0, 1.0).expand([n_categories - 1]))
    thresholds = jnp.sort(raw_thresholds)  # Ensure thresholds are ordered

    # Prior for the standard deviation of the latent variable
    sigma = numpyro.sample("sigma", dist.Exponential(1.0))

    # Linear predictor
    linear_pred = jnp.dot(X, beta)

    # Compute probabilities for each category
    cdf_upper = dist.Normal(linear_pred, sigma).cdf(thresholds[y + 1])
    cdf_lower = dist.Normal(linear_pred, sigma).cdf(thresholds[y])

    # Clip CDF values to ensure numerical stability
    cdf_upper = jnp.clip(cdf_upper, 0.0, 1.0)
    cdf_lower = jnp.clip(cdf_lower, 0.0, 1.0)

    prob_y = cdf_upper - cdf_lower

    # Avoid invalid probabilities
    prob_y = jnp.clip(prob_y, 1e-15, 1 - 1e-15)


    # Likelihood
    numpyro.sample("obs", dist.Categorical(probs=prob_y), obs=y)

In [13]:
# Convert data to JAX arrays
X = jnp.array(X)
y = jnp.array(y)

# Run MCMC
rng_key = random.PRNGKey(0)
nuts_kernel = NUTS(ordinal_regression_model)
mcmc = MCMC(nuts_kernel, num_warmup=1000, num_samples=2000, num_chains=1)
mcmc.run(rng_key, X=X, y=y, n_categories=n_categories)
mcmc.print_summary()

# Extract posterior samples
posterior_samples = mcmc.get_samples()

# Print results
print("True Parameters:")
print(f"Theta: {true_theta}")
print(f"Thresholds: {true_thresholds}")
print(f"Sigma: {sigma_true}")

sample: 100%|██████████| 3000/3000 [00:18<00:00, 164.45it/s, 31 steps of size 2.77e-02. acc. prob=0.84] 



                       mean       std    median      5.0%     95.0%     n_eff     r_hat
          beta[0]     -0.41      0.50     -0.39     -1.19      0.42    148.44      1.00
          beta[1]      0.08      0.56      0.06     -0.74      1.07    199.24      1.00
          beta[2]     -1.02      0.53     -0.99     -1.81     -0.15    176.92      1.00
raw_thresholds[0]      0.32      0.50      0.31     -0.47      1.13    116.81      1.01
raw_thresholds[1]     -1.33      0.64     -1.25     -2.46     -0.35    102.31      1.00
raw_thresholds[2]      1.52      0.66      1.52      0.49      2.70    183.92      1.00
            sigma      0.04      0.04      0.03      0.00      0.09    303.43      1.00

Number of divergences: 900
True Parameters:
Theta: [ 1.  -1.   0.5]
Thresholds: [-1.  0.  1.]
Sigma: 1.0
