# In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from joblib import Parallel, delayed
import time

# Optimized LangevinSampler class
class LangevinSampler:
    """Langevin-type sampler for a logistic-regression style posterior.

    The data array ``x`` passed to every method is 2-D: all columns but the
    last hold the covariates ``z`` and the last column holds the labels ``y``.
    The drift is built from the gradient

        sum_i (1 - y_i) z_i  -  sum_i z_i / (1 + exp(z_i . theta))  +  gamma * theta

    i.e. a logistic likelihood term plus an isotropic quadratic penalty.

    ``algo`` selects which correction terms enter each step:

    * ``'LMC'``    -- gradient drift and plain Gaussian noise only.
    * ``'LMCO'``   -- adds the Hessian-times-gradient drift correction and the
      Hessian-corrected noise.
    * ``'aHOLLA'`` -- as ``'LMCO'`` plus the vector-Laplacian term divided by
      ``beta``.

    Args:
        targ: Target distribution; only ``'posterior'`` is accepted.
        algo: One of ``'LMC'``, ``'LMCO'``, ``'aHOLLA'``.
        step: Step size of the discretisation.
        beta: Inverse-temperature-like factor; scales the noise by
            ``sqrt(2 * step / beta)`` and divides the vector-Laplacian term.
        d: Parameter dimension. May be omitted; ``sample`` then infers it
            from ``theta0``.
        n: Number of observations (stored only; read by external drivers).
        gamma: Coefficient of the ``gamma * theta`` term in the gradient.

    Note:
        ``_exp`` memoises ``exp(z . theta)`` without checking its arguments.
        ``sample`` resets ``_exp_cache`` at the start of every iteration;
        anyone calling the private helpers directly with a new ``theta`` or
        ``x`` must set ``_exp_cache = None`` first.
    """

    def __init__(self, targ, algo, step=0.001, beta=1, d=None, n=None, gamma=None):
        assert targ == 'posterior', "only targ='posterior' is supported"
        assert algo in ['LMC', 'LMCO', 'aHOLLA'], \
            "algo must be one of 'LMC', 'LMCO', 'aHOLLA'"

        self.targ = targ
        self.algo = algo
        self.step = step
        self.beta = beta
        self.d = d
        self.n = n
        self.gamma = gamma

        # Identity covariance. d is optional, and np.eye(None) raises, so the
        # matrix is only built when the dimension is actually known.
        self.cov = None if d is None else np.eye(d)

        # Cache variables (only _exp_cache is used by the methods below).
        self._exp_cache = None
        self._grad_cache = None
        self._hvp_cache = None

    @staticmethod
    def _n_iterations(runtime, step):
        """Return the number of steps of size ``step`` that fit in ``runtime``.

        A ratio that is an integer up to floating-point rounding (for example
        ``0.3 / 0.1 == 2.9999999999999996``) is snapped to that integer
        instead of being truncated one step short.
        """
        ratio = runtime / step
        nearest = round(ratio)
        if abs(ratio - nearest) <= 1e-9 * max(1.0, abs(ratio)):
            return int(nearest)
        return int(ratio)

    def _exp(self, theta, x):
        """Return ``exp(z . theta)`` per observation, memoised in ``_exp_cache``.

        The cache is NOT keyed on ``theta`` or ``x``; see the class note.
        """
        z = x[:, :-1]
        if self._exp_cache is None:
            self._exp_cache = np.exp(np.dot(z, theta))
        return self._exp_cache

    def _gradient(self, theta, x):
        """Return the gradient described in the class docstring at ``theta``."""
        z, y = x[:, :-1], x[:, -1]
        exp_term = self._exp(theta, x)

        term1 = np.sum((1 - y[:, np.newaxis]) * z, axis=0)
        term2 = np.sum(z / (1 + exp_term)[:, np.newaxis], axis=0)
        term3 = self.gamma * theta  # Since cov is eye matrix

        return term1 - term2 + term3

    def _hessianvectorproduct(self, theta, x, vector):
        """Return the Hessian at ``theta`` applied to ``vector``.

        Computed as ``gamma * vector + z.T @ (w * (z @ vector))`` with
        ``w = exp / (1 + exp)**2``, so the Hessian matrix is never formed.
        """
        z = x[:, :-1]
        exp_term = self._exp(theta, x)

        term = exp_term / (1 + exp_term) ** 2
        hess_inner = term * np.dot(z, vector)
        hvp_term = np.dot(z.T, hess_inner)

        return self.gamma * vector + hvp_term

    def _vectorlaplacian(self, theta, x):
        """Return ``sum_i z_i * |z_i|^2 * exp_i (1 - exp_i) / (1 + exp_i)^3``."""
        z = x[:, :-1]
        exp_term = self._exp(theta, x)

        norms_squared = np.sum(z**2, axis=1)
        term = exp_term * (1 - exp_term) / (1 + exp_term)**3
        return np.sum(z * (norms_squared * term)[:, np.newaxis], axis=0)

    def _hessian_term(self, theta, x, grad=None):
        """Return Hessian-times-gradient, or 0 for ``'LMC'``.

        Args:
            grad: Optional gradient already evaluated at ``theta``; passing it
                avoids a second gradient evaluation. Computed when omitted.
        """
        if self.algo == 'LMC':
            return 0
        if grad is None:
            grad = self._gradient(theta, x)
        return self._hessianvectorproduct(theta, x, grad)

    def _vectorlaplacian_term(self, theta, x):
        """Return the vector Laplacian divided by ``beta`` for ``'aHOLLA'``, else 0."""
        if self.algo == 'aHOLLA':
            return self._vectorlaplacian(theta, x) / self.beta
        return 0

    def _diffusion_term(self, theta, x):
        """Return the (unscaled) noise vector for one step.

        ``'LMC'`` uses a single standard normal draw. The other algorithms
        draw two normals ``a`` and ``b`` and return
        ``a - step/2 * H a + sqrt(3)/6 * step * H b``.
        """
        if self.algo == 'LMC':
            return np.random.standard_normal(self.d)

        gau_a, gau_b = np.random.standard_normal((2, self.d))

        hvp_a = self._hessianvectorproduct(theta, x, gau_a)
        hvp_b = self._hessianvectorproduct(theta, x, gau_b)

        term_a = gau_a - self.step * hvp_a / 2
        term_b = (np.sqrt(3) / 6) * self.step * hvp_b

        return term_a + term_b

    def sample(self, theta0, x, runtime=200, n_keep=200):
        """Run one chain and return its last ``n_keep`` iterates.

        Args:
            theta0: Initial parameter (any array-like; flattened and copied,
                so the caller's object is never modified).
            x: Data array, covariates in all but the last column, labels last.
            runtime: Total simulated time; the chain takes
                ``runtime / step`` steps.
            n_keep: Number of trailing iterates to return (default 200).

        Returns:
            Array of shape ``(n_keep, d)``. If the chain has fewer than
            ``n_keep`` steps, the leading rows are left as zeros and the
            iterates occupy the trailing rows.
        """
        n_iter = self._n_iterations(runtime, self.step)

        # Force a float copy: an integer theta0 would make the in-place
        # update below fail on the float -> int cast.
        theta = np.array(theta0, dtype=float).reshape(-1)

        # d is optional in the constructor; fall back to theta0's size.
        if self.d is None:
            self.d = theta.size

        theta_arr = np.zeros((n_keep, self.d))
        first_kept = n_iter - n_keep

        # Loop-invariant coefficients.
        half_step_sq = self.step**2 / 2
        noise_scale = np.sqrt(2 * self.step / self.beta)

        for n in range(n_iter):
            # Clear cache at start of each iteration (theta has changed).
            self._exp_cache = None

            grad = self._gradient(theta, x)
            hess_term = self._hessian_term(theta, x, grad)
            vec_lap_term = self._vectorlaplacian_term(theta, x)
            diffusion = self._diffusion_term(theta, x)

            # Update theta
            theta += (-self.step * grad +
                      half_step_sq * hess_term -
                      half_step_sq * vec_lap_term +
                      noise_scale * diffusion)

            # Store the last n_keep samples
            if n >= first_kept:
                theta_arr[n - first_kept] = theta

        return theta_arr

def generate_data(n, d, theta_true):
    """Simulate ``n`` labelled observations from a logistic model.

    Covariates are drawn uniformly from ``{-1, 1}`` for each of the ``d``
    coordinates; each label is a Bernoulli draw with success probability
    ``1 / (1 + exp(-z . theta_true))``.

    Returns:
        Array of shape ``(n, d + 1)``: covariates in the first ``d`` columns,
        the 0/1 label in the last column.
    """
    covariates = np.random.choice([-1, 1], size=(n, d))
    logits = np.dot(covariates, theta_true)
    success_prob = 1 / (1 + np.exp(-logits))
    labels = np.random.binomial(1, success_prob)
    # Labels become the trailing column of the design matrix.
    return np.hstack((covariates, labels.reshape(-1, 1)))

def draw_samples_parallel(sampler, theta0, runtime=200, n_chains=50, n_jobs=-1):
    """Run ``n_chains`` independent chains via joblib and stack their samples.

    Every chain draws its own dataset with ``generate_data`` (``sampler.n``
    observations, true parameter all ones) and then calls ``sampler.sample``
    from ``theta0``.

    Side effect: ``sampler.d`` is overwritten with the length of ``theta0``.

    Returns:
        A single DataFrame with columns ``component_1 .. component_d`` holding
        the samples of all chains concatenated in order, with a fresh index.
    """
    dim = len(np.ravel(theta0))
    sampler.d = dim
    column_names = [f'component_{k + 1}' for k in range(dim)]

    def _simulate_chain():
        # One fresh dataset per chain, then one sampler run on it.
        data = generate_data(sampler.n, dim, np.ones(dim))
        draws = sampler.sample(theta0, data, runtime=runtime)
        return pd.DataFrame(draws, columns=column_names)

    pool = Parallel(n_jobs=n_jobs, prefer="processes")
    chain_frames = pool(delayed(_simulate_chain)() for _ in range(n_chains))

    return pd.concat(chain_frames, ignore_index=True)

# In [None]:
if __name__ == "__main__":
    # Experiment grid: every (dimension, sample size, algorithm) combination
    # is run with the same chain count, simulated time and step size.
    d_values = [2, 5, 10]
    n_values = [100, 200, 300, 400, 500]
    algorithms = ['LMC', 'LMCO', 'aHOLLA']
    n_chains = 50
    runtime = 1000
    step_size = 0.01

    # Maps (d, n, algo) -> {'mse': ..., 'runtime': ...}
    results = {}

    for d in d_values:
        for n in n_values:
            theta_true = np.ones(d)
            gamma = 1

            for algo in algorithms:
                sampler = LangevinSampler(
                    targ='posterior',
                    algo=algo,
                    step=step_size,
                    d=d,
                    n=n,
                    gamma=gamma,
                )
                theta_init = 2 * np.ones(d)

                # Wall-clock time for all chains of this configuration.
                start_time = time.time()
                samples_df = draw_samples_parallel(
                    sampler,
                    theta_init,
                    runtime=runtime,
                    n_chains=n_chains,
                )
                runtime_sec = time.time() - start_time

                # Compute MSE: average each chain's block of 200 rows, then
                # take the mean squared distance of those averages to theta_true.
                chain_id = samples_df.index // 200
                mean_samples = samples_df.groupby(chain_id).mean().values
                sq_errors = np.linalg.norm(mean_samples - theta_true, axis=1) ** 2
                mse = np.mean(sq_errors)

                results[(d, n, algo)] = {'mse': mse, 'runtime': runtime_sec}

                print(f"d={d}, n={n}, {algo}: MSE={mse:.4f}, Time={runtime_sec:.2f}s")