# Bayesian parameter estimation of the bivariate beta distribution

In [1]:
# NOTE(review): presumably required because PyStan drives its own asyncio
# event loop, which would otherwise clash with the loop Jupyter already has
# running -- confirm against the PyStan documentation.
import nest_asyncio
nest_asyncio.apply()

import numpy as np
#from cmdstanpy import cmdstan_path, CmdStanModel
import stan
import matplotlib.pyplot as plt
import os
import tqdm

# Make the project's helper modules under ../scripts importable from here.
import sys
sys.path.append('../scripts')

from parameter_estimation import BivariateBeta

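First, let's analyze the accuracy of Stan's `integrate_1d` function by comparing the log-density it computes with a reference value obtained from SciPy's `quad`.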

In [9]:
# Simulated inputs for checking the Stan density against the SciPy one.
# A dedicated, seeded generator makes the draws reproducible under
# "Restart Kernel -> Run All" without touching NumPy's global random state.
DENSITY_CHECK_SEED = 42
density_rng = np.random.default_rng(DENSITY_CHECK_SEED)

# Four positive parameters, each drawn from a Gamma(shape=1, scale=1).
alpha = density_rng.gamma(1, 1, size=4)

# n evaluation points drawn uniformly from the unit square [0, 1)^2.
n = 1000
XY = density_rng.random(size=(n, 2))

# Data dictionary handed to the Stan program.
# NOTE(review): 'tolerance' is presumably the tolerance used by the numerical
# integration inside the Stan program -- confirm against the .stan file.
data = {'n': n, 'xy': XY, 'tolerance': 1e-10, 'alpha': alpha}

In [3]:
# Load the source of the Stan program that evaluates the density.
filename = '../scripts/bivariate-beta-density.stan'
with open(filename, mode='r') as f:
    model = f.read()

# Compile the program (or reuse the cached build) together with the data
# dictionary assembled in the previous cell.
bivbeta_density = stan.build(model, data=data)

[32mBuilding:[0m found in cache, done.
[36mMessages from [0m[36;1mstanc[0m[36m:[0m
    variable integrand may not have been assigned a value before its use.


In [4]:
# Run the fixed-parameter sampler for a single draw on a single chain, then
# keep only the 'density' quantity from the resulting fit.
density_fit = bivbeta_density.fixed_param(num_chains=1, num_samples=1)
quantities_stan = density_fit['density']

[36mSampling:[0m   0%
[1A[0J[36mSampling:[0m 100% (1/1)
[1A[0J[32mSampling:[0m 100% (1/1), done.
[36mMessages received during sampling:[0m
  Exception: Exception: Error in function tanh_sinh<double>::integrate: The tanh_sinh quadrature evaluated your function at a singular point and got inf. Please narrow the bounds of integration or check your function for singularities. (in '/tmp/httpstan_b1u1lh1i/model_edxvebxa.stan', line 11, column 7 to column 102) (in '/tmp/httpstan_b1u1lh1i/model_edxvebxa.stan', line 32, column 8 to column 90)


In [15]:
# Reference implementation from ../scripts, parameterised by the same alpha.
biv_beta_object = BivariateBeta(alpha=alpha)

# Log of the density evaluated at each of the first n rows of XY.
log_pdf_per_point = [
    np.log(biv_beta_object.pdf(XY[i, 0], XY[i, 1]))
    for i in range(n)
]

# Arrange the values two per row.
# NOTE(review): this presumably relies on `pdf` returning two estimates per
# point (the next cell compares column 0 with column 1) -- confirm.
quantities_scipy = np.reshape(np.array(log_pdf_per_point), (-1, 2))

  in the extrapolation table.  It is assumed that the requested tolerance
  cannot be achieved, and that the returned result (if full_output = 1) is 
  the best which can be obtained.
  result = quad(self._log_integral_pdf, lb, ub, args = ((x, y, self.alpha),), epsabs=1e-10, limit=50)[0]
  in the extrapolation table.  It is assumed that the requested tolerance
  cannot be achieved, and that the returned result (if full_output = 1) is 
  the best which can be obtained.
  result2 = quad(self._integral_pdf, lb, ub, args = (x, y, self.alpha), epsabs=1e-10, limit=50)[0]
  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  result2 = quad(self._integral_pdf, lb, ub, args 

In [19]:
# Largest absolute gap between the two columns of `quantities_scipy`.
# NumPy reductions are used instead of the builtin max(): they are vectorised
# and propagate NaN, whereas builtin max() returns an order-dependent result
# when a NaN is present and could hide a failed evaluation.
np.max(np.abs(quantities_scipy[:, 0] - quantities_scipy[:, 1]))

0.0008469311749701447

In [108]:
# Element-wise absolute discrepancy between the Stan and SciPy log-densities,
# computed once and reused below.
abs_diff = np.abs(quantities_stan - quantities_scipy)

# Row (evaluation point) holding the single largest discrepancy. Each row is
# first collapsed to its own maximum so that `id_max` stays a valid row index
# into XY even when `abs_diff` has more than one column; a flat argmax over a
# 2-D array would not be a row index.
id_max = np.argmax(np.max(abs_diff.reshape(len(abs_diff), -1), axis=1))

# Maximum along the first axis (one value per column). The builtin max() used
# previously raises ValueError as soon as the rows have more than one element.
print('max_value = {}'.format(np.max(abs_diff, axis=0)))
print('(x,y) = {}'.format(XY[id_max]))
print('integral={}'.format(np.log(biv_beta_object.pdf(XY[id_max,0], XY[id_max,1]))))

max_value = [4.76753986]
(x,y) = [1.15400206e-04 4.76916404e-01]
integral=4.767539856106468


Getting the Stan model.

In [2]:
# Path to the Stan program for the full model. Note that this re-binds
# `filename`, which an earlier cell pointed at the density-only program.
filename = '../scripts/bivariate-beta-model.stan'

Generating data.

In [23]:
# Seeded generator so the simulated observations are identical on every
# "Restart Kernel -> Run All"; NumPy's global random state is left untouched.
MODEL_DATA_SEED = 42
model_rng = np.random.default_rng(MODEL_DATA_SEED)

# Concentration parameters used to simulate the data, and the sample size.
alpha = np.array([1,1,1,1])
n = 1

# Each row of U is one Dirichlet(alpha) draw with four components.
U = model_rng.dirichlet(alpha, size=n)

# X and Y are sums of Dirichlet components that share the first component.
X = U[:,0] + U[:,1]
Y = U[:,0] + U[:,2]
XY = np.column_stack([X,Y])

# NOTE(review): `a` and `b` are presumably the hyperparameters of the prior on
# alpha in the Stan program -- confirm against the .stan file.
a = np.array([1,1,1,1])
b = np.array([1,1,1,1])

# Data dictionary handed to the Stan program.
data = {'n': n, 'xy': XY, 'a': a, 'b': b, 'tolerance': 1e-8}

Sampling

In [25]:
# Compile the model program with the data generated above. No other cell
# creates `bivbeta_model`, so without this step the notebook fails with a
# NameError on a fresh kernel. `filename` must point at the model program
# (set in the "Getting the Stan model" cell) and `data` at the dictionary from
# the "Generating data" cell.
SAMPLING_SEED = 1  # fixed seed so the posterior draws are reproducible
with open(filename) as f:
    bivbeta_model = stan.build(f.read(), data=data, random_seed=SAMPLING_SEED)

# Draw 1000 samples from each of 4 chains.
model_fit = bivbeta_model.sample(num_chains=4, num_samples=1000)

[36mSampling:[0m   0%
[1A[0J[36mSampling:[0m   0% (1/8000)
[1A[0J[36mSampling:[0m   0% (2/8000)
[1A[0J[36mSampling:[0m   0% (3/8000)
[1A[0J[36mSampling:[0m   0% (4/8000)
[1A[0J[36mSampling:[0m   1% (103/8000)
[1A[0J[36mSampling:[0m   3% (202/8000)
[1A[0J[36mSampling:[0m   4% (301/8000)
[1A[0J[36mSampling:[0m   5% (400/8000)
[1A[0J[36mSampling:[0m   6% (500/8000)
[1A[0J[36mSampling:[0m   8% (600/8000)
[1A[0J[36mSampling:[0m   9% (700/8000)
[1A[0J[36mSampling:[0m  10% (800/8000)
[1A[0J[36mSampling:[0m  11% (900/8000)
[1A[0J[36mSampling:[0m  12% (1000/8000)
[1A[0J[36mSampling:[0m  14% (1100/8000)
[1A[0J[36mSampling:[0m  15% (1200/8000)
[1A[0J[36mSampling:[0m  16% (1300/8000)
[1A[0J[36mSampling:[0m  18% (1400/8000)
[1A[0J[36mSampling:[0m  19% (1500/8000)
[1A[0J[36mSampling:[0m  20% (1600/8000)
[1A[0J[36mSampling:[0m  21% (1700/8000)
[1A[0J[36mSampling:[0m  22% (1800/8000)
[1A[0J[36mSampling:[0m  2

In [27]:
# 10% quantile of the 'alpha' draws, reduced along axis 1 of the array
# returned by the fit (one value per remaining row).
np.quantile(model_fit['alpha'], 0.1, axis=1)

array([0.23757785, 0.23674657, 0.15590381, 0.15995233])