In [1]:
import numpy as np
import pymc3 as pm

import aesara.tensor as at
from aesara.tensor.random.op import RandomVariable

from scipy import stats as st

import aesara

%config InlineBackend.figure_format = "retina"
%matplotlib inline

You are running the v4 development version of PyMC3 which currently still lacks key features. You probably want to use the stable v3 instead which you can either install via conda or find on the v3 GitHub branch: https://github.com/pymc-devs/pymc3/tree/v3


In [2]:
# Simulation settings shared by the cells below.
K = 50    # number of atoms / stick-breaking weights (truncation level)
N = 300   # NOTE(review): not referenced by any cell visible here
M = 2     # second shape parameter of the Beta(1, M) stick fractions
mu = 2.0  # mean of the Normal the atoms are drawn from (read as a global)

# One seeded generator is threaded through every sampling call.
rng = np.random.RandomState(seed=34)

In [8]:
def generate_weights(M, rng, K):
    """Draw the weights of a truncated stick-breaking construction.

    ``K - 1`` fractions are drawn from ``Beta(1, M)``. The k-th weight is the
    k-th fraction of whatever stick is left after the previous breaks, and
    the final weight is the leftover stick itself.

    Parameters
    ----------
    M : float
        Second shape parameter of the Beta distribution of the fractions.
    rng : numpy.random.RandomState
        Generator used for the draw; any object exposing a compatible
        ``beta(a, b, size=...)`` method works.
    K : int
        Number of weights to return (``K >= 1``).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(K,)`` with non-negative entries that sum to 1 up
        to floating-point rounding.
    """
    betas = rng.beta(1., M, size=(K-1,))

    # leftover[k] = prod_{j<k} (1 - betas[j]): stick length before break k.
    leftover = np.cumprod(np.concatenate(([1.], 1. - betas)))

    # The last component takes all of the remaining stick (fraction 1).
    # Computing it as a product instead of `1 - sum(other weights)` avoids
    # cancellation, which can yield a tiny negative "probability" when the
    # leftover stick is smaller than machine epsilon.
    fractions = np.append(betas, 1.)

    return fractions * leftover # shape = (K,)


def stick_glueing(weights):
    """Invert stick-breaking: turn weights back into break fractions.

    Each of the first ``K - 1`` weights is divided by the stick length that
    was still available before it was broken off, i.e. one minus the sum of
    all earlier weights.

    Parameters
    ----------
    weights : numpy.ndarray
        1-D array of ``K`` weights.

    Returns
    -------
    numpy.ndarray
        The ``K - 1`` recovered fractions.
    """
    leading = weights[:-1]

    # Running total of [1, -w_0, -w_1, ...] gives the stick left before each
    # break; the final entry (stick left before the last weight) is unused.
    remaining = np.cumsum(np.concatenate(([1], -leading)))[:-1]

    return leading / remaining # shape = (K - 1,)


def create_dp_samples(M, rng, K):
    """Sample ``K`` values from one truncated Dirichlet-process draw.

    ``K`` atoms are drawn from a Normal with standard deviation 3 centred on
    the module-level ``mu``; stick-breaking weights come from
    ``generate_weights``; finally ``K`` atoms are picked with replacement
    according to those weights.

    Parameters
    ----------
    M : float
        Forwarded to ``generate_weights``.
    rng : numpy.random.RandomState
        Generator used for all three draws (atoms, weights, selection).
    K : int
        Number of atoms and also the number of returned samples.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(K,)`` whose entries are drawn from the atoms.
    """
    # Draw order (atoms, then weights, then selection) fixes the random stream.
    locations = rng.normal(mu, 3.0, size=K)
    probabilities = generate_weights(M, rng, K)

    picks = rng.choice(locations, size=K, p=probabilities)
    return picks

In [9]:
# Example draw with M=3 and K=50. This consumes numbers from the shared `rng`,
# so it shifts the random stream seen by every later cell.
# NOTE(review): the saved output below is 2-D, while create_dp_samples as
# written returns a 1-D array of length K -- the output looks stale; re-run.
dp_samples = create_dp_samples(M=3., rng=rng, K=50)
dp_samples

array([[ 2.72786108,  3.57974145, -1.86273651, ...,  3.57974145,
        -0.46265869, -0.46265869],
       [-0.84844108,  3.57974145,  3.57974145, ...,  3.57974145,
         3.57974145,  3.57974145],
       [ 2.72786108,  3.57974145,  3.57974145, ..., -1.00996505,
         3.57974145,  3.57974145],
       ...,
       [-2.07445279, -1.00996505,  3.57974145, ...,  3.57974145,
        -0.84844108,  3.57974145],
       [-0.84844108,  3.57974145, -0.84844108, ..., -1.00996505,
        -1.00996505, -0.46265869],
       [ 0.84400093,  2.88729209, -1.00996505, ..., -0.84844108,
         2.72786108,  3.57974145]])

In [None]:
class DirichletProcess:
    """Simulate Dirichlet-process samples and infer the concentration back.

    On construction, samples are drawn with ``create_dp_samples``. ``run``
    then turns the empirical atom frequencies into stick-breaking fractions
    and fits a PyMC model to them.

    Parameters
    ----------
    M : float
        Concentration used to simulate the samples.
    rng : numpy.random.RandomState
        Generator used for the simulation and to seed the PyMC model.
    K : int
        Truncation level / number of simulated samples.

    Attributes
    ----------
    samples : numpy.ndarray
        The simulated samples.
    posterior
        Result of ``pm.sample``; only set after ``run`` has been called.
    """

    def __init__(self, M, rng, K):
        self.M = M
        self.K = K

        self.samples = create_dp_samples(M, rng, K)
        self.rng = rng

    def run(self):
        """Fit the model to ``self.samples`` and store the draws in ``self.posterior``."""
        atoms, counts = np.unique(self.samples, return_counts=True)

        # stick-glueing assumes decreasing weights
        order = np.argsort(counts)[::-1]
        counts = counts[order]
        atoms = atoms[order]

        weights = counts / counts.sum() # empirical weights

        recovered_betas = stick_glueing(weights)

        # Seed from the generator this instance was built with, not from
        # whatever module-level `rng` happens to exist in the kernel.
        with pm.Model(rng_seeder=self.rng):
            α = pm.Uniform("α", 0., 10.)

            # One fraction fewer than distinct atoms: the last weight is
            # simply whatever stick remains.
            β = pm.Beta("β", 1., α, observed=recovered_betas)
            µ = pm.Normal("µ", mu=0., sigma=5.)

            G0 = pm.Normal("G0", mu=µ, sigma=3., observed=atoms)

            self.posterior = pm.sample(draws=2000, chains=1)

In [None]:
# Simulate a fresh batch of samples with the configured M and K (drawn from
# the shared `rng`), then fit the model to them.
dp = DirichletProcess(M, rng, K)
dp.run()

In [None]:
# Trace plot of the sampled posterior; binding the return value to `_`
# keeps its repr out of the cell output.
_ = pm.plot_trace(dp.posterior)

In [None]:
# Posterior mean of α, to be compared with the M used to simulate the data.
dp.posterior.to_dict()["posterior"]["α"].mean()

In [None]:
# The simulated samples the model above was fitted to.
dp.samples