https://discourse.pymc.io/t/beta-process-or-truncated-indian-buffet-process-factor-analysis-in-the-vein-of-the-dirichlet-process-example/1180

In [1]:
import numpy as np
import pymc3 as pm
import theano.tensor as tt

# Data generation
# Simulate N_OBS observations, each a noisy linear combination of K latent
# features. Which features are "on" for an observation is given by a binary
# indicator array drawn from a finite (K-feature) Beta-Bernoulli prior.
SEED = 42    # fixed so that Restart & Run All reproduces the same data set
N_OBS = 100  # number of simulated observations
# Local generator: seeding here does not touch NumPy's global random state.
rng = np.random.RandomState(SEED)

# Hyperparameters of the Beta prior over feature-activation probabilities
a = 5
b = 10
K = 10  # number of latent features
# One activation probability per feature ...
data_p = rng.beta(a / K, b * (K - 1) / K, size=K)
# ... repeated for every observation, giving an (N_OBS, K) array
data_p = np.tile(data_p[None, :], (N_OBS, 1))
# Bernoulli indicator array (N_OBS, K) of active latent features
data_Z = rng.binomial(n=1, p=data_p)
# The latent feature values, pulled from a Normal distribution
latent_features_data = rng.normal(loc=5, scale=2, size=K)
# Each observation is the sum of its active features (a matrix-vector
# product of indicators and feature values) plus unit-variance Gaussian noise.
noise = rng.normal(loc=0, scale=1, size=N_OBS)
data = data_Z.dot(latent_features_data) + noise

  from ._conv import register_converters as _register_converters


In [2]:
# Factor model with a continuous relaxation of the binary indicator matrix.
# Instead of discrete Bernoulli indicators (which NUTS cannot sample), each
# entry of Z is built as a1 / (a1 + a2) from two Gamma variables sharing the
# same rate, i.e. a Beta(p / n, (1 - p) / n) draw with mean p. Dividing the
# shape parameters by n pushes the mass of Z towards 0 and 1.
n = 10  # sparsity divisor applied to the Gamma shape parameters
K = 10  # number of latent factors (truncation level)
N_OBS = data.shape[0]  # one row of Z per observation
SAMPLE_SEED = 42       # makes the MCMC run reproducible
with pm.Model() as model:
    # Params for Beta distribution over factor probabilities
    a = pm.Gamma("a", 1, 1)
    b = pm.Gamma("b", 1, 1)
    p = pm.Beta("p", a / K, b * (K - 1) / K, shape=K)
    # Repeat the per-factor probabilities for every observation: (N_OBS, K).
    # Note that this rebinds `p` from the random variable to the tiled tensor.
    p = tt.tile(p[None, :], (N_OBS, 1))
    # Relaxed (continuous, in (0, 1)) indicator matrix of which factors are
    # "on" for every observation
    a1 = pm.Gamma("a1", alpha=p / n, beta=1, shape=(N_OBS, K))
    a2 = pm.Gamma("a2", alpha=(1 - p) / n, beta=1, shape=(N_OBS, K))
    Z = pm.Deterministic("Z", a1 / (a1 + a2))
    # Latent factors - K of them
    factors = pm.Normal("factors", mu=0, sd=1, shape=K)
    # Observations are linear combinations of the factors: Z (N_OBS, K) times
    # factors (K,) gives the (N_OBS,) vector of expected values.
    equation = tt.dot(Z, factors)
    sd = pm.HalfCauchy("sd", 0.5)
    obs = pm.Normal("obs", mu=equation, sd=sd, observed=data)

with model:
    # NOTE(review): newer PyMC3 releases appear to rename `njobs` to `cores`;
    # confirm against the installed version before changing it.
    tr = pm.sample(tune=4000, draws=2000, njobs=3, random_seed=SAMPLE_SEED)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (3 chains in 3 jobs)
NUTS: [sd_log__, factors, a2_log__, a1_log__, p_logodds__, b_log__, a_log__]
  0%|          | 17/6000 [00:06<23:05,  4.32it/s]  INFO (theano.gof.compilelock): Waiting for existing lock by process '49277' (I am process '49278')
INFO (theano.gof.compilelock): To manually release the lock, delete /Users/jlao/.theano/compiledir_Darwin-17.5.0-x86_64-i386-64bit-i386-3.5.1-64/lock_dir
  out=out, **kwargs)
  out=out, **kwargs)


KeyboardInterrupt: 