In [1]:
from src.models.dpfa import DPFAHMCSampler
from src.data import generate_data

import numpy as np

In [16]:
num_topic = 5
vocab_size = 100
n = 50
document = generate_data(num_topic, vocab_size, n, partial_depend=True)

hparams = {
            'e0': 1.,
            'f0': 0.001,
            'c0': np.full((num_topic,), 1.),
            'pn': np.full((n, 1), 0.5),
            'word_dist': np.full((num_topic, vocab_size), 10.),
            'bias0': np.full((num_topic, num_topic), 0.),
            'bias1': np.full((1, num_topic), 0.),
            'w1_std': np.full((n, num_topic), 10.),
            }
model = DPFAHMCSampler(vocab_size, num_topic, hparams)

model.set_data(document)
model.set_model(n)

event = model.model.event_shape
sample = model.model.sample()

for k, v in model.model.batch_shape.items():
    print(f'{k}: batch: {v} | event: {event[k]}')
    print(f'---{sample[k].shape}\n')

gamma0: batch: () | event: ()
---()

gamma: batch: () | event: (5,)
---(5,)

theta: batch: (50,) | event: (5,)
---(50, 5)

w1: batch: (50,) | event: (5,)
---(50, 5)

h0: batch: () | event: (5, 5)
---(5, 5)

h: batch: (50,) | event: (5,)
---(50, 5)

phi: batch: () | event: (5, 100)
---(5, 100)

document: batch: (50,) | event: (100,)
---(50, 100)



In [4]:
n_states = 100
n_burnin = 3000
model.sample_states(document,
                  n_states=n_states,
                  n_burnin=n_burnin,
                  step_size=0.05)

Finish sampling...


In [5]:
draw_samples = model.predict()
pred_low, pred_high = np.quantile(draw_samples, [0.025, 0.975], axis=0)
is_cover = np.all((pred_low < document, pred_high > document), axis=0)

In [6]:
pred_low

array([[1., 1., 2., ..., 1., 1., 1.],
       [1., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 1., 2., ..., 1., 1., 1.],
       [1., 1., 2., ..., 1., 1., 1.]])

In [7]:
pred_high

array([[10.   ,  8.   , 12.   , ..., 10.   , 10.   ,  9.   ],
       [ 8.   ,  6.   ,  7.   , ...,  7.   ,  6.   ,  6.   ],
       [ 0.   ,  0.   ,  0.   , ...,  0.   ,  0.   ,  0.   ],
       ...,
       [ 0.   ,  0.   ,  0.   , ...,  0.   ,  0.   ,  0.   ],
       [10.   ,  8.   , 11.   , ..., 10.   , 10.   , 10.   ],
       [11.   ,  8.   , 11.   , ...,  9.525, 10.   , 10.   ]])

In [8]:
document

<tf.Tensor: shape=(200, 100), dtype=float32, numpy=
array([[  3.,   2.,   6., ..., 284., 200., 245.],
       [  4.,   1.,  11., ..., 170., 128., 210.],
       [  3.,   4.,   8., ..., 368., 283., 347.],
       ...,
       [  3.,   3.,  11., ..., 191., 137., 230.],
       [  2.,   3.,  13., ..., 335., 253., 330.],
       [  3.,   1.,  10., ..., 174., 161., 213.]], dtype=float32)>

In [9]:
is_cover.mean()

0.03155