In [21]:
from src.models.dirpfa import DirPFAHMCSampler
from src.data import generate_data

import numpy as np

In [22]:
# Problem dimensions; `n` is passed to both the data generator and `set_model`.
num_topic, vocab_size, n = 5, 100, 50

# Synthetic data from the project's generator (partially dependent variant).
document = generate_data(num_topic, vocab_size, n, partial_depend=True)

# Hyper-parameters for the sampler.
# `n0` is the mean of the per-row totals of the generated data; both prior
# arrays are filled with the same constant.
prior_fill = 10.
mean_row_total = document.numpy().sum(axis=1).mean()
hparams = {
    'n0': mean_row_total,
    'topic_dist': np.full((n, num_topic), prior_fill),
    'word_dist': np.full((num_topic, vocab_size), prior_fill),
}

# Build the sampler, attach the data, then construct the model for `n` rows.
model = DirPFAHMCSampler(vocab_size, num_topic, hparams)
model.set_data(document)
model.set_model(n)

# Sanity-check the model's shapes: for every named variable, print its batch
# shape, its event shape, and the shape of one drawn sample.
joint = model.model
event = joint.event_shape
sample = joint.sample()

for k, v in joint.batch_shape.items():
    print(f'{k}: batch: {v} | event: {event[k]}')
    print(f'---{sample[k].shape}\n')

theta: batch: (50,) | event: (5,)
---(50, 5)

phi: batch: () | event: (5, 100)
---(5, 100)

l: batch: () | event: ()
---()

document: batch: (50,) | event: (100,)
---(50, 100)





In [15]:
# Sampler run settings: states to keep, burn-in length, and step size.
n_states, n_burnin = 100, 3000
step_size = 0.05

model.sample_states(
    document,
    n_states=n_states,
    n_burnin=n_burnin,
    step_size=step_size,
)


Finish sampling...


In [16]:
# Predictive check: draw predictions from the fitted sampler and test, entry by
# entry, whether the observed value lies inside the central 95% interval.
draw_samples = model.predict()

# Equal-tailed 95% bounds, reduced over axis 0 of the draws
# (assumes axis 0 indexes the draws -- TODO confirm against `predict`).
pred_low, pred_high = np.quantile(draw_samples, [0.025, 0.975], axis=0)

# Compare against a plain NumPy array rather than relying on implicit
# tensor/ndarray comparison semantics.
observed = np.asarray(document)

# The interval is closed. The observed values are integer-valued counts, so an
# observation can sit exactly on a bound (e.g. a zero count with a zero lower
# bound); strict inequalities would wrongly report those entries as uncovered.
is_cover = (pred_low <= observed) & (observed <= pred_high)

In [17]:
# Lower bound of the predictive interval (the 0.025 quantile computed above).
pred_low

array([[ 30.   ,  32.   ,  32.95 , ..., 119.475, 116.   , 118.475],
       [ 15.475,  16.475,  21.   , ..., 141.475, 133.95 , 126.95 ],
       [ 13.475,  17.   ,  19.95 , ..., 145.95 , 133.9  , 126.95 ],
       ...,
       [ 20.475,  21.475,  24.425, ..., 130.95 , 128.   , 119.   ],
       [ 17.   ,  22.475,  24.   , ..., 134.475, 129.   , 127.95 ],
       [ 23.   ,  27.   ,  29.   , ..., 131.425, 123.425, 124.475]])

In [18]:
# Upper bound of the predictive interval (the 0.975 quantile computed above).
pred_high

array([[ 50.525,  55.525,  57.525, ..., 164.525, 163.2  , 160.525],
       [ 35.05 ,  36.   ,  41.525, ..., 190.525, 182.575, 173.625],
       [ 31.   ,  35.575,  40.   , ..., 193.525, 185.1  , 169.   ],
       ...,
       [ 40.   ,  46.05 ,  48.   , ..., 182.   , 171.525, 173.525],
       [ 41.05 ,  44.   ,  45.525, ..., 182.05 , 177.525, 177.1  ],
       [ 43.   ,  51.   ,  51.   , ..., 174.05 , 169.05 , 165.   ]])

In [19]:
# Observed data, shown for side-by-side comparison with the interval bounds.
document

<tf.Tensor: shape=(50, 100), dtype=float32, numpy=
array([[  0.,   1.,   1., ...,  52.,  59.,  61.],
       [  0.,   5.,  15., ..., 246., 215., 236.],
       [  1.,   3.,   3., ..., 226., 226., 192.],
       ...,
       [  0.,   2.,  13., ..., 177., 152., 150.],
       [  2.,   7.,  10., ..., 192., 161., 206.],
       [  1.,   6.,   5., ..., 164., 130., 151.]], dtype=float32)>

In [20]:
# Empirical coverage: fraction of entries flagged as covered in `is_cover`.
# For a well-calibrated 95% interval this should be close to 0.95.
is_cover.mean()

0.2822