In [34]:
import pandas as pd 
import pymc3 as pm
from sklearn.preprocessing import LabelEncoder
from theano import shared
import theano.tensor as tt

measurement = pd.read_csv('anonymized_data.csv')

predictors = ['network', 'content_id', 'genre_id', 'weekday_id', 'stratified_hour_id']

train = measurement.loc[measurement.leave_out_set == 0]
test = measurement.loc[measurement.leave_out_set == 1]

# This is to mimic encoding done in actual program so as to be used during model construction
encoders = {}

for predictor in predictors:
    encoder = LabelEncoder()
    encoder.fit(measurement[predictor])
    encoders[predictor] = encoder

In [62]:
def construct_model(model_variables, encoders):
    with pm.Model() as model:
        intercept = pm.Normal(
            'intercept', mu=0.0, tau=1 / (2 ** 2), shape=1
        )

        network_coeff_sigma = pm.HalfNormal('network_coeff_sigma', sd=5)
        network_coeff_offset = pm.Normal(
            'network_coeff_offset', mu=0, sd=1, 
            shape=encoders['network'].classes_.size
        )
        network_coeff = pm.Deterministic(
            'network_coeff', 
            0.0 + network_coeff_offset * network_coeff_sigma
        )

        content_id_coeff_sigma = pm.HalfNormal('content_id_coeff_sigma', sd=5)
        content_id_coeff_offset = pm.Normal(
            'content_id_coeff_offset', mu=0, sd=1, 
            shape=encoders['content_id'].classes_.size
        )
        content_id_coeff = pm.Deterministic(
            'content_id_coeff', 
            0.0 + content_id_coeff_offset * content_id_coeff_sigma
        )

        genre_id_coeff_sigma = pm.HalfNormal('genre_id_coeff_sigma', sd=5)
        genre_id_coeff_offset = pm.Normal(
            'genre_id_coeff_offset', mu=0, sd=1, 
            shape=encoders['genre_id'].classes_.size
        )
        genre_id_coeff = pm.Deterministic(
            'genre_id_coeff', 
            0.0 + genre_id_coeff_offset * genre_id_coeff_sigma
        )

        weekday_id_coeff_sigma = pm.HalfNormal('weekday_id_coeff_sigma', sd=5)
        weekday_id_coeff_offset = pm.Normal(
            'weekday_id_coeff_offset', mu=0, sd=1, 
            shape=encoders['weekday_id'].classes_.size
        )
        weekday_id_coeff = pm.Deterministic(
            'weekday_id_coeff', 
            0.0 + weekday_id_coeff_offset * weekday_id_coeff_sigma
        )

        stratified_hour_id_coeff_sigma = pm.HalfNormal('stratified_hour_id_coeff_sigma', sd=5)
        stratified_hour_id_coeff_offset = pm.Normal(
            'stratified_hour_id_coeff_offset', mu=0, sd=1, 
            shape=encoders['stratified_hour_id'].classes_.size
        )
        stratified_hour_id_coeff = pm.Deterministic(
            'stratified_hour_id_coeff', 
            0.0 + stratified_hour_id_coeff_offset * stratified_hour_id_coeff_sigma
        )

        # Parameters for categories
        link_argument = (
            intercept + 
            network_coeff[model_variables['network']] +
            content_id_coeff[model_variables['content_id']] +
            genre_id_coeff[model_variables['genre_id']] + 
            weekday_id_coeff[model_variables['weekday_id']] +
            stratified_hour_id_coeff[model_variables['stratified_hour_id']]
        )
        omega = pm.Deterministic('omega', pm.invlogit(link_argument))
        kappa = pm.HalfStudentT('kappa', nu=3, sd=3)

        # Mean parameter for individual data 
        mu = pm.Beta(
            'mu', alpha=omega * kappa + 1, beta=(1 - omega) * kappa + 1, 
            )

        likelihood = pm.Binomial(
            'likelihood', 
            p=mu, 
            n=tt.cast(model_variables['n'], 'int64'), 
            observed=tt.cast(model_variables['y_obs'], 'int64'),
#             total_size=model_variables['y_obs'].shared.eval().size
        )

        # Rescale coefficients to be deflections from baseline
#         b_0 = pm.Deterministic('b_0', tt.mean(link_argument))
#         b_1 = pm.Deterministic('b_1', link_argument[model_variables['network']] - b_0)
#         b_2 = pm.Deterministic('b_2', link_argument[model_variables['content_id']] - b_0)
#         b_3 = pm.Deterministic('b_3', link_argument[model_variables['genre_id']] - b_0)
#         b_4 = pm.Deterministic('b_4', link_argument[model_variables['weekday_id']] - b_0)
#         b_5 = pm.Deterministic('b_5', link_argument[model_variables['stratified_hour_id']] - b_0)
        
    return model

In [64]:
model.omega.tag.test_value.shape

(87301,)

In [63]:
# creating shared variables results in no error in test point evaluation
model_variables = {}

# Nominal Predictors of model
for predictor in predictors:
    model_variables[predictor] = shared(train[predictor].values)

model_variables['y_obs'] = shared(train.y.values)
model_variables['n'] = shared(train.n.values)

model = construct_model(model_variables, encoders)

for RV in model.basic_RVs:
    print(RV.name, RV.logp(model.test_point))

TypeError: For compute_test_value, one input test value does not have the requested type.

The error when converting the test value to that variable type:
Wrong number of dimensions: expected 0, got 1 with shape (87301,).

In [6]:
# However, when repeating with minibatch, model test point evaluates to inf for likelihood
batch_size = 2000

random_seed = 42

model_variables = {}

# Nominal Predictors of model
for predictor in encoders:
    encoder = encoders[predictor]
    model_variables[predictor] = pm.Minibatch(train[predictor].values, batch_size=batch_size)

model_variables['y_obs'] = pm.Minibatch(train.y.values, batch_size=batch_size)
model_variables['n'] = pm.Minibatch(train.n.values, batch_size=batch_size)

model = construct_model(model_variables, encoders, batch_size)

for RV in model.basic_RVs:
    print(RV.name, RV.logp(model.test_point))

intercept -1.612085713764618
network_coeff_sigma_log__ -0.7698925914732453
network_coeff_offset -7.35150826563738
content_id_coeff_sigma_log__ -0.7698925914732453
content_id_coeff_offset -1495.1129935240372
genre_id_coeff_sigma_log__ -0.7698925914732453
genre_id_coeff_offset -16.540893597684104
weekday_id_coeff_sigma_log__ -0.7698925914732453
weekday_id_coeff_offset -6.432569732432707
stratified_hour_id_coeff_sigma_log__ -0.7698925914732453
stratified_hour_id_coeff_offset -5.513631199228035
kappa_log__ -0.8831057935713134
mu_logodds__ -1714.0956267952663
likelihood -45492.94857581889


In [50]:
# creating shared variables results in no error in test point evaluation
model_variables = {}

# Nominal Predictors of model
for predictor in predictors:
    model_variables[predictor] = shared(train[predictor].values)

model_variables['y_obs'] = shared(train.y.values)
model_variables['n'] = shared(train.n.values)

model = construct_model(model_variables, encoders, train.y.values.shape)

predictor_mini = {}

# Nominal Predictors of model
for predictor in encoders:
    predictor_mini[predictor] = pm.Minibatch(train[predictor].values, batch_size=batch_size)

predictor_mini['y_obs'] = pm.Minibatch(train.y.values, batch_size=batch_size)
predictor_mini['n'] = pm.Minibatch(train.n.values, batch_size=batch_size)

In [55]:
replace_dict = {}
for k in model_variables.keys():
    replace_dict[model_variables[k]] = predictor_mini[k]

{<TensorType(int64, vector)>: Minibatch,
 <TensorType(int64, vector)>: Minibatch,
 <TensorType(int64, vector)>: Minibatch,
 <TensorType(int64, vector)>: Minibatch,
 <TensorType(int64, vector)>: Minibatch,
 <TensorType(int64, vector)>: Minibatch,
 <TensorType(int64, vector)>: Minibatch}

In [56]:
with model:
    approx = pm.ADVI()
    approx.fit(20000, more_replacements=replace_dict)
    trace = approx.approx.sample(1000)

  0%|          | 0/20000 [00:00<?, ?it/s]


ValueError: Input dimension mis-match. (input[0].shape[0] = 87301, input[2].shape[0] = 2000)
Apply node that caused the error: Elemwise{Composite{Cast{int8}((i0 * i1 * GT(i2, i3) * GT(i4, i3)))}}(Elemwise{ge,no_inplace}.0, Elemwise{le,no_inplace}.0, Elemwise{add,no_inplace}.0, TensorConstant{(1,) of 0}, Elemwise{add,no_inplace}.0)
Toposort index: 168
Inputs types: [TensorType(bool, vector), TensorType(bool, vector), TensorType(float64, vector), TensorType(int8, (True,)), TensorType(float64, vector)]
Inputs shapes: [(87301,), (87301,), (2000,), (1,), (2000,)]
Inputs strides: [(1,), (1,), (8,), (1,), (8,)]
Inputs values: ['not shown', 'not shown', 'not shown', array([0], dtype=int8), 'not shown']
Outputs clients: [[Elemwise{Composite{(Switch(i0, ((Switch(i1, i2, (i3 * i4 * i5)) + Switch(i6, i2, (i7 * i8))) - ((scalar_gammaln(i9) + scalar_gammaln(i10)) - scalar_gammaln(i11))), i12) + log(Abs((i13 * i14))))}}[(0, 4)](Elemwise{Composite{Cast{int8}((i0 * i1 * GT(i2, i3) * GT(i4, i3)))}}.0, Elemwise{eq,no_inplace}.0, TensorConstant{(1,) of 0}, TensorConstant{(1,) of -1.0}, Elemwise{mul,no_inplace}.0, Elemwise{Composite{scalar_softplus((-i0))}}.0, Elemwise{eq,no_inplace}.0, Elemwise{mul,no_inplace}.0, Elemwise{Composite{log1p((-i0))}}.0, Elemwise{add,no_inplace}.0, Elemwise{add,no_inplace}.0, Elemwise{add,no_inplace}.0, TensorConstant{(1,) of -inf}, mu, Elemwise{sub,no_inplace}.0), Elemwise{Composite{(Switch(i0, i1, Switch(i2, (i3 * i4 * i5 * i6), i1)) + Switch(i7, i1, Switch(i2, (i8 * i4 * i9), i1)) + (i6 * Composite{sgn((i0 * i1))}(i9, i5) * i5 * sgn(i9) * sgn(i5)) + (Composite{sgn((i0 * i1))}(i9, i5) * i9 * sgn(i9) * sgn(i5)) + Switch(i10, i1, Switch(i11, (i5 * Cast{float64}((-i12))), i1)) + Switch(i13, i1, Switch(i11, (i14 * i9 * Cast{float64}((-i15))), i1)))}}[(0, 3)](Elemwise{eq,no_inplace}.0, TensorConstant{(1,) of 0}, Elemwise{Composite{Cast{int8}((i0 * i1 * GT(i2, i3) * GT(i4, i3)))}}.0, omega, InplaceDimShuffle{x}.0, Elemwise{sub,no_inplace}.0, TensorConstant{(1,) of -1.0}, Elemwise{eq,no_inplace}.0, Elemwise{sub,no_inplace}.0, mu, Elemwise{eq,no_inplace}.0, Elemwise{Composite{Cast{int8}((GE(i0, i1) * LE(i0, i2) * i3 * i4))}}.0, Minibatch, Elemwise{eq,no_inplace}.0, TensorConstant{(1,) of -1.0}, Elemwise{sub,no_inplace}.0), Elemwise{Composite{Switch(i0, (-psi(i1)), i2)}}[(0, 1)](Elemwise{Composite{Cast{int8}((i0 * i1 * GT(i2, i3) * GT(i4, i3)))}}.0, Elemwise{add,no_inplace}.0, TensorConstant{(1,) of 0}), Elemwise{Composite{(Switch(i0, i1, Switch(i2, (-i3), i1)) + Switch(i2, psi(i4), i1) + i5)}}[(0, 3)](Elemwise{eq,no_inplace}.0, TensorConstant{(1,) of 0}, Elemwise{Composite{Cast{int8}((i0 * i1 * GT(i2, i3) * GT(i4, i3)))}}.0, Elemwise{Composite{log1p((-i0))}}.0, Elemwise{add,no_inplace}.0, Elemwise{Composite{Switch(i0, (-psi(i1)), i2)}}[(0, 1)].0), Elemwise{Composite{(Switch(i0, i1, Switch(i2, i3, i1)) + Switch(i2, psi(i4), i1) + i5)}}[(0, 4)](Elemwise{eq,no_inplace}.0, TensorConstant{(1,) of 0}, Elemwise{Composite{Cast{int8}((i0 * i1 * GT(i2, i3) * GT(i4, i3)))}}.0, Elemwise{Composite{scalar_softplus((-i0))}}.0, Elemwise{add,no_inplace}.0, Elemwise{Composite{Switch(i0, (-psi(i1)), i2)}}[(0, 1)].0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

In [48]:
# creating shared variables results in no error in test point evaluation
model_variables = {}

# Nominal Predictors of model
for predictor in predictors:
    model_variables[predictor] = shared(test[predictor].values)

model_variables['y_obs'] = shared(test.y.values)
model_variables['n'] = shared(test.n.values)

model_predi = construct_model(model_variables, encoders, test.y.values.shape)

ppc = pm.sample_ppc(trace, samples=100, model=model_predi)

  0%|          | 0/100 [00:00<?, ?it/s]


TypeError: object of type 'NoneType' has no len()

In [46]:
model_variables['n'].eval().shape

(48167,)

In [47]:
ppc['likelihood'].shape

(100, 2000)

In [41]:
pm.Minibatch?