In [4]:
import numpy as np
import pandas as pd
import pymc3 as pm
from mb_modelbase.models_core.models import Model
from mb_modelbase.models_core.pyMC3_model import ProbabilisticPymc3Model
from mb_modelbase.models_core.empirical_model import EmpiricalModel
import theano
from scripts.run_conf import cfg as user_cfg
import os
import timeit

In [17]:
# Directory that the fitted models are saved into.
# The default is the original hard-coded location; set the environment variable
# MB_FITTED_MODELS_PATH to redirect the output on another machine without
# editing the notebook.
path_for_fitted_models = os.environ.get(
    'MB_FITTED_MODELS_PATH',
    '/home/luca_ph/Documents/projects/graphical_models/code/models_ppl',
)

In [18]:
## simulate the "observed" data set
# Ground truth of the linear relationship
#   Y = alpha + beta_0 * X1 + beta_1 * X2 + sigma * noise
np.random.seed(123)  # fixed seed so every run produces the same draws
size = 100
alpha, sigma = 1, 1
beta_0, beta_1 = 1, 2.5

# The draw order (X1, then X2, then the noise term) fixes the values for this seed.
X1 = np.random.randn(size)
X2 = 0.2 * np.random.randn(size)
Y = (alpha + beta_0 * X1 + beta_1 * X2) + sigma * np.random.randn(size)

# Collect the three columns into the frame used for fitting.
data = pd.DataFrame(dict(X1=X1, X2=X2, Y=Y))

In [23]:
# create PPL model with PyMC3
# Builds a linear-regression model for Y on X1 and X2, wraps it in a
# ProbabilisticPymc3Model named "pymc3_simple" and fits it on `data`.
# NOTE: alpha, beta_0, beta_1, sigma, Y, X1 and X2 are rebound here to PyMC3
# variables, replacing the plain values of the same names from the
# data-simulation cell; the simulated columns remain available through `data`.
basic_model = pm.Model()
modelname = "pymc3_simple"
# Name of the companion empirical model (passed to set_empirical_model_name /
# EmpiricalModel in later cells).
emp_modelname ="pymc3_simple_emp"

with basic_model:
    # Priors for unknown model parameters
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta_0 = pm.Normal('beta_0', mu=0, sd=10)
    beta_1 = pm.Normal('beta_1', mu=0, sd=20)
    sigma = pm.HalfNormal('sigma', sd=5)
    
    # Expected value of outcome
    mu = alpha + beta_0 * data['X1'] + beta_1 * data['X2']
    
    # Likelihood (sampling distribution) of observations
    Y = pm.Normal('Y', mu=mu, sd=sigma, observed=data['Y'])
    # X1 and X2 are also declared as observed Normal variables, each centred on
    # its own data values and sharing `sigma` with Y.
    # NOTE(review): presumably needed so the wrapped model exposes X1 and X2 as
    # variables alongside Y -- confirm against ProbabilisticPymc3Model.
    X1 = pm.Normal('X1', mu=data['X1'], sd=sigma, observed=data['X1'])
    X2 = pm.Normal('X2', mu=data['X2'], sd=sigma, observed=data['X2'])
    
    # Wrap and fit while the model context is still active; the recorded log
    # shows fit() running NUTS over [sigma, beta_1, beta_0, alpha].
    model = ProbabilisticPymc3Model(modelname, basic_model)
    model.fit(data)

12:17:32.579 INFO :: Auto-assigning NUTS sampler...
12:17:32.580 INFO :: Initializing NUTS using jitter+adapt_diag...
12:17:34.346 INFO :: Sequential sampling (1 chains in 1 job)
12:17:34.346 INFO :: NUTS: [sigma, beta_1, beta_0, alpha]
12:17:35.389 INFO :: Only one chain was sampled, this makes it impossible to run some convergence checks
100%|██████████| 500/500 [00:00<00:00, 530.94it/s]


In [24]:
# save model
# Attach the empirical model's name to the fitted model (presumably so the two
# can be linked when loaded -- confirm against mb_modelbase), then write the
# model into path_for_fitted_models. The recorded cell output shows save()
# returning the path of the written 'pymc3_simple.mdl' file.
model.set_empirical_model_name(emp_modelname)
model.save(path_for_fitted_models)

'/home/luca_ph/Documents/projects/graphical_models/code/models_ppl/pymc3_simple.mdl'

In [30]:
# create modified PPL model with PyMC3
# Same regression structure as the "pymc3_simple" model, but with different
# priors: a Uniform prior on alpha and much tighter Normal priors on the slopes
# that are centred away from the values used to simulate the data.
# The fitted model is stored as "pymc3_simple2" and shares the empirical model
# name "pymc3_simple_emp".
basic_model = pm.Model()
modelname = "pymc3_simple2"
emp_modelname = "pymc3_simple_emp"

with basic_model:
    # Priors for unknown model parameters
    alpha = pm.Uniform('alpha')
    beta_0 = pm.Normal('beta_0', mu=20, sd=1)
    # Was `pm.unIF`, which is not a PyMC3 distribution and raises
    # AttributeError. The mu/sd parameterisation identifies this as a Normal
    # prior (Uniform takes lower/upper instead).
    beta_1 = pm.Normal('beta_1', mu=5, sd=1)
    sigma = pm.HalfNormal('sigma', sd=1)

    # Expected value of outcome
    mu = alpha + beta_0 * data['X1'] + beta_1 * data['X2']

    # Likelihood (sampling distribution) of observations
    Y = pm.Normal('Y', mu=mu, sd=sigma, observed=data['Y'])
    # X1 and X2 are also declared as observed Normal variables, each centred on
    # its own data values and sharing `sigma` with Y.
    X1 = pm.Normal('X1', mu=data['X1'], sd=sigma, observed=data['X1'])
    X2 = pm.Normal('X2', mu=data['X2'], sd=sigma, observed=data['X2'])

    # Wrap and fit while the model context is still active.
    model = ProbabilisticPymc3Model(modelname, basic_model)
    model.fit(data)

# Attach the empirical model's name and write the fitted model to disk.
model.set_empirical_model_name(emp_modelname)
model.save(path_for_fitted_models)

12:24:41.147 INFO :: Auto-assigning NUTS sampler...
12:24:41.148 INFO :: Initializing NUTS using jitter+adapt_diag...
12:24:45.538 INFO :: Sequential sampling (1 chains in 1 job)
12:24:45.539 INFO :: NUTS: [sigma, beta_1, beta_0, alpha]
12:24:47.051 INFO :: Only one chain was sampled, this makes it impossible to run some convergence checks
100%|██████████| 500/500 [00:01<00:00, 286.32it/s]


'/home/luca_ph/Documents/projects/graphical_models/code/models_ppl/pymc3_simple2.mdl'

In [21]:
# create empirical model
# this is required to allow training-data-related facets in the front-end
emp_model = EmpiricalModel(name=emp_modelname)
emp_model.fit(df=data)
# `modelpath` was never defined in this notebook (NameError on a fresh kernel);
# save next to the PyMC3 models, which matches the path in the recorded output.
emp_model.save(path_for_fitted_models)

'/home/luca_ph/Documents/projects/graphical_models/code/models_ppl/pymc3_simple_emp.mdl'