In [1]:
import pickle
from copy import deepcopy
from datetime import datetime as DT
from loguru import logger
import pandas as pd
from theano import shared
from pymc_models import PyMCModel
from pymc_models import hs_regression
from sklearn.preprocessing import PolynomialFeatures



In [2]:
# Load the pre-split train/test datasets from disk.
# NOTE: pickle.load can execute arbitrary code; only open pickles from a trusted source.
with open('../PickleJar/DataSets/AphiTrainTestSplitDataSets.pkl', 'rb') as fb:
    datadict = pickle.load(fb)

# Predictor sets stored under the '*_s' keys
# (presumably "standardized" -- confirm against the notebook that wrote the pickle).
X_s_train, X_s_test = datadict['x_train_s'], datadict['x_test_s']

# Targets for the train and test partitions.
y_train, y_test = datadict['y_train'], datadict['y_test']

# Predictor sets stored under the '*_wi_s' keys
# (presumably "with interactions" -- confirm against the notebook that wrote the pickle).
X_s_train_w_int, X_s_test_w_int = datadict['x_train_wi_s'], datadict['x_test_wi_s']

In [3]:
bands = [411, 443, 489, 510, 555, 670]
# Band-keyed container for results; every slot starts as None and is
# replaced by that band's results dictionary once its run completes.
model_dict = {band: None for band in bands}

# Theano shared variables holding the model inputs. Their contents are
# swapped with set_value() below, so the same model object can be evaluated
# first on the training data and then on the test data.
X_shared = shared(X_s_train_w_int.values)
y_shared = shared(y_train['log10_aphy%d' % bands[0]].values)

# Fit one hs_regression model per band, then evaluate it on train and test data.
for band in bands:
    logger.info("processing aphi{band}", band=band)
    target_col = 'log10_aphy%d' % band

    # ---- Training phase -------------------------------------------------
    # Point the shared variables (back) at the training data; the previous
    # iteration left them holding test data.
    X_shared.set_value(X_s_train_w_int.values)
    y_shared.set_value(y_train[target_col].values)

    hshoe_wi_ = PyMCModel(hs_regression, X_shared, y_shared)
    hshoe_wi_.model.name = 'hshoe_wi_aphy%d' % band
    hshoe_wi_.fit(
        n_samples=2000,
        cores=4,
        chains=4,
        tune=10000,
        nuts_kwargs={'target_accept': 0.95},
    )

    # Predictions and information criteria computed while the shared
    # variables still hold the training data.
    ppc_train_ = hshoe_wi_.predict(likelihood_name='likelihood')
    waic_train = hshoe_wi_.get_waic()
    loo_train = hshoe_wi_.get_loo()

    # Snapshot the model and trace before the shared inputs are overwritten.
    model_train = deepcopy(hshoe_wi_.model)
    trace = deepcopy(hshoe_wi_.trace_)
    run_dict = {
        'model_train': model_train,
        'trace': trace,
        'ppc_train': ppc_train_,
        'loo_train': loo_train,
        'waic_train': waic_train,
    }

    # ---- Test phase -----------------------------------------------------
    # Swap the test data into the shared variables; the model is not refit.
    X_shared.set_value(X_s_test_w_int.values)
    y_shared.set_value(y_test[target_col].values)

    model_test = deepcopy(hshoe_wi_.model)
    ppc_test_ = hshoe_wi_.predict(likelihood_name='likelihood')
    waic_test = hshoe_wi_.get_waic()
    loo_test = hshoe_wi_.get_loo()

    run_dict.update(
        model_test=model_test,
        ppc_test=ppc_test_,
        waic_test=waic_test,
        loo_test=loo_test,
    )
    model_dict[band] = run_dict
    

2019-03-11 10:36:43.101 | INFO     | __main__:<module>:11 - processing aphi411
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, bias, w, lambda_m, sd_bias, tau]
Sampling 4 chains: 100%|██████████| 48000/48000 [02:02<00:00, 391.08draws/s]
100%|██████████| 8000/8000 [00:53<00:00, 148.57it/s]
  return np.stack(logp)
        log predictive densities exceeds 0.4. This could be indication of
        WAIC starting to fail see http://arxiv.org/abs/1507.04544 for details
        
  """)
        greater than 0.7 for one or more samples.
        You should consider using a more robust model, this is because
        importance sampling is less likely to work well if the marginal
        posterior and LOO posterior are very different. This is more likely to
        happen with a non-robust model and highly influential observations.
  happen with a non-robust model and highly influential observations.""")
100%|██████

In [4]:
# Persist the band-keyed results dictionary under a timestamped filename.
# The timestamp is formatted explicitly: str(datetime.now()) contains a space
# and colons, which are not valid in Windows filenames and are awkward in
# shells, and a failed save here would discard the whole sampling run.
timestamp = DT.now().strftime('%Y%m%dT%H%M%S')
with open('../PickleJar/Results/hshoe_wi_model_dict_%s.pkl' % timestamp, 'wb') as fb:
    pickle.dump(model_dict, fb, protocol=pickle.HIGHEST_PROTOCOL)