In [None]:
# if need be, manually add the local project root to PYTHONPATH and move working directories

import os
import sys

# Local project root: edit this to point at your checkout before running the notebook.
project = '/'  # change to local project root

# Re-running this cell must not pile duplicate entries onto sys.path.
if project not in sys.path:
    sys.path.append(project)

# The data files loaded further down use paths relative to the project root.
os.chdir(project)

In [None]:
# dependencies

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import nfx.lm.gibbs
import nfx.misc.plot

In [None]:
# helper functions

def package_samples(samples, node_names, covariate_names):
    """Turn raw sampler draws into three tidy data frames.

    Every element of `samples` is unpacked as a `(loc, prec, nuisance)` triple.

    Args:
        samples: sequence of draws, one triple per iteration.
        node_names: per-level collections of node labels, forwarded to the
            coefficient and dispersion packagers.
        covariate_names: labels for the coefficient vector entries.

    Returns:
        A `(coefficients, precision traces, dispersion)` tuple of data frames,
        built by `package_loc_samples`, `package_prec_samples` and
        `package_disp_samples` respectively.
    """
    loc_draws, prec_draws, nuisance_draws = zip(*samples)

    # Regroup coefficient draws by level (one stacked array per level), then
    # flip the level order.
    # NOTE(review): the flip presumably converts the sampler's level ordering
    # to the root-first ordering of `node_names` -- confirm against nfx.
    per_level = [np.array(level_draws) for level_draws in zip(*loc_draws)]
    per_level.reverse()

    # Trace over the last two axes of the stacked precision draws, then put
    # levels on rows (flipped like the coefficients) and draws on columns.
    prec_traces = np.trace(prec_draws, axis1=2, axis2=3)
    prec_traces = prec_traces.T[::-1]

    nuisance_array = np.array(nuisance_draws)

    coef_frame = package_loc_samples(per_level, node_names, covariate_names)
    prec_frame = package_prec_samples(prec_traces)
    disp_frame = package_disp_samples(nuisance_array, node_names)
    return coef_frame, prec_frame, disp_frame

def package_loc_samples(loc_samples, node_names, covariate_names):
    """Stack per-node coefficient draws into one frame.

    Args:
        loc_samples: one array per level, indexed as `[iteration, node, covariate]`.
        node_names: one collection of string labels per level, aligned with
            the node axis of the matching array in `loc_samples`.
        covariate_names: labels for the covariate axis.

    Returns:
        A data frame indexed by `(level, node, covariate)` with one column per
        iteration (column axis named `iter`).
    """

    def _node_frame(level, position, label, level_draws):
        # Draws for a single node: (iteration, covariate) -> (covariate, iteration).
        frame = pd.DataFrame(level_draws[:, position].T, index=covariate_names)
        frame = frame.rename_axis(index='covariate', columns='iter')
        # Labels are left-padded with zeros to `level + 1` characters.
        return frame.assign(level=level, node=label.zfill(level + 1))

    frames = [
        _node_frame(level, position, label, level_draws)
        for level, (level_draws, labels) in enumerate(zip(loc_samples, node_names))
        for position, label in enumerate(labels)
    ]
    stacked = pd.concat(frames)
    return stacked.reset_index().set_index(['level', 'node', 'covariate'])

def package_prec_samples(prec_samples):
    """Wrap precision-trace draws in a frame indexed by level.

    Args:
        prec_samples: 2-D array-like with one row per level and one column
            per iteration.

    Returns:
        A data frame whose index is the row position (named `level`) and whose
        column axis is named `iter`.
    """
    n_levels = len(prec_samples)
    frame = pd.DataFrame(prec_samples, index=np.arange(n_levels))
    frame = frame.rename_axis(index='level', columns='iter')
    # Same index round-trip as the other packagers, so all frames are built alike.
    return frame.reset_index().set_index(['level'])

def package_disp_samples(nuisance_samples, node_names):
    """Summarise the nuisance draws as a single-row frame labelled `mean`.

    Args:
        nuisance_samples: array of nuisance draws; it is averaged over axis 0.
        node_names: currently unused.

    Returns:
        A one-row data frame (index value `mean`, index named `node`) whose
        column axis is named `iter`.

    NOTE(review): the average is taken over axis 0, which is the draws axis
    when called from `package_samples`, yet the columns are labelled `iter`.
    Confirm which axis was meant to be averaged.
    """
    averaged = np.mean(nuisance_samples, axis=0)
    frame = pd.DataFrame(averaged[np.newaxis], index=['mean'])
    frame = frame.rename_axis(index='node', columns='iter')
    return frame.reset_index().set_index(['node'])

In [None]:
# config

# model inputs: macro columns used as covariates (a constant and a trend are added later)
covariate_names = ['housing']

# sampler settings
seed = 0         # seeds the random generator handed to the sampler
n_warmup = 100   # initial draws that are thrown away
n_samples = 1000 # draws kept after warm-up

In [None]:
# load data

# Macro covariates, indexed by the `time` column.
macro = pd.read_csv('demos/data/sareb_covariates.csv').set_index('time')

# Zip codes are identifiers, not numbers. Read them as text so that leading zeros
# survive and the `.str` prefix slicing used to build the tree works.
prices = pd.read_csv(
    'demos/data/sareb_prices_synthetic.csv',
    dtype={'zip': str},
).set_index('zip')

# Random generator passed to the sampler; seeded from the config cell.
rng = np.random.default_rng(seed)

In [None]:
# format response

# keep only the columns of `prices` that have no missing values
response = prices.dropna(axis='columns')

In [None]:
# format covariates

# Build the design matrix: constant, linear trend, then the configured covariates.
covariates = (
    macro.loc[:, covariate_names]
    # assign() returns a new frame, which sidesteps a possible
    # SettingWithCopyWarning from writing columns onto a slice of `macro`.
    # The trend counts rows of the full macro table, before any rows are dropped.
    .assign(_constant=1, _trend=np.arange(macro.shape[0]))
    # Column order follows `covariate_names` from the config cell rather than a
    # hard-coded covariate, matching the names used when packaging the samples.
    .loc[:, ['_constant', '_trend'] + covariate_names]
    .dropna()
    # Keep the rows labelled by the response's columns, in that order.
    .loc[response.columns]
)

In [None]:
# construct tree

# One row per response row, holding its zip code.
indices = response.index.to_frame()

# Each level of the hierarchy is a successively longer prefix of the zip code.
indices['lvl_1'] = indices['zip'].str.slice(0, 2)
indices['lvl_2'] = indices['zip'].str.slice(0, 3)
indices['lvl_3'] = indices['zip'].str.slice(0, 4)
indices['lvl_4'] = indices['zip'].str.slice(0, 5)

# `drop('zip', 1)` passed the axis positionally, which pandas deprecated in 1.3
# and rejects from 2.0 onwards; name the columns explicitly instead.
indices = indices.drop(columns='zip')

# Integer code per distinct prefix, computed independently for each level.
codes = indices.apply(lambda x: x.astype('category').cat.codes).astype('int64')
n_nodes = codes.max(0) + 1

# For every distinct (child, parent) pair, keep the parent's code.
# NOTE(review): drop_duplicates keeps first-appearance order while category codes
# follow sorted order; the two coincide only if rows are sorted by zip -- confirm.
parent_node_3 = codes[['lvl_4', 'lvl_3']].drop_duplicates().lvl_3
parent_node_2 = codes[['lvl_3', 'lvl_2']].drop_duplicates().lvl_2
parent_node_1 = codes[['lvl_2', 'lvl_1']].drop_duplicates().lvl_1

# Labels per level, root first; the root is a single node called '0'.
node_names = [['0'], indices.lvl_1.unique(), indices.lvl_2.unique(), indices.lvl_3.unique(), indices.lvl_4.unique()]

# Parent arrays ordered from the deepest level up to level 1.
tree = [parent_node_3.values, parent_node_2.values, parent_node_1.values]

In [None]:
# sample

sampler = nfx.lm.gibbs.sample_posterior(
    response.values,
    covariates.values,
    tree,
    ome=rng,
)

# draw warm-up and retained iterations in one pass, then discard the warm-up prefix
samples = [next(sampler) for _ in range(n_warmup + n_samples)]
del samples[:n_warmup]

In [None]:
# reformat samples for plotting

# coefficient labels: the two generated columns first, then the configured covariates
coef_samples, prec_samples, nuisance_samples = package_samples(
    samples,
    node_names,
    ['_constant', '_trend', *covariate_names],
)

In [None]:
# coefficient samples

# first five rows, first five iteration columns
coef_samples.head(5).iloc[:, :5]

In [None]:
# prior precision trace samples

# first five levels (at most), first five iteration columns
prec_samples.head(5).iloc[:, :5]

In [None]:
# plot traces for root coefficients, grid and color by covariate
# (`coef_samples.loc[0]` selects level 0, i.e. the single root node '0' in `node_names`)

nfx.misc.plot.plot_traces(coef_samples.loc[0], 'iter', 'covariate', 'covariate')

In [None]:
# plot marginals for root coefficients, grid and color by covariate
# (`coef_samples.loc[0]` selects level 0, i.e. the single root node '0' in `node_names`)

nfx.misc.plot.plot_marginals(coef_samples.loc[0], 'covariate', 'covariate')

In [None]:
# plot ACFs for prior precision traces, grid and color by level
# (`prec_samples` has one row per level, holding the trace of that level's sampled precision matrix)

nfx.misc.plot.plot_acf(prec_samples, 'level', 'level')