In [1]:
%run init_notebookspace.py
from settings import DATA_DIR, MODEL_DIR, POST_EST_DIR

Current venv: C:\Users\LukasGrahl\Documents\GIT\memoire1\env_macro_rbc
DATA_DIR is existant under: C:\Users\LukasGrahl\Documents\GIT\memoire1\data


In [2]:
%matplotlib inline

import arviz as az
from gEconpy.classes.model import gEconModel

import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import xarray as xr
import pandas as pd

import os
import time
import math
from scipy.stats import percentileofscore
from copy import deepcopy

from scipy.stats import gamma, norm, beta, uniform
from filterpy.kalman import KalmanFilter
from filterpy.common import Saver


from src.plotting import plot_dfs
from src.process_data import load_data
from src.filtering_sampling import set_up_kalman_filter, kalman_filter, sample_from_priors, solve_updated_mod, get_arr_pdf_from_dist
from src.utils import printProgBar
from src.classes import Spinner

from config import plt_config
plt.rcParams.update(plt_config)

load data

In [3]:
from config import fred_dict

df = load_data('prepro_data.csv', DATA_DIR, fred_dict)

# using real potential GDP instead of GDP
df = df.drop(['Ix', 'Zx', 'Y', 'pi_s', 'w'], axis=1).rename(columns={'Y_p': 'Y', 'pi_c': 'pi'})

# split train and test
train = df[df['is_test'] == False].drop('is_test', axis=1).copy()
test = df[df['is_test'] == True].drop('is_test', axis=1).copy()

Error occured 'is_test', file_dict may be incomplete


load & solve model

In [4]:
from config import mod4_params, mod4_priors, mod5_params, mod5_priors, mod6_params, mod6_priors
mods = {
    # 'mod4_rbc_vanilla': {'params': mod4_params,
    #                      'priors': mod4_priors,
                             # 'is_lin': False},
    'mod5_nk_vanilla': {'params': mod5_params,
                        'priors': mod5_priors,
                        'is_lin': False},    
    # 'mod6_nk_energy_lin2': {'params': mod6_params,
    #                                'is_lin': True,
    #                                'priors': mod6_priors},
}

# load model
for key in mods.keys():
    # load
    mods[key]['mod'] = gEconModel(os.path.join(MODEL_DIR, f'{key}.gcn'), verbose=False)
    
    # solve
    _, mods[key]['mod'] = solve_updated_mod(mods[key]['mod'], verbose=True, model_is_linear=mods[key]['is_lin'])
    assert _ == True, f'{key} model was not solvable'
    
    # get shocks
    mods[key]['shocks'] = [item.base_name for item in mods[key]['mod'].shocks]

Steady state found! Sum of squared residuals is 9.760551087350258e-22
Solution found, sum of squared residuals:  8.027537305022083e-29
Norm of deterministic part: 0.000000000
Norm of stochastic part:    0.000000000
Model solution has 6 eigenvalues greater than one in modulus and 4 forward-looking variables.
Blanchard-Kahn condition is satisfied.


In [9]:
mod5_priors = {
    'alpha': beta(2, 5),
    'eta_p': beta(10, 3.4),
    'gamma_R': gamma(4, 0, .5),
    'gamma_Y': gamma(4, 0, .5),
    'gamma_pi': gamma(4, 0, .5),
    'epsilon_A': beta(1.2, 1.2),
    'epsilon_R': beta(1.2, 1.2),
    'sigma_C': norm(2, 2),
    'sigma_L': norm(2, 2),
    'epsilon_T': beta(1.1, 10),
    'epsilon_Y': beta(1.1, 10),
    'epsilon_pi': beta(1.1, 10)
}

## Kalman Filter Estimation

## drawing from priors

In [10]:
mod_name = 'mod5_nk_vanilla'
mod = mods[mod_name]['mod']
mod_params = mod.free_param_dict
prior_dist = mods[mod_name]['priors']
mod_is_linear = mods[mod_name]['is_lin']

mod.variables

[A_t,
 C_t,
 Div_t,
 LHS_t,
 L_t,
 RHS_t,
 TC_t,
 U_t,
 Y_t,
 lambda_t,
 mc_t,
 pi_obj_t,
 pi_star_t,
 pi_t,
 r_t,
 w_t]

In [11]:
mod_params

{'alpha': 0.35,
 'beta': 0.99,
 'eta_p': 0.75,
 'gamma_R': 0.9,
 'gamma_Y': 0.05,
 'gamma_pi': 1.5,
 'psi_p': 0.6,
 'rho_A': 0.95,
 'rho_pi_dot': 0.924,
 'sigma_C': 1.5,
 'sigma_L': 2.0}

In [None]:
n_runs = 100
verbose = False
infinity_mask_val = -1

start = time.time()

n_param_dim_out = []
n_dim_out = []

ratio_list = np.array([[1,0,0,0,0]])
loglike_list = [-100]

# counters
counter_solved = 0 # model was sovable
counter_kalman = 0 # kalman filter did not fail
counter_accp = 0 # draw was accepted

# reset params
mod.free_param_dict.update(mod_params)

# get params, variables and shocks as lists
shock_names = [x.base_name for x in mod.shocks]
print(shock_names)
state_variables = [x.base_name for x in mod.variables]

observed_vars = ["Y", 'C']

model_params = list(mod.free_param_dict.keys())
param_posterio_list = {item: [mod.free_param_dict[item]] for item in model_params if item in prior_dist.keys()}
shock_prior_list = {item: [.1] for item in shock_names}
param_prior_list = {item: [] for item in model_params if item in prior_dist.keys()}
new_prior_pdf = []


output_dict = { }
sample_dict = {
    'log_like_list': None,
    'log_like_sum': None,
    'is_solved': False,
    'ratio': None,
    'omega': None,
    'is_KF_solved': False,
    'is_accepted': False,
    'parameters': {
        'prior': {item: None for item in model_params if item in prior_dist.keys()},
        'prior_pdf': {item: None for item in model_params if item in prior_dist.keys()},
        'posterior': {item: None for item in model_params if item in prior_dist.keys()}
    },
    'shocks': {
        'prior': {item: None for item in shock_names},
        'posterior': {item: None for item in shock_names}
    }
}



for i in range(0, n_runs):
    printProgBar(i, n_runs-1, prefix='Progress')
    # set for this run
    draw_dict = sample_dict.copy()
        
    # sample from priors
    new_prior, shocks = sample_from_priors(prior_dist, mod_params, shock_names)
    draw_dict['parameters']['prior'] = new_prior
    draw_dict['shocks']['prior'] = shocks
    
    # update model parameters
    mod.free_param_dict.update(new_prior)
    mod.shock_priors.update(shocks)
    
    # with Spinner():
        # print('Busy with solving:')
    is_solved, mod = solve_updated_mod(mod, verbose=verbose) #, model_is_linear=mod_is_linear)
    if not is_solved:
        draw_dict['is_solved'] = False
        output_dict[i] = draw_dict
        continue
    else:
        draw_dict['is_solved'] = True
        counter_solved += 1
            
    # get Kalman filter initial condition
    T, R = mod.T.values, mod.R.values
    H, Z, T, R, QN, zs = set_up_kalman_filter(R=R, T=T, observed_data=train[observed_vars].values, observed_vars=observed_vars, 
                                              shock_names=shock_names, shocks_drawn_prior=shocks, state_variables=state_variables)
       
    # set up Kalman filter
    kfilter = KalmanFilter(len(state_variables), len(observed_vars))
    kfilter.F = T
    kfilter.Q = QN
    kfilter.H = Z
    kfilter.R = H

    # run Kalman filter
    try:
        saver = Saver(kfilter)
        mu, cov, _, _ = kfilter.batch_filter(zs, saver=saver)
        ll = saver.log_likelihood
        draw_dict['log_like_list'] = ll
        counter_kalman += 1
    except Exception as e:
        print(e)
        output_dict[i] = draw_dict
        continue
        
    # append shocks
    for key in shock_prior_list.keys():
        shock_prior_list[key].append(shocks[key])
    
    # append priors
    for key in param_prior_list.keys():
        param_prior_list[key].append(new_prior[key])
    
    # catch -math.inf values in log_likelihood
    new_loglike = np.sum([infinity_mask_val if val == -math.inf else val for val in ll])
    draw_dict['log_like_sum'] = new_loglike
    loglike_list.append(new_loglike)
    
    #### MH #####
    old_loglike = loglike_list[-2]
    old_posterior = {item: vals[-1] for item, vals in param_posterio_list.items()}
    
    # MH ratio
    ratio = ((new_loglike * get_arr_pdf_from_dist(new_prior, prior_dist)) / (old_loglike * get_arr_pdf_from_dist(old_posterior, prior_dist))).mean()
    ω = min([ratio, 1])
    draw_dict['ratio'] = ratio
    draw_dict['omega'] = ω
    random = np.random.uniform(0, 1)
        
    
    # merge draws prior into posterior 
    if random <= ω:
        is_accepted = True
        counter_accp += 1.
        draw_dict['is_accepted'] = True
        draw_dict['parameters']['posterior'] = new_prior
        
        for key in new_prior.keys():
            param_posterio_list[key].append(new_prior[key])
            
    # leave posterior unaltered and restart
    else:
        for key in new_prior.keys():
            param_posterio_list[key].append(np.nan)
        is_accepted = False
        draw_dict['is_accepted'] = False
        counter_accp += 0.
        
    # save output
    new_prior_pdf.append(list(get_arr_pdf_from_dist(new_prior, prior_dist)))    
    draw_dict['parameters']['prior_pdf'] = dict(zip(new_prior.keys(), get_arr_pdf_from_dist(new_prior, prior_dist)))
    ratio_list = np.append(ratio_list, [[new_loglike, ratio, ω, random, random <= ω]], axis=0)
    
    output_dict[i] = draw_dict
        
n_param_dim_out = np.array(n_param_dim_out)

# print stats
print('\nloop ran for', (time.time() - start) / 60, 'minutes')
print('\nsolver rate', counter_solved/n_runs)
# print('\nacceptance rate', counter_accp/counter_solved)

['epsilon_A', 'epsilon_R', 'epsilon_pi']
Progress |██████████████████████████████████████████----------------------------------------------------------| 42.4% 

In [162]:
output_dict = {i: x_dict[i] for i in output_dict.keys() if output_dict[i]['is_solved']}

In [165]:
params = list(param_prior_list.keys())
xarr = xr.Dataset(
    {
        'posterior': (['draw', 'parameter'],[np.array(list(output_dict[i]['parameters']['posterior'].values())) for i in output_dict.keys()]),
        'prior': (['draw', 'parameter'], [np.array(list(output_dict[i]['parameters']['prior'].values())) for i in output_dict.keys()]),
        'prior_pdf': (['draw', 'parameter'], [np.array(list(output_dict[i]['parameters']['prior_pdf'].values())) for i in output_dict.keys()]),
        
        'is_solved': (['draw'], [output_dict[i]['is_accepted'] for i in output_dict.keys()]),
        'log_likelihood': (['draw'], [output_dict[i]['log_like_sum'] for i in output_dict.keys()]),
        'n_runs_acc': (['uni_dim'], [counter_accp]),
        'n_runs': (['uni_dim'], [n_runs]), # number of solved models
        
    },
        # 'new_prior': (['draw', 'parameter'], np.array([param_prior_list[item] for item in params]).transpose()),
        # 'new_prior_pdf': (['draw', 'parameter'],  np.array(new_prior_pdf)),
        # 'log_like': (['draw'], loglike_list[1:]),
        # 'n_runs': (['uni_dim'], [n_runs]), # number of solved models
        # 'n_runs_acc': (['uni_dim'], [counter_accp]),
        # 'solved_rate': (['uni_dim'], [counter_accp/counter_solved])

    coords={
        'draw': (['draw'], list(range(0, int(counter_accp)))),
        'parameter': (['parameter'], params),
        'uni_dim': (['uni_dim'], [0])
    }
)

arr_nan = deepcopy(xarr.posterior.values)

for i in range(0, arr_nan.shape[1]):
    arr_no_nan = arr_nan[~np.isnan(arr_nan[:, i]), i]
    arr_nan[~np.isnan(arr_nan[:, i]), i] = [percentileofscore(arr_no_nan, x, 'rank') for x in arr_no_nan]
xarr = xarr.assign({'posterior_percentiles': (['draw', 'parameter'], arr_nan)})


quantile_ind = []
for i in [25, 50, 75]:
    arr = np.abs(xarr.posterior_percentiles[int(xarr.n_runs_acc/2):].mean(axis=1) - i)
    quantile_ind.append(np.where(arr==np.min(arr))[0][0])
    
xarr = xarr.assign({
    'posterior_q1': (['parameter'], xarr.sel(draw=quantile_ind[0]).posterior.values),
    'posterior_q2': (['parameter'], xarr.sel(draw=quantile_ind[1]).posterior.values),
    'posterior_q3': (['parameter'], xarr.sel(draw=quantile_ind[2]).posterior.values)
})

In [166]:
xarr.where(xarr.is_solved)

In [171]:
def get_xarr_InferenceData(xarr_in: xr.Dataset):
    xarr = xarr_in.where(xarr_in.is_solved).copy()
    draw = int(xarr_in.n_runs_acc)
    
    xr_prior = xr.Dataset(
        {
            key: (
                ['chain', 'draw', 'a_dim'],
                np.concatenate(
                    [
                        np.array([0] * draw).reshape(draw, 1),
                        np.array(xarr.sel(parameter=key).prior).reshape(draw, 1),
                        np.array([0] * draw).reshape(draw, 1)
                    ],
                    axis=1
                ).reshape(1, draw, 3)
            )
            for key in xarr.parameter.values
        },
        coords={
            "chain": (["chain"], np.arange(1)),
            "draw": (["draw"], np.arange(draw)),
            "a_dim": (["a_dim"], ["x", "y", "z"])

        }
    )

    xr_post = xr.Dataset(
        {
            key: (
                ['chain', 'draw', 'a_dim'],
                np.concatenate(
                    [
                        np.array([0] * draw).reshape(draw, 1),
                        np.array(xarr.sel(parameter=key).posterior).reshape(draw, 1),
                        np.array([0] * draw).reshape(draw, 1)
                    ],
                    axis=1
                ).reshape(1, draw, 3)
            )
            for key in xarr.parameter.values
        },
        coords={
            "chain": (["chain"], np.arange(1)),
            "draw": (["draw"], np.arange(draw)),
            "a_dim": (["a_dim"], ["x", "y", "z"])

        }
    )
    
    xr_loglike = xr.Dataset(
        {
            'obs': (
                ['chain', 'draw', 'a_dim'],
                np.concatenate(
                    [
                        np.array([0.] * draw).reshape(draw, 1),
                        np.array(xarr.log_likelihood).reshape(draw, 1),
                        np.array([0.] * draw).reshape(draw, 1)
                    ],
                    axis=1
                ).reshape(1, draw, 3)
            )
        },
        coords={
            "chain": (["chain"], np.arange(1)),
            "draw": (["draw"], np.arange(draw)),
            "a_dim": (["a_dim"], ["x", "y", "z"])

        }
    )
    return xr_prior, xr_post, xr_loglike

In [172]:
az_mod4 = az.InferenceData(**dict(zip(
    ['prior', 'posterior', 'log_likelihood'], 
    *[get_xarr_InferenceData(xarr)]
)))

In [173]:
az.waic(az_mod4)

See http://arxiv.org/abs/1507.04544 for details


Computed from 32 posterior samples and 3 observations log-likelihood matrix.

          Estimate       SE
elpd_waic -213854.11  174611.15
p_waic    10963.89        -


In [123]:
# # create xarray
# params = list(param_prior_list.keys())
# xarr = xr.Dataset(
#     {
#         'posterior': (['draw', 'parameter'],  np.array([param_posterio_list[item] for item in params]).transpose()[1:]),
#         'new_prior': (['draw', 'parameter'], np.array([param_prior_list[item] for item in params]).transpose()),
#         'new_prior_pdf': (['draw', 'parameter'],  np.array(new_prior_pdf)),
#         'log_like': (['draw'], loglike_list[1:]),
#         'n_runs': (['uni_dim'], [n_runs]), # number of solved models
#         'n_runs_acc': (['uni_dim'], [counter_accp]),
#         'solved_rate': (['uni_dim'], [counter_accp/counter_solved])       
        
#     },
#     coords={
#         'draw': (['draw'], list(range(0, counter_solved))),
#         'parameter': (['parameter'], params),
#         'uni_dim': (['uni_dim'], [0])
#     }
# )

# # get percentiles
# arr_nan = deepcopy(xarr.posterior.values)

# for i in range(0, arr_nan.shape[1]):
#     arr_no_nan = arr_nan[~np.isnan(arr_nan[:, i]), i]
#     arr_nan[~np.isnan(arr_nan[:, i]), i] = [percentileofscore(arr_no_nan, x, 'rank') for x in arr_no_nan]
# xarr = xarr.assign({'posterior_percentiles': (['draw', 'parameter'], arr_nan)})

# quantile_ind = []
# for i in [25, 50, 75]:
#     arr = np.abs(xarr.posterior_percentiles[int(xarr.n_runs_acc/2):].mean(axis=1) - i)
#     quantile_ind.append(np.where(arr==np.min(arr))[0][0])
    
# xarr = xarr.assign({
#     'posterior_q1': (['parameter'], xarr.sel(draw=quantile_ind[0]).posterior.values),
#     'posterior_q2': (['parameter'], xarr.sel(draw=quantile_ind[1]).posterior.values),
#     'posterior_q3': (['parameter'], xarr.sel(draw=quantile_ind[2]).posterior.values)
# })

In [51]:
# save output
from datetime import datetime

a = ''.join(str(datetime.now().date()).split('-'))
b = ''.join((str(datetime.now().time()).split(':'))[:-1])
timestamp = '_'.join([a, b])

file_path = os.path.join(POST_EST_DIR, f'{mod_name}_{timestamp}.nc')
print(file_path)
if not os.path.exists(file_path):
    xarr.to_netcdf(file_path)
else:
    print('File existst already')


C:\Users\LukasGrahl\Documents\GIT\memoire1\data\posterior_est_out\mod6_nk_energy_lin2_20230425_1819.nc


In [52]:
def get_xarr_InferenceData(xarr: xr.Dataset, draw: int):
    
    xr_prior = xr.Dataset(
        {
            key: (
                ['chain', 'draw', 'a_dim'],
                np.concatenate(
                    [
                        np.array([0] * draw).reshape(draw, 1),
                        np.array(xarr.sel(parameter=key).new_prior).reshape(draw, 1),
                        np.array([0] * draw).reshape(draw, 1)
                    ],
                    axis=1
                ).reshape(1, draw, 3)
            )
            for key in param_posterio_list.keys()
        },
        coords={
            "chain": (["chain"], np.arange(1)),
            "draw": (["draw"], np.arange(draw)),
            "a_dim": (["a_dim"], ["x", "y", "z"])

        }
    )

    xr_post = xr.Dataset(
        {
            key: (
                ['chain', 'draw', 'a_dim'],
                np.concatenate(
                    [
                        np.array([0] * draw).reshape(draw, 1),
                        np.array(xarr.sel(parameter=key).posterior).reshape(draw, 1),
                        np.array([0] * draw).reshape(draw, 1)
                    ],
                    axis=1
                ).reshape(1, draw, 3)
            )
            for key in param_posterio_list.keys()
        },
        coords={
            "chain": (["chain"], np.arange(1)),
            "draw": (["draw"], np.arange(draw)),
            "a_dim": (["a_dim"], ["x", "y", "z"])

        }
    )
    return xr_prior, xr_post

In [53]:
_ = get_xarr_InferenceData(xarr, counter_solved)
az.InferenceData(prior=_[0], posterior=_[1])

In [54]:
def get_xarr_InferenceData(param_dict: dict, draw: int):

    xr_data = xr.Dataset(
        {
            key: (
                ['chain', 'draw', 'a_dim'],
                np.concatenate(
                    [
                        np.array([0] * draw).reshape(draw, 1),
                        np.array(param_dict[key]).reshape(draw, 1),
                        np.array([0] * draw).reshape(draw, 1)
                    ],
                    axis=1
                ).reshape(1, draw, 3)
            )
            for key in param_posterio_list.keys()
        },
        coords={
            "chain": (["chain"], np.arange(1)),
            "draw": (["draw"], np.arange(draw)),
            "a_dim": (["a_dim"], ["x", "y", "z"])

        }
    )
    return xr_data

In [55]:
az_inference = az.InferenceData(
    posterior=get_xarr_InferenceData({key: item[1:] for key, item in param_posterio_list.items()}, counter_solved),
    prior=get_xarr_InferenceData({key: item[1:] for key, item in param_posterio_list.items()}, counter_solved)
                )

In [21]:
print(az_inference.to_dict() == unserialized_data)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()