In [1]:
%run init_notebookspace.py
from settings import DATA_DIR, MODEL_DIR, POST_EST_DIR

DATA_DIR is existant under: C:\Users\LukasGrahl\Documents\GIT\memoire1\data


In [2]:
%matplotlib inline

import arviz as az
from gEconpy.classes.model import gEconModel

import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import xarray as xr
import pandas as pd

import os
import time
import math
from scipy.stats import percentileofscore
from copy import deepcopy

from scipy.stats import gamma, norm, beta, uniform
from filterpy.kalman import KalmanFilter
from filterpy.common import Saver


from src.plotting import plot_dfs
from src.process_data import load_data
from src.filtering_sampling import set_up_kalman_filter, kalman_filter, sample_from_priors, solve_updated_mod, get_arr_pdf_from_dist
from src.utils import printProgBar
from src.classes import Spinner

from config import plt_config
plt.rcParams.update(plt_config)

load data

In [3]:
from config import fred_dict

df = load_data('prepro_data.csv', DATA_DIR, fred_dict)

# using real potential GDP instead of GDP
df = df.drop(['Ix', 'Zx', 'Y', 'pi_s', 'w'], axis=1).rename(columns={'Y_p': 'Y', 'pi_c': 'pi'})

# split train and test
train = df[df['is_test'] == False].drop('is_test', axis=1).copy()
test = df[df['is_test'] == True].drop('is_test', axis=1).copy()

Error occured 'S', file_dict may be incomplete
Error occured 'is_test', file_dict may be incomplete


load & solve model

In [4]:
from config import mod4_params, mod4_priors, mod5_params, mod5_priors, mod6_params, mod6_priors
mods = {
    'mod4_rbc_vanilla': {'params': mod4_params,
                         'priors': mod4_priors,
                             'is_lin': False},
    'mod5_nk_vanilla': {'params': mod5_params,
                        'priors': mod5_priors,
                        'is_lin': False},    
    'mod6_nk_energy_lin2': {'params': mod6_params,
                            'is_lin': True,
                            'priors': mod6_priors},
}

# load model
for key in mods.keys():
    # load
    mods[key]['mod'] = gEconModel(os.path.join(MODEL_DIR, f'{key}.gcn'), verbose=False)
    
    # solve
    _, mods[key]['mod'] = solve_updated_mod(mods[key]['mod'], verbose=True, model_is_linear=mods[key]['is_lin'])
    assert _ == True, f'{key} model was not solvable'
    
    # get shocks
    mods[key]['shocks'] = [item.base_name for item in mods[key]['mod'].shocks]

Steady state found! Sum of squared residuals is 6.695803486498152e-23
Solution found, sum of squared residuals:  9.039442757935503e-31
Norm of deterministic part: 0.000000000
Norm of stochastic part:    0.000000000
Model solution has 2 eigenvalues greater than one in modulus and 2 forward-looking variables.
Blanchard-Kahn condition is satisfied.
Steady state found! Sum of squared residuals is 9.760551087350258e-22
Solution found, sum of squared residuals:  3.0464227584645074e-28
Norm of deterministic part: 0.000000000
Norm of stochastic part:    0.000000000
Model solution has 6 eigenvalues greater than one in modulus and 4 forward-looking variables.
Blanchard-Kahn condition is satisfied.
Steady state found! Sum of squared residuals is 0.0
Solution found, sum of squared residuals:  3.5962834363340206e-33
Norm of deterministic part: 0.000000000
Norm of stochastic part:    0.000000000
Model solution has 2 eigenvalues greater than one in modulus and 2 forward-looking variables.
Blanchard-K

## Kalman Filter Estimation

## drawing from priors

In [6]:
mod_name = 'mod4_rbc_vanilla'
mod = mods[mod_name]['mod']
mod_params = mod.free_param_dict
prior_dist = mods[mod_name]['priors']
mod_is_linear = mods[mod_name]['is_lin']

mod.variables

[A_t, C_t, I_t, K_t, L_t, Y_t, lambda_t, r_t, w_t]

In [7]:
n_runs = 10_000
verbose = False

start = time.time()

# counters
counter_solved = 0 # model was sovable
counter_kalman = 0 # kalman filter did not fail
counter_accp = 0 # draw was accepted

# reset params
mod.free_param_dict.update(mod_params)

# get params, variables and shocks as lists
shock_names = [x.base_name for x in mod.shocks]
state_variables = [x.base_name for x in mod.variables]
model_params = list(mod.free_param_dict.keys())

# set kalman filter observed variables
observed_vars = ["Y", 'C', 'r']
_ = [item for item in observed_vars if item not in state_variables]
assert len(_) == 0, f"{_} not in state variables"

# get posterior output list
param_posterio_list = {item: [mod.free_param_dict[item]] for item in model_params if item in prior_dist.keys()}
shock_posterior_list = {item: [.1] for item in shock_names}
loglike_list = [-100]

# get final ouput
output_dict = {}

for i in range(0, n_runs):
    printProgBar(i, n_runs-1, prefix='Progress')
    
    # set dict to capture results of this run
    draw_dict = {
        'log_like_list': None,
        'log_like_sum': None,
        'is_solved': False,
        'ratio': None,
        'omega': None,
        'is_KF_solved': False,
        'is_accepted': False,
        'parameters': {
            'prior': {item: None for item in model_params if item in prior_dist.keys()},
            'prior_pdf': {item: None for item in model_params if item in prior_dist.keys()},
            'posterior': {item: None for item in model_params if item in prior_dist.keys()}
        },
        'shocks': {
            'prior': {item: None for item in shock_names},
            'posterior': {item: None for item in shock_names}
        }
    }
    # current posterior
    old_posterior = {item: vals[-1] for item, vals in param_posterio_list.items()}
    old_loglike = loglike_list[-1]
    
    draw_dict['parameters']['posterior'] = old_posterior
    draw_dict['shocks']['posterior'] = {item: vals[-1] for item, vals in shock_posterior_list.items()}
    
    # sample from priors
    prior, shocks = sample_from_priors(prior_dist, mod_params, shock_names)
    
    draw_dict['parameters']['prior'].update(prior)
    draw_dict['shocks']['prior'].update(shocks)
    
    mod.free_param_dict.update(prior)
    mod.shock_priors.update(shocks)
    
    # sovle mdoel
    is_solved, mod = solve_updated_mod(mod, verbose=verbose) #, model_is_linear=mod_is_linear)
    if not is_solved:
        output_dict[i] = draw_dict
        continue
    else:
        draw_dict['is_solved'] = True
        counter_solved += 1
            
    # get Kalman filter initial condition
    T, R = mod.T.values, mod.R.values
    H, Z, T, R, QN, zs = set_up_kalman_filter(R=R, T=T, observed_data=train[observed_vars].values, observed_vars=observed_vars, 
                                              shock_names=shock_names, shocks_drawn_prior=shocks, state_variables=state_variables)
       
    # set up Kalman filter
    kfilter = KalmanFilter(len(state_variables), len(observed_vars))
    kfilter.F = T
    kfilter.Q = QN
    kfilter.H = Z
    kfilter.R = H

    # run Kalman filter
    try:
        saver = Saver(kfilter)
        mu, cov, _, _ = kfilter.batch_filter(zs, saver=saver)
        ll = saver.log_likelihood
        
         # catch -math.inf values in log_likelihood
        if len([val for val in ll if val == -math.inf]) >0:
            output_dict[i] = draw_dict
            continue
        
        # otherwise keep going
        new_loglike = np.sum(ll)
        loglike_list.append(new_loglike)
    
        draw_dict['log_like_sum'] = new_loglike
        draw_dict['log_like_list'] = ll
        
        draw_dict['is_KF_solved'] = True
        counter_kalman += 1
        
    except Exception as e:
        print(e)
        output_dict[i] = draw_dict
        continue
        
    #### MH #####  
    # MH ratio
    prior_pdf = get_arr_pdf_from_dist(prior, prior_dist)
    draw_dict['parameters']['prior_pdf'] = dict(zip(prior.keys(), prior_pdf))
    
    ratio = ((new_loglike * prior_pdf) / (old_loglike * get_arr_pdf_from_dist(old_posterior, prior_dist))).mean()
    ω = min([ratio, 1])
    draw_dict['ratio'] = ratio
    draw_dict['omega'] = ω
    random = np.random.uniform(0, 1)
        
    # merge draws prior into posterior 
    if random <= ω:
        is_accepted = True
        counter_accp += 1.
        draw_dict['is_accepted'] = is_accepted
        
        # update param posterior
        for key in prior.keys():
            param_posterio_list[key].append(prior[key])
        
        # update shock posterior
        for key in shock_posterior_list:
            shock_posterior_list[key].append(shocks[key])
                
    else:
        # leave posterior unaltered and restart
        is_accepted = False
        
    # save output    
    output_dict[i] = draw_dict
        

# print stats
print('\nloop ran for', (time.time() - start) / 60, 'minutes')
print('\nsolver rate', counter_solved/n_runs)
print('\nacceptance rate', counter_accp/counter_solved)

Progress |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% 

loop ran for 125.43099851608277 minutes

solver rate 0.6796

acceptance rate 0.8209240729841083


In [19]:
params = [item for item in prior_dist if item not in shock_names]
xarr = xr.Dataset(
    {
        # draw & param dimension
        'posterior': (['draw', 'parameter'],[np.array(list(output_dict[i]['parameters']['posterior'].values())) for i in output_dict.keys()]),
        'prior': (['draw', 'parameter'], [np.array(list(output_dict[i]['parameters']['prior'].values())) for i in output_dict.keys()]),
        'prior_pdf': (['draw', 'parameter'], [np.array(list(output_dict[i]['parameters']['prior_pdf'].values())) for i in output_dict.keys()]),
        
        # draw dimension
        'is_solved': (['draw'], [output_dict[i]['is_accepted'] for i in output_dict.keys()]),
        'log_likelihood': (['draw'], [output_dict[i]['log_like_sum'] for i in output_dict.keys()]),
        'mh_ration': (['draw'], [output_dict[i]['ratio'] for i in output_dict.keys()]),
        
        # uni dimensional
        'n_runs_acc': (['uni_dim'], [counter_accp]),
        'n_runs': (['uni_dim'], [n_runs]), # number of solved models  
    },
    coords={
        'draw': (['draw'], list(range(0, int(n_runs)))),
        'parameter': (['parameter'], params),
        'uni_dim': (['uni_dim'], [0])
    }
)

# only accepted runs
xarr_acc = xarr.where(xarr.is_solved).dropna('draw').copy()
xarr_acc = xarr_acc.where(xarr_acc.draw > int(xarr_acc.n_runs_acc[0][0]/2)).dropna('draw')

# get quantiles of parameters
arr_nan = deepcopy(xarr_acc.posterior.values)
arr_nan = np.array(arr_nan, dtype=float)

for i in range(0, arr_nan.shape[1]):
    arr_no_nan = arr_nan[~np.isnan(arr_nan[:, i]), i]
    arr_nan[~np.isnan(arr_nan[:, i]), i] = [percentileofscore(arr_no_nan, x, 'rank') for x in arr_no_nan]
xarr_acc = xarr_acc.assign({'posterior_percentiles': (['draw', 'parameter'], arr_nan)})


quantile_ind = []
for i in [25, 50, 75]:
    arr = np.abs(xarr_acc.posterior_percentiles.mean(axis=1) - i)
    quantile_ind.append(np.where(arr==np.min(arr))[0][0])
    
xarr_acc = xarr_acc.assign({
    'posterior_q1': (['parameter'], xarr.sel(draw=quantile_ind[0]).posterior.values),
    'posterior_q2': (['parameter'], xarr.sel(draw=quantile_ind[1]).posterior.values),
    'posterior_q3': (['parameter'], xarr.sel(draw=quantile_ind[2]).posterior.values)
})

In [20]:
# save output
from datetime import datetime

a = ''.join(str(datetime.now().date()).split('-'))
b = ''.join((str(datetime.now().time()).split(':'))[:-1])
timestamp = '_'.join([a, b])

file_path = os.path.join(POST_EST_DIR, f'{mod_name}_{timestamp}.nc')
file_path_acc = os.path.join(POST_EST_DIR, f'{mod_name}_accepted_{timestamp}.nc')
print(file_path)
if not os.path.exists(file_path):
    xarr.to_netcdf(file_path)
    
if not os.path.exists(file_path_acc):
    xarr_acc.to_netcdf(file_path_acc)
    
else:
    print('File existst already')


C:\Users\LukasGrahl\Documents\GIT\memoire1\data\posterior_est_out\mod4_rbc_vanilla_20230426_2236.nc
