In [1]:
%run init_notebookspace.py
from settings import DATA_DIR, MODEL_DIR, POST_EST_DIR

DATA_DIR is existant under: C:\Users\LukasGrahl\Documents\GIT\memoire1\data


In [2]:
%matplotlib inline

from gEconpy.classes.model import gEconModel

import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import xarray as xr
import pandas as pd

import os
import time
import math
from scipy.stats import percentileofscore
from copy import deepcopy

from scipy.stats import gamma, norm, beta, uniform
from filterpy.kalman import KalmanFilter
from filterpy.common import Saver


from src.plotting import plot_dfs
from src.process_data import load_data
from src.filtering_sampling import set_up_kalman_filter, kalman_filter, sample_from_priors, solve_updated_mod, get_arr_pdf_from_dist
from src.utils import printProgBar

from config import plt_config
plt.rcParams.update(plt_config)

load data

In [3]:
from config import fred_dict

df = load_data('prepro_data.csv', DATA_DIR, fred_dict)

# using real potential GDP instead of GDP
df = df.drop(['Ix', 'Zx', 'Y', 'pi_s', 'w'], axis=1).rename(columns={'Y_p': 'Y', 'pi_c': 'pi'})

# split train and test
train = df[df['is_test'] == False].drop('is_test', axis=1).copy()
test = df[df['is_test'] == True].drop('is_test', axis=1).copy()

Error occured 'is_test', file_dict may be incomplete


load & solve model

In [4]:
from config import mod4_params, mod4_priors, mod5_params, mod5_priors
mods = {'mod4_rbc_vanilla': {'params': mod4_params,
                             'priors': mod4_priors},
        'mod5_nk_vanilla': {'params': mod5_params,
                            'priors': mod5_priors}}

# load model
for item in mods.keys():
    mods[item]['mod'] = gEconModel(os.path.join(MODEL_DIR, f'{item}.gcn'), verbose=False)

# solve model
for item in mods.keys():
    _, mods[item]['mod'] = solve_updated_mod(mods[item]['mod'], verbose=False)
    assert _ == True

## Kalman Filter Estimation

## drawing from priors

In [10]:
mod_name = 'mod4_rbc_vanilla'
mod = mods[mod_name]['mod']
mod_params = mod.free_param_dict
prior_dist = mods[mod_name]['priors']

In [11]:
n_runs = 10_000
verbose = False
infinity_mask_val = -100

start = time.time()

n_param_dim_out = []
n_dim_out = []

ratio_list = np.array([[1,0,0,0,0]])
loglike_list = [-100]
counter_accp = 0
counter_solved = 0

# reset params
mod.free_param_dict.update(mod_params)

# get params, variables and shocks as lists
shock_names = [x.base_name for x in mod.shocks]
state_variables = [x.base_name for x in mod.variables]
observed_vars = ["Y", 'C']
model_params = list(mod.free_param_dict.keys())

param_posterio_list = {item: [mod.free_param_dict[item]] for item in model_params if item in prior_dist.keys()}
shock_prior_list = {item: [0] for item in shock_names}
param_prior_list = {item: [] for item in model_params if item in prior_dist.keys()}
new_prior_pdf = []



for i in range(0, n_runs):
    printProgBar(i, n_runs-1, prefix='Progress')
    
    # sample from priors
    new_prior, shocks = sample_from_priors(prior_dist, mod_params, shock_names)
    mod.free_param_dict.update(new_prior)
    
    is_solved, mod = solve_updated_mod(mod, verbose=verbose)
    if not is_solved:
        counter_solved += 0
        continue
    else:
        counter_solved += 1
            
    # get Kalman filter initial condition
    T, R = mod.T.values, mod.R.values
    H, Z, T, R, QN, zs = set_up_kalman_filter(R=R, T=T, observed_data=train[observed_vars].values, observed_vars=observed_vars, 
                                              shock_names=shock_names, shocks_drawn_prior=shocks, state_variables=state_variables)
       
    # set up Kalman filter
    kfilter = KalmanFilter(len(state_variables), len(observed_vars))
    kfilter.F = T
    kfilter.Q = QN
    kfilter.H = Z
    kfilter.R = H

    # run Kalman filter
    try:
        saver = Saver(kfilter)
        mu, cov, _, _ = kfilter.batch_filter(zs, saver=saver)
        ll = saver.log_likelihood
        
    except Exception as e:
        print(e)
        counter_solved -= 1
        continue
        
    # append shocks
    for key in shock_prior_list.keys():
        shock_prior_list[key].append(shocks[key])
    
    # append priors
    for key in param_prior_list.keys():
        param_prior_list[key].append(new_prior[key])
    
    # catch -math.inf values in log_likelihood
    new_loglike = np.sum([infinity_mask_val if val == -math.inf else val for val in ll])
    loglike_list.append(new_loglike)
    
    
    #### MH #####
    old_loglike = loglike_list[-2]
    old_posterior = {item: vals[-1] for item, vals in param_prior_list.items()}
    
    # MH ratio
    ratio = ((new_loglike * get_arr_pdf_from_dist(new_prior, prior_dist)) / (old_loglike * get_arr_pdf_from_dist(old_posterior, prior_dist))).mean()
    ω = min([ratio, 1])
    random = np.random.uniform(0, 1)
        
    
    # merge draws prior into posterior 
    if random <= ω:
        is_accepted = True
        counter_accp += 1.
        for key in new_prior.keys():
            param_posterio_list[key].append(new_prior[key])
            
    # leave posterior unaltered and restart
    else:
        for key in new_prior.keys():
            param_posterio_list[key].append(np.nan)
        is_accepted = False
        counter_accp += 0.
        
    # save output
    new_prior_pdf.append(list(get_arr_pdf_from_dist(new_prior, prior_dist)))    
    ratio_list = np.append(ratio_list, [[new_loglike, ratio, ω, random, random <= ω]], axis=0)
        
n_param_dim_out = np.array(n_param_dim_out)

# print stats
print('\nloop ran for', (time.time() - start) / 60, 'minutes')
print('\nsolver rate', counter_solved/n_runs)
print('\nacceptance rate', counter_accp/counter_solved)

Progress |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% 

loop ran for 33.76643278598785 minutes

solver rate 0.6822

acceptance rate 0.9501612430372325


In [24]:
# create xarray
params = list(param_prior_list.keys())
xarr = xr.Dataset(
    {
        'posterior': (['draw', 'parameter'],  np.array([param_posterio_list[item] for item in params]).transpose()[1:]),
        'new_prior': (['draw', 'parameter'], np.array([param_prior_list[item] for item in params]).transpose()),
        'new_prior_pdf': (['draw', 'parameter'],  np.array(new_prior_pdf)),
        'log_like': (['draw'], loglike_list[1:]),
        'n_runs': (['uni_dim'], [n_runs]), # number of solved models
        'n_runs_acc': (['uni_dim'], [counter_accp]),
        'solved_rate': (['uni_dim'], [counter_accp/counter_solved])       
        
    },
    coords={
        'draw': (['draw'], list(range(0, counter_solved))),
        'parameter': (['parameter'], params),
        'uni_dim': (['uni_dim'], [0])
    }
)

# get percentiles
arr_nan = deepcopy(xarr.posterior.values)

for i in range(0, arr_nan.shape[1]):
    arr_no_nan = arr_nan[~np.isnan(arr_nan[:, i]), i]
    arr_nan[~np.isnan(arr_nan[:, i]), i] = [percentileofscore(arr_no_nan, x, 'rank') for x in arr_no_nan]
xarr = xarr.assign({'posterior_percentiles': (['draw', 'parameter'], arr_nan)})

quantile_ind = []
for i in [25, 50, 75]:
    arr = np.abs(xarr.posterior_percentiles[int(xarr.n_runs_acc/2):].mean(axis=1) - i)
    quantile_ind.append(np.where(arr==np.min(arr))[0][0])
    
xarr = xarr.assign({
    'posterior_q1': (['parameter'], xarr.sel(draw=quantile_ind[0]).posterior.values),
    'posterior_q2': (['parameter'], xarr.sel(draw=quantile_ind[1]).posterior.values),
    'posterior_q3': (['parameter'], xarr.sel(draw=quantile_ind[2]).posterior.values)
})

In [26]:
# save output
from datetime import datetime

a = ''.join(str(datetime.now().date()).split('-'))
b = ''.join((str(datetime.now().time()).split(':'))[:-1])
timestamp = '_'.join([a, b])

file_path = os.path.join(POST_EST_DIR, f'{mod_name}_{timestamp}.nc')
print(file_path)
if not os.path.exists(file_path):
    xarr.to_netcdf(file_path)
else:
    print('File existst already')


C:\Users\LukasGrahl\Documents\GIT\memoire1\data\posterior_est_out\mod4_rbc_vanilla_20230327_1229.nc
