Ansatz inspiriert von Paper: Macroeconomic forecasting in times of crises 
Download: file:///C:/Users/Maria/Downloads/J%20of%20Applied%20Econometrics%20-%202022%20-%20Guerr%C3%B3on%E2%80%90Quintana%20-%20Macroeconomic%20forecasting%20in%20times%20of%20crises%20(2).pdf

1. Schritt: ARMA Modell für DAX Returns

In [1]:
import pandas as pd
import numpy as np

import pmdarima
import statsmodels as sm

from dax.help_functions.get_dax_data import get_prepared_data

In [2]:
daxdata = get_prepared_data()
daxdata

Unnamed: 0_level_0,Close,LogRetLag1,LogRetLag2,LogRetLag3,LogRetLag4,LogRetLag5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-09-10 00:00:00+02:00,12268.709961,0.347914,0.629422,1.163586,2.006097,2.960150
2019-09-11 00:00:00+02:00,12359.070312,0.733812,1.081725,1.363234,1.897398,2.739909
2019-09-12 00:00:00+02:00,12410.250000,0.413251,1.147063,1.494977,1.776485,2.310649
2019-09-13 00:00:00+02:00,12468.530273,0.468515,0.881766,1.615578,1.963491,2.245000
2019-09-16 00:00:00+02:00,12380.309570,-0.710062,-0.241547,0.171704,0.905516,1.253429
...,...,...,...,...,...,...
2024-01-04 00:00:00+01:00,16617.289062,0.475928,-0.910971,-0.805254,-0.505791,-0.748107
2024-01-05 00:00:00+01:00,16594.210938,-0.138977,0.336951,-1.049948,-0.944231,-0.644768
2024-01-08 00:00:00+01:00,16716.470703,0.734061,0.595084,1.071012,-0.315887,-0.210170
2024-01-09 00:00:00+01:00,16688.359375,-0.168307,0.565754,0.426777,0.902705,-0.484194


In [4]:
order_dict_bic = {}
for h in ['LogRetLag1', 'LogRetLag2', 'LogRetLag3', 'LogRetLag4', 'LogRetLag5']:
    # fit ARIMA on returns, use bic since paper uses bic too
    arima_model_fitted = pmdarima.auto_arima(daxdata[h], stationary=True, information_criterion='bic')
    order_dict_bic.update({h: arima_model_fitted.order})

order_dict_bic

{'LogRetLag1': (0, 0, 0),
 'LogRetLag2': (5, 0, 0),
 'LogRetLag3': (5, 0, 1),
 'LogRetLag4': (1, 0, 0),
 'LogRetLag5': (5, 0, 2)}

For the beginning: Solely consider LogRetLag1 
OPtimal order is (0,0,0) --> estimated log ret always 0 

In [5]:
# fit model 
from statsmodels.tsa.arima.model import ARIMA
import warnings
# Suppress the specific warning
warnings.filterwarnings(
    "ignore", message="An unsupported index was provided and will be ignored", category=UserWarning)
warnings.filterwarnings(
    "ignore", message="A date index has been provided, but it has no associated frequency information", category=UserWarning)

model = ARIMA(daxdata['LogRetLag1'], order=(0, 0, 0))
model_fit = model.fit()
residuals = model_fit.resid
residuals

Date
2019-09-10 00:00:00+02:00    0.319728
2019-09-11 00:00:00+02:00    0.705626
2019-09-12 00:00:00+02:00    0.385066
2019-09-13 00:00:00+02:00    0.440329
2019-09-16 00:00:00+02:00   -0.738248
                               ...   
2024-01-04 00:00:00+01:00    0.447742
2024-01-05 00:00:00+01:00   -0.167162
2024-01-08 00:00:00+01:00    0.705875
2024-01-09 00:00:00+01:00   -0.196493
2024-01-10 00:00:00+01:00   -0.019490
Length: 1104, dtype: float64

In [6]:
def calculate_distance(daxdata, h, j, k): 
   
    # fixed parameters
    weight_vector = np.array([1/(k-i+1) for i in range(1, k+1)])
    last_k_obs = np.array(daxdata[f'LogRetLag{h}'].iloc[-k:])

    # get obs window
    starting_index = j-k+1
    obs_window = np.array(
        daxdata[starting_index:starting_index+k][f'LogRetLag{h}'])

    # calculate distance 
    dist = np.sum(weight_vector*((last_k_obs-obs_window)**2))

    return dist

def calculate_mean_residuals(daxdata, j_min, k): 
    starting_index = j_min-k+1
    residuals = np.array(
        daxdata[starting_index:starting_index+k]['residuals'])
    
    # calculate mean
    mean_residuals = np.mean(residuals)

    return mean_residuals
    

In [16]:
warnings.filterwarnings(
    "ignore", message="No supported index is available. Prediction results will be given with an integer index beginning at `start`.", category=UserWarning)
warnings.filterwarnings(
    "ignore", message="No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.", category=FutureWarning)
warnings.filterwarnings(
    "ignore", message="Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.", category=UserWarning)
warnings.filterwarnings(
    "ignore", message="Non-invertible starting MA parameters found. Using zeros as starting parameters.", category=UserWarning)

In [22]:
import math
from scipy.stats import norm, t
def get_norm_quantiles_mean(mean, variance):
    corrected_means = np.array(corrected_means)
    variances = np.array(variances)
    norm_quantiles = norm.ppf([0.025, 0.25, 0.5, 0.75, 0.975])
    corrected_means + variances*norm_quantiles

In [50]:
from evaluation.help_functions.prepare_data import next_working_days
from dax.models.ARMA_GARCH.get_quantiles import get_norm_quantiles_mean

quantiles = [0.025, 0.25, 0.5, 0.75, 0.975]
# get data 
daxdata = get_prepared_data()
daxdata = daxdata.reset_index()

# try out fix parameter
k = 10
m = 5
horizons = [1, 2, 3, 4, 5]

# store arima means in dict 
arima_means = {}

# store forecast corrections in dict
forecast_correction = {}

# optimal orders based on bic
opt_orders = {
    1: (0, 0, 0),
    2: (5, 0, 0),
    3: (5, 0, 1),
    4: (1, 0, 0),
    5: (5, 0, 2)
}

# store corrected means and residual variance in list 
corrected_means = []
variances = []

for h in horizons:

    # estimate mean with ARIMA 
    model = ARIMA(daxdata[[f'LogRetLag{h}']], order= opt_orders[h])
    model_fit = model.fit()
    model_residuals = model_fit.resid
    variances.append(np.var(model_residuals))

    # get col index of  observed Log Return
    ci_logret = daxdata.columns.get_loc(f'LogRetLag{h}')

    # forecast desired horizon
    mi = daxdata.index.max()
    arima_mean = model_fit.predict(start=mi+1, end=mi+6).iloc[(h-1)]  
    arima_means.update({h: arima_mean})

    print(f"-----start of horizon {h} -----------")

    # calculate distances for return
    daxdata[f'distance_{h}'] = np.nan
    starting_index = daxdata.index.min() + k

    # get col index 
    ci = daxdata.columns.get_loc(f'distance_{h}')

    for j in range(k, len(daxdata)):
        daxdata.iat[j, ci] = calculate_distance(daxdata, h, j, k)
    daxdata = daxdata.dropna(subset=[f'distance_{h}'])

    # set last value (zero distance) high enough so that index does not get chosen
    daxdata.iat[len(daxdata)-1, ci] = 10

    # top m matching parts
    top_indices = daxdata[f'distance_{h}'].nsmallest(m).index

    # iterate over top m matching parts, estimate prediction and store residual 
    
    residuals = []
    for ti in top_indices: 

        print( '------ start of new part ----------')

        if (ti < len(daxdata)-h):

            # train model with all data up to ti
            model = ARIMA(daxdata[[f'LogRetLag{h}']][:ti+1], order= opt_orders[h])
            model_fit = model.fit()

            # get col index of  observed Log Return
            ci_logret = daxdata.columns.get_loc(f'LogRetLag{h}')

            # calculate residual for desired horizon
            yhat = model_fit.predict(start=ti+1, end=ti+6).iloc[(h-1)]  
            yobs = daxdata.iat[ti+h-1, ci_logret]
            residual = yobs - yhat

            # append to list of residuals 
            residuals.append(residual)
    
    forecast_correction.update({h: np.mean(np.array(residuals))})
    
    # calculate corrected mean
    corrected_means.append(arima_means[h]-forecast_correction[h])

# get quantiles
column_names = [f'q{q}' for q in quantiles]
daxdata = daxdata.set_index("Date")
start_date_dates = max(daxdata.index).strftime('%Y-%m-%d')
dates = next_working_days(start_date_dates, 5)
quantile_df = pd.DataFrame(index=dates, columns=column_names)

# calculate forecasting quantiles
for h in range(0, 5):

    mean = corrected_means[h]
    variance = variances[h]

    for q in quantiles:
        quantile_q = mean + variance*norm.ppf(q)
        quantile_df.loc[dates[h]][f'q{q}'] = quantile_q
        

-----start of horizon 1 -----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ----------
-----start of horizon 2 -----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ----------
-----start of horizon 3 -----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ----------
-----start of horizon 4 -----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ----------
-----start of horizon 5 -----------
------ start of new part ----------
------ start of new part ----------
------ start of new part ---

In [43]:
daxdata

Unnamed: 0,Date,Close,LogRetLag1,LogRetLag2,LogRetLag3,LogRetLag4,LogRetLag5,distance_1,distance_2,distance_3,distance_4,distance_5
50,2019-11-20 00:00:00+01:00,13158.139648,-0.477501,-0.370718,-0.633417,-0.167746,-0.545174,1.122675,0.902199,2.532342,2.110909,3.972937
51,2019-11-21 00:00:00+01:00,13137.700195,-0.155458,-0.632959,-0.526176,-0.788874,-0.323204,0.476769,1.164947,2.371784,2.591813,2.211897
52,2019-11-22 00:00:00+01:00,13163.879883,0.199073,0.043615,-0.433885,-0.327103,-0.589801,1.423604,2.021862,3.079018,2.899995,3.087081
53,2019-11-25 00:00:00+01:00,13246.450195,0.625290,0.824363,0.668905,0.191405,0.298188,1.074969,2.324345,1.685376,1.290828,0.795027
54,2019-11-26 00:00:00+01:00,13236.419922,-0.075749,0.549541,0.748614,0.593156,0.115656,1.034182,1.211995,1.231961,0.682654,1.134304
...,...,...,...,...,...,...,...,...,...,...,...,...
1099,2024-01-04 00:00:00+01:00,16617.289062,0.475928,-0.910971,-0.805254,-0.505791,-0.748107,1.727535,2.998210,3.971698,4.088787,3.833784
1100,2024-01-05 00:00:00+01:00,16594.210938,-0.138977,0.336951,-1.049948,-0.944231,-0.644768,1.982513,2.981376,5.629162,3.971698,3.309541
1101,2024-01-08 00:00:00+01:00,16716.470703,0.734061,0.595084,1.071012,-0.315887,-0.210170,1.495468,2.733813,2.981376,2.998210,2.107249
1102,2024-01-09 00:00:00+01:00,16688.359375,-0.168307,0.565754,0.426777,0.902705,-0.484194,1.918693,1.495468,1.982513,1.727535,2.615433


KeyError: "None of ['Date'] are in the columns"

In [49]:
quantile_df

Unnamed: 0,q0.025,q0.25,q0.5,q0.75,q0.975
2024-01-11,-3.5943,-1.16788,0.105265,1.37841,3.804831
2024-01-12,-5.243929,-2.337316,-0.812212,0.712891,3.619505
2024-01-15,-4.539924,-1.193649,0.562145,2.317939,5.664214
2024-01-16,-6.277617,-2.055138,0.160402,2.375941,6.59842
2024-01-17,-6.663453,-3.187437,-1.363568,0.460301,3.936316


In [121]:
top_indices
#h1 --> 0 
#daxdata.iloc[top_indices]
top_indices


Index([1044, 878, 972, 779, 452], dtype='int64')

In [110]:
daxdata

Unnamed: 0,Date,Close,LogRetLag1,LogRetLag2,LogRetLag3,LogRetLag4,LogRetLag5,distance_1,distance_2,distance_3,distance_4,distance_5
50,2019-11-20 00:00:00+01:00,13158.139648,-0.477501,-0.370718,-0.633417,-0.167746,-0.545174,1.122675,0.902199,2.532342,2.110909,3.972937
51,2019-11-21 00:00:00+01:00,13137.700195,-0.155458,-0.632959,-0.526176,-0.788874,-0.323204,0.476769,1.164947,2.371784,2.591813,2.211897
52,2019-11-22 00:00:00+01:00,13163.879883,0.199073,0.043615,-0.433885,-0.327103,-0.589801,1.423604,2.021862,3.079018,2.899995,3.087081
53,2019-11-25 00:00:00+01:00,13246.450195,0.625290,0.824363,0.668905,0.191405,0.298188,1.074969,2.324345,1.685376,1.290828,0.795027
54,2019-11-26 00:00:00+01:00,13236.419922,-0.075749,0.549541,0.748614,0.593156,0.115656,1.034182,1.211995,1.231961,0.682654,1.134304
...,...,...,...,...,...,...,...,...,...,...,...,...
1099,2024-01-04 00:00:00+01:00,16617.289062,0.475928,-0.910971,-0.805254,-0.505791,-0.748107,1.727535,2.998210,3.971698,4.088787,3.833784
1100,2024-01-05 00:00:00+01:00,16594.210938,-0.138977,0.336951,-1.049948,-0.944231,-0.644768,1.982513,2.981376,5.629162,3.971698,3.309541
1101,2024-01-08 00:00:00+01:00,16716.470703,0.734061,0.595084,1.071012,-0.315887,-0.210170,1.495468,2.733813,2.981376,2.998210,2.107249
1102,2024-01-09 00:00:00+01:00,16688.359375,-0.168307,0.565754,0.426777,0.902705,-0.484194,1.918693,1.495468,1.982513,1.727535,2.615433


In [None]:


    model = ARIMA(history, order=order)
    model_fit = model.fit(disp=-1)
    yhat_f = model_fit.forecast()[0][0]
    yhat_p = model_fit.predict(start=len(history), end=len(history))[0]
    predictions_f.append(yhat_f)
    predictions_p.append(yhat_p)
    history.append(test[t])


df_together['distance'] = np.nan
starting_index = df_together.index.min() + k
for j in range(k, len(df_together)):
    df_together.iat[j, 3] = calculate_distance(df_together, j, k)
df_together = df_together.dropna(subset=['distance'])
# set value high enough so that index does not get chosen
df_together.iat[len(df_together)-1, 3] = 10
top_indices = df_together['distance'].nsmallest(m).index
top_indices

top_residuals = []
for j_min in top_indices:
    # plus 1 --> plus horizon, residual is one-step-estimate
    top_residuals.append(daxdata.iat[(j_min+1), 2])

mean_residual = np.mean(top_residuals)
mean_residual

In [66]:
daxdata_small = df_together[-200:]

In [95]:
# try out fix parameter
k = 10
m = 5


df_together['distance'] = np.nan
starting_index = df_together.index.min() + k
for j in range(k, len(df_together)):
    df_together.iat[j, 3] = calculate_distance(df_together, j, k)
df_together = df_together.dropna(subset=['distance'])
df_together.iat[len(df_together)-1, 3] = 10 #set value high enough so that index does not get chosen
top_indices = df_together['distance'].nsmallest(m).index
top_indices

top_residuals = []
for j_min in top_indices: 
    # plus 1 --> plus horizon, residual is one-step-estimate
    top_residuals.append(daxdata.iat[(j_min+1),2])

mean_residual = np.mean(top_residuals)
mean_residual

-0.23512487172602903

Stick with fixed parameteres but adjust horizons

In [84]:
# try out fix parameter
k = 10
m = 3
daxdata_small['distance'] = np.nan
starting_index = daxdata_small.index.min() + k 
for j in range(k,len(daxdata_small)):
    daxdata_small.iat[j,3] = calculate_distance(daxdata_small, j,k)
    
daxdata_small = daxdata_small.dropna(subset=['distance'])
top_3_indices = daxdata_small['distance'].nsmallest(3).index
top_3_indices
daxdata_small

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  daxdata_small['distance'] = np.nan


Unnamed: 0,Date,obs_lagreturn,residuals,distance
904,2023-03-29 00:00:00+02:00,1.225850,1.197664,
905,2023-03-30 00:00:00+02:00,1.255204,1.227018,
906,2023-03-31 00:00:00+02:00,0.683375,0.655189,
907,2023-04-03 00:00:00+02:00,-0.307083,-0.335269,0.552927
908,2023-04-04 00:00:00+02:00,0.144622,0.116437,0.028962
...,...,...,...,...
1099,2024-01-04 00:00:00+01:00,0.475928,0.447742,1.092395
1100,2024-01-05 00:00:00+01:00,-0.138977,-0.167162,1.728817
1101,2024-01-08 00:00:00+01:00,0.734061,0.705875,0.548796
1102,2024-01-09 00:00:00+01:00,-0.168307,-0.196493,0.692528
