In [1]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [2]:
import pandas as pd
import numpy as np
import warnings
import statsmodels
import statsmodels.tsa.api as sm

from statsmodels.tsa.api import VAR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [3]:
# Colab-only step: mount Google Drive under /content/drive so that the
# '/content/drive/...' data paths used by this notebook resolve.
from google.colab import drive
drive.mount('/content/drive') # give access to Google Drive

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Both data folders live under the same Drive directory; every path keeps its
# trailing slash because get_data() builds file names by string concatenation.
data_root = '/content/drive/MyDrive/Colab Notebooks/Gradu/Data/'
input_path_original_inflation = data_root + 'Original inflation/'  # untransformed inflation series
input_path = data_root + '2_Stationary/'                           # stationary (transformed) series

In [5]:
# Each country paired with the transformation that was applied to its inflation
# series: 'FD' is undone later by cumulatively summing forecasts, 'none' is used as-is.
# Keeping the pairs together prevents the two lists from drifting out of alignment.
country_transformations = [
    ('Finland', 'FD'),
    ('United States', 'FD'),
    ('Germany', 'none'),
    ('France', 'FD'),
    ('Spain', 'FD'),
    ('Italy', 'FD'),
    ('Netherlands', 'FD'),
    ('Sweden', 'none'),
    ('Belgium', 'none'),
    ('Denmark', 'FD'),
    ('Austria', 'none'),
    ('Poland', 'FD'),
]
countries = [name for name, _ in country_transformations]
transformations = [kind for _, kind in country_transformations]

In [6]:
# Variable name stems; each is combined with a country name to form a CSV file name.
# 'inflation' is listed first, so it is the first column of every data matrix.
macro_variables = [
    'inflation', 'unemployment', 'imports', 'exports', 'bond_yield',
    'exchange', 'ppi', 'bci', 'cci', 'construction', 'manufacturing',
    'world', 'share_prices', 'gdp', 'house_prices', 'investment',
    'domestic_demand',
]
financial_variables = [
    'oil', 'silver', 'eurusd', 'eurcfh', 'spx',
]
all_variables = [*macro_variables, *financial_variables]

In [7]:
def get_data(variable, path_in = input_path):
    """Load one CSV series as a float DataFrame sorted by its datetime index.

    Parameters
    ----------
    variable : str
        File name without the '.csv' extension.
    path_in : str
        Directory prefix; it is concatenated directly with ``variable``, so it
        must already end with a path separator.

    Returns
    -------
    pandas.DataFrame
        Contents of the file with the first column parsed as a datetime index,
        all values cast to float, and rows sorted by index.
    """
    csv_path = "".join((path_in, variable, '.csv'))
    frame = pd.read_csv(csv_path, header=0, index_col=0)
    frame.index = pd.to_datetime(frame.index)
    return frame.astype(float).sort_index()

In [8]:
def drop_constant_columns(data):
    """Return a copy of ``data`` without columns that hold a single distinct value.

    Distinct values are counted with ``DataFrame.nunique`` (default settings);
    only columns with a count above 1 are kept. The input frame is not modified.
    """
    distinct_counts = data.nunique()
    varying = [label for label, count in distinct_counts.items() if count > 1]
    return data[varying].copy()

# Grid Search

The grid search is time-consuming. Its results are therefore hard-coded in the cell that follows the grid-search cell below, so the search does not have to be re-run.

In [None]:
# Grid search over the VAR lag order: for every country, fit VAR(p) for
# p = 0..max_order on the standardized training data and keep the order with
# the lowest AIC.
max_order = 10

best_model_orders = [] # will be stored in same order as countries

# Silence library warnings once for the whole search instead of on every fit.
warnings.filterwarnings('ignore')

print("##################")
for country in countries:
    best_aic = float('inf')
    best_order = None

    print("country: " + str(country))
    print()

    # get the data: one column per variable, aligned on the index of the
    # Finnish inflation series
    init_index = get_data("inflation_Finland", input_path)
    df = pd.DataFrame(index = init_index.index)
    for variable in all_variables:
        new_column_name = variable + "_" + country
        data = get_data(new_column_name)
        data = data.rename({'Data': new_column_name}, axis=1)
        df[new_column_name] = data
    df = drop_constant_columns(df)
    print(df.shape)

    split_point = len(df)-12 # use last 12 months as the out of sample test dataset
    train = df[:split_point]
    test = df[split_point:] # held out; not used by the search itself

    # the scaler is fitted on the training rows only
    scaler = StandardScaler()
    normalized_train = scaler.fit_transform(train)

    for model_order in range(max_order + 1):
        print("order: " + str(model_order))

        try:
            # train the model
            model = VAR(normalized_train)
            model_fit = model.fit(model_order)
            aic = model_fit.aic
        except Exception as error:
            # Skip orders that cannot be estimated, but report why. Catching
            # Exception (not a bare except) keeps KeyboardInterrupt working so
            # the long-running search can still be stopped by the user.
            print("fit failed: " + repr(error))
            print()
            continue

        print("aic: " + str(aic))
        if aic < best_aic:
            best_aic = aic
            best_order = model_order
            print("best AIC: " + str(best_aic))
            print("best AIC order: " + str(best_order))
        print()
    best_model_orders.append(best_order)
    print("country: " + str(country) + " ... best order: " + str(best_order))
    print()
    print("##################")

print("best model orders")
print(best_model_orders)

In [9]:
# Hard-coded outcome of the grid search above (one lag order per entry, in the
# same order as `countries`), so the search does not need to be re-run.
best_model_orders = [10] * 12

# Forecasts for 2022

In [13]:
# Out-of-sample evaluation: for each country, fit a VAR of the chosen order on
# all but the last 12 rows, forecast the held-out rows, map the inflation
# forecast back to the original scale/level and print the RMSE over the first
# h forecast steps for every h in `horizons`.
horizons = [1,2,3,6,9,12]

# Silence library warnings once for the whole cell instead of once per country.
warnings.filterwarnings('ignore')

for (country, model_order, transformation) in zip(countries, best_model_orders, transformations):
    print("country: " + str(country))
    print("model order: " + str(model_order))
    print()

    # create the data matrix: one column per variable, aligned on the index of
    # the Finnish inflation series
    init_index = get_data("inflation_Finland", input_path)
    df = pd.DataFrame(index = init_index.index)
    for variable in all_variables:
        new_column_name = variable + "_" + country
        data = get_data(new_column_name)
        data = data.rename({'Data': new_column_name}, axis=1)
        df[new_column_name] = data
    df = drop_constant_columns(df)

    # Locate the inflation column by name rather than assuming it is column 0:
    # if it were ever dropped as constant this raises ValueError instead of
    # silently evaluating the forecast of a different variable.
    inflation_position = list(df.columns).index("inflation_" + country)

    split_point = len(df)-12 # use last 12 months as the out of sample test dataset
    train = df[:split_point]

    # scale the data (scaler fitted on the training rows only)
    scaler = StandardScaler()
    normalized_train = scaler.fit_transform(train)

    # train the model
    model = VAR(normalized_train)
    fit = model.fit(model_order)

    # original inflation for reversing stationarity and calculating the RMSE
    # NOTE(review): the slices below are positional and assume the original
    # inflation file has exactly the same rows as the stationary data — confirm.
    original_inflation = "original_inflation_" + str(country)
    data_original = get_data(original_inflation, input_path_original_inflation)
    test_original = data_original.iloc[split_point:] # original inflation for 2022

    # data for out-of-sample forecast: the last `model_order` training rows
    input_data = normalized_train[-model_order:]

    # Forecast once for the longest horizon. The forecast is built step by
    # step, so the first h steps are exactly the h-step forecast and shorter
    # horizons can simply be sliced from this path.
    predictions_normalized = fit.forecast(input_data, max(horizons))
    predictions_denormalized = scaler.inverse_transform(predictions_normalized)
    inflation_path = predictions_denormalized[:, inflation_position]

    # we need to reverse the stationarity
    if transformation == "FD":
        # last observed level before the test window, i.e. 2021-12
        last_observed_value = data_original['Data'].iloc[:split_point].iloc[-1]
        # Running sum seeded with the last observed level turns the forecast
        # first differences back into levels; the seed itself is dropped.
        inflation_path = np.cumsum(np.concatenate(([last_observed_value], inflation_path)))[1:]

    for horizon in horizons:
        print("horizon: " + str(horizon))
        horizon_test = test_original[:horizon]
        forecast = inflation_path[:horizon]

        forecast_df = pd.DataFrame(index=horizon_test.index)
        forecast_df['Actual'] = horizon_test['Data']
        forecast_df['Forecast'] = forecast

        # RMSE over forecast steps 1..horizon
        rmse = np.sqrt(mean_squared_error(forecast_df['Actual'], forecast_df['Forecast']))
        print('RMSE: ', round(rmse,3))
        print()
    print()
    print("##################")

country: Finland
model order: 10

horizon: 1
RMSE:  1.719

horizon: 2
RMSE:  1.956

horizon: 3
RMSE:  2.691

horizon: 6
RMSE:  4.577

horizon: 9
RMSE:  5.476

horizon: 12
RMSE:  6.565


##################
country: United States
model order: 10

horizon: 1
RMSE:  0.123

horizon: 2
RMSE:  1.437

horizon: 3
RMSE:  1.186

horizon: 6
RMSE:  2.4

horizon: 9
RMSE:  2.125

horizon: 12
RMSE:  2.823


##################
country: Germany
model order: 10

horizon: 1
RMSE:  1.811

horizon: 2
RMSE:  1.305

horizon: 3
RMSE:  1.519

horizon: 6
RMSE:  1.561

horizon: 9
RMSE:  1.508

horizon: 12
RMSE:  2.679


##################
country: France
model order: 10

horizon: 1
RMSE:  2.167

horizon: 2
RMSE:  3.945

horizon: 3
RMSE:  5.348

horizon: 6
RMSE:  5.359

horizon: 9
RMSE:  10.217

horizon: 12
RMSE:  11.623


##################
country: Spain
model order: 10

horizon: 1
RMSE:  4.052

horizon: 2
RMSE:  2.953

horizon: 3
RMSE:  2.525

horizon: 6
RMSE:  12.141

horizon: 9
RMSE:  19.147

horizon: 12
RMSE