In [1]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [2]:
import os
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA

In [3]:
# Colab-only step: mount Google Drive at /content/drive. The input folders
# configured in this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive') # give access to Google Drive

Mounted at /content/drive


In [4]:
# Folder with the untransformed inflation series; the forecast cell reads these
# to undo the stationarity transformation and to compute the RMSE.
input_path_original_inflation = '/content/drive/MyDrive/Colab Notebooks/Gradu/Data/Original inflation/'
# Folder with the stationary series the ARIMA models are fitted on
# (also the default folder used by get_data).
input_path = '/content/drive/MyDrive/Colab Notebooks/Gradu/Data/2_Stationary/'

In [5]:
# Each country paired with the transformation that made its inflation series
# stationary: 'FD' = first difference, 'none' = series used as-is.
_country_transformations = [
    ('Finland', 'FD'),
    ('United States', 'FD'),
    ('Germany', 'none'),
    ('France', 'FD'),
    ('Spain', 'FD'),
    ('Italy', 'FD'),
    ('Netherlands', 'FD'),
    ('Sweden', 'none'),
    ('Belgium', 'none'),
    ('Denmark', 'FD'),
    ('Austria', 'none'),
    ('Poland', 'FD'),
]

# Parallel lists, kept in the same order; later cells zip them together.
countries = [name for name, _ in _country_transformations]
transformations = [kind for _, kind in _country_transformations] # these represent stationary transformations

In [6]:
def get_data(variable, path_in = input_path):
    """Load one CSV file as a float DataFrame sorted by its datetime index.

    Parameters
    ----------
    variable : str
        File name without the '.csv' extension (e.g. 'inflation_Finland').
    path_in : str
        Folder containing the file. Works with or without a trailing
        path separator. Defaults to the stationary-data folder.

    Returns
    -------
    pandas.DataFrame
        The file's first column parsed as a DatetimeIndex, the remaining
        columns cast to float, rows sorted by index in ascending order.
    """
    # os.path.join inserts the separator only when it is missing, so a folder
    # given without a trailing '/' no longer produces a mangled file name.
    input_file = os.path.join(path_in, variable + '.csv')
    data = pd.read_csv(input_file, header=0, index_col=0)
    data.index = pd.to_datetime(data.index)
    return data.astype(float).sort_index()

# Grid Search
This can be a time-consuming process. For that reason, the best model orders it found are hard-coded in the cell that follows the grid-search cell, so the grid search itself does not need to be re-run.

In [None]:
# Grid search over ARMA(p, q) orders: for every country, fit each (p, 0, q)
# model on the training part of the stationary series and keep the order with
# the lowest AIC. d is fixed at 0 because the input series are already stationary.
max_ar_order = 15
max_ma_order = 15

best_model_orders = [] # will be stored in same order as countries

# Silence the fitting warnings once, instead of re-registering the filter
# on every loop iteration.
warnings.filterwarnings('ignore')

print("#################")
for country in countries:
    best_aic = float('inf')
    best_order = None

    print("country: " + str(country))

    variable = "inflation_" + str(country)
    data = get_data(variable, input_path)
    split_point = len(data)-12 # hold out the last 12 months as the out of sample test dataset
    train = data[:split_point]

    for p in range(max_ar_order + 1):
        for q in range(max_ma_order + 1):
            model_order = (p, 0, q)  # Set the order as (AR order, differencing order, MA order)
            print("order: " + str(model_order))

            try:
                model = ARIMA(train, order=model_order)
                model_fit = model.fit()
            except Exception as err:
                # Best effort: an order that cannot be fitted is skipped, but the
                # reason is reported. Catching Exception (not a bare except) keeps
                # KeyboardInterrupt working, so the long search can be stopped.
                print("fit failed: " + type(err).__name__ + ": " + str(err))
                print()
                continue

            aic = model_fit.aic
            print("aic: " + str(aic))
            if aic < best_aic:
                best_aic = aic
                best_order = model_order
                print("best AIC: " + str(best_aic))
                print("best AIC order: " + str(best_order))
            print()
    best_model_orders.append(best_order)
    print("country: " + str(country) + " ... best order: " + str(best_order))
    print()
    print("##################")

print("best model orders")
print(best_model_orders)

In [7]:
# Best (p, d, q) orders found by the grid search, hard-coded here so the
# grid search does not have to be run again. One entry per country, in the
# same order as the countries list.
best_model_orders = [
    (0, 0, 12),   # Finland
    (0, 0, 14),   # United States
    (0, 0, 13),   # Germany
    (0, 0, 13),   # France
    (1, 0, 12),   # Spain
    (2, 0, 12),   # Italy
    (1, 0, 12),   # Netherlands
    (2, 0, 11),   # Sweden
    (12, 0, 11),  # Belgium
    (1, 0, 12),   # Denmark
    (4, 0, 13),   # Austria
    (1, 0, 12),   # Poland
]

# Forecasts for 2022

In [8]:
# For every country: fit the selected ARIMA order on the training part of the
# stationary series, forecast the hold-out window, map the forecast back to the
# original inflation scale and print the RMSE for each horizon.
horizons = [1,2,3,6,9,12]
test_size = 12 # hold out the last 12 months as the out of sample test dataset

# Silence the fitting warnings once, instead of on every loop iteration.
warnings.filterwarnings('ignore')

for (country, model_order, transformation) in zip(countries, best_model_orders, transformations):
    print("country: " + str(country))
    print("model order: " + str(model_order))
    print()

    # get the data
    variable = "inflation_" + str(country)
    data = get_data(variable, input_path)
    split_point = len(data) - test_size
    train = data[:split_point]

    # original inflation for reversing stationarity and calculating the RMSE
    original_inflation = "original_inflation_" + str(country)
    data_original = get_data(original_inflation, input_path_original_inflation)
    # Slice the original series from the END rather than at split_point: the
    # hold-out window then stays the last `test_size` observations even if the
    # stationary series has a different length (e.g. if differencing dropped
    # its first row -- not verifiable from this notebook).
    test_original = data_original.iloc[-test_size:] # original inflation for 2022
    # last value before the hold-out window, i.e. 2021-12
    last_observed_value = float(data_original['Data'].iloc[-test_size - 1])

    # train the model
    model = ARIMA(train, order=model_order)
    fit = model.fit()

    # Out-of-sample forecast, computed once for the longest horizon; shorter
    # horizons are prefixes of it. Converted to a plain array so the values are
    # assigned by position below, not aligned on the forecast's own index.
    forecast = np.asarray(fit.forecast(max(horizons)), dtype=float).ravel()

    # we need to reverse the stationarity
    if transformation == "FD":
        # Undo first differencing: running sum of the predicted changes,
        # starting from the last observed level (the seed itself is dropped).
        forecast = np.cumsum(np.concatenate(([last_observed_value], forecast)))[1:]

    for horizon in horizons:
        print("horizon: " + str(horizon))
        horizon_test = test_original[:horizon]
        forecast_df = pd.DataFrame(index=horizon_test.index)
        forecast_df['Actual'] = horizon_test['Data']
        forecast_df['Forecast'] = forecast[:horizon]

        rmse = np.sqrt(mean_squared_error(forecast_df['Actual'], forecast_df['Forecast']))
        print('RMSE: ', round(rmse,3))
        print()

    print()
    print("##################")

country: Finland
model order: (0, 0, 12)

horizon: 1
RMSE:  1.213

horizon: 2
RMSE:  1.212

horizon: 3
RMSE:  1.674

horizon: 6
RMSE:  2.624

horizon: 9
RMSE:  3.193

horizon: 12
RMSE:  3.938


##################
country: United States
model order: (0, 0, 14)

horizon: 1
RMSE:  0.371

horizon: 2
RMSE:  0.695

horizon: 3
RMSE:  1.202

horizon: 6
RMSE:  2.283

horizon: 9
RMSE:  2.872

horizon: 12
RMSE:  3.179


##################
country: Germany
model order: (0, 0, 13)

horizon: 1
RMSE:  0.502

horizon: 2
RMSE:  0.643

horizon: 3
RMSE:  1.59

horizon: 6
RMSE:  2.73

horizon: 9
RMSE:  3.612

horizon: 12
RMSE:  4.607


##################
country: France
model order: (0, 0, 13)

horizon: 1
RMSE:  0.493

horizon: 2
RMSE:  0.88

horizon: 3
RMSE:  1.336

horizon: 6
RMSE:  2.32

horizon: 9
RMSE:  2.997

horizon: 12
RMSE:  3.578


##################
country: Spain
model order: (1, 0, 12)

horizon: 1
RMSE:  0.176

horizon: 2
RMSE:  0.68

horizon: 3
RMSE:  2.153

horizon: 6
RMSE:  2.776

horizon: