In [39]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings('ignore') # Ignoring warnings to keep notebook clean and concise
random_state = 0 # Setting a random state variable to keep all results consistent

In [40]:
gfcf = pd.read_csv('Transformed Data/GFCF.csv')

gfcf['Year'] = pd.to_datetime(gfcf['Year'], format='%Y')

gfcf.set_index('Year', inplace=True)

In [41]:
def arima_model(df, order):
    model = ARIMA(df, order=order)
    model_fit = model.fit(disp=False)
    return model_fit

In [42]:
def arima_grid_search(df, p_values, d_values, q_values):
    best_score, best_order = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                try:
                    model = arima_model(df, (p,d,q))
                    mse = mean_squared_error(df, model.fittedvalues)
                    rmse = sqrt(mse)
                    if rmse < best_score:
                        best_score, best_order = rmse, (p,d,q)
                except:
                    continue
    return best_score, best_order

In [43]:
def preprocess_data(df):
    return np.log(df)

In [44]:
p_values = [0, 1, 2, 3]
d_values = [0, 1, 2, 3]
q_values = [0, 1, 2, 3]

# Perform grid search for each country
countries = gfcf['Country'].unique()
for country in countries:
    print(f'Country: {country}')
    df_country = preprocess_data(gfcf[gfcf['Country'] == country]['Construction Value'])
    print(f'Datapoints:{len(df_country)}')
    best_score, best_order = arima_grid_search(df_country, p_values, d_values, q_values)
    print(f'Best ARIMA{best_order} RMSE={best_score}\n')

Country: Austria
Datapoints:8
Best ARIMANone RMSE=inf

Country: Switzerland
Datapoints:7
Best ARIMANone RMSE=inf

Country: Cyprus
Datapoints:8
Best ARIMANone RMSE=inf

Country: Germany
Datapoints:8
Best ARIMANone RMSE=inf

Country: Denmark
Datapoints:8
Best ARIMANone RMSE=inf

Country: Estonia
Datapoints:8
Best ARIMANone RMSE=inf

Country: Greece
Datapoints:8
Best ARIMANone RMSE=inf

Country: Spain
Datapoints:8
Best ARIMANone RMSE=inf

Country: Finland
Datapoints:8
Best ARIMANone RMSE=inf

Country: France
Datapoints:8
Best ARIMANone RMSE=inf

Country: Croatia
Datapoints:8
Best ARIMANone RMSE=inf

Country: Hungary
Datapoints:8
Best ARIMANone RMSE=inf

Country: Ireland
Datapoints:8
Best ARIMANone RMSE=inf

Country: Iceland
Datapoints:8
Best ARIMANone RMSE=inf

Country: Italy
Datapoints:8
Best ARIMANone RMSE=inf

Country: Lithuania
Datapoints:8
Best ARIMANone RMSE=inf

Country: Luxembourg
Datapoints:8
Best ARIMANone RMSE=inf

Country: Latvia
Datapoints:8
Best ARIMANone RMSE=inf

Country: 

In [46]:
gfcf_gdp = pd.read_csv('Transformed Data/GFCF-ML.csv')

print(gfcf_gdp)

gfcf_gdp['Year'] = pd.to_datetime(gfcf_gdp['Year'], format='%Y')

gfcf_gdp.set_index('Year', inplace=True)


      Year  Percent GDP         Country
0     1996          6.9         Albania
1     1997          6.4         Albania
2     1998          7.1         Albania
3     1999          7.3         Albania
4     2000         10.3         Albania
...    ...          ...             ...
2141  2015          9.7  United Kingdom
2142  2016          9.8  United Kingdom
2143  2017         10.0  United Kingdom
2144  2018         10.1  United Kingdom
2145  2019         10.4  United Kingdom

[2146 rows x 3 columns]
