In [1]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
import numpy as np
import warnings
import statsmodels
import statsmodels.tsa.api as sm

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing

In [4]:
import tensorflow as tf

from tensorflow import keras as keras
from keras.callbacks import EarlyStopping
from keras import backend as K
from keras.layers import Layer
from keras import activations
from keras import initializers
from keras.layers import Dense, SimpleRNN, Flatten
from keras.models import Sequential

In [5]:
tf.get_logger().setLevel('INFO')
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

In [6]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [7]:
input_path_original_inflation = '/content/drive/MyDrive/Colab Notebooks/Gradu/Data/Original inflation/'
input_path = '/content/drive/MyDrive/Colab Notebooks/Gradu/Data/2_Stationary/'

In [8]:
countries = ['Finland', 'United States', 'Germany', 'France', 'Spain', 'Italy', 'Netherlands', 'Sweden', 'Belgium', 'Denmark', 'Austria', 'Poland']
transformations = ['FD', 'FD', 'none', 'FD', 'FD', 'FD', 'FD', 'none', 'none', 'FD', 'none', 'FD']

In [9]:
macro_variables = ['inflation', 'unemployment', 'imports', 'exports', 'bond_yield', 'exchange', 'ppi', 'bci', 'cci', 'construction', 'manufacturing', 'share_prices', 'gdp', 'house_prices', 'investment', 'domestic_demand']
common_variables = ['oil', 'silver', 'eurusd', 'eurcfh', 'spx', 'world']
all_variables = macro_variables + common_variables

In [10]:
def get_data(variable, path_in = input_path):
    input_file = path_in + variable +'.csv'
    data = pd.read_csv(input_file, header=0, index_col=0)
    data.index = pd.to_datetime(data.index)
    data = data.astype(float)
    data = data.sort_index()
    return data

In [11]:
def drop_constant_columns(data):
    non_constant_columns = data.columns[data.nunique() > 1]
    return data[non_constant_columns].copy()

In [12]:
def split_sequences(sequences, no_steps_in, no_steps_out):
    X, y = list(), list()
    for i in range(len(sequences)):
        #find end of pattern
        end_ix = i + no_steps_in
        out_end_ix = end_ix + no_steps_out - 1

        #check if we are beyond the dataset
        if out_end_ix > len(sequences):
            break

        #gather input and output
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix - 1:out_end_ix, -1]
        X.append(seq_x)
        y.append(seq_y)

    return np.array(X), np.array(y)

# Data Matrix

In [13]:
# create the data matrix

init_index = get_data("inflation_Finland", input_path)
df = pd.DataFrame(index = init_index.index)

for country in countries:
    for variable in all_variables:
        data = get_data(variable + "_" + country)
        new_column_name = variable + "_" + country
        data = data.rename({'Data': new_column_name}, axis=1)
        df[new_column_name] = data

# TT Recurrent Layer

In [14]:
class TTRecurrentLayer(Layer):
    def __init__(self, tt_input, tt_output, tt_ranks, activation='tanh', kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', **kwargs):
        self.tt_input = np.array(tt_input)
        self.tt_output = np.array(tt_output)
        self.tt_ranks = np.array(tt_ranks)
        self.num_dim = self.tt_input.shape[0]
        self.parameters = np.sum(self.tt_input * self.tt_output * self.tt_ranks[1:] * self.tt_ranks[:-1])
        self.units = np.prod(np.array(tt_output))
        self.activation = tf.keras.activations.get(activation)
        self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
        self.recurrent_initializer = tf.keras.initializers.get(recurrent_initializer)
        self.bias_initializer = tf.keras.initializers.get(bias_initializer)

        super(TTRecurrentLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.states = [None]
        if self.stateful:
            self.reset_states()

        self.kernel = self.add_weight(shape=(self.parameters,), initializer=self.kernel_initializer, name='kernel')
        self.bias = self.add_weight(shape=(np.prod(self.tt_output),), initializer=self.bias_initializer, name='bias')

        self.indices = np.zeros(self.num_dim).astype('int32')
        self.shapes = np.zeros((self.num_dim, 2)).astype('int32')
        self.cores = [None] * self.num_dim

        for k in range(self.num_dim - 1, -1, -1):
            self.shapes[k] = (self.tt_input[k] * self.tt_ranks[k + 1], self.tt_ranks[k] * self.tt_output[k])
            self.cores[k] = self.kernel[self.indices[k]:self.indices[k] + np.prod(self.shapes[k])]
            if 0 < k:
                self.indices[k - 1] = self.indices[k] + np.prod(self.shapes[k])

        self.recurrent_kernel = self.add_weight(shape=(self.units, self.units), name='recurrent_kernel', initializer=self.recurrent_initializer)

        fully_connected_params = (np.prod(self.tt_input) * np.prod(self.tt_output))

        #print('Model TT ranks: ' + str(self.tt_ranks))
        #print('TT number of parameters: ', self.parameters)
        #print('Equivalent fully connected layer parameters: ', fully_connected_params)

    def preprocess_input(self, x, training=None):
        return x

    def step(self, x, states):
        res = x
        for i in range(self.num_dim - 1, -1, -1):
            res = tf.linalg.matmul(tf.reshape(res, (-1, self.shapes[i][0])),
                                   tf.reshape(self.cores[i], self.shapes[i]))
            res = tf.transpose(tf.reshape(res, (-1, self.tt_output[i])))
        res = tf.transpose(tf.reshape(res, (-1, tf.shape(x)[0])))

        h = res + self.bias

        prev_output = states[0]
        output = h + tf.linalg.matmul(prev_output, self.recurrent_kernel)
        output = self.activation(output)

        return output, [output]

# Model Specification

In [15]:
def create_TTRNN(tt_input, tt_output, tt_ranks, no_steps_out, lr=1e-3):
    model = Sequential()
    model.add(TTRecurrentLayer(tt_input, tt_output, tt_ranks, activation='tanh')) # activation sigmoid to introduce gating instead
    model.add(SimpleRNN(128,activation = 'tanh'))
    model.add(Dense(no_steps_out))
    model.compile(optimizer = keras.optimizers.Adam(learning_rate = 0.001), loss = 'mse')
    return model

# Forecasting for 2022

In [16]:
horizons = [1,2,3,6,9,12]
steps_in = 12 # use previous 'steps_in' observations to predict the next horizon
features = df.shape[1]
tt_input = [1,11,2,12]
tt_output = [4,4,4,4]
ranks = [1,2,3,4,5,6,7,8,9,10]

In [None]:
for (country, transformation) in zip(countries,transformations):
    warnings.filterwarnings('ignore')

    print("country: " + str(country))
    print()

    # original inflation for reversing stationarity and calculating the RMSE
    split_point = len(df)-12
    original_inflation = "original_inflation_" + str(country)
    data_original = get_data(original_inflation, input_path_original_inflation)
    test_original = data_original[split_point:] # original inflation for 2022

    # train data
    train = np.array(df.loc['2000-01-01':'2021-12-01'])
    train_out = np.array(df.loc['2000-01-01':'2021-12-01']['inflation_' + country])

    # normalise features
    scaler = StandardScaler()
    scaler.fit(train)
    normalized_train = scaler.transform(train)

    # normalise target (needed especially for time series that aren't differenced because otherwise the scales are too different)
    target_scaler = StandardScaler()
    train_out = train_out.reshape(len(train_out), 1)
    target_scaler.fit(train_out)
    normalized_train_out = target_scaler.transform(train_out)

    train_set = np.hstack((normalized_train, normalized_train_out))

    # out-of-sample-inputs
    test = np.array(df.loc['2021-01-01':'2021-12-01'])
    normalized_test = scaler.transform(test)
    test_set = normalized_test.reshape((1, steps_in, features))

    for horizon in horizons:
        keras.utils.set_random_seed(42) # set random seed for reproducability

        warnings.filterwarnings('ignore')

        horizon_test = test_original[:horizon]
        forecast_df = pd.DataFrame(index=horizon_test.index)
        forecast_df['Actual'] = horizon_test['Data']

        X,y = split_sequences(train_set, steps_in, horizon)
        es = EarlyStopping(monitor = 'val_loss', mode = 'min', min_delta = 0.01, patience = 10,verbose = 0)

        best_rmse = 9999
        best_ranks = []

        for r in ranks:
            tt_ranks = [1,r,r,r,1]
            # train the model
            model = create_TTRNN(tt_input, tt_output, tt_ranks, horizon)
            fit = model.fit(X, y, verbose=0, epochs=100, validation_split=0.1, callbacks = [es])

            forecast_normalized = model.predict(test_set, verbose = 0)
            forecast_denormalized = target_scaler.inverse_transform(forecast_normalized)
            forecast = forecast_denormalized.flatten()

            # we need to reverse the stationarity
            if transformation == "FD":
                #print("Reversing the stationarity...")
                last_observed_value = data_original[:split_point]
                last_observed_value = last_observed_value.values
                last_observed_value = last_observed_value[-1] # get the last observed value, i.e. 2021-12

                forecasted_original = []
                for pred in forecast:
                    forecasted_value = last_observed_value + pred
                    forecasted_original.append(forecasted_value)
                    last_observed_value = forecasted_value
                forecast = forecasted_original
                forecast = np.concatenate(forecast)
                forecast_df['Forecast'] = forecast
            else:
                forecast_df['Forecast'] = forecast

            rmse = np.sqrt(mean_squared_error(forecast_df['Actual'], forecast_df['Forecast']))

            if rmse < best_rmse:
                best_rmse = rmse
                best_ranks = tt_ranks

            if horizon == 12 and country == "Finland":
                # plot the model
                output_file = '/content/drive/MyDrive/Colab Notebooks/Gradu/Figures/model_ttrnn.png'
                tf.keras.utils.plot_model(model, to_file=output_file, show_shapes=True, show_layer_activations=True, show_layer_names=False)

        print("horizon: " + str(horizon))
        print("best_ranks: " + str(best_ranks))
        print("rmse: " + str(round(best_rmse,3))) # this is the best rmse
        print()
    print("##################")

country: Finland

horizon: 1
best_ranks: [1, 2, 2, 2, 1]
rmse: 0.551

horizon: 2
best_ranks: [1, 8, 8, 8, 1]
rmse: 0.319

horizon: 3
best_ranks: [1, 6, 6, 6, 1]
rmse: 0.538

horizon: 6
best_ranks: [1, 5, 5, 5, 1]
rmse: 1.4

horizon: 9
best_ranks: [1, 2, 2, 2, 1]
rmse: 1.425

horizon: 12
best_ranks: [1, 7, 7, 7, 1]
rmse: 2.742

##################
country: United States

horizon: 1
best_ranks: [1, 6, 6, 6, 1]
rmse: 0.095

horizon: 2
best_ranks: [1, 3, 3, 3, 1]
rmse: 0.047

horizon: 3
best_ranks: [1, 4, 4, 4, 1]
rmse: 0.39

horizon: 6
best_ranks: [1, 5, 5, 5, 1]
rmse: 0.86

horizon: 9
best_ranks: [1, 2, 2, 2, 1]
rmse: 0.508

horizon: 12
best_ranks: [1, 7, 7, 7, 1]
rmse: 1.518

##################
country: Germany

horizon: 1
best_ranks: [1, 2, 2, 2, 1]
rmse: 0.456

horizon: 2
best_ranks: [1, 7, 7, 7, 1]
rmse: 1.346

horizon: 3
best_ranks: [1, 2, 2, 2, 1]
rmse: 1.802

horizon: 6
best_ranks: [1, 1, 1, 1, 1]
rmse: 2.948

horizon: 9
best_ranks: [1, 10, 10, 10, 1]
rmse: 3.597

horizon: 12
best_