In [None]:
# fix python path if working locally
from utils import fix_pythonpath_if_working_locally

fix_pythonpath_if_working_locally()

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import seaborn as sns

from darts import TimeSeries
from darts.models import TCNModel, RNNModel, NBEATSModel, TFTModel
from darts.dataprocessing.transformers import Scaler
from darts.utils.likelihood_models import GaussianLikelihood, QuantileRegression
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.metrics import mape, r2_score, rmse
from darts.utils.missing_values import fill_missing_values
from darts.datasets import AirPassengersDataset, SunspotsDataset, EnergyDataset
from darts.utils.statistics import check_seasonality, plot_acf, stationarity_tests

%matplotlib inline
plt.rcParams["figure.figsize"] = (20, 10)

In [None]:
# Load the combined macro / sector table. The strings below are passed to
# pandas as extra markers that should be parsed as NaN.
missing_values = ["n/a", "na", "NaN", "--", "null"]
df = pd.read_csv(
    "data/CombinedMacroSectorv4.csv",
    sep=",",
    na_values=missing_values,
    index_col=None,
)
print(df.shape)

In [None]:
# Preview the first rows of the macro / sector table.
df.head(n=5)

In [None]:
# Parse 'Date' as UTC timestamps, then strip the timezone so 'time' holds
# timezone-naive datetimes.
df['time'] = pd.to_datetime(df['Date'], utc=True).dt.tz_localize(None)

# 'Date' is superseded by 'time'; remove it from the frame in place.
df.drop(columns=['Date'], inplace=True)

In [None]:
# Load the news-sentiment table, reusing the NaN markers defined for the macro data.
daily_cov = pd.read_csv(
    "data/news_sentiment_data.csv",
    sep=",",
    na_values=missing_values,
    index_col=None,
)
print(daily_cov.shape)

In [None]:
# Preview the first rows of the sentiment table.
daily_cov.head(n=5)

In [None]:
# Parse 'date' as UTC timestamps and strip the timezone, mirroring the 'time'
# column built for df.
daily_cov['time'] = pd.to_datetime(daily_cov['date'], utc=True).dt.tz_localize(None)
daily_cov.drop(columns=['date'], inplace=True)

# Keep only the sentiment rows whose timestamp also occurs in df.
daily_cov = daily_cov.loc[daily_cov['time'].isin(df['time'])]
print(daily_cov.shape)

In [None]:
# Pair macro and sentiment rows by timestamp. A column-wise pd.concat aligns on
# the row index instead: df and daily_cov carry unrelated integer indexes, so it
# would match rows from different dates and duplicate the 'time' column.
merged = df.merge(daily_cov, on="time", how="inner")

# Correlate numeric columns only, so the datetime column can neither raise
# inside corr() nor show up in the heatmap.
result = merged.select_dtypes(include="number").corr()
sns.heatmap(result, cmap="Greens", annot=True)

In [None]:
# Wrap the 'sentiment' column in a darts TimeSeries indexed by 'time'.
series_cov = TimeSeries.from_dataframe(
    daily_cov,
    time_col='time',
    value_cols=["sentiment"],
)

In [None]:
# Rescale the sentiment covariate into [-1, 1]. The scaler is fitted on the
# complete covariate series (no train/validation separation at this point).
scaler_cov = Scaler(scaler=MinMaxScaler(feature_range=(-1, 1)))
series_cov_transformed = scaler_cov.fit_transform(series_cov)

In [None]:
# Split the scaled covariate at 2017-09-01 into training and validation parts,
# then plot the full scaled covariate series.
train_cov, val_cov = series_cov_transformed.split_after(
    split_point=pd.Timestamp('20170901')
)
series_cov_transformed.plot()

In [None]:
# Candidate column names (this list is not referenced by the code visible here).
columns = [
    "Oil",
    "Inflation",
    "CPI",
    "Dollar Index",
    "GDP",
    "Unemployment",
    "Energy",
    "Gold",
    "Materials",
    "Industrials",
    "Customer Discretionary",
    "Consumer Staples",
    "Health",
    "Financials",
    "Technology",
    "Telecommunications",
    "Utilities",
    "Real Estate",
    "Spy",
]

# Multivariate target series built from four of the columns, indexed by 'time'.
series = TimeSeries.from_dataframe(
    df,
    time_col='time',
    value_cols=['Oil', "CPI", "Dollar Index", "Spy"],
)

In [None]:
# NOTE: the scaler is fitted on the whole series before the split, so
# validation-period values influence the scaling. The original author accepted
# this deliberately ("not caring about train/val split").
scaler = Scaler()
series_transformed = scaler.fit_transform(series)

# Split the scaled target at 2017-09-01 into training and validation parts,
# then plot the full scaled series.
train, val = series_transformed.split_after(split_point=pd.Timestamp('20170901'))
series_transformed.plot()

In [None]:
# Seasonality check on the scaled 'Spy' component, with the maximum lag set to
# the number of time steps in the series.
seasonality = check_seasonality(
    series_transformed["Spy"],
    max_lag=series_transformed.n_timesteps,
)
print(seasonality)

In [None]:
# Autocorrelation plot of the scaled 'Spy' component, highlighting lag 365 and
# covering every lag up to the series length minus one.
plot_acf(
    series_transformed["Spy"],
    m=365,
    max_lag=series_transformed.n_timesteps - 1,
)

In [None]:
# Run darts' stationarity tests on the scaled 'Spy' component and print the result.
stationary = stationarity_tests(series_transformed["Spy"])
print(stationary)

In [None]:
def tft_model(
    hiddensize,
    layers,
    attention,
    train_series,
    val_series,
    past_covariates_train=None,
    past_covariates_val=None,
    future_covariates_train=None,
    future_covariates_val=None,
    *,
    input_chunk_length=365,
    output_chunk_length=7,
    n_epochs=100,
    batch_size=16,
    dropout=0.1,
    quantiles=None,
    random_state=None,
):
    """Build a darts ``TFTModel`` with quantile-regression likelihood and fit it.

    Parameters
    ----------
    hiddensize
        Passed to ``TFTModel`` as ``hidden_size``.
    layers
        Passed to ``TFTModel`` as ``lstm_layers``.
    attention
        Passed to ``TFTModel`` as ``num_attention_heads``.
    train_series, val_series
        Target series used for fitting and for validation during fitting.
    past_covariates_train, past_covariates_val
        Optional past covariates for the training / validation series.
    future_covariates_train, future_covariates_val
        Optional future covariates for the training / validation series.
    input_chunk_length, output_chunk_length, n_epochs, batch_size, dropout
        Keyword-only hyperparameters forwarded to ``TFTModel``. The defaults are
        the values that were previously hard-coded.
    quantiles
        Quantiles for ``QuantileRegression``. ``None`` selects the 17-value list
        that was previously hard-coded.
    random_state
        Optional seed forwarded to ``TFTModel`` so that a training run can be
        reproduced. ``None`` (the default) leaves training unseeded, as before.

    Returns
    -------
    The fitted ``TFTModel`` instance.
    """
    if quantiles is None:
        # Default quantile set, identical to the previously hard-coded list.
        quantiles = [
            0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5,
            0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99,
        ]

    model = TFTModel(
        input_chunk_length=input_chunk_length,
        output_chunk_length=output_chunk_length,
        n_epochs=n_epochs,
        hidden_size=hiddensize,
        lstm_layers=layers,
        num_attention_heads=attention,
        nr_epochs_val_period=1,
        dropout=dropout,
        batch_size=batch_size,
        add_relative_index=False,
        # Encoder configuration: cyclic 'month' encoding on the future side.
        add_encoders={'cyclic': {'future': ['month']}},
        likelihood=QuantileRegression(quantiles=quantiles),
        random_state=random_state,
    )

    model.fit(
        series=train_series,
        val_series=val_series,
        past_covariates=past_covariates_train,
        val_past_covariates=past_covariates_val,
        future_covariates=future_covariates_train,
        val_future_covariates=future_covariates_val,
    )

    return model

In [None]:
def eval_model(model, series, past_covariates=None, future_covariates=None, num_samples=100):
    """Backtest a fitted model on ``series``, plot the result and report R2 / RMSE.

    Historical forecasts are produced without retraining, starting at 70% of the
    series, with a 7-step horizon and a stride of 10. The actual series and the
    backtest are plotted on the current matplotlib figure, the R2 score is put in
    the title and the RMSE is printed.

    Parameters
    ----------
    model
        Fitted darts forecasting model.
    series
        Target series to backtest on.
    past_covariates, future_covariates
        Optional covariates forwarded to ``historical_forecasts``.
    num_samples
        Number of samples drawn per forecast. The models trained in this notebook
        use a quantile-regression likelihood, so a single sample is one random
        draw and yields noisy, non-repeatable plots and metrics; several samples
        let darts plot and score the median instead. Pass ``1`` for a model
        without a likelihood.
    """
    backtest = model.historical_forecasts(
        series=series,
        past_covariates=past_covariates,
        future_covariates=future_covariates,
        start=0.7,
        forecast_horizon=7,
        stride=10,
        retrain=False,
        num_samples=num_samples,
    )

    series.plot(label="actual")
    backtest.plot(label="backtest (H=7 day)")

    r2_score_value = r2_score(series, backtest)
    plt.title('R2:' + str(r2_score_value))
    plt.legend()

    print('Backtest RMSE = {}'.format(rmse(series, backtest)))

In [None]:
# Train with hidden size 32, one LSTM layer, four attention heads, no covariates,
# then save the fitted model to disk.
# (Original inline note read "was 5"; which argument it refers to is not stated.)
model = tft_model(
    hiddensize=32,
    layers=1,
    attention=4,
    train_series=train,
    val_series=val,
)
model.save_model('tft.pth.tar')

In [None]:
# Backtest the most recently trained model on the full scaled series.
eval_model(model=model, series=series_transformed)

In [None]:
# Same configuration (hidden size 32), now with the scaled sentiment series as
# past covariates.
model = tft_model(
    hiddensize=32,
    layers=1,
    attention=4,
    train_series=train,
    val_series=val,
    past_covariates_train=train_cov,
    past_covariates_val=val_cov,
)

In [None]:
# Backtest the covariate model, supplying the full scaled sentiment series.
eval_model(
    model=model,
    series=series_transformed,
    past_covariates=series_cov_transformed,
)

In [None]:
# Train with hidden size 16, no covariates.
model = tft_model(
    hiddensize=16,
    layers=1,
    attention=4,
    train_series=train,
    val_series=val,
)

In [None]:
# Backtest the most recently trained model on the full scaled series.
eval_model(model=model, series=series_transformed)

In [None]:
# Hidden size 16 with the scaled sentiment series as past covariates.
model = tft_model(
    hiddensize=16,
    layers=1,
    attention=4,
    train_series=train,
    val_series=val,
    past_covariates_train=train_cov,
    past_covariates_val=val_cov,
)

In [None]:
# Backtest the covariate model, supplying the full scaled sentiment series.
eval_model(
    model=model,
    series=series_transformed,
    past_covariates=series_cov_transformed,
)

In [None]:
# Train with hidden size 24, no covariates.
model = tft_model(
    hiddensize=24,
    layers=1,
    attention=4,
    train_series=train,
    val_series=val,
)

In [None]:
# Backtest the most recently trained model on the full scaled series.
eval_model(model=model, series=series_transformed)

In [None]:
# Hidden size 24 with the scaled sentiment series as past covariates.
model = tft_model(
    hiddensize=24,
    layers=1,
    attention=4,
    train_series=train,
    val_series=val,
    past_covariates_train=train_cov,
    past_covariates_val=val_cov,
)

In [None]:
# Backtest the covariate model, supplying the full scaled sentiment series.
eval_model(
    model=model,
    series=series_transformed,
    past_covariates=series_cov_transformed,
)