In [26]:
from pathlib import Path

from vangja.data_utils import (
    download_data,
    generate_train_test_df_around_point,
    process_data,
)

import pandas as pd
from tqdm import tqdm

import warnings
# Targeted filters: ignore UserWarning issued from modules whose name matches
# "sktime" / "sklearn".
warnings.filterwarnings("ignore", category=UserWarning, module="sktime")
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")
# Blanket filter: ignores every warning category from every module. It is
# registered last and therefore takes precedence, which makes the two targeted
# filters above redundant.
# NOTE(review): decide whether the blanket suppression is really intended; it
# also hides deprecation and convergence warnings from the forecasters.
warnings.simplefilter("ignore")

In [2]:
# Load the raw data through the project helper, pointed at the local ./data
# directory, then preprocess the first two entries of the result.
# NOTE(review): dfs[0] looks like market indexes and dfs[1] like individual
# tickers, judging by the variable names only - confirm against download_data.
dfs = download_data(Path("./data"))
indexes = process_data(dfs[0])
# Keep only the processed frames whose first "series" value is "^GSPC".
smp = list(
    filter(lambda frame: frame["series"].iloc[0] == "^GSPC", indexes)
)
gspc_tickers = process_data(dfs[1])

In [18]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    root_mean_squared_error,
    mean_squared_error,
)


def metrics(y_true, yhat, label="y"):
    """Score a forecast against the observed values and return one labelled row.

    Args:
        y_true: Mapping-like object (e.g. a DataFrame) whose "y" entry holds
            the observed values.
        yhat: Predicted values, passed to the scorers together with the
            observed values.
        label: Index label for the single row of the result; it is rendered
            through an f-string, so it ends up as text.

    Returns:
        A one-row DataFrame with the columns "mse", "rmse", "mae" and "mape".
    """
    observed = y_true["y"]
    row_key = f"{label}"
    # Column name -> scorer, listed in the order the columns should appear.
    scorers = (
        ("mse", mean_squared_error),
        ("rmse", root_mean_squared_error),
        ("mae", mean_absolute_error),
        ("mape", mean_absolute_percentage_error),
    )
    return pd.DataFrame(
        {column: {row_key: scorer(observed, yhat)} for column, scorer in scorers}
    )

In [35]:
from sktime.forecasting.arima import AutoARIMA
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.structural import UnobservedComponents

# Evaluate three sktime forecasters (AutoARIMA, ExponentialSmoothing,
# UnobservedComponents) on the first five tickers for every split point.
# pd.date_range defaults to daily frequency, so this runs one complete model
# sweep per calendar day in the range.
for point in pd.date_range("2015-01-01", "2017-01-01"):
    # One list of single-row metric frames per model, reset at every split point.
    model_metrics = {
        "arima": [],
        "es": [],
        "uc": [],
    }
    for gspc_ticker in tqdm(gspc_tickers[:5]):
        check = generate_train_test_df_around_point(
            window=91,
            horizon=365,
            dfs=[gspc_ticker],
            for_prophet=False,
            point=point,
        )
        # The helper signals "no usable split" with None; skip that ticker.
        if check is None:
            continue

        train_df_tickers, test_df_tickers, scales_tickers = check
        y_train = train_df_tickers.set_index("ds")["y"]
        # Absolute horizon: forecast exactly the timestamps of the test frame.
        fh = ForecastingHorizon(
            test_df_tickers.set_index("ds").index, is_relative=False
        )
        # Row label shared by the metric frames of all three models.
        series_label = train_df_tickers["series"].iloc[0]

        arima_forecaster = AutoARIMA(suppress_warnings=True)
        arima_forecaster.fit(y=y_train)
        y_pred_arima = arima_forecaster.predict(fh=fh)
        model_metrics["arima"].append(
            metrics(test_df_tickers, y_pred_arima, label=series_label)
        )

        es_forecaster = ExponentialSmoothing(seasonal="additive", sp=7)
        es_forecaster.fit(y=y_train)
        y_pred_es = es_forecaster.predict(fh=fh)
        model_metrics["es"].append(
            metrics(test_df_tickers, y_pred_es, label=series_label)
        )

        uc_forecaster = UnobservedComponents(
            level="local level",
            freq_seasonal=[{"period": 7, "harmonics": 6}],
        )
        uc_forecaster.fit(y=y_train)
        y_pred_uc = uc_forecaster.predict(fh=fh)
        model_metrics["uc"].append(
            metrics(test_df_tickers, y_pred_uc, label=series_label)
        )

    for key, one_metrics in model_metrics.items():
        # pd.concat raises ValueError on an empty list, which happens when
        # every ticker was skipped for this split point.
        if not one_metrics:
            print(f"{key}: no series available for {point.date()}")
            continue
        # `final_metrics` stays bound after the loop for later inspection.
        final_metrics = pd.concat(one_metrics).sort_index()
        print(f"{key}: {final_metrics['mape'].mean()}")


100%|██████████| 5/5 [00:05<00:00,  1.18s/it]


arima: 0.11770673813145663
es: 0.10914782847391262
uc: 0.10916315417174283


100%|██████████| 5/5 [00:06<00:00,  1.27s/it]

arima: 0.10684527808461222
es: 0.10945930741391571
uc: 0.10946729446579737





In [30]:
# Display the metrics frame left in the kernel. The evaluation loop rebinds
# `final_metrics` for every model key, so only the last assignment survives.
final_metrics

Unnamed: 0,mse,rmse,mae,mape
A,0.004435,0.066597,0.049648,0.056210
AAL,0.033787,0.183814,0.165392,0.207262
AAP,0.008641,0.092954,0.071442,0.067872
AAPL,0.009946,0.099730,0.083554,0.079178
ABBV,0.006897,0.083050,0.069613,0.080310
...,...,...,...,...
YUM,0.018500,0.136014,0.104427,0.093996
ZBH,0.006618,0.081353,0.063805,0.071051
ZBRA,0.053293,0.230852,0.171726,0.135347
ZION,0.004574,0.067634,0.055024,0.057346


In [7]:
# Fit the ARIMA forecaster on the training "y" series indexed by `ds`.
# Depends on `arima_forecaster` and `train_df_tickers` already existing in the
# kernel; the only visible place that assigns them is the evaluation loop.
arima_forecaster.fit(y=train_df_tickers.set_index("ds")["y"])

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [10]:
# Absolute (is_relative=False) forecasting horizon built from the `ds` values
# of the held-out frame; same construction as inside the evaluation loop.
fh = ForecastingHorizon(test_df_tickers.set_index("ds").index, is_relative=False)

In [12]:
# Forecast over the horizon `fh` with the already-fitted ARIMA forecaster.
y_pred_arima = arima_forecaster.predict(fh=fh)



In [11]:
# Inspect the forecasting horizon object.
fh

ForecastingHorizon(['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04',
               '2015-01-05', '2015-01-06', '2015-01-07', '2015-01-08',
               '2015-01-09', '2015-01-10',
               ...
               '2015-12-22', '2015-12-23', '2015-12-24', '2015-12-25',
               '2015-12-26', '2015-12-27', '2015-12-28', '2015-12-29',
               '2015-12-30', '2015-12-31'],
              dtype='datetime64[ns]', name='ds', length=365, freq=None, is_relative=False)

In [13]:
# Inspect the ARIMA forecast returned by predict().
y_pred_arima

ds
2015-01-01    0.991177
2015-01-02    0.992217
2015-01-03    0.993257
2015-01-04    0.994297
2015-01-05    0.995337
                ...   
2015-12-27    1.365527
2015-12-28    1.366567
2015-12-29    1.367607
2015-12-30    1.368647
2015-12-31    1.369687
Name: y, Length: 365, dtype: float64

In [16]:
# Spot-check: MSE between the held-out "y" column and the ARIMA forecast.
# NOTE(review): the two inputs carry different indexes (row numbers vs. `ds`
# dates in the displayed outputs); this presumes sklearn compares them
# positionally - confirm.
mean_squared_error(test_df_tickers["y"], y_pred_arima)

0.012366132441630009

In [17]:
# Inspect the held-out frame that the forecasts are scored against.
test_df_tickers

Unnamed: 0,typical_price,ds,series,y
19357,78.746783,2015-01-01,CLX,0.986231
19358,78.262116,2015-01-02,CLX,0.980161
19359,78.312404,2015-01-03,CLX,0.980790
19360,78.362693,2015-01-04,CLX,0.981420
19361,78.412981,2015-01-05,CLX,0.982050
...,...,...,...,...
19717,99.100549,2015-12-27,CLX,1.241143
19718,99.101023,2015-12-28,CLX,1.241149
19719,99.829436,2015-12-29,CLX,1.250271
19720,99.728695,2015-12-30,CLX,1.249010


In [19]:
# All four error metrics for the ARIMA forecast; no `label` is passed, so the
# resulting row is indexed by the default "y".
metrics(test_df_tickers, y_pred_arima)

Unnamed: 0,mse,rmse,mae,mape
y,0.012366,0.111203,0.095542,0.087243
