In [1]:
import arviz as az
from pathlib import Path

In [3]:
trace = az.from_netcdf(Path("./") / "models" / "40_y10_w.nc")
trace

In [9]:
trace["posterior"]["fs_0 - beta(p=365.25,n=10)"].to_numpy().std(axis=(0,1))

array([0.00058284, 0.0005868 , 0.00058   , 0.00056681, 0.0005825 ,
       0.00060312, 0.00057107, 0.00058547, 0.00058   , 0.00058128,
       0.00059432, 0.00058552, 0.00059893, 0.00057243, 0.00056806,
       0.00058903, 0.00057379, 0.00058281, 0.00060021, 0.000585  ])

In [16]:
import numpy as np
stds = []
for k in range(20):
    stds.append(trace["posterior"]["fs_0 - beta(p=365.25,n=10)"].to_numpy()[:, :, k].std())

np.array(stds) - trace["posterior"]["fs_0 - beta(p=365.25,n=10)"].to_numpy().std(axis=(0,1))

array([-6.50521303e-19,  2.16840434e-19,  4.33680869e-19, -6.50521303e-19,
       -2.16840434e-19, -8.67361738e-19,  3.25260652e-19, -3.25260652e-19,
       -5.42101086e-19,  7.58941521e-19,  1.08420217e-19,  5.42101086e-19,
        4.33680869e-19,  1.08420217e-18,  2.16840434e-19,  2.16840434e-19,
        2.16840434e-19, -2.16840434e-19,  2.16840434e-19,  5.42101086e-19])

In [9]:
trace["posterior"]["fs_0 - beta(p=365.25,n=10)"].to_numpy().std(axis=(1, 0))

array([0.00026623, 0.00026327, 0.00025478, 0.00025844, 0.0002704 ,
       0.00026999, 0.00026516, 0.00026919, 0.00025538, 0.00027118,
       0.00027061, 0.00026019, 0.00026539, 0.00026639, 0.0002709 ,
       0.00026203, 0.00026664, 0.00026187, 0.00026388, 0.00026198])

In [33]:
trace["posterior"]["fs_1 - beta(p=7,n=3)"].to_numpy().std(axis=1).mean(axis=0)

array([ 9.06452274,  9.06193701,  8.89914216,  7.60633643,  8.70338861,
       10.72171237])

In [1]:
from pathlib import Path
from tqdm import tqdm
import pandas as pd

from vangja_simple.components import LinearTrend, FourierSeasonality, BetaConstant
from vangja.data_utils import (
    generate_train_test_df_around_point,
    download_data,
    process_data,
)

print("Downloading data...")
dfs = download_data(Path("./data"))
indexes = process_data(dfs[0])
smp = [index for index in indexes if index["series"].iloc[0] == "^GSPC"]
gspc_tickers = process_data(dfs[1])
print("Data downloaded!")

Downloading data...
Data downloaded!


In [16]:
from vangja_simple.components import Constant
from vangja_simple.components.normal_constant import NormalConstant

from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    root_mean_squared_error,
)


def train_metrics(y_true, future, label="y"):
    y = y_true["y"]
    yhat = future["yhat"][:len(y)]
    return pd.DataFrame(
        {
            "mse": {f"{label}": mean_squared_error(y, yhat)},
            "rmse": {f"{label}": root_mean_squared_error(y, yhat)},
            "mae": {f"{label}": mean_absolute_error(y, yhat)},
            "mape": {f"{label}": mean_absolute_percentage_error(y, yhat)},
        }
    )

trend = LinearTrend(changepoint_range=1)
yearly = FourierSeasonality(365.25, 20, allow_tune=True, tune_method="simple")
weekly = FourierSeasonality(7, 3, allow_tune=False, tune_method="simple")
model =  trend ** (yearly + weekly)

point = "2014-01-01"

train_df_smp, test_df_smp, scales_smp = generate_train_test_df_around_point(
    window=365 * 40, horizon=365, dfs=smp, for_prophet=False, point=point
)

model.fit(train_df_smp, mcmc_samples=0, nuts_sampler="numpyro")
# model.fit_params["trace"].to_netcdf(Path("./") / "models" / "109.nc")
yhat = model.predict(365)
print(model.metrics(test_df_smp, yhat)["mape"].iloc[0])

model_metrics = []
point = "2014-01-01"
trend.changepoint_range = 0.8
yearly.beta_sd = 0.001
fit_params = model.fit_params

model_positive = trend ** (NormalConstant(mu=1, sd=0.1) * yearly + weekly)
model_positive.fit_params = fit_params
model_positive.tuned_model = None

model_negative = trend ** (NormalConstant(mu=-1, sd=0.1) * yearly + weekly)
model_negative.fit_params = fit_params
model_negative.tuned_model = None

am_i_right_positive = 0
am_i_right_negative = 0
positive = 0
negative = 0

for idx, gspc_ticker in enumerate(gspc_tickers):
    check = generate_train_test_df_around_point(
        window=91,
        horizon=365,
        dfs=[gspc_ticker],
        for_prophet=False,
        point=point,
    )

    if check is None:
        continue

    train_df_tickers, test_df_tickers, scales_tickers = check
    model_positive.tune(train_df_tickers, progressbar=False)
    # model_negative.tune(train_df_tickers, progressbar=False)
    yhat_positive = model_positive.predict(365)
    # yhat_negative = model_negative.predict(365)
    train_positive_metrics = train_metrics(train_df_tickers, yhat_positive, train_df_tickers.iloc[0]["series"])
    # train_negative_metrics = train_metrics(train_df_tickers, yhat_negative, train_df_tickers.iloc[0]["series"])
    test_positive_metrics = model.metrics(test_df_tickers, yhat_positive, train_df_tickers.iloc[0]["series"])
    # test_negative_metrics = model.metrics(test_df_tickers, yhat_negative, train_df_tickers.iloc[0]["series"])
    # if train_positive_metrics["mape"].iloc[0] <= 2 * train_negative_metrics["mape"].iloc[0]:
    #     positive += 1
    #     model_metrics.append(test_positive_metrics)
    #     if test_positive_metrics["mape"].iloc[0] <= test_negative_metrics["mape"].iloc[0]:
    #         am_i_right_positive += 1
    # else:
    #     negative += 1
    #     model_metrics.append(test_negative_metrics)
    #     if test_positive_metrics["mape"].iloc[0] > test_negative_metrics["mape"].iloc[0]:
    #         am_i_right_negative += 1
    model_metrics.append(test_positive_metrics)
    print(f"{idx}/{len(gspc_tickers)}: {pd.concat(model_metrics)['mape'].mean()}")
    # print(f"tp: {am_i_right_positive}, tn: {am_i_right_negative}, p: {positive}, n: {negative}, acc: {(am_i_right_negative + am_i_right_positive) / (positive + negative)}")

final_metrics = pd.concat(model_metrics)
print(f"{model}: {final_metrics['mape'].mean()}")


0.014020566470886264
0/503: 0.9173598070593292
1/503: 0.5087606428896779
3/503: 0.44028381606016137
4/503: 0.40403251215114483
5/503: 0.37625691877779593
6/503: 0.4098894053256825
7/503: 0.3749030838166935
8/503: 0.3449973443255558
9/503: 0.38583288766573065
10/503: 0.35608902606895454
12/503: 0.33870013980278535
13/503: 0.3479391276737111
14/503: 0.3353717238445034
17/503: 0.31734610398228164
18/503: 0.32875452507843106
19/503: 0.34478388606598154
20/503: 0.3301410590795869
21/503: 0.3242306790456542
24/503: 0.31862656654790394
26/503: 0.3043538761896486
27/503: 0.30972270288767983
28/503: 0.31035674197096974
29/503: 0.3038683006481629
30/503: 0.2969214822885502
31/503: 0.2925504452060324
32/503: 0.2855018652921712
34/503: 0.2858988730863209
35/503: 0.3048369616965415
36/503: 0.29684549685769507
37/503: 0.2946687439295986
38/503: 0.28616883791886977
39/503: 0.28552283160746195
41/503: 0.2793350346727891
42/503: 0.27211971215206887
43/503: 0.27402465041810575
44/503: 0.2728796582854557

UnboundLocalError: cannot access local variable 'right' where it is not associated with a value

In [11]:
model_positive.map_approx

{'lt_0 - slope': array(-0.24753293),
 'lt_0 - delta': array([-4.83735077e-07,  7.96986859e-06, -1.06693205e-04, -5.17911079e-03,
        -2.63657907e-05,  4.99349393e-05,  2.77238304e-01,  1.88510229e-01,
        -3.29914568e-05, -2.72296721e-05, -1.94351204e-02, -5.21763866e-02,
        -4.76070625e-02, -6.95206994e-11, -1.83150080e-05, -7.81989218e-05,
         4.92772049e-05, -2.44251797e-02, -9.58417842e-02, -1.32732202e-01,
        -9.53623189e-03, -1.33859409e-05,  3.28871863e-05,  4.87955465e-07,
         2.89664105e-01]),
 'lt_0 - intercept': array(0.98503531),
 'nc_0 - normal(mu=1,sd=0.1)': array(1.03875157),
 'fs_0 - beta(p=365.25,n=10)': array([ 1.36241518e-02, -1.58049709e-03, -3.60991865e-03,  4.88900912e-03,
        -8.22017925e-04,  3.64743272e-03,  1.72166213e-03, -2.07413988e-04,
        -1.42106004e-04, -1.41960881e-03,  2.31295817e-03,  4.46060841e-04,
        -9.67335906e-04,  1.10517087e-03, -9.54874209e-05,  7.19636772e-04,
         7.62180258e-04,  1.99985226e-03

In [12]:
model_negative.map_approx

AttributeError: 'MultiplicativeTimeSeries' object has no attribute 'map_approx'