> # Бајесова анализа на финансиски податоци од берзата на САД
> #### Јован Крајевски (199015)
> ##### јуни, 2022

## Собирање на податоци од берзата на САД

In [1]:
import yfinance
import time
import pandas as pd
from pathlib import Path

indexes = ["^GSPC"]

OVERWRITE_ANYWAY = False

DATA_LOCATION = Path(".") / "data"
DATA_LOCATION.mkdir(exist_ok=True, parents=True)

start_time = time.time()

if OVERWRITE_ANYWAY or not (DATA_LOCATION / "indexes.pkl").is_file():
    daily_smp = yfinance.download(" ".join(indexes),
                                  period="max",
                                  interval="1d")
    daily_smp.to_pickle(DATA_LOCATION / "indexes.pkl")

daily_smp = pd.read_pickle(DATA_LOCATION / "indexes.pkl")

end_time = time.time()
print(f"{end_time - start_time:.2f}s")

0.00s


In [2]:
daily_smp

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1950-01-03,16.660000,16.660000,16.660000,16.660000,16.660000,1260000
1950-01-04,16.850000,16.850000,16.850000,16.850000,16.850000,1890000
1950-01-05,16.930000,16.930000,16.930000,16.930000,16.930000,2550000
1950-01-06,16.980000,16.980000,16.980000,16.980000,16.980000,2010000
1950-01-09,17.080000,17.080000,17.080000,17.080000,17.080000,2520000
...,...,...,...,...,...,...
2022-05-23,3919.419922,3981.879883,3909.040039,3973.750000,3973.750000,3392770000
2022-05-24,3942.939941,3955.679932,3875.129883,3941.479980,3941.479980,3901640000
2022-05-25,3929.590088,3999.330078,3925.030029,3978.729980,3978.729980,4322190000
2022-05-26,3984.600098,4075.139893,3984.600098,4057.840088,4057.840088,3961940000


In [3]:
%matplotlib notebook
daily_smp["Adj Close"].plot()

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='Date'>

# Поделба на податоците на тренирачко и тестирачко множество

In [4]:
train_smp = daily_smp[daily_smp.index < "01-01-2007"].copy()
test_smp = daily_smp[daily_smp.index >= "01-01-2007"].copy()
len(train_smp), len(test_smp)

(14341, 3879)

# Трансформации на податоците

In [5]:
import numpy as np


def transform_close(df):
    df["close"] = df["Adj Close"]
    df["close_return"] = df["close"].pct_change(periods=1)
    df["close_diff"] = df["close"].diff(periods=1)
    df["close_log_return"] = np.log(df["close"]) - np.log(df["close"].shift(1))
    df.dropna(inplace=True)


transform_close(train_smp)
transform_close(test_smp)

In [6]:
train_smp["close_return"].plot()

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='Date'>

In [7]:
train_smp["close_diff"].plot()

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='Date'>

In [8]:
train_smp["close_log_return"].plot()

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='Date'>

# Стационарност

adf:  If Test statistic < Critical Value and p-value < 0.05 – Reject Null Hypothesis(HO) i.e., time series does not have a unit root, meaning it is stationary. It does not have a time-dependent structure.
RESULT: stationary

kpss: If Test statistic < Critical Value and p-value < 0.05 – Fail to Reject Null Hypothesis(HO) i.e., time series does not have a unit root, meaning it is trend stationary.
RESULT: not stationary

In [9]:
from statsmodels.tsa.stattools import adfuller, kpss


def adf_test(timeseries):
    adf_test = adfuller(timeseries, autolag='AIC')
    adf_output = {
        title: adf_test[idx]
        for idx, title in enumerate(
            ['Test Statistic', 'p-value', '#Lags Used'])
    }
    for key, value in adf_test[4].items():
        adf_output[f'Critical Value ({key})'] = value

    return adf_output


def kpss_test(timeseries):
    kpss_test = kpss(timeseries, regression='c', nlags="auto")
    kpss_output = {
        title: kpss_test[idx]
        for idx, title in enumerate(
            ['Test Statistic', 'p-value', '#Lags Used'])
    }
    for key, value in kpss_test[3].items():
        kpss_output[f'Critical Value ({key})'] = value

    return kpss_output


def interpret_results(adf_output, kpss_output):
    reject_h0 = []
    for test_output in [adf_output, kpss_output]:
        is_test_stat_larger = True
        for key, value in test_output.items():
            if "Critical" not in key:
                continue

            is_test_stat_larger = is_test_stat_larger and (
                test_output["Test Statistic"] > value)

        reject_h0.append(not is_test_stat_larger
                         and test_output["p-value"] < 0.05)

    if reject_h0[0] and not reject_h0[1]:
        print("stationary")
    elif not reject_h0[0] and reject_h0[1]:
        print("non-stationary")
    elif not reject_h0[0] and not reject_h0[1]:
        print("trend-stationary")
    else:
        print("diff-stationary")


for series in ["close", "close_return", "close_diff", "close_log_return"]:
    adf_output = adf_test(train_smp[series])
    kpss_output = kpss_test(train_smp[series])
    print(series)
    interpret_results(adf_output, kpss_output)

look-up table. The actual p-value is smaller than the p-value returned.



close
trend-stationary


look-up table. The actual p-value is greater than the p-value returned.



close_return
stationary
close_diff
diff-stationary
close_log_return
stationary


look-up table. The actual p-value is greater than the p-value returned.



# Авто-корелираност

The Durbin Watson test has values between 0 and 4. Below is the table containing values and their interpretations:

- 2: No autocorrelation. Generally, we assume 1.5 to 2.5 as no correlation.
- [0, 2): positive autocorrelation. The more close it to 0, the more signs of positive autocorrelation.
- (2 -4]: negative autocorrelation. The more close it to 4, the more signs of negative autocorrelation.

In [10]:
import statsmodels.api as sm
from statsmodels.stats.stattools import durbin_watson
from statsmodels.regression.linear_model import OLS

for series in ["close_return", "close_log_return"]:
    X = np.arange(len(train_smp[series]))
    Y = np.asarray(train_smp[series])
    X = sm.add_constant(X)

    # Fit the ordinary least square method.
    ols_res = OLS(Y, X).fit()
    # apply durbin watson statistic on the ols residual
    dw = durbin_watson(ols_res.resid)
    print(f"{series} durbin-watson test value: {dw}")

close_return durbin-watson test value: 1.844345013591561
close_log_return durbin-watson test value: 1.8448787792469894


# Распределба на log-return

In [11]:
train_smp["close_log_return"].hist(bins=100, density=True)

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [12]:
from scipy import stats


def get_distribution(df):
    cauchy_p_value = stats.kstest(df, "cauchy", stats.cauchy.fit(df))[1]
    gennorm_p_value = stats.kstest(df, "gennorm", stats.gennorm.fit(df))[1]
    p_values = list(zip([cauchy_p_value, gennorm_p_value], [0, 1]))

    return max(p_values)[1]

In [13]:
from pandarallel import pandarallel

pandarallel.initialize()

start_time = time.time()
x = train_smp["close_log_return"].interpolate()
dists = x.rolling(250).parallel_apply(get_distribution)
dists.plot()
end_time = time.time()
print(f"{end_time - start_time:.2f}s")

INFO: Pandarallel will run on 6 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


<IPython.core.display.Javascript object>

34.24s


In [14]:
(dists == 1).mean(), (dists == 0).mean()

(0.9755230125523012, 0.007112970711297071)

In [15]:
def get_gennorm_p_values(df):
    return stats.kstest(df, "gennorm", stats.gennorm.fit(df))[1]


pandarallel.initialize()
start_time = time.time()
x = train_smp["close_log_return"].interpolate()
gennorm_p_values = x.rolling(250).parallel_apply(get_gennorm_p_values)
gennorm_p_values.plot()
end_time = time.time()
print(f"{end_time - start_time:.2f}s")

INFO: Pandarallel will run on 6 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


<IPython.core.display.Javascript object>

29.40s


In [16]:
gennorm_p_values.mean()

0.8211481553531417

In [17]:
from pandarallel import pandarallel


def get_gennorm_betas(df):
    return stats.gennorm.fit(df)[0]


pandarallel.initialize()
start_time = time.time()
x = train_smp["close_log_return"].interpolate()
betas = x.rolling(250).parallel_apply(get_gennorm_betas)
betas.plot()
end_time = time.time()
print(f"{end_time - start_time:.2f}s")

INFO: Pandarallel will run on 6 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


<IPython.core.display.Javascript object>

26.94s


In [18]:
prior_beta_mean, prior_beta_sigma = betas.mean(), betas.std()
prior_beta_mean, prior_beta_sigma

(1.4023509111239734, 0.34852008255186584)

In [19]:
betas.hist(density=True, bins=100)

<IPython.core.display.Javascript object>

<AxesSubplot:>

# Прозорци (rolling windows)

In [6]:
def get_rolling_windows(df, L=250):
    return [df.iloc[x:x + L] for x in range(len(df) - L + 1)]


start_time = time.time()

train_data = get_rolling_windows(train_smp)
test_data = get_rolling_windows(test_smp)

end_time = time.time()
print(f"{end_time - start_time:.2f}s")

len(train_data), len(test_data)

0.54s


(14091, 3629)

In [21]:
train_data[0]

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,close,close_return,close_diff,close_log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1950-01-04,16.850000,16.850000,16.850000,16.850000,16.850000,1890000,16.850000,0.011405,0.190001,0.011340
1950-01-05,16.930000,16.930000,16.930000,16.930000,16.930000,2550000,16.930000,0.004748,0.080000,0.004737
1950-01-06,16.980000,16.980000,16.980000,16.980000,16.980000,2010000,16.980000,0.002953,0.049999,0.002949
1950-01-09,17.080000,17.080000,17.080000,17.080000,17.080000,2520000,17.080000,0.005889,0.100000,0.005872
1950-01-10,17.030001,17.030001,17.030001,17.030001,17.030001,2160000,17.030001,-0.002927,-0.049999,-0.002932
...,...,...,...,...,...,...,...,...,...,...
1950-12-27,20.299999,20.299999,20.299999,20.299999,20.299999,2940000,20.299999,0.019076,0.379999,0.018897
1950-12-28,20.379999,20.379999,20.379999,20.379999,20.379999,3560000,20.379999,0.003941,0.080000,0.003933
1950-12-29,20.430000,20.430000,20.430000,20.430000,20.430000,3440000,20.430000,0.002453,0.050001,0.002450
1951-01-02,20.770000,20.770000,20.770000,20.770000,20.770000,3030000,20.770000,0.016642,0.340000,0.016505


# Бајесова анализа

In [8]:
import numpy as np
import pymc as pm
from scipy import stats as ss


def from_posterior(param, samples, testval, set_testval):
    smin, smax = np.min(samples), np.max(samples)
    width = smax - smin
    x = np.linspace(smin, smax, 100)
    y = ss.gaussian_kde(samples.data.flatten())(x)

    # what was never sampled should have a small probability but not 0,
    # so we'll extend the domain and use linear approximation of density on it
    x = np.concatenate([[x[0] - 3 * width], x, [x[-1] + 3 * width]])
    y = np.concatenate([[0], y, [0]])
    if set_testval:
        return pm.distributions.Interpolated(param, x, y, initval=testval)

    return pm.distributions.Interpolated(param, x, y)



In [9]:
def get_stats(sample):
    return [
        np.mean(sample),
        ss.tstd(sample),
        np.mean(sample > 0),
        ss.skew(sample),
        ss.kurtosis(sample),
    ] + [np.percentile(sample, p) for p in range(0, 101, 5)]


get_stats(np.random.randn(10000))

[0.004853597108340356,
 1.002543145747737,
 0.5066,
 -0.00511867765177493,
 0.01559562460072872,
 -3.7542452566898254,
 -1.628084348232827,
 -1.2860695850535393,
 -1.0542514050104348,
 -0.8434462312440084,
 -0.6729444547687096,
 -0.5268903128158161,
 -0.3820885782768081,
 -0.24900540627083592,
 -0.10826300095435723,
 0.014565020267363667,
 0.14028732832964746,
 0.27597453735028665,
 0.40728954313647975,
 0.5407516587584971,
 0.684495075391963,
 0.8384667015565758,
 1.0228632494856928,
 1.27162027231103,
 1.627931755634743,
 4.154136651602819]

## Бајесова анализа со нормален приор

In [10]:
import cloudpickle
import pickle

DATA_LOCATION = Path(".") / "models"
DATA_LOCATION.mkdir(exist_ok=True, parents=True)


def read_stats_and_model(model_name):
    if not (DATA_LOCATION / model_name).is_file():
        return [], None

    with open(DATA_LOCATION / model_name, "rb") as f:
        return pickle.load(f)


def write_stats_and_model(model_name, stats, model):
    with open(DATA_LOCATION / model_name, "wb") as f:
        cloudpickle.dump((stats, model), f)

In [25]:
def get_initial_normal_model(data, prior_mu_mean, prior_mu_sigma,
                             prior_std_sigma):
    mu_testval, std_testval = ss.norm.fit(data.get_value())
    model = pm.Model()
    with model:
        mu = pm.Normal("mu",
                       mu=prior_mu_mean,
                       sigma=prior_mu_sigma,
                       initval=mu_testval)
        std = pm.HalfNormal("std", sigma=prior_std_sigma, initval=std_testval)
        obs = pm.Normal("obs", mu=mu, sigma=std, observed=data)

    return model


def get_next_normal_model(data, trace, set_testval):
    mu_testval, std_testval = ss.norm.fit(data.get_value())
    model = pm.Model()
    with model:
        mu = from_posterior("mu", trace["posterior"]["mu"], mu_testval,
                            set_testval)
        std = from_posterior("std", trace["posterior"]["std"], std_testval,
                             set_testval)
        obs = pm.Normal("obs", mu=mu, sigma=std, observed=data)

    return model

In [26]:
start_time = time.time()

prior_mu_mean = np.array(
    [window["close_log_return"].mean() for window in train_data]).mean()
prior_mu_sigma = np.array(
    [window["close_log_return"].mean() for window in train_data]).std(ddof=1)
prior_std_sigma = np.array(
    [window["close_log_return"].std() for window in train_data]).std(ddof=1)

end_time = time.time()
print(f"{end_time - start_time:.2f}s")
print(prior_mu_mean, prior_mu_sigma, prior_std_sigma)

2.61s
0.000302779667213874 0.0005921371039989852 0.003211753846880127


In [27]:
import aesara

data_sample = aesara.shared(train_data[0]["close_log_return"].to_numpy())

model = get_initial_normal_model(data_sample, prior_mu_mean, prior_mu_sigma,
                                 prior_std_sigma)

with model:
    trace = pm.sample(draws=1000, step=[pm.Metropolis()], chains=4, cores=4)
    posterior_obs = pm.sample_posterior_predictive(trace)

Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>Metropolis: [mu]
>Metropolis: [std]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 1 seconds.


In [28]:
import arviz as az
with model:
    az.plot_trace(trace)
    az.plot_posterior(trace)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [29]:
with model:
    az.plot_dist(posterior_obs["observed_data"]["obs"],
                 rug=True,
                 quantiles=[.25, .5, .75])

<IPython.core.display.Javascript object>

In [30]:
data_sample = aesara.shared(train_data[1]["close_log_return"].to_numpy())

model = get_next_normal_model(data_sample, trace, True)

with model:
    trace = pm.sample(draws=1000, step=[pm.Metropolis()], chains=4, cores=4)
    posterior_obs = pm.sample_posterior_predictive(trace)

Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>Metropolis: [mu]
>Metropolis: [std]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 1 seconds.


In [31]:
with model:
    az.plot_trace(trace)
    az.plot_posterior(trace)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [32]:
with model:
    az.plot_dist(posterior_obs["observed_data"]["obs"],
                 rug=True,
                 quantiles=[.25, .5, .75])

<IPython.core.display.Javascript object>

In [34]:
import aesara


def train_model(
        model_name,
        get_initial_model_func,
        initial_model_args,
        get_next_model_func,
        update_priors_on=60,  # update priors after 60 windows
        save_every=10,  # save model after 10 windows
        verbosity=10,  # print info about progress after 10 windows
        draws=1000,
        chains=4,
        cores=4):
    success = False
    while not success:
        try:
            data_sample = aesara.shared(
                train_data[0]["close_log_return"].to_numpy())

            stats, model = read_stats_and_model(model_name)
            trace = None

            if not model:
                model = get_initial_model_func(data_sample,
                                               *initial_model_args)

            if len(stats) and len(stats) % update_priors_on == 0:
                # recalculate last window so that we have trace
                stats = stats[:-1]

            for idx, window in enumerate(train_data):
                if idx % verbosity == 0:
                    print(
                        f"Window {idx + 1}/{len(train_data)} ({(idx + 1)/len(train_data)*100:.2f}%)..."
                    )

                if idx < len(stats):
                    continue  # window is already processed

                data_sample.set_value(window["close_log_return"].to_numpy())

                if idx % update_priors_on == 0 and idx > 0:
                    next_model = get_next_model_func(data_sample, trace, True)
                    if not np.isnan(
                            np.array(list(
                                next_model.point_logps().values()))).any():
                        model = next_model
                    else:
                        with open("logs.txt", "a") as f:
                            f.write(
                                f"{model_name}_{idx} - Failed set_testval\n")

                        model = get_next_model_func(data_sample, trace, False)

                with model:
                    trace = pm.sample(draws=draws,
                                      step=[pm.Metropolis()],
                                      chains=chains,
                                      cores=cores,
                                      progressbar=False)
                    posterior_obs = pm.sample_posterior_predictive(
                        trace, progressbar=False)

                stats.append(
                    get_stats(
                        posterior_obs["observed_data"]["obs"].data.flatten()))

                if idx % save_every == 0:
                    write_stats_and_model(model_name, stats, model)

            write_stats_and_model(model_name, stats, model)
            success = True
        except Exception as e:
            with open("logs.txt", "a") as f:
                f.write(str(e))
                f.write("\n")

# Бајесова анализа со двојно-експоненцијален (Лапласов) приор

In [35]:
def get_initial_laplace_model(data, prior_mu_mean, prior_mu_sigma,
                              prior_std_sigma):
    mu_testval, b_testval = ss.laplace.fit(data.get_value())
    model = pm.Model()
    with model:
        mu = pm.Normal("mu",
                       mu=prior_mu_mean,
                       sigma=prior_mu_sigma,
                       initval=mu_testval)
        b = pm.HalfNormal("b", sigma=prior_std_sigma, initval=b_testval)
        obs = pm.Laplace("obs", mu=mu, b=b, observed=data)

    return model


def get_next_laplace_model(data, trace, set_testval):
    mu_testval, b_testval = ss.laplace.fit(data.get_value())
    model = pm.Model()
    with model:
        mu = from_posterior("mu", trace["posterior"]["mu"], mu_testval,
                            set_testval)
        b = from_posterior("b", trace["posterior"]["b"], b_testval,
                           set_testval)
        obs = pm.Laplace("obs", mu=mu, b=b, observed=data)

    return model

In [36]:
data_sample = aesara.shared(train_data[0]["close_log_return"].to_numpy())

model = get_initial_laplace_model(data_sample, prior_mu_mean, prior_mu_sigma,
                                 prior_std_sigma)

with model:
    trace = pm.sample(draws=1000, step=[pm.Metropolis()], chains=4, cores=4)
    posterior_obs = pm.sample_posterior_predictive(trace)

Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>Metropolis: [mu]
>Metropolis: [b]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 1 seconds.


In [37]:
with model:
    az.plot_trace(trace)
    az.plot_posterior(trace)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [38]:
with model:
    az.plot_dist(posterior_obs["observed_data"]["obs"],
                 rug=True,
                 quantiles=[.25, .5, .75])

<IPython.core.display.Javascript object>

In [39]:
data_sample = aesara.shared(train_data[1]["close_log_return"].to_numpy())

model = get_next_laplace_model(data_sample, trace, True)

with model:
    trace = pm.sample(draws=1000, step=[pm.Metropolis()], chains=4, cores=4)
    posterior_obs = pm.sample_posterior_predictive(trace)

Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>Metropolis: [mu]
>Metropolis: [b]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 1 seconds.


In [40]:
with model:
    az.plot_trace(trace)
    az.plot_posterior(trace)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [41]:
with model:
    az.plot_dist(posterior_obs["observed_data"]["obs"],
                 rug=True,
                 quantiles=[.25, .5, .75])

<IPython.core.display.Javascript object>

In [43]:
train_model("fixed_normal_metropolis.pkl", get_initial_normal_model,
            [prior_mu_mean, prior_mu_sigma, prior_std_sigma],
            get_next_normal_model)
train_model("fixed_laplace_metropolis.pkl", get_initial_laplace_model,
            [prior_mu_mean, prior_mu_sigma, prior_std_sigma],
            get_next_laplace_model)

Window 1/14091 (0.01%)...
Window 11/14091 (0.08%)...
Window 21/14091 (0.15%)...
Window 31/14091 (0.22%)...
Window 41/14091 (0.29%)...
Window 51/14091 (0.36%)...
Window 61/14091 (0.43%)...
Window 71/14091 (0.50%)...
Window 81/14091 (0.57%)...
Window 91/14091 (0.65%)...
Window 101/14091 (0.72%)...
Window 111/14091 (0.79%)...
Window 121/14091 (0.86%)...
Window 131/14091 (0.93%)...
Window 141/14091 (1.00%)...
Window 151/14091 (1.07%)...
Window 161/14091 (1.14%)...
Window 171/14091 (1.21%)...
Window 181/14091 (1.28%)...
Window 191/14091 (1.36%)...
Window 201/14091 (1.43%)...
Window 211/14091 (1.50%)...
Window 221/14091 (1.57%)...
Window 231/14091 (1.64%)...
Window 241/14091 (1.71%)...
Window 251/14091 (1.78%)...
Window 261/14091 (1.85%)...
Window 271/14091 (1.92%)...
Window 281/14091 (1.99%)...
Window 291/14091 (2.07%)...
Window 301/14091 (2.14%)...
Window 311/14091 (2.21%)...
Window 321/14091 (2.28%)...
Window 331/14091 (2.35%)...
Window 341/14091 (2.42%)...
Window 351/14091 (2.49%)...
Win

Window 1/14091 (0.01%)...
Window 11/14091 (0.08%)...
Window 21/14091 (0.15%)...
Window 31/14091 (0.22%)...
Window 41/14091 (0.29%)...
Window 51/14091 (0.36%)...
Window 61/14091 (0.43%)...
Window 71/14091 (0.50%)...
Window 81/14091 (0.57%)...
Window 91/14091 (0.65%)...
Window 101/14091 (0.72%)...
Window 111/14091 (0.79%)...
Window 121/14091 (0.86%)...
Window 131/14091 (0.93%)...
Window 141/14091 (1.00%)...
Window 151/14091 (1.07%)...
Window 161/14091 (1.14%)...
Window 171/14091 (1.21%)...
Window 181/14091 (1.28%)...
Window 191/14091 (1.36%)...
Window 201/14091 (1.43%)...
Window 211/14091 (1.50%)...
Window 221/14091 (1.57%)...
Window 231/14091 (1.64%)...
Window 241/14091 (1.71%)...
Window 251/14091 (1.78%)...
Window 261/14091 (1.85%)...
Window 271/14091 (1.92%)...
Window 281/14091 (1.99%)...
Window 291/14091 (2.07%)...
Window 301/14091 (2.14%)...
Window 311/14091 (2.21%)...
Window 321/14091 (2.28%)...
Window 331/14091 (2.35%)...
Window 341/14091 (2.42%)...
Window 351/14091 (2.49%)...
Win

# Бајесова анализа со обопштена нормална дистрибуција

In [44]:
import numpy as np
from aesara.tensor.var import TensorVariable
from aesara.tensor.random.op import RandomVariable
from typing import List, Tuple


class GenNormRV(RandomVariable):
    name: str = "GenNorm"
    ndim_supp: int = 0
    ndims_params: List[int] = [0, 0, 0]
    dtype: str = "floatX"
    _print_name: Tuple[str, str] = ("GenNorm", "GGD")

    @classmethod
    def rng_fn(
        cls,
        rng: np.random.RandomState,
        beta: np.ndarray,
        loc: np.ndarray,
        scale: np.ndarray,
        size: Tuple[int, ...],
    ) -> np.ndarray:
        return ss.gennorm.rvs(beta, loc, scale, random_state=rng, size=size)

In [45]:
import aesara.tensor as at
from pymc.aesaraf import floatX, intX
from pymc.distributions.distribution import Continuous
from pymc.distributions.dist_math import check_parameters

gennorm_rv = GenNormRV()


class GenNorm(Continuous):
    rv_op = gennorm_rv

    @classmethod
    def dist(cls, beta, loc, scale, *args, **kwargs):
        beta = at.as_tensor_variable(floatX(beta))
        loc = at.as_tensor_variable(floatX(loc))
        scale = at.as_tensor_variable(floatX(scale))
        return super().dist([beta, loc, scale], *args, **kwargs)

    def moment(rv, size, beta, loc, scale):
        moment, _ = at.broadcast_arrays(beta, loc, scale)
        if not rv_size_is_none(size):
            moment = at.full(size, moment)
        return moment

    def logp(value, beta, loc, scale):
        return check_parameters(
            at.log(beta / (2 * scale)) - at.gammaln(1.0 / beta) -
            (at.abs_(value - loc) / scale)**beta, beta >= 0, scale >= 0)

    def logcdf(value, beta, loc, scale):
        b = value - loc
        c = 0.5 * b / at.abs_(b)
        return (0.5 + c) - c * at.gammaincc(1.0 / beta,
                                            at.abs_(b / scale)**beta)

In [46]:
def get_initial_gennorm_model(data, prior_mu_mean, prior_mu_sigma,
                              prior_beta_mean, prior_beta_sigma,
                              prior_std_sigma):
    beta_testval, loc_testval, scale_testval = ss.gennorm.fit(data.get_value())
    model = pm.Model()
    with model:
        beta = pm.TruncatedNormal("beta",
                                  mu=prior_beta_mean,
                                  sigma=prior_beta_sigma,
                                  lower=0,
                                  initval=beta_testval)
        loc = pm.Normal("loc",
                        mu=prior_mu_mean,
                        sigma=prior_mu_sigma,
                        initval=loc_testval)
        scale = pm.HalfNormal("scale",
                              sigma=prior_std_sigma,
                              initval=scale_testval)
        obs = GenNorm("obs", beta=beta, loc=loc, scale=scale, observed=data)

    return model


def get_next_gennorm_model(data, trace, set_testval):
    beta_testval, loc_testval, scale_testval = ss.gennorm.fit(data.get_value())
    model = pm.Model()
    with model:
        beta = from_posterior("beta", trace["posterior"]["beta"], beta_testval,
                              set_testval)
        loc = from_posterior("loc", trace["posterior"]["loc"], loc_testval,
                             set_testval)
        scale = from_posterior("scale", trace["posterior"]["scale"],
                               scale_testval, set_testval)
        obs = GenNorm("obs", beta=beta, loc=loc, scale=scale, observed=data)

    return model

In [47]:
import aesara

data_sample = aesara.shared(train_data[0]["close_log_return"].to_numpy())

model = get_initial_gennorm_model(data_sample, prior_mu_mean, prior_mu_sigma,
                                  prior_beta_mean, prior_beta_sigma,
                                  prior_std_sigma)

with model:
    trace = pm.sample(draws=1000, step=[pm.Metropolis()], chains=4, cores=4)
    posterior_obs = pm.sample_posterior_predictive(trace)

Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>Metropolis: [beta]
>Metropolis: [loc]
>Metropolis: [scale]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 1 seconds.


In [48]:
import arviz as az
with model:
    az.plot_trace(trace)
    az.plot_posterior(trace)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [49]:
with model:
    az.plot_dist(posterior_obs["observed_data"]["obs"],
                 rug=True,
                 quantiles=[.25, .5, .75])

<IPython.core.display.Javascript object>

In [50]:
data_sample = aesara.shared(train_data[1]["close_log_return"].to_numpy())

model = get_next_gennorm_model(data_sample, trace, True)

with model:
    trace = pm.sample(draws=1000, step=[pm.Metropolis()], chains=4, cores=4)
    posterior_obs = pm.sample_posterior_predictive(trace)

Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>Metropolis: [beta]
>Metropolis: [loc]
>Metropolis: [scale]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 2 seconds.


In [51]:
with model:
    az.plot_trace(trace)
    az.plot_posterior(trace)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [52]:
with model:
    az.plot_dist(posterior_obs["observed_data"]["obs"],
                 rug=True,
                 quantiles=[.25, .5, .75])

<IPython.core.display.Javascript object>

In [53]:
train_model("fixed_gennorm_metropolis.pkl", get_initial_gennorm_model, [
    prior_mu_mean, prior_mu_sigma, prior_beta_mean, prior_beta_sigma,
    prior_std_sigma
], get_next_gennorm_model)

Window 1/14091 (0.01%)...
Window 11/14091 (0.08%)...
Window 21/14091 (0.15%)...
Window 31/14091 (0.22%)...
Window 41/14091 (0.29%)...
Window 51/14091 (0.36%)...
Window 61/14091 (0.43%)...
Window 71/14091 (0.50%)...
Window 81/14091 (0.57%)...
Window 91/14091 (0.65%)...
Window 101/14091 (0.72%)...
Window 111/14091 (0.79%)...
Window 121/14091 (0.86%)...
Window 131/14091 (0.93%)...
Window 141/14091 (1.00%)...
Window 151/14091 (1.07%)...
Window 161/14091 (1.14%)...
Window 171/14091 (1.21%)...
Window 181/14091 (1.28%)...
Window 191/14091 (1.36%)...
Window 201/14091 (1.43%)...
Window 211/14091 (1.50%)...
Window 221/14091 (1.57%)...
Window 231/14091 (1.64%)...
Window 241/14091 (1.71%)...
Window 251/14091 (1.78%)...
Window 261/14091 (1.85%)...
Window 271/14091 (1.92%)...
Window 281/14091 (1.99%)...
Window 291/14091 (2.07%)...
Window 301/14091 (2.14%)...
Window 311/14091 (2.21%)...
Window 321/14091 (2.28%)...
Window 331/14091 (2.35%)...
Window 341/14091 (2.42%)...
Window 351/14091 (2.49%)...
Win

# Тестирање на моделите

In [54]:
def test_model(
        train_model_name,
        get_next_model_func,
        update_priors_on=60,  # update priors after 60 windows
        save_every=10,  # save model after 10 windows
        verbosity=10,  # print info about progress after 10 windows
        draws=1000,
        chains=4,
        cores=4):
    success = False
    while not success:
        try:
            [file_name, file_extenstion] = train_model_name.split('.')
            test_model_name = f"{file_name}_test.{file_extenstion}"
            data_sample = aesara.shared(
                test_data[0]["close_log_return"].to_numpy())

            train_stats, train_model = read_stats_and_model(train_model_name)
            test_stats, test_model = read_stats_and_model(test_model_name)
            trace = None

            if not test_model:
                test_model = train_model

            if len(test_stats) and len(test_stats) % update_priors_on == 0:
                # recalculate last window so that we have trace
                test_stats = test_stats[:-1]

            for idx, window in enumerate(test_data):
                if idx % verbosity == 0:
                    print(
                        f"Window {idx + 1}/{len(test_data)} ({(idx + 1)/len(test_data)*100:.2f}%)..."
                    )

                if idx < len(test_stats):
                    continue  # window is already processed

                data_sample.set_value(window["close_log_return"].to_numpy())

                if idx % update_priors_on == 0 and idx > 0:
                    next_model = get_next_model_func(data_sample, trace, True)
                    if not np.isnan(
                            np.array(list(
                                next_model.point_logps().values()))).any():
                        test_model = next_model
                    else:
                        with open("logs.txt", "a") as f:
                            f.write(
                                f"{test_model_name}_{idx} - Failed set_testval\n"
                            )

                        test_model = get_next_model_func(
                            data_sample, trace, False)

                with test_model:
                    trace = pm.sample(draws=draws,
                                      step=[pm.Metropolis()],
                                      chains=chains,
                                      cores=cores,
                                      progressbar=False)
                    posterior_obs = pm.sample_posterior_predictive(
                        trace, progressbar=False)

                test_stats.append(
                    get_stats(
                        posterior_obs["observed_data"]["obs"].data.flatten()))
                if idx % save_every == 0:
                    write_stats_and_model(test_model_name, test_stats,
                                          test_model)

            write_stats_and_model(test_model_name, test_stats, test_model)
            success = True
        except Exception as e:
            with open("logs.txt", "a") as f:
                f.write(str(e))
                f.write("\n")

In [55]:
test_model("fixed_normal_metropolis.pkl", get_next_normal_model)
test_model("fixed_laplace_metropolis.pkl", get_next_laplace_model)
test_model("fixed_gennorm_metropolis.pkl", get_next_gennorm_model)

Window 1/3629 (0.03%)...
Window 11/3629 (0.30%)...
Window 21/3629 (0.58%)...
Window 31/3629 (0.85%)...
Window 41/3629 (1.13%)...
Window 51/3629 (1.41%)...
Window 61/3629 (1.68%)...
Window 71/3629 (1.96%)...
Window 81/3629 (2.23%)...
Window 91/3629 (2.51%)...
Window 101/3629 (2.78%)...
Window 111/3629 (3.06%)...
Window 121/3629 (3.33%)...
Window 131/3629 (3.61%)...
Window 141/3629 (3.89%)...
Window 151/3629 (4.16%)...
Window 161/3629 (4.44%)...
Window 171/3629 (4.71%)...
Window 181/3629 (4.99%)...
Window 191/3629 (5.26%)...
Window 201/3629 (5.54%)...
Window 211/3629 (5.81%)...
Window 221/3629 (6.09%)...
Window 231/3629 (6.37%)...
Window 241/3629 (6.64%)...
Window 251/3629 (6.92%)...
Window 261/3629 (7.19%)...
Window 271/3629 (7.47%)...
Window 281/3629 (7.74%)...
Window 291/3629 (8.02%)...
Window 301/3629 (8.29%)...
Window 311/3629 (8.57%)...
Window 321/3629 (8.85%)...
Window 331/3629 (9.12%)...
Window 341/3629 (9.40%)...
Window 351/3629 (9.67%)...
Window 361/3629 (9.95%)...
Window 371/3

Window 1/3629 (0.03%)...
Window 11/3629 (0.30%)...
Window 21/3629 (0.58%)...
Window 31/3629 (0.85%)...
Window 41/3629 (1.13%)...
Window 51/3629 (1.41%)...
Window 61/3629 (1.68%)...
Window 71/3629 (1.96%)...
Window 81/3629 (2.23%)...
Window 91/3629 (2.51%)...
Window 101/3629 (2.78%)...
Window 111/3629 (3.06%)...
Window 121/3629 (3.33%)...
Window 131/3629 (3.61%)...
Window 141/3629 (3.89%)...
Window 151/3629 (4.16%)...
Window 161/3629 (4.44%)...
Window 171/3629 (4.71%)...
Window 181/3629 (4.99%)...
Window 191/3629 (5.26%)...
Window 201/3629 (5.54%)...
Window 211/3629 (5.81%)...
Window 221/3629 (6.09%)...
Window 231/3629 (6.37%)...
Window 241/3629 (6.64%)...
Window 251/3629 (6.92%)...
Window 261/3629 (7.19%)...
Window 271/3629 (7.47%)...
Window 281/3629 (7.74%)...
Window 291/3629 (8.02%)...
Window 301/3629 (8.29%)...
Window 311/3629 (8.57%)...
Window 321/3629 (8.85%)...
Window 331/3629 (9.12%)...
Window 341/3629 (9.40%)...
Window 351/3629 (9.67%)...
Window 361/3629 (9.95%)...
Window 371/3

Window 1/3629 (0.03%)...
Window 11/3629 (0.30%)...
Window 21/3629 (0.58%)...
Window 31/3629 (0.85%)...
Window 41/3629 (1.13%)...
Window 51/3629 (1.41%)...
Window 61/3629 (1.68%)...
Window 71/3629 (1.96%)...
Window 81/3629 (2.23%)...
Window 91/3629 (2.51%)...
Window 101/3629 (2.78%)...
Window 111/3629 (3.06%)...
Window 121/3629 (3.33%)...
Window 131/3629 (3.61%)...
Window 141/3629 (3.89%)...
Window 151/3629 (4.16%)...
Window 161/3629 (4.44%)...
Window 171/3629 (4.71%)...
Window 181/3629 (4.99%)...
Window 191/3629 (5.26%)...
Window 201/3629 (5.54%)...
Window 211/3629 (5.81%)...
Window 221/3629 (6.09%)...
Window 231/3629 (6.37%)...
Window 241/3629 (6.64%)...
Window 251/3629 (6.92%)...
Window 261/3629 (7.19%)...
Window 271/3629 (7.47%)...
Window 281/3629 (7.74%)...
Window 291/3629 (8.02%)...
Window 301/3629 (8.29%)...
Window 311/3629 (8.57%)...
Window 321/3629 (8.85%)...
Window 331/3629 (9.12%)...
Window 341/3629 (9.40%)...
Window 351/3629 (9.67%)...
Window 361/3629 (9.95%)...
Window 371/3

# FB Prophet

In [14]:
import os
import logging

from prophet import Prophet

import warnings

warnings.filterwarnings('ignore')

logging.getLogger('prophet').setLevel(logging.WARNING)


class suppress_stdout_stderr(object):
    '''
    A context manager for doing a "deep suppression" of stdout and stderr in
    Python, i.e. will suppress all print, even if the print originates in a
    compiled C/Fortran sub-function.
       This will not suppress raised exceptions, since exceptions are printed
    to stderr just before a script exits, and after the context manager has
    exited (at least, I think that is why it lets exceptions through).

    '''

    def __init__(self):
        # Open a pair of null files
        self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)]
        # Save the actual stdout (1) and stderr (2) file descriptors.
        self.save_fds = [os.dup(1), os.dup(2)]

    def __enter__(self):
        # Assign the null pointers to stdout and stderr.
        os.dup2(self.null_fds[0], 1)
        os.dup2(self.null_fds[1], 2)

    def __exit__(self, *_):
        # Re-assign the real stdout/stderr back to (1) and (2)
        os.dup2(self.save_fds[0], 1)
        os.dup2(self.save_fds[1], 2)
        # Close the null files
        for fd in self.null_fds + self.save_fds:
            os.close(fd)

for data, name in zip([train_data, test_data], ["train", "test"]):
    X = [
        pd.DataFrame({
            "ds": window.index,
            "y": window["close_log_return"]
        }) for window in data
    ]

    prophet_forecasts = []

    for idx, window in enumerate(X):
        if idx % 100 == 0:
            print(f"Window {idx + 1}/{len(X)} ({(idx + 1)/len(X)*100:.2f}%)...")

        with suppress_stdout_stderr():
            m = Prophet(yearly_seasonality=True,
                        daily_seasonality=True,
                        uncertainty_samples=0)
            m.fit(window)
            future = m.make_future_dataframe(periods=1, include_history=False)
            prophet_forecasts.append(m.predict(future))
            
    write_stats_and_model(f"prophet_{name}.pkl", prophet_forecasts, [])

Window 1/14091 (0.01%)...
Window 101/14091 (0.72%)...
Window 201/14091 (1.43%)...
Window 301/14091 (2.14%)...
Window 401/14091 (2.85%)...
Window 501/14091 (3.56%)...
Window 601/14091 (4.27%)...
Window 701/14091 (4.97%)...
Window 801/14091 (5.68%)...
Window 901/14091 (6.39%)...
Window 1001/14091 (7.10%)...
Window 1101/14091 (7.81%)...
Window 1201/14091 (8.52%)...
Window 1301/14091 (9.23%)...
Window 1401/14091 (9.94%)...
Window 1501/14091 (10.65%)...
Window 1601/14091 (11.36%)...
Window 1701/14091 (12.07%)...
Window 1801/14091 (12.78%)...
Window 1901/14091 (13.49%)...
Window 2001/14091 (14.20%)...
Window 2101/14091 (14.91%)...
Window 2201/14091 (15.62%)...
Window 2301/14091 (16.33%)...
Window 2401/14091 (17.04%)...
Window 2501/14091 (17.75%)...
Window 2601/14091 (18.46%)...
Window 2701/14091 (19.17%)...
Window 2801/14091 (19.88%)...
Window 2901/14091 (20.59%)...
Window 3001/14091 (21.30%)...
Window 3101/14091 (22.01%)...
Window 3201/14091 (22.72%)...
Window 3301/14091 (23.43%)...
Window 

# FB Prophet + статистики од бајесовата анализа

In [88]:
def concat_stats_to_window(window, stat):
    window["stat_mean"] = stat[0]
    window["stat_tstd"] = stat[1]
    window["stat_positive_percentage"] = stat[2]
    window["stat_skew"] = stat[3]
    window["stat_kurtosis"] = stat[4]
    for p in range(0, 101, 5):
        window[f"stat_percentile_{p}"] = stat[4 + p // 5]

    return window


for data, data_name in zip([train_data, test_data], ["train", "test"]):
    for model_name in [
            f"fixed_normal_metropolis_{data_name}.pkl",
            f"fixed_laplace_metropolis_{data_name}.pkl",
            f"fixed_gennorm_metropolis_{data_name}.pkl"
    ]:
        print(f"Model name: {model_name}")
        X = [
            pd.DataFrame({
                "ds": window.index,
                "y": window["close_log_return"]
            }) for window in data[1:]
        ]
        stats = read_stats_and_model(model_name)[0][:-1]
        prophet_forecasts = []

        for idx, (window, stat) in enumerate(zip(X, stats)):
            if idx % 100 == 0:
                print(
                    f"Window {idx + 1}/{len(X)} ({(idx + 1)/len(X)*100:.2f}%)..."
                )

            m = Prophet(yearly_seasonality=True,
                        daily_seasonality=True,
                        uncertainty_samples=0)
            for regressor in [
                    "stat_mean", "stat_tstd", "stat_positive_percentage",
                    "stat_skew", "stat_kurtosis"
            ] + [f"stat_percentile_{p}" for p in range(0, 101, 5)]:
                m.add_regressor(regressor)

            with suppress_stdout_stderr():
                window = concat_stats_to_window(window, stats[0])
                m.fit(window)
                future = concat_stats_to_window(
                    m.make_future_dataframe(periods=1, include_history=False),
                    stats[1])
                prophet_forecasts.append(m.predict(future))

        write_stats_and_model(f"prophet_{model_name}.pkl", prophet_forecasts,
                              [])

Model name: fixed_normal_metropolis_train.pkl
Window 1/14090 (0.01%)...
Window 101/14090 (0.72%)...
Window 201/14090 (1.43%)...
Window 301/14090 (2.14%)...
Window 401/14090 (2.85%)...
Window 501/14090 (3.56%)...
Window 601/14090 (4.27%)...
Window 701/14090 (4.98%)...
Window 801/14090 (5.68%)...
Window 901/14090 (6.39%)...
Window 1001/14090 (7.10%)...
Window 1101/14090 (7.81%)...
Window 1201/14090 (8.52%)...
Window 1301/14090 (9.23%)...
Window 1401/14090 (9.94%)...
Window 1501/14090 (10.65%)...
Window 1601/14090 (11.36%)...
Window 1701/14090 (12.07%)...
Window 1801/14090 (12.78%)...
Window 1901/14090 (13.49%)...
Window 2001/14090 (14.20%)...
Window 2101/14090 (14.91%)...
Window 2201/14090 (15.62%)...
Window 2301/14090 (16.33%)...
Window 2401/14090 (17.04%)...
Window 2501/14090 (17.75%)...
Window 2601/14090 (18.46%)...
Window 2701/14090 (19.17%)...
Window 2801/14090 (19.88%)...
Window 2901/14090 (20.59%)...
Window 3001/14090 (21.30%)...
Window 3101/14090 (22.01%)...
Window 3201/14090 (22

Window 12901/14090 (91.56%)...
Window 13001/14090 (92.27%)...
Window 13101/14090 (92.98%)...
Window 13201/14090 (93.69%)...
Window 13301/14090 (94.40%)...
Window 13401/14090 (95.11%)...
Window 13501/14090 (95.82%)...
Window 13601/14090 (96.53%)...
Window 13701/14090 (97.24%)...
Window 13801/14090 (97.95%)...
Window 13901/14090 (98.66%)...
Window 14001/14090 (99.37%)...
Model name: fixed_gennorm_metropolis_train.pkl
Window 1/14090 (0.01%)...
Window 101/14090 (0.72%)...
Window 201/14090 (1.43%)...
Window 301/14090 (2.14%)...
Window 401/14090 (2.85%)...
Window 501/14090 (3.56%)...
Window 601/14090 (4.27%)...
Window 701/14090 (4.98%)...
Window 801/14090 (5.68%)...
Window 901/14090 (6.39%)...
Window 1001/14090 (7.10%)...
Window 1101/14090 (7.81%)...
Window 1201/14090 (8.52%)...
Window 1301/14090 (9.23%)...
Window 1401/14090 (9.94%)...
Window 1501/14090 (10.65%)...
Window 1601/14090 (11.36%)...
Window 1701/14090 (12.07%)...
Window 1801/14090 (12.78%)...
Window 1901/14090 (13.49%)...
Window 2

# Графички приказ на предвидувањата

In [56]:
from matplotlib import pyplot as plt
import numpy as np


def plot_results(x, y_true, y_hat, y_upper, y_lower):
    fig, ax = plt.subplots()
    ax.plot(x, y_true, linewidth=2, color="green")
    ax.plot(x, y_hat, linewidth=2)
    ax.fill_between(x, y_lower, y_upper, alpha=0.5, linewidth=0)

In [82]:
def get_plot_data(model_name, data, start, end):
    stats, _ = read_stats_and_model(model_name)
    stats_plot = [{
        "y_hat": s[0],
        "y_lower": s[6],
        "y_upper": s[-2]
    } for s in stats[start:end]]
    y_hat = [s["y_hat"] for s in stats_plot]
    y_lower = [s["y_lower"] for s in stats_plot]
    y_upper = [s["y_upper"] for s in stats_plot]
    y_true = [
        window["close_log_return"].iloc[-1].item()
        for window in data[start + 1:end + 1]
    ]
    x = [
        f"{window.index[-1].day} {window.index[-1].month_name()} {window.index[-1].year}"
        for window in data[start + 1:end + 1]
    ]
    return {
        "x": x,
        "y_true": y_true,
        "y_hat": y_hat,
        "y_upper": y_upper,
        "y_lower": y_lower
    }

In [83]:
plot_results(**get_plot_data("fixed_normal_metropolis.pkl", train_data, 1000, 1100))

<IPython.core.display.Javascript object>

INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should

In [84]:
plot_results(**get_plot_data("fixed_laplace_metropolis.pkl", train_data, 1000, 1100))

<IPython.core.display.Javascript object>

INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should

In [85]:
plot_results(**get_plot_data("fixed_gennorm_metropolis.pkl", train_data, 1000, 1100))

<IPython.core.display.Javascript object>

INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should

In [70]:
def get_prophet_plot_data(model_name, data, start, end):
    stats, _ = read_stats_and_model(model_name)
    stats_plot = [{
        "y_hat": s["yhat"].item(),
        "y_lower": s["yhat"].item(),
        "y_upper": s["yhat"].item()
    } for s in stats[start:end]]
    y_hat = [s["y_hat"] for s in stats_plot]
    y_lower = [s["y_lower"] for s in stats_plot]
    y_upper = [s["y_upper"] for s in stats_plot]
    y_true = [
        window["close_log_return"].iloc[-1].item()
        for window in data[start + 1:end + 1]
    ]
    x = [
        f"{window.index[-1].day} {window.index[-1].month_name()} {window.index[-1].year}"
        for window in data[start + 1:end + 1]
    ]
    return {
        "x": x,
        "y_true": y_true,
        "y_hat": y_hat,
        "y_upper": y_upper,
        "y_lower": y_lower
    }

In [71]:
plot_results(**get_prophet_plot_data("prophet_train.pkl", train_data, 1000, 1100))

<IPython.core.display.Javascript object>

INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
INFO:matplotlib.category:Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should