In [1]:
import os
import random

import numpy as np
import pandas as pd
import real_data_loader
import stylized_score
import torch
import visualize_stylized_facts as vsf

## Set seeds for reproducibility of the results

In [2]:
# A single seed shared by the three random number generators seeded here:
# Python's `random`, NumPy's legacy global RNG and PyTorch.
SEED = 12345
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7ca51b7b7290>

#### Load Data

Load the price data, then filter it down to the selected time window and stocks.

In [3]:
# Read the "Adj Close" time series through the project loader (using its cache
# directory, without forcing an update) and discard the SPY column.
real_loader = real_data_loader.RealDataLoader(cache="../data/cache")
real_data = real_loader.get_timeseries(
    col_name="Adj Close",
    data_path="../data/raw_yahoo_data",
    update_all=False,
).drop(columns=["SPY"])
print(real_data.shape)

Cached data found at ../data/cache/time_series_Adj_Close.csv.
(15648, 503)


Inspect the data for missing values

In [4]:
# Required number of non-missing price observations per stock.
# NOTE: despite its name this constant counts rows (time steps), not stocks: it
# is compared against per-column non-NaN counts here and used as a row offset
# when the time window is cut in the next cell.
N_STOCKS = 9216

# True where a price is present (i.e. NOT NaN); same shape as `real_data`.
nan_mask: pd.DataFrame = real_data.notna()

# Number of available observations per stock. Reducing over the rows of a
# DataFrame yields a Series indexed by the column labels.
num_non_nans: pd.Series = nan_mask.sum(axis=0)
non_nan_orderd_stocks: pd.Series = num_non_nans.sort_values(ascending=False)

# Labels of the stocks with at least N_STOCKS observations, ordered from the
# most to the fewest available observations.
stocks = non_nan_orderd_stocks[non_nan_orderd_stocks >= N_STOCKS].index

In [5]:
# Alternative manual selections (uncomment one to override `stocks`):
# stocks = ["MSFT", "AAPL", "IBM", "ADBE", "ORCL", "INTC", "AMD", "HPQ"]
# stocks = ['MSFT']

# Keep only the last N_STOCKS rows and the pre-selected stocks.
# NOTE: this cell overwrites `real_data`, so it is not idempotent; re-run the
# loading cell before running it a second time.
first_date = real_data.index[-N_STOCKS]
real_data = real_data.loc[first_date:, stocks]

# A stock can have enough observations overall and still have gaps inside the
# selected window, so drop every column that still contains a NaN.
real_data = real_data.dropna(axis="columns")
assert real_data.notna().all(axis=None), "NaNs remain after filtering the columns"

print(f"Shape: {real_data.shape}")
print(f"First Date {real_data.index[0]:%Y-%m-%d}")
print(f"Last Date {real_data.index[-1]:%Y-%m-%d}")

Shape: (9216, 233)
Frist Date 1987-08-05
Last Date 2024-03-01


In [6]:
# Log returns whose absolute value reaches this threshold are treated as data
# errors: |log return| >= 2 means the price changed by a factor of at least
# e^2 (about 7.4) between two consecutive rows.
MAX_ABS_LOG_RETURN = 2

# Row-over-row log returns, one column per stock.
np_data = np.array(real_data)
np_ret = np.log(np_data[1:] / np_data[:-1])

# Clean the data by zeroing the implausibly large returns.
np_ret[np.abs(np_ret) >= MAX_ABS_LOG_RETURN] = 0

In [7]:
# Second argument passed to the plotting helper.
# NOTE(review): presumably the figure width in inches (1.1 x a 5.106 in text
# width) — confirm against `visualize_stylized_facts_paper`.
FIG_WIDTH = 1.1 * 5.106

# Directory the figure is written to. The default is the original,
# machine-specific location; set THESIS_FIGURE_DIR to write somewhere else
# without editing the notebook.
FIGURE_DIR = os.environ.get("THESIS_FIGURE_DIR", "/home/nico/thesis/thesis/figure")

fig = vsf.visualize_stylized_facts_paper(np_ret, FIG_WIDTH)
fig.savefig(os.path.join(FIGURE_DIR, "st_fact.pgf"))

In [8]:
# Compute the statistics of the real log returns via `stylized_score`; the
# result is indexed with "<prefix>_real_stat" keys in the cells below.
# NOTE(review): `_compute_stats` is an underscore-prefixed (private) helper;
# prefer a public entry point of `stylized_score` if one exists.
stats = stylized_score._compute_stats(np_ret, "real")

In [9]:
# Show the `beta` entries stored under "cfv_real_stat".
# NOTE(review): "cfv" presumably abbreviates coarse-fine volatility — confirm
# in `stylized_score`.
cf_stats = stats["cfv_real_stat"]
sel = ["beta", *(f"beta_{agg}" for agg in ("max", "std", "min", "mean", "median"))]
{name: cf_stats[name] for name in sel}

{'beta': -1.3158444829200642,
 'beta_max': 0.6878266695738646,
 'beta_std': 0.9285091872521446,
 'beta_min': -9.389689121969688,
 'beta_mean': -0.7791100405421878,
 'beta_median': -0.6161710420388364}

In [10]:
# Show the `mse` entries stored under "lu_real_stat".
# NOTE(review): "lu" presumably abbreviates linear unpredictability — confirm
# in `stylized_score`.
lu_stats = stats["lu_real_stat"]
sel = ["mse", *(f"mse_{agg}" for agg in ("max", "std", "min", "mean", "median"))]
{name: lu_stats[name] for name in sel}

{'mse': 0.0001392362375568713,
 'mse_max': 0.0003180039771793697,
 'mse_std': 2.9113411556717668e-05,
 'mse_min': 0.00010418846956381635,
 'mse_mean': 0.0001392362375568713,
 'mse_median': 0.00013107706282784146}

In [11]:
# Show the `neg_beta` entries stored under "ht_real_stat".
# NOTE(review): "ht" presumably abbreviates heavy tails, with `neg_beta`
# describing the negative-return tail — confirm in `stylized_score`.
ht_stats = stats["ht_real_stat"]
sel = [
    "neg_beta",
    *(f"neg_beta_{agg}" for agg in ("max", "std", "min", "mean", "median")),
]
{name: ht_stats[name] for name in sel}

{'neg_beta': -3.297825272311764,
 'neg_beta_max': -2.261203717318225,
 'neg_beta_std': 0.31571987018756953,
 'neg_beta_min': -4.3268890737840495,
 'neg_beta_mean': -3.1485089480311643,
 'neg_beta_median': -3.1422435517832215}

In [12]:
# Show the `pos_beta` entries stored under "ht_real_stat".
# NOTE(review): presumably the positive-return counterpart of the `neg_beta`
# entries — confirm in `stylized_score`.
ht_stats = stats["ht_real_stat"]
sel = [
    "pos_beta",
    *(f"pos_beta_{agg}" for agg in ("max", "std", "min", "mean", "median")),
]
{name: ht_stats[name] for name in sel}

{'pos_beta': -3.592265544795493,
 'pos_beta_max': -1.788608450055584,
 'pos_beta_std': 0.4173476388528757,
 'pos_beta_min': -4.398758135867928,
 'pos_beta_mean': -3.1565109534345916,
 'pos_beta_median': -3.1678312778327817}

In [13]:
# Show the `beta` entries stored under "le_real_stat".
# NOTE(review): "le" presumably abbreviates leverage effect — confirm in
# `stylized_score`.
le_stats = stats["le_real_stat"]
sel = ["beta", *(f"beta_{agg}" for agg in ("max", "std", "min", "mean", "median"))]
{name: le_stats[name] for name in sel}

{'beta': -0.6751522558367844,
 'beta_max': 0.1194477141495297,
 'beta_std': 0.18478433585766968,
 'beta_min': -0.9643298289623893,
 'beta_mean': -0.44702167788996927,
 'beta_median': -0.4610022297348628}

In [14]:
# Show the `beta` entries stored under "vc_real_stat".
# NOTE(review): "vc" presumably abbreviates volatility clustering — confirm in
# `stylized_score`.
vc_stats = stats["vc_real_stat"]
sel = ["beta", *(f"beta_{agg}" for agg in ("max", "std", "min", "mean", "median"))]
{name: vc_stats[name] for name in sel}

{'beta': -0.30117407015928177,
 'beta_max': -0.10628664404280964,
 'beta_std': 0.11031666888215177,
 'beta_min': -0.771139600968903,
 'beta_mean': -0.3276454014180343,
 'beta_median': -0.3079607028692645}

In [15]:
# Show the `arg_diff` entries stored under "gl_real_stat".
# NOTE(review): "gl" presumably abbreviates gain/loss asymmetry — confirm in
# `stylized_score`.
gl_stats = stats["gl_real_stat"]
sel = [
    "arg_diff",
    *(f"arg_diff_{agg}" for agg in ("mean", "std", "max", "min", "median")),
]
{name: gl_stats[name] for name in sel}

{'arg_diff': 5.0,
 'arg_diff_mean': 9.523605150214593,
 'arg_diff_std': 8.631741048392096,
 'arg_diff_max': 46.5,
 'arg_diff_min': -2.5,
 'arg_diff_median': 7.5}