In [1]:
import numpy as np
import pandas as pd
import yfinance
import statsmodels.formula.api as smf
import statsmodels.api as smi
from table_making import summary_col
import matplotlib.pyplot as plt
import re
import os

In [2]:
def latex_format(sm, row=0, delta=False, mean=True):
    """Rewrite the LaTeX of a regression summary table into the house layout.

    The function only performs literal text substitutions on ``sm.as_latex()``;
    patterns that do not occur in the input are silently skipped.

    Parameters
    ----------
    sm : object exposing ``as_latex()`` that returns a LaTeX string
        (presumably the result of ``summary_col`` -- confirm against callers).
    row : int
        Index into the time-to-maturity labels ("1/12", "1/4", "1/2", "1", "2")
        used in the five-column header. Raises ``IndexError`` if out of range.
    delta : bool
        If True the five-column header reads ``|Delta x_t[row, col]|``,
        otherwise ``x_t[row, col]``.
    mean : bool
        Selects the symbol substituted for every occurrence of the text "iv":
        mu(x-hat) when True, sigma(x-hat) when False.

    Returns
    -------
    str
        The rewritten LaTeX, with the surrounding table/center environment removed.
    """
    ltx = sm.as_latex()
    # Collapse runs of spaces so the literal patterns below match regardless of padding.
    ltx = re.sub(r" +", " ", ltx)

    row_correspondence = ["1/12", "1/4", "1/2", "1", "2"]
    col_correspondence = ["0.7", "0.85", "1", "1.15", "1.3"]
    row_val = row_correspondence[row]

    if delta:
        header_cell = "& $|\\Delta x_t[{row}, {col}]|$ "
    else:
        header_cell = "& $x_t[{row}, {col}]$ "
    models = "".join(header_cell.format(row=row_val, col=col) for col in col_correspondence)

    # All backslashes are written as explicit "\\" escapes: sequences such as
    # "\c", "\_", "\s" or "\m" are invalid escapes in a normal string literal.
    five_col_header = (
        " " + models + " \\\\\n"
        " & (1) & (2)  & (3) & (4) & (5) \\\\\n"
        "\\cmidrule(lr){2-2} \\cmidrule(lr){3-3} \\cmidrule(lr){4-4} "
        "\\cmidrule(lr){5-5} \\cmidrule(lr){6-6}"
    )
    two_col_header = (
        " & $r_t$ & $|\\Delta r_t|$ \\\\\n"
        " & (1) & (2) \\\\\n"
        "\\cmidrule(lr){2-2} \\cmidrule(lr){3-3} "
    )
    iv_symbol = "$\\mu(\\hat{x})$" if mean else "$\\sigma(\\hat{x})$"

    # Order matters: e.g. "llllll" must be rewritten before the shorter "lll".
    replacements = [
        (" & col0 & col1 & col2 & col3 & col4 \\\\\n\\hline", five_col_header),
        (" & log\\_ret & abs\\_ret \\\\\n\\hline", two_col_header),
        ("ret\\_std", "$\\sigma(\\hat{r_t})$"),
        ("ret\\_mean", "$\\mu(\\hat{r_t})$"),
        # NOTE(review): this hits every "iv" substring, not only the regressor name.
        ("iv", iv_symbol),
        ("R-squared Adj.", "\\hline\n$R^2$"),
        ("\\$N\\$", "$N$"),
        ("llllll", "lccccc"),
        ("lll", "lcc"),
        ("cc}\n\\hline", "cc}\n\\hline\\hline"),
        ("\\begin{table}\n\\caption{}\n\\label{}\n\\begin{center}\n", ""),
        ("\\end{center}\n\\end{table}", ""),
    ]
    for old, new in replacements:
        ltx = ltx.replace(old, new)
    return ltx

def latex_format2(df: pd.DataFrame):
    """Render ``df`` through its Styler as a six-column LaTeX tabular and add rules.

    After rendering with column format ``lccccc`` the text is patched in order:
    every "nan" becomes a blank, a ``cmidrule`` spanning columns 2-6 is placed
    after the header row ending in "K/S=1.3", a ``toprule`` follows the column
    specification and a ``bottomrule`` precedes the end of the tabular.
    """
    substitutions = (
        ("nan", " "),
        ("K/S=1.3 \\\\\n", "K/S=1.3 \\\\\n\\cmidrule{2-6}\n"),
        ("cc}\n", "cc}\n\\toprule\n"),
        ("\\\\\n\\end{tabular}", "\\\\\n\\bottomrule\n\\end{tabular}"),
    )
    rendered = df.style.to_latex(column_format="lccccc")
    for needle, patch in substitutions:
        rendered = rendered.replace(needle, patch)
    return rendered

def latex_format3(df: pd.DataFrame):
    """Render ``df`` as a seven-column LaTeX tabular of integers and add rules.

    Cells are formatted with ``"{0:d}"`` and the column format is ``lcccccc``.
    The rendered text is then patched in order: every "nan" becomes a blank, a
    ``cmidrule`` spanning columns 2-7 follows the header row ending in "2021",
    a ``toprule`` follows the column specification and a ``bottomrule``
    precedes the end of the tabular.
    """
    styler = df.style.format("{0:d}")
    rendered = styler.to_latex(column_format="lcccccc")
    substitutions = (
        ("nan", " "),
        ("2021 \\\\\n", "2021 \\\\\n\\cmidrule{2-7}\n"),
        ("cc}\n", "cc}\n\\toprule\n"),
        ("\\\\\n\\end{tabular}", "\\\\\n\\bottomrule\n\\end{tabular}"),
    )
    for needle, patch in substitutions:
        rendered = rendered.replace(needle, patch)
    return rendered

def latex_format_ret(sm):
    """Rewrite the LaTeX of a six-period return regression table into the house layout.

    The function only performs literal text substitutions on ``sm.as_latex()``;
    patterns that do not occur in the input are silently skipped.

    Parameters
    ----------
    sm : object exposing ``as_latex()`` that returns a LaTeX string
        (presumably the result of ``summary_col`` -- confirm against callers).

    Returns
    -------
    str
        The rewritten LaTeX, with the surrounding table/center environment removed.
    """
    ltx = sm.as_latex()
    # Collapse runs of spaces so the literal patterns below match regardless of padding.
    ltx = re.sub(r" +", " ", ltx)

    period_header = "& 2000-2002 & 2003-2007 & 2008-2009 & 2010-2014 & 2015-2019 & 2020-2021 \\\\\n"
    # All backslashes are written as explicit "\\" escapes: "\c" and "\_" are
    # invalid escape sequences in a normal string literal.
    numbered_header = (
        " " + period_header
        + " & (1) & (2) & (3) & (4) & (5) & (6) \\\\\n"
        + "\\cmidrule(lr){2-2} \\cmidrule(lr){3-3} \\cmidrule(lr){4-4} "
        + "\\cmidrule(lr){5-5} \\cmidrule(lr){6-6} \\cmidrule(lr){7-7}"
    )

    replacements = [
        (period_header + "\\hline", numbered_header),
        ("ret\\_std", "$\\sigma(\\hat{r_t})$"),
        ("ret\\_mean", "$\\mu(\\hat{r_t})$"),
        ("R-squared Adj.", "\\hline\n$R^2$"),
        ("\\$N\\$", "$N$"),
        ("lllllll", "lcccccc"),
        ("cc}\n\\hline", "cc}\n\\hline\\hline"),
        ("\\begin{table}\n\\caption{}\n\\label{}\n\\begin{center}\n", ""),
        ("\\end{center}\n\\end{table}", ""),
    ]
    for old, new in replacements:
        ltx = ltx.replace(old, new)
    return ltx

In [3]:
# 5x5 grid of surface column names, looked up as cols[row][col]:
# the outer level iterates over the time-to-maturity label, the inner level
# over the moneyness label.
cols = [
    [
        f"ttm_{maturity}_moneyness_pt_{moneyness}"
        for moneyness in ("seven", "eightfive", "one", "oneonefive", "onethree")
    ]
    for maturity in ("one_month", "three_month", "six_month", "one_year", "two_year")
]

In [4]:
def _significance_stars(pval):
    """Return one "*" for each of the thresholds 0.1, 0.05 and 0.01 that ``pval`` is below."""
    return "".join("*" for threshold in (.1, .05, .01) if pval < threshold)


def _fit_ret_std_regression(response, data):
    """Fit ``response ~ 1 + ret_std`` by OLS (cov_type="HC3"); return (starred coefficient, adjusted R^2) strings."""
    fit = smf.ols(f"{response}~1+ret_std", data=data).fit(cov_type="HC3")
    coeff_str = "%.4f" % fit.params["ret_std"] + _significance_stars(fit.pvalues["ret_std"])
    return coeff_str, "%.3f" % fit.rsquared_adj


def regression_surface_grids(simulated_surfaces, simulated_returns, spx_df: pd.DataFrame, average_day=1,
                             base_folder="tables/iv_all", model_type="loss",
                             use_sample_mean=False):
    """Regress every surface grid point on the simulated-return dispersion and write LaTeX tables.

    For each of the 5x5 grid columns named in ``cols`` two OLS regressions on
    ``ret_std`` (the standard deviation of ``simulated_returns`` along axis 1)
    are fitted: one for the observed level of the column and one for its
    absolute change relative to the lagged rolling mean. The ``ret_std``
    coefficients (with significance stars) and adjusted R^2 values are
    collected into 5x5 tables and written with ``latex_format2`` to
    ``{base_folder}/{mean|std}/reg_overall_{model_type}_{models|delta}_{coeff|r2}.tex``.

    Parameters
    ----------
    simulated_surfaces : array indexed as ``[:, :, row, col]`` and reduced over
        axis 1; axis 0 must have as many entries as rows selected from ``spx_df``
        (presumably days x samples x 5 x 5 -- TODO confirm).
    simulated_returns : array reduced over axis 1; axis 0 must match likewise.
    spx_df : DataFrame containing a "date" column and every column in ``cols``.
    average_day : rolling-window length used for the lagged mean in the delta series.
    base_folder, model_type : components of the output paths.
    use_sample_mean : if True, the delta series is the absolute difference
        between the observed column and the lagged rolling mean of the
        simulated mean (``iv_mean``) instead of the observed column's own
        lagged rolling mean.

    NOTE(review): the per-pass ``iv`` column (simulated mean or std) is computed
    but appears in neither regression formula, so the "mean" and "std" passes
    currently produce identical tables -- confirm whether ``iv`` was meant to
    be a regressor.
    """
    os.makedirs(f"{base_folder}/mean", exist_ok=True)
    os.makedirs(f"{base_folder}/std", exist_ok=True)
    df_cols = ["K/S=0.7", "K/S=0.85", "K/S=1", "K/S=1.15", "K/S=1.3"]
    df_rows = ["1 month", "3 month", "6 month", "1 year", "2 year"]

    # Hard-coded, label-inclusive row window of spx_df; presumably chosen so the
    # selected rows line up with axis 0 of the simulated arrays -- TODO confirm
    # and derive from the inputs instead of literals.
    first_label, last_label = 3, 5303 - 1

    # Identical for every grid point, so computed once.
    ret_std = np.std(simulated_returns, axis=1)

    for test in ["mean", "std"]:
        val_df_coeff = pd.DataFrame(index=df_rows, columns=df_cols)
        val_df_r2 = pd.DataFrame(index=df_rows, columns=df_cols)
        delta_df_coeff = pd.DataFrame(index=df_rows, columns=df_cols)
        delta_df_r2 = pd.DataFrame(index=df_rows, columns=df_cols)

        for row, row_label in enumerate(df_rows):
            for col, col_label in enumerate(df_cols):
                curr_grid = cols[row][col]
                delta_name = f"delta_{curr_grid}"

                # Only the two needed columns are copied; the delta is computed on
                # the full series before the row window is applied.
                regression_df = spx_df[["date", curr_grid]].copy()
                regression_df[delta_name] = np.abs(
                    regression_df[curr_grid]
                    - regression_df[curr_grid].rolling(average_day).mean().shift(1)
                )
                regression_df1 = regression_df.loc[first_label:last_label].copy()

                grid_samples = simulated_surfaces[:, :, row, col]
                iv_mean = np.mean(grid_samples, axis=1)
                regression_df1["iv_mean"] = iv_mean
                regression_df1["iv"] = iv_mean if test == "mean" else np.std(grid_samples, axis=1)
                regression_df1["ret_std"] = ret_std

                if use_sample_mean:
                    regression_df1[delta_name] = np.abs(
                        regression_df1[curr_grid]
                        - regression_df1["iv_mean"].rolling(average_day).mean().shift(1)
                    )

                coeff_str, r2_str = _fit_ret_std_regression(curr_grid, regression_df1)
                val_df_coeff.loc[row_label, col_label] = coeff_str
                val_df_r2.loc[row_label, col_label] = r2_str

                # Stars come from the delta regression's own p-value (the previous
                # code tested the level regression's p-value for the third star).
                delta_coeff_str, delta_r2_str = _fit_ret_std_regression(delta_name, regression_df1)
                delta_df_coeff.loc[row_label, col_label] = delta_coeff_str
                delta_df_r2.loc[row_label, col_label] = delta_r2_str

        tables = {
            "models_coeff": val_df_coeff,
            "models_r2": val_df_r2,
            "delta_coeff": delta_df_coeff,
            "delta_r2": delta_df_r2,
        }
        for suffix, table in tables.items():
            with open(f"{base_folder}/{test}/reg_overall_{model_type}_{suffix}.tex", "w") as f:
                f.write(latex_format2(table))

In [5]:
# Observed data frame; its columns are what regression_surface_grids regresses.
spx_df = pd.read_parquet("data/spx_with_vix.parquet")
# Archive holding the generated output of model_1 ("surfaces" and "ex_feats" arrays).
simulation = np.load("test_spx/2023_08_27/model_1_gen3.npz")
simulated_surfaces = simulation["surfaces"]
# Channel 0 of "ex_feats" is taken as the simulated return -- presumably the log return; TODO confirm.
simulated_returns = simulation["ex_feats"][:, :, 0]
print(simulated_returns.shape)


# Outputs of two other runs; not referenced in the visible part of this notebook.
# NOTE(review): the names suggest "no loss" / "no extra features" variants -- confirm.
simulation_no_loss = np.load("test_spx/2023_08_27/model_0_gen3.npy")
simulation_no_ex = np.load("test_spx/2023_08_27_simple/model_0_gen3.npy")

(5300, 1000)


In [6]:
# Positional arguments: average_day=1, base_folder="tables/iv_ret_oct20"; model_type keeps its default "loss".
regression_surface_grids(simulated_surfaces, simulated_returns, spx_df, 1, "tables/iv_ret_oct20")