In [2]:
import numpy as np
import pandas as pd
from statsmodels.stats.proportion import proportions_ztest

In [3]:
# Tau values analysed in this notebook. Each value is turned into a file-name
# suffix ("." replaced by "_") by the loader functions to locate its scores CSV.
taus = [0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.15, 0.2]

In [5]:
def get_stats(taus, batch_str):
    """Build one summary-statistics table per tau from the multivariate score CSVs.

    For every tau the file
    ``multivariate_scores/multivariate_dists_scores_11_stocks__<tau>.csv``
    is read (with "." in the tau replaced by "_", first CSV column used as the
    index) and each of its columns is summarised.

    Parameters
    ----------
    taus : iterable
        Tau values; ``str(tau)`` is used to build the file name.
    batch_str : str
        Currently unused. Kept so existing calls keep working.

    Returns
    -------
    list of pandas.DataFrame
        One frame per tau, in the order of ``taus``, with the columns
        ``Models``, ``MEAN``, ``STD``, ``5th PERCENTIL`` and ``95th PERCENTIL``
        (labels kept verbatim for backward compatibility), sorted by ascending
        ``MEAN``. The index keeps each model's original column position.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pd.read_csv`` when a tau has no matching CSV.
    """
    stat_dfs = []
    for tau in taus:
        tau_str = str(tau).replace(".", "_")
        input_path = r"multivariate_scores/" + f'multivariate_dists_scores_11_stocks__{tau_str}.csv'
        df = pd.read_csv(input_path, index_col=0)

        # DataFrame.quantile skips NaNs just like mean()/std() do, so all four
        # statistics of a column are computed over the same observations.
        # (np.percentile would return NaN for any column containing a NaN.)
        # Both use linear interpolation, so NaN-free data gives the same values.
        stats = pd.DataFrame({
            "Models": df.columns.to_list(),
            "MEAN": df.mean().to_numpy(),
            "STD": df.std().to_numpy(),
            "5th PERCENTIL": df.quantile(0.05).to_numpy(),
            "95th PERCENTIL": df.quantile(0.95).to_numpy(),
        })

        # Non-inplace sort: returns a new frame and keeps the original row labels.
        stat_dfs.append(stats.sort_values(by="MEAN"))
    return stat_dfs

In [8]:
# One summary table per tau, in the order of `taus`; the second argument
# (batch_str) is passed as an empty string.
stats_1 = get_stats(taus, "")
# Show the summary for the first tau in `taus`.
stats_1[0]

Unnamed: 0,Models,MEAN,STD,5th PERCENTIL,95th PERCENTIL
2,t_dist+val,-0.005383,0.002877,-0.009935,-0.000714
0,gauss_dist+val,-0.001657,0.001339,-0.003741,-6e-05
1,gauss_dist+score,7e-06,1e-06,4e-06,9e-06
3,t_dist+scor,9e-06,2e-06,6e-06,1.1e-05


In [9]:
def get_multivariate_scores_dfs(taus, batch_str):
    """Read the raw multivariate score CSV for each tau.

    Parameters
    ----------
    taus : iterable
        Tau values; ``str(tau)`` with "." replaced by "_" forms the file suffix.
    batch_str : str
        Not referenced by the body; present only in the signature.

    Returns
    -------
    list of pandas.DataFrame
        One frame per tau, in the order of ``taus``, read with the first CSV
        column as the index.
    """
    def _path_for(tau):
        # Same file-naming scheme as the summary loader: "0.001" -> "0_001".
        tau_str = str(tau).replace(".", "_")
        return r"multivariate_scores/" + f'multivariate_dists_scores_11_stocks__{tau_str}.csv'

    return [pd.read_csv(_path_for(tau), index_col=0) for tau in taus]

In [11]:
# Raw score frames, one per tau, in the same order as `taus`.
dfs = get_multivariate_scores_dfs(taus, "")
# dfs[0]  # uncomment to preview the frame for the first tau

In [13]:


# Proportion test: for each "val" column of one tau's score frame, compute the
# share of values inside the closed interval [a, b] and test that share against
# an expected proportion with a one-sample z-test.

# Position in `dfs` of the frame to analyse.
tau_idx = 1
data = dfs[tau_idx]
# Inclusive bounds of the interval the values are checked against.
a, b = -0.005, 0
# Significance level: H0 is flagged as rejected when the p-value is below it.
p_value_threshold = 0.05
# Only columns whose name contains the substring "val" are tested.
val_columns = [col for col in data.columns if "val" in col]

results = []

for col in val_columns:
    # Count of values with a <= value <= b. NaN compares False on both sides,
    # so missing values are never counted as in range...
    in_range_count = ((data[col] >= a) & (data[col] <= b)).sum()
    # ...but len() does count them, so NaNs lower the observed proportion.
    total_count = len(data[col])
    observed_proportion = in_range_count / total_count

    # Null-hypothesis proportion: every value lies inside [a, b].
    # NOTE(review): p = 1.0 sits on the boundary of the parameter space and the
    # test is called with its default settings otherwise; confirm that a z-test
    # against this null is the intended check (the statistic is presumably
    # undefined when the observed proportion is exactly 1 -- verify).
    expected_proportion = 1.0

    stat, p_value = proportions_ztest(in_range_count, total_count, value=expected_proportion)

    results.append({
        "Column": col,
        "Observed Proportion": observed_proportion,
        "P-Value": p_value,
        "Reject H0": p_value < p_value_threshold
    })

# One row per tested column; the bare last expression renders the table.
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Column,Observed Proportion,P-Value,Reject H0
0,gauss_dist+val,0.984127,0.313439,False
1,t_dist+val,0.333333,3.074833e-29,True
