In [20]:
import numpy as np
import pandas as pd
from statsmodels.stats.proportion import proportions_ztest

import warnings
# Suppress every warning category for the remainder of the kernel session.
# NOTE(review): this presumably also hides numeric RuntimeWarnings raised while
# computing the statistics below — confirm that silencing them is intended.
warnings.filterwarnings('ignore')

In [2]:
# Tau values to evaluate, listed in ascending order. get_multivariate_scores_dfs
# reads one score CSV per (iteration, tau) pair for every entry in this list.
taus = [
    0.0005, 0.001, 0.002, 0.005,
    0.01, 0.02, 0.05,
    0.1, 0.15, 0.2,
]

In [None]:
def get_stats(df):
    """Summarise every column of ``df`` whose name contains ``"val"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise. Only columns with the substring ``"val"`` in
        their name are used; every other column is ignored.

    Returns
    -------
    pandas.DataFrame
        One row per selected column, in the order the columns appear in
        ``df``, with the columns ``Models`` (the source column name),
        ``MEAN``, ``STD``, ``5th PERCENTIL`` and ``95th PERCENTIL``.
        Missing values are skipped by all four statistics.
    """
    # The output labels are kept exactly as they were (including the
    # "PERCENTIL" spelling) because callers may select columns by name.
    out_columns = ["Models", "MEAN", "STD", "5th PERCENTIL", "95th PERCENTIL"]

    rows = []
    for name in df.columns:
        if "val" not in name:
            continue
        series = df[name]
        # Series.mean()/std() skip NaN by default, so the percentiles use the
        # NaN-aware variant too; plain np.percentile returns NaN for the whole
        # column as soon as one value is missing, which made rows inconsistent.
        rows.append((
            name,
            series.mean(),
            series.std(),
            np.nanpercentile(series, 5),
            np.nanpercentile(series, 95),
        ))

    # Build the frame once from the collected rows instead of column by column.
    return pd.DataFrame(rows, columns=out_columns)

In [4]:
# NOTE(review): get_stats, as defined in this notebook, takes a single DataFrame
# argument, so this two-argument call with the `taus` list would raise TypeError
# on a fresh kernel. The output shown for this cell presumably comes from an
# earlier version of get_stats — confirm, then update or remove this cell.
stats_1 = get_stats(taus, "")
stats_1[0]

Unnamed: 0,Models,MEAN,STD,5th PERCENTIL,95th PERCENTIL
2,t_dist+val,-0.005383,0.002877,-0.009935,-0.000714
0,gauss_dist+val,-0.001657,0.001339,-0.003741,-6e-05
1,gauss_dist+score,7e-06,1e-06,4e-06,9e-06
3,t_dist+scor,9e-06,2e-06,6e-06,1.1e-05


In [16]:
def get_multivariate_scores_dfs(
    taus,
    n_iters=10,
    n_rows=63,
    base_dir="multivariate_scores/taus",
):
    """Load the per-iteration, per-tau multivariate score CSV files.

    For every iteration ``1..n_iters`` and every value in ``taus`` this reads

        <base_dir>/tau_<label>/multivariate_dists_scores_11_stocks_<label>_iter_<iteration>.csv

    where ``<label>`` is ``str(tau)`` with ``"."`` replaced by ``"_"``
    (for example ``0.0005`` becomes ``0_0005``). The first CSV column is used
    as the index and only the first ``n_rows`` rows are kept.

    Parameters
    ----------
    taus : iterable
        Tau values to load; each value is also used as a key of the result.
    n_iters : int, optional
        Number of iterations to load, numbered from 1. Defaults to 10.
    n_rows : int or None, optional
        Maximum number of leading rows kept from each file. Defaults to 63;
        ``None`` keeps every row.
    base_dir : str or path-like, optional
        Directory that contains the ``tau_<label>`` sub-directories.

    Returns
    -------
    dict
        ``scores[iteration][tau]`` is the DataFrame loaded for that pair.

    Notes
    -----
    Errors raised by ``pandas.read_csv`` (for example for a missing file)
    are not caught here and propagate to the caller.
    """
    # Materialise once so a one-shot iterable can be reused for every iteration.
    taus = list(taus)
    # The path fragment depends only on tau, so compute it once per tau rather
    # than once per (iteration, tau) pair.
    tau_labels = [str(tau).replace(".", "_") for tau in taus]

    scores = {}
    # `iteration` instead of `iter` so the built-in iter() is not shadowed.
    for iteration in range(1, n_iters + 1):
        tau_dfs = {}
        for tau, label in zip(taus, tau_labels):
            input_path = (
                f"{base_dir}/tau_{label}/"
                f"multivariate_dists_scores_11_stocks_{label}_iter_{iteration}.csv"
            )
            tau_dfs[tau] = pd.read_csv(input_path, index_col=0).iloc[:n_rows]
        scores[iteration] = tau_dfs
    return scores

In [17]:
# Load every score frame; the result is indexed as scores[iteration][tau].
scores = get_multivariate_scores_dfs(taus)
# NOTE: iterations are keyed starting at 1, so there is no scores[0];
# inspect a single frame with e.g. scores[1][taus[0]].

In [11]:
def get_test_results(data, lower_bound, upper_bound, p_value_threshold):
    """Run a proportion z-test on each ``"val"`` column of ``data``.

    For every column whose name contains ``"val"`` the share of entries inside
    the closed interval ``[lower_bound, upper_bound]`` is computed and passed
    to ``proportions_ztest`` with a null-hypothesis proportion of 1.0.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the columns to test.
    lower_bound, upper_bound : float
        Inclusive limits of the interval.
    p_value_threshold : float
        The null hypothesis is flagged as rejected when the p-value is
        strictly below this value.

    Returns
    -------
    pandas.DataFrame
        One row per tested column with the fields ``Column``,
        ``Observed Proportion``, ``P-Value`` and ``Reject H0``.
    """
    null_proportion = 1.0

    def _test_column(name):
        # Summarise a single column as one result record.
        series = data[name]
        n_obs = len(series)
        inside = (series >= lower_bound) & (series <= upper_bound)
        n_inside = inside.sum()
        # NOTE(review): when every entry is inside the interval the statistic
        # is presumably undefined (NaN) for a null value of 1.0 — confirm.
        _, p_val = proportions_ztest(n_inside, n_obs, value=null_proportion)
        return {
            "Column": name,
            "Observed Proportion": n_inside / n_obs,
            "P-Value": p_val,
            "Reject H0": p_val < p_value_threshold,
        }

    records = [_test_column(name) for name in data.columns if "val" in name]
    return pd.DataFrame(records)

In [21]:
# Reload the score frames so this cell can be run on its own.
scores = get_multivariate_scores_dfs(taus)

# Closed interval [a, b] that values are checked against, and the threshold
# below which a p-value counts as rejecting the null hypothesis.
a, b = -0.005, 0
p_value_threshold = 0.05

# The loop variable is `iteration`, not `iter`: a top-level variable named
# `iter` would replace the built-in iter() for the rest of the kernel session.
# Iterating over the loaded dict also avoids repeating the loader's 1..10 range.
for iteration, tau_scores in scores.items():
    for tau, tau_score in tau_scores.items():
        results = get_test_results(tau_score, a, b, p_value_threshold)
        print(f"Portfolio Iter: {iteration}, Tau: {tau}\n")
        print(results)
        print("\n")

Portfolio Iter: 1, Tau: 0.0005

           Column  Observed Proportion       P-Value  Reject H0
0  gauss_dist+val             0.984127  3.134390e-01      False
1      t_dist+val             0.301587  1.368842e-33       True


Portfolio Iter: 1, Tau: 0.001

           Column  Observed Proportion       P-Value  Reject H0
0  gauss_dist+val             0.984127  3.134390e-01      False
1      t_dist+val             0.333333  3.074833e-29       True


Portfolio Iter: 1, Tau: 0.002

           Column  Observed Proportion       P-Value  Reject H0
0  gauss_dist+val             0.984127  3.134390e-01      False
1      t_dist+val             0.238095  9.347229e-46       True


Portfolio Iter: 1, Tau: 0.005

           Column  Observed Proportion       P-Value  Reject H0
0  gauss_dist+val             0.952381  7.592696e-02      False
1      t_dist+val             0.222222  7.035928e-50       True


Portfolio Iter: 1, Tau: 0.01

           Column  Observed Proportion       P-Value  Reject H0
0  ga

In [None]:
# Print the summary statistics of every loaded score frame.
# The loop variable is `iteration`, not `iter`: a top-level variable named
# `iter` would replace the built-in iter() for the rest of the kernel session.
# Iterating over `scores` itself avoids repeating the loader's 1..10 range.
for iteration, tau_scores in scores.items():
    for tau, tau_score in tau_scores.items():
        stats = get_stats(tau_score)
        print(f"STATS: Portfolio Iter: {iteration}, Tau: {tau}\n")
        print(stats)
        print("\n")