In [2]:
# USE 'BASE' ENVIRONMENT

import numpy as np
import pandas as pd
from statsmodels.stats.proportion import proportions_ztest
from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [3]:
def get_taus_complex_models_scores(taus):
    scores = {}
    for iter in range(1, 11):
        tau_dfs = {}
        for tau in taus:
            tau_str = str(tau).replace(".", "_")
            input_path = r"../scores/taus/" + f'tau_{tau_str}/scores_11_stocks_{tau_str}_iter_{iter}.csv'
            df = pd.read_csv(input_path, index_col = 0)
            df = df.iloc[:63]
            tau_dfs[tau] = df
        scores[iter] = tau_dfs
    return scores

def get_multivariate_scores_dfs(taus):
    scores = {}
    for iter in range(1, 11):
        tau_dfs = {}
        for tau in taus:
            tau_str = str(tau).replace(".", "_")
            input_path = r"multivariate_scores/taus/" + f'tau_{tau_str}/multivariate_dists_scores_11_stocks_{tau_str}_iter_{iter}.csv'
            df = pd.read_csv(input_path, index_col = 0)
            df = df.iloc[:63]
            tau_dfs[tau] = df
        scores[iter] = tau_dfs
    return scores

def get_test_results(data, lower_bound, upper_bound, p_value_threshold):
    val_columns = [col for col in data.columns if "val" in col]

    results = []
    for col in val_columns:
        in_range_count = ((data[col] >= lower_bound) & (data[col] <= upper_bound)).sum()
        total_count = len(data[col])
        observed_proportion = in_range_count / total_count
        expected_proportion = 1.0
        stat, p_value = proportions_ztest(in_range_count, total_count, value=expected_proportion)
        results.append({
            "Column": col,
            "Observed Proportion": observed_proportion,
            "P-Value": p_value,
            "Reject H0": p_value < p_value_threshold
        })
    results_df = pd.DataFrame(results)
    return results_df

In [4]:
taus = [0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.15, 0.2]

In [5]:

scores = get_taus_complex_models_scores(taus)
multivariate_scores = get_multivariate_scores_dfs(taus)

a, b = -0.005, 0
p_value_threshold = 0.05

for iter in range(1, 11):
    for tau in taus:
        tau_score = scores[iter][tau]
        results = get_test_results(tau_score, a, b, p_value_threshold)

        multivariate_tau_score = multivariate_scores[iter][tau]
        multivariate_results = get_test_results(multivariate_tau_score, a, b, p_value_threshold)

        print(f"Portfolio Iter: {iter}, Tau: {tau}\n")
        print(results)
        print("\n")
        print(multivariate_results)
        print("\n")






Portfolio Iter: 1, Tau: 0.0005

                          Column  Observed Proportion   P-Value  Reject H0
0  clayton_random+gauss_dist+val             0.873016  0.002469       True
1      clayton_random+t_dist+val             0.936508  0.038764       True
2        gaussian+gauss_dist+val             1.000000       NaN      False
3            gaussian+t_dist+val             1.000000       NaN      False
4       t_student+gauss_dist+val             1.000000       NaN      False
5           t_student+t_dist+val             1.000000       NaN      False


           Column  Observed Proportion       P-Value  Reject H0
0  gauss_dist+val             0.984127  3.134390e-01      False
1      t_dist+val             0.301587  1.368842e-33       True


Portfolio Iter: 1, Tau: 0.001

                          Column  Observed Proportion   P-Value  Reject H0
0  clayton_random+gauss_dist+val             0.857143  0.001194       True
1      clayton_random+t_dist+val             0.936508  0.038764   

In [26]:
a, b = -0.005, 0
p_value_threshold = 0.05


complex_model_results = []
multivariate_model_results = []

for iter in range(1, 11):
    for tau in taus:
        tau_score = scores[iter][tau]
        results = get_test_results(tau_score, a, b, p_value_threshold)
        results["Portfolio Iter"] = iter
        complex_model_results.append(results)

        multivariate_tau_score = multivariate_scores[iter][tau]
        multivariate_results = get_test_results(multivariate_tau_score, a, b, p_value_threshold)
        multivariate_results["Portfolio Iter"] = iter
        multivariate_model_results.append(multivariate_results)


complex_model_df = pd.concat(complex_model_results, ignore_index=True)
multivariate_model_df = pd.concat(multivariate_model_results, ignore_index=True)

def summarize_results_with_totals(df, model_type):
    counts = (
        df[df["Reject H0"] == False]
        .groupby(["Portfolio Iter", "Column"])
        .size()
        .reset_index(name="Count")
    )

    all_models = df["Column"].unique()
    all_portfolios = df["Portfolio Iter"].unique()
    full_index = pd.MultiIndex.from_product(
        [all_portfolios, all_models],
        names=["Portfolio Iter", "Column"]
    )
    counts = counts.set_index(["Portfolio Iter", "Column"]).reindex(full_index, fill_value=0).reset_index()
    total_counts = counts.groupby("Column")["Count"].sum().reset_index(name="Total Count")
    min_max_counts = counts.groupby("Column")["Count"].agg(["min", "max"]).reset_index()
    min_max_counts.columns = ["Column", "Min Count", "Max Count"]

    summary = pd.merge(total_counts, min_max_counts, on="Column")
    summary["Model Type"] = model_type
    return summary





In [27]:
complex_model_summary = summarize_results_with_totals(complex_model_df, "Complex Models")
multivariate_model_summary = summarize_results_with_totals(multivariate_model_df, "Multivariate Models")


final_summary = pd.concat([complex_model_summary, multivariate_model_summary], ignore_index=True)
final_summary

Unnamed: 0,Column,Total Count,Min Count,Max Count,Model Type
0,clayton_random+gauss_dist+val,11,0,4,Complex Models
1,clayton_random+t_dist+val,18,0,4,Complex Models
2,gaussian+gauss_dist+val,45,0,6,Complex Models
3,gaussian+t_dist+val,53,3,6,Complex Models
4,t_student+gauss_dist+val,46,3,6,Complex Models
5,t_student+t_dist+val,54,4,6,Complex Models
6,gauss_dist+val,17,0,5,Multivariate Models
7,t_dist+val,0,0,0,Multivariate Models
