In [1]:
from distribution_fit_class import DistributionFit
from portfolio_optimization_class import PortfolioOptimization, optimize_windows
import pandas as pd
import numpy as np

In [2]:
returns_df = pd.read_csv(r'data\random_data\1_stocks_per_sector.csv', index_col=0, parse_dates=True)
file_path = r'data\random_data\1_stocks_per_sector.csv'

copulas = ["clayton_random", "gaussian", "t_student"]
distributions = ["gauss_dist", "t_dist"]
window_size = 32
taus = [0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.15, 0.2]
maximum_weight = 0.3

In [4]:


tau_aggregation = []
for tau in taus:
    tau_dict = {}
    portfolios = optimize_windows(number_of_quarters=window_size, tau=tau, maximum_weight=maximum_weight, data_path=file_path)

    expected_validation_values_norm = []
    expected_scoring_values_norm = []
    expected_validation_values_t = []
    expected_scoring_values_t = []
    for window in range(0, 63):  # 60 windows as specified
        if window % 5 == 0:
            print(f'Processing window: {window}, tau: {tau}')

        # Select returns window for fitting distributions
        returns_window = returns_df.iloc[window: 32 + window_size]
        distribution_fit = DistributionFit()
        distribution_fit.set_df(returns_window)
        distribution_fit.fit_multivariate_distributions(4)  # Fit the multivariate distributions

        # Generate samples from fitted distributions
        distribution_fit.generate_multivariate_normal_samples(10000)
        distribution_fit.generate_multivariate_t_samples(10000)

        samples_normal = distribution_fit.get_generated_multivariated_normal_samples()
        samples_t = distribution_fit.get_generated_multivariated_t_samples()
        # print(samples_normal)

        distributions_samples = [
            (samples_normal, "gauss_dist"),
            (samples_t, "t_dist")
        ]

        for simulated_returns, dist_name in distributions_samples:
            weights = portfolios.iloc[window].filter(regex="^w\d+$").values
            expectile = -portfolios.iloc[window]["EVAR"]
            weighted_returns = simulated_returns.values * weights

            portfolio_returns = np.sum(weighted_returns, axis=1)
            # print(len(portfolio_returns))
            validation_values = (1 - tau) * np.minimum(portfolio_returns - expectile, 0) - tau * np.maximum(portfolio_returns - expectile, 0)
            scoring_values = (1 - tau) * np.minimum((portfolio_returns - expectile)**2, 0) + tau * np.maximum((portfolio_returns - expectile)**2, 0)

            expected_validation_value = np.mean(validation_values)
            expected_scoring_value = np.mean(scoring_values)

            if dist_name == "gauss_dist":
                expected_validation_values_norm.append(expected_validation_value)
                expected_scoring_values_norm.append(expected_scoring_value)
            elif dist_name == "t_dist":
                expected_validation_values_t.append(expected_validation_value)
                expected_scoring_values_t.append(expected_scoring_value)

    tau_dict[f'gauss_dist+val'] = expected_validation_values_norm
    tau_dict[f'gauss_dist+score'] = expected_scoring_values_norm
    tau_dict[f't_dist+val'] = expected_validation_values_t
    tau_dict[f't_dist+scor'] = expected_scoring_values_t
    tau_str = str(tau).replace(".", "_")
    output_path = r"multivariate_scores/" + f'multivariate_dists_scores_11_stocks__{tau_str}.csv'
    tau_df = pd.DataFrame.from_dict(tau_dict)
    tau_df.to_csv(output_path)
    tau_aggregation.append(tau_df)




Processing window: 0, tau: 0.0005
Processing window: 5, tau: 0.0005
Processing window: 10, tau: 0.0005
Processing window: 15, tau: 0.0005
Processing window: 20, tau: 0.0005
Processing window: 25, tau: 0.0005
Processing window: 30, tau: 0.0005
Processing window: 35, tau: 0.0005
Processing window: 40, tau: 0.0005
Processing window: 45, tau: 0.0005
Processing window: 50, tau: 0.0005
Processing window: 55, tau: 0.0005
Processing window: 60, tau: 0.0005
Processing window: 0, tau: 0.001
Processing window: 5, tau: 0.001
Processing window: 10, tau: 0.001
Processing window: 15, tau: 0.001
Processing window: 20, tau: 0.001
Processing window: 25, tau: 0.001
Processing window: 30, tau: 0.001
Processing window: 35, tau: 0.001
Processing window: 40, tau: 0.001
Processing window: 45, tau: 0.001
Processing window: 50, tau: 0.001
Processing window: 55, tau: 0.001
Processing window: 60, tau: 0.001
Processing window: 0, tau: 0.002
Processing window: 5, tau: 0.002
Processing window: 10, tau: 0.002
Process

In [5]:
tau_aggregation[0]

Unnamed: 0,gauss_dist+val,gauss_dist+score,t_dist+val,t_dist+scor
0,-0.008157,0.000005,-0.014506,0.000007
1,-0.003755,0.000006,-0.009763,0.000009
2,-0.004199,0.000007,-0.010409,0.000009
3,-0.004210,0.000006,-0.009954,0.000009
4,-0.003616,0.000007,-0.009969,0.000010
...,...,...,...,...
58,-0.000093,0.000007,-0.001356,0.000008
59,-0.000176,0.000007,-0.001809,0.000008
60,-0.000080,0.000008,-0.001201,0.000009
61,-0.000061,0.000006,-0.001087,0.000007


In [6]:
stat_dfs = []
for tau_idx, tau in enumerate(taus):
    df = tau_aggregation[tau_idx]
    stats = pd.DataFrame()
    means = []
    stds = []
    percentiles_5 = []
    percentiles_95 = []
    for col in df.columns:
        means.append(df[col].mean())
        stds.append(df[col].std())
        percentiles_5.append(np.percentile(df[col], 5))
        percentiles_95.append(np.percentile(df[col], 95))
    stats[f"Models"] = df.columns.to_list()
    stats["MEAN"] = means
    stats[f"STD"] = stds
    stats[f"5th PERCENTIL"] = percentiles_5
    stats[f"95th PERCENTIL"] = percentiles_95
    stats.sort_values(by="MEAN", inplace=True)
    stat_dfs.append(stats)

In [7]:
stat_dfs[5]

Unnamed: 0,Models,MEAN,STD,5th PERCENTIL,95th PERCENTIL
2,t_dist+val,-0.009212,0.003438,-0.01439,-0.003067
0,gauss_dist+val,-0.004702,0.001842,-0.007688,-0.001882
1,gauss_dist+score,0.000206,3.7e-05,0.000154,0.000275
3,t_dist+scor,0.000282,5.9e-05,0.000199,0.000378
