In [1]:
from distribution_fit_class import DistributionFit
from portfolio_optimization_class import PortfolioOptimization, optimize_windows
import pandas as pd
import numpy as np

In [3]:
# Location of the returns CSV, relative to the notebook's working directory.
file_path = r'data\random_data\1_stocks_per_sector_1_iter.csv'

# Returns as a DataFrame: the first CSV column becomes the index and is
# parsed as dates.
returns_df = pd.read_csv(file_path, index_col=0, parse_dates=True)

# A DistributionFit instance loaded from the same file.
distribution_fit = DistributionFit()
distribution_fit.load_df_from_csv(file_path)

# Display the column labels that were read.
returns_df.columns

Index(['MMM', 'A', 'SNPS', 'OMC', 'ROST', 'EIX', 'GS', 'NUE', 'PSA', 'GIS',
       'WMB'],
      dtype='object')

In [4]:
# Shared settings for the optimisation / scoring loop.
# window_size    -> rows per rolling slice of returns_df; also passed to
#                   optimize_windows as number_of_quarters
# maximum_weight -> passed to optimize_windows as maximum_weight
window_size, maximum_weight = 32, 0.3

# Tau levels swept by the scoring loop, one optimisation run per value.
taus = [
    0.0005, 0.001, 0.002, 0.005, 0.01,
    0.02, 0.05, 0.1, 0.15, 0.2,
]

In [None]:
# --- Settings this cell depends on ------------------------------------------
# `copula` and `number_of_samples` were read below without being defined in
# any visible cell, and the output file name interpolated the builtin `iter`.
# They are defined here so the cell runs on a fresh kernel; adjust as needed.
copula = 'clayton'          # label used in the result column names (matches the simulated-data file name)
number_of_samples = None    # number of simulated rows to use; None keeps every row
iteration = 1               # run identifier used in the output file name
n_windows = 64              # number of rolling windows evaluated per tau
simulated_data_path = r'data\copulas_outputs\clayton_random_fixed_coefficient.csv'


def _expectile_diagnostics(portfolio_returns, expectile, tau):
    """Return the mean validation value and mean score for one window.

    Parameters
    ----------
    portfolio_returns : array-like of simulated portfolio returns.
    expectile : scalar level the returns are compared against.
    tau : asymmetry level in (0, 1).

    Returns
    -------
    (mean_validation, mean_score) : tuple of floats
        mean_validation averages (1 - tau) * min(d, 0) - tau * max(d, 0)
        mean_score      averages (1 - tau) * min(d, 0)**2 + tau * max(d, 0)**2
        where d = portfolio_returns - expectile.
    """
    deviation = np.asarray(portfolio_returns, dtype=float) - expectile
    downside = np.minimum(deviation, 0)
    upside = np.maximum(deviation, 0)

    # NOTE(review): the expectile first-order condition would be
    # (1 - tau) * downside + tau * upside; the original sign is kept here
    # unchanged -- confirm which quantity is intended.
    validation_values = (1 - tau) * downside - tau * upside

    # Asymmetric squared loss: clip first, then square. Squaring before
    # clipping (as the previous version did) makes the (1 - tau) term
    # identically zero, because a square is never negative.
    scoring_values = (1 - tau) * downside**2 + tau * upside**2

    return np.mean(validation_values), np.mean(scoring_values)


# The simulated data does not depend on tau or on the window, so read it once.
simulated_quantiles = pd.read_csv(simulated_data_path)
# The columns got mixed up during the analysis in R; restore the order used by returns_df.
simulated_quantiles.columns = returns_df.columns
simulated_quantiles = simulated_quantiles.iloc[:number_of_samples]

tau_aggregation = []
for tau in taus:
    portfolios = optimize_windows(number_of_quarters=window_size, tau=tau, maximum_weight=maximum_weight, data_path=file_path)

    expected_validation_values_norm = []
    expected_scoring_values_norm = []
    expected_validation_values_t = []
    expected_scoring_values_t = []
    for window in range(n_windows):
        if window % 5 == 0:
            print(f'Tau: {tau}, window - {window}')

        # Fit the marginal distributions on the current rolling window.
        returns_window = returns_df.iloc[window : window + window_size]
        distribution_fit = DistributionFit()
        distribution_fit.set_df(returns_window)
        distribution_fit.fit_distribution_all_stocks()

        # Hand over a fresh copy each time, as the previous version did by
        # re-reading the file, in case the fitter modifies its input.
        distribution_fit.set_simulated_data_from_df(simulated_quantiles.copy())
        distribution_fit.calculate_returns_from_simulated_quantiles()
        returns_norm = distribution_fit.get_simulated_return_norm()
        returns_t = distribution_fit.get_simulated_return_t_student()

        # Weight entries are the labels matching w<digits>; raw string so that
        # \d is a regex class rather than an invalid string escape.
        portfolio_row = portfolios.iloc[window]
        weights = portfolio_row.filter(regex=r"^w\d+$").to_numpy(dtype=float)
        expectile = -portfolio_row["EVAR"]

        portfolio_returns_norm = np.sum(returns_norm.values * weights, axis=1)
        validation_norm, scoring_norm = _expectile_diagnostics(portfolio_returns_norm, expectile, tau)
        expected_validation_values_norm.append(validation_norm)
        expected_scoring_values_norm.append(scoring_norm)

        portfolio_returns_t = np.sum(returns_t.values * weights, axis=1)
        validation_t, scoring_t = _expectile_diagnostics(portfolio_returns_t, expectile, tau)
        expected_validation_values_t.append(validation_t)
        expected_scoring_values_t.append(scoring_t)

    # NOTE(review): the last key ends in "+scor" (not "+score"); it is kept
    # as-is because the written CSV header may be consumed elsewhere.
    tau_dict = {
        f'{copula}+gauss_dist+val': expected_validation_values_norm,
        f'{copula}+gauss_dist+score': expected_scoring_values_norm,
        f'{copula}+t_dist+val': expected_validation_values_t,
        f'{copula}+t_dist+scor': expected_scoring_values_t,
    }

    # Collect and persist inside the tau loop so every tau gets its own
    # entry and its own file (previously only the last tau was saved).
    tau_aggregation.append(tau_dict)
    scores_df = pd.DataFrame.from_dict(tau_dict)
    tau_str = str(tau).replace(".", "_")
    output_path = r"../scores/taus/" + f'tau_{tau_str}/scores_11_stocks_{tau_str}_iter_{iteration}.csv'
    scores_df.to_csv(output_path)