In [None]:
# Import required libraries
import pandas as pd
import os

In [None]:
# Parameters and File Paths

# --- Data parameters ---
WINDOW = 21     # rolling window size to use as predictors
DATE_COL = 'Date'
ID_COL = 'PERMNO'
TARGET_COL = 'excess_return'
WEIGHTS_COL = 'avg_market_cap'

# --- Directories (resolved relative to the current working directory) ---
current_directory = os.getcwd()
data_directory = os.path.join(current_directory, 'Data')
results_directory = os.path.join(current_directory, 'Results')

# Input: additional per-stock information
top50_stocks_info_path = os.path.join(data_directory, 'top50_stocks_info.csv')

# Input: merged prediction results of the various models (file name carries the window size)
merged_results_path = os.path.join(results_directory, f'merged_results{WINDOW:.0f}.csv')

# Output: equal weighted portfolio results
bu_equal_portfolio_results_path = os.path.join(results_directory, f'bu_equal_portfolio_results{WINDOW:.0f}.csv')
ml_equal_portfolio_results_path = os.path.join(results_directory, f'ml_equal_portfolio_results{WINDOW:.0f}.csv')

# Output: value weighted portfolio results
bu_value_portfolio_results_path = os.path.join(results_directory, f'bu_value_portfolio_results{WINDOW:.0f}.csv')
ml_value_portfolio_results_path = os.path.join(results_directory, f'ml_value_portfolio_results{WINDOW:.0f}.csv')

In [None]:
# Maps each model's short key to the display name used for that model.
# Insertion order is kept as-is: linear models, tree ensembles, neural nets,
# then the pretrained time-series models (TimesFM, Chronos, Moirai).
models_dict = {
    # Linear models
    "ols": "OLS",
    "lasso": "Lasso",
    "ridge": "Ridge",
    "enet": "Elastic Net",
    # Tree ensembles
    "rf": "RF",
    "xgb": "XGB",
    # Neural networks
    "nn1": "NN1",
    "nn2": "NN2",
    "nn3": "NN3",
    "nn4": "NN4",
    "nn5": "NN5",
    # TimesFM
    "tfm1": "TimesFM 1.0",
    "tfm2": "TimesFM 2.0",
    # Chronos-Bolt
    "chr_bolt_tiny": "Chronos-Bolt-Tiny",
    "chr_bolt_mini": "Chronos-Bolt-Mini",
    "chr_bolt_small": "Chronos-Bolt-Small",
    "chr_bolt_base": "Chronos-Bolt-Base",
    # Chronos-T5
    "chr_t5_tiny": "Chronos-T5-Tiny",
    "chr_t5_mini": "Chronos-T5-Mini",
    "chr_t5_small": "Chronos-T5-Small",
    # Moirai
    "moirai_s": "Moirai-Small",
    "moirai_moe_s": "Moirai-MoE-Small",
    "moirai_moe_b": "Moirai-MoE-Base",
}

### Step 1: Load Merged Forecast Results

In [None]:
# Load the out-of-sample prediction results of the various models into a pandas DataFrame
results = pd.read_csv(merged_results_path, parse_dates=[DATE_COL])

# Attach the per-stock information required for portfolio building (ticker and the
# value-weighting column). WEIGHTS_COL is used rather than a repeated string literal so
# the merged column is always the one the value-weighted portfolios read.
top50_stocks_info = pd.read_csv(top50_stocks_info_path)
results = results.merge(
    top50_stocks_info[[ID_COL, "Ticker", WEIGHTS_COL]],
    how="left",
    on=ID_COL,
    # Fail loudly if the info file lists a stock more than once: a plain left merge
    # would silently duplicate forecast rows and distort every portfolio return.
    validate="many_to_one",
)

results.info()

### Step 2: Create Bottom-up Portfolios

In [None]:
# Function to Calculate Predictive-R2 Used in the Finance Literature
def r2(y_true, y_pred):
    """Return the predictive R-squared of `y_pred` against `y_true`.

    Computed as ``1 - SSE / sum(y_true ** 2)``: the denominator is the raw sum of
    squared realized values (no mean is subtracted from `y_true`).

    Both arguments must support element-wise arithmetic and a ``.sum()`` method
    (e.g. pandas Series or NumPy arrays).
    """
    squared_error_sum = ((y_true - y_pred) ** 2).sum()
    squared_true_sum = (y_true ** 2).sum()
    return 1 - (squared_error_sum / squared_true_sum)

In [None]:
# Function to Calculate Expected and Realized Returns for Bottom-up Portfolio-Level Predictions
def bottom_up_portfolio(df, date_col, ret_col, new_col_name, weights_col=None):
    """Aggregate a per-row return column into one portfolio return per date.

    Args:
        df: Input frame containing `date_col`, `ret_col` and, if given,
            `weights_col`. It is only read, never modified.
        date_col: Column whose values define the groups (one output row per value).
        ret_col: Column holding the returns (realized or predicted) to aggregate.
        new_col_name: Name of the aggregated column in the result.
        weights_col: Optional column of un-normalized weights. When None the
            portfolio return is the plain mean of `ret_col` per date. Otherwise
            the weights are rescaled to sum to one within each date and the
            weighted sum of `ret_col` is returned.

    Returns:
        DataFrame with the columns ``[date_col, new_col_name]``, one row per
        distinct date, ordered by date.

    Notes:
        In the weighted case, rows with a missing `ret_col` are skipped in the
        sum but their weight still counts toward the per-date normalizing total.
    """
    dates = df[date_col]

    if weights_col is None:
        per_date = df[ret_col].groupby(dates).mean()
    else:
        raw_weights = df[weights_col]
        # Normalize on a derived Series instead of writing back into the group
        # frame: mutating the object handed over by groupby.apply is unsupported.
        norm_weights = raw_weights / raw_weights.groupby(dates).transform("sum")
        per_date = (df[ret_col] * norm_weights).groupby(dates).sum()

    return per_date.rename(new_col_name).reset_index()

In [None]:
# Evaluate Equal Weighted Bottom-up Portfolio Using Various Models
# The first column is the portfolio return built from TARGET_COL; each model then
# contributes one column built from its forecasts.
bu_equal_portfolio_rets = bottom_up_portfolio(
    results, DATE_COL, TARGET_COL, "r_bu_portfolio"
)

# Forecast columns are recognized by their "y_" prefix
models = [col for col in results.columns if col.startswith("y_")]

for model in models:
    p = bottom_up_portfolio(results, DATE_COL, model, model.replace("y_", "e_"))
    bu_equal_portfolio_rets = bu_equal_portfolio_rets.merge(p, how='left', on=DATE_COL)

In [None]:
# Evaluate Value Weighted Bottom-up Portfolio Using Various Models
# Same construction as the equal weighted case, but every call passes WEIGHTS_COL
# so the per-date returns are weighted by that column.
bu_value_portfolio_rets = bottom_up_portfolio(
    results, DATE_COL, TARGET_COL, "r_bu_portfolio", WEIGHTS_COL
)

for model in models:
    p = bottom_up_portfolio(
        results, DATE_COL, model, model.replace("y_", "e_"), WEIGHTS_COL
    )
    bu_value_portfolio_rets = bu_value_portfolio_rets.merge(p, how='left', on=DATE_COL)

##### Save Results

In [None]:
# Save Equal and Value Weighted Bottom-up Portfolio Results (CSV, row index omitted)
for portfolio_rets, csv_path in (
    (bu_equal_portfolio_rets, bu_equal_portfolio_results_path),
    (bu_value_portfolio_rets, bu_value_portfolio_results_path),
):
    portfolio_rets.to_csv(csv_path, index=False)

### Step 3: Create Prediction-Sorted Long-Short Decile Portfolios

In [None]:
# Function to Build ML-Based Long-Short Portfolio and Calculate Expected and Realized Returns
def portfolio_builder(df, date_col, y_true_col, y_pred_col, weights_col=None):
    """Build a prediction-sorted long-short portfolio and return its realized return per date.

    Within each date the rows are bucketed by `y_pred_col` with ``pd.qcut`` (10
    quantile bins, numbered from 1). The portfolio is long the highest occupied
    bucket and short bucket 1, and its return is ``long - short`` computed on
    `y_true_col`.

    Args:
        df: Input frame containing `date_col`, `y_true_col`, `y_pred_col` and,
            if given, `weights_col`. It is only read, never modified.
        date_col: Column whose values define the rebalancing dates.
        y_true_col: Column with the realized returns.
        y_pred_col: Column with the forecasts used for sorting.
        weights_col: Optional column of un-normalized weights. When None each
            leg is equally weighted; otherwise weights are rescaled to sum to
            one within each leg.

    Returns:
        DataFrame with `date_col` and one return column named
        ``"r_" + y_pred_col.replace("y_", "")``, one row per date.

    Notes:
        ``duplicates='drop'`` lets qcut merge bins whose edges coincide (tied
        forecasts), so a date can have fewer than 10 buckets. The long leg is
        therefore the highest bucket present on that date, not literally bucket
        10; otherwise a single tie would leave the long leg empty and the result
        would silently be a short-only return. When all 10 buckets exist the
        highest one is bucket 10. If a date has a single bucket, both legs
        coincide and the return is 0. An empty leg contributes 0.
    """
    # Per-date bucket number (1-based) of every forecast.
    deciles = df.groupby([date_col])[y_pred_col].transform(
        lambda x: pd.qcut(x, 10, labels=False, duplicates='drop') + 1)

    # Work on just the needed columns instead of copying the whole input frame.
    value_cols = [y_true_col] if weights_col is None else [y_true_col, weights_col]
    work = df[[date_col] + value_cols].assign(decile=deciles)

    def leg_return(leg):
        # Equal weighting divides by the number of rows in the leg (missing
        # returns are skipped in the sum but still counted in the divisor).
        if weights_col is None:
            return (leg[y_true_col] / leg.shape[0]).sum()
        # Normalize on a derived Series; the leg frame itself is left untouched.
        leg_weights = leg[weights_col] / leg[weights_col].sum()
        return (leg[y_true_col] * leg_weights).sum()

    def actual_ret(group):
        top_bucket = group['decile'].max()
        long_leg = group[group['decile'] == top_bucket]
        short_leg = group[group['decile'] == 1]
        return leg_return(long_leg) - leg_return(short_leg)   # zero-net-investment portfolio

    # Selecting the value columns keeps the grouping column out of the frames
    # passed to apply.
    r_ret = (work.groupby(date_col)[['decile'] + value_cols].apply(actual_ret)
             .reset_index()
             .rename(columns={0: "r_" + y_pred_col.replace("y_", "")}))
    return r_ret

In [None]:
# Build Equal Weighted ML-Based Long-Short Portfolios Using Various Models
# Start from one row per distinct date in `results`, then attach one return
# column per model.
unique_dates = results[DATE_COL].unique()
ml_equal_portfolio_rets = pd.DataFrame(unique_dates, columns=[DATE_COL])

for model in models:
    p = portfolio_builder(results, DATE_COL, TARGET_COL, model)
    ml_equal_portfolio_rets = ml_equal_portfolio_rets.merge(p, how='left', on=DATE_COL)

In [None]:
# Build Value Weighted ML-Based Portfolios Using Various Models
# Start from one row per distinct date in `results`, then attach one return
# column per model, with both legs weighted by WEIGHTS_COL.
unique_dates = results[DATE_COL].unique()
ml_value_portfolio_rets = pd.DataFrame(unique_dates, columns=[DATE_COL])

for model in models:
    p = portfolio_builder(results, DATE_COL, TARGET_COL, model, WEIGHTS_COL)
    ml_value_portfolio_rets = ml_value_portfolio_rets.merge(p, how='left', on=DATE_COL)

##### Save Results

In [None]:
# Save Equal and Value Weighted ML-Based Portfolio Results (CSV, row index omitted)
for portfolio_rets, csv_path in (
    (ml_equal_portfolio_rets, ml_equal_portfolio_results_path),
    (ml_value_portfolio_rets, ml_value_portfolio_results_path),
):
    portfolio_rets.to_csv(csv_path, index=False)