# **01. Import Libraries and Load Data**



In [None]:
pip install wrds --no-deps

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import wrds

## Connect to WRDS

In [None]:
# Establish a connection to the WRDS
db = wrds.Connection()

# **02. Data Collection**

## Select 50 Top Stocks

In [None]:
# Get the earliest trading date for each permno, keeping only stocks that
# were already trading on or before 2000-01-01
query_earliest_date = """
SELECT
    permno,
    MIN(date) as first_trade_date
FROM
    crsp.dsf
GROUP BY
    permno
HAVING
    MIN(date) <= '2000-01-01'
"""

earliest_dates = db.raw_sql(query_earliest_date)

# Keep only permnos that have a crsp.dsf row on every distinct date that
# appears in crsp.dsf between 2000-01-01 and 2024-12-31
query_active_stocks = """
SELECT
    permno
FROM
    crsp.dsf
WHERE
    date BETWEEN '2000-01-01' AND '2024-12-31'
GROUP BY
    permno
HAVING
    COUNT(DISTINCT date) = (SELECT COUNT(DISTINCT date)
                            FROM crsp.dsf
                            WHERE date BETWEEN '2000-01-01' AND '2024-12-31')
"""

active_stocks = db.raw_sql(query_active_stocks)

# Combine the two sets of stocks to get those listed before 2000 and still active in 2024
filtered_permnos = earliest_dates.merge(active_stocks, on='permno', how='inner')

# An empty list would render as "IN ()", which is invalid SQL, so fail early
if filtered_permnos.empty:
    raise ValueError("No permno satisfies both the listing-date and full-coverage filters.")

# Get the list of permnos as a comma-separated string (cast to int so a float
# permno column cannot leak values such as "10107.0" into the SQL text)
permnos_str = ','.join(str(int(permno)) for permno in filtered_permnos['permno'].tolist())

# Get market capitalisation, company name, and sector information for IT sector.
# - ABS(prc): CRSP stores prc as a negative number when it is a bid/ask average
#   rather than a trade price, so the raw product can be negative.
# - The namedt/nameendt conditions pick the single dsenames record that is valid
#   on the snapshot date; without them every historical name record of a permno
#   matches, duplicating rows and testing outdated siccd/exchcd values.
query_main = f"""
SELECT
    a.permco,
    a.permno,
    a.date,
    a.shrout,
    ABS(a.prc) * a.shrout as market_cap,
    b.shrcd,
    b.exchcd,
    b.siccd,
    b.ncusip,
    b.comnam
FROM
    crsp.dsf AS a
JOIN
    crsp.dsenames AS b
ON
    a.permno = b.permno
WHERE
    (
        (b.siccd BETWEEN 3570 AND 3579) OR  -- computer and office equipment
        (b.siccd BETWEEN 3600 AND 3674) OR  -- electronic equipment through semiconductors
        (b.siccd BETWEEN 7370 AND 7379) OR  -- computer programming, software and data services
        (b.siccd BETWEEN 4810 AND 4813)     -- telephone communications
    )
    AND a.permno IN ({permnos_str})
    AND a.date = '2024-12-31'
    AND a.date >= b.namedt
    AND (a.date <= b.nameendt OR b.nameendt IS NULL)
    AND b.exchcd IN (1, 3)
"""

# Execute query
crsp_data = db.raw_sql(query_main)

In [None]:
# Check the results from crsp_data
crsp_data.head()

In [None]:
# Row count of the snapshot, then the number of distinct stocks (permno) in it
print("Original dataset size: ", crsp_data.shape[0])
print("Original number of stocks: ", crsp_data['permno'].nunique())

In [None]:
# Filter data for the latest date
latest_date = crsp_data['date'].max()
latest_data = crsp_data[crsp_data['date'] == latest_date]

# Rank on the absolute market cap: CRSP reports prc as a negative number when it
# is a bid/ask average, so market_cap (prc * shrout) can carry a negative sign.
# Sorting first and then keeping the first row per (permco, permno) selects the
# highest-cap row of each security without a slow per-group apply.
top_50_IT_stocks = (
    latest_data
    .assign(market_cap=latest_data['market_cap'].abs())
    .sort_values(by='market_cap', ascending=False)
    .drop_duplicates(subset=['permco', 'permno'], keep='first')
    .head(50)                      # top 50 securities by market capitalisation
    .reset_index(drop=True)
)

In [None]:
print(top_50_IT_stocks)

In [None]:
# Collect every row where market cap, company name or CUSIP is missing
missing_data = crsp_data.loc[
    crsp_data[['market_cap', 'comnam', 'ncusip']].isna().any(axis=1)
]

# Show the incomplete rows
print(missing_data)

In [None]:
# Before removing duplicates
print(f"Data size before removing duplicates: {crsp_data.shape}")

# Remove duplicates: keep the first row for each (permno, date) pair.
# (The key list previously named 'date' twice, which added nothing.)
crsp_data = crsp_data.drop_duplicates(subset=['permno', 'date'], keep='first')

# After removing duplicates
print(f"Data size after removing duplicates: {crsp_data.shape}")

## Collect Price and Return Data

In [None]:
# Identifiers (permno) of the 50 selected stocks
top_50_permnos = top_50_IT_stocks['permno'].tolist()

# Render the identifiers as "a, b, c" for use inside a SQL IN (...) clause
permnos_str = ', '.join(str(permno) for permno in top_50_permnos)

### Download train data

In [None]:
# Training window boundaries (both ends are included by the BETWEEN below)
start_date, end_date = '2000-01-01', '2015-12-31'

# Daily price/return rows for the top 50 stocks; each row is matched to the
# dsenames record whose namedt..nameendt interval contains the trading date
query = f"""
SELECT
    a.permco,
    a.permno,
    b.comnam,
    b.ticker,
    a.date,
    a.prc,
    a.cfacpr,
    a.ret
FROM
    crsp.dsf AS a
JOIN
    (SELECT permno, comnam, ticker, namedt, nameendt
     FROM crsp.dsenames
     WHERE permno IN ({permnos_str}) -- filter for the top 50 stocks
       AND namedt <= '{end_date}'
       AND (nameendt IS NULL OR nameendt >= '{start_date}')) AS b
ON
    a.permno = b.permno
WHERE
    a.permno IN ({permnos_str})     -- filter for the top 50 stocks
    AND a.date BETWEEN '{start_date}' AND '{end_date}'
    AND a.date >= b.namedt
    AND (a.date <= b.nameendt OR b.nameendt IS NULL)
"""

# Run the query and order the rows by company (permco), then by date
crsp_train = db.raw_sql(query).sort_values(by=['permco', 'date'])

In [None]:
# Check for missing values
print(crsp_train.isna().sum())

In [None]:
# Drop rows where 'prc' or 'ret' are missing (NaN)
crsp_train = crsp_train.dropna(subset=['prc', 'ret'])

In [None]:
crsp_train

### Merge the risk-free rate with stock returns (calculate excess returns)

In [None]:
# Query to fetch the daily risk-free rate for the period 2000-2015
query_risk_free = """
SELECT
    date,
    rf
FROM
    ff.factors_daily
WHERE
    date BETWEEN '2000-01-01' AND '2015-12-31'
"""
rf_data = db.raw_sql(query_risk_free)

# Ensure both 'date' columns are in datetime format before merging
crsp_train['date'] = pd.to_datetime(crsp_train['date'], errors='coerce')
rf_data['date'] = pd.to_datetime(rf_data['date'], errors='coerce')

# Merge the risk-free rate with stock data
crsp_train = pd.merge(crsp_train, rf_data, how='left', on='date')

# CRSP `ret` is a holding-period return that already accounts for splits and
# distributions. `cfacpr` is a cumulative factor for rescaling *price levels*;
# dividing a return by it distorts the return (and yields inf when cfacpr is 0).
# The column name is kept because later cells read 'adjusted_ret'.
crsp_train['adjusted_ret'] = crsp_train['ret']

# Calculate excess returns over the daily risk-free rate
crsp_train['excess_ret'] = crsp_train['adjusted_ret'] - crsp_train['rf']

# Clip abnormal returns to +100% and -100%
crsp_train['excess_ret'] = crsp_train['excess_ret'].clip(lower=-1.0, upper=1.0)

# Convert the excess return to a binary target for directional forecasting
# (1 when the excess return is strictly positive, otherwise 0)
crsp_train['directional_target'] = np.where(crsp_train['excess_ret'] > 0, 1, 0)

# Check the results for train data
crsp_train[['permco', 'permno', 'date', 'adjusted_ret', 'excess_ret']].head()

### Download test data (2016-2024)


In [None]:
# Test window boundaries (both ends are included by the BETWEEN below)
start_date, end_date = '2016-01-01', '2024-12-31'

# Daily price/return rows for the top 50 stocks; each row is matched to the
# dsenames record whose namedt..nameendt interval contains the trading date
query = f"""
SELECT
    a.permco,
    a.permno,
    b.comnam,
    b.ticker,
    a.date,
    a.prc,
    a.cfacpr,
    a.ret
FROM
    crsp.dsf AS a
JOIN
    (SELECT permno, comnam, ticker, namedt, nameendt
     FROM crsp.dsenames
     WHERE permno IN ({permnos_str}) -- filter for the top 50 stocks
       AND namedt <= '{end_date}'
       AND (nameendt IS NULL OR nameendt >= '{start_date}')) AS b
ON
    a.permno = b.permno
WHERE
    a.permno IN ({permnos_str})       -- filter for the top 50 stocks
    AND a.date BETWEEN '{start_date}' AND '{end_date}'
    AND a.date >= b.namedt
    AND (a.date <= b.nameendt OR b.nameendt IS NULL)
"""
# Run the query and order the rows by company (permco), then by date
crsp_test = db.raw_sql(query).sort_values(by=['permco', 'date'])

In [None]:
crsp_test

In [None]:
# Check for missing values
print(crsp_test.isna().sum())

### Calculate Excess Returns for Test Data


In [None]:
# Use the Fama French data to get the daily risk-free rate for the test period (2016-2024)
query_risk_free_test = """
SELECT
    date,
    rf
FROM
    ff.factors_daily
WHERE
    date BETWEEN '2016-01-01' AND '2024-12-31'
"""
rf_data_test = db.raw_sql(query_risk_free_test)

# Drop rows where 'prc' or 'ret' are missing, mirroring the cleaning applied to
# the training data; otherwise NaN returns are labelled 0 in the directional
# target and end up inside the forecasting windows.
crsp_test = crsp_test.dropna(subset=['prc', 'ret'])

# Ensure both 'date' columns are in datetime format before merging
crsp_test['date'] = pd.to_datetime(crsp_test['date'], errors='coerce')
rf_data_test['date'] = pd.to_datetime(rf_data_test['date'], errors='coerce')

# Merge the test data with the risk-free rate data
crsp_test = pd.merge(crsp_test, rf_data_test, how='left', on='date')

# CRSP `ret` is a holding-period return that already accounts for splits and
# distributions. `cfacpr` is a cumulative factor for rescaling *price levels*;
# dividing a return by it distorts the return (and yields inf when cfacpr is 0).
# The column name is kept because later cells read 'adjusted_ret'.
crsp_test['adjusted_ret'] = crsp_test['ret']

# Calculate excess returns over the daily risk-free rate
crsp_test['excess_ret'] = crsp_test['adjusted_ret'] - crsp_test['rf']

# Clip abnormal returns to +100% and -100%
crsp_test['excess_ret'] = crsp_test['excess_ret'].clip(lower=-1.0, upper=1.0)

# Convert the excess return to a binary target for directional forecasting
# (1 when the excess return is strictly positive, otherwise 0)
crsp_test['directional_target'] = np.where(crsp_test['excess_ret'] > 0, 1, 0)

# Check the results for test data
crsp_test[['permco', 'permno', 'date', 'adjusted_ret', 'excess_ret']].head()

The excess returns shown above are almost identical to the adjusted returns because the daily risk-free rate (rf) was very close to zero in those years.

## Descriptive Statistics for Excess Returns


In [None]:
# Summary statistics of excess returns over the training (in-sample) period
in_sample_stats = crsp_train["excess_ret"].describe()

# Print a heading followed by the statistics table
print("In-Sample Excess Return Stats:", in_sample_stats, sep="\n")

# Report the column's dtype on its own line
print(f"Name: excess_ret, dtype: {crsp_train['excess_ret'].dtype}")

In [None]:
# Summary statistics of excess returns over the test (out-of-sample) period
out_sample_stats = crsp_test["excess_ret"].describe()

# Print a heading followed by the statistics table
print("Out-Sample Excess Return Stats:", out_sample_stats, sep="\n")

# Report the column's dtype on its own line
print(f"Name: excess_ret, dtype: {crsp_test['excess_ret'].dtype}")

In [None]:
# Per-stock (permno) summary statistics of excess returns for each period
train_stats = crsp_train.groupby('permno')['excess_ret'].describe()
test_stats = crsp_test.groupby('permno')['excess_ret'].describe()

# Training-period table
print("Descriptive Statistics for Excess Returns (Training Period):", train_stats, sep="\n")

# Test-period table, separated from the previous one by a blank line
print("\nDescriptive Statistics for Excess Returns (Test Period):", test_stats, sep="\n")

## Create Rolling Windows

In [None]:
def create_lag_features(df, lags):
    """Add per-stock rolling-mean features of past excess returns.

    For every value in ``lags`` a column ``lag_<lag>`` is added holding, for
    each row, the mean of that stock's previous ``lag`` excess returns. The
    current day's return is excluded (shift by one) to avoid lookahead.

    Args:
        df: DataFrame with 'permno', 'date' and 'excess_ret' columns. It is
            modified in place; its row order and index are left untouched.
        lags: Iterable of window lengths in rows (e.g. [5, 21, 252, 512]).

    Returns:
        The same ``df`` object with the added ``lag_<lag>`` columns.
    """
    # Work on a positionally indexed, chronologically ordered copy so the
    # result can be mapped back to df's rows even if df's index has duplicates.
    ordered = df.reset_index(drop=True).sort_values(by=["permno", "date"])
    per_stock = ordered.groupby("permno", sort=False)["excess_ret"]

    for lag in lags:
        # Shift and roll *inside* each stock so that a window never mixes the
        # returns of two different stocks.
        rolled = per_stock.transform(
            lambda returns, window=lag: returns.shift(1).rolling(window=window).mean()
        )
        # sort_index() restores df's original row order before the positional assignment
        df[f"lag_{lag}"] = rolled.sort_index().to_numpy()

    return df

# Window lengths (in rows) for which rolling-mean features are built
lag_days_list = [5, 21, 252, 512]

# Build the lag features for the training frame first, then the test frame
crsp_train_lagged, crsp_test_lagged = (
    create_lag_features(frame, lag_days_list) for frame in (crsp_train, crsp_test)
)

# Preview the first rows of both frames to confirm the new columns exist
print(crsp_train_lagged.head(), crsp_test_lagged.head(), sep="\n")

# **03. Merged**

In [None]:
# Build exactly one market-cap value per permno from crsp_data. Merging the raw
# frame would multiply test rows whenever crsp_data holds several rows for one
# permno. abs() removes the negative sign that a negative prc puts on prc * shrout.
market_cap_by_permno = (
    crsp_data[['permno', 'market_cap']]
    .dropna(subset=['market_cap'])
    .assign(market_cap=lambda frame: frame['market_cap'].abs())
    .groupby('permno')['market_cap']
    .max()
)

# Attach the value as 'market_cap_merged' by mapping on permno. Unlike a
# self-merge on (permco, date), this cannot duplicate rows for companies that
# have more than one permno.
# NOTE(review): crsp_data is a single snapshot taken at 2024-12-31, so these
# weights use information from the end of the test period - confirm that this
# look-ahead is acceptable for the value-weighted portfolios.
crsp_test_lagged = crsp_test_lagged.assign(
    market_cap_merged=crsp_test_lagged['permno'].map(market_cap_by_permno)
)

# Kept so that cells referring to merged_df continue to work
merged_df = crsp_test_lagged.copy()

# **04. Moirai (uni2ts)**

In [None]:
pip uninstall -y torch torchvision torchaudio lightning pytorch-lightning

In [None]:
pip install torch==2.4.1 torchvision==0.19.1 pytorch-lightning==2.1.0 lightning==2.1.0

In [None]:
!pip install --upgrade uni2ts

In [None]:
!pip install torchvision==0.19.1

In [None]:
pip install pytorch-lightning --upgrade

In [None]:
!pip install torchvision

In [None]:
import torch
import torchvision
import lightning
import pytorch_lightning

print(f"PyTorch version: {torch.__version__}")
print(f"Torchvision version: {torchvision.__version__}")
print(f"Lightning version: {lightning.__version__}")
print(f"PyTorch Lightning version: {pytorch_lightning.__version__}")

In [None]:
!pip show torch
!pip show torchvision

In [None]:
try:
    from torch._utils_internal import maybe_upload_prof_stats_to_manifold
    print("Import successful!")
except ImportError:
    print("Import failed: maybe_upload_prof_stats_to_manifold is not available.")


In [None]:
import sys
import os

# Change to the uni2ts directory
%cd uni2ts

# Add the uni2ts directory to the Python path
sys.path.insert(0, os.path.abspath('.'))

from uni2ts.model.moirai import MoiraiForecast, MoiraiModule

In [None]:
from uni2ts.model.moirai import MoiraiForecast, MoiraiModule
from uni2ts.eval_util.plot import plot_single

In [None]:
from uni2ts.model.moirai_moe import MoiraiMoEForecast, MoiraiMoEModule

In [None]:
!git clone https://github.com/SalesforceAIResearch/uni2ts.git
%cd uni2ts

In [None]:
!pip install virtualenv

In [None]:
!virtualenv venv

In [None]:
!touch .env

In [None]:
from gluonts.dataset.common import ListDataset

## Moirai-Small

In [None]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from tqdm import tqdm
from gluonts.dataset.common import ListDataset
from uni2ts.model.moirai import MoiraiForecast, MoiraiModule

# Define the helper function for calculating performance metrics
def calculate_metrics(actual, predictions):
    """Compute error and directional-accuracy metrics for a set of forecasts.

    Args:
        actual: Realised values (array-like).
        predictions: Forecast values, aligned position by position with ``actual``.

    Returns:
        Tuple ``(mse, mae, rmse, directional_accuracy, up_dir_acc,
        down_dir_acc, r2, mase)``. The up/down accuracies are NaN when there is
        no positive/negative realised value; MASE is NaN when its scale is
        zero or cannot be computed.
    """
    # Plain float arrays make the boolean masks below purely positional, so
    # lists and pandas Series (whatever their index) are handled alike.
    actual = np.asarray(actual, dtype=float)
    predictions = np.asarray(predictions, dtype=float)

    mse = mean_squared_error(actual, predictions)
    rmse = mse ** 0.5
    mae = mean_absolute_error(actual, predictions)

    # Share of observations whose forecast has the same sign as the realised value
    directional_accuracy = np.mean(np.sign(actual) == np.sign(predictions))

    # Share of realised up-moves that were forecast as positive
    actual_up = actual > 0
    up_dir_acc = np.mean(predictions[actual_up] > 0) if np.any(actual_up) else np.nan

    # Share of realised down-moves that were forecast as negative
    actual_down = actual < 0
    down_dir_acc = np.mean(predictions[actual_down] < 0) if np.any(actual_down) else np.nan

    r2 = r2_score(actual, predictions)

    # MASE scale: mean absolute first difference of the realised values.
    # It needs at least two observations; otherwise report NaN.
    scale_factor = np.mean(np.abs(np.diff(actual))) if actual.size > 1 else np.nan
    mase = mae / scale_factor if scale_factor != 0 else np.nan

    return mse, mae, rmse, directional_accuracy, up_dir_acc, down_dir_acc, r2, mase

# Function to run the forecast with dynamic lags
def _build_context_windows(df_test, window, id_col, date_col):
    """Slice each stock's 'target' series into (context, next value) pairs.

    Returns three parallel lists: float32 context arrays of length ``window``,
    the value that follows each context, and a record with the stock id and
    the date of that following value.
    """
    contexts, targets, records = [], [], []
    for permno, group in tqdm(df_test.groupby(id_col), desc="Building windows"):
        series = group["target"].to_numpy()
        dates = group[date_col].to_numpy()
        # Stocks with no more than `window` rows yield an empty range here
        for start in range(len(series) - window):
            stop = start + window
            contexts.append(series[start:stop].astype(np.float32))
            targets.append(series[stop])
            records.append({"permno": permno, "date": dates[stop]})
    return contexts, targets, records


def run_forecast_uni2ts_small(crsp_train_lagged, crsp_test_lagged, lags, model_name="Salesforce/moirai-1.0-R-small", out_sample_start="2016-01-01", out_sample_end="2024-12-31", device="cpu", batch_size=32):
    """Produce one-step-ahead Moirai forecasts on the test set and score them.

    Args:
        crsp_train_lagged: Training frame. Currently unused by this function.
        crsp_test_lagged: Test frame with 'permno', 'date' and 'excess_ret'.
        lags: List whose first element is used as the context window length;
            every element must be one of 5, 21, 252, 512.
        model_name: Name passed to ``MoiraiModule.from_pretrained``.
        out_sample_start: First date (inclusive) of the evaluated period.
        out_sample_end: Last date (inclusive) of the evaluated period.
        device: Torch device to load the model on; falls back to CPU when CUDA
            is requested but not available.
        batch_size: Number of context windows sent to the predictor at once.

    Returns:
        Tuple ``(summary, frame)``: a one-row metrics DataFrame and a copy of
        ``crsp_test_lagged`` with a ``predicted_excess_returns_lag<lag>``
        column. If no window can be built or the model cannot be loaded, the
        summary is an empty DataFrame and the prediction column is all NaN.

    Raises:
        ValueError: If ``lags`` is empty or contains a value that is not allowed.

    Side effects:
        Writes ``uni2tssmall_results_lag<lag>.csv`` and
        ``uni2tssmall_results_lag<lag>_full.csv`` to the working directory.
    """
    # List of allowed lags
    lag_days_list = [5, 21, 252, 512]

    # Validate that at least one lag is given and only valid lags are passed
    if not lags or any(lag not in lag_days_list for lag in lags):
        raise ValueError(f"Invalid lag value. Allowed lags are: {lag_days_list}")

    # Use the first lag value in lags as the window size
    WINDOW = lags[0]
    PRED_LEN = 1
    DATE_COL = "date"
    ID_COL = "permno"
    TARGET_COL = "excess_ret"
    pred_col = f'predicted_excess_returns_lag{WINDOW}'

    def _without_predictions():
        # Same return shape as the success path so callers can always unpack
        frame = crsp_test_lagged.copy()
        frame[pred_col] = np.nan
        return pd.DataFrame(), frame

    # Honour the requested device instead of silently replacing it
    device = torch.device(device)
    if device.type == "cuda" and not torch.cuda.is_available():
        print("CUDA requested but not available; falling back to CPU.")
        device = torch.device("cpu")

    # Restrict to the evaluation period and order each stock chronologically;
    # .copy() avoids writing the 'target' column into a slice of the input.
    in_period = crsp_test_lagged[DATE_COL].between(
        pd.to_datetime(out_sample_start), pd.to_datetime(out_sample_end)
    )
    df_test = crsp_test_lagged.loc[in_period].sort_values([ID_COL, DATE_COL]).copy()

    # Normalization (per stock)
    # NOTE(review): mean and std are taken over the whole evaluation period of
    # each stock, so every window is scaled with statistics that include later
    # dates; y_true / y_pred are therefore in standardized units, not raw
    # excess returns. Confirm whether training-period statistics should be used.
    df_test["target"] = df_test.groupby(ID_COL)[TARGET_COL].transform(lambda x: (x - x.mean()) / x.std())

    # Create contexts
    contexts, targets, records = _build_context_windows(df_test, WINDOW, ID_COL, DATE_COL)

    if len(contexts) == 0:
        print("No valid context windows found. Check 'lag' or data coverage.")
        return _without_predictions()

    # === LOAD MODEL ===
    try:
        model = MoiraiForecast(
            module=MoiraiModule.from_pretrained(model_name).to(device),
            prediction_length=PRED_LEN,
            context_length=WINDOW,
            num_samples=100,
            target_dim=1,
            feat_dynamic_real_dim=0,
            past_feat_dynamic_real_dim=0,
        ).to(device)

        print("Model initialized successfully.")
    except Exception as e:
        # Best effort: report the problem and return NaN predictions
        print(f"Error loading model: {e}")
        return _without_predictions()

    # NOTE(review): the predictor may select its own device - confirm against
    # the uni2ts create_predictor signature if device placement matters.
    predictor = model.create_predictor(batch_size=batch_size)

    # Predict, one batch of context windows at a time
    preds = []
    for batch_start in tqdm(range(0, len(contexts), batch_size), desc="Predicting in batches"):
        batch_contexts = contexts[batch_start:batch_start + batch_size]

        # The start timestamp is a placeholder; the real forecast dates are
        # carried separately in `records`.
        gluonts_input = ListDataset(
            [{"start": pd.Timestamp("2000-01-01"), "target": context.tolist()} for context in batch_contexts],
            freq="B"
        )
        # forecast.mean[0] is the mean forecast for the single predicted step
        preds.extend(forecast.mean[0] for forecast in predictor.predict(gluonts_input))

    # Evaluation
    results = pd.DataFrame(records)
    results["y_true"] = targets
    results["y_pred"] = preds

    # Calculate metrics
    mse, mae, rmse, directional_accuracy, up_dir_acc, down_dir_acc, r2, mase = calculate_metrics(
        results["y_true"].to_numpy(), results["y_pred"].to_numpy()
    )

    # Summary
    result = pd.DataFrame([{
        "Model": model_name,
        "Lag": WINDOW,
        "Directional Accuracy": directional_accuracy,
        "Up Directional Accuracy": up_dir_acc,
        "Down Directional Accuracy": down_dir_acc,
        "R-squared": r2,
        "MSE": mse,
        "RMSE": rmse,
        "MAE": mae,
        "MASE": mase
    }])

    # Save the summarized results and the per-row predictions for this lag to CSV
    result.to_csv(f"uni2tssmall_results_lag{WINDOW}.csv", index=False)
    results.to_csv(f"uni2tssmall_results_lag{WINDOW}_full.csv", index=False)

    # Merge the predictions into the test set under the lag-specific name.
    # A column left over from an earlier run is dropped first so that repeated
    # calls cannot create duplicate or suffixed columns.
    predictions = results[['permno', 'date', 'y_pred']].rename(columns={'y_pred': pred_col})
    crsp_test_lagged = crsp_test_lagged.drop(columns=[pred_col], errors='ignore').merge(
        predictions,
        on=['permno', 'date'],
        how='left'
    )

    return result, crsp_test_lagged

In [None]:
from IPython.display import display

# Forecast with the pretrained Moirai-small model and a 5-row context window
uni2ts_small_results_lag5 = run_forecast_uni2ts_small(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[5],
    model_name="Salesforce/moirai-1.0-R-small",
    device="cpu",
)

# Render the returned object in the notebook
display(uni2ts_small_results_lag5)

In [None]:
from IPython.display import display

# Same lag-5 forecast call as the preceding cell (pretrained Moirai-small, 5-row context window)
uni2ts_small_results_lag5 = run_forecast_uni2ts_small(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[5],
    model_name="Salesforce/moirai-1.0-R-small",
    device="cpu",
)

# Render the returned object in the notebook
display(uni2ts_small_results_lag5)

In [None]:
from IPython.display import display

# Forecast with the pretrained Moirai-small model and a 21-row context window
uni2ts_small_results_lag21 = run_forecast_uni2ts_small(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[21],
    model_name="Salesforce/moirai-1.0-R-small",
    device="cpu",
)

# Render the returned object in the notebook
display(uni2ts_small_results_lag21)

In [None]:
from IPython.display import display

# Forecast with the pretrained Moirai-small model and a 252-row context window
uni2ts_small_results_lag252 = run_forecast_uni2ts_small(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[252],
    model_name="Salesforce/moirai-1.0-R-small",
    device="cpu",
)

# Render the returned object in the notebook
display(uni2ts_small_results_lag252)

In [None]:
from IPython.display import display

# Forecast with the pretrained Moirai-small model and a 512-row context window
uni2ts_small_results_lag512 = run_forecast_uni2ts_small(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[512],
    model_name="Salesforce/moirai-1.0-R-small",
    device="cpu",
)

# Render the returned object in the notebook
display(uni2ts_small_results_lag512)

## Moirai-Small Portfolio

### Window Size 5

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the flat per-position transaction cost.

    The ``row`` argument is accepted for use with ``DataFrame.apply`` but is
    not inspected: every stock is charged the same 0.001.
    """
    flat_cost = 0.001
    return flat_cost

# calculate_transaction_cost ignores its row argument, so its single value can be
# broadcast to the whole column instead of being computed once per row with apply.
crsp_test_lagged['transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `uni2tssmall` to predict excess returns
def uni2tssmall_5_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=(5,)):
    """Run the Moirai-small forecast for each lag and attach its predictions.

    Args:
        crsp_train_lagged: Training frame, forwarded to the forecast function.
        crsp_test_lagged: Test frame; it is copied, not modified.
        lags: Window lengths to run, one forecast per value.

    Returns:
        A copy of ``crsp_test_lagged`` with, for every lag, the columns
        ``predicted_excess_returns_lag<lag>`` and
        ``uni2tssmall_<lag>_predicted_excess_returns``.

    Raises:
        ValueError: If the forecast function does not return a
            (metrics, frame) pair.
        KeyError: If the expected prediction column is missing afterwards.
    """
    out_sample_start = "2016-01-01"
    modified_crsp_test_lagged = crsp_test_lagged.copy()

    for lag in lags:
        # Pass each lag individually to uni2tssmall
        outcome = run_forecast_uni2ts_small(crsp_train_lagged, modified_crsp_test_lagged, lags=[lag], out_sample_start=out_sample_start)
        if not isinstance(outcome, tuple) or len(outcome) != 2:
            raise ValueError(f"Forecast for lag {lag} did not return a (metrics, frame) pair.")
        _metrics_df, modified_crsp_test_lagged = outcome

        # Check if the predicted column is generated correctly. The name is
        # built from the current lag, not from lags[0], so that every lag in a
        # multi-lag call gets its own column.
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col not in modified_crsp_test_lagged.columns:
            raise KeyError(f"Column '{pred_col}' not found after running forecast.")

        # Add the predicted returns to the DataFrame
        modified_crsp_test_lagged[f'uni2tssmall_{lag}_predicted_excess_returns'] = modified_crsp_test_lagged[pred_col].values

    return modified_crsp_test_lagged

# Get predicted excess returns using Uni2ts Small model
crsp_test_lagged = uni2tssmall_5_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=[5])

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def _leg_log_returns(leg):
    """Return (equal-weighted, value-weighted, mean cost) for one portfolio leg."""
    log_ret = np.log1p(leg['adjusted_ret'])
    equal_weighted = log_ret.mean()

    # Weights are market-cap magnitudes; only rows with both a return and a
    # weight enter the value-weighted average, so numerator and denominator match.
    weights = leg['market_cap_merged'].abs()
    usable = log_ret.notna() & weights.notna()
    total_weight = weights[usable].sum()
    if total_weight > 0:
        value_weighted = (log_ret[usable] * weights[usable]).sum() / total_weight
    else:
        # No usable weights: fall back to equal weighting instead of dividing by zero
        value_weighted = equal_weighted

    return equal_weighted, value_weighted, leg['transaction_cost'].mean()


def _portfolio_return_dict(eq_long, eq_short, vw_long, vw_short, cost_long, cost_short):
    """Assemble the twelve long / short / long-short results into one dict."""
    returns = {}
    for scheme, long_ret, short_ret in (('equal', eq_long, eq_short), ('value', vw_long, vw_short)):
        for suffix, long_cost, short_cost in (('with_cost', cost_long, cost_short), ('without_cost', 0.0, 0.0)):
            long_net = long_ret - long_cost
            # A short position earns the negative of the leg's return, less cost
            short_net = -short_ret - short_cost
            returns[f'{scheme}_long_log_return_{suffix}'] = long_net
            returns[f'{scheme}_short_log_return_{suffix}'] = short_net
            returns[f'{scheme}_long_short_log_return_{suffix}'] = long_net + short_net
    return returns


def compute_returns(group, predicted_col):
    """Compute one day's long, short and long-short portfolio log returns.

    Stocks are ranked on ``predicted_col``; the top 10% form the long leg and
    the bottom 10% the short leg (at least one stock per leg). Rows without a
    prediction are not ranked.

    Args:
        group: One date's cross-section with 'adjusted_ret',
            'market_cap_merged', 'transaction_cost' and ``predicted_col``.
        predicted_col: Name of the column holding the predicted returns.

    Returns:
        Dict with keys ``{equal,value}_{long,short,long_short}_log_return_
        {with_cost,without_cost}``. All values are 0.0 when no row has a
        prediction (no position is taken, so nothing is earned or paid).

    Raises:
        ValueError: If a required column is missing from ``group``.
    """
    # First verify required columns exist
    required_cols = ['adjusted_ret', 'market_cap_merged', 'transaction_cost']
    missing_cols = [col for col in required_cols if col not in group.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # Rank only stocks that actually have a prediction; otherwise NaN rows
    # would be selected into both legs on days without forecasts.
    ranked = group.dropna(subset=[predicted_col])
    if ranked.empty:
        return _portfolio_return_dict(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)

    # 10% of the ranked stocks per leg, never fewer than one
    n_positions = max(1, int(0.1 * len(ranked)))
    top_positive = ranked.nlargest(n_positions, predicted_col)
    top_negative = ranked.nsmallest(n_positions, predicted_col)

    eq_long, vw_long, cost_long = _leg_log_returns(top_positive)
    eq_short, vw_short, cost_short = _leg_log_returns(top_negative)

    return _portfolio_return_dict(eq_long, eq_short, vw_long, vw_short, cost_long, cost_short)
# Compute cumulative returns for each date with daily rebalancing
# Maps every compute_returns key to its cumulative column, in output column order
_cum_columns_lag5 = {
    'equal_long_log_return_with_cost': 'cum_EL_return_5_with_cost',
    'equal_short_log_return_with_cost': 'cum_ES_return_5_with_cost',
    'equal_long_short_log_return_with_cost': 'cum_ELS_return_5_with_cost',
    'value_long_log_return_with_cost': 'cum_VL_return_5_with_cost',
    'value_short_log_return_with_cost': 'cum_VS_return_5_with_cost',
    'value_long_short_log_return_with_cost': 'cum_VLS_return_5_with_cost',
    'equal_long_log_return_without_cost': 'cum_EL_return_5_without_cost',
    'equal_short_log_return_without_cost': 'cum_ES_return_5_without_cost',
    'equal_long_short_log_return_without_cost': 'cum_ELS_return_5_without_cost',
    'value_long_log_return_without_cost': 'cum_VL_return_5_without_cost',
    'value_short_log_return_without_cost': 'cum_VS_return_5_without_cost',
    'value_long_short_log_return_without_cost': 'cum_VLS_return_5_without_cost',
}

# Daily portfolio log returns for lag 5. groupby yields the dates in ascending
# order, which a running total needs; iterating unique() would follow the order
# in which dates first appear in the frame. It also avoids re-filtering the
# whole frame once per date.
_daily_dates_lag5 = []
_daily_rows_lag5 = []
for date, group in crsp_test_lagged.groupby('date'):
    _daily_dates_lag5.append(date)
    _daily_rows_lag5.append(compute_returns(group, f'uni2tssmall_{5}_predicted_excess_returns'))

_daily_log_returns_lag5 = pd.DataFrame(_daily_rows_lag5, columns=list(_cum_columns_lag5), dtype=float)

# Running totals of the daily log returns. skipna=False keeps the behaviour of a
# plain running sum: once a day is NaN, all later totals are NaN as well.
cumulative_log_returns_uni2tssmall_lag_5 = (
    _daily_log_returns_lag5.cumsum(skipna=False).rename(columns=_cum_columns_lag5)
)
cumulative_log_returns_uni2tssmall_lag_5.insert(0, 'date', _daily_dates_lag5)

# Dict-of-lists view of the same table, kept for cells that reference it
cumulative_log_returns_by_date_uni2tssmall5 = cumulative_log_returns_uni2tssmall_lag_5.to_dict('list')

# Final cumulative value of every portfolio (0 when there were no dates), kept
# under the original variable names
_final_totals_lag5 = (
    cumulative_log_returns_uni2tssmall_lag_5[list(_cum_columns_lag5.values())].iloc[-1].tolist()
    if len(cumulative_log_returns_uni2tssmall_lag_5)
    else [0] * len(_cum_columns_lag5)
)
(
    cum_EL_return_5_with_cost,
    cum_ES_return_5_with_cost,
    cum_ELS_return_5_with_cost,
    cum_VL_return_5_with_cost,
    cum_VS_return_5_with_cost,
    cum_VLS_return_5_with_cost,
    cum_EL_return_5_without_cost,
    cum_ES_return_5_without_cost,
    cum_ELS_return_5_without_cost,
    cum_VL_return_5_without_cost,
    cum_VS_return_5_without_cost,
    cum_VLS_return_5_without_cost,
) = _final_totals_lag5

# Display the cumulative returns DataFrame for lag 5
display(cumulative_log_returns_uni2tssmall_lag_5.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_uni2tssmall_lag_5.to_csv("cumulative_log_returns_uni2tssmall_lag_5.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound a sequence of simple daily returns into one total return."""
    growth_factors = 1 + daily_returns
    return np.prod(growth_factors) - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Annualize the compounded growth of a series of simple daily returns.

    Args:
        daily_returns: Array-like of simple per-period returns.
        periods: Number of periods per year used for annualization.

    Returns:
        The annualized return, or NaN when ``daily_returns`` is empty (the
        exponent ``periods / len`` is undefined for zero observations).
    """
    n_obs = len(daily_returns)
    if n_obs == 0:
        return np.nan
    total_growth = np.prod(1 + np.asarray(daily_returns, dtype=float))
    return total_growth ** (periods / n_obs) - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Annualized Sharpe ratio of a series of daily returns.

    Args:
        daily_returns: Array-like of per-period returns.
        risk_free_rate: Annual risk-free rate; it is divided by ``periods`` to
            get the per-period rate.
        periods: Number of periods per year.

    Returns:
        ``sqrt(periods) * mean(excess) / std(excess)`` using the population
        standard deviation, or NaN when the input is empty or the excess
        returns have zero dispersion.
    """
    daily_rf = risk_free_rate / periods  # Assuming 252 trading days
    excess_returns = np.asarray(daily_returns, dtype=float) - daily_rf
    if excess_returns.size == 0:
        return np.nan
    dispersion = excess_returns.std()
    if dispersion == 0:
        # The ratio is undefined for a constant series; avoid dividing by zero
        return np.nan
    return np.sqrt(periods) * excess_returns.mean() / dispersion

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    """Scale the standard deviation of daily returns by sqrt(periods)."""
    daily_sigma = np.std(daily_returns)
    annualization = np.sqrt(periods)
    return daily_sigma * annualization

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Largest peak-to-trough decline of the compounded return path.

    Args:
        daily_returns: Array-like of simple per-period returns.

    Returns:
        The most negative ``(value - running peak) / running peak`` as a
        non-positive number; 0.0 for an empty input.
    """
    returns = np.asarray(daily_returns, dtype=float)
    if returns.size == 0:
        return 0.0
    # Prepend the starting capital of 1.0 so a loss right at the start is
    # measured against it instead of against the already-reduced first value.
    wealth = np.concatenate(([1.0], np.cumprod(1 + returns)))
    running_peak = np.maximum.accumulate(wealth)
    drawdown = (wealth - running_peak) / running_peak
    return np.min(drawdown)

# Apply fixed transaction cost to the daily returns (for other metrics)
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Deduct the same absolute cost from every daily return."""
    net_returns = daily_returns - transaction_cost
    return net_returns

# Apply percentage-based transaction cost to the daily returns (for volatility and standard deviation)
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every daily return by (1 - transaction_cost_percentage)."""
    retained_share = 1 - transaction_cost_percentage
    return daily_returns * retained_share

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    'cum_EL_return_5_with_cost', 'cum_ES_return_5_with_cost', 'cum_ELS_return_5_with_cost',
    'cum_VL_return_5_with_cost', 'cum_VS_return_5_with_cost', 'cum_VLS_return_5_with_cost',
]

# Initialize metrics container (one list per output column, one entry per portfolio)
metrics = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio with transaction cost
for portfolio in portfolios_with_cost:
    cumulative_returns_uni2tssmall5_c = cumulative_log_returns_uni2tssmall_lag_5[portfolio].values

    # The column is a running sum of daily log returns whose first entry is
    # already the first day's return, so difference against a leading 0;
    # a bare np.diff would silently drop the first trading day.
    daily_log_returns = np.diff(cumulative_returns_uni2tssmall5_c, prepend=0.0)

    # The metric helpers compound (1 + r), so they need simple returns.
    daily_returns = np.expm1(daily_log_returns)

    # NOTE: no further cost is deducted here. The *_with_cost columns are
    # built from portfolio returns that compute_returns() has already reduced
    # by `transaction_cost`; subtracting 10 bps again would count it twice.
    cum_return_after_cost = cumulative_return(daily_returns)

    # Metrics calculations (all on the same net-of-cost daily returns)
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns after cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics['Portfolio'].append(portfolio)
    metrics['Annualized Return'].append(ann_return)
    metrics['Sharpe Ratio'].append(sharpe)
    metrics['Volatility'].append(vol)
    metrics['Standard Deviation'].append(std_dev)
    metrics['Max Drawdown'].append(max_draw)
    metrics['Cumulative Return'].append(cum_return_after_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tssmall5_c = pd.DataFrame(metrics)
display(metrics_uni2tssmall5_c)


# Same calculations for portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_5_without_cost', 'cum_ES_return_5_without_cost', 'cum_ELS_return_5_without_cost',
    'cum_VL_return_5_without_cost', 'cum_VS_return_5_without_cost', 'cum_VLS_return_5_without_cost',
]

# Initialize metrics container for portfolios without transaction cost
metrics_wc = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    cumulative_returns_uni2tssmall5_wc = cumulative_log_returns_uni2tssmall_lag_5[portfolio].values

    # The column is a running sum of daily log returns whose first entry is
    # already the first day's return, so difference against a leading 0;
    # a bare np.diff would silently drop the first trading day.
    daily_log_returns = np.diff(cumulative_returns_uni2tssmall5_wc, prepend=0.0)

    # The metric helpers compound (1 + r), so they need simple returns.
    daily_returns = np.expm1(daily_log_returns)

    # Calculate cumulative returns without transaction cost
    cum_return_without_cost = cumulative_return(daily_returns)

    # Metrics calculations
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns without cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics_wc['Portfolio'].append(portfolio)
    metrics_wc['Annualized Return'].append(ann_return)
    metrics_wc['Sharpe Ratio'].append(sharpe)
    metrics_wc['Volatility'].append(vol)
    metrics_wc['Standard Deviation'].append(std_dev)
    metrics_wc['Max Drawdown'].append(max_draw)
    metrics_wc['Cumulative Return'].append(cum_return_without_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tssmall5_wc = pd.DataFrame(metrics_wc)
display(metrics_uni2tssmall5_wc)

# Save the portfolio metrics with transaction costs
metrics_uni2tssmall5_c.to_csv('metrics_uni2tssmall5_with_cost.csv', index=False)

# Save the portfolio metrics without transaction costs
metrics_uni2tssmall5_wc.to_csv('metrics_uni2tssmall5_without_cost.csv', index=False)

### Window Size 21

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the transaction cost charged for one observation.

    Args:
        row: The observation being priced; not inspected, because the same
            flat rate applies to every row.

    Returns:
        0.001, i.e. 10 bps for both small and large cap stocks.
    """
    return 0.001  # 10 bps for both small and large cap stocks

# The cost does not depend on the row, so broadcast the scalar into the column
# instead of calling the function once per row through DataFrame.apply.
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `uni2tssmall` to predict excess returns
def uni2tssmall_21_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=(21,)):
    """Attach Uni2TS-small predictions for every requested lag to the test frame.

    For each lag, ``run_forecast_uni2ts_small`` is called with that single lag
    and the column ``predicted_excess_returns_lag{lag}`` it produces is copied
    to ``uni2tssmall_{lag}_predicted_excess_returns``.

    Args:
        crsp_train_lagged: Training frame, passed through to the forecaster.
        crsp_test_lagged: Test frame; a copy is taken before forecasting.
        lags: Iterable of lag/window sizes to forecast with.

    Returns:
        The frame returned by the forecaster, with one
        ``uni2tssmall_{lag}_predicted_excess_returns`` column per lag.

    Raises:
        KeyError: If the forecaster did not produce the expected prediction
            column for a lag.
    """
    out_sample_start = "2016-01-01"
    modified_crsp_test_lagged = crsp_test_lagged.copy()

    for lag in lags:
        # Pass each lag individually; the returned metrics frame is not needed here.
        _, modified_crsp_test_lagged = run_forecast_uni2ts_small(
            crsp_train_lagged,
            modified_crsp_test_lagged,
            lags=[lag],
            out_sample_start=out_sample_start,
        )

        # Look up the column for the lag being processed (not always the first
        # entry of `lags`), so that several lags can be requested at once.
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col not in modified_crsp_test_lagged.columns:
            raise KeyError(f"Column '{pred_col}' not found after running forecast.")

        # Add the predicted returns to the DataFrame under the model-specific name
        modified_crsp_test_lagged[f'uni2tssmall_{lag}_predicted_excess_returns'] = modified_crsp_test_lagged[pred_col].values

    return modified_crsp_test_lagged

# Get predicted excess returns using Uni2ts Small model
crsp_test_lagged = uni2tssmall_21_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=[21])

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def _leg_log_returns(leg):
    """Return (equal-weighted log return, cap-weighted log return, mean cost) of one leg."""
    log_returns = np.log1p(leg['adjusted_ret'])
    market_caps = leg['market_cap_merged']
    equal_weighted = log_returns.mean()
    value_weighted = (log_returns * market_caps).sum() / market_caps.sum()
    return equal_weighted, value_weighted, leg['transaction_cost'].mean()


def compute_returns(group, predicted_col):
    """Compute one period's long, short and long-short portfolio log returns.

    The rows of ``group`` are ranked by ``predicted_col``; the top 10% form
    the long leg and the bottom 10% the short leg. Each leg is evaluated
    equal-weighted and weighted by ``market_cap_merged``, with and without
    the mean ``transaction_cost`` of the leg deducted.

    Args:
        group: DataFrame holding one cross-section; must contain
            ``adjusted_ret``, ``market_cap_merged``, ``transaction_cost`` and
            ``predicted_col``.
        predicted_col: Name of the column used for ranking.

    Returns:
        Dict with the twelve keys ``{equal,value}_{long,short,long_short}``
        ``_log_return_{with,without}_cost``.

    Raises:
        ValueError: If a required column is missing from ``group``.
    """
    # First verify required columns exist
    required_cols = ['adjusted_ret', 'market_cap_merged', 'transaction_cost']
    missing_cols = [col for col in required_cols if col not in group.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # int() truncates, so a cross-section with fewer than ten rows would
    # select nothing and every return would be NaN; hold at least one name.
    n_select = max(1, int(0.1 * len(group)))
    long_leg = group.nlargest(n_select, predicted_col)
    short_leg = group.nsmallest(n_select, predicted_col)

    eq_long, vw_long, cost_long = _leg_log_returns(long_leg)
    eq_short, vw_short, cost_short = _leg_log_returns(short_leg)

    # Long legs earn the asset return; the cost reduces it.
    equal_long_log_return_without_cost = eq_long
    equal_long_log_return_with_cost = eq_long - cost_long
    value_long_log_return_without_cost = vw_long
    value_long_log_return_with_cost = vw_long - cost_long

    # Short legs earn the negated asset return; the cost still reduces it.
    equal_short_log_return_without_cost = -eq_short
    equal_short_log_return_with_cost = -eq_short - cost_short
    value_short_log_return_without_cost = -vw_short
    value_short_log_return_with_cost = -vw_short - cost_short

    # Long-short is the sum of both legs (so the cost is paid once per leg).
    equal_long_short_log_return_with_cost = equal_long_log_return_with_cost + equal_short_log_return_with_cost
    equal_long_short_log_return_without_cost = equal_long_log_return_without_cost + equal_short_log_return_without_cost
    value_long_short_log_return_with_cost = value_long_log_return_with_cost + value_short_log_return_with_cost
    value_long_short_log_return_without_cost = value_long_log_return_without_cost + value_short_log_return_without_cost

    return {
        'equal_long_log_return_with_cost': equal_long_log_return_with_cost,
        'equal_short_log_return_with_cost': equal_short_log_return_with_cost,
        'equal_long_short_log_return_with_cost': equal_long_short_log_return_with_cost,
        'equal_long_log_return_without_cost': equal_long_log_return_without_cost,
        'equal_short_log_return_without_cost': equal_short_log_return_without_cost,
        'equal_long_short_log_return_without_cost': equal_long_short_log_return_without_cost,
        'value_long_log_return_with_cost': value_long_log_return_with_cost,
        'value_short_log_return_with_cost': value_short_log_return_with_cost,
        'value_long_short_log_return_with_cost': value_long_short_log_return_with_cost,
        'value_long_log_return_without_cost': value_long_log_return_without_cost,
        'value_short_log_return_without_cost': value_short_log_return_without_cost,
        'value_long_short_log_return_without_cost': value_long_short_log_return_without_cost
    }
# Compute cumulative returns for each date with daily rebalancing

# Short portfolio tags used in the output column names, mapped to the prefix
# compute_returns() uses for the same leg.
_leg_prefixes_21 = {
    'EL': 'equal_long',
    'ES': 'equal_short',
    'ELS': 'equal_long_short',
    'VL': 'value_long',
    'VS': 'value_short',
    'VLS': 'value_long_short',
}

# Output column -> key of the compute_returns() result that feeds it.
# Ordered as all *_with_cost columns first, then all *_without_cost columns.
_column_sources_21 = {
    f'cum_{tag}_return_21_{variant}': f'{prefix}_log_return_{variant}'
    for variant in ('with_cost', 'without_cost')
    for tag, prefix in _leg_prefixes_21.items()
}

# Running sum of daily log returns per output column, starting at 0
_running_totals_21 = dict.fromkeys(_column_sources_21, 0)

# One list per output column; a row is appended for every date
cumulative_log_returns_by_date_uni2tssmall21 = {'date': []}
cumulative_log_returns_by_date_uni2tssmall21.update((column, []) for column in _column_sources_21)

# Split the frame by date once (sort=False keeps the order in which dates first
# appear) instead of re-filtering the whole frame for every date.
for date, group in crsp_test_lagged.groupby('date', sort=False):
    # Compute returns for lag 21
    returns = compute_returns(group, 'uni2tssmall_21_predicted_excess_returns')

    cumulative_log_returns_by_date_uni2tssmall21['date'].append(date)
    for column, source in _column_sources_21.items():
        # Log returns are additive, so the cumulative value is a running sum.
        _running_totals_21[column] += returns[source]
        cumulative_log_returns_by_date_uni2tssmall21[column].append(_running_totals_21[column])

# Keep the final running totals available under their individual names
(
    cum_EL_return_21_with_cost, cum_ES_return_21_with_cost, cum_ELS_return_21_with_cost,
    cum_VL_return_21_with_cost, cum_VS_return_21_with_cost, cum_VLS_return_21_with_cost,
    cum_EL_return_21_without_cost, cum_ES_return_21_without_cost, cum_ELS_return_21_without_cost,
    cum_VL_return_21_without_cost, cum_VS_return_21_without_cost, cum_VLS_return_21_without_cost,
) = _running_totals_21.values()

# Convert to DataFrame for lag 21
cumulative_log_returns_uni2tssmall_lag_21 = pd.DataFrame(cumulative_log_returns_by_date_uni2tssmall21)

# Display the cumulative returns DataFrame for lag 21
display(cumulative_log_returns_uni2tssmall_lag_21.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_uni2tssmall_lag_21.to_csv("cumulative_log_returns_uni2tssmall_lag_21.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound a series of simple periodic returns into one total return.

    Every return ``r`` becomes a growth factor ``1 + r``; the factors are
    multiplied together and 1 is subtracted from the product.
    """
    growth_factors = 1 + daily_returns
    total_growth = np.prod(growth_factors)
    return total_growth - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Annualise the compounded growth of a series of simple returns.

    Args:
        daily_returns: Simple per-period returns (ndarray, Series or list).
        periods: Number of return periods that make up one year.

    Returns:
        ``total_growth ** (periods / n) - 1`` where ``total_growth`` is the
        product of ``1 + r`` over all ``n`` observations, or ``nan`` when the
        series is empty.
    """
    returns = np.asarray(daily_returns, dtype=float)
    n_obs = len(returns)
    if n_obs == 0:
        # With no observations the exponent periods / 0 is undefined.
        return np.nan
    total_growth = np.prod(1 + returns)
    return total_growth ** (periods / n_obs) - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Annualised Sharpe ratio of a series of periodic returns.

    Args:
        daily_returns: Per-period returns (ndarray, Series or list).
        risk_free_rate: Annual risk-free rate; divided by ``periods`` to get
            the per-period rate that is subtracted from every return.
        periods: Number of return periods per year (annualisation factor).

    Returns:
        ``sqrt(periods) * mean(excess) / std(excess)`` using the population
        standard deviation (ddof=0), or ``nan`` when the series is empty or
        has zero dispersion.
    """
    # Coerce first so that .std() always means NumPy's ddof=0; a pandas
    # Series would otherwise silently switch to ddof=1.
    returns = np.asarray(daily_returns, dtype=float)
    daily_rf = risk_free_rate / periods
    excess_returns = returns - daily_rf
    if excess_returns.size == 0:
        return np.nan
    dispersion = excess_returns.std()
    if dispersion == 0:
        # The ratio is undefined without any variability in the returns.
        return np.nan
    return np.sqrt(periods) * excess_returns.mean() / dispersion

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    """Scale the standard deviation of the returns to an annual figure.

    The standard deviation comes from ``np.std`` with its default settings
    and is multiplied by ``sqrt(periods)``.
    """
    per_period_std = np.std(daily_returns)
    annualisation_factor = np.sqrt(periods)
    return per_period_std * annualisation_factor

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Largest peak-to-trough decline of the compounded wealth path.

    The wealth path starts at 1.0 (the initial capital) and is compounded
    with ``1 + r`` for every return. Including the starting value in the
    running peak means a loss in the very first periods is measured against
    the initial capital instead of being ignored.

    Args:
        daily_returns: Simple per-period returns (ndarray, Series or list).

    Returns:
        The most negative relative distance from the running peak; ``0.0``
        when the path never falls below a previous peak or the input is
        empty.
    """
    returns = np.asarray(daily_returns, dtype=float)
    wealth = np.concatenate(([1.0], np.cumprod(1 + returns)))
    running_peak = np.maximum.accumulate(wealth)
    drawdown = (wealth - running_peak) / running_peak
    return np.min(drawdown)

# Apply fixed transaction cost to the daily returns
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Return the returns reduced by a constant ``transaction_cost`` each."""
    net_returns = daily_returns - transaction_cost
    return net_returns

# Apply percentage-based transaction cost to the daily returns
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every return by ``1 - transaction_cost_percentage``."""
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    'cum_EL_return_21_with_cost', 'cum_ES_return_21_with_cost', 'cum_ELS_return_21_with_cost',
    'cum_VL_return_21_with_cost', 'cum_VS_return_21_with_cost', 'cum_VLS_return_21_with_cost',
]

# Initialize metrics container (one list per output column, one entry per portfolio)
metrics = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio with transaction cost
for portfolio in portfolios_with_cost:
    cumulative_returns_uni2tssmall21_c = cumulative_log_returns_uni2tssmall_lag_21[portfolio].values

    # The column is a running sum of daily log returns whose first entry is
    # already the first day's return, so difference against a leading 0;
    # a bare np.diff would silently drop the first trading day.
    daily_log_returns = np.diff(cumulative_returns_uni2tssmall21_c, prepend=0.0)

    # The metric helpers compound (1 + r), so they need simple returns.
    daily_returns = np.expm1(daily_log_returns)

    # NOTE: no further cost is deducted here. The *_with_cost columns are
    # built from portfolio returns that compute_returns() has already reduced
    # by `transaction_cost`; subtracting 10 bps again would count it twice.
    cum_return_after_cost = cumulative_return(daily_returns)

    # Metrics calculations (all on the same net-of-cost daily returns)
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns after cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics['Portfolio'].append(portfolio)
    metrics['Annualized Return'].append(ann_return)
    metrics['Sharpe Ratio'].append(sharpe)
    metrics['Volatility'].append(vol)
    metrics['Standard Deviation'].append(std_dev)
    metrics['Max Drawdown'].append(max_draw)
    metrics['Cumulative Return'].append(cum_return_after_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tssmall21_c = pd.DataFrame(metrics)
display(metrics_uni2tssmall21_c)

# Same calculations for portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_21_without_cost', 'cum_ES_return_21_without_cost', 'cum_ELS_return_21_without_cost',
    'cum_VL_return_21_without_cost', 'cum_VS_return_21_without_cost', 'cum_VLS_return_21_without_cost',
]

# Initialize metrics container for portfolios without transaction cost
metrics_wc = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    cumulative_returns_uni2tssmall21_wc = cumulative_log_returns_uni2tssmall_lag_21[portfolio].values

    # The column is a running sum of daily log returns whose first entry is
    # already the first day's return, so difference against a leading 0;
    # a bare np.diff would silently drop the first trading day.
    daily_log_returns = np.diff(cumulative_returns_uni2tssmall21_wc, prepend=0.0)

    # The metric helpers compound (1 + r), so they need simple returns.
    daily_returns = np.expm1(daily_log_returns)

    # Calculate cumulative returns without transaction cost
    cum_return_without_cost = cumulative_return(daily_returns)

    # Metrics calculations
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns without cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics_wc['Portfolio'].append(portfolio)
    metrics_wc['Annualized Return'].append(ann_return)
    metrics_wc['Sharpe Ratio'].append(sharpe)
    metrics_wc['Volatility'].append(vol)
    metrics_wc['Standard Deviation'].append(std_dev)
    metrics_wc['Max Drawdown'].append(max_draw)
    metrics_wc['Cumulative Return'].append(cum_return_without_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tssmall21_wc = pd.DataFrame(metrics_wc)
display(metrics_uni2tssmall21_wc)

# Save the portfolio metrics with transaction costs
metrics_uni2tssmall21_c.to_csv('metrics_uni2tssmall21_with_cost.csv', index=False)

# Save the portfolio metrics without transaction costs
metrics_uni2tssmall21_wc.to_csv('metrics_uni2tssmall21_without_cost.csv', index=False)

### Window Size 252

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the transaction cost charged for one observation.

    Args:
        row: The observation being priced; not inspected, because the same
            flat rate applies to every row.

    Returns:
        0.001, i.e. 10 bps for both small and large cap stocks.
    """
    return 0.001  # 10 bps for both small and large cap stocks

# The cost does not depend on the row, so broadcast the scalar into the column
# instead of calling the function once per row through DataFrame.apply.
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `uni2tssmall` to predict excess returns
def uni2tssmall_252_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=(252,)):
    """Attach Uni2TS-small predictions for every requested lag to the test frame.

    For each lag, ``run_forecast_uni2ts_small`` is called with that single lag
    and the column ``predicted_excess_returns_lag{lag}`` it produces is copied
    to ``uni2tssmall_{lag}_predicted_excess_returns``.

    Args:
        crsp_train_lagged: Training frame, passed through to the forecaster.
        crsp_test_lagged: Test frame; a copy is taken before forecasting.
        lags: Iterable of lag/window sizes to forecast with.

    Returns:
        The frame returned by the forecaster, with one
        ``uni2tssmall_{lag}_predicted_excess_returns`` column per lag.

    Raises:
        KeyError: If the forecaster did not produce the expected prediction
            column for a lag.
    """
    out_sample_start = "2016-01-01"
    modified_crsp_test_lagged = crsp_test_lagged.copy()

    for lag in lags:
        # Pass each lag individually; the returned metrics frame is not needed here.
        _, modified_crsp_test_lagged = run_forecast_uni2ts_small(
            crsp_train_lagged,
            modified_crsp_test_lagged,
            lags=[lag],
            out_sample_start=out_sample_start,
        )

        # Look up the column for the lag being processed (not always the first
        # entry of `lags`), so that several lags can be requested at once.
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col not in modified_crsp_test_lagged.columns:
            raise KeyError(f"Column '{pred_col}' not found after running forecast.")

        # Add the predicted returns to the DataFrame under the model-specific name
        modified_crsp_test_lagged[f'uni2tssmall_{lag}_predicted_excess_returns'] = modified_crsp_test_lagged[pred_col].values

    return modified_crsp_test_lagged

# Get predicted excess returns using Uni2ts Small model
crsp_test_lagged = uni2tssmall_252_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=[252])

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def _leg_log_returns(leg):
    """Return (equal-weighted log return, cap-weighted log return, mean cost) of one leg."""
    log_returns = np.log1p(leg['adjusted_ret'])
    market_caps = leg['market_cap_merged']
    equal_weighted = log_returns.mean()
    value_weighted = (log_returns * market_caps).sum() / market_caps.sum()
    return equal_weighted, value_weighted, leg['transaction_cost'].mean()


def compute_returns(group, predicted_col):
    """Compute one period's long, short and long-short portfolio log returns.

    The rows of ``group`` are ranked by ``predicted_col``; the top 10% form
    the long leg and the bottom 10% the short leg. Each leg is evaluated
    equal-weighted and weighted by ``market_cap_merged``, with and without
    the mean ``transaction_cost`` of the leg deducted.

    Args:
        group: DataFrame holding one cross-section; must contain
            ``adjusted_ret``, ``market_cap_merged``, ``transaction_cost`` and
            ``predicted_col``.
        predicted_col: Name of the column used for ranking.

    Returns:
        Dict with the twelve keys ``{equal,value}_{long,short,long_short}``
        ``_log_return_{with,without}_cost``.

    Raises:
        ValueError: If a required column is missing from ``group``.
    """
    # First verify required columns exist
    required_cols = ['adjusted_ret', 'market_cap_merged', 'transaction_cost']
    missing_cols = [col for col in required_cols if col not in group.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # int() truncates, so a cross-section with fewer than ten rows would
    # select nothing and every return would be NaN; hold at least one name.
    n_select = max(1, int(0.1 * len(group)))
    long_leg = group.nlargest(n_select, predicted_col)
    short_leg = group.nsmallest(n_select, predicted_col)

    eq_long, vw_long, cost_long = _leg_log_returns(long_leg)
    eq_short, vw_short, cost_short = _leg_log_returns(short_leg)

    # Long legs earn the asset return; the cost reduces it.
    equal_long_log_return_without_cost = eq_long
    equal_long_log_return_with_cost = eq_long - cost_long
    value_long_log_return_without_cost = vw_long
    value_long_log_return_with_cost = vw_long - cost_long

    # Short legs earn the negated asset return; the cost still reduces it.
    equal_short_log_return_without_cost = -eq_short
    equal_short_log_return_with_cost = -eq_short - cost_short
    value_short_log_return_without_cost = -vw_short
    value_short_log_return_with_cost = -vw_short - cost_short

    # Long-short is the sum of both legs (so the cost is paid once per leg).
    equal_long_short_log_return_with_cost = equal_long_log_return_with_cost + equal_short_log_return_with_cost
    equal_long_short_log_return_without_cost = equal_long_log_return_without_cost + equal_short_log_return_without_cost
    value_long_short_log_return_with_cost = value_long_log_return_with_cost + value_short_log_return_with_cost
    value_long_short_log_return_without_cost = value_long_log_return_without_cost + value_short_log_return_without_cost

    return {
        'equal_long_log_return_with_cost': equal_long_log_return_with_cost,
        'equal_short_log_return_with_cost': equal_short_log_return_with_cost,
        'equal_long_short_log_return_with_cost': equal_long_short_log_return_with_cost,
        'equal_long_log_return_without_cost': equal_long_log_return_without_cost,
        'equal_short_log_return_without_cost': equal_short_log_return_without_cost,
        'equal_long_short_log_return_without_cost': equal_long_short_log_return_without_cost,
        'value_long_log_return_with_cost': value_long_log_return_with_cost,
        'value_short_log_return_with_cost': value_short_log_return_with_cost,
        'value_long_short_log_return_with_cost': value_long_short_log_return_with_cost,
        'value_long_log_return_without_cost': value_long_log_return_without_cost,
        'value_short_log_return_without_cost': value_short_log_return_without_cost,
        'value_long_short_log_return_without_cost': value_long_short_log_return_without_cost
    }
# Compute cumulative returns for each date with daily rebalancing

# Short portfolio tags used in the output column names, mapped to the prefix
# compute_returns() uses for the same leg.
_leg_prefixes_252 = {
    'EL': 'equal_long',
    'ES': 'equal_short',
    'ELS': 'equal_long_short',
    'VL': 'value_long',
    'VS': 'value_short',
    'VLS': 'value_long_short',
}

# Output column -> key of the compute_returns() result that feeds it.
# Ordered as all *_with_cost columns first, then all *_without_cost columns.
_column_sources_252 = {
    f'cum_{tag}_return_252_{variant}': f'{prefix}_log_return_{variant}'
    for variant in ('with_cost', 'without_cost')
    for tag, prefix in _leg_prefixes_252.items()
}

# Running sum of daily log returns per output column, starting at 0
_running_totals_252 = dict.fromkeys(_column_sources_252, 0)

# One list per output column; a row is appended for every date
cumulative_log_returns_by_date_uni2tssmall252 = {'date': []}
cumulative_log_returns_by_date_uni2tssmall252.update((column, []) for column in _column_sources_252)

# Split the frame by date once (sort=False keeps the order in which dates first
# appear) instead of re-filtering the whole frame for every date.
for date, group in crsp_test_lagged.groupby('date', sort=False):
    # Compute returns for lag 252
    returns = compute_returns(group, 'uni2tssmall_252_predicted_excess_returns')

    cumulative_log_returns_by_date_uni2tssmall252['date'].append(date)
    for column, source in _column_sources_252.items():
        # Log returns are additive, so the cumulative value is a running sum.
        _running_totals_252[column] += returns[source]
        cumulative_log_returns_by_date_uni2tssmall252[column].append(_running_totals_252[column])

# Keep the final running totals available under their individual names
(
    cum_EL_return_252_with_cost, cum_ES_return_252_with_cost, cum_ELS_return_252_with_cost,
    cum_VL_return_252_with_cost, cum_VS_return_252_with_cost, cum_VLS_return_252_with_cost,
    cum_EL_return_252_without_cost, cum_ES_return_252_without_cost, cum_ELS_return_252_without_cost,
    cum_VL_return_252_without_cost, cum_VS_return_252_without_cost, cum_VLS_return_252_without_cost,
) = _running_totals_252.values()

# Convert to DataFrame for lag 252
cumulative_log_returns_uni2tssmall_lag_252 = pd.DataFrame(cumulative_log_returns_by_date_uni2tssmall252)

# Display the cumulative returns DataFrame for lag 252
display(cumulative_log_returns_uni2tssmall_lag_252.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_uni2tssmall_lag_252.to_csv("cumulative_log_returns_uni2tssmall_lag_252.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound a series of simple periodic returns into one total return.

    Every return ``r`` becomes a growth factor ``1 + r``; the factors are
    multiplied together and 1 is subtracted from the product.
    """
    growth_factors = 1 + daily_returns
    total_growth = np.prod(growth_factors)
    return total_growth - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Annualise the compounded growth of a series of simple returns.

    Args:
        daily_returns: Simple per-period returns (ndarray, Series or list).
        periods: Number of return periods that make up one year.

    Returns:
        ``total_growth ** (periods / n) - 1`` where ``total_growth`` is the
        product of ``1 + r`` over all ``n`` observations, or ``nan`` when the
        series is empty.
    """
    returns = np.asarray(daily_returns, dtype=float)
    n_obs = len(returns)
    if n_obs == 0:
        # With no observations the exponent periods / 0 is undefined.
        return np.nan
    total_growth = np.prod(1 + returns)
    return total_growth ** (periods / n_obs) - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Annualised Sharpe ratio of a series of periodic returns.

    Args:
        daily_returns: Per-period returns (ndarray, Series or list).
        risk_free_rate: Annual risk-free rate; divided by ``periods`` to get
            the per-period rate that is subtracted from every return.
        periods: Number of return periods per year (annualisation factor).

    Returns:
        ``sqrt(periods) * mean(excess) / std(excess)`` using the population
        standard deviation (ddof=0), or ``nan`` when the series is empty or
        has zero dispersion.
    """
    # Coerce first so that .std() always means NumPy's ddof=0; a pandas
    # Series would otherwise silently switch to ddof=1.
    returns = np.asarray(daily_returns, dtype=float)
    daily_rf = risk_free_rate / periods
    excess_returns = returns - daily_rf
    if excess_returns.size == 0:
        return np.nan
    dispersion = excess_returns.std()
    if dispersion == 0:
        # The ratio is undefined without any variability in the returns.
        return np.nan
    return np.sqrt(periods) * excess_returns.mean() / dispersion

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    """Scale the standard deviation of the returns to an annual figure.

    The standard deviation comes from ``np.std`` with its default settings
    and is multiplied by ``sqrt(periods)``.
    """
    per_period_std = np.std(daily_returns)
    annualisation_factor = np.sqrt(periods)
    return per_period_std * annualisation_factor

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Largest peak-to-trough decline of the compounded wealth path.

    The wealth path starts at 1.0 (the initial capital) and is compounded
    with ``1 + r`` for every return. Including the starting value in the
    running peak means a loss in the very first periods is measured against
    the initial capital instead of being ignored.

    Args:
        daily_returns: Simple per-period returns (ndarray, Series or list).

    Returns:
        The most negative relative distance from the running peak; ``0.0``
        when the path never falls below a previous peak or the input is
        empty.
    """
    returns = np.asarray(daily_returns, dtype=float)
    wealth = np.concatenate(([1.0], np.cumprod(1 + returns)))
    running_peak = np.maximum.accumulate(wealth)
    drawdown = (wealth - running_peak) / running_peak
    return np.min(drawdown)

# Apply fixed transaction cost to the daily returns (for other metrics)
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Return the returns reduced by a constant ``transaction_cost`` each."""
    net_returns = daily_returns - transaction_cost
    return net_returns

# Apply percentage-based transaction cost to the daily returns (for volatility and standard deviation)
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every return by ``1 - transaction_cost_percentage``."""
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    'cum_EL_return_252_with_cost', 'cum_ES_return_252_with_cost', 'cum_ELS_return_252_with_cost',
    'cum_VL_return_252_with_cost', 'cum_VS_return_252_with_cost', 'cum_VLS_return_252_with_cost',
]

# Portfolio names (without transaction cost)
portfolios_without_cost = [
    'cum_EL_return_252_without_cost', 'cum_ES_return_252_without_cost', 'cum_ELS_return_252_without_cost',
    'cum_VL_return_252_without_cost', 'cum_VS_return_252_without_cost', 'cum_VLS_return_252_without_cost',
]


def _summarise_portfolio_metrics(cumulative_log_returns, portfolio_names):
    """Compute the performance metrics of several cumulative log-return series.

    Args:
        cumulative_log_returns: DataFrame with one cumulative log-return
            column per portfolio (one row per rebalancing date).
        portfolio_names: Names of the columns to evaluate.

    Returns:
        A dict of equally long lists (one entry per portfolio) keyed by
        metric name, ready to be passed to ``pd.DataFrame``.
    """
    summary = {
        'Portfolio': [],
        'Annualized Return': [],
        'Sharpe Ratio': [],
        'Volatility': [],
        'Standard Deviation': [],
        'Max Drawdown': [],
        'Cumulative Return': []
    }

    for portfolio in portfolio_names:
        cumulative_log = cumulative_log_returns[portfolio].to_numpy(dtype=float)

        # The first cumulative value already equals the first day's return,
        # so difference against a starting level of 0 to keep that day.
        daily_log_returns = np.diff(cumulative_log, prepend=0.0)

        # The metric helpers compound with prod(1 + r), i.e. they expect
        # simple returns; convert the log returns accordingly.
        daily_returns = np.expm1(daily_log_returns)

        summary['Portfolio'].append(portfolio)
        summary['Annualized Return'].append(annualized_return(daily_returns))
        summary['Sharpe Ratio'].append(sharpe_ratio(daily_returns))
        summary['Volatility'].append(calculate_volatility(daily_returns))
        summary['Standard Deviation'].append(np.std(daily_returns))
        summary['Max Drawdown'].append(maximum_drawdown(daily_returns))
        summary['Cumulative Return'].append(cumulative_return(daily_returns))

    return summary


# Metrics for the portfolios with transaction cost.
# NOTE(review): assumes the `*_with_cost` series were accumulated net of the
# per-day transaction cost (as the 512-day window in this file does), so no
# further cost is deducted here -- confirm for the 252-day construction.
metrics = _summarise_portfolio_metrics(cumulative_log_returns_uni2tssmall_lag_252, portfolios_with_cost)
metrics_uni2tssmall252_c = pd.DataFrame(metrics)
display(metrics_uni2tssmall252_c)

# Same calculations for the portfolios without transaction cost
metrics_wc = _summarise_portfolio_metrics(cumulative_log_returns_uni2tssmall_lag_252, portfolios_without_cost)
metrics_uni2tssmall252_wc = pd.DataFrame(metrics_wc)
display(metrics_uni2tssmall252_wc)

# Save the portfolio metrics with transaction costs
metrics_uni2tssmall252_c.to_csv('metrics_uni2tssmall252_with_cost.csv', index=False)

# Save the portfolio metrics without transaction costs
metrics_uni2tssmall252_wc.to_csv('metrics_uni2tssmall252_without_cost.csv', index=False)

### Window Size 512

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the transaction cost charged for one stock-day row.

    The cost is a flat 10 bps for every stock; ``row`` is ignored and only
    kept so the function can still be used with ``DataFrame.apply``.
    """
    return 0.001  # 10 bps for both small and large cap stocks


# The cost does not depend on the row, so broadcast the scalar to the whole
# column instead of calling the function once per row.
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `uni2tssmall` to predict excess returns
def uni2tssmall_512_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=None):
    """Attach Uni2TS-small predictions to a copy of the test frame.

    For every lag, ``run_forecast_uni2ts_small`` is called and its
    ``predicted_excess_returns_lag{lag}`` column is copied into a
    model-specific ``uni2tssmall_{lag}_predicted_excess_returns`` column.

    Args:
        crsp_train_lagged: Training frame, forwarded to the forecast function.
        crsp_test_lagged: Test frame; it is copied, not modified in place.
        lags: Window sizes to forecast with; defaults to ``[512]``.

    Returns:
        The test frame returned by the last forecast call, extended with the
        model-specific prediction column(s).

    Raises:
        KeyError: If the forecast did not produce the expected column.
    """
    if lags is None:
        lags = [512]

    out_sample_start = "2016-01-01"
    modified_crsp_test_lagged = crsp_test_lagged.copy()

    for lag in lags:
        # Pass each lag individually to uni2tssmall; the metrics frame that
        # comes back first is not needed here.
        _, modified_crsp_test_lagged = run_forecast_uni2ts_small(
            crsp_train_lagged, modified_crsp_test_lagged, lags=[lag], out_sample_start=out_sample_start
        )

        # Check the column of the lag that was just forecast (not always the first one)
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col not in modified_crsp_test_lagged.columns:
            raise KeyError(f"Column '{pred_col}' not found after running forecast.")

        # Add the predicted returns to the DataFrame
        modified_crsp_test_lagged[f'uni2tssmall_{lag}_predicted_excess_returns'] = modified_crsp_test_lagged[pred_col].values

    return modified_crsp_test_lagged

# Get predicted excess returns using Uni2ts Small model
crsp_test_lagged = uni2tssmall_512_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=[512])

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute one day's long, short and long-short portfolio log returns.

    The top 10% of rows by ``predicted_col`` form the long leg and the
    bottom 10% the short leg (``int(0.1 * len(group))`` rows each, so groups
    with fewer than 10 rows select no stocks). Each leg is evaluated equal-
    weighted and weighted by ``market_cap_merged``, with and without the mean
    ``transaction_cost`` of its constituents deducted.

    Args:
        group: DataFrame of one rebalancing date with the columns
            ``adjusted_ret``, ``market_cap_merged``, ``transaction_cost`` and
            ``predicted_col``.
        predicted_col: Name of the column holding the predicted returns.

    Returns:
        Dict with the twelve ``{equal,value}_{long,short,long_short}_log_return_
        {with,without}_cost`` entries. Short-leg entries are the profit of the
        short position, i.e. the negated return of the shorted stocks.

    Raises:
        ValueError: If one of the required columns is missing.
    """
    # First verify required columns exist
    required_cols = ['adjusted_ret', 'market_cap_merged', 'transaction_cost']
    missing_cols = [col for col in required_cols if col not in group.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    n_select = int(0.1 * len(group))
    # Long position (Top 10% based on predicted returns)
    top_positive = group.nlargest(n_select, predicted_col)
    # Short position (Bottom 10% based on predicted returns)
    top_negative = group.nsmallest(n_select, predicted_col)

    long_log_returns = np.log1p(top_positive['adjusted_ret'])
    short_log_returns = np.log1p(top_negative['adjusted_ret'])
    long_cost = top_positive['transaction_cost'].mean()
    short_cost = top_negative['transaction_cost'].mean()

    # Equal-weighted legs; a short position earns the negative of the stock return
    equal_long_log_return_without_cost = long_log_returns.mean()
    equal_short_log_return_without_cost = -short_log_returns.mean()

    # Value-weighted legs based on market cap
    total_market_cap_positive = top_positive['market_cap_merged'].sum()
    total_market_cap_negative = top_negative['market_cap_merged'].sum()
    value_long_log_return_without_cost = (
        (long_log_returns * top_positive['market_cap_merged']).sum() / total_market_cap_positive
    )
    value_short_log_return_without_cost = -(
        (short_log_returns * top_negative['market_cap_merged']).sum() / total_market_cap_negative
    )

    # Trading costs reduce the profit of both the long and the short leg
    equal_long_log_return_with_cost = equal_long_log_return_without_cost - long_cost
    equal_short_log_return_with_cost = equal_short_log_return_without_cost - short_cost
    value_long_log_return_with_cost = value_long_log_return_without_cost - long_cost
    value_short_log_return_with_cost = value_short_log_return_without_cost - short_cost

    # Combine Long and Short to get Long-Short return
    equal_long_short_log_return_with_cost = equal_long_log_return_with_cost + equal_short_log_return_with_cost
    equal_long_short_log_return_without_cost = equal_long_log_return_without_cost + equal_short_log_return_without_cost
    value_long_short_log_return_with_cost = value_long_log_return_with_cost + value_short_log_return_with_cost
    value_long_short_log_return_without_cost = value_long_log_return_without_cost + value_short_log_return_without_cost

    return {
        'equal_long_log_return_with_cost': equal_long_log_return_with_cost,
        'equal_short_log_return_with_cost': equal_short_log_return_with_cost,
        'equal_long_short_log_return_with_cost': equal_long_short_log_return_with_cost,
        'equal_long_log_return_without_cost': equal_long_log_return_without_cost,
        'equal_short_log_return_without_cost': equal_short_log_return_without_cost,
        'equal_long_short_log_return_without_cost': equal_long_short_log_return_without_cost,
        'value_long_log_return_with_cost': value_long_log_return_with_cost,
        'value_short_log_return_with_cost': value_short_log_return_with_cost,
        'value_long_short_log_return_with_cost': value_long_short_log_return_with_cost,
        'value_long_log_return_without_cost': value_long_log_return_without_cost,
        'value_short_log_return_without_cost': value_short_log_return_without_cost,
        'value_long_short_log_return_without_cost': value_long_short_log_return_without_cost
    }
# Compute cumulative returns for each date with daily rebalancing
# Daily return produced by compute_returns -> cumulative output column (lag 512)
_CUM_COLUMNS_512 = {
    'equal_long_log_return_with_cost': 'cum_EL_return_512_with_cost',
    'equal_short_log_return_with_cost': 'cum_ES_return_512_with_cost',
    'equal_long_short_log_return_with_cost': 'cum_ELS_return_512_with_cost',
    'value_long_log_return_with_cost': 'cum_VL_return_512_with_cost',
    'value_short_log_return_with_cost': 'cum_VS_return_512_with_cost',
    'value_long_short_log_return_with_cost': 'cum_VLS_return_512_with_cost',
    'equal_long_log_return_without_cost': 'cum_EL_return_512_without_cost',
    'equal_short_log_return_without_cost': 'cum_ES_return_512_without_cost',
    'equal_long_short_log_return_without_cost': 'cum_ELS_return_512_without_cost',
    'value_long_log_return_without_cost': 'cum_VL_return_512_without_cost',
    'value_short_log_return_without_cost': 'cum_VS_return_512_without_cost',
    'value_long_short_log_return_without_cost': 'cum_VLS_return_512_without_cost',
}

# Daily portfolio returns for lag 512. groupby splits the frame in a single
# pass and yields the dates in ascending order, so the running totals below
# follow calendar time regardless of the row order of the frame.
daily_rows_512 = []
for date, group in crsp_test_lagged.groupby('date'):
    returns = compute_returns(group, f'uni2tssmall_{512}_predicted_excess_returns')
    daily_row = {'date': date}
    for return_key, cum_col in _CUM_COLUMNS_512.items():
        daily_row[cum_col] = returns[return_key]
    daily_rows_512.append(daily_row)

# Convert to DataFrame for lag 512
cum_cols_512 = list(_CUM_COLUMNS_512.values())
cumulative_log_returns_uni2tssmall_lag_512 = pd.DataFrame(daily_rows_512, columns=['date'] + cum_cols_512)

# Log returns add up over time: the running sum is the cumulative log return.
# skipna=False lets a NaN day propagate, exactly like a plain running total.
cumulative_log_returns_uni2tssmall_lag_512[cum_cols_512] = (
    cumulative_log_returns_uni2tssmall_lag_512[cum_cols_512].cumsum(skipna=False)
)

# Same data as a dict of lists, keyed by column name
cumulative_log_returns_by_date_uni2tssmall512 = cumulative_log_returns_uni2tssmall_lag_512.to_dict(orient='list')

# Display the cumulative returns DataFrame for lag 512
display(cumulative_log_returns_uni2tssmall_lag_512.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_uni2tssmall_lag_512.to_csv("cumulative_log_returns_uni2tssmall_lag_512.csv", index=False)

In [None]:
# Total compounded return over the whole sample
def cumulative_return(daily_returns):
    """Compound per-period simple returns into a single total return.

    Multiplies the growth factors ``1 + r`` of every period and subtracts
    one from the product.
    """
    growth_factors = 1 + daily_returns
    return np.prod(growth_factors) - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Return the geometric annualised return of a series of simple returns.

    Args:
        daily_returns: Sized array-like of per-period simple returns.
        periods: Number of periods that make up one year (252 trading days
            by default).

    Returns:
        The compounded growth over the sample raised to the power
        ``periods / len(daily_returns)``, minus one. ``nan`` when the
        sample is empty, because the exponent is undefined in that case.
    """
    n_obs = len(daily_returns)
    if n_obs == 0:
        # Guard the division below instead of raising ZeroDivisionError.
        return np.nan
    total_growth = np.prod(1 + daily_returns)
    return total_growth ** (periods / n_obs) - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Return the annualised Sharpe ratio of a series of per-period returns.

    Args:
        daily_returns: Per-period returns; must support subtraction of a
            scalar and expose ``.mean()`` / ``.std()`` (e.g. a NumPy array).
            The ``.std()`` convention is whatever the input type provides.
        risk_free_rate: Annual risk-free rate, spread evenly over ``periods``.
        periods: Number of periods per year used for annualisation.

    Returns:
        ``sqrt(periods) * mean(excess) / std(excess)``, or ``nan`` when the
        dispersion of the excess returns is zero or undefined (constant or
        empty series) so that no infinite ratio is reported.
    """
    daily_rf = risk_free_rate / periods
    excess_returns = daily_returns - daily_rf
    dispersion = excess_returns.std()
    # `not (x > 0)` is also true for NaN, which covers the empty-series case.
    if not dispersion > 0:
        return np.nan
    return np.sqrt(periods) * excess_returns.mean() / dispersion

# Annualised volatility of the per-period returns
def calculate_volatility(daily_returns, periods=252):
    """Scale the standard deviation of the returns to an annual figure.

    The per-period standard deviation (as computed by ``np.std``) is
    multiplied by the square root of ``periods``.
    """
    per_period_sigma = np.std(daily_returns)
    annualisation_factor = np.sqrt(periods)
    return per_period_sigma * annualisation_factor

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Return the largest peak-to-trough decline of the compounded wealth path.

    Args:
        daily_returns: Array-like of per-period simple returns.

    Returns:
        The most negative value of ``(wealth - running_peak) / running_peak``
        (0.0 if wealth never falls below a previous peak), or ``nan`` for an
        empty series.
    """
    wealth = np.cumprod(1 + np.asarray(daily_returns, dtype=float))
    if wealth.size == 0:
        return np.nan
    # The investment starts at a wealth of 1.0, so that level counts as the
    # first peak; otherwise a loss on the very first day would go unnoticed.
    running_peak = np.maximum(np.maximum.accumulate(wealth), 1.0)
    drawdown = (wealth - running_peak) / running_peak
    return np.min(drawdown)

# Flat per-period cost deduction
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Return the returns with a constant cost subtracted from every period."""
    net_returns = daily_returns - transaction_cost
    return net_returns

# Proportional cost adjustment
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every return by ``1 - transaction_cost_percentage``.

    The adjustment is multiplicative, so positive and negative returns are
    both shrunk towards zero by the same fraction.
    """
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    'cum_EL_return_512_with_cost', 'cum_ES_return_512_with_cost', 'cum_ELS_return_512_with_cost',
    'cum_VL_return_512_with_cost', 'cum_VS_return_512_with_cost', 'cum_VLS_return_512_with_cost',
]

# Portfolio names (without transaction cost)
portfolios_without_cost = [
    'cum_EL_return_512_without_cost', 'cum_ES_return_512_without_cost', 'cum_ELS_return_512_without_cost',
    'cum_VL_return_512_without_cost', 'cum_VS_return_512_without_cost', 'cum_VLS_return_512_without_cost',
]


def _summarise_portfolio_metrics(cumulative_log_returns, portfolio_names):
    """Compute the performance metrics of several cumulative log-return series.

    Args:
        cumulative_log_returns: DataFrame with one cumulative log-return
            column per portfolio (one row per rebalancing date).
        portfolio_names: Names of the columns to evaluate.

    Returns:
        A dict of equally long lists (one entry per portfolio) keyed by
        metric name, ready to be passed to ``pd.DataFrame``.
    """
    summary = {
        'Portfolio': [],
        'Annualized Return': [],
        'Sharpe Ratio': [],
        'Volatility': [],
        'Standard Deviation': [],
        'Max Drawdown': [],
        'Cumulative Return': []
    }

    for portfolio in portfolio_names:
        cumulative_log = cumulative_log_returns[portfolio].to_numpy(dtype=float)

        # The first cumulative value already equals the first day's return,
        # so difference against a starting level of 0 to keep that day.
        daily_log_returns = np.diff(cumulative_log, prepend=0.0)

        # The metric helpers compound with prod(1 + r), i.e. they expect
        # simple returns; convert the log returns accordingly.
        daily_returns = np.expm1(daily_log_returns)

        summary['Portfolio'].append(portfolio)
        summary['Annualized Return'].append(annualized_return(daily_returns))
        summary['Sharpe Ratio'].append(sharpe_ratio(daily_returns))
        summary['Volatility'].append(calculate_volatility(daily_returns))
        summary['Standard Deviation'].append(np.std(daily_returns))
        summary['Max Drawdown'].append(maximum_drawdown(daily_returns))
        summary['Cumulative Return'].append(cumulative_return(daily_returns))

    return summary


# Metrics for the portfolios with transaction cost. The `*_with_cost` series
# are accumulated from daily returns that compute_returns already reduced by
# the transaction cost, so no further cost is deducted here.
metrics = _summarise_portfolio_metrics(cumulative_log_returns_uni2tssmall_lag_512, portfolios_with_cost)
metrics_uni2tssmall512_c = pd.DataFrame(metrics)
display(metrics_uni2tssmall512_c)

# Now, the same calculations for portfolios without transaction cost
metrics_wc = _summarise_portfolio_metrics(cumulative_log_returns_uni2tssmall_lag_512, portfolios_without_cost)
metrics_uni2tssmall512_wc = pd.DataFrame(metrics_wc)
display(metrics_uni2tssmall512_wc)

# Save the portfolio metrics with transaction costs
metrics_uni2tssmall512_c.to_csv('metrics_uni2tssmall512_with_cost.csv', index=False)

# Save the portfolio metrics without transaction costs
metrics_uni2tssmall512_wc.to_csv('metrics_uni2tssmall512_without_cost.csv', index=False)

## Moirai-MoE Small

In [None]:
from uni2ts.model.moirai_moe import MoiraiMoEForecast, MoiraiMoEModule

# Forecast-quality metrics shared by the model runs
def calculate_metrics(actual, predictions):
    """Score predictions against realised values.

    Returns the tuple ``(mse, mae, rmse, directional_accuracy, up_dir_acc,
    down_dir_acc, r2, mase)``. The up/down accuracies are ``nan`` when
    ``actual`` contains no strictly positive / strictly negative value, and
    MASE is ``nan`` when the mean absolute first difference of ``actual`` is
    zero.
    """
    # Error magnitudes and goodness of fit
    mse = mean_squared_error(actual, predictions)
    mae = mean_absolute_error(actual, predictions)
    rmse = mse ** 0.5
    r2 = r2_score(actual, predictions)

    # Share of observations whose predicted sign equals the realised sign
    same_sign = np.sign(actual) == np.sign(predictions)
    directional_accuracy = np.mean(same_sign)

    # Hit rate restricted to realised up-moves
    rose = actual > 0
    if np.any(rose):
        up_dir_acc = np.mean(predictions[rose] > 0)
    else:
        up_dir_acc = np.nan

    # Hit rate restricted to realised down-moves
    fell = actual < 0
    if np.any(fell):
        down_dir_acc = np.mean(predictions[fell] < 0)
    else:
        down_dir_acc = np.nan

    # MAE relative to the mean absolute step between consecutive actual values
    naive_step_error = np.mean(np.abs(np.diff(actual)))
    if naive_step_error != 0:
        mase = mae / naive_step_error
    else:
        mase = np.nan

    return mse, mae, rmse, directional_accuracy, up_dir_acc, down_dir_acc, r2, mase

# Function to run the forecast with dynamic lags
def run_forecast_uni2ts_moe_small(crsp_train_lagged, crsp_test_lagged, lags, model_name="Salesforce/moirai-moe-1.0-R-small", out_sample_start="2016-01-01", out_sample_end="2024-12-31", device="cpu", batch_size=32):
    """Forecast one-step-ahead excess returns with a pretrained Moirai-MoE model.

    For every stock (``permno``) in the test frame, rolling windows of
    ``lags[0]`` standardised ``excess_ret`` values are fed to the model and
    the mean of the sampled forecast for the following date is stored.
    Summary metrics and the full prediction table are written to
    ``uni2tssmallmoe_results_lag{lag}.csv`` and
    ``uni2tssmallmoe_results_lag{lag}_full.csv``.

    Args:
        crsp_train_lagged: Not used by this function; kept for a uniform
            call signature.
        crsp_test_lagged: Frame with at least ``date``, ``permno`` and
            ``excess_ret`` columns. It is not modified in place.
        lags: Non-empty list of allowed window sizes (5, 21, 252, 512). Only
            the first entry is used as the context length.
        model_name: Hugging Face identifier of the pretrained module.
        out_sample_start: First date (inclusive) used to build windows.
        out_sample_end: Not used by this function.
        device: Not used; the device is chosen from CUDA availability below.
        batch_size: Number of context windows per prediction batch.

    Returns:
        Tuple ``(metrics, test_frame)``: a one-row metrics DataFrame and the
        test frame with a ``predicted_excess_returns_lag{lag}`` column (NaN
        where no prediction exists). If no window can be built or the model
        cannot be loaded, ``metrics`` is an empty DataFrame and the
        prediction column is all NaN.

    Raises:
        ValueError: If ``lags`` is empty or contains a value that is not allowed.

    NOTE(review): targets are standardised with each stock's mean and
    standard deviation over the whole selected sample, and the stored
    predictions stay in those standardised units -- confirm this is intended.
    """
    # List of allowed lags
    lag_days_list = [5, 21, 252, 512]

    # Validate that at least one lag is given and that only valid lags are passed
    if not lags or any(lag not in lag_days_list for lag in lags):
        raise ValueError(f"Invalid lag value. Allowed lags are: {lag_days_list}")

    # Use the first lag value in lags as the window size
    WINDOW = lags[0]
    PRED_LEN = 1
    DATE_COL = "date"
    ID_COL = "permno"
    TARGET_COL = "excess_ret"
    pred_col = f'predicted_excess_returns_lag{WINDOW}'
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # set device to cuda or cpu

    # Work on a new frame without a prediction column left over from an
    # earlier run; merging on top of it would create duplicate column names.
    test_frame = crsp_test_lagged.drop(columns=[pred_col], errors='ignore')

    # Explicit copy so the helper column below is not written to a slice
    df_test = test_frame[test_frame[DATE_COL] >= pd.to_datetime(out_sample_start)].copy()

    # Normalization (per stock)
    df_test["target"] = df_test.groupby(ID_COL)[TARGET_COL].transform(lambda x: (x - x.mean()) / x.std())

    # Create contexts: each window of WINDOW values predicts the value that follows it
    contexts, targets, records = [], [], []

    for permno, group in tqdm(df_test.groupby(ID_COL), desc="Building windows"):
        series = group["target"].values
        dates = group[DATE_COL].values

        if len(series) <= WINDOW:
            continue
        for i in range(len(series) - WINDOW):
            context = torch.tensor(series[i:i+WINDOW], dtype=torch.float32)
            target = series[i + WINDOW]
            contexts.append(context)
            targets.append(target)
            records.append({
                "permno": permno,
                "date": dates[i + WINDOW]
            })

    if len(contexts) == 0:
        print("No valid context windows found. Check 'lag' or data coverage.")
        # Same return shape as the success path so callers can always unpack
        test_frame[pred_col] = np.nan
        return pd.DataFrame(), test_frame

    # Load Model
    try:
        model = MoiraiMoEForecast(
            module=MoiraiMoEModule.from_pretrained(model_name).to(device),
            prediction_length=PRED_LEN,
            context_length=WINDOW,
            num_samples=100,
            target_dim=1,
            feat_dynamic_real_dim=0,
            past_feat_dynamic_real_dim=0,
        ).to(device)

        print("Model initialized successfully.")
    except Exception as e:
        # Best effort: report the problem and hand back a frame without predictions
        print(f"Error loading model: {e}")
        test_frame[pred_col] = np.nan
        return pd.DataFrame(), test_frame

    predictor = model.create_predictor(batch_size=batch_size)

    # Predict
    preds = []
    # Batch the contexts for predictions
    for i in tqdm(range(0, len(contexts), batch_size), desc="Predicting in batches"):
        batch_contexts = contexts[i:i+batch_size]  # Get the next batch of contexts

        # The start timestamp is a placeholder; only the window values are passed on
        gluonts_input = ListDataset(
            [{"start": pd.Timestamp("2000-01-01"), "target": context.squeeze().cpu().numpy().tolist()} for context in batch_contexts],
            freq="B"
        )
        forecasts = list(predictor.predict(gluonts_input))

        for forecast in forecasts:
            # Mean over the sampled paths for the single forecast step
            preds.append(forecast.mean[0])

    # Evaluation
    results = pd.DataFrame(records)
    results["y_true"] = targets
    results["y_pred"] = preds

    # Calculate metrics
    mse, mae, rmse, directional_accuracy, up_dir_acc, down_dir_acc, r2, mase = calculate_metrics(
        results["y_true"].to_numpy(), results["y_pred"].to_numpy()
    )

    # Summary
    final_df = pd.DataFrame([{
        "Model": model_name,
        "Lag": WINDOW,
        "Directional Accuracy": directional_accuracy,
        "Up Directional Accuracy": up_dir_acc,
        "Down Directional Accuracy": down_dir_acc,
        "R-squared": r2,
        "MSE": mse,
        "RMSE": rmse,
        "MAE": mae,
        "MASE": mase
    }])

    # Save the summarized results and the full prediction table for this lag to CSV
    final_df.to_csv(f"uni2tssmallmoe_results_lag{WINDOW}.csv", index=False)
    results.to_csv(f"uni2tssmallmoe_results_lag{WINDOW}_full.csv", index=False)

    # Merge the predictions into the test set under the lag-specific name;
    # rows without a prediction keep NaN because of the left join.
    test_frame = test_frame.merge(
        results[['permno', 'date', 'y_pred']],
        on=['permno', 'date'],
        how='left'
    ).rename(columns={'y_pred': pred_col})

    return final_df, test_frame

In [None]:
# Lag-5 forecast with the small Moirai-MoE checkpoint
from IPython.display import display

uni2ts_small_moe_results_lag5 = run_forecast_uni2ts_moe_small(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[5],
    model_name="Salesforce/moirai-moe-1.0-R-small",
    device="cpu",
)

# Render the returned object in the notebook
display(uni2ts_small_moe_results_lag5)

In [None]:
# Lag-21 forecast with the small Moirai-MoE checkpoint
from IPython.display import display

uni2ts_small_moe_results_lag21 = run_forecast_uni2ts_moe_small(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[21],
    model_name="Salesforce/moirai-moe-1.0-R-small",
    device="cpu",
)

# Render the returned object in the notebook
display(uni2ts_small_moe_results_lag21)

In [None]:
# Lag-252 forecast with the small Moirai-MoE checkpoint
from IPython.display import display

uni2ts_small_moe_results_lag252 = run_forecast_uni2ts_moe_small(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[252],
    model_name="Salesforce/moirai-moe-1.0-R-small",
    device="cpu",
)

# Render the returned object in the notebook
display(uni2ts_small_moe_results_lag252)

In [None]:
# Lag-512 forecast with the small Moirai-MoE checkpoint
from IPython.display import display

uni2ts_small_moe_results_lag512 = run_forecast_uni2ts_moe_small(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[512],
    model_name="Salesforce/moirai-moe-1.0-R-small",
    device="cpu",
)

# Render the returned object in the notebook
display(uni2ts_small_moe_results_lag512)

## Moirai-MoE Small Portfolio

### Window Size 5

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the transaction cost charged for one stock-day row.

    The cost is a flat 10 bps for every stock; ``row`` is ignored and only
    kept so the function can still be used with ``DataFrame.apply``.
    """
    return 0.001  # 10 bps for both small and large cap stocks


# The cost does not depend on the row, so broadcast the scalar to the whole
# column instead of calling the function once per row.
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `uni2tssmallmoe` to predict excess returns
def uni2tssmallmoe_5_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=None):
    """Attach Moirai-MoE-small predictions to a copy of the test frame.

    For every lag, ``run_forecast_uni2ts_moe_small`` is called and its
    ``predicted_excess_returns_lag{lag}`` column is copied into a
    model-specific ``uni2tssmallmoe_{lag}_predicted_excess_returns`` column.

    Args:
        crsp_train_lagged: Training frame, forwarded to the forecast function.
        crsp_test_lagged: Test frame; it is copied, not modified in place.
        lags: Window sizes to forecast with; defaults to ``[5]``.

    Returns:
        The test frame returned by the last forecast call, extended with the
        model-specific prediction column(s).

    Raises:
        KeyError: If the forecast did not produce the expected column.
    """
    if lags is None:
        lags = [5]

    out_sample_start = "2016-01-01"
    modified_crsp_test_lagged = crsp_test_lagged.copy()

    for lag in lags:
        # Pass each lag individually to uni2tssmallmoe; the metrics frame
        # that comes back first is not needed here.
        _, modified_crsp_test_lagged = run_forecast_uni2ts_moe_small(
            crsp_train_lagged, modified_crsp_test_lagged, lags=[lag], out_sample_start=out_sample_start
        )

        # Check the column of the lag that was just forecast (not always the first one)
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col not in modified_crsp_test_lagged.columns:
            raise KeyError(f"Column '{pred_col}' not found after running forecast.")

        # Add the predicted returns to the DataFrame
        modified_crsp_test_lagged[f'uni2tssmallmoe_{lag}_predicted_excess_returns'] = modified_crsp_test_lagged[pred_col].values

    return modified_crsp_test_lagged

# Get predicted excess returns using Uni2ts Small Moe model
crsp_test_lagged = uni2tssmallmoe_5_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=[5])

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute long, short and long-short log returns for one cross-section.

    The rows of ``group`` are ranked by ``predicted_col``; the top 10% form the
    long leg and the bottom 10% the short leg. Each leg is reported
    equal-weighted and market-cap-weighted, with and without the mean
    ``transaction_cost`` of its members deducted.

    Args:
        group: DataFrame with columns ``adjusted_ret``, ``market_cap_merged``,
            ``transaction_cost`` and ``predicted_col``.
        predicted_col: Name of the column holding the predicted returns.

    Returns:
        dict with 12 entries keyed
        ``{equal|value}_{long|short|long_short}_log_return_{with_cost|without_cost}``.

    Raises:
        ValueError: If any of the required columns is missing.
    """
    required_cols = ['adjusted_ret', 'market_cap_merged', 'transaction_cost']
    missing_cols = [col for col in required_cols if col not in group.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # int(0.1 * n) is 0 for fewer than 10 rows, which would give empty legs and
    # NaN returns; hold at least one name per leg whenever the group is non-empty.
    n_select = int(0.1 * len(group))
    if n_select == 0 and len(group) > 0:
        n_select = 1

    top_positive = group.nlargest(n_select, predicted_col)   # long leg
    top_negative = group.nsmallest(n_select, predicted_col)  # short leg

    def _leg_log_returns(leg, direction):
        """Return (equal-weighted, value-weighted, mean cost) for one leg."""
        log_ret = np.log1p(leg['adjusted_ret'])
        caps = leg['market_cap_merged']
        equal_weighted = direction * log_ret.mean()
        value_weighted = direction * ((log_ret * caps).sum() / caps.sum())
        return equal_weighted, value_weighted, leg['transaction_cost'].mean()

    # direction = -1 flips the sign because the short leg profits when prices fall
    eq_long, val_long, long_cost = _leg_log_returns(top_positive, 1)
    eq_short, val_short, short_cost = _leg_log_returns(top_negative, -1)

    results = {}
    for weighting, long_ret, short_ret in (
        ('equal', eq_long, eq_short),
        ('value', val_long, val_short),
    ):
        for suffix, long_leg, short_leg in (
            ('with_cost', long_ret - long_cost, short_ret - short_cost),
            ('without_cost', long_ret, short_ret),
        ):
            results[f'{weighting}_long_log_return_{suffix}'] = long_leg
            results[f'{weighting}_short_log_return_{suffix}'] = short_leg
            # Long-short return is the sum of the two (already signed) legs
            results[f'{weighting}_long_short_log_return_{suffix}'] = long_leg + short_leg
    return results
# Compute cumulative returns for each date with daily rebalancing
# Output column -> key in the dict returned by compute_returns. The with-cost
# columns come first, then the without-cost ones, each in the order
# EL, ES, ELS, VL, VS, VLS.
cum_column_sources_5 = {
    f'cum_{label}_return_5_{cost_tag}': f'{leg}_log_return_{cost_tag}'
    for cost_tag in ('with_cost', 'without_cost')
    for label, leg in (
        ('EL', 'equal_long'), ('ES', 'equal_short'), ('ELS', 'equal_long_short'),
        ('VL', 'value_long'), ('VS', 'value_short'), ('VLS', 'value_long_short'),
    )
}

cumulative_log_returns_by_date_uni2tssmallmoe5 = {'date': []}
for column in cum_column_sources_5:
    cumulative_log_returns_by_date_uni2tssmallmoe5[column] = []

# Running sum of daily log returns per portfolio, starting at zero
running_log_returns_5 = dict.fromkeys(cum_column_sources_5, 0)

# One groupby pass replaces re-filtering the whole frame for every date, and
# sort=True makes the accumulation chronological regardless of row order.
for date, group in crsp_test_lagged.groupby('date', sort=True):
    returns = compute_returns(group, 'uni2tssmallmoe_5_predicted_excess_returns')

    cumulative_log_returns_by_date_uni2tssmallmoe5['date'].append(date)
    for column, source_key in cum_column_sources_5.items():
        running_log_returns_5[column] += returns[source_key]
        cumulative_log_returns_by_date_uni2tssmallmoe5[column].append(running_log_returns_5[column])

# Convert to DataFrame for lag 5
cumulative_log_returns_uni2tssmallmoe_lag_5 = pd.DataFrame(cumulative_log_returns_by_date_uni2tssmallmoe5)

# Display the cumulative returns DataFrame for lag 5
display(cumulative_log_returns_uni2tssmallmoe_lag_5.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_uni2tssmallmoe_lag_5.to_csv("cumulative_log_returns_uni2tssmallmoe_lag_5.csv", index=False)

In [None]:
# Total compounded return over the whole sample
def cumulative_return(daily_returns):
    """Compound per-period returns into a single total return.

    Computes ``prod(1 + r) - 1`` over all entries of ``daily_returns``.
    """
    growth_factors = 1 + daily_returns
    return np.prod(growth_factors) - 1

# Geometric annualization of the compounded return
def annualized_return(daily_returns, periods=252):
    """Annualize the compounded return of ``daily_returns``.

    The total return ``prod(1 + r) - 1`` is rescaled with the exponent
    ``periods / len(daily_returns)``, where ``periods`` is the number of
    return observations per year.
    """
    total_return = np.prod(1 + daily_returns) - 1
    years_exponent = periods / len(daily_returns)
    return (1 + total_return) ** years_exponent - 1

# Annualized Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Return the annualized Sharpe ratio of ``daily_returns``.

    ``risk_free_rate`` is an annual rate; it is divided by ``periods`` to get
    the per-period rate subtracted from every return. The mean-to-std ratio
    of the excess returns is then scaled by ``sqrt(periods)``.
    """
    per_period_rf = risk_free_rate / periods  # Assuming 252 trading days
    excess = daily_returns - per_period_rf
    scaled_mean = np.sqrt(periods) * excess.mean()
    return scaled_mean / excess.std()

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    return np.std(daily_returns) * np.sqrt(periods)

# Largest peak-to-trough loss of the compounded wealth path
def maximum_drawdown(daily_returns):
    """Return the most negative relative drop from a running peak.

    The wealth path is ``cumprod(1 + r)``; each point is compared with the
    highest value reached so far, and the minimum relative difference is
    returned (0 when the path never falls below its peak).
    """
    wealth = np.cumprod(1 + daily_returns)
    running_peak = np.maximum.accumulate(wealth)
    relative_drop = (wealth - running_peak) / running_peak
    return np.min(relative_drop)

# Deduct a fixed cost from every daily return
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Return ``daily_returns`` with ``transaction_cost`` subtracted from each entry."""
    net_returns = daily_returns - transaction_cost
    return net_returns

# Scale every daily return down by a proportional cost
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Return ``daily_returns`` multiplied by ``1 - transaction_cost_percentage``."""
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    'cum_EL_return_5_with_cost', 'cum_ES_return_5_with_cost', 'cum_ELS_return_5_with_cost',
    'cum_VL_return_5_with_cost', 'cum_VS_return_5_with_cost', 'cum_VLS_return_5_with_cost',
]

# Same portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_5_without_cost', 'cum_ES_return_5_without_cost', 'cum_ELS_return_5_without_cost',
    'cum_VL_return_5_without_cost', 'cum_VS_return_5_without_cost', 'cum_VLS_return_5_without_cost',
]


def _collect_portfolio_metrics(cumulative_frame, portfolio_columns):
    """Compute performance metrics for each cumulative-log-return column.

    Args:
        cumulative_frame: DataFrame whose columns hold running sums of daily
            log returns.
        portfolio_columns: Names of the columns to evaluate.

    Returns:
        dict of lists (one entry per portfolio) ready for ``pd.DataFrame``.
    """
    collected = {
        'Portfolio': [],
        'Annualized Return': [],
        'Sharpe Ratio': [],
        'Volatility': [],
        'Standard Deviation': [],
        'Max Drawdown': [],
        'Cumulative Return': []
    }
    for portfolio in portfolio_columns:
        cumulative_log_returns = cumulative_frame[portfolio].values

        # prepend=0.0 keeps the first day's return, which a bare np.diff would drop
        daily_log_returns = np.diff(cumulative_log_returns, prepend=0.0)
        # The metric helpers compound with (1 + r), so they need simple returns
        daily_returns = np.expm1(daily_log_returns)

        collected['Portfolio'].append(portfolio)
        collected['Annualized Return'].append(annualized_return(daily_returns))
        collected['Sharpe Ratio'].append(sharpe_ratio(daily_returns))
        collected['Volatility'].append(calculate_volatility(daily_returns))
        collected['Standard Deviation'].append(np.std(daily_returns))
        collected['Max Drawdown'].append(maximum_drawdown(daily_returns))
        collected['Cumulative Return'].append(cumulative_return(daily_returns))
    return collected


# The *_with_cost columns are already net of the transaction cost deducted per
# leg in compute_returns, so no further cost is subtracted here (doing so would
# charge the cost twice).
metrics = _collect_portfolio_metrics(cumulative_log_returns_uni2tssmallmoe_lag_5, portfolios_with_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tssmallmoe5_c = pd.DataFrame(metrics)
display(metrics_uni2tssmallmoe5_c)

# Same calculations for portfolios without transaction cost
metrics_wc = _collect_portfolio_metrics(cumulative_log_returns_uni2tssmallmoe_lag_5, portfolios_without_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tssmallmoe5_wc = pd.DataFrame(metrics_wc)
display(metrics_uni2tssmallmoe5_wc)

# Save the portfolio metrics with transaction costs
metrics_uni2tssmallmoe5_c.to_csv('metrics_uni2tssmallmoe5_with_cost.csv', index=False)

# Save the portfolio metrics without transaction costs
metrics_uni2tssmallmoe5_wc.to_csv('metrics_uni2tssmallmoe5_without_cost.csv', index=False)

### Window Size 21

In [None]:
# Flat transaction cost of 10 basis points (0.001) per position
def calculate_transaction_cost(row):
    """Return the transaction cost charged for one row.

    The cost is the same constant for every stock, so ``row`` is accepted
    only to fit the ``DataFrame.apply`` calling convention and is not read.
    """
    flat_cost = 0.001  # 10 bps for both small and large cap stocks
    return flat_cost

# The cost is one constant for every row, so broadcast it as a scalar instead of
# calling the helper once per row through DataFrame.apply(axis=1).
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `run_forecast_uni2ts_moe_small` to predict excess returns
def uni2tssmallmoe_21_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=(21,)):
    """Run the small Moirai-MoE forecast for each lag and attach the predictions.

    Args:
        crsp_train_lagged: Training frame forwarded unchanged to the forecaster.
        crsp_test_lagged: Test frame; it is copied, so the caller's frame is not
            modified by this function itself.
        lags: Iterable of context-window sizes to forecast with.

    Returns:
        A copy of the test frame with, for every lag, a column named
        ``uni2tssmallmoe_{lag}_predicted_excess_returns``.

    Raises:
        KeyError: If the forecaster did not produce
            ``predicted_excess_returns_lag{lag}`` for a requested lag.
    """
    out_sample_start = "2016-01-01"
    modified_crsp_test_lagged = crsp_test_lagged.copy()

    for lag in lags:
        # The forecaster is called one lag at a time; its metrics frame is not needed here.
        _, modified_crsp_test_lagged = run_forecast_uni2ts_moe_small(
            crsp_train_lagged,
            modified_crsp_test_lagged,
            lags=[lag],
            out_sample_start=out_sample_start,
        )

        # Use the current lag (not lags[0]) so every requested lag is validated and copied.
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col not in modified_crsp_test_lagged.columns:
            raise KeyError(f"Column '{pred_col}' not found after running forecast.")

        # Expose the prediction under the model-specific column name
        modified_crsp_test_lagged[f'uni2tssmallmoe_{lag}_predicted_excess_returns'] = modified_crsp_test_lagged[pred_col].values

    return modified_crsp_test_lagged

# Attach the lag-21 Uni2ts Small MoE predictions to the test frame
crsp_test_lagged = uni2tssmallmoe_21_predicted_excess_returns(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[21],
)

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute long, short and long-short log returns for one cross-section.

    The rows of ``group`` are ranked by ``predicted_col``; the top 10% form the
    long leg and the bottom 10% the short leg. Each leg is reported
    equal-weighted and market-cap-weighted, with and without the mean
    ``transaction_cost`` of its members deducted.

    Args:
        group: DataFrame with columns ``adjusted_ret``, ``market_cap_merged``,
            ``transaction_cost`` and ``predicted_col``.
        predicted_col: Name of the column holding the predicted returns.

    Returns:
        dict with 12 entries keyed
        ``{equal|value}_{long|short|long_short}_log_return_{with_cost|without_cost}``.

    Raises:
        ValueError: If any of the required columns is missing.
    """
    required_cols = ['adjusted_ret', 'market_cap_merged', 'transaction_cost']
    missing_cols = [col for col in required_cols if col not in group.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # int(0.1 * n) is 0 for fewer than 10 rows, which would give empty legs and
    # NaN returns; hold at least one name per leg whenever the group is non-empty.
    n_select = int(0.1 * len(group))
    if n_select == 0 and len(group) > 0:
        n_select = 1

    top_positive = group.nlargest(n_select, predicted_col)   # long leg
    top_negative = group.nsmallest(n_select, predicted_col)  # short leg

    def _leg_log_returns(leg, direction):
        """Return (equal-weighted, value-weighted, mean cost) for one leg."""
        log_ret = np.log1p(leg['adjusted_ret'])
        caps = leg['market_cap_merged']
        equal_weighted = direction * log_ret.mean()
        value_weighted = direction * ((log_ret * caps).sum() / caps.sum())
        return equal_weighted, value_weighted, leg['transaction_cost'].mean()

    # direction = -1 flips the sign because the short leg profits when prices fall
    eq_long, val_long, long_cost = _leg_log_returns(top_positive, 1)
    eq_short, val_short, short_cost = _leg_log_returns(top_negative, -1)

    results = {}
    for weighting, long_ret, short_ret in (
        ('equal', eq_long, eq_short),
        ('value', val_long, val_short),
    ):
        for suffix, long_leg, short_leg in (
            ('with_cost', long_ret - long_cost, short_ret - short_cost),
            ('without_cost', long_ret, short_ret),
        ):
            results[f'{weighting}_long_log_return_{suffix}'] = long_leg
            results[f'{weighting}_short_log_return_{suffix}'] = short_leg
            # Long-short return is the sum of the two (already signed) legs
            results[f'{weighting}_long_short_log_return_{suffix}'] = long_leg + short_leg
    return results
# Compute cumulative returns for each date with daily rebalancing
# Output column -> key in the dict returned by compute_returns. The with-cost
# columns come first, then the without-cost ones, each in the order
# EL, ES, ELS, VL, VS, VLS.
cum_column_sources_21 = {
    f'cum_{label}_return_21_{cost_tag}': f'{leg}_log_return_{cost_tag}'
    for cost_tag in ('with_cost', 'without_cost')
    for label, leg in (
        ('EL', 'equal_long'), ('ES', 'equal_short'), ('ELS', 'equal_long_short'),
        ('VL', 'value_long'), ('VS', 'value_short'), ('VLS', 'value_long_short'),
    )
}

cumulative_log_returns_by_date_uni2tssmallmoe21 = {'date': []}
for column in cum_column_sources_21:
    cumulative_log_returns_by_date_uni2tssmallmoe21[column] = []

# Running sum of daily log returns per portfolio, starting at zero
running_log_returns_21 = dict.fromkeys(cum_column_sources_21, 0)

# One groupby pass replaces re-filtering the whole frame for every date, and
# sort=True makes the accumulation chronological regardless of row order.
for date, group in crsp_test_lagged.groupby('date', sort=True):
    returns = compute_returns(group, 'uni2tssmallmoe_21_predicted_excess_returns')

    cumulative_log_returns_by_date_uni2tssmallmoe21['date'].append(date)
    for column, source_key in cum_column_sources_21.items():
        running_log_returns_21[column] += returns[source_key]
        cumulative_log_returns_by_date_uni2tssmallmoe21[column].append(running_log_returns_21[column])

# Convert to DataFrame for lag 21
cumulative_log_returns_uni2tssmallmoe_lag_21 = pd.DataFrame(cumulative_log_returns_by_date_uni2tssmallmoe21)

# Display the cumulative returns DataFrame for lag 21
display(cumulative_log_returns_uni2tssmallmoe_lag_21.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_uni2tssmallmoe_lag_21.to_csv("cumulative_log_returns_uni2tssmallmoe_lag_21.csv", index=False)

In [None]:
# Total compounded return over the whole sample
def cumulative_return(daily_returns):
    """Compound per-period returns into a single total return.

    Computes ``prod(1 + r) - 1`` over all entries of ``daily_returns``.
    """
    growth_factors = 1 + daily_returns
    return np.prod(growth_factors) - 1

# Geometric annualization of the compounded return
def annualized_return(daily_returns, periods=252):
    """Annualize the compounded return of ``daily_returns``.

    The total return ``prod(1 + r) - 1`` is rescaled with the exponent
    ``periods / len(daily_returns)``, where ``periods`` is the number of
    return observations per year.
    """
    total_return = np.prod(1 + daily_returns) - 1
    years_exponent = periods / len(daily_returns)
    return (1 + total_return) ** years_exponent - 1

# Annualized Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Return the annualized Sharpe ratio of ``daily_returns``.

    ``risk_free_rate`` is an annual rate; it is divided by ``periods`` to get
    the per-period rate subtracted from every return. The mean-to-std ratio
    of the excess returns is then scaled by ``sqrt(periods)``.
    """
    per_period_rf = risk_free_rate / periods  # Assuming 252 trading days
    excess = daily_returns - per_period_rf
    scaled_mean = np.sqrt(periods) * excess.mean()
    return scaled_mean / excess.std()

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    return np.std(daily_returns) * np.sqrt(periods)

# Largest peak-to-trough loss of the compounded wealth path
def maximum_drawdown(daily_returns):
    """Return the most negative relative drop from a running peak.

    The wealth path is ``cumprod(1 + r)``; each point is compared with the
    highest value reached so far, and the minimum relative difference is
    returned (0 when the path never falls below its peak).
    """
    wealth = np.cumprod(1 + daily_returns)
    running_peak = np.maximum.accumulate(wealth)
    relative_drop = (wealth - running_peak) / running_peak
    return np.min(relative_drop)

# Deduct a fixed cost from every daily return
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Return ``daily_returns`` with ``transaction_cost`` subtracted from each entry."""
    net_returns = daily_returns - transaction_cost
    return net_returns

# Scale every daily return down by a proportional cost
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Return ``daily_returns`` multiplied by ``1 - transaction_cost_percentage``."""
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    'cum_EL_return_21_with_cost', 'cum_ES_return_21_with_cost', 'cum_ELS_return_21_with_cost',
    'cum_VL_return_21_with_cost', 'cum_VS_return_21_with_cost', 'cum_VLS_return_21_with_cost',
]

# Same portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_21_without_cost', 'cum_ES_return_21_without_cost', 'cum_ELS_return_21_without_cost',
    'cum_VL_return_21_without_cost', 'cum_VS_return_21_without_cost', 'cum_VLS_return_21_without_cost',
]


def _collect_portfolio_metrics(cumulative_frame, portfolio_columns):
    """Compute performance metrics for each cumulative-log-return column.

    Args:
        cumulative_frame: DataFrame whose columns hold running sums of daily
            log returns.
        portfolio_columns: Names of the columns to evaluate.

    Returns:
        dict of lists (one entry per portfolio) ready for ``pd.DataFrame``.
    """
    collected = {
        'Portfolio': [],
        'Annualized Return': [],
        'Sharpe Ratio': [],
        'Volatility': [],
        'Standard Deviation': [],
        'Max Drawdown': [],
        'Cumulative Return': []
    }
    for portfolio in portfolio_columns:
        cumulative_log_returns = cumulative_frame[portfolio].values

        # prepend=0.0 keeps the first day's return, which a bare np.diff would drop
        daily_log_returns = np.diff(cumulative_log_returns, prepend=0.0)
        # The metric helpers compound with (1 + r), so they need simple returns
        daily_returns = np.expm1(daily_log_returns)

        collected['Portfolio'].append(portfolio)
        collected['Annualized Return'].append(annualized_return(daily_returns))
        collected['Sharpe Ratio'].append(sharpe_ratio(daily_returns))
        collected['Volatility'].append(calculate_volatility(daily_returns))
        collected['Standard Deviation'].append(np.std(daily_returns))
        collected['Max Drawdown'].append(maximum_drawdown(daily_returns))
        collected['Cumulative Return'].append(cumulative_return(daily_returns))
    return collected


# The *_with_cost columns are already net of the transaction cost deducted per
# leg in compute_returns, so no further cost is subtracted here (doing so would
# charge the cost twice).
metrics = _collect_portfolio_metrics(cumulative_log_returns_uni2tssmallmoe_lag_21, portfolios_with_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tssmallmoe21_c = pd.DataFrame(metrics)
display(metrics_uni2tssmallmoe21_c)

# Same calculations for portfolios without transaction cost
metrics_wc = _collect_portfolio_metrics(cumulative_log_returns_uni2tssmallmoe_lag_21, portfolios_without_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tssmallmoe21_wc = pd.DataFrame(metrics_wc)
display(metrics_uni2tssmallmoe21_wc)

# Save the portfolio metrics with transaction costs
metrics_uni2tssmallmoe21_c.to_csv('metrics_uni2tssmallmoe21_with_cost.csv', index=False)

# Save the portfolio metrics without transaction costs
metrics_uni2tssmallmoe21_wc.to_csv('metrics_uni2tssmallmoe21_without_cost.csv', index=False)

### Window Size 252

In [None]:
# Flat transaction cost of 10 basis points (0.001) per position
def calculate_transaction_cost(row):
    """Return the transaction cost charged for one row.

    The cost is the same constant for every stock, so ``row`` is accepted
    only to fit the ``DataFrame.apply`` calling convention and is not read.
    """
    flat_cost = 0.001  # 10 bps for both small and large cap stocks
    return flat_cost

# The cost is one constant for every row, so broadcast it as a scalar instead of
# calling the helper once per row through DataFrame.apply(axis=1).
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `run_forecast_uni2ts_moe_small` to predict excess returns
def uni2tssmallmoe_252_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=(252,)):
    """Run the small Moirai-MoE forecast for each lag and attach the predictions.

    Args:
        crsp_train_lagged: Training frame forwarded unchanged to the forecaster.
        crsp_test_lagged: Test frame; it is copied, so the caller's frame is not
            modified by this function itself.
        lags: Iterable of context-window sizes to forecast with.

    Returns:
        A copy of the test frame with, for every lag, a column named
        ``uni2tssmallmoe_{lag}_predicted_excess_returns``.

    Raises:
        KeyError: If the forecaster did not produce
            ``predicted_excess_returns_lag{lag}`` for a requested lag.
    """
    out_sample_start = "2016-01-01"
    modified_crsp_test_lagged = crsp_test_lagged.copy()

    for lag in lags:
        # The forecaster is called one lag at a time; its metrics frame is not needed here.
        _, modified_crsp_test_lagged = run_forecast_uni2ts_moe_small(
            crsp_train_lagged,
            modified_crsp_test_lagged,
            lags=[lag],
            out_sample_start=out_sample_start,
        )

        # Use the current lag (not lags[0]) so every requested lag is validated and copied.
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col not in modified_crsp_test_lagged.columns:
            raise KeyError(f"Column '{pred_col}' not found after running forecast.")

        # Expose the prediction under the model-specific column name
        modified_crsp_test_lagged[f'uni2tssmallmoe_{lag}_predicted_excess_returns'] = modified_crsp_test_lagged[pred_col].values

    return modified_crsp_test_lagged

# Attach the lag-252 Uni2ts Small MoE predictions to the test frame
crsp_test_lagged = uni2tssmallmoe_252_predicted_excess_returns(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[252],
)

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute long, short and long-short log returns for one cross-section.

    Rows of `group` are ranked on `predicted_col`; the top 10% form the long
    leg and the bottom 10% the short leg. Each leg is evaluated equal-weighted
    and weighted by `market_cap_merged`, both gross and net of the leg's mean
    `transaction_cost`.

    Args:
        group: DataFrame with `adjusted_ret`, `market_cap_merged`,
            `transaction_cost` and `predicted_col` columns.
        predicted_col: Name of the column used for ranking.

    Returns:
        dict with twelve entries keyed
        `{equal|value}_{long|short|long_short}_log_return_{with|without}_cost`.

    Raises:
        ValueError: If a required column is missing from `group`.
    """
    needed = ['adjusted_ret', 'market_cap_merged', 'transaction_cost']
    missing_cols = [name for name in needed if name not in group.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # int() truncates, so a group with fewer than 10 rows yields empty legs.
    leg_size = int(0.1 * len(group))
    longs = group.nlargest(leg_size, predicted_col)
    shorts = group.nsmallest(leg_size, predicted_col)

    long_log = np.log1p(longs['adjusted_ret'])
    short_log = np.log1p(shorts['adjusted_ret'])
    long_cost = longs['transaction_cost'].mean()
    short_cost = shorts['transaction_cost'].mean()

    # Equal-weighted legs; the short leg earns the negated return.
    el_gross = long_log.mean()
    es_gross = -short_log.mean()
    el_net = el_gross - long_cost
    es_net = es_gross - short_cost

    # Value-weighted legs use market cap as the weight.
    long_caps = longs['market_cap_merged']
    short_caps = shorts['market_cap_merged']
    vl_gross = (long_log * long_caps).sum() / long_caps.sum()
    short_weighted = (short_log * short_caps).sum() / short_caps.sum()
    vl_net = vl_gross - long_cost
    vs_gross = -short_weighted
    vs_net = -(short_weighted + short_cost)

    # Long-short is the sum of the two legs.
    result = {}
    result['equal_long_log_return_with_cost'] = el_net
    result['equal_short_log_return_with_cost'] = es_net
    result['equal_long_short_log_return_with_cost'] = el_net + es_net
    result['equal_long_log_return_without_cost'] = el_gross
    result['equal_short_log_return_without_cost'] = es_gross
    result['equal_long_short_log_return_without_cost'] = el_gross + es_gross
    result['value_long_log_return_with_cost'] = vl_net
    result['value_short_log_return_with_cost'] = vs_net
    result['value_long_short_log_return_with_cost'] = vl_net + vs_net
    result['value_long_log_return_without_cost'] = vl_gross
    result['value_short_log_return_without_cost'] = vs_gross
    result['value_long_short_log_return_without_cost'] = vl_gross + vs_gross
    return result
# Compute cumulative returns for each date with daily rebalancing.
# Every output column accumulates one of the daily log returns produced by
# compute_returns; this mapping pairs each column with its source key.
column_sources_252 = {
    f'cum_{stem}_return_252_{cost_tag}': f'{leg}_log_return_{cost_tag}'
    for cost_tag in ('with_cost', 'without_cost')
    for stem, leg in (
        ('EL', 'equal_long'), ('ES', 'equal_short'), ('ELS', 'equal_long_short'),
        ('VL', 'value_long'), ('VS', 'value_short'), ('VLS', 'value_long_short'),
    )
}

cumulative_log_returns_by_date_uni2tssmallmoe252 = {'date': []}
cumulative_log_returns_by_date_uni2tssmallmoe252.update({column: [] for column in column_sources_252})

# Running totals for lag 252, all starting at 0
running_totals_252 = dict.fromkeys(column_sources_252, 0)

# groupby(sort=False) visits the dates in first-appearance order, like
# unique(), but splits the frame once instead of re-filtering it per date.
# Rows with a missing date are skipped by groupby.
for date, group in crsp_test_lagged.groupby('date', sort=False):
    # Compute returns for lag 252
    returns = compute_returns(group, 'uni2tssmallmoe_252_predicted_excess_returns')

    # Update the running totals and record the value reached on this date
    cumulative_log_returns_by_date_uni2tssmallmoe252['date'].append(date)
    for column, source_key in column_sources_252.items():
        running_totals_252[column] += returns[source_key]
        cumulative_log_returns_by_date_uni2tssmallmoe252[column].append(running_totals_252[column])

# Keep the final totals available under the original scalar names
# (dict order matches the order the columns were declared in).
(
    cum_EL_return_252_with_cost, cum_ES_return_252_with_cost, cum_ELS_return_252_with_cost,
    cum_VL_return_252_with_cost, cum_VS_return_252_with_cost, cum_VLS_return_252_with_cost,
    cum_EL_return_252_without_cost, cum_ES_return_252_without_cost, cum_ELS_return_252_without_cost,
    cum_VL_return_252_without_cost, cum_VS_return_252_without_cost, cum_VLS_return_252_without_cost,
) = running_totals_252.values()

# Convert to DataFrame for lag 252
cumulative_log_returns_uni2tssmallmoe_lag_252 = pd.DataFrame(cumulative_log_returns_by_date_uni2tssmallmoe252)

# Display the cumulative returns DataFrame for lag 252
display(cumulative_log_returns_uni2tssmallmoe_lag_252.head())

# Save results to csv
cumulative_log_returns_uni2tssmallmoe_lag_252.to_csv("cumulative_log_returns_uni2tssmallmoe_lag_252.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound a series of simple per-period returns into one total return.

    Args:
        daily_returns: NumPy array (or scalar) of simple returns.

    Returns:
        The product of `(1 + r)` over all returns, minus 1.
    """
    growth_factor = np.prod(1 + daily_returns)
    return growth_factor - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Annualise the compounded return of a simple-return series.

    Args:
        daily_returns: NumPy array of simple per-period returns; must be
            non-empty because its length is used as a divisor.
        periods: Number of periods that make up one year.

    Returns:
        `(1 + total) ** (periods / len(daily_returns)) - 1`, where `total`
        is the compounded return of the series.
    """
    total = np.prod(1 + daily_returns) - 1
    years_exponent = periods / len(daily_returns)
    return (1 + total) ** years_exponent - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Compute the annualised Sharpe ratio of a per-period return series.

    Args:
        daily_returns: NumPy array of per-period returns (its `.mean()` and
            `.std()` methods are used after subtracting the risk-free rate).
        risk_free_rate: Annual risk-free rate; divided by `periods` to get
            the per-period rate.
        periods: Number of periods per year.

    Returns:
        `sqrt(periods) * mean(excess) / std(excess)`.
    """
    per_period_rf = risk_free_rate / periods  # Assuming 252 trading days
    excess = daily_returns - per_period_rf
    scaled_mean = np.sqrt(periods) * excess.mean()
    return scaled_mean / excess.std()

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    return np.std(daily_returns) * np.sqrt(periods)

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Return the largest peak-to-trough decline of the compounded equity curve.

    The curve starts at 1.0 (the initial capital) so that a loss on the very
    first period counts as a drawdown from the starting value.

    Args:
        daily_returns: Array-like of simple per-period returns.

    Returns:
        The most negative value of `(equity - running_peak) / running_peak`;
        0.0 when the curve never falls below a previous peak or when the
        input is empty.
    """
    growth = np.cumprod(1 + np.asarray(daily_returns, dtype=float))
    # Prepend the starting capital so it can act as the first peak.
    equity = np.concatenate(([1.0], growth))
    running_peak = np.maximum.accumulate(equity)
    drawdown = (equity - running_peak) / running_peak
    return np.min(drawdown)

# Apply fixed transaction cost to the daily returns
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Deduct a flat cost from every per-period return.

    Args:
        daily_returns: Scalar or NumPy array of per-period returns.
        transaction_cost: Amount subtracted from each return.

    Returns:
        `daily_returns` with `transaction_cost` subtracted element-wise.
    """
    net_returns = daily_returns - transaction_cost
    return net_returns

# Apply percentage-based transaction cost to the daily returns
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every per-period return by `(1 - transaction_cost_percentage)`.

    NOTE(review): this shrinks gains and losses alike rather than deducting
    a cost -- confirm this is the intended cost model.

    Args:
        daily_returns: Scalar or NumPy array of per-period returns.
        transaction_cost_percentage: Fraction removed from each return.

    Returns:
        The scaled returns.
    """
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    'cum_EL_return_252_with_cost', 'cum_ES_return_252_with_cost', 'cum_ELS_return_252_with_cost',
    'cum_VL_return_252_with_cost', 'cum_VS_return_252_with_cost', 'cum_VLS_return_252_with_cost',
]

# Portfolio names without transaction cost
portfolios_without_cost = [
    'cum_EL_return_252_without_cost', 'cum_ES_return_252_without_cost', 'cum_ELS_return_252_without_cost',
    'cum_VL_return_252_without_cost', 'cum_VS_return_252_without_cost', 'cum_VLS_return_252_without_cost',
]


def _metrics_from_cumulative_log_returns(cumulative_frame, portfolio_columns):
    """Build a metrics table for the given cumulative-log-return columns.

    Args:
        cumulative_frame: DataFrame whose columns hold running sums of daily
            log returns.
        portfolio_columns: Names of the columns to summarise.

    Returns:
        dict of equal-length lists (one entry per portfolio), ready to be
        passed to `pd.DataFrame`.
    """
    table = {
        'Portfolio': [],
        'Annualized Return': [],
        'Sharpe Ratio': [],
        'Volatility': [],
        'Standard Deviation': [],
        'Max Drawdown': [],
        'Cumulative Return': []
    }
    for portfolio in portfolio_columns:
        cumulative_log = cumulative_frame[portfolio].values

        # The running sum starts from 0 before the first day; prepending it
        # keeps the first day's return, which a plain diff would drop.
        daily_log_returns = np.diff(cumulative_log, prepend=0.0)

        # The metric helpers compound simple returns via (1 + r), so convert
        # the log returns before using them.
        daily_returns = np.expm1(daily_log_returns)

        table['Portfolio'].append(portfolio)
        table['Annualized Return'].append(annualized_return(daily_returns))
        table['Sharpe Ratio'].append(sharpe_ratio(daily_returns))
        table['Volatility'].append(calculate_volatility(daily_returns))
        table['Standard Deviation'].append(np.std(daily_returns))
        table['Max Drawdown'].append(maximum_drawdown(daily_returns))
        table['Cumulative Return'].append(cumulative_return(daily_returns))
    return table


# The `_with_cost` columns are already net of the transaction cost that
# compute_returns subtracts from each leg, so no further cost is deducted
# here; deducting it again would count the cost twice.
# NOTE(review): metric tables for the other window sizes should use the same
# treatment so the results stay comparable.
metrics = _metrics_from_cumulative_log_returns(cumulative_log_returns_uni2tssmallmoe_lag_252, portfolios_with_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tssmallmoe252_c = pd.DataFrame(metrics)
display(metrics_uni2tssmallmoe252_c)

# Same calculations for portfolios without transaction cost
metrics_wc = _metrics_from_cumulative_log_returns(cumulative_log_returns_uni2tssmallmoe_lag_252, portfolios_without_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tssmallmoe252_wc = pd.DataFrame(metrics_wc)
display(metrics_uni2tssmallmoe252_wc)

# Save the portfolio metrics with transaction costs
metrics_uni2tssmallmoe252_c.to_csv('metrics_uni2tssmallmoe252_with_cost.csv', index=False)

# Save the portfolio metrics without transaction costs
metrics_uni2tssmallmoe252_wc.to_csv('metrics_uni2tssmallmoe252_without_cost.csv', index=False)

### Window Size 512

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the flat transaction cost charged for a stock.

    Args:
        row: Ignored. Kept so the function can still be passed to
            `DataFrame.apply(..., axis=1)`.

    Returns:
        0.001, i.e. 10 bps.
    """
    return 0.001  # 10 bps for both small and large cap stocks

# The cost does not depend on the row, so broadcast the scalar to the whole
# column instead of calling the function once per row through apply().
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `uni2tssmallmoe` to predict excess returns
def uni2tssmallmoe_512_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=None):
    """Run the Uni2TS small MoE forecast and attach its predictions to the test frame.

    Args:
        crsp_train_lagged: Training frame, forwarded to
            `run_forecast_uni2ts_moe_small`.
        crsp_test_lagged: Test frame. A copy is made before forecasting.
        lags: Window sizes to forecast with, one forecast run per entry.
            Defaults to `[512]`.

    Returns:
        The frame returned by the last forecast run, with one
        `uni2tssmallmoe_{lag}_predicted_excess_returns` column per lag.

    Raises:
        KeyError: If a forecast run did not produce
            `predicted_excess_returns_lag{lag}`.
    """
    if lags is None:
        lags = [512]
    out_sample_start = "2016-01-01"
    modified_crsp_test_lagged = crsp_test_lagged.copy()

    for lag in lags:
        # Pass each lag individually to uni2tssmallmoe; the metrics frame it
        # also returns is not needed here.
        _, modified_crsp_test_lagged = run_forecast_uni2ts_moe_small(
            crsp_train_lagged,
            modified_crsp_test_lagged,
            lags=[lag],
            out_sample_start=out_sample_start,
        )

        # Column names follow the lag of this iteration (not lags[0]), so
        # every requested lag is checked and copied.
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col not in modified_crsp_test_lagged.columns:
            raise KeyError(f"Column '{pred_col}' not found after running forecast.")

        # Add the predicted returns to the DataFrame
        modified_crsp_test_lagged[f'uni2tssmallmoe_{lag}_predicted_excess_returns'] = modified_crsp_test_lagged[pred_col].values

    return modified_crsp_test_lagged

# Get predicted excess returns using the Uni2TS small MoE model (window size 512)
crsp_test_lagged = uni2tssmallmoe_512_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=[512])

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute long, short and long-short log returns for one cross-section.

    Rows of `group` are ranked on `predicted_col`; the top 10% form the long
    leg and the bottom 10% the short leg. Each leg is evaluated equal-weighted
    and weighted by `market_cap_merged`, both gross and net of the leg's mean
    `transaction_cost`.

    Args:
        group: DataFrame with `adjusted_ret`, `market_cap_merged`,
            `transaction_cost` and `predicted_col` columns.
        predicted_col: Name of the column used for ranking.

    Returns:
        dict with twelve entries keyed
        `{equal|value}_{long|short|long_short}_log_return_{with|without}_cost`.

    Raises:
        ValueError: If a required column is missing from `group`.
    """
    needed = ['adjusted_ret', 'market_cap_merged', 'transaction_cost']
    missing_cols = [name for name in needed if name not in group.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # int() truncates, so a group with fewer than 10 rows yields empty legs.
    leg_size = int(0.1 * len(group))
    longs = group.nlargest(leg_size, predicted_col)
    shorts = group.nsmallest(leg_size, predicted_col)

    long_log = np.log1p(longs['adjusted_ret'])
    short_log = np.log1p(shorts['adjusted_ret'])
    long_cost = longs['transaction_cost'].mean()
    short_cost = shorts['transaction_cost'].mean()

    # Equal-weighted legs; the short leg earns the negated return.
    el_gross = long_log.mean()
    es_gross = -short_log.mean()
    el_net = el_gross - long_cost
    es_net = es_gross - short_cost

    # Value-weighted legs use market cap as the weight.
    long_caps = longs['market_cap_merged']
    short_caps = shorts['market_cap_merged']
    vl_gross = (long_log * long_caps).sum() / long_caps.sum()
    short_weighted = (short_log * short_caps).sum() / short_caps.sum()
    vl_net = vl_gross - long_cost
    vs_gross = -short_weighted
    vs_net = -(short_weighted + short_cost)

    # Long-short is the sum of the two legs.
    result = {}
    result['equal_long_log_return_with_cost'] = el_net
    result['equal_short_log_return_with_cost'] = es_net
    result['equal_long_short_log_return_with_cost'] = el_net + es_net
    result['equal_long_log_return_without_cost'] = el_gross
    result['equal_short_log_return_without_cost'] = es_gross
    result['equal_long_short_log_return_without_cost'] = el_gross + es_gross
    result['value_long_log_return_with_cost'] = vl_net
    result['value_short_log_return_with_cost'] = vs_net
    result['value_long_short_log_return_with_cost'] = vl_net + vs_net
    result['value_long_log_return_without_cost'] = vl_gross
    result['value_short_log_return_without_cost'] = vs_gross
    result['value_long_short_log_return_without_cost'] = vl_gross + vs_gross
    return result
# Compute cumulative returns for each date with daily rebalancing.
# Every output column accumulates one of the daily log returns produced by
# compute_returns; this mapping pairs each column with its source key.
column_sources_512 = {
    f'cum_{stem}_return_512_{cost_tag}': f'{leg}_log_return_{cost_tag}'
    for cost_tag in ('with_cost', 'without_cost')
    for stem, leg in (
        ('EL', 'equal_long'), ('ES', 'equal_short'), ('ELS', 'equal_long_short'),
        ('VL', 'value_long'), ('VS', 'value_short'), ('VLS', 'value_long_short'),
    )
}

cumulative_log_returns_by_date_uni2tssmallmoe512 = {'date': []}
cumulative_log_returns_by_date_uni2tssmallmoe512.update({column: [] for column in column_sources_512})

# Running totals for lag 512, all starting at 0
running_totals_512 = dict.fromkeys(column_sources_512, 0)

# groupby(sort=False) visits the dates in first-appearance order, like
# unique(), but splits the frame once instead of re-filtering it per date.
# Rows with a missing date are skipped by groupby.
for date, group in crsp_test_lagged.groupby('date', sort=False):
    # Compute returns for lag 512
    returns = compute_returns(group, 'uni2tssmallmoe_512_predicted_excess_returns')

    # Update the running totals and record the value reached on this date
    cumulative_log_returns_by_date_uni2tssmallmoe512['date'].append(date)
    for column, source_key in column_sources_512.items():
        running_totals_512[column] += returns[source_key]
        cumulative_log_returns_by_date_uni2tssmallmoe512[column].append(running_totals_512[column])

# Keep the final totals available under the original scalar names
# (dict order matches the order the columns were declared in).
(
    cum_EL_return_512_with_cost, cum_ES_return_512_with_cost, cum_ELS_return_512_with_cost,
    cum_VL_return_512_with_cost, cum_VS_return_512_with_cost, cum_VLS_return_512_with_cost,
    cum_EL_return_512_without_cost, cum_ES_return_512_without_cost, cum_ELS_return_512_without_cost,
    cum_VL_return_512_without_cost, cum_VS_return_512_without_cost, cum_VLS_return_512_without_cost,
) = running_totals_512.values()

# Convert to DataFrame for lag 512
cumulative_log_returns_uni2tssmallmoe_lag_512 = pd.DataFrame(cumulative_log_returns_by_date_uni2tssmallmoe512)

# Display the cumulative returns DataFrame for lag 512
display(cumulative_log_returns_uni2tssmallmoe_lag_512.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_uni2tssmallmoe_lag_512.to_csv("cumulative_log_returns_uni2tssmallmoe_lag_512.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound a series of simple per-period returns into one total return.

    Args:
        daily_returns: NumPy array (or scalar) of simple returns.

    Returns:
        The product of `(1 + r)` over all returns, minus 1.
    """
    growth_factor = np.prod(1 + daily_returns)
    return growth_factor - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Annualise the compounded return of a simple-return series.

    Args:
        daily_returns: NumPy array of simple per-period returns; must be
            non-empty because its length is used as a divisor.
        periods: Number of periods that make up one year.

    Returns:
        `(1 + total) ** (periods / len(daily_returns)) - 1`, where `total`
        is the compounded return of the series.
    """
    total = np.prod(1 + daily_returns) - 1
    years_exponent = periods / len(daily_returns)
    return (1 + total) ** years_exponent - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Compute the annualised Sharpe ratio of a per-period return series.

    Args:
        daily_returns: NumPy array of per-period returns (its `.mean()` and
            `.std()` methods are used after subtracting the risk-free rate).
        risk_free_rate: Annual risk-free rate; divided by `periods` to get
            the per-period rate.
        periods: Number of periods per year.

    Returns:
        `sqrt(periods) * mean(excess) / std(excess)`.
    """
    per_period_rf = risk_free_rate / periods  # Assuming 252 trading days
    excess = daily_returns - per_period_rf
    scaled_mean = np.sqrt(periods) * excess.mean()
    return scaled_mean / excess.std()

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    return np.std(daily_returns) * np.sqrt(periods)

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Return the largest peak-to-trough decline of the compounded equity curve.

    The curve starts at 1.0 (the initial capital) so that a loss on the very
    first period counts as a drawdown from the starting value.

    Args:
        daily_returns: Array-like of simple per-period returns.

    Returns:
        The most negative value of `(equity - running_peak) / running_peak`;
        0.0 when the curve never falls below a previous peak or when the
        input is empty.
    """
    growth = np.cumprod(1 + np.asarray(daily_returns, dtype=float))
    # Prepend the starting capital so it can act as the first peak.
    equity = np.concatenate(([1.0], growth))
    running_peak = np.maximum.accumulate(equity)
    drawdown = (equity - running_peak) / running_peak
    return np.min(drawdown)

# Apply fixed transaction cost to the daily returns (for other metrics)
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Deduct a flat cost from every per-period return.

    Args:
        daily_returns: Scalar or NumPy array of per-period returns.
        transaction_cost: Amount subtracted from each return.

    Returns:
        `daily_returns` with `transaction_cost` subtracted element-wise.
    """
    net_returns = daily_returns - transaction_cost
    return net_returns

# Apply percentage-based transaction cost to the daily returns (for volatility and standard deviation)
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every per-period return by `(1 - transaction_cost_percentage)`.

    NOTE(review): this shrinks gains and losses alike rather than deducting
    a cost -- confirm this is the intended cost model.

    Args:
        daily_returns: Scalar or NumPy array of per-period returns.
        transaction_cost_percentage: Fraction removed from each return.

    Returns:
        The scaled returns.
    """
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    'cum_EL_return_512_with_cost', 'cum_ES_return_512_with_cost', 'cum_ELS_return_512_with_cost',
    'cum_VL_return_512_with_cost', 'cum_VS_return_512_with_cost', 'cum_VLS_return_512_with_cost',
]

# Portfolio names without transaction cost
portfolios_without_cost = [
    'cum_EL_return_512_without_cost', 'cum_ES_return_512_without_cost', 'cum_ELS_return_512_without_cost',
    'cum_VL_return_512_without_cost', 'cum_VS_return_512_without_cost', 'cum_VLS_return_512_without_cost',
]


def _metrics_from_cumulative_log_returns(cumulative_frame, portfolio_columns):
    """Build a metrics table for the given cumulative-log-return columns.

    Args:
        cumulative_frame: DataFrame whose columns hold running sums of daily
            log returns.
        portfolio_columns: Names of the columns to summarise.

    Returns:
        dict of equal-length lists (one entry per portfolio), ready to be
        passed to `pd.DataFrame`.
    """
    table = {
        'Portfolio': [],
        'Annualized Return': [],
        'Sharpe Ratio': [],
        'Volatility': [],
        'Standard Deviation': [],
        'Max Drawdown': [],
        'Cumulative Return': []
    }
    for portfolio in portfolio_columns:
        cumulative_log = cumulative_frame[portfolio].values

        # The running sum starts from 0 before the first day; prepending it
        # keeps the first day's return, which a plain diff would drop.
        daily_log_returns = np.diff(cumulative_log, prepend=0.0)

        # The metric helpers compound simple returns via (1 + r), so convert
        # the log returns before using them.
        daily_returns = np.expm1(daily_log_returns)

        table['Portfolio'].append(portfolio)
        table['Annualized Return'].append(annualized_return(daily_returns))
        table['Sharpe Ratio'].append(sharpe_ratio(daily_returns))
        table['Volatility'].append(calculate_volatility(daily_returns))
        table['Standard Deviation'].append(np.std(daily_returns))
        table['Max Drawdown'].append(maximum_drawdown(daily_returns))
        table['Cumulative Return'].append(cumulative_return(daily_returns))
    return table


# The `_with_cost` columns are already net of the transaction cost that
# compute_returns subtracts from each leg, so no further cost is deducted
# here; deducting it again would count the cost twice.
# NOTE(review): metric tables for the other window sizes should use the same
# treatment so the results stay comparable.
metrics = _metrics_from_cumulative_log_returns(cumulative_log_returns_uni2tssmallmoe_lag_512, portfolios_with_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tssmallmoe512_c = pd.DataFrame(metrics)
display(metrics_uni2tssmallmoe512_c)

# Same calculations for portfolios without transaction cost
metrics_wc = _metrics_from_cumulative_log_returns(cumulative_log_returns_uni2tssmallmoe_lag_512, portfolios_without_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tssmallmoe512_wc = pd.DataFrame(metrics_wc)
display(metrics_uni2tssmallmoe512_wc)

# Save the portfolio metrics with transaction costs
metrics_uni2tssmallmoe512_c.to_csv('metrics_uni2tssmallmoe512_with_cost.csv', index=False)

# Save the portfolio metrics without transaction costs
metrics_uni2tssmallmoe512_wc.to_csv('metrics_uni2tssmallmoe512_without_cost.csv', index=False)

## Moirai-Moe Base

In [None]:
# Define the helper function for calculating performance metrics
def calculate_metrics(actual, predictions):
    """Score forecasts against the realised values.

    Args:
        actual: NumPy array of realised values.
        predictions: NumPy array of forecasts, aligned with `actual`.

    Returns:
        Tuple `(mse, mae, rmse, directional_accuracy, up_dir_acc,
        down_dir_acc, r2, mase)`. The up/down accuracies are NaN when
        `actual` has no positive/negative entries, and `mase` is NaN when the
        mean absolute first difference of `actual` is zero.
    """
    # Error magnitudes
    mse = mean_squared_error(actual, predictions)
    rmse = mse ** 0.5
    mae = mean_absolute_error(actual, predictions)

    # Share of observations where forecast and outcome have the same sign
    sign_matches = np.sign(actual) == np.sign(predictions)
    directional_accuracy = np.mean(sign_matches)

    # Hit rate restricted to days that actually went up
    rose = actual > 0
    if np.any(rose):
        up_dir_acc = np.mean(predictions[rose] > 0)
    else:
        up_dir_acc = np.nan

    # Hit rate restricted to days that actually went down
    fell = actual < 0
    if np.any(fell):
        down_dir_acc = np.mean(predictions[fell] < 0)
    else:
        down_dir_acc = np.nan

    r2 = r2_score(actual, predictions)

    # MASE scales the MAE by the mean absolute one-step change of `actual`
    naive_step_error = np.mean(np.abs(np.diff(actual)))
    if naive_step_error != 0:
        mase = mae / naive_step_error
    else:
        mase = np.nan

    return mse, mae, rmse, directional_accuracy, up_dir_acc, down_dir_acc, r2, mase

# Function to run the forecast with dynamic lags
def run_forecast_uni2ts_moe_base(
    crsp_train_lagged,
    crsp_test_lagged,
    lags,
    model_name="Salesforce/moirai-moe-1.0-R-base",
    out_sample_start="2016-01-01",
    out_sample_end="2024-12-31",
    device="cpu",
    batch_size=32,
):
    """Produce rolling one-step-ahead Moirai-MoE forecasts and score them.

    Within the evaluation window, ``excess_ret`` is z-scored per ``permno``.
    For every stock, each run of ``lags[0]`` consecutive normalised values is
    used as context to predict the next value. Rows are taken in the order
    they appear, so the frame is assumed to be chronological within each
    ``permno``.

    NOTE(review): the normalisation statistics are computed over the whole
    evaluation window of each stock, and ``y_true`` / ``y_pred`` (and hence
    the returned prediction column) are in those z-score units.

    Args:
        crsp_train_lagged: Not used by this function; kept for call
            compatibility.
        crsp_test_lagged: DataFrame with ``date``, ``permno`` and
            ``excess_ret`` columns. It is not modified.
        lags: Non-empty sequence of window sizes, each one of 5, 21, 252 or
            512. Only ``lags[0]`` is used as the context length.
        model_name: Checkpoint name passed to
            ``MoiraiMoEModule.from_pretrained``; also stored in the summary.
        out_sample_start: Inclusive lower bound on ``date``.
        out_sample_end: Inclusive upper bound on ``date``.
        device: Currently ignored: the device is always chosen by CUDA
            availability (see the note in the body).
        batch_size: Number of context windows sent to the predictor at once.

    Returns:
        tuple: ``(summary, frame)``. ``summary`` is a one-row DataFrame of
        accuracy metrics. ``frame`` is ``crsp_test_lagged`` with an extra
        ``predicted_excess_returns_lag{lags[0]}`` column. When no window can
        be built or the model fails to load, ``summary`` is empty and the
        prediction column is all NaN.

    Raises:
        ValueError: If ``lags`` is empty or contains a value that is not
            allowed.

    Side effects:
        Writes ``uni2tsbasemoe_results_lag{lag}.csv`` (summary) and
        ``uni2tsbasemoe_results_lag{lag}_full.csv`` (per-row predictions).
    """
    # List of allowed lags
    lag_days_list = [5, 21, 252, 512]

    # Validate that at least one lag is given and that every lag is allowed
    if len(lags) == 0:
        raise ValueError(f"No lag value given. Allowed lags are: {lag_days_list}")
    if any(lag not in lag_days_list for lag in lags):
        raise ValueError(f"Invalid lag value. Allowed lags are: {lag_days_list}")

    # Use the first lag value in lags as the window size
    WINDOW = lags[0]
    PRED_LEN = 1
    DATE_COL = "date"
    ID_COL = "permno"
    TARGET_COL = "excess_ret"
    pred_col = f'predicted_excess_returns_lag{WINDOW}'

    # NOTE(review): this overrides the `device` argument, so a GPU is used
    # whenever one is available. Left as-is because the existing callers pass
    # or default to "cpu" and would otherwise be moved off the GPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # set device to cuda or cpu

    def _no_forecast_result():
        # Same (summary, frame) shape as the success path so callers that
        # unpack two values keep working; the input frame is left untouched.
        return pd.DataFrame(), crsp_test_lagged.assign(**{pred_col: np.nan})

    # Restrict to the evaluation window (both bounds inclusive). The copy keeps
    # the new "target" column from being written into the caller's frame.
    in_window = crsp_test_lagged[DATE_COL].between(
        pd.to_datetime(out_sample_start), pd.to_datetime(out_sample_end)
    )
    df_test = crsp_test_lagged[in_window].copy()

    # Normalization (per stock)
    df_test["target"] = df_test.groupby(ID_COL)[TARGET_COL].transform(lambda x: (x - x.mean()) / x.std())

    # Create contexts: one (window, next value) pair per position and stock
    contexts, targets, records = [], [], []

    for permno, group in tqdm(df_test.groupby(ID_COL), desc="Building windows"):
        series = group["target"].values
        dates = group[DATE_COL].values

        # A stock needs at least WINDOW + 1 observations to yield one pair
        if len(series) <= WINDOW:
            continue
        for i in range(len(series) - WINDOW):
            contexts.append(torch.tensor(series[i:i+WINDOW], dtype=torch.float32))
            targets.append(series[i + WINDOW])
            records.append({
                "permno": permno,
                "date": dates[i + WINDOW]
            })

    if len(contexts) == 0:
        print("No valid context windows found. Check 'lag' or data coverage.")
        return _no_forecast_result()

    # Load Model (best effort: a load failure is reported, not raised)
    try:
        model = MoiraiMoEForecast(
            module=MoiraiMoEModule.from_pretrained(model_name).to(device),
            prediction_length=PRED_LEN,
            context_length=WINDOW,
            num_samples=100,
            target_dim=1,
            feat_dynamic_real_dim=0,
            past_feat_dynamic_real_dim=0,
        ).to(device)

        print("Model initialized successfully.")
    except Exception as e:
        print(f"Error loading model: {e}")
        return _no_forecast_result()

    predictor = model.create_predictor(batch_size=batch_size)

    # Predict, batching the contexts
    preds = []
    for i in tqdm(range(0, len(contexts), batch_size), desc="Predicting in batches"):
        batch_contexts = contexts[i:i+batch_size]  # Get the next batch of contexts

        # Every window gets the same placeholder start date.
        gluonts_input = ListDataset(
            [{"start": pd.Timestamp("2000-01-01"), "target": context.squeeze().cpu().numpy().tolist()} for context in batch_contexts],
            freq="B"
        )

        # Keep the first (only) step of each forecast's mean path
        for forecast in predictor.predict(gluonts_input):
            preds.append(forecast.mean[0])

    # Evaluation
    results = pd.DataFrame(records)
    results["y_true"] = targets
    results["y_pred"] = preds

    # Calculate metrics
    mse, mae, rmse, directional_accuracy, up_dir_acc, down_dir_acc, r2, mase = calculate_metrics(
        results["y_true"].to_numpy(), results["y_pred"].to_numpy()
    )

    # Summary
    result = pd.DataFrame([{
        "Model": model_name,
        "Lag": WINDOW,
        "Directional Accuracy": directional_accuracy,
        "Up Directional Accuracy": up_dir_acc,
        "Down Directional Accuracy": down_dir_acc,
        "R-squared": r2,
        "MSE": mse,
        "RMSE": rmse,
        "MAE": mae,
        "MASE": mase
    }])

    # Save the summarized results and the per-row predictions for this lag to CSV
    result.to_csv(f"uni2tsbasemoe_results_lag{WINDOW}.csv", index=False)
    results.to_csv(f"uni2tsbasemoe_results_lag{WINDOW}_full.csv", index=False)

    # Merge the predictions into the test set. A prediction column left over
    # from an earlier run is dropped first so the rename below cannot create
    # two columns with the same name.
    merged = crsp_test_lagged.drop(columns=[pred_col], errors="ignore").merge(
        results[['permno', 'date', 'y_pred']],
        on=['permno', 'date'],
        how='left'
    )

    # Rename the merged 'y_pred' column to the lag-specific name
    merged.rename(columns={'y_pred': pred_col}, inplace=True)

    return result, merged

In [None]:
# Running the forecast for lag=5
from IPython.display import display

# On the normal path the call returns a (metrics summary, test frame with the
# prediction column) pair, so that pair is what gets stored and displayed.
uni2ts_base_moe_results_lag5 = run_forecast_uni2ts_moe_base(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[5],
    model_name="Salesforce/moirai-moe-1.0-R-base",
    device="cpu",
)

# Display the results using pandas' display() in Jupyter Notebook
display(uni2ts_base_moe_results_lag5)

In [None]:
# Running the forecast for lag=21
from IPython.display import display

# On the normal path the call returns a (metrics summary, test frame with the
# prediction column) pair, so that pair is what gets stored and displayed.
uni2ts_base_moe_results_lag21 = run_forecast_uni2ts_moe_base(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[21],
    model_name="Salesforce/moirai-moe-1.0-R-base",
    device="cpu",
)

# Display the results using pandas' display() in Jupyter Notebook
display(uni2ts_base_moe_results_lag21)

In [None]:
# Running the forecast for lag=252
from IPython.display import display

# On the normal path the call returns a (metrics summary, test frame with the
# prediction column) pair, so that pair is what gets stored and displayed.
uni2ts_base_moe_results_lag252 = run_forecast_uni2ts_moe_base(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[252],
    model_name="Salesforce/moirai-moe-1.0-R-base",
    device="cpu",
)

# Display the results using pandas' display() in Jupyter Notebook
display(uni2ts_base_moe_results_lag252)

In [None]:
# Running the forecast for lag=512
from IPython.display import display

# On the normal path the call returns a (metrics summary, test frame with the
# prediction column) pair, so that pair is what gets stored and displayed.
uni2ts_base_moe_results_lag512 = run_forecast_uni2ts_moe_base(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[512],
    model_name="Salesforce/moirai-moe-1.0-R-base",
    device="cpu",
)

# Display the results using pandas' display() in Jupyter Notebook
display(uni2ts_base_moe_results_lag512)

## Moirai-Moe Base Portfolio

### Window Size 5

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the flat transaction cost charged for a stock.

    ``row`` is accepted so the function can still be used with
    ``DataFrame.apply(..., axis=1)``, but it is not inspected: the cost is the
    same for every row.
    """
    return 0.001  # 10 bps for both small and large cap stocks

# The cost is one constant, so broadcast it to the whole column instead of
# calling the function once per row.
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `uni2tsbasemoe` to predict excess returns
def uni2tsbasemoe_5_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=(5,)):
    """Attach Moirai-MoE base predictions to a copy of the test frame.

    For each value in ``lags`` the forecast is run on its own and its
    ``predicted_excess_returns_lag{lag}`` column is duplicated under the name
    ``uni2tsbasemoe_{lag}_predicted_excess_returns``.

    Args:
        crsp_train_lagged: Forwarded unchanged to the forecast function.
        crsp_test_lagged: Test frame; it is copied, not modified.
        lags: Window sizes to run, one forecast per value.

    Returns:
        The augmented copy of ``crsp_test_lagged``.

    Raises:
        KeyError: If a forecast run did not produce its prediction column.
    """
    out_sample_start = "2016-01-01"
    modified_crsp_test_lagged = crsp_test_lagged.copy()

    for lag in lags:
        # Pass each lag individually to uni2tsbasemoe
        _, modified_crsp_test_lagged = run_forecast_uni2ts_moe_base(
            crsp_train_lagged,
            modified_crsp_test_lagged,
            lags=[lag],
            out_sample_start=out_sample_start,
        )

        # Column names follow the lag of THIS iteration (not lags[0]), so
        # several lags each get their own pair of columns.
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col not in modified_crsp_test_lagged.columns:
            raise KeyError(f"Column '{pred_col}' not found after running forecast.")

        # Add the predicted returns to the DataFrame under the model-specific name
        modified_crsp_test_lagged[f'uni2tsbasemoe_{lag}_predicted_excess_returns'] = modified_crsp_test_lagged[pred_col].values

    return modified_crsp_test_lagged

# Get predicted excess returns using Uni2ts Base Moe model
# (rebinds crsp_test_lagged to the augmented copy returned by the wrapper)
crsp_test_lagged = uni2tsbasemoe_5_predicted_excess_returns(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[5],
)

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute one cross-section's long, short and long-short log returns.

    The rows with the largest ``predicted_col`` values form the long leg and
    the rows with the smallest values form the short leg; each leg holds
    ``int(0.1 * len(group))`` rows. Returns are means of ``log1p(adjusted_ret)``,
    either equal-weighted or weighted by ``market_cap_merged``. The short leg
    is the negated mean. Each "with cost" leg subtracts the leg's mean
    ``transaction_cost``, so a long-short figure carries both legs' costs.

    With fewer than 10 rows both legs are empty and every value is NaN.

    Args:
        group: DataFrame for a single date.
        predicted_col: Name of the column to rank on.

    Returns:
        dict: Twelve floats keyed ``{equal,value}_{long,short,long_short}_
        log_return_{with,without}_cost``.

    Raises:
        ValueError: If ``adjusted_ret``, ``market_cap_merged`` or
            ``transaction_cost`` is missing from ``group``.
    """
    # First verify required columns exist
    required_cols = ['adjusted_ret', 'market_cap_merged', 'transaction_cost']
    missing_cols = [col for col in required_cols if col not in group.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    def _leg_stats(leg):
        # (equal-weighted mean, cap-weighted mean, mean cost) of one leg
        log_ret = np.log1p(leg['adjusted_ret'])
        caps = leg['market_cap_merged']
        equal_weighted = log_ret.mean()
        value_weighted = (log_ret * caps).sum() / caps.sum()
        return equal_weighted, value_weighted, leg['transaction_cost'].mean()

    n_select = int(0.1 * len(group))
    # Long position (Top 10% based on predicted returns)
    long_equal, long_value, long_cost = _leg_stats(group.nlargest(n_select, predicted_col))
    # Short position (Bottom 10% based on predicted returns)
    short_equal, short_value, short_cost = _leg_stats(group.nsmallest(n_select, predicted_col))

    # Equal-weighted legs
    equal_long_log_return_without_cost = long_equal
    equal_long_log_return_with_cost = long_equal - long_cost
    equal_short_log_return_without_cost = -short_equal
    equal_short_log_return_with_cost = -short_equal - short_cost

    # Value-weighted legs
    value_long_log_return_without_cost = long_value
    value_long_log_return_with_cost = long_value - long_cost
    value_short_log_return_without_cost = -(short_value)
    value_short_log_return_with_cost = -(short_value + short_cost)

    # Combine Long and Short to get Long-Short return
    equal_long_short_log_return_with_cost = equal_long_log_return_with_cost + equal_short_log_return_with_cost
    equal_long_short_log_return_without_cost = equal_long_log_return_without_cost + equal_short_log_return_without_cost
    value_long_short_log_return_with_cost = value_long_log_return_with_cost + value_short_log_return_with_cost
    value_long_short_log_return_without_cost = value_long_log_return_without_cost + value_short_log_return_without_cost

    return {
        'equal_long_log_return_with_cost': equal_long_log_return_with_cost,
        'equal_short_log_return_with_cost': equal_short_log_return_with_cost,
        'equal_long_short_log_return_with_cost': equal_long_short_log_return_with_cost,
        'equal_long_log_return_without_cost': equal_long_log_return_without_cost,
        'equal_short_log_return_without_cost': equal_short_log_return_without_cost,
        'equal_long_short_log_return_without_cost': equal_long_short_log_return_without_cost,
        'value_long_log_return_with_cost': value_long_log_return_with_cost,
        'value_short_log_return_with_cost': value_short_log_return_with_cost,
        'value_long_short_log_return_with_cost': value_long_short_log_return_with_cost,
        'value_long_log_return_without_cost': value_long_log_return_without_cost,
        'value_short_log_return_without_cost': value_short_log_return_without_cost,
        'value_long_short_log_return_without_cost': value_long_short_log_return_without_cost
    }
# Compute cumulative returns for each date with daily rebalancing
# Column of the cumulative table -> key of the dict returned by compute_returns().
_lag5_return_sources = {
    'cum_EL_return_5_with_cost': 'equal_long_log_return_with_cost',
    'cum_ES_return_5_with_cost': 'equal_short_log_return_with_cost',
    'cum_ELS_return_5_with_cost': 'equal_long_short_log_return_with_cost',
    'cum_VL_return_5_with_cost': 'value_long_log_return_with_cost',
    'cum_VS_return_5_with_cost': 'value_short_log_return_with_cost',
    'cum_VLS_return_5_with_cost': 'value_long_short_log_return_with_cost',
    'cum_EL_return_5_without_cost': 'equal_long_log_return_without_cost',
    'cum_ES_return_5_without_cost': 'equal_short_log_return_without_cost',
    'cum_ELS_return_5_without_cost': 'equal_long_short_log_return_without_cost',
    'cum_VL_return_5_without_cost': 'value_long_log_return_without_cost',
    'cum_VS_return_5_without_cost': 'value_short_log_return_without_cost',
    'cum_VLS_return_5_without_cost': 'value_long_short_log_return_without_cost',
}

# One list per output column, filled date by date
cumulative_log_returns_by_date_uni2tsbasemoe5 = {
    'date': [],
    **{column: [] for column in _lag5_return_sources},
}

# Running sums of the daily log returns for lag 5, all starting at 0
_lag5_running_totals = dict.fromkeys(_lag5_return_sources, 0)

# groupby splits the frame once (instead of re-filtering it for every date)
# and yields the dates in ascending order, which the running sums rely on.
for date, group in crsp_test_lagged.groupby('date'):
    # Compute returns for lag 5
    returns = compute_returns(group, 'uni2tsbasemoe_5_predicted_excess_returns')

    # Update the running totals and record their value for this date
    cumulative_log_returns_by_date_uni2tsbasemoe5['date'].append(date)
    for column, source in _lag5_return_sources.items():
        _lag5_running_totals[column] += returns[source]
        cumulative_log_returns_by_date_uni2tsbasemoe5[column].append(_lag5_running_totals[column])

# Keep the final totals available under their individual names
(
    cum_EL_return_5_with_cost,
    cum_ES_return_5_with_cost,
    cum_ELS_return_5_with_cost,
    cum_VL_return_5_with_cost,
    cum_VS_return_5_with_cost,
    cum_VLS_return_5_with_cost,
    cum_EL_return_5_without_cost,
    cum_ES_return_5_without_cost,
    cum_ELS_return_5_without_cost,
    cum_VL_return_5_without_cost,
    cum_VS_return_5_without_cost,
    cum_VLS_return_5_without_cost,
) = _lag5_running_totals.values()

# Convert to DataFrame for lag 5
cumulative_log_returns_uni2tsbasemoe_lag_5 = pd.DataFrame(cumulative_log_returns_by_date_uni2tsbasemoe5)

# Display the cumulative returns DataFrame for lag 5
display(cumulative_log_returns_uni2tsbasemoe_lag_5.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_uni2tsbasemoe_lag_5.to_csv("cumulative_log_returns_uni2tsbasemoe_lag_5.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound per-period returns into a single total return.

    Each value is treated as a simple return: the result is the product of
    ``1 + r`` over all periods, minus 1.
    """
    growth_factors = 1 + daily_returns
    return np.prod(growth_factors) - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Annualise the compounded return of a series of per-period returns.

    Args:
        daily_returns: Array of simple per-period returns.
        periods: Number of periods per year.

    Returns:
        The compounded growth raised to ``periods / len(daily_returns)``,
        minus 1. NaN when ``daily_returns`` is empty, because the exponent
        would divide by zero.
    """
    n_obs = len(daily_returns)
    if n_obs == 0:
        return np.nan
    cumulative_return_value = np.prod(1 + daily_returns) - 1
    return (1 + cumulative_return_value) ** (periods / n_obs) - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Annualised Sharpe ratio of per-period returns.

    The annual ``risk_free_rate`` is spread evenly over ``periods`` and
    subtracted from every return; the mean of the result is divided by its
    standard deviation and scaled by ``sqrt(periods)``.
    """
    per_period_rf = risk_free_rate / periods  # Assuming 252 trading days
    excess_returns = daily_returns - per_period_rf
    mean_excess = excess_returns.mean()
    dispersion = excess_returns.std()
    return np.sqrt(periods) * mean_excess / dispersion

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    """Annualised volatility: per-period standard deviation times ``sqrt(periods)``."""
    per_period_std = np.std(daily_returns)
    return per_period_std * np.sqrt(periods)

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Return the largest peak-to-trough decline of the compounded path.

    Args:
        daily_returns: Array of simple per-period returns.

    Returns:
        The most negative relative drop from a running peak (0.0 if the path
        never falls below a previous peak), or NaN for empty input.
    """
    if len(daily_returns) == 0:
        return np.nan
    wealth = np.cumprod(1 + daily_returns)
    # The starting capital of 1.0 counts as the first peak; without it a loss
    # in the very first periods would be reported as no drawdown at all.
    running_peak = np.maximum(np.maximum.accumulate(wealth), 1.0)
    drawdown = (wealth - running_peak) / running_peak
    return np.min(drawdown)

# Apply fixed transaction cost to the daily returns
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Deduct the same absolute ``transaction_cost`` from every return."""
    net_returns = daily_returns - transaction_cost
    return net_returns

# Apply percentage-based transaction cost to the daily returns
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every return by ``1 - transaction_cost_percentage``.

    The cost is taken as a fraction of the return itself, so negative returns
    are pulled towards zero as well.
    """
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Prepare portfolio names (with transaction cost)
# These are the six "_with_cost" columns of cumulative_log_returns_uni2tsbasemoe_lag_5:
# equal-/value-weighted long (EL/VL), short (ES/VS) and long-short (ELS/VLS).
portfolios_with_cost = [
    'cum_EL_return_5_with_cost', 'cum_ES_return_5_with_cost', 'cum_ELS_return_5_with_cost',
    'cum_VL_return_5_with_cost', 'cum_VS_return_5_with_cost', 'cum_VLS_return_5_with_cost',
]

# Initialize metrics container (one list per output column, one entry per portfolio)
metrics = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio with transaction cost
for portfolio in portfolios_with_cost:
    cumulative_returns_uni2tsbasemoe5_c = cumulative_log_returns_uni2tsbasemoe_lag_5[portfolio].values

    # Calculate daily returns from cumulative returns
    # np.diff yields one value fewer than there are dates, so the first date's
    # return is not part of `daily_returns`.
    # NOTE(review): the cumulative series are sums of log returns, while the
    # helpers below compound their input as simple returns - confirm intended.
    daily_returns = np.diff(cumulative_returns_uni2tsbasemoe5_c)

    # Apply fixed transaction cost for other metrics
    # NOTE(review): the "_with_cost" series already have `transaction_cost`
    # subtracted inside compute_returns, so this looks like a second charge of
    # 10 bps per day - confirm intended.
    daily_returns_after_cost_fixed = apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001)

    # Apply percentage-based transaction cost for volatility and standard deviation
    # (this variant scales each return by 0.999 instead of subtracting a cost)
    daily_returns_after_cost_percentage = apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001)

    # Calculate cumulative returns after fixed transaction cost
    cum_return_after_cost = cumulative_return(daily_returns_after_cost_fixed)

    # Metrics calculations
    # Return, Sharpe and drawdown use the fixed-cost series; volatility uses
    # the percentage-cost series.
    ann_return = annualized_return(daily_returns_after_cost_fixed)
    sharpe = sharpe_ratio(daily_returns_after_cost_fixed)
    vol = calculate_volatility(daily_returns_after_cost_percentage)
    max_draw = maximum_drawdown(daily_returns_after_cost_fixed)

    # Standard Deviation of daily returns after cost
    # (not annualised; `vol` above is the same quantity times sqrt(252))
    std_dev = np.std(daily_returns_after_cost_percentage)

    # Store results
    metrics['Portfolio'].append(portfolio)
    metrics['Annualized Return'].append(ann_return)
    metrics['Sharpe Ratio'].append(sharpe)
    metrics['Volatility'].append(vol)
    metrics['Standard Deviation'].append(std_dev)
    metrics['Max Drawdown'].append(max_draw)
    metrics['Cumulative Return'].append(cum_return_after_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tsbasemoe5_c = pd.DataFrame(metrics)
display(metrics_uni2tsbasemoe5_c)

# same calculations for portfolios without transaction cost
# These are the six "_without_cost" columns of cumulative_log_returns_uni2tsbasemoe_lag_5.
portfolios_without_cost = [
    'cum_EL_return_5_without_cost', 'cum_ES_return_5_without_cost', 'cum_ELS_return_5_without_cost',
    'cum_VL_return_5_without_cost', 'cum_VS_return_5_without_cost', 'cum_VLS_return_5_without_cost',
]

# Initialize metrics container for portfolios without transaction cost
metrics_wc = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    cumulative_returns_uni2tsbasemoe5_wc = cumulative_log_returns_uni2tsbasemoe_lag_5[portfolio].values

    # Calculate daily returns from cumulative returns
    # np.diff yields one value fewer than there are dates, so the first date's
    # return is not part of `daily_returns`.
    # NOTE(review): these are log returns, while the helpers below compound
    # their input as simple returns - confirm intended.
    daily_returns = np.diff(cumulative_returns_uni2tsbasemoe5_wc)  # Compute daily returns from cumulative log returns

    # Calculate cumulative returns without transaction cost
    cum_return_without_cost = cumulative_return(daily_returns)

    # Metrics calculations (all on the same, unadjusted daily series)
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns without cost
    # (not annualised; `vol` above is the same quantity times sqrt(252))
    std_dev = np.std(daily_returns)

    # Store results
    metrics_wc['Portfolio'].append(portfolio)
    metrics_wc['Annualized Return'].append(ann_return)
    metrics_wc['Sharpe Ratio'].append(sharpe)
    metrics_wc['Volatility'].append(vol)
    metrics_wc['Standard Deviation'].append(std_dev)
    metrics_wc['Max Drawdown'].append(max_draw)
    metrics_wc['Cumulative Return'].append(cum_return_without_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tsbasemoe5_wc = pd.DataFrame(metrics_wc)
display(metrics_uni2tsbasemoe5_wc)

# Save the portfolio metrics with transaction costs
# (metrics_uni2tsbasemoe5_c is the table built from the "_with_cost" columns)
metrics_uni2tsbasemoe5_c.to_csv('metrics_uni2tsbasemoe5_with_cost.csv', index=False)

# Save the portfolio metrics without transaction costs
metrics_uni2tsbasemoe5_wc.to_csv('metrics_uni2tsbasemoe5_without_cost.csv', index=False)

### Window Size 21

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the flat transaction cost charged for a stock.

    ``row`` is accepted so the function can still be used with
    ``DataFrame.apply(..., axis=1)``, but it is not inspected: the cost is the
    same for every row.
    """
    return 0.001  # 10 bps for both small and large cap stocks

# The cost is one constant, so broadcast it to the whole column instead of
# calling the function once per row.
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `uni2tsbasemoe` to predict excess returns
def uni2tsbasemoe_21_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=(21,)):
    """Attach Moirai-MoE base predictions to a copy of the test frame.

    For each value in ``lags`` the forecast is run on its own and its
    ``predicted_excess_returns_lag{lag}`` column is duplicated under the name
    ``uni2tsbasemoe_{lag}_predicted_excess_returns``.

    Args:
        crsp_train_lagged: Forwarded unchanged to the forecast function.
        crsp_test_lagged: Test frame; it is copied, not modified.
        lags: Window sizes to run, one forecast per value.

    Returns:
        The augmented copy of ``crsp_test_lagged``.

    Raises:
        KeyError: If a forecast run did not produce its prediction column.
    """
    out_sample_start = "2016-01-01"
    modified_crsp_test_lagged = crsp_test_lagged.copy()

    for lag in lags:
        # Pass each lag individually to uni2tsbasemoe
        _, modified_crsp_test_lagged = run_forecast_uni2ts_moe_base(
            crsp_train_lagged,
            modified_crsp_test_lagged,
            lags=[lag],
            out_sample_start=out_sample_start,
        )

        # Column names follow the lag of THIS iteration (not lags[0]), so
        # several lags each get their own pair of columns.
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col not in modified_crsp_test_lagged.columns:
            raise KeyError(f"Column '{pred_col}' not found after running forecast.")

        # Add the predicted returns to the DataFrame under the model-specific name
        modified_crsp_test_lagged[f'uni2tsbasemoe_{lag}_predicted_excess_returns'] = modified_crsp_test_lagged[pred_col].values

    return modified_crsp_test_lagged

# Get predicted excess returns using Uni2ts Base Moe model
# (rebinds crsp_test_lagged to the augmented copy returned by the wrapper)
crsp_test_lagged = uni2tsbasemoe_21_predicted_excess_returns(
    crsp_train_lagged,
    crsp_test_lagged,
    lags=[21],
)

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Build the decile long/short portfolios for one date and return their log returns.

    ``int(0.1 * len(group))`` rows with the highest ``predicted_col`` are held
    long and the same number with the lowest values are held short. Every
    figure is a mean of ``log1p(adjusted_ret)`` over a leg, equal-weighted or
    weighted by ``market_cap_merged``; short figures are negated. The
    ``*_with_cost`` variants additionally subtract the leg's mean
    ``transaction_cost``.

    Returns a dict of twelve values; raises ``ValueError`` when one of the
    three required columns is absent.
    """
    # Guard: all input columns must be present before any selection happens
    required_cols = ['adjusted_ret', 'market_cap_merged', 'transaction_cost']
    missing_cols = [name for name in required_cols if name not in group.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # Select the two legs
    decile_size = int(0.1 * len(group))
    long_leg = group.nlargest(decile_size, predicted_col)
    short_leg = group.nsmallest(decile_size, predicted_col)

    # Per-stock log returns and the average cost of each leg
    long_log = np.log1p(long_leg['adjusted_ret'])
    short_log = np.log1p(short_leg['adjusted_ret'])
    long_cost = long_leg['transaction_cost'].mean()
    short_cost = short_leg['transaction_cost'].mean()

    # Market-cap weighted averages of the log returns
    long_weighted = (long_log * long_leg['market_cap_merged']).sum() / long_leg['market_cap_merged'].sum()
    short_weighted = (short_log * short_leg['market_cap_merged']).sum() / short_leg['market_cap_merged'].sum()

    summary = {}

    # Equal-weighted portfolios
    summary['equal_long_log_return_with_cost'] = long_log.mean() - long_cost
    summary['equal_short_log_return_with_cost'] = -short_log.mean() - short_cost
    summary['equal_long_short_log_return_with_cost'] = (
        summary['equal_long_log_return_with_cost'] + summary['equal_short_log_return_with_cost']
    )
    summary['equal_long_log_return_without_cost'] = long_log.mean()
    summary['equal_short_log_return_without_cost'] = -short_log.mean()
    summary['equal_long_short_log_return_without_cost'] = (
        summary['equal_long_log_return_without_cost'] + summary['equal_short_log_return_without_cost']
    )

    # Value-weighted portfolios
    summary['value_long_log_return_with_cost'] = long_weighted - long_cost
    summary['value_short_log_return_with_cost'] = -(short_weighted + short_cost)
    summary['value_long_short_log_return_with_cost'] = (
        summary['value_long_log_return_with_cost'] + summary['value_short_log_return_with_cost']
    )
    summary['value_long_log_return_without_cost'] = long_weighted
    summary['value_short_log_return_without_cost'] = -(short_weighted)
    summary['value_long_short_log_return_without_cost'] = (
        summary['value_long_log_return_without_cost'] + summary['value_short_log_return_without_cost']
    )

    return summary
# Compute cumulative returns for each date with daily rebalancing
# Column of the cumulative table -> key of the dict returned by compute_returns().
_lag21_return_sources = {
    'cum_EL_return_21_with_cost': 'equal_long_log_return_with_cost',
    'cum_ES_return_21_with_cost': 'equal_short_log_return_with_cost',
    'cum_ELS_return_21_with_cost': 'equal_long_short_log_return_with_cost',
    'cum_VL_return_21_with_cost': 'value_long_log_return_with_cost',
    'cum_VS_return_21_with_cost': 'value_short_log_return_with_cost',
    'cum_VLS_return_21_with_cost': 'value_long_short_log_return_with_cost',
    'cum_EL_return_21_without_cost': 'equal_long_log_return_without_cost',
    'cum_ES_return_21_without_cost': 'equal_short_log_return_without_cost',
    'cum_ELS_return_21_without_cost': 'equal_long_short_log_return_without_cost',
    'cum_VL_return_21_without_cost': 'value_long_log_return_without_cost',
    'cum_VS_return_21_without_cost': 'value_short_log_return_without_cost',
    'cum_VLS_return_21_without_cost': 'value_long_short_log_return_without_cost',
}

# One list per output column, filled date by date
cumulative_log_returns_by_date_uni2tsbasemoe21 = {
    'date': [],
    **{column: [] for column in _lag21_return_sources},
}

# Running sums of the daily log returns for lag 21, all starting at 0
_lag21_running_totals = dict.fromkeys(_lag21_return_sources, 0)

# groupby splits the frame once (instead of re-filtering it for every date)
# and yields the dates in ascending order, which the running sums rely on.
for date, group in crsp_test_lagged.groupby('date'):
    # Compute returns for lag 21
    returns = compute_returns(group, 'uni2tsbasemoe_21_predicted_excess_returns')

    # Update the running totals and record their value for this date
    cumulative_log_returns_by_date_uni2tsbasemoe21['date'].append(date)
    for column, source in _lag21_return_sources.items():
        _lag21_running_totals[column] += returns[source]
        cumulative_log_returns_by_date_uni2tsbasemoe21[column].append(_lag21_running_totals[column])

# Keep the final totals available under their individual names
(
    cum_EL_return_21_with_cost,
    cum_ES_return_21_with_cost,
    cum_ELS_return_21_with_cost,
    cum_VL_return_21_with_cost,
    cum_VS_return_21_with_cost,
    cum_VLS_return_21_with_cost,
    cum_EL_return_21_without_cost,
    cum_ES_return_21_without_cost,
    cum_ELS_return_21_without_cost,
    cum_VL_return_21_without_cost,
    cum_VS_return_21_without_cost,
    cum_VLS_return_21_without_cost,
) = _lag21_running_totals.values()


# Convert to DataFrame for lag 21
cumulative_log_returns_uni2tsbasemoe_lag_21 = pd.DataFrame(cumulative_log_returns_by_date_uni2tsbasemoe21)

# Display the cumulative returns DataFrame for lag 21
display(cumulative_log_returns_uni2tsbasemoe_lag_21.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_uni2tsbasemoe_lag_21.to_csv("cumulative_log_returns_uni2tsbasemoe_lag_21.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound per-period returns into a single total return.

    Each value is treated as a simple return: the result is the product of
    ``1 + r`` over all periods, minus 1.
    """
    growth_factors = 1 + daily_returns
    return np.prod(growth_factors) - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Annualise the compounded return of a series of per-period returns.

    Args:
        daily_returns: Array of simple per-period returns.
        periods: Number of periods per year.

    Returns:
        The compounded growth raised to ``periods / len(daily_returns)``,
        minus 1. NaN when ``daily_returns`` is empty, because the exponent
        would divide by zero.
    """
    n_obs = len(daily_returns)
    if n_obs == 0:
        return np.nan
    cumulative_return_value = np.prod(1 + daily_returns) - 1
    return (1 + cumulative_return_value) ** (periods / n_obs) - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Annualised Sharpe ratio of per-period returns.

    The annual ``risk_free_rate`` is spread evenly over ``periods`` and
    subtracted from every return; the mean of the result is divided by its
    standard deviation and scaled by ``sqrt(periods)``.
    """
    per_period_rf = risk_free_rate / periods  # Assuming 252 trading days
    excess_returns = daily_returns - per_period_rf
    mean_excess = excess_returns.mean()
    dispersion = excess_returns.std()
    return np.sqrt(periods) * mean_excess / dispersion

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    """Annualised volatility: per-period standard deviation times ``sqrt(periods)``."""
    per_period_std = np.std(daily_returns)
    return per_period_std * np.sqrt(periods)

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Return the largest peak-to-trough decline of the compounded path.

    Args:
        daily_returns: Array of simple per-period returns.

    Returns:
        The most negative relative drop from a running peak (0.0 if the path
        never falls below a previous peak), or NaN for empty input.
    """
    if len(daily_returns) == 0:
        return np.nan
    wealth = np.cumprod(1 + daily_returns)
    # The starting capital of 1.0 counts as the first peak; without it a loss
    # in the very first periods would be reported as no drawdown at all.
    running_peak = np.maximum(np.maximum.accumulate(wealth), 1.0)
    drawdown = (wealth - running_peak) / running_peak
    return np.min(drawdown)

# Apply fixed transaction cost to the daily returns
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Reduce every return by the same absolute ``transaction_cost``."""
    net_of_cost = daily_returns - transaction_cost
    return net_of_cost

# Apply percentage-based transaction cost to the daily returns
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every return by ``1 - transaction_cost_percentage``."""
    retained_share = 1 - transaction_cost_percentage
    return daily_returns * retained_share

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    'cum_EL_return_21_with_cost', 'cum_ES_return_21_with_cost', 'cum_ELS_return_21_with_cost',
    'cum_VL_return_21_with_cost', 'cum_VS_return_21_with_cost', 'cum_VLS_return_21_with_cost',
]

# Initialize metrics container
metrics = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio with transaction cost
for portfolio in portfolios_with_cost:
    cumulative_returns_uni2tsbasemoe21_c = cumulative_log_returns_uni2tsbasemoe_lag_21[portfolio].values

    # Recover the daily log returns from the running sums. Differencing
    # against a leading 0 keeps the first day's return, which a plain
    # np.diff would silently drop.
    # NOTE(review): assumes the first row already holds the first day's
    # return (running sum started from 0) - confirm against the cell that
    # builds this frame.
    daily_log_returns = np.diff(cumulative_returns_uni2tsbasemoe21_c, prepend=0.0)

    # The metric helpers compound with (1 + r), i.e. they expect simple
    # returns, so convert the log returns before using them.
    daily_returns = np.expm1(daily_log_returns)

    # Apply fixed transaction cost for other metrics
    # NOTE(review): the *_with_cost columns appear to be net of transaction
    # cost already, so this may charge the cost a second time - confirm the
    # intended cost model.
    daily_returns_after_cost_fixed = apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001)

    # Apply percentage-based transaction cost for volatility and standard deviation
    daily_returns_after_cost_percentage = apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001)

    # Calculate cumulative returns after fixed transaction cost
    cum_return_after_cost = cumulative_return(daily_returns_after_cost_fixed)

    # Metrics calculations
    ann_return = annualized_return(daily_returns_after_cost_fixed)
    sharpe = sharpe_ratio(daily_returns_after_cost_fixed)
    vol = calculate_volatility(daily_returns_after_cost_percentage)
    max_draw = maximum_drawdown(daily_returns_after_cost_fixed)

    # Standard Deviation of daily returns after cost
    std_dev = np.std(daily_returns_after_cost_percentage)

    # Store results
    metrics['Portfolio'].append(portfolio)
    metrics['Annualized Return'].append(ann_return)
    metrics['Sharpe Ratio'].append(sharpe)
    metrics['Volatility'].append(vol)
    metrics['Standard Deviation'].append(std_dev)
    metrics['Max Drawdown'].append(max_draw)
    metrics['Cumulative Return'].append(cum_return_after_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tsbasemoe21_c = pd.DataFrame(metrics)
display(metrics_uni2tsbasemoe21_c)

# same calculations for portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_21_without_cost', 'cum_ES_return_21_without_cost', 'cum_ELS_return_21_without_cost',
    'cum_VL_return_21_without_cost', 'cum_VS_return_21_without_cost', 'cum_VLS_return_21_without_cost',
]

# Initialize metrics container for portfolios without transaction cost
metrics_wc = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    cumulative_returns_uni2tsbasemoe21_wc = cumulative_log_returns_uni2tsbasemoe_lag_21[portfolio].values

    # Recover the daily log returns from the running sums. Differencing
    # against a leading 0 keeps the first day's return, which a plain
    # np.diff would silently drop.
    # NOTE(review): assumes the first row already holds the first day's
    # return (running sum started from 0) - confirm against the cell that
    # builds this frame.
    daily_log_returns = np.diff(cumulative_returns_uni2tsbasemoe21_wc, prepend=0.0)

    # The metric helpers compound with (1 + r), i.e. they expect simple
    # returns, so convert the log returns before using them.
    daily_returns = np.expm1(daily_log_returns)

    # Calculate cumulative returns without transaction cost
    cum_return_without_cost = cumulative_return(daily_returns)

    # Metrics calculations
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns without cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics_wc['Portfolio'].append(portfolio)
    metrics_wc['Annualized Return'].append(ann_return)
    metrics_wc['Sharpe Ratio'].append(sharpe)
    metrics_wc['Volatility'].append(vol)
    metrics_wc['Standard Deviation'].append(std_dev)
    metrics_wc['Max Drawdown'].append(max_draw)
    metrics_wc['Cumulative Return'].append(cum_return_without_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tsbasemoe21_wc = pd.DataFrame(metrics_wc)
display(metrics_uni2tsbasemoe21_wc)

# Save the portfolio metrics with transaction costs
metrics_uni2tsbasemoe21_c.to_csv('metrics_uni2tsbasemoe21_with_cost.csv', index=False)

# Save the portfolio metrics without transaction costs
metrics_uni2tsbasemoe21_wc.to_csv('metrics_uni2tsbasemoe21_without_cost.csv', index=False)

### Window Size 252

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the flat transaction cost; ``row`` is accepted but not read."""
    flat_cost = 0.001  # 10 bps for both small and large cap stocks
    return flat_cost

# The cost is the same constant for every row, so broadcast the scalar
# instead of calling the function once per row through DataFrame.apply.
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `uni2tsbasemoe` to predict excess returns
def uni2tsbasemoe_252_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=None):
    """Run the Uni2TS MoE base forecast per lag and expose its predictions.

    For every lag the forecast helper is called on a copy of the test frame,
    the column ``predicted_excess_returns_lag{lag}`` it is expected to add is
    verified, and its values are copied to
    ``uni2tsbasemoe_{lag}_predicted_excess_returns``.

    Args:
        crsp_train_lagged: Training frame forwarded to the forecast helper.
        crsp_test_lagged: Test frame; it is copied, not modified in place.
        lags: Iterable of lags to forecast. Defaults to ``[252]``.

    Returns:
        The copied test frame with the prediction columns added.

    Raises:
        KeyError: If the forecast helper did not produce the expected
            prediction column for a lag.
    """
    if lags is None:
        lags = [252]
    out_sample_start = "2016-01-01"
    modified_crsp_test_lagged = crsp_test_lagged.copy()

    for lag in lags:
        # Pass each lag individually to uni2tsbasemoe; the first element of
        # the returned pair is not needed here.
        _, modified_crsp_test_lagged = run_forecast_uni2ts_moe_base(
            crsp_train_lagged,
            modified_crsp_test_lagged,
            lags=[lag],
            out_sample_start=out_sample_start,
        )

        # Check the column for the lag being processed (not always the first
        # one), otherwise later lags would re-check and re-copy lag 0.
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col not in modified_crsp_test_lagged.columns:
            raise KeyError(f"Column '{pred_col}' not found after running forecast.")

        # Add the predicted returns to the DataFrame
        modified_crsp_test_lagged[f'uni2tsbasemoe_{lag}_predicted_excess_returns'] = modified_crsp_test_lagged[pred_col].values

    return modified_crsp_test_lagged

# Get predicted excess returns using Uni2ts Base Moe model
crsp_test_lagged = uni2tsbasemoe_252_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=[252])

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute one period's long, short and long-short portfolio log returns.

    The rows of ``group`` are ranked by ``predicted_col``; the top
    ``int(0.1 * len(group))`` rows form the long leg and the bottom ones the
    short leg. Each leg is evaluated equal-weighted and weighted by
    ``market_cap_merged``, with and without the leg's mean
    ``transaction_cost`` deducted. Short-leg returns are sign-flipped, and a
    long-short return is the sum of its two legs.

    NOTE(review): with fewer than 10 rows the bucket size is 0 and every
    returned value is expected to be NaN.

    Raises:
        ValueError: If ``adjusted_ret``, ``market_cap_merged`` or
            ``transaction_cost`` is missing from ``group``.
    """
    # Fail fast when an input column is absent.
    missing_cols = [
        name
        for name in ('adjusted_ret', 'market_cap_merged', 'transaction_cost')
        if name not in group.columns
    ]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # Select the two legs from the ranking on the predicted column.
    bucket_size = int(0.1 * len(group))
    long_leg = group.nlargest(bucket_size, predicted_col)
    short_leg = group.nsmallest(bucket_size, predicted_col)

    # Per-stock log returns and per-leg average cost.
    long_log = np.log1p(long_leg['adjusted_ret'])
    short_log = np.log1p(short_leg['adjusted_ret'])
    long_cost = long_leg['transaction_cost'].mean()
    short_cost = short_leg['transaction_cost'].mean()

    # Equal-weighted legs.
    eq_long_gross = long_log.mean()
    eq_long_net = eq_long_gross - long_cost
    eq_short_gross = -short_log.mean()
    eq_short_net = eq_short_gross - short_cost

    # Market-cap-weighted legs.
    long_cap = long_leg['market_cap_merged']
    short_cap = short_leg['market_cap_merged']
    val_long_gross = (long_log * long_cap).sum() / long_cap.sum()
    val_long_net = val_long_gross - long_cost
    short_weighted = (short_log * short_cap).sum() / short_cap.sum()
    val_short_net = -(short_weighted + short_cost)
    val_short_gross = -(short_weighted)

    results = {}
    results['equal_long_log_return_with_cost'] = eq_long_net
    results['equal_short_log_return_with_cost'] = eq_short_net
    results['equal_long_short_log_return_with_cost'] = eq_long_net + eq_short_net
    results['equal_long_log_return_without_cost'] = eq_long_gross
    results['equal_short_log_return_without_cost'] = eq_short_gross
    results['equal_long_short_log_return_without_cost'] = eq_long_gross + eq_short_gross
    results['value_long_log_return_with_cost'] = val_long_net
    results['value_short_log_return_with_cost'] = val_short_net
    results['value_long_short_log_return_with_cost'] = val_long_net + val_short_net
    results['value_long_log_return_without_cost'] = val_long_gross
    results['value_short_log_return_without_cost'] = val_short_gross
    results['value_long_short_log_return_without_cost'] = val_long_gross + val_short_gross
    return results
# Compute cumulative returns for each date with daily rebalancing
# Output column -> key of the per-day value returned by compute_returns.
_source_key_by_column_252 = {
    'cum_EL_return_252_with_cost': 'equal_long_log_return_with_cost',
    'cum_ES_return_252_with_cost': 'equal_short_log_return_with_cost',
    'cum_ELS_return_252_with_cost': 'equal_long_short_log_return_with_cost',
    'cum_VL_return_252_with_cost': 'value_long_log_return_with_cost',
    'cum_VS_return_252_with_cost': 'value_short_log_return_with_cost',
    'cum_VLS_return_252_with_cost': 'value_long_short_log_return_with_cost',
    'cum_EL_return_252_without_cost': 'equal_long_log_return_without_cost',
    'cum_ES_return_252_without_cost': 'equal_short_log_return_without_cost',
    'cum_ELS_return_252_without_cost': 'equal_long_short_log_return_without_cost',
    'cum_VL_return_252_without_cost': 'value_long_log_return_without_cost',
    'cum_VS_return_252_without_cost': 'value_short_log_return_without_cost',
    'cum_VLS_return_252_without_cost': 'value_long_short_log_return_without_cost',
}

# One list per output column, preceded by the date column.
cumulative_log_returns_by_date_uni2tsbasemoe252 = {'date': []}
for _column in _source_key_by_column_252:
    cumulative_log_returns_by_date_uni2tsbasemoe252[_column] = []

# Running sums of the daily log returns for lag 252, all starting at 0.
_running_totals_252 = dict.fromkeys(_source_key_by_column_252, 0)

# Walk the dates in order of first appearance and extend every running sum.
for date in crsp_test_lagged['date'].unique():
    group = crsp_test_lagged[crsp_test_lagged['date'] == date]

    # Compute returns for lag 252
    returns = compute_returns(group, f'uni2tsbasemoe_{252}_predicted_excess_returns')

    cumulative_log_returns_by_date_uni2tsbasemoe252['date'].append(date)
    for _column, _key in _source_key_by_column_252.items():
        _running_totals_252[_column] += returns[_key]
        cumulative_log_returns_by_date_uni2tsbasemoe252[_column].append(_running_totals_252[_column])

# Keep the final totals available under their individual names.
(
    cum_EL_return_252_with_cost,
    cum_ES_return_252_with_cost,
    cum_ELS_return_252_with_cost,
    cum_VL_return_252_with_cost,
    cum_VS_return_252_with_cost,
    cum_VLS_return_252_with_cost,
    cum_EL_return_252_without_cost,
    cum_ES_return_252_without_cost,
    cum_ELS_return_252_without_cost,
    cum_VL_return_252_without_cost,
    cum_VS_return_252_without_cost,
    cum_VLS_return_252_without_cost,
) = _running_totals_252.values()

# Convert to DataFrame for lag 252
cumulative_log_returns_uni2tsbasemoe_lag_252 = pd.DataFrame(cumulative_log_returns_by_date_uni2tsbasemoe252)

# Display the cumulative returns DataFrame for lag 252
display(cumulative_log_returns_uni2tsbasemoe_lag_252.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_uni2tsbasemoe_lag_252.to_csv("cumulative_log_returns_uni2tsbasemoe_lag_252.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound per-period simple returns into a single total return.

    Every return ``r`` becomes a growth factor ``1 + r``; the factors are
    multiplied together and the initial unit of capital is subtracted.
    """
    growth_factors = 1 + daily_returns
    total_growth = np.prod(growth_factors)
    return total_growth - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Return the geometric annualised return of per-period simple returns.

    Args:
        daily_returns: Sized array-like of simple returns (e.g. NumPy array).
        periods: Number of return periods that make up one year.

    Returns:
        ``prod(1 + r) ** (periods / n) - 1`` where ``n`` is the number of
        observations, or NaN when there are no observations (the exponent
        would otherwise divide by zero).
    """
    n_obs = len(daily_returns)
    if n_obs == 0:
        return np.nan
    total_growth = np.prod(1 + daily_returns)
    return total_growth ** (periods / n_obs) - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Return the annualised Sharpe ratio of per-period returns.

    Args:
        daily_returns: Array-like of per-period returns that supports
            element-wise arithmetic and ``.std()`` (e.g. a NumPy array).
        risk_free_rate: Annual risk-free rate, spread evenly over ``periods``.
        periods: Number of return periods per year.

    Returns:
        ``sqrt(periods) * mean(excess) / std(excess)``, or NaN when the
        standard deviation is zero or undefined (for example empty or
        single-observation input), where the ratio has no meaning.
    """
    daily_rf = risk_free_rate / periods
    excess_returns = daily_returns - daily_rf
    dispersion = excess_returns.std()
    # A zero or NaN dispersion would otherwise produce +/-inf (or a
    # divide-by-zero warning) instead of a clearly "undefined" result.
    if not dispersion > 0:
        return np.nan
    return np.sqrt(periods) * excess_returns.mean() / dispersion

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    """Scale the per-period standard deviation to an annual figure.

    The population standard deviation (``np.std`` default) of the returns is
    multiplied by ``sqrt(periods)``.
    """
    per_period_sigma = np.std(daily_returns)
    annualisation_factor = np.sqrt(periods)
    return per_period_sigma * annualisation_factor

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Return the largest peak-to-trough loss of the compounded wealth path.

    The wealth path starts at 1.0 (the capital before the first return) so
    that a loss on the very first period counts as a drawdown; without that
    starting point the first observation would be treated as the initial
    peak and an opening loss would go unreported.

    Args:
        daily_returns: Array-like of per-period simple returns.

    Returns:
        The minimum of ``(wealth - running_peak) / running_peak`` as a
        non-positive number, or 0.0 when there are no observations.
    """
    returns = np.asarray(daily_returns, dtype=float)
    if returns.size == 0:
        return 0.0
    wealth = np.concatenate(([1.0], np.cumprod(1 + returns)))
    running_peak = np.maximum.accumulate(wealth)
    drawdown = (wealth - running_peak) / running_peak
    return np.min(drawdown)

# Apply fixed transaction cost to the daily returns
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Reduce every return by the same absolute ``transaction_cost``."""
    net_of_cost = daily_returns - transaction_cost
    return net_of_cost

# Apply percentage-based transaction cost to the daily returns
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every return by ``1 - transaction_cost_percentage``."""
    retained_share = 1 - transaction_cost_percentage
    return daily_returns * retained_share

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    'cum_EL_return_252_with_cost', 'cum_ES_return_252_with_cost', 'cum_ELS_return_252_with_cost',
    'cum_VL_return_252_with_cost', 'cum_VS_return_252_with_cost', 'cum_VLS_return_252_with_cost',
]

# Initialize metrics container
metrics = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio with transaction cost
for portfolio in portfolios_with_cost:
    cumulative_returns_uni2tsbasemoe252_c = cumulative_log_returns_uni2tsbasemoe_lag_252[portfolio].values

    # Recover the daily log returns from the running sums. The first row
    # already holds the first day's return (the sums start from 0), so
    # difference against a leading 0 to keep that day; a plain np.diff
    # would silently drop it.
    daily_log_returns = np.diff(cumulative_returns_uni2tsbasemoe252_c, prepend=0.0)

    # The metric helpers compound with (1 + r), i.e. they expect simple
    # returns, so convert the log returns before using them.
    daily_returns = np.expm1(daily_log_returns)

    # Apply fixed transaction cost for other metrics
    # NOTE(review): compute_returns already subtracts the transaction cost
    # when building the *_with_cost columns, so this charges the cost a
    # second time - confirm the intended cost model.
    daily_returns_after_cost_fixed = apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001)

    # Apply percentage-based transaction cost for volatility and standard deviation
    daily_returns_after_cost_percentage = apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001)

    # Calculate cumulative returns after fixed transaction cost
    cum_return_after_cost = cumulative_return(daily_returns_after_cost_fixed)

    # Metrics calculations
    ann_return = annualized_return(daily_returns_after_cost_fixed)
    sharpe = sharpe_ratio(daily_returns_after_cost_fixed)
    vol = calculate_volatility(daily_returns_after_cost_percentage)
    max_draw = maximum_drawdown(daily_returns_after_cost_fixed)

    # Standard Deviation of daily returns after cost
    std_dev = np.std(daily_returns_after_cost_percentage)

    # Store results
    metrics['Portfolio'].append(portfolio)
    metrics['Annualized Return'].append(ann_return)
    metrics['Sharpe Ratio'].append(sharpe)
    metrics['Volatility'].append(vol)
    metrics['Standard Deviation'].append(std_dev)
    metrics['Max Drawdown'].append(max_draw)
    metrics['Cumulative Return'].append(cum_return_after_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tsbasemoe252_c = pd.DataFrame(metrics)
display(metrics_uni2tsbasemoe252_c)

# same calculations for portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_252_without_cost', 'cum_ES_return_252_without_cost', 'cum_ELS_return_252_without_cost',
    'cum_VL_return_252_without_cost', 'cum_VS_return_252_without_cost', 'cum_VLS_return_252_without_cost',
]

# Initialize metrics container for portfolios without transaction cost
metrics_wc = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    cumulative_returns_uni2tsbasemoe252_wc = cumulative_log_returns_uni2tsbasemoe_lag_252[portfolio].values

    # Recover the daily log returns from the running sums. The first row
    # already holds the first day's return (the sums start from 0), so
    # difference against a leading 0 to keep that day; a plain np.diff
    # would silently drop it.
    daily_log_returns = np.diff(cumulative_returns_uni2tsbasemoe252_wc, prepend=0.0)

    # The metric helpers compound with (1 + r), i.e. they expect simple
    # returns, so convert the log returns before using them.
    daily_returns = np.expm1(daily_log_returns)

    # Calculate cumulative returns without transaction cost
    cum_return_without_cost = cumulative_return(daily_returns)

    # Metrics calculations
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns without cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics_wc['Portfolio'].append(portfolio)
    metrics_wc['Annualized Return'].append(ann_return)
    metrics_wc['Sharpe Ratio'].append(sharpe)
    metrics_wc['Volatility'].append(vol)
    metrics_wc['Standard Deviation'].append(std_dev)
    metrics_wc['Max Drawdown'].append(max_draw)
    metrics_wc['Cumulative Return'].append(cum_return_without_cost)

# Convert the results into a DataFrame for analysis
metrics_uni2tsbasemoe252_wc = pd.DataFrame(metrics_wc)
display(metrics_uni2tsbasemoe252_wc)

# Save the portfolio metrics with transaction costs
metrics_uni2tsbasemoe252_c.to_csv('metrics_uni2tsbasemoe252_with_cost.csv', index=False)

# Save the portfolio metrics without transaction costs
metrics_uni2tsbasemoe252_wc.to_csv('metrics_uni2tsbasemoe252_without_cost.csv', index=False)

### Window Size 512

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the flat transaction cost; ``row`` is accepted but not read."""
    flat_cost = 0.001  # 10 bps for both small and large cap stocks
    return flat_cost

# The cost is the same constant for every row, so broadcast the scalar
# instead of calling the function once per row through DataFrame.apply.
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `uni2tsbasemoe` to predict excess returns
def uni2tsbasemoe_512_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=None):
    """Run the Uni2TS MoE base forecast per lag and expose its predictions.

    For every lag the forecast helper is called on a copy of the test frame,
    the column ``predicted_excess_returns_lag{lag}`` it is expected to add is
    verified, and its values are copied to
    ``uni2tsbasemoe_{lag}_predicted_excess_returns``.

    Args:
        crsp_train_lagged: Training frame forwarded to the forecast helper.
        crsp_test_lagged: Test frame; it is copied, not modified in place.
        lags: Iterable of lags to forecast. Defaults to ``[512]``.

    Returns:
        The copied test frame with the prediction columns added.

    Raises:
        KeyError: If the forecast helper did not produce the expected
            prediction column for a lag.
    """
    if lags is None:
        lags = [512]
    out_sample_start = "2016-01-01"
    modified_crsp_test_lagged = crsp_test_lagged.copy()

    for lag in lags:
        # Pass each lag individually to uni2tsbasemoe; the first element of
        # the returned pair is not needed here.
        _, modified_crsp_test_lagged = run_forecast_uni2ts_moe_base(
            crsp_train_lagged,
            modified_crsp_test_lagged,
            lags=[lag],
            out_sample_start=out_sample_start,
        )

        # Check the column for the lag being processed (not always the first
        # one), otherwise later lags would re-check and re-copy lag 0.
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col not in modified_crsp_test_lagged.columns:
            raise KeyError(f"Column '{pred_col}' not found after running forecast.")

        # Add the predicted returns to the DataFrame
        modified_crsp_test_lagged[f'uni2tsbasemoe_{lag}_predicted_excess_returns'] = modified_crsp_test_lagged[pred_col].values

    return modified_crsp_test_lagged

# Get predicted excess returns using Uni2ts Base Moe model
crsp_test_lagged = uni2tsbasemoe_512_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=[512])

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute one period's long, short and long-short portfolio log returns.

    The rows of ``group`` are ranked by ``predicted_col``; the top
    ``int(0.1 * len(group))`` rows form the long leg and the bottom ones the
    short leg. Each leg is evaluated equal-weighted and weighted by
    ``market_cap_merged``, with and without the leg's mean
    ``transaction_cost`` deducted. Short-leg returns are sign-flipped, and a
    long-short return is the sum of its two legs.

    NOTE(review): with fewer than 10 rows the bucket size is 0 and every
    returned value is expected to be NaN.

    Raises:
        ValueError: If ``adjusted_ret``, ``market_cap_merged`` or
            ``transaction_cost`` is missing from ``group``.
    """
    # Fail fast when an input column is absent.
    missing_cols = [
        name
        for name in ('adjusted_ret', 'market_cap_merged', 'transaction_cost')
        if name not in group.columns
    ]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    # Select the two legs from the ranking on the predicted column.
    bucket_size = int(0.1 * len(group))
    long_leg = group.nlargest(bucket_size, predicted_col)
    short_leg = group.nsmallest(bucket_size, predicted_col)

    # Per-stock log returns and per-leg average cost.
    long_log = np.log1p(long_leg['adjusted_ret'])
    short_log = np.log1p(short_leg['adjusted_ret'])
    long_cost = long_leg['transaction_cost'].mean()
    short_cost = short_leg['transaction_cost'].mean()

    # Equal-weighted legs.
    eq_long_gross = long_log.mean()
    eq_long_net = eq_long_gross - long_cost
    eq_short_gross = -short_log.mean()
    eq_short_net = eq_short_gross - short_cost

    # Market-cap-weighted legs.
    long_cap = long_leg['market_cap_merged']
    short_cap = short_leg['market_cap_merged']
    val_long_gross = (long_log * long_cap).sum() / long_cap.sum()
    val_long_net = val_long_gross - long_cost
    short_weighted = (short_log * short_cap).sum() / short_cap.sum()
    val_short_net = -(short_weighted + short_cost)
    val_short_gross = -(short_weighted)

    results = {}
    results['equal_long_log_return_with_cost'] = eq_long_net
    results['equal_short_log_return_with_cost'] = eq_short_net
    results['equal_long_short_log_return_with_cost'] = eq_long_net + eq_short_net
    results['equal_long_log_return_without_cost'] = eq_long_gross
    results['equal_short_log_return_without_cost'] = eq_short_gross
    results['equal_long_short_log_return_without_cost'] = eq_long_gross + eq_short_gross
    results['value_long_log_return_with_cost'] = val_long_net
    results['value_short_log_return_with_cost'] = val_short_net
    results['value_long_short_log_return_with_cost'] = val_long_net + val_short_net
    results['value_long_log_return_without_cost'] = val_long_gross
    results['value_short_log_return_without_cost'] = val_short_gross
    results['value_long_short_log_return_without_cost'] = val_long_gross + val_short_gross
    return results
# Compute cumulative returns for each date with daily rebalancing
# Output column -> key of the per-day value returned by compute_returns.
_source_key_by_column_512 = {
    'cum_EL_return_512_with_cost': 'equal_long_log_return_with_cost',
    'cum_ES_return_512_with_cost': 'equal_short_log_return_with_cost',
    'cum_ELS_return_512_with_cost': 'equal_long_short_log_return_with_cost',
    'cum_VL_return_512_with_cost': 'value_long_log_return_with_cost',
    'cum_VS_return_512_with_cost': 'value_short_log_return_with_cost',
    'cum_VLS_return_512_with_cost': 'value_long_short_log_return_with_cost',
    'cum_EL_return_512_without_cost': 'equal_long_log_return_without_cost',
    'cum_ES_return_512_without_cost': 'equal_short_log_return_without_cost',
    'cum_ELS_return_512_without_cost': 'equal_long_short_log_return_without_cost',
    'cum_VL_return_512_without_cost': 'value_long_log_return_without_cost',
    'cum_VS_return_512_without_cost': 'value_short_log_return_without_cost',
    'cum_VLS_return_512_without_cost': 'value_long_short_log_return_without_cost',
}

# One list per output column, preceded by the date column.
cumulative_log_returns_by_date_uni2tsbasemoe512 = {'date': []}
for _column in _source_key_by_column_512:
    cumulative_log_returns_by_date_uni2tsbasemoe512[_column] = []

# Running sums of the daily log returns for lag 512, all starting at 0.
_running_totals_512 = dict.fromkeys(_source_key_by_column_512, 0)

# Walk the dates in order of first appearance and extend every running sum.
for date in crsp_test_lagged['date'].unique():
    group = crsp_test_lagged[crsp_test_lagged['date'] == date]

    # Compute returns for lag 512 (or any other lag if needed)
    returns = compute_returns(group, f'uni2tsbasemoe_{512}_predicted_excess_returns')

    cumulative_log_returns_by_date_uni2tsbasemoe512['date'].append(date)
    for _column, _key in _source_key_by_column_512.items():
        _running_totals_512[_column] += returns[_key]
        cumulative_log_returns_by_date_uni2tsbasemoe512[_column].append(_running_totals_512[_column])

# Keep the final totals available under their individual names.
(
    cum_EL_return_512_with_cost,
    cum_ES_return_512_with_cost,
    cum_ELS_return_512_with_cost,
    cum_VL_return_512_with_cost,
    cum_VS_return_512_with_cost,
    cum_VLS_return_512_with_cost,
    cum_EL_return_512_without_cost,
    cum_ES_return_512_without_cost,
    cum_ELS_return_512_without_cost,
    cum_VL_return_512_without_cost,
    cum_VS_return_512_without_cost,
    cum_VLS_return_512_without_cost,
) = _running_totals_512.values()

# Convert to DataFrame for lag 512
cumulative_log_returns_uni2tsbasemoe_lag_512 = pd.DataFrame(cumulative_log_returns_by_date_uni2tsbasemoe512)

# Display the cumulative returns DataFrame for lag 512
display(cumulative_log_returns_uni2tsbasemoe_lag_512.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_uni2tsbasemoe_lag_512.to_csv("cumulative_log_returns_uni2tsbasemoe_lag_512.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound per-period simple returns into a single total return.

    Every return ``r`` becomes a growth factor ``1 + r``; the factors are
    multiplied together and the initial unit of capital is subtracted.
    """
    growth_factors = 1 + daily_returns
    total_growth = np.prod(growth_factors)
    return total_growth - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Return the geometric annualised return of per-period simple returns.

    Args:
        daily_returns: Sized array-like of simple returns (e.g. NumPy array).
        periods: Number of return periods that make up one year.

    Returns:
        ``prod(1 + r) ** (periods / n) - 1`` where ``n`` is the number of
        observations, or NaN when there are no observations (the exponent
        would otherwise divide by zero).
    """
    n_obs = len(daily_returns)
    if n_obs == 0:
        return np.nan
    total_growth = np.prod(1 + daily_returns)
    return total_growth ** (periods / n_obs) - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Return the annualised Sharpe ratio of per-period returns.

    Args:
        daily_returns: Array-like of per-period returns that supports
            element-wise arithmetic and ``.std()`` (e.g. a NumPy array).
        risk_free_rate: Annual risk-free rate, spread evenly over ``periods``.
        periods: Number of return periods per year.

    Returns:
        ``sqrt(periods) * mean(excess) / std(excess)``, or NaN when the
        standard deviation is zero or undefined (for example empty or
        single-observation input), where the ratio has no meaning.
    """
    daily_rf = risk_free_rate / periods
    excess_returns = daily_returns - daily_rf
    dispersion = excess_returns.std()
    # A zero or NaN dispersion would otherwise produce +/-inf (or a
    # divide-by-zero warning) instead of a clearly "undefined" result.
    if not dispersion > 0:
        return np.nan
    return np.sqrt(periods) * excess_returns.mean() / dispersion

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    """Scale the per-period standard deviation to an annual figure.

    The population standard deviation (``np.std`` default) of the returns is
    multiplied by ``sqrt(periods)``.
    """
    per_period_sigma = np.std(daily_returns)
    annualisation_factor = np.sqrt(periods)
    return per_period_sigma * annualisation_factor

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Return the largest peak-to-trough loss of the compounded wealth path.

    The wealth path starts at 1.0 (the capital before the first return) so
    that a loss on the very first period counts as a drawdown; without that
    starting point the first observation would be treated as the initial
    peak and an opening loss would go unreported.

    Args:
        daily_returns: Array-like of per-period simple returns.

    Returns:
        The minimum of ``(wealth - running_peak) / running_peak`` as a
        non-positive number, or 0.0 when there are no observations.
    """
    returns = np.asarray(daily_returns, dtype=float)
    if returns.size == 0:
        return 0.0
    wealth = np.concatenate(([1.0], np.cumprod(1 + returns)))
    running_peak = np.maximum.accumulate(wealth)
    drawdown = (wealth - running_peak) / running_peak
    return np.min(drawdown)

# Apply fixed transaction cost to the daily returns (for other metrics)
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Reduce every return by the same absolute ``transaction_cost``."""
    net_of_cost = daily_returns - transaction_cost
    return net_of_cost

# Apply percentage-based transaction cost to the daily returns (for volatility and standard deviation)
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Return the returns scaled by ``1 - transaction_cost_percentage``.

    Every return is multiplied by the same factor, so positive and negative
    returns alike are shrunk toward zero.
    """
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Columns of the lag-512 cumulative-return table to evaluate: the six "_with_cost" series
portfolios_with_cost = [
    'cum_EL_return_512_with_cost', 'cum_ES_return_512_with_cost', 'cum_ELS_return_512_with_cost',
    'cum_VL_return_512_with_cost', 'cum_VS_return_512_with_cost', 'cum_VLS_return_512_with_cost',
]

# Result container: one list per metric. Each loop iteration below appends exactly one
# value to every list, so entry i of each list belongs to portfolios_with_cost[i].
metrics = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio with transaction cost
for portfolio in portfolios_with_cost:
    cumulative_returns_uni2tsbasemoe512_c = cumulative_log_returns_uni2tsbasemoe_lag_512[portfolio].values

    # Per-period returns are taken as first differences of the cumulative series.
    # np.diff returns one element fewer than its input, so the first row's own
    # return is not part of daily_returns.
    # NOTE(review): if these columns hold cumulative *log* returns, the differences are
    # log returns, whereas the helpers below compound them as simple returns (1 + r) — confirm.
    daily_returns = np.diff(cumulative_returns_uni2tsbasemoe512_c)  # Compute daily returns from cumulative log returns

    # Flat cost of 0.001 subtracted from every return; this series feeds the
    # cumulative return, annualized return, Sharpe ratio and max drawdown.
    # NOTE(review): the columns are already named "_with_cost" — confirm the cost is not counted twice.
    daily_returns_after_cost_fixed = apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001)

    # Returns scaled by (1 - 0.001); this series feeds volatility and standard deviation only
    daily_returns_after_cost_percentage = apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001)

    # Total compounded return of the fixed-cost series
    cum_return_after_cost = cumulative_return(daily_returns_after_cost_fixed)

    # Metrics calculations (helpers are called with their defaults: 252 periods per year,
    # and a 0.01 annual risk-free rate for the Sharpe ratio)
    ann_return = annualized_return(daily_returns_after_cost_fixed)
    sharpe = sharpe_ratio(daily_returns_after_cost_fixed)
    vol = calculate_volatility(daily_returns_after_cost_percentage)  # Using percentage-based cost for volatility
    max_draw = maximum_drawdown(daily_returns_after_cost_fixed)

    # Un-annualized standard deviation of the percentage-cost series
    # ('Volatility' above is this value scaled by sqrt(252))
    std_dev = np.std(daily_returns_after_cost_percentage)

    # Store results (one append per metric list keeps the lists aligned)
    metrics['Portfolio'].append(portfolio)
    metrics['Annualized Return'].append(ann_return)
    metrics['Sharpe Ratio'].append(sharpe)
    metrics['Volatility'].append(vol)
    metrics['Standard Deviation'].append(std_dev)
    metrics['Max Drawdown'].append(max_draw)
    metrics['Cumulative Return'].append(cum_return_after_cost)

# Convert the results into a DataFrame (one row per portfolio) and show it
metrics_uni2tsbasemoe512_c = pd.DataFrame(metrics)
display(metrics_uni2tsbasemoe512_c)

# Same calculations for the six "_without_cost" series; no cost adjustment is applied here
portfolios_without_cost = [
    'cum_EL_return_512_without_cost', 'cum_ES_return_512_without_cost', 'cum_ELS_return_512_without_cost',
    'cum_VL_return_512_without_cost', 'cum_VS_return_512_without_cost', 'cum_VLS_return_512_without_cost',
]

# Result container for the no-cost portfolios: one list per metric, filled in lockstep
# by the loop below so entry i of each list belongs to portfolios_without_cost[i]
metrics_wc = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    cumulative_returns_uni2tsbasemoe512_wc = cumulative_log_returns_uni2tsbasemoe_lag_512[portfolio].values

    # Per-period returns are taken as first differences of the cumulative series.
    # np.diff returns one element fewer than its input, so the first row's own
    # return is not part of daily_returns.
    # NOTE(review): if these columns hold cumulative *log* returns, the differences are
    # log returns, whereas the helpers below compound them as simple returns (1 + r) — confirm.
    daily_returns = np.diff(cumulative_returns_uni2tsbasemoe512_wc)  # Compute daily returns from cumulative log returns

    # Total compounded return of the unadjusted series
    cum_return_without_cost = cumulative_return(daily_returns)

    # Metrics calculations (helpers are called with their defaults: 252 periods per year,
    # and a 0.01 annual risk-free rate for the Sharpe ratio); every metric uses the same series
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Un-annualized standard deviation ('Volatility' above is this value scaled by sqrt(252))
    std_dev = np.std(daily_returns)

    # Store results (one append per metric list keeps the lists aligned)
    metrics_wc['Portfolio'].append(portfolio)
    metrics_wc['Annualized Return'].append(ann_return)
    metrics_wc['Sharpe Ratio'].append(sharpe)
    metrics_wc['Volatility'].append(vol)
    metrics_wc['Standard Deviation'].append(std_dev)
    metrics_wc['Max Drawdown'].append(max_draw)
    metrics_wc['Cumulative Return'].append(cum_return_without_cost)

# Convert the results into a DataFrame (one row per portfolio) and show it
metrics_uni2tsbasemoe512_wc = pd.DataFrame(metrics_wc)
display(metrics_uni2tsbasemoe512_wc)

# Write the with-cost metric table to CSV, leaving out the positional index column
metrics_uni2tsbasemoe512_c.to_csv(
    path_or_buf='metrics_uni2tsbasemoe512_with_cost.csv',
    index=False,
)

# Write the without-cost metric table to CSV, leaving out the positional index column
metrics_uni2tsbasemoe512_wc.to_csv(
    path_or_buf='metrics_uni2tsbasemoe512_without_cost.csv',
    index=False,
)