# **01. Import Libraries and Load Data**



In [None]:
pip install wrds --no-deps

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import wrds

## **Connect to WRDS**

In [None]:
# Establish a connection to the WRDS
db = wrds.Connection()

# **02. Data Collection**

## Select 50 Top Stocks

In [None]:
# Get the earliest trading date for each permno, keeping only securities
# that already traded on or before 2000-01-01.
query_earliest_date = """
SELECT
    permno,
    MIN(date) as first_trade_date
FROM
    crsp.dsf
GROUP BY
    permno
HAVING
    MIN(date) <= '2000-01-01'
"""

earliest_dates = db.raw_sql(query_earliest_date)

# Keep only permnos that have a daily record on every trading date between
# 2000-01-01 and 2024-12-31 (i.e. never delisted or suspended in the sample).
query_active_stocks = """
SELECT
    permno
FROM
    crsp.dsf
WHERE
    date BETWEEN '2000-01-01' AND '2024-12-31'
GROUP BY
    permno
HAVING
    COUNT(DISTINCT date) = (SELECT COUNT(DISTINCT date)
                            FROM crsp.dsf
                            WHERE date BETWEEN '2000-01-01' AND '2024-12-31')
"""

active_stocks = db.raw_sql(query_active_stocks)

# Combine the two sets of stocks to get those listed before 2000 and still active in 2024
filtered_permnos = earliest_dates.merge(active_stocks, on='permno', how='inner')

# An empty list would render as `IN ()`, which is a SQL syntax error; fail
# early with a message that explains what actually went wrong.
if filtered_permnos.empty:
    raise ValueError("No permno was listed before 2000 and traded on every day through 2024-12-31.")

# Comma-separated permno list for the SQL IN clause. Casting to int keeps the
# interpolated text strictly numeric even if the column came back as float.
permnos_str = ','.join(str(int(permno)) for permno in filtered_permnos['permno'])

# Get market capitalisation, company name, and sector information for the IT sector.
#  * ABS(prc): CRSP stores a negative price when it is a bid/ask average, so
#    the raw product would yield a negative market cap and mis-rank the stock.
#  * The name-history join is restricted to the record valid on a.date, so
#    each permno appears once and SIC/exchange codes are the ones in force on
#    that date rather than any historical classification.
query_main = f"""
SELECT
    a.permco,
    a.permno,
    a.date,
    a.shrout,
    ABS(a.prc) * a.shrout as market_cap,
    b.shrcd,
    b.exchcd,
    b.siccd,
    b.ncusip,
    b.comnam
FROM
    crsp.dsf AS a
JOIN
    crsp.dsenames AS b
ON
    a.permno = b.permno
    AND a.date >= b.namedt
    AND (b.nameendt IS NULL OR a.date <= b.nameendt)
WHERE
    (
        (b.siccd BETWEEN 3570 AND 3579) OR  -- Computer and office equipment
        (b.siccd BETWEEN 3600 AND 3674) OR  -- Electronic equipment and semiconductors
        (b.siccd BETWEEN 7370 AND 7379) OR  -- Computer programming, software and data services
        (b.siccd BETWEEN 4810 AND 4813)     -- Telephone communications
    )
    AND a.permno IN ({permnos_str})
    AND a.date = '2024-12-31'
    AND b.exchcd IN (1, 3)                  -- NYSE and NASDAQ listings
"""

# Execute query
crsp_data = db.raw_sql(query_main)

In [None]:
# Check the results from crsp_data
crsp_data.head()

In [None]:
print("Original dataset size: ", len(crsp_data))
print("Original number of stocks: ", len(set(crsp_data['permno'])))

In [None]:
# Filter data for the latest date
latest_date = crsp_data['date'].max()
latest_data = crsp_data[crsp_data['date'] == latest_date]

# Keep the highest-market-cap row for every (permco, permno) pair and take the
# 50 largest. Sorting once and dropping duplicates gives the same selection as
# a per-group `nlargest(1)` without the slow `groupby.apply` (which also warns
# about operating on the grouping columns in recent pandas) and without the
# intermediate MultiIndex. A stable sort keeps the first row among ties.
top_50_IT_stocks = (
    latest_data.sort_values(by='market_cap', ascending=False, kind='mergesort')
    .drop_duplicates(subset=['permco', 'permno'], keep='first')
    .head(50)
    .reset_index(drop=True)
)

In [None]:
print(top_50_IT_stocks)

In [None]:
# Check for missing values in important columns
missing_data = crsp_data[crsp_data[['market_cap', 'comnam', 'ncusip']].isna().any(axis=1)]

# Display the rows with missing data
print(missing_data)

In [None]:
# Before removing duplicates
print(f"Data size before removing duplicates: {crsp_data.shape}")

# Remove duplicates: a security should have a single row per trading date.
# (The key list previously named 'date' twice; the redundant entry is dropped.)
crsp_data.drop_duplicates(subset=['permno', 'date'], keep='first', inplace=True)

# After removing duplicates
print(f"Data size after removing duplicates: {crsp_data.shape}")

## Collect Price and Return Data

In [None]:
# Get permno of the top 50 stocks
top_50_permnos = top_50_IT_stocks['permno'].tolist()

# Convert permno list to a string for the SQL IN clause
permnos_str = ', '.join(map(str, top_50_permnos))

### Download train data

In [None]:
# Define the date range
start_date = '2000-01-01'
end_date = '2015-12-31'

# Query to get data for the specified date range and variables for the top 50 stocks
query = f"""
SELECT
    a.permco,
    a.permno,
    b.comnam,
    b.ticker,
    a.date,
    a.prc,
    a.cfacpr,
    a.ret
FROM
    crsp.dsf AS a
JOIN
    (SELECT permno, comnam, ticker, namedt, nameendt
     FROM crsp.dsenames
     WHERE permno IN ({permnos_str}) -- filter for the top 50 stocks
       AND namedt <= '{end_date}'
       AND (nameendt IS NULL OR nameendt >= '{start_date}')) AS b
ON
    a.permno = b.permno
WHERE
    a.permno IN ({permnos_str})     -- filter for the top 50 stocks
    AND a.date BETWEEN '{start_date}' AND '{end_date}'
    AND a.date >= b.namedt
    AND (a.date <= b.nameendt OR b.nameendt IS NULL)
"""

# Execute query
crsp_train = db.raw_sql(query)
crsp_train.sort_values(by=['permco', 'date'], inplace=True)

In [None]:
# Check for missing values
print(crsp_train.isna().sum())

In [None]:
# Drop rows where 'prc' or 'ret' are missing (NaN)
crsp_train = crsp_train.dropna(subset=['prc', 'ret'])

In [None]:
crsp_train

## Merge the risk-free rate with stock returns (calculate excess returns)

In [None]:
# Query to fetch the daily risk-free rate for the period 2000-2015
query_risk_free = """
SELECT
    date,
    rf
FROM
    ff.factors_daily
WHERE
    date BETWEEN '2000-01-01' AND '2015-12-31'
"""
rf_data = db.raw_sql(query_risk_free)

# Ensure both 'date' columns are in datetime format before merging
crsp_train['date'] = pd.to_datetime(crsp_train['date'], errors='coerce')
rf_data['date'] = pd.to_datetime(rf_data['date'], errors='coerce')

# Merge the risk-free rate with stock data
crsp_train = pd.merge(crsp_train, rf_data, how='left', on='date')

# CRSP `ret` is a holding-period return that already accounts for splits and
# distributions. `cfacpr` is the cumulative factor for adjusting *prices*;
# dividing a return by it rescales the return by an arbitrary per-stock,
# time-varying constant. The column name is kept so downstream cells that
# read 'adjusted_ret' continue to work.
crsp_train['adjusted_ret'] = crsp_train['ret']

# Calculate excess returns using the adjusted returns
# NOTE(review): assumes `rf` is expressed in the same decimal units as `ret` - confirm.
crsp_train['excess_ret'] = crsp_train['adjusted_ret'] - crsp_train['rf']

# Clip abnormal returns to +100% and -100%
crsp_train['excess_ret'] = crsp_train['excess_ret'].clip(lower=-1.0, upper=1.0)

# Convert the excess return to a binary target for directional forecasting
crsp_train['directional_target'] = np.where(crsp_train['excess_ret'] > 0, 1, 0)

# Check the results for train data
crsp_train[['permco', 'permno', 'date', 'adjusted_ret', 'excess_ret']].head()

### Download test data (2016-2024)


In [None]:
# Define the date range
start_date = '2016-01-01'
end_date = '2024-12-31'

# Query to get data for the specified date range and variables for the top 50 stocks
query = f"""
SELECT
    a.permco,
    a.permno,
    b.comnam,
    b.ticker,
    a.date,
    a.prc,
    a.cfacpr,
    a.ret
FROM
    crsp.dsf AS a
JOIN
    (SELECT permno, comnam, ticker, namedt, nameendt
     FROM crsp.dsenames
     WHERE permno IN ({permnos_str}) -- filter for the top 50 stocks
       AND namedt <= '{end_date}'
       AND (nameendt IS NULL OR nameendt >= '{start_date}')) AS b
ON
    a.permno = b.permno
WHERE
    a.permno IN ({permnos_str})       -- filter for the top 50 stocks
    AND a.date BETWEEN '{start_date}' AND '{end_date}'
    AND a.date >= b.namedt
    AND (a.date <= b.nameendt OR b.nameendt IS NULL)
"""
# Execute query
crsp_test = db.raw_sql(query)
crsp_test.sort_values(by=['permco', 'date'], inplace=True)

In [None]:
crsp_test

In [None]:
# Check for missing values
print(crsp_test.isna().sum())

### Calculate Excess Returns for Test Data


In [None]:
# Use the Fama French data to get the daily risk-free rate for the test period (2016-2024)
query_risk_free_test = """
SELECT
    date,
    rf
FROM
    ff.factors_daily
WHERE
    date BETWEEN '2016-01-01' AND '2024-12-31'
"""
rf_data_test = db.raw_sql(query_risk_free_test)

# Ensure both 'date' columns are in datetime format before merging
crsp_test['date'] = pd.to_datetime(crsp_test['date'], errors='coerce')
rf_data_test['date'] = pd.to_datetime(rf_data_test['date'], errors='coerce')

# Merge the test data with the risk-free rate data
crsp_test = pd.merge(crsp_test, rf_data_test, how='left', on='date')

# CRSP `ret` is a holding-period return that already accounts for splits and
# distributions. `cfacpr` is the cumulative factor for adjusting *prices*;
# dividing a return by it rescales the return by an arbitrary per-stock,
# time-varying constant. The column name is kept so downstream cells that
# read 'adjusted_ret' continue to work.
crsp_test['adjusted_ret'] = crsp_test['ret']

# Calculate excess returns using the adjusted returns
# NOTE(review): assumes `rf` is expressed in the same decimal units as `ret` - confirm.
crsp_test['excess_ret'] = crsp_test['adjusted_ret'] - crsp_test['rf']

# Clip abnormal returns to +100% and -100%
crsp_test['excess_ret'] = crsp_test['excess_ret'].clip(lower=-1.0, upper=1.0)

# Convert the excess return to a binary target for directional forecasting
crsp_test['directional_target'] = np.where(crsp_test['excess_ret'] > 0, 1, 0)

# Check the results for test data
crsp_test[['permco', 'permno', 'date', 'adjusted_ret', 'excess_ret']].head()

The adjusted returns and excess returns shown above are almost identical because the daily risk-free rate (rf) was very close to zero during these years.

## Descriptive Statistics for Excess Returns


In [None]:
# Calculate descriptive statistics for excess returns in the training dataset
in_sample_stats = crsp_train["excess_ret"].describe()

# Print the statistics in the desired format
print("In-Sample Excess Return Stats:")
print(in_sample_stats)

# Display the dtype
print(f"Name: excess_ret, dtype: {crsp_train['excess_ret'].dtype}")

In [None]:
# Calculate descriptive statistics for excess returns in the testing dataset
out_sample_stats = crsp_test["excess_ret"].describe()

# Print the statistics in the desired format
print("Out-Sample Excess Return Stats:")
print(out_sample_stats)

# Display the dtype
print(f"Name: excess_ret, dtype: {crsp_test['excess_ret'].dtype}")

In [None]:
train_stats = crsp_train.groupby('permno')['excess_ret'].describe()
test_stats = crsp_test.groupby('permno')['excess_ret'].describe()

# Print descriptive statistics
print("Descriptive Statistics for Excess Returns (Training Period):")
print(train_stats)

print("\nDescriptive Statistics for Excess Returns (Test Period):")
print(test_stats)

## Create Rolling Windows

In [None]:
def create_lag_features(df, lags):
    """Add trailing-mean features of past excess returns, computed per stock.

    For every window length in ``lags`` a column ``lag_<n>`` is added holding
    the mean of the previous ``n`` excess returns of the *same* stock. The
    current day's return is excluded (the series is shifted by one day first),
    so the first observation of every stock is NaN.

    Args:
        df: DataFrame with 'permno', 'date' and 'excess_ret' columns. It is
            modified in place (the new columns are added to it).
        lags: Iterable of window lengths in trading days.

    Returns:
        The same DataFrame object, with one ``lag_<n>`` column per window.
    """
    # Sort the data by stock ID ('permno') and date to ensure correct time order
    df_sorted = df.sort_values(by=["permno", "date"])
    stock_ids = df_sorted["permno"]

    # Shift by 1 day within each stock to avoid lookahead bias
    previous_returns = df_sorted.groupby("permno")["excess_ret"].shift(1)

    for lag in lags:
        # The rolling mean must also be taken per stock: rolling over the
        # whole shifted series would let a window at the start of one stock
        # reach back into the tail of the previous stock's returns.
        df[f"lag_{lag}"] = previous_returns.groupby(stock_ids, sort=False).transform(
            lambda returns: returns.rolling(window=lag, min_periods=1).mean()
        )

    # Return the DataFrame with added lag features (assignment aligns on index)
    return df

# Example usage for both crsp_train and crsp_test
lag_days_list = [5, 21, 252, 512]  # Example list of lag days

# Apply the function to both crsp_train and crsp_test
crsp_train_lagged = create_lag_features(crsp_train, lag_days_list)
crsp_test_lagged = create_lag_features(crsp_test, lag_days_list)

# Drop rows where any of the lag columns are NaN in crsp_test_lagged
crsp_test_lagged = crsp_test_lagged.dropna(subset=[f'lag_{lag}' for lag in lag_days_list])

# Verify that the lag features are correctly added
print(crsp_train_lagged.head())
print(crsp_test_lagged.head())

# **03. Merged**

In [None]:
# Attach each stock's market capitalisation (from crsp_data) to the test set.
# If this cell has already been run, remove the previous result first so the
# merge does not create suffixed or duplicated 'market_cap_merged' columns.
if 'market_cap_merged' in crsp_test_lagged.columns:
    crsp_test_lagged = crsp_test_lagged.drop(columns='market_cap_merged').drop_duplicates()

# One market-cap value per permno. Merging directly on permno keeps the row
# count unchanged; going through (permco, date) multiplies rows whenever a
# company has several share classes or crsp_data holds repeated permnos.
market_cap_by_permno = (
    crsp_data[['permno', 'market_cap']]
    .drop_duplicates(subset='permno', keep='first')
    .rename(columns={'market_cap': 'market_cap_merged'})
)

# NOTE(review): crsp_data is a single-date snapshot, so the same market cap is
# used as the weight on every test date - confirm this is intended.
crsp_test_lagged = crsp_test_lagged.merge(
    market_cap_by_permno, on='permno', how='left', validate='many_to_one'
)

# **04. TimesFM**

In [None]:
# Install the base package for PyTorch
!pip install torch  # Make sure you have PyTorch installed

In [None]:
pip install git+https://github.com/google/pax.git


In [None]:
!pip install git+https://github.com/google/pax.git

In [None]:
!pip -q install huggingface_hub[cli] utilsforecast praxis paxlm jax[cuda12]==0.4.26 einshape

In [None]:
!pip install -q -e git+https://github.com/google-research/timesfm.git@master#egg=timesfm

In [None]:
!sudo apt-get update -y
!sudo apt-get install python3.11 python3.11-distutils python3.11-venv -y
!sudo apt-get install python3.11-pip

In [None]:
!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
!sudo update-alternatives --config python3

In [None]:
!wget https://bootstrap.pypa.io/get-pip.py
!python3.11 get-pip.py

In [None]:
pip install git+https://github.com/google-research/timesfm.git

In [None]:
import sys
sys.path.append('/usr/local/lib/python3.11/dist-packages')

In [None]:
import timesfm

In [None]:
from timesfm import TimesFm, TimesFmHparams, TimesFmCheckpoint

In [None]:
# Attach each stock's market capitalisation (from crsp_data) to the test set.
# This cell repeats an earlier merge, so it must be safe to run twice: drop
# any existing 'market_cap_merged' column (and the exact-duplicate rows a
# previous many-to-many merge may have produced) before merging again.
if 'market_cap_merged' in crsp_test_lagged.columns:
    crsp_test_lagged = crsp_test_lagged.drop(columns='market_cap_merged').drop_duplicates()

# One market-cap value per permno. Merging directly on permno keeps the row
# count unchanged; going through (permco, date) multiplies rows whenever a
# company has several share classes or crsp_data holds repeated permnos.
market_cap_by_permno = (
    crsp_data[['permno', 'market_cap']]
    .drop_duplicates(subset='permno', keep='first')
    .rename(columns={'market_cap': 'market_cap_merged'})
)

# NOTE(review): crsp_data is a single-date snapshot, so the same market cap is
# used as the weight on every test date - confirm this is intended.
crsp_test_lagged = crsp_test_lagged.merge(
    market_cap_by_permno, on='permno', how='left', validate='many_to_one'
)

## TimesFM 1.0-200m

In [None]:
import torch
import matplotlib.pyplot as plt
from timesfm import TimesFm, TimesFmHparams, TimesFmCheckpoint
from sklearn.metrics import mean_squared_error, mean_absolute_error

def _tfm1_build_windows(test_df, lag, id_col, date_col, target_col):
    """Build z-scored context windows and their targets for every stock.

    Returns three parallel lists: the normalised context arrays, the
    (mean, std) pair needed to denormalise each forecast, and a record dict
    (id, target value, target date) per window.
    """
    contexts, denorm_stats, records = [], [], []
    for permno, grp in test_df.groupby(id_col):
        values = grp[target_col].to_numpy()
        dates = grp[date_col].to_numpy()
        # NOTE(review): the context covers observations i-lag .. i-1 while the
        # target is observation i+1, so observation i is skipped. Kept as in
        # the original; confirm the one-observation gap is intentional.
        for i in range(lag, len(values) - 1):
            context = values[i - lag:i]
            context_mean = np.mean(context)
            context_std = np.std(context)
            if context_std == 0:
                # A constant window cannot be z-scored.
                continue
            contexts.append(((context - context_mean) / context_std).astype(np.float32))
            denorm_stats.append((context_mean, context_std))
            records.append({
                id_col: permno,
                target_col: values[i + 1],
                date_col: dates[i + 1],
            })
    return contexts, denorm_stats, records


def _tfm1_metrics(y_true, y_pred):
    """Return the regression and directional metrics for one set of forecasts."""
    ssr = np.sum(np.square(y_true - y_pred))
    tss = np.sum(np.square(y_true - np.mean(y_true)))
    mse = mean_squared_error(y_true, y_pred)
    return {
        "R-squared": 1 - ssr / tss if tss != 0 else np.nan,
        "MSE": mse,
        "MAE": mean_absolute_error(y_true, y_pred),
        "RMSE": np.sqrt(mse),
        "Direction Accuracy": (np.sign(y_true) == np.sign(y_pred)).mean(),
        "Up Direction Accuracy": (np.sign(y_pred[y_true > 0]) == 1).mean() if (y_true > 0).any() else np.nan,
        "Down Direction Accuracy": (np.sign(y_pred[y_true < 0]) == -1).mean() if (y_true < 0).any() else np.nan,
    }


def rolling_forecast_tfm1(crsp_train_lagged, crsp_test_lagged, lags=(5,), model_name="TimesFM 1.0-200M"):
    """Run zero-shot TimesFM forecasts of excess returns over rolling windows.

    For each window length in ``lags`` the function slides a context window
    over every stock's 'excess_ret' series in the test set, z-scores the
    window, asks the pretrained model for a one-step forecast, denormalises
    it and scores it against the realised value. Per-lag metrics and
    predictions are also written to ``timesfm1_results_lag<lag>.csv`` and
    ``timesfm1_results_lag<lag>_full.csv``.

    Args:
        crsp_train_lagged: Accepted for interface compatibility; not used
            (the model is applied without fitting).
        crsp_test_lagged: DataFrame with 'permno', 'date' and 'excess_ret'.
            Not modified; a cleaned, sorted copy is returned.
        lags: Iterable of context-window lengths.
        model_name: Label used in the "Model" column of the metrics table.

    Returns:
        ``(metrics_df, test_df)``: one metrics row per lag that produced
        forecasts (empty if none did), and the cleaned test set with a
        ``predicted_excess_returns_lag<lag>`` column per requested lag (NaN
        where no forecast exists). If no lag produced forecasts the legacy
        ``predicted_excess_returns`` NaN column is added as well.
    """
    # Constants
    DATE_COL = 'date'
    ID_COL = 'permno'
    TARGET_COL = 'excess_ret'
    metric_columns = [
        "Model", "Lag", "R-squared", "MSE", "MAE", "RMSE",
        "Direction Accuracy", "Up Direction Accuracy", "Down Direction Accuracy",
    ]
    # TimesFM's `backend` hyper-parameter is "cpu" / "gpu" / "tpu"; the torch
    # device string "cuda" is not one of them.
    backend = "gpu" if torch.cuda.is_available() else "cpu"

    # Clean and sort input data (the chain returns a new frame, so the
    # caller's DataFrame is left untouched)
    crsp_test_lagged = (
        crsp_test_lagged.dropna(subset=[TARGET_COL])
        .sort_values([ID_COL, DATE_COL])
        .reset_index(drop=True)
    )

    # For collecting results
    all_results = []
    predictions_by_lag = {}

    # Iterate through lags
    for lag in lags:
        print(f"\nTraining with context window of length: {lag}")

        contexts, denorm_stats, records = _tfm1_build_windows(
            crsp_test_lagged, lag, ID_COL, DATE_COL, TARGET_COL
        )

        # If no usable data
        if not contexts:
            print(f" No usable data for lag {lag}")
            continue

        results_df = pd.DataFrame(records)
        y_test = results_df[TARGET_COL]

        # Initialize model (context_len must be a multiple of the patch length)
        tfm1 = TimesFm(
            hparams=TimesFmHparams(
                context_len=int(32 * np.ceil(lag / 32)),
                horizon_len=1,
                input_patch_len=32,
                output_patch_len=128,
                num_layers=20,
                model_dims=1280,
                use_positional_embedding=False,
                backend=backend
            ),
            checkpoint=TimesFmCheckpoint(huggingface_repo_id="google/timesfm-1.0-200m-pytorch")
        )

        # Run forecasts. `forecast` takes a sequence of array-likes and moves
        # data to the device itself, so plain NumPy arrays are passed rather
        # than a (possibly CUDA-resident) tensor.
        freqs = [0] * len(contexts)
        preds, _ = tfm1.forecast(contexts, freq=freqs)
        y_pred_norm = np.asarray(preds).reshape(-1)

        # Denormalize predictions with each window's own mean and std
        means, stds = (np.asarray(column) for column in zip(*denorm_stats))
        y_pred = pd.Series(y_pred_norm * stds + means)

        # Assign predicted returns per lag
        pred_col = f'predicted_excess_returns_lag{lag}'
        results_df[pred_col] = y_pred

        result = pd.DataFrame([{
            "Model": f"{model_name} Lag {lag}",
            "Lag": lag,
            **_tfm1_metrics(y_test, y_pred),
        }])

        # Save results
        result.to_csv(f"timesfm1_results_lag{lag}.csv", index=False)
        results_df.to_csv(f"timesfm1_results_lag{lag}_full.csv", index=False)

        all_results.append(result)
        predictions_by_lag[lag] = results_df

    # Combine all performance metrics (pd.concat raises on an empty list)
    if all_results:
        final_df = pd.concat(all_results, ignore_index=True)
    else:
        final_df = pd.DataFrame(columns=metric_columns)

    # Merge predictions into the test set one lag at a time, so each test row
    # is matched once per lag instead of once per stacked prediction row.
    for lag in lags:
        pred_col = f'predicted_excess_returns_lag{lag}'
        # Drop a column left over from an earlier run to avoid _x/_y suffixes.
        crsp_test_lagged = crsp_test_lagged.drop(columns=[pred_col], errors='ignore')
        if lag in predictions_by_lag:
            crsp_test_lagged = crsp_test_lagged.merge(
                predictions_by_lag[lag][[ID_COL, DATE_COL, pred_col]],
                on=[ID_COL, DATE_COL],
                how='left'
            )
        else:
            crsp_test_lagged[pred_col] = np.nan

    if not predictions_by_lag:
        crsp_test_lagged['predicted_excess_returns'] = np.nan

    return final_df, crsp_test_lagged

In [None]:
# Run for lag 5
tfm1_results_lag5 = rolling_forecast_tfm1(crsp_train_lagged, crsp_test_lagged, lags=[5])

# Show results in notebook
from IPython.display import display
display(tfm1_results_lag5)

In [None]:
# Run for lag 21
tfm1_results_lag21 = rolling_forecast_tfm1(crsp_train_lagged, crsp_test_lagged, lags=[21])

# Show results in notebook
from IPython.display import display
display(tfm1_results_lag21)

In [None]:
# Run for lag 252
tfm1_results_lag252 = rolling_forecast_tfm1(crsp_train_lagged, crsp_test_lagged, lags=[252])

# Show results in notebook
from IPython.display import display
display(tfm1_results_lag252)

In [None]:
# Run for lag 512
tfm1_results_lag512 = rolling_forecast_tfm1(crsp_train_lagged, crsp_test_lagged, lags=[512])

# Show results in notebook
from IPython.display import display
display(tfm1_results_lag512)

## TimesFM 1.0-200m Portfolio

### Window Size 5

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the per-trade transaction cost for a stock row.

    The cost is a flat 10 bps for both small and large cap stocks; ``row`` is
    accepted for interface compatibility but not inspected.
    """
    return 0.001  # 10 bps for both small and large cap stocks

# The cost does not depend on the row, so broadcast the scalar instead of
# calling the function once per row with a row-wise `apply`.
crsp_test_lagged['transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `timesfm1` to predict excess returns
def timesfm1_5_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=[5]):
    """Forecast excess returns and expose them under window-5 column names.

    Runs ``rolling_forecast_tfm1`` for the given lags and, for every
    ``predicted_excess_returns_lag<lag>`` column present in its output, adds
    a copy named ``timesfm1_5_predicted_excess_returns_lag<lag>``. The
    metrics table returned by the forecast is discarded.

    Returns the test DataFrame produced by the forecast, with the extra columns.
    """
    _, forecast_frame = rolling_forecast_tfm1(crsp_train_lagged, crsp_test_lagged, lags)

    for window in lags:
        source_col = f'predicted_excess_returns_lag{window}'
        if source_col not in forecast_frame.columns:
            # No forecast column was produced for this lag; nothing to copy.
            continue
        target_col = f'timesfm1_5_predicted_excess_returns_lag{window}'
        forecast_frame[target_col] = forecast_frame[source_col].values

    return forecast_frame

# Get predicted excess returns using TimesFM1 model
crsp_test_lagged = timesfm1_5_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged)

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute one period's long, short and long-short portfolio log returns.

    The long leg holds the top 10% of rows ranked by ``predicted_col`` and the
    short leg the bottom 10% (leg size is ``int(0.1 * len(group))``). Each leg
    is evaluated equal-weighted and weighted by 'market_cap_merged', with and
    without the mean 'transaction_cost' of the leg deducted. Realised returns
    are read from 'adjusted_ret'.

    Returns a dict with twelve entries keyed
    ``<equal|value>_<long|short|long_short>_log_return_<with|without>_cost``.
    """
    leg_size = int(0.1 * len(group))
    longs = group.nlargest(leg_size, predicted_col)
    shorts = group.nsmallest(leg_size, predicted_col)

    # Realised log returns and average trading cost of each leg
    long_logs = np.log1p(longs['adjusted_ret'])
    short_logs = np.log1p(shorts['adjusted_ret'])
    long_cost = longs['transaction_cost'].mean()
    short_cost = shorts['transaction_cost'].mean()

    # Equal-weighted legs (a short position earns the negated return)
    eq_long_gross = long_logs.mean()
    eq_long_net = eq_long_gross - long_cost
    eq_short_gross = -short_logs.mean()
    eq_short_net = eq_short_gross - short_cost

    # Value-weighted legs: market-cap weighted average of the log returns
    long_caps = longs['market_cap_merged']
    short_caps = shorts['market_cap_merged']
    val_long_gross = (long_logs * long_caps).sum() / long_caps.sum()
    val_long_net = val_long_gross - long_cost
    short_weighted = (short_logs * short_caps).sum() / short_caps.sum()
    val_short_net = -(short_weighted + short_cost)
    val_short_gross = -short_weighted

    # Long-short is the sum of the two legs
    return {
        'equal_long_log_return_with_cost': eq_long_net,
        'equal_short_log_return_with_cost': eq_short_net,
        'equal_long_short_log_return_with_cost': eq_long_net + eq_short_net,
        'equal_long_log_return_without_cost': eq_long_gross,
        'equal_short_log_return_without_cost': eq_short_gross,
        'equal_long_short_log_return_without_cost': eq_long_gross + eq_short_gross,
        'value_long_log_return_with_cost': val_long_net,
        'value_short_log_return_with_cost': val_short_net,
        'value_long_short_log_return_with_cost': val_long_net + val_short_net,
        'value_long_log_return_without_cost': val_long_gross,
        'value_short_log_return_without_cost': val_short_gross,
        'value_long_short_log_return_without_cost': val_long_gross + val_short_gross
    }

# Compute cumulative returns for each date with daily rebalancing.
# Maps every output column to the key under which `compute_returns` reports
# that portfolio's return for a single day.
lag5_return_keys = {
    'cum_EL_return_5_with_cost': 'equal_long_log_return_with_cost',
    'cum_ES_return_5_with_cost': 'equal_short_log_return_with_cost',
    'cum_ELS_return_5_with_cost': 'equal_long_short_log_return_with_cost',
    'cum_VL_return_5_with_cost': 'value_long_log_return_with_cost',
    'cum_VS_return_5_with_cost': 'value_short_log_return_with_cost',
    'cum_VLS_return_5_with_cost': 'value_long_short_log_return_with_cost',
    'cum_EL_return_5_without_cost': 'equal_long_log_return_without_cost',
    'cum_ES_return_5_without_cost': 'equal_short_log_return_without_cost',
    'cum_ELS_return_5_without_cost': 'equal_long_short_log_return_without_cost',
    'cum_VL_return_5_without_cost': 'value_long_log_return_without_cost',
    'cum_VS_return_5_without_cost': 'value_short_log_return_without_cost',
    'cum_VLS_return_5_without_cost': 'value_long_short_log_return_without_cost',
}

# Column holding the lag-5 forecasts used to rank stocks
predicted_col = 'timesfm1_5_predicted_excess_returns_lag5'

# One list per output column, plus the date axis
cumulative_log_returns_by_date_timesfm15 = {'date': []}
cumulative_log_returns_by_date_timesfm15.update({column: [] for column in lag5_return_keys})

# Running sums of daily log returns, one per portfolio
running_totals = dict.fromkeys(lag5_return_keys, 0.0)

# Iterate over the dates in chronological order. `groupby(..., sort=True)`
# guarantees the ordering (the frame itself is sorted by stock, so the order
# in which dates first appear is not necessarily chronological) and hands
# over each day's rows without re-scanning the whole frame per date.
for date, group in crsp_test_lagged.groupby('date', sort=True):
    returns = compute_returns(group, predicted_col)

    cumulative_log_returns_by_date_timesfm15['date'].append(date)
    for column, return_key in lag5_return_keys.items():
        running_totals[column] += returns[return_key]
        cumulative_log_returns_by_date_timesfm15[column].append(running_totals[column])

# Convert to DataFrame for lag 5
cumulative_log_returns_timesfm1_lag_5 = pd.DataFrame(cumulative_log_returns_by_date_timesfm15)

# Display the cumulative returns DataFrame for lag 5
display(cumulative_log_returns_timesfm1_lag_5.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_timesfm1_lag_5.to_csv("cumulative_log_timesfm1_lag_5.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound a sequence of simple periodic returns into one total return."""
    growth_factors = 1 + daily_returns
    return np.prod(growth_factors) - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Annualise the compounded growth of a series of simple returns.

    Args:
        daily_returns: Array-like of simple periodic returns.
        periods: Number of return periods per year.

    Returns:
        The geometric annualised return, or NaN when there are no
        observations (the annualisation exponent is undefined for an empty
        sample and would otherwise divide by zero).
    """
    n_obs = len(daily_returns)
    if n_obs == 0:
        return np.nan
    total_growth = np.prod(1 + np.asarray(daily_returns))
    return total_growth ** (periods / n_obs) - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Annualised Sharpe ratio of a series of periodic returns.

    Args:
        daily_returns: Array or Series of periodic returns.
        risk_free_rate: Annual risk-free rate, spread evenly over ``periods``.
        periods: Number of return periods per year.

    Returns:
        ``sqrt(periods) * mean(excess) / std(excess)``, or NaN when the ratio
        is undefined (no observations, or zero / NaN dispersion) instead of
        an infinite value produced by dividing by zero.
    """
    daily_rf = risk_free_rate / periods
    excess_returns = daily_returns - daily_rf
    if len(excess_returns) == 0:
        return np.nan
    # Uses the input's own .std(), as before.
    dispersion = excess_returns.std()
    if not dispersion > 0:
        return np.nan
    return np.sqrt(periods) * excess_returns.mean() / dispersion

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    """Annualise the standard deviation of periodic returns by sqrt(periods)."""
    annualisation_factor = np.sqrt(periods)
    per_period_std = np.std(daily_returns)
    return per_period_std * annualisation_factor

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Largest peak-to-trough decline of the compounded wealth path.

    Args:
        daily_returns: Array-like of simple periodic returns.

    Returns:
        The most negative relative drop from a running peak (0.0 if wealth
        never falls below a previous peak, or if there are no observations).
    """
    wealth = np.cumprod(1 + np.asarray(daily_returns, dtype=float))
    if wealth.size == 0:
        return 0.0
    # The initial capital (1.0) counts as the first peak; otherwise a loss on
    # the very first day would never register as a drawdown.
    peak = np.maximum(np.maximum.accumulate(wealth), 1.0)
    drawdown = (wealth - peak) / peak
    return np.min(drawdown)

# Apply fixed transaction cost to the daily returns
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Deduct a constant cost from every periodic return."""
    net_returns = daily_returns - transaction_cost
    return net_returns

# Apply percentage-based transaction cost to the daily returns
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every periodic return by the fraction left after a proportional cost."""
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    'cum_EL_return_5_with_cost', 'cum_ES_return_5_with_cost', 'cum_ELS_return_5_with_cost',
    'cum_VL_return_5_with_cost', 'cum_VS_return_5_with_cost', 'cum_VLS_return_5_with_cost',
]

# Initialize metrics container
metrics = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio with transaction cost
for portfolio in portfolios_with_cost:
    cumulative_returns_timesfm15_c = cumulative_log_returns_timesfm1_lag_5[portfolio].values

    # The columns hold cumulative *log* returns. Differencing recovers the
    # daily log returns; prepending the zero baseline keeps the first day,
    # which a plain diff would drop.
    daily_log_returns = np.diff(cumulative_returns_timesfm15_c, prepend=0.0)

    # The metric helpers compound with (1 + r), i.e. they expect simple
    # returns, so convert log returns before passing them on.
    daily_returns = np.expm1(daily_log_returns)

    # The "*_with_cost" series already had the transaction cost deducted when
    # the portfolio returns were computed; deducting it again here would
    # charge the cost twice, so the series is used as is.
    cum_return_after_cost = cumulative_return(daily_returns)

    # Metrics calculations
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns after cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics['Portfolio'].append(portfolio)
    metrics['Annualized Return'].append(ann_return)
    metrics['Sharpe Ratio'].append(sharpe)
    metrics['Volatility'].append(vol)
    metrics['Standard Deviation'].append(std_dev)
    metrics['Max Drawdown'].append(max_draw)
    metrics['Cumulative Return'].append(cum_return_after_cost)

# Convert the results into a DataFrame for analysis
metrics_timesfm15_c = pd.DataFrame(metrics)
display(metrics_timesfm15_c)

# Same calculations for portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_5_without_cost', 'cum_ES_return_5_without_cost', 'cum_ELS_return_5_without_cost',
    'cum_VL_return_5_without_cost', 'cum_VS_return_5_without_cost', 'cum_VLS_return_5_without_cost',
]

# Initialize metrics container for portfolios without transaction cost
metrics_wc = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    cumulative_returns_timsfm15_wc = cumulative_log_returns_timesfm1_lag_5[portfolio].values

    # The columns hold cumulative *log* returns. Differencing recovers the
    # daily log returns; prepending the zero baseline keeps the first day,
    # which a plain diff would drop.
    daily_log_returns = np.diff(cumulative_returns_timsfm15_wc, prepend=0.0)

    # The metric helpers compound with (1 + r), i.e. they expect simple
    # returns, so convert log returns before passing them on.
    daily_returns = np.expm1(daily_log_returns)

    # Calculate cumulative returns without transaction cost
    cum_return_without_cost = cumulative_return(daily_returns)

    # Metrics calculations
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns without cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics_wc['Portfolio'].append(portfolio)
    metrics_wc['Annualized Return'].append(ann_return)
    metrics_wc['Sharpe Ratio'].append(sharpe)
    metrics_wc['Volatility'].append(vol)
    metrics_wc['Standard Deviation'].append(std_dev)
    metrics_wc['Max Drawdown'].append(max_draw)
    metrics_wc['Cumulative Return'].append(cum_return_without_cost)

# Convert the results into a DataFrame for analysis
metrics_timesfm15_wc = pd.DataFrame(metrics_wc)
display(metrics_timesfm15_wc)

### Window Size 21

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the flat trading cost of 10 basis points (0.001).

    `row` is accepted so the function can be handed to
    `DataFrame.apply(..., axis=1)`, but its contents are never inspected.
    """
    cost_in_bps = 10
    return cost_in_bps / 10_000

# The cost is the same constant for every row, so broadcast it as a scalar
# instead of invoking a Python function once per row of the test panel.
# NOTE: this relies on calculate_transaction_cost ignoring its argument.
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `timesfm1` to predict excess returns
def timesfm1_21_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=None):
    """Run the rolling TimesFM forecast and alias its output for window 21.

    Parameters
    ----------
    crsp_train_lagged, crsp_test_lagged
        Forwarded unchanged to `rolling_forecast_tfm1`.
    lags : list of int, optional
        Lag values forwarded to `rolling_forecast_tfm1`. Defaults to ``[21]``;
        the list is created per call so no default object is shared.

    Returns
    -------
    The test frame returned by `rolling_forecast_tfm1`, with a
    ``timesfm1_21_predicted_excess_returns_lag{lag}`` column added for each
    lag whose ``predicted_excess_returns_lag{lag}`` column exists.
    """
    if lags is None:
        lags = [21]

    # The first element of the returned pair (forecast metrics) is not used here
    _, crsp_test_lagged = rolling_forecast_tfm1(crsp_train_lagged, crsp_test_lagged, lags)

    for lag in lags:
        pred_col = f'predicted_excess_returns_lag{lag}'
        # A lag without a prediction column is skipped silently
        if pred_col in crsp_test_lagged.columns:
            crsp_test_lagged[f'timesfm1_21_predicted_excess_returns_lag{lag}'] = crsp_test_lagged[pred_col].values

    return crsp_test_lagged

# Get predicted excess returns using TimesFM1 model
crsp_test_lagged = timesfm1_21_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged)

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute one period's long, short and long-short portfolio log returns.

    The long leg holds the rows with the largest values of `predicted_col`
    and the short leg the rows with the smallest values. Each leg contains
    ``int(0.1 * len(group))`` rows, which is zero when `group` has fewer than
    ten rows.

    Parameters
    ----------
    group : DataFrame
        Must provide `predicted_col`, 'adjusted_ret', 'market_cap_merged'
        and 'transaction_cost'.
    predicted_col : str
        Column used to rank the rows.

    Returns
    -------
    dict
        Twelve entries: {equal, value} x {long, short, long_short} x
        {with_cost, without_cost}. Short-leg values are the negated leg
        return, and each long-short value is the sum of its two legs.
    """
    leg_size = int(0.1 * len(group))
    longs = group.nlargest(leg_size, predicted_col)
    shorts = group.nsmallest(leg_size, predicted_col)

    # Per-stock log returns and market-cap weights of each leg
    long_logret = np.log1p(longs['adjusted_ret'])
    short_logret = np.log1p(shorts['adjusted_ret'])
    long_cap = longs['market_cap_merged']
    short_cap = shorts['market_cap_merged']

    # Average cost charged to each leg
    long_fee = longs['transaction_cost'].mean()
    short_fee = shorts['transaction_cost'].mean()

    # Equal-weighted legs: plain mean of the log returns (short leg negated)
    ew_long = long_logret.mean()
    ew_short = -short_logret.mean()

    # Value-weighted legs: market-cap weighted mean of the log returns
    vw_long = (long_logret * long_cap).sum() / long_cap.sum()
    vw_short_unsigned = (short_logret * short_cap).sum() / short_cap.sum()

    out = {}
    out['equal_long_log_return_with_cost'] = ew_long - long_fee
    out['equal_short_log_return_with_cost'] = ew_short - short_fee
    out['equal_long_short_log_return_with_cost'] = (
        out['equal_long_log_return_with_cost'] + out['equal_short_log_return_with_cost']
    )
    out['equal_long_log_return_without_cost'] = ew_long
    out['equal_short_log_return_without_cost'] = ew_short
    out['equal_long_short_log_return_without_cost'] = ew_long + ew_short

    out['value_long_log_return_with_cost'] = vw_long - long_fee
    out['value_short_log_return_with_cost'] = -(vw_short_unsigned + short_fee)
    out['value_long_short_log_return_with_cost'] = (
        out['value_long_log_return_with_cost'] + out['value_short_log_return_with_cost']
    )
    out['value_long_log_return_without_cost'] = vw_long
    out['value_short_log_return_without_cost'] = -(vw_short_unsigned)
    out['value_long_short_log_return_without_cost'] = (
        out['value_long_log_return_without_cost'] + out['value_short_log_return_without_cost']
    )
    return out

# Compute cumulative returns for each date with daily rebalancing.
# Every list in this dict receives exactly one entry per date, in loop order.
cumulative_log_returns_by_date_timesfm121 = {
    'date': [],
    'cum_EL_return_21_with_cost': [],
    'cum_ES_return_21_with_cost': [],
    'cum_ELS_return_21_with_cost': [],
    'cum_VL_return_21_with_cost': [],
    'cum_VS_return_21_with_cost': [],
    'cum_VLS_return_21_with_cost': [],
    'cum_EL_return_21_without_cost': [],
    'cum_ES_return_21_without_cost': [],
    'cum_ELS_return_21_without_cost': [],
    'cum_VL_return_21_without_cost': [],
    'cum_VS_return_21_without_cost': [],
    'cum_VLS_return_21_without_cost': []
}

# Running totals of the per-date log returns for lag 21
cum_EL_return_21_with_cost = 0
cum_ES_return_21_with_cost = 0
cum_ELS_return_21_with_cost = 0
cum_VL_return_21_with_cost = 0
cum_VS_return_21_with_cost = 0
cum_VLS_return_21_with_cost = 0

cum_EL_return_21_without_cost = 0
cum_ES_return_21_without_cost = 0
cum_ELS_return_21_without_cost = 0
cum_VL_return_21_without_cost = 0
cum_VS_return_21_without_cost = 0
cum_VLS_return_21_without_cost = 0

# The ranking column is the same for every date, so name it once
predicted_col = 'timesfm1_21_predicted_excess_returns_lag21'

# A single groupby pass replaces re-filtering the whole frame once per date.
# sort=False keeps the dates in order of first appearance, matching the order
# that iterating over .unique() gave. Rows with a missing date are skipped.
for date, group in crsp_test_lagged.groupby('date', sort=False):
    returns = compute_returns(group, predicted_col)

    # Add this date's log returns to the running totals
    cum_EL_return_21_with_cost += returns['equal_long_log_return_with_cost']
    cum_ES_return_21_with_cost += returns['equal_short_log_return_with_cost']
    cum_ELS_return_21_with_cost += returns['equal_long_short_log_return_with_cost']
    cum_VL_return_21_with_cost += returns['value_long_log_return_with_cost']
    cum_VS_return_21_with_cost += returns['value_short_log_return_with_cost']
    cum_VLS_return_21_with_cost += returns['value_long_short_log_return_with_cost']

    cum_EL_return_21_without_cost += returns['equal_long_log_return_without_cost']
    cum_ES_return_21_without_cost += returns['equal_short_log_return_without_cost']
    cum_ELS_return_21_without_cost += returns['equal_long_short_log_return_without_cost']
    cum_VL_return_21_without_cost += returns['value_long_log_return_without_cost']
    cum_VS_return_21_without_cost += returns['value_short_log_return_without_cost']
    cum_VLS_return_21_without_cost += returns['value_long_short_log_return_without_cost']

    # Record the totals as of this date
    cumulative_log_returns_by_date_timesfm121['date'].append(date)
    cumulative_log_returns_by_date_timesfm121['cum_EL_return_21_with_cost'].append(cum_EL_return_21_with_cost)
    cumulative_log_returns_by_date_timesfm121['cum_ES_return_21_with_cost'].append(cum_ES_return_21_with_cost)
    cumulative_log_returns_by_date_timesfm121['cum_ELS_return_21_with_cost'].append(cum_ELS_return_21_with_cost)
    cumulative_log_returns_by_date_timesfm121['cum_VL_return_21_with_cost'].append(cum_VL_return_21_with_cost)
    cumulative_log_returns_by_date_timesfm121['cum_VS_return_21_with_cost'].append(cum_VS_return_21_with_cost)
    cumulative_log_returns_by_date_timesfm121['cum_VLS_return_21_with_cost'].append(cum_VLS_return_21_with_cost)
    cumulative_log_returns_by_date_timesfm121['cum_EL_return_21_without_cost'].append(cum_EL_return_21_without_cost)
    cumulative_log_returns_by_date_timesfm121['cum_ES_return_21_without_cost'].append(cum_ES_return_21_without_cost)
    cumulative_log_returns_by_date_timesfm121['cum_ELS_return_21_without_cost'].append(cum_ELS_return_21_without_cost)
    cumulative_log_returns_by_date_timesfm121['cum_VL_return_21_without_cost'].append(cum_VL_return_21_without_cost)
    cumulative_log_returns_by_date_timesfm121['cum_VS_return_21_without_cost'].append(cum_VS_return_21_without_cost)
    cumulative_log_returns_by_date_timesfm121['cum_VLS_return_21_without_cost'].append(cum_VLS_return_21_without_cost)

# Convert to DataFrame for lag 21
cumulative_log_returns_timesfm1_lag_21 = pd.DataFrame(cumulative_log_returns_by_date_timesfm121)

# Display the cumulative returns DataFrame for lag 21
display(cumulative_log_returns_timesfm1_lag_21.head())

# Save the DataFrame as a CSV file (written once; a second identical write was redundant)
cumulative_log_returns_timesfm1_lag_21.to_csv("cumulative_log_timesfm1_lag_21.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound per-period returns into a single total return.

    Each element r is treated as a simple return: the result is the product
    of (1 + r) over all elements, minus 1.
    """
    growth_factors = 1 + daily_returns
    total_growth = np.prod(growth_factors)
    return total_growth - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Scale the compounded return of `daily_returns` to a `periods`-long year.

    The total growth over the sample is raised to the power
    ``periods / len(daily_returns)`` and 1 is subtracted. An empty input
    raises ZeroDivisionError.
    """
    total_return = np.prod(1 + daily_returns) - 1
    scaling_exponent = periods / len(daily_returns)
    annual_growth = (1 + total_return) ** scaling_exponent
    return annual_growth - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Return the annualized Sharpe ratio of `daily_returns`.

    The annual `risk_free_rate` is spread evenly over `periods`, subtracted
    from every return, and the mean-to-std ratio of the result is scaled by
    sqrt(periods). The spread uses the input's own `.std()` method.
    """
    per_period_rf = risk_free_rate / periods
    surplus = daily_returns - per_period_rf
    scaled_mean = np.sqrt(periods) * surplus.mean()
    return scaled_mean / surplus.std()

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    """Return the standard deviation of `daily_returns` scaled by sqrt(periods)."""
    per_period_sd = np.std(daily_returns)
    annualizer = np.sqrt(periods)
    return per_period_sd * annualizer

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Return the deepest relative fall of the compounded curve below its running peak.

    The curve is the cumulative product of (1 + r). Its first point is the
    first growth factor, so the starting value of 1 is not itself counted as
    a peak. The result is zero or negative.
    """
    wealth = np.cumprod(1 + daily_returns)
    running_high = np.maximum.accumulate(wealth)
    shortfall = (wealth - running_high) / running_high
    return np.min(shortfall)

# Apply fixed transaction cost to the daily returns
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Return `daily_returns` with the flat `transaction_cost` deducted from every element."""
    net_returns = daily_returns - transaction_cost
    return net_returns

# Apply percentage-based transaction cost to the daily returns
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Return `daily_returns` scaled by ``1 - transaction_cost_percentage``."""
    retained_share = 1 - transaction_cost_percentage
    return daily_returns * retained_share

# Lag-21 cumulative-return columns that were accumulated net of transaction cost
portfolios_with_cost = [
    f'cum_{_code}_return_21_with_cost'
    for _code in ('EL', 'ES', 'ELS', 'VL', 'VS', 'VLS')
]

# One empty list per reported statistic; every loop pass adds one entry to each
metrics = {
    _column: []
    for _column in (
        'Portfolio', 'Annualized Return', 'Sharpe Ratio', 'Volatility',
        'Standard Deviation', 'Max Drawdown', 'Cumulative Return',
    )
}

# NOTE(review): the *_with_cost columns already have the per-row cost
# subtracted in compute_returns, and apply_transaction_cost_fixed deducts a
# further 0.001 per day here - confirm the second deduction is intended.
# NOTE(review): the increments below are differences of cumulative *log*
# returns, while the metric helpers compound them as simple returns - confirm.
for portfolio in portfolios_with_cost:
    cumulative_returns_timesfm121_c = cumulative_log_returns_timesfm1_lag_21[portfolio].values

    # Day-over-day increments of the running total; the first date has no
    # predecessor, so the result is one element shorter than the input.
    daily_returns = np.diff(cumulative_returns_timesfm121_c)

    # Flat deduction feeds the return-based statistics; proportional scaling
    # feeds the two dispersion statistics.
    daily_returns_after_cost_fixed = apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001)
    daily_returns_after_cost_percentage = apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001)

    cum_return_after_cost = cumulative_return(daily_returns_after_cost_fixed)
    ann_return = annualized_return(daily_returns_after_cost_fixed)
    sharpe = sharpe_ratio(daily_returns_after_cost_fixed)
    vol = calculate_volatility(daily_returns_after_cost_percentage)
    max_draw = maximum_drawdown(daily_returns_after_cost_fixed)
    std_dev = np.std(daily_returns_after_cost_percentage)

    for _stat_name, _stat_value in (
        ('Portfolio', portfolio),
        ('Annualized Return', ann_return),
        ('Sharpe Ratio', sharpe),
        ('Volatility', vol),
        ('Standard Deviation', std_dev),
        ('Max Drawdown', max_draw),
        ('Cumulative Return', cum_return_after_cost),
    ):
        metrics[_stat_name].append(_stat_value)

# Tabulate the per-portfolio statistics
metrics_timesfm121_c = pd.DataFrame(metrics)
display(metrics_timesfm121_c)

# Same calculations for the lag-21 portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_21_without_cost', 'cum_ES_return_21_without_cost', 'cum_ELS_return_21_without_cost',
    'cum_VL_return_21_without_cost', 'cum_VS_return_21_without_cost', 'cum_VLS_return_21_without_cost',
]

# Initialize metrics container for portfolios without transaction cost
metrics_wc = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    cumulative_returns_timsfm121_wc = cumulative_log_returns_timesfm1_lag_21[portfolio].values

    # Difference the lag-21 series loaded on the line above. This previously
    # differenced a stale window-5 array, so every row reported the same
    # window-5 numbers.
    daily_returns = np.diff(cumulative_returns_timsfm121_wc)

    # Calculate cumulative returns without transaction cost
    cum_return_without_cost = cumulative_return(daily_returns)

    # Metrics calculations
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns without cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics_wc['Portfolio'].append(portfolio)
    metrics_wc['Annualized Return'].append(ann_return)
    metrics_wc['Sharpe Ratio'].append(sharpe)
    metrics_wc['Volatility'].append(vol)
    metrics_wc['Standard Deviation'].append(std_dev)
    metrics_wc['Max Drawdown'].append(max_draw)
    metrics_wc['Cumulative Return'].append(cum_return_without_cost)

# Convert the results into a DataFrame for analysis
metrics_timesfm121_wc = pd.DataFrame(metrics_wc)
display(metrics_timesfm121_wc)

### Window Size 252

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the flat trading cost of 10 basis points (0.001).

    `row` is accepted so the function can be handed to
    `DataFrame.apply(..., axis=1)`, but its contents are never inspected.
    """
    cost_in_bps = 10
    return cost_in_bps / 10_000

# The cost is the same constant for every row, so broadcast it as a scalar
# instead of invoking a Python function once per row of the test panel.
# NOTE: this relies on calculate_transaction_cost ignoring its argument.
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `timesfm1` to predict excess returns
def timesfm1_252_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=None):
    """Run the rolling TimesFM forecast and alias its output for window 252.

    Parameters
    ----------
    crsp_train_lagged, crsp_test_lagged
        Forwarded unchanged to `rolling_forecast_tfm1`.
    lags : list of int, optional
        Lag values forwarded to `rolling_forecast_tfm1`. Defaults to ``[252]``;
        the list is created per call so no default object is shared.

    Returns
    -------
    The test frame returned by `rolling_forecast_tfm1`, with a
    ``timesfm1_252_predicted_excess_returns_lag{lag}`` column added for each
    lag whose ``predicted_excess_returns_lag{lag}`` column exists.
    """
    if lags is None:
        lags = [252]

    # The first element of the returned pair (forecast metrics) is not used here
    _, crsp_test_lagged = rolling_forecast_tfm1(crsp_train_lagged, crsp_test_lagged, lags)

    for lag in lags:
        pred_col = f'predicted_excess_returns_lag{lag}'
        # A lag without a prediction column is skipped silently
        if pred_col in crsp_test_lagged.columns:
            crsp_test_lagged[f'timesfm1_252_predicted_excess_returns_lag{lag}'] = crsp_test_lagged[pred_col].values

    return crsp_test_lagged

# Get predicted excess returns using TimesFM1 model
crsp_test_lagged = timesfm1_252_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged)

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute one period's long, short and long-short portfolio log returns.

    The long leg holds the rows with the largest values of `predicted_col`
    and the short leg the rows with the smallest values. Each leg contains
    ``int(0.1 * len(group))`` rows, which is zero when `group` has fewer than
    ten rows.

    Parameters
    ----------
    group : DataFrame
        Must provide `predicted_col`, 'adjusted_ret', 'market_cap_merged'
        and 'transaction_cost'.
    predicted_col : str
        Column used to rank the rows.

    Returns
    -------
    dict
        Twelve entries: {equal, value} x {long, short, long_short} x
        {with_cost, without_cost}. Short-leg values are the negated leg
        return, and each long-short value is the sum of its two legs.
    """
    leg_size = int(0.1 * len(group))
    longs = group.nlargest(leg_size, predicted_col)
    shorts = group.nsmallest(leg_size, predicted_col)

    # Per-stock log returns and market-cap weights of each leg
    long_logret = np.log1p(longs['adjusted_ret'])
    short_logret = np.log1p(shorts['adjusted_ret'])
    long_cap = longs['market_cap_merged']
    short_cap = shorts['market_cap_merged']

    # Average cost charged to each leg
    long_fee = longs['transaction_cost'].mean()
    short_fee = shorts['transaction_cost'].mean()

    # Equal-weighted legs: plain mean of the log returns (short leg negated)
    ew_long = long_logret.mean()
    ew_short = -short_logret.mean()

    # Value-weighted legs: market-cap weighted mean of the log returns
    vw_long = (long_logret * long_cap).sum() / long_cap.sum()
    vw_short_unsigned = (short_logret * short_cap).sum() / short_cap.sum()

    out = {}
    out['equal_long_log_return_with_cost'] = ew_long - long_fee
    out['equal_short_log_return_with_cost'] = ew_short - short_fee
    out['equal_long_short_log_return_with_cost'] = (
        out['equal_long_log_return_with_cost'] + out['equal_short_log_return_with_cost']
    )
    out['equal_long_log_return_without_cost'] = ew_long
    out['equal_short_log_return_without_cost'] = ew_short
    out['equal_long_short_log_return_without_cost'] = ew_long + ew_short

    out['value_long_log_return_with_cost'] = vw_long - long_fee
    out['value_short_log_return_with_cost'] = -(vw_short_unsigned + short_fee)
    out['value_long_short_log_return_with_cost'] = (
        out['value_long_log_return_with_cost'] + out['value_short_log_return_with_cost']
    )
    out['value_long_log_return_without_cost'] = vw_long
    out['value_short_log_return_without_cost'] = -(vw_short_unsigned)
    out['value_long_short_log_return_without_cost'] = (
        out['value_long_log_return_without_cost'] + out['value_short_log_return_without_cost']
    )
    return out

# Compute cumulative returns for each date with daily rebalancing.
# Every list in this dict receives exactly one entry per date, in loop order.
cumulative_log_returns_by_date_timesfm1252 = {
    'date': [],
    'cum_EL_return_252_with_cost': [],
    'cum_ES_return_252_with_cost': [],
    'cum_ELS_return_252_with_cost': [],
    'cum_VL_return_252_with_cost': [],
    'cum_VS_return_252_with_cost': [],
    'cum_VLS_return_252_with_cost': [],
    'cum_EL_return_252_without_cost': [],
    'cum_ES_return_252_without_cost': [],
    'cum_ELS_return_252_without_cost': [],
    'cum_VL_return_252_without_cost': [],
    'cum_VS_return_252_without_cost': [],
    'cum_VLS_return_252_without_cost': []
}

# Running totals of the per-date log returns for lag 252
cum_EL_return_252_with_cost = 0
cum_ES_return_252_with_cost = 0
cum_ELS_return_252_with_cost = 0
cum_VL_return_252_with_cost = 0
cum_VS_return_252_with_cost = 0
cum_VLS_return_252_with_cost = 0

cum_EL_return_252_without_cost = 0
cum_ES_return_252_without_cost = 0
cum_ELS_return_252_without_cost = 0
cum_VL_return_252_without_cost = 0
cum_VS_return_252_without_cost = 0
cum_VLS_return_252_without_cost = 0

# The ranking column is the same for every date, so name it once
predicted_col = 'timesfm1_252_predicted_excess_returns_lag252'

# A single groupby pass replaces re-filtering the whole frame once per date.
# sort=False keeps the dates in order of first appearance, matching the order
# that iterating over .unique() gave. Rows with a missing date are skipped.
for date, group in crsp_test_lagged.groupby('date', sort=False):
    returns = compute_returns(group, predicted_col)

    # Add this date's log returns to the running totals
    cum_EL_return_252_with_cost += returns['equal_long_log_return_with_cost']
    cum_ES_return_252_with_cost += returns['equal_short_log_return_with_cost']
    cum_ELS_return_252_with_cost += returns['equal_long_short_log_return_with_cost']
    cum_VL_return_252_with_cost += returns['value_long_log_return_with_cost']
    cum_VS_return_252_with_cost += returns['value_short_log_return_with_cost']
    cum_VLS_return_252_with_cost += returns['value_long_short_log_return_with_cost']

    cum_EL_return_252_without_cost += returns['equal_long_log_return_without_cost']
    cum_ES_return_252_without_cost += returns['equal_short_log_return_without_cost']
    cum_ELS_return_252_without_cost += returns['equal_long_short_log_return_without_cost']
    cum_VL_return_252_without_cost += returns['value_long_log_return_without_cost']
    cum_VS_return_252_without_cost += returns['value_short_log_return_without_cost']
    cum_VLS_return_252_without_cost += returns['value_long_short_log_return_without_cost']

    # Record the totals as of this date
    cumulative_log_returns_by_date_timesfm1252['date'].append(date)
    cumulative_log_returns_by_date_timesfm1252['cum_EL_return_252_with_cost'].append(cum_EL_return_252_with_cost)
    cumulative_log_returns_by_date_timesfm1252['cum_ES_return_252_with_cost'].append(cum_ES_return_252_with_cost)
    cumulative_log_returns_by_date_timesfm1252['cum_ELS_return_252_with_cost'].append(cum_ELS_return_252_with_cost)
    cumulative_log_returns_by_date_timesfm1252['cum_VL_return_252_with_cost'].append(cum_VL_return_252_with_cost)
    cumulative_log_returns_by_date_timesfm1252['cum_VS_return_252_with_cost'].append(cum_VS_return_252_with_cost)
    cumulative_log_returns_by_date_timesfm1252['cum_VLS_return_252_with_cost'].append(cum_VLS_return_252_with_cost)
    cumulative_log_returns_by_date_timesfm1252['cum_EL_return_252_without_cost'].append(cum_EL_return_252_without_cost)
    cumulative_log_returns_by_date_timesfm1252['cum_ES_return_252_without_cost'].append(cum_ES_return_252_without_cost)
    cumulative_log_returns_by_date_timesfm1252['cum_ELS_return_252_without_cost'].append(cum_ELS_return_252_without_cost)
    cumulative_log_returns_by_date_timesfm1252['cum_VL_return_252_without_cost'].append(cum_VL_return_252_without_cost)
    cumulative_log_returns_by_date_timesfm1252['cum_VS_return_252_without_cost'].append(cum_VS_return_252_without_cost)
    cumulative_log_returns_by_date_timesfm1252['cum_VLS_return_252_without_cost'].append(cum_VLS_return_252_without_cost)

# Convert to DataFrame for lag 252
cumulative_log_returns_timesfm1_lag_252 = pd.DataFrame(cumulative_log_returns_by_date_timesfm1252)

# Display the cumulative returns DataFrame for lag 252
display(cumulative_log_returns_timesfm1_lag_252.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_timesfm1_lag_252.to_csv("cumulative_log_timesfm1_lag_252.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound per-period returns into a single total return.

    Each element r is treated as a simple return: the result is the product
    of (1 + r) over all elements, minus 1.
    """
    growth_factors = 1 + daily_returns
    total_growth = np.prod(growth_factors)
    return total_growth - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Scale the compounded return of `daily_returns` to a `periods`-long year.

    The total growth over the sample is raised to the power
    ``periods / len(daily_returns)`` and 1 is subtracted. An empty input
    raises ZeroDivisionError.
    """
    total_return = np.prod(1 + daily_returns) - 1
    scaling_exponent = periods / len(daily_returns)
    annual_growth = (1 + total_return) ** scaling_exponent
    return annual_growth - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Return the annualized Sharpe ratio of `daily_returns`.

    The annual `risk_free_rate` is spread evenly over `periods`, subtracted
    from every return, and the mean-to-std ratio of the result is scaled by
    sqrt(periods). The spread uses the input's own `.std()` method.
    """
    per_period_rf = risk_free_rate / periods
    surplus = daily_returns - per_period_rf
    scaled_mean = np.sqrt(periods) * surplus.mean()
    return scaled_mean / surplus.std()

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    """Return the standard deviation of `daily_returns` scaled by sqrt(periods)."""
    per_period_sd = np.std(daily_returns)
    annualizer = np.sqrt(periods)
    return per_period_sd * annualizer

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Return the deepest relative fall of the compounded curve below its running peak.

    The curve is the cumulative product of (1 + r). Its first point is the
    first growth factor, so the starting value of 1 is not itself counted as
    a peak. The result is zero or negative.
    """
    wealth = np.cumprod(1 + daily_returns)
    running_high = np.maximum.accumulate(wealth)
    shortfall = (wealth - running_high) / running_high
    return np.min(shortfall)

# Apply fixed transaction cost to the daily returns
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Return `daily_returns` with the flat `transaction_cost` deducted from every element."""
    net_returns = daily_returns - transaction_cost
    return net_returns

# Apply percentage-based transaction cost to the daily returns
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Return `daily_returns` scaled by ``1 - transaction_cost_percentage``."""
    retained_share = 1 - transaction_cost_percentage
    return daily_returns * retained_share

# Lag-252 cumulative-return columns that were accumulated net of transaction cost
portfolios_with_cost = [
    f'cum_{_code}_return_252_with_cost'
    for _code in ('EL', 'ES', 'ELS', 'VL', 'VS', 'VLS')
]

# One empty list per reported statistic; every loop pass adds one entry to each
metrics = {
    _column: []
    for _column in (
        'Portfolio', 'Annualized Return', 'Sharpe Ratio', 'Volatility',
        'Standard Deviation', 'Max Drawdown', 'Cumulative Return',
    )
}

# NOTE(review): the *_with_cost columns already have the per-row cost
# subtracted in compute_returns, and apply_transaction_cost_fixed deducts a
# further 0.001 per day here - confirm the second deduction is intended.
# NOTE(review): the increments below are differences of cumulative *log*
# returns, while the metric helpers compound them as simple returns - confirm.
for portfolio in portfolios_with_cost:
    cumulative_returns_timesfm1252_c = cumulative_log_returns_timesfm1_lag_252[portfolio].values

    # Day-over-day increments of the running total; the first date has no
    # predecessor, so the result is one element shorter than the input.
    daily_returns = np.diff(cumulative_returns_timesfm1252_c)

    # Flat deduction feeds the return-based statistics; proportional scaling
    # feeds the two dispersion statistics.
    daily_returns_after_cost_fixed = apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001)
    daily_returns_after_cost_percentage = apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001)

    cum_return_after_cost = cumulative_return(daily_returns_after_cost_fixed)
    ann_return = annualized_return(daily_returns_after_cost_fixed)
    sharpe = sharpe_ratio(daily_returns_after_cost_fixed)
    vol = calculate_volatility(daily_returns_after_cost_percentage)
    max_draw = maximum_drawdown(daily_returns_after_cost_fixed)
    std_dev = np.std(daily_returns_after_cost_percentage)

    for _stat_name, _stat_value in (
        ('Portfolio', portfolio),
        ('Annualized Return', ann_return),
        ('Sharpe Ratio', sharpe),
        ('Volatility', vol),
        ('Standard Deviation', std_dev),
        ('Max Drawdown', max_draw),
        ('Cumulative Return', cum_return_after_cost),
    ):
        metrics[_stat_name].append(_stat_value)

# Tabulate the per-portfolio statistics
metrics_timesfm1252_c = pd.DataFrame(metrics)
display(metrics_timesfm1252_c)

# Same calculations for the lag-252 portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_252_without_cost', 'cum_ES_return_252_without_cost', 'cum_ELS_return_252_without_cost',
    'cum_VL_return_252_without_cost', 'cum_VS_return_252_without_cost', 'cum_VLS_return_252_without_cost',
]

# Initialize metrics container for portfolios without transaction cost
metrics_wc = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    cumulative_returns_timsfm1252_wc = cumulative_log_returns_timesfm1_lag_252[portfolio].values

    # Difference the lag-252 series loaded on the line above. This previously
    # differenced a stale window-5 array, so every row reported the same
    # window-5 numbers.
    daily_returns = np.diff(cumulative_returns_timsfm1252_wc)

    # Calculate cumulative returns without transaction cost
    cum_return_without_cost = cumulative_return(daily_returns)

    # Metrics calculations
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns without cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics_wc['Portfolio'].append(portfolio)
    metrics_wc['Annualized Return'].append(ann_return)
    metrics_wc['Sharpe Ratio'].append(sharpe)
    metrics_wc['Volatility'].append(vol)
    metrics_wc['Standard Deviation'].append(std_dev)
    metrics_wc['Max Drawdown'].append(max_draw)
    metrics_wc['Cumulative Return'].append(cum_return_without_cost)

# Convert the results into a DataFrame for analysis
metrics_timesfm1252_wc = pd.DataFrame(metrics_wc)
display(metrics_timesfm1252_wc)

### Window Size 512

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the flat trading cost of 10 basis points (0.001).

    `row` is accepted so the function can be handed to
    `DataFrame.apply(..., axis=1)`, but its contents are never inspected.
    """
    cost_in_bps = 10
    return cost_in_bps / 10_000

# The cost is the same constant for every row, so broadcast it as a scalar
# instead of invoking a Python function once per row of the test panel.
# NOTE: this relies on calculate_transaction_cost ignoring its argument.
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `timesfm1` to predict excess returns
def timesfm1_512_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=None):
    """Run the rolling TimesFM forecast and alias its output for window 512.

    Parameters
    ----------
    crsp_train_lagged, crsp_test_lagged
        Forwarded unchanged to `rolling_forecast_tfm1`.
    lags : list of int, optional
        Lag values forwarded to `rolling_forecast_tfm1`. Defaults to ``[512]``;
        the list is created per call so no default object is shared.

    Returns
    -------
    The test frame returned by `rolling_forecast_tfm1`, with a
    ``timesfm1_512_predicted_excess_returns_lag{lag}`` column added for each
    lag whose ``predicted_excess_returns_lag{lag}`` column exists.
    """
    if lags is None:
        lags = [512]

    # The first element of the returned pair (forecast metrics) is not used here
    _, crsp_test_lagged = rolling_forecast_tfm1(crsp_train_lagged, crsp_test_lagged, lags)

    for lag in lags:
        pred_col = f'predicted_excess_returns_lag{lag}'
        # A lag without a prediction column is skipped silently
        if pred_col in crsp_test_lagged.columns:
            crsp_test_lagged[f'timesfm1_512_predicted_excess_returns_lag{lag}'] = crsp_test_lagged[pred_col].values

    return crsp_test_lagged

# Get predicted excess returns using TimesFM1 model
crsp_test_lagged = timesfm1_512_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged)

# Step 5: Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute long, short and long-short log returns for one cross-section.

    Rows are ranked by ``predicted_col``. The top ``int(0.1 * len(group))`` rows
    form the long leg and the bottom ones the short leg. Each leg is priced
    equal-weighted and value-weighted (weights from ``market_cap_merged``) using
    ``log1p(adjusted_ret)``, once gross and once net of the leg's mean
    ``transaction_cost``. Short-leg returns are sign-flipped, and the
    long-short return is the sum of the two legs.

    Args:
        group: DataFrame holding ``predicted_col``, ``adjusted_ret``,
            ``transaction_cost`` and ``market_cap_merged``.
        predicted_col: Name of the column used for ranking.

    Returns:
        Dict with twelve entries named
        ``{equal,value}_{long,short,long_short}_log_return_{with,without}_cost``.
    """
    leg_size = int(0.1 * len(group))
    long_leg = group.nlargest(leg_size, predicted_col)
    short_leg = group.nsmallest(leg_size, predicted_col)

    def _leg_stats(leg):
        # (equal-weighted log return, cap-weighted log return, mean cost) of one leg.
        log_ret = np.log1p(leg['adjusted_ret'])
        caps = leg['market_cap_merged']
        cap_weighted = (log_ret * caps).sum() / caps.sum()
        return log_ret.mean(), cap_weighted, leg['transaction_cost'].mean()

    long_eq, long_vw, long_cost = _leg_stats(long_leg)
    short_eq, short_vw, short_cost = _leg_stats(short_leg)

    out = {}

    # Equal-weighted, net of cost
    out['equal_long_log_return_with_cost'] = long_eq - long_cost
    out['equal_short_log_return_with_cost'] = -short_eq - short_cost
    out['equal_long_short_log_return_with_cost'] = (
        out['equal_long_log_return_with_cost'] + out['equal_short_log_return_with_cost']
    )

    # Equal-weighted, gross
    out['equal_long_log_return_without_cost'] = long_eq
    out['equal_short_log_return_without_cost'] = -short_eq
    out['equal_long_short_log_return_without_cost'] = (
        out['equal_long_log_return_without_cost'] + out['equal_short_log_return_without_cost']
    )

    # Value-weighted, net of cost
    out['value_long_log_return_with_cost'] = long_vw - long_cost
    out['value_short_log_return_with_cost'] = -(short_vw + short_cost)
    out['value_long_short_log_return_with_cost'] = (
        out['value_long_log_return_with_cost'] + out['value_short_log_return_with_cost']
    )

    # Value-weighted, gross
    out['value_long_log_return_without_cost'] = long_vw
    out['value_short_log_return_without_cost'] = -(short_vw)
    out['value_long_short_log_return_without_cost'] = (
        out['value_long_log_return_without_cost'] + out['value_short_log_return_without_cost']
    )

    return out

# Compute cumulative returns for each date with daily rebalancing
# Output column name -> key of the dict returned by compute_returns that feeds it.
# Order matters: it fixes the column order of the resulting DataFrame.
column_to_return_key = {
    f'cum_{tag}_return_512_{cost}': f'{leg}_log_return_{cost}'
    for cost in ('with_cost', 'without_cost')
    for tag, leg in (
        ('EL', 'equal_long'), ('ES', 'equal_short'), ('ELS', 'equal_long_short'),
        ('VL', 'value_long'), ('VS', 'value_short'), ('VLS', 'value_long_short'),
    )
}

# One list per output column, plus the date of each observation
cumulative_log_returns_by_date_timesfm1512 = {'date': []}
cumulative_log_returns_by_date_timesfm1512.update({column: [] for column in column_to_return_key})

# Running sums of the daily log returns for lag 512, all starting at 0
running_totals = dict.fromkeys(column_to_return_key, 0)

# Column holding the lag-512 predictions used to rank the stocks
predicted_col = 'timesfm1_512_predicted_excess_returns_lag512'

# Walk the dates in order of first appearance and accumulate each day's returns
for date in crsp_test_lagged['date'].unique():
    group = crsp_test_lagged[crsp_test_lagged['date'] == date]
    returns = compute_returns(group, predicted_col)

    cumulative_log_returns_by_date_timesfm1512['date'].append(date)
    for column, return_key in column_to_return_key.items():
        running_totals[column] += returns[return_key]
        cumulative_log_returns_by_date_timesfm1512[column].append(running_totals[column])

# Expose the final running sums under their individual names
(
    cum_EL_return_512_with_cost, cum_ES_return_512_with_cost, cum_ELS_return_512_with_cost,
    cum_VL_return_512_with_cost, cum_VS_return_512_with_cost, cum_VLS_return_512_with_cost,
    cum_EL_return_512_without_cost, cum_ES_return_512_without_cost, cum_ELS_return_512_without_cost,
    cum_VL_return_512_without_cost, cum_VS_return_512_without_cost, cum_VLS_return_512_without_cost,
) = running_totals.values()

# Convert to DataFrame for lag 512
cumulative_log_returns_timesfm1_lag_512 = pd.DataFrame(cumulative_log_returns_by_date_timesfm1512)

# Display the cumulative returns DataFrame for lag 512
display(cumulative_log_returns_timesfm1_lag_512.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_timesfm1_lag_512.to_csv("cumulative_log_timesfm1_lag_512.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound a series of per-period simple returns into one total return.

    Args:
        daily_returns: Array-like of per-period returns as decimal fractions.

    Returns:
        ``prod(1 + r) - 1``; 0.0 for an empty input.
    """
    growth_factor = np.prod(1 + daily_returns)
    return growth_factor - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Annualise the compounded return of a per-period return series.

    Args:
        daily_returns: Array-like of per-period returns as decimal fractions.
            Must be non-empty (its length is used as a divisor).
        periods: Number of periods per year.

    Returns:
        ``(1 + total_return) ** (periods / n) - 1`` where ``n`` is the number
        of observations.
    """
    total_return = np.prod(1 + daily_returns) - 1
    exponent = periods / len(daily_returns)
    growth_factor = 1 + total_return
    return growth_factor ** exponent - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Annualised Sharpe ratio of a per-period return series.

    Args:
        daily_returns: Object supporting subtraction plus ``.mean()`` and
            ``.std()`` (e.g. a NumPy array).
        risk_free_rate: Annual risk-free rate; it is divided by ``periods``
            to obtain the per-period rate.
        periods: Number of periods per year.

    Returns:
        ``sqrt(periods) * mean(excess) / std(excess)``.
    """
    per_period_rf = risk_free_rate / periods  # Assuming 252 trading days
    excess = daily_returns - per_period_rf
    scaled_mean = np.sqrt(periods) * excess.mean()
    return scaled_mean / excess.std()

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    return np.std(daily_returns) * np.sqrt(periods)

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Largest peak-to-trough decline of the compounded return path.

    Args:
        daily_returns: Array-like of per-period simple returns.

    Returns:
        The minimum of ``(wealth - running_peak) / running_peak``: a value
        <= 0, e.g. -0.5 for a 50% drawdown.
    """
    wealth_path = np.cumprod(1 + daily_returns)
    running_peak = np.maximum.accumulate(wealth_path)
    relative_decline = (wealth_path - running_peak) / running_peak
    return np.min(relative_decline)

# Apply fixed transaction cost to the daily returns
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Deduct a constant cost from every per-period return.

    Args:
        daily_returns: Scalar or array-like of per-period returns.
        transaction_cost: Amount subtracted from each return.

    Returns:
        ``daily_returns - transaction_cost``.
    """
    net_returns = daily_returns - transaction_cost
    return net_returns

# Apply percentage-based transaction cost to the daily returns
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every per-period return down by a proportional cost.

    Args:
        daily_returns: Scalar or array-like of per-period returns.
        transaction_cost_percentage: Fraction of each return that is removed.

    Returns:
        ``daily_returns * (1 - transaction_cost_percentage)``.
    """
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    f'cum_{tag}_return_512_with_cost' for tag in ('EL', 'ES', 'ELS', 'VL', 'VS', 'VLS')
]

# Initialize metrics container: one list per reported column
metrics = {
    column: []
    for column in (
        'Portfolio', 'Annualized Return', 'Sharpe Ratio', 'Volatility',
        'Standard Deviation', 'Max Drawdown', 'Cumulative Return',
    )
}

# Calculate metrics for each portfolio with transaction cost
for portfolio in portfolios_with_cost:
    cumulative_returns_timesfm1512_c = cumulative_log_returns_timesfm1_lag_512[portfolio].values

    # First differences of the cumulative series give the per-day increments
    # (np.diff yields one element fewer than its input).
    # NOTE(review): the increments are log returns, while the metric helpers
    # compound them as simple returns - confirm this approximation is intended.
    daily_returns = np.diff(cumulative_returns_timesfm1512_c)

    # Fixed cost feeds return, Sharpe, drawdown and cumulative-return metrics.
    # NOTE(review): the *_with_cost columns look already net of transaction_cost
    # (see compute_returns) - confirm this second deduction is intended.
    daily_returns_after_cost_fixed = apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001)

    # Percentage cost feeds volatility and standard deviation.
    daily_returns_after_cost_percentage = apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001)

    portfolio_metrics = {
        'Portfolio': portfolio,
        'Annualized Return': annualized_return(daily_returns_after_cost_fixed),
        'Sharpe Ratio': sharpe_ratio(daily_returns_after_cost_fixed),
        'Volatility': calculate_volatility(daily_returns_after_cost_percentage),
        'Standard Deviation': np.std(daily_returns_after_cost_percentage),
        'Max Drawdown': maximum_drawdown(daily_returns_after_cost_fixed),
        'Cumulative Return': cumulative_return(daily_returns_after_cost_fixed),
    }

    # Store results
    for column, value in portfolio_metrics.items():
        metrics[column].append(value)

# Convert the results into a DataFrame for analysis
metrics_timesfm1512_c = pd.DataFrame(metrics)
display(metrics_timesfm1512_c)

# same calculations for portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_512_without_cost', 'cum_ES_return_512_without_cost', 'cum_ELS_return_512_without_cost',
    'cum_VL_return_512_without_cost', 'cum_VS_return_512_without_cost', 'cum_VLS_return_512_without_cost',
]

# Initialize metrics container for portfolios without transaction cost
metrics_wc = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    cumulative_returns_timsfm1512_wc = cumulative_log_returns_timesfm1_lag_512[portfolio].values

    # Difference the series loaded on the line above. The previous code diffed
    # `cumulative_returns_timsfm15_wc`, a name this cell never assigns, so every
    # lag-512 metric was computed from whatever an earlier cell had left in that
    # variable (or the cell failed with NameError on a fresh kernel).
    # np.diff yields one element fewer than its input.
    daily_returns = np.diff(cumulative_returns_timsfm1512_wc)

    # Calculate cumulative returns without transaction cost
    cum_return_without_cost = cumulative_return(daily_returns)

    # Metrics calculations
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns without cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics_wc['Portfolio'].append(portfolio)
    metrics_wc['Annualized Return'].append(ann_return)
    metrics_wc['Sharpe Ratio'].append(sharpe)
    metrics_wc['Volatility'].append(vol)
    metrics_wc['Standard Deviation'].append(std_dev)
    metrics_wc['Max Drawdown'].append(max_draw)
    metrics_wc['Cumulative Return'].append(cum_return_without_cost)

# Convert the results into a DataFrame for analysis
metrics_timesfm1512_wc = pd.DataFrame(metrics_wc)
display(metrics_timesfm1512_wc)

## TimesFM 2.0-500M

In [None]:
def rolling_forecast_tfm2(crsp_train_lagged, crsp_test_lagged, lags=[5], model_name="TimesFM 2.0-500M"):
    """Forecast excess returns with TimesFM 2.0-500M over rolling context windows.

    For every ``permno`` in the test frame and every window length in ``lags``,
    a z-scored window of ``excess_ret`` is fed to the model. The one-step
    prediction is de-normalised with the window mean and standard deviation,
    and the predictions are scored against the realised values.

    Args:
        crsp_train_lagged: Training frame with ``permno`` and ``date`` columns.
            It is only sorted; none of its values feed the forecasts.
        crsp_test_lagged: Test frame with ``permno``, ``date`` and
            ``excess_ret`` columns. Rows with a missing ``excess_ret`` are
            dropped. The caller's frame is not modified.
        lags: Context-window lengths. The list is only read.
        model_name: Label used in the ``Model`` column of the metrics frame.

    Returns:
        ``(final_df, crsp_test_lagged)``. ``final_df`` holds one row of
        evaluation metrics per lag that produced forecasts (an empty DataFrame
        when none did). The second element is the cleaned, sorted test frame
        with one left-merged ``predicted_excess_returns_lag{lag}`` column per
        forecast lag. When no lag produced forecasts, a single all-NaN
        ``predicted_excess_returns`` column is added instead.

    Side effects:
        Prints progress and writes ``timesfm2_results_lag{lag}.csv`` and
        ``timesfm2_results_lag{lag}_full.csv`` for every forecast lag.
    """
    # Constants
    DATE_COL = 'date'
    ID_COL = 'permno'
    TARGET_COL = 'excess_ret'
    # TimesFM documents "cpu" / "gpu" as backend values; "cuda" is not one of
    # them and makes the model run on CPU even when a GPU is present.
    backend = "gpu" if torch.cuda.is_available() else "cpu"

    def _r2(y_true, y_hat):
        # R-squared against the mean of the realised values; NaN if they are constant.
        ssr = np.sum(np.square(y_true - y_hat))
        tss = np.sum(np.square(y_true - np.mean(y_true)))
        return 1 - ssr / tss if tss != 0 else np.nan

    # Clean and sort input data
    crsp_test_lagged = crsp_test_lagged.dropna(subset=[TARGET_COL])
    crsp_test_lagged = crsp_test_lagged.sort_values([ID_COL, DATE_COL]).reset_index(drop=True)
    crsp_test_lagged = crsp_test_lagged.copy()

    # Sorted as before for parity; nothing below reads the training frame.
    crsp_train_lagged = crsp_train_lagged.sort_values([ID_COL, DATE_COL]).reset_index(drop=True)

    # For collecting results
    all_results = []
    all_predictions = []  # (prediction column name, per-lag prediction frame)

    # Iterate through lags
    for lag in lags:
        print(f"\nTraining with context window of length: {lag}")
        pred_col = f'predicted_excess_returns_lag{lag}'

        contexts, targets_raw, denorm_stats, records = [], [], [], []

        # Build contexts and targets
        for permno, grp in crsp_test_lagged.groupby(ID_COL):
            grp = grp.reset_index(drop=True)
            values = grp[TARGET_COL].values
            dates = grp[DATE_COL].values
            if len(values) <= lag:
                continue
            for i in range(lag, len(values) - 1):
                context = values[i - lag:i]
                # NOTE(review): the context covers rows i-lag .. i-1 while the
                # target and its date come from row i+1, so row i is skipped.
                # Confirm this one-row gap is intended.
                target = values[i + 1]
                context_mean = np.mean(context)
                context_std = np.std(context)
                if context_std == 0:
                    # A flat window cannot be z-scored.
                    continue
                # float32 matches the precision the inputs had when they were
                # routed through a float tensor.
                contexts.append(np.asarray((context - context_mean) / context_std, dtype=np.float32))
                targets_raw.append(target)
                denorm_stats.append((context_mean, context_std))
                records.append({
                    ID_COL: permno,
                    TARGET_COL: target,
                    DATE_COL: dates[i + 1]
                })

        # If no usable data
        if not contexts:
            print(f" No usable data for lag {lag}")
            continue

        y_test = pd.Series(targets_raw)

        # Initialize model (context_len is the lag rounded up to a multiple of the patch length)
        tfm2 = TimesFm(
            hparams=TimesFmHparams(
                context_len=int(32 * np.ceil(lag / 32)),
                horizon_len=1,
                input_patch_len=32,
                output_patch_len=128,
                num_layers=50,
                model_dims=1280,
                use_positional_embedding=False,
                backend=backend
            ),
            checkpoint=TimesFmCheckpoint(huggingface_repo_id="google/timesfm-2.0-500m-pytorch")
        )

        # Run forecasts. The contexts are handed over as host-side arrays: a
        # tensor living on a CUDA device cannot be converted to NumPy, which is
        # what the forecast call does with its inputs.
        freqs = [0] * len(contexts)
        preds, _ = tfm2.forecast(contexts, freq=freqs)
        y_pred_norm = np.asarray(preds).reshape(-1)

        # Denormalize predictions with each window's own mean and std
        means, stds = (np.asarray(column) for column in zip(*denorm_stats))
        y_pred = pd.Series(y_pred_norm * stds + means)

        # Results DataFrame
        results_df = pd.DataFrame(records)
        # Assign predicted returns per lag
        results_df[pred_col] = y_pred

        # Evaluation metrics
        r2_score = _r2(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        acc = (np.sign(y_test) == np.sign(y_pred)).mean()
        up_acc = (np.sign(y_pred[y_test > 0]) == 1).mean() if (y_test > 0).any() else np.nan
        down_acc = (np.sign(y_pred[y_test < 0]) == -1).mean() if (y_test < 0).any() else np.nan

        result = pd.DataFrame([{
            "Model": f"{model_name} Lag {lag}",
            "Lag": lag,
            "R-squared": r2_score,
            "MSE": mse,
            "MAE": mae,
            "RMSE": rmse,
            "Direction Accuracy": acc,
            "Up Direction Accuracy": up_acc,
            "Down Direction Accuracy": down_acc
        }])

        # Save results
        result.to_csv(f"timesfm2_results_lag{lag}.csv", index=False)
        results_df.to_csv(f"timesfm2_results_lag{lag}_full.csv", index=False)

        all_results.append(result)
        all_predictions.append((pred_col, results_df))

    # Combine all performance metrics. pd.concat raises on an empty list, which
    # used to make the "no predictions" branch below unreachable.
    final_df = pd.concat(all_results, ignore_index=True) if all_results else pd.DataFrame()

    # Merge all predictions into the test set
    if all_predictions:
        # Merge one lag at a time. Stacking the per-lag frames before merging
        # duplicates every test row once per lag, and selecting the column of a
        # lag that produced no forecasts raises KeyError.
        for pred_col, results_df in all_predictions:
            # Drop a same-named column left by an earlier forecast run so the
            # merge does not rename both copies to *_x / *_y.
            crsp_test_lagged = crsp_test_lagged.drop(columns=[pred_col], errors='ignore').merge(
                results_df[[ID_COL, DATE_COL, pred_col]],
                on=[ID_COL, DATE_COL],
                how='left'
            )
    else:
        crsp_test_lagged['predicted_excess_returns'] = np.nan

    return final_df, crsp_test_lagged

In [None]:
# Run for lag 5
# NOTE: rolling_forecast_tfm2 returns a (metrics frame, test frame) pair, so
# tfm2_results_lag5 holds both and display() below renders the pair.
tfm2_results_lag5 = rolling_forecast_tfm2(crsp_train_lagged, crsp_test_lagged, lags=[5])

# Show results in notebook
from IPython.display import display
display(tfm2_results_lag5)

In [None]:
# Run for lag 21
# NOTE: rolling_forecast_tfm2 returns a (metrics frame, test frame) pair, so
# tfm2_results_lag21 holds both and display() below renders the pair.
tfm2_results_lag21 = rolling_forecast_tfm2(crsp_train_lagged, crsp_test_lagged, lags=[21])

# Show results in notebook
from IPython.display import display
display(tfm2_results_lag21)

In [None]:
# Run for lag 252
# NOTE: rolling_forecast_tfm2 returns a (metrics frame, test frame) pair, so
# tfm2_results_lag252 holds both and display() below renders the pair.
tfm2_results_lag252 = rolling_forecast_tfm2(crsp_train_lagged, crsp_test_lagged, lags=[252])

# Show results in notebook
from IPython.display import display
display(tfm2_results_lag252)

In [None]:
# Run for lag 512
# NOTE: rolling_forecast_tfm2 returns a (metrics frame, test frame) pair, so
# tfm2_results_lag512 holds both and display() below renders the pair.
tfm2_results_lag512 = rolling_forecast_tfm2(crsp_train_lagged, crsp_test_lagged, lags=[512])

# Show results in notebook
from IPython.display import display
display(tfm2_results_lag512)

## TimesFM 2.0-500M Portfolio

### Window Size 5

In [None]:
# Add transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the flat transaction cost charged for one stock-day row.

    Args:
        row: A row of the test frame. It is ignored: the same cost is used
            for small and large cap stocks.

    Returns:
        The cost as a decimal fraction (0.001, i.e. 10 bps).
    """
    return 0.001  # 10 bps for both small and large cap stocks


# The cost does not depend on the row, so broadcast the scalar to the whole
# column instead of invoking the function once per row through DataFrame.apply.
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `timesfm2` to predict excess returns
def timesfm2_5_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=[5]):
    """Run the TimesFM 2 rolling forecast and copy its predictions to model-specific columns.

    Args:
        crsp_train_lagged: Training frame, passed straight to ``rolling_forecast_tfm2``.
        crsp_test_lagged: Test frame, passed straight to ``rolling_forecast_tfm2``.
        lags: Context-window lengths to forecast with. The list is only read.

    Returns:
        The test frame returned by ``rolling_forecast_tfm2`` with, for every lag
        whose ``predicted_excess_returns_lag{lag}`` column is present, an extra
        ``timesfm2_5_predicted_excess_returns_lag{lag}`` column holding the
        same values.
    """
    # Only the second element of the returned pair (the test frame) is used.
    _, forecast_frame = rolling_forecast_tfm2(crsp_train_lagged, crsp_test_lagged, lags)

    for window in lags:
        source_col = f'predicted_excess_returns_lag{window}'
        if source_col not in forecast_frame.columns:
            # Nothing was merged in for this lag; skip it silently.
            continue
        forecast_frame[f'timesfm2_5_predicted_excess_returns_lag{window}'] = forecast_frame[source_col].values

    return forecast_frame

# Get predicted excess returns using TimesFM2 model
crsp_test_lagged = timesfm2_5_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged)

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute long, short and long-short log returns for one cross-section.

    Rows are ranked by ``predicted_col``. The top ``int(0.1 * len(group))`` rows
    form the long leg and the bottom ones the short leg. Each leg is priced
    equal-weighted and value-weighted (weights from ``market_cap_merged``) using
    ``log1p(adjusted_ret)``, once gross and once net of the leg's mean
    ``transaction_cost``. Short-leg returns are sign-flipped, and the
    long-short return is the sum of the two legs.

    Args:
        group: DataFrame holding ``predicted_col``, ``adjusted_ret``,
            ``transaction_cost`` and ``market_cap_merged``.
        predicted_col: Name of the column used for ranking.

    Returns:
        Dict with twelve entries named
        ``{equal,value}_{long,short,long_short}_log_return_{with,without}_cost``.
    """
    leg_size = int(0.1 * len(group))
    long_leg = group.nlargest(leg_size, predicted_col)
    short_leg = group.nsmallest(leg_size, predicted_col)

    def _leg_stats(leg):
        # (equal-weighted log return, cap-weighted log return, mean cost) of one leg.
        log_ret = np.log1p(leg['adjusted_ret'])
        caps = leg['market_cap_merged']
        cap_weighted = (log_ret * caps).sum() / caps.sum()
        return log_ret.mean(), cap_weighted, leg['transaction_cost'].mean()

    long_eq, long_vw, long_cost = _leg_stats(long_leg)
    short_eq, short_vw, short_cost = _leg_stats(short_leg)

    out = {}

    # Equal-weighted, net of cost
    out['equal_long_log_return_with_cost'] = long_eq - long_cost
    out['equal_short_log_return_with_cost'] = -short_eq - short_cost
    out['equal_long_short_log_return_with_cost'] = (
        out['equal_long_log_return_with_cost'] + out['equal_short_log_return_with_cost']
    )

    # Equal-weighted, gross
    out['equal_long_log_return_without_cost'] = long_eq
    out['equal_short_log_return_without_cost'] = -short_eq
    out['equal_long_short_log_return_without_cost'] = (
        out['equal_long_log_return_without_cost'] + out['equal_short_log_return_without_cost']
    )

    # Value-weighted, net of cost
    out['value_long_log_return_with_cost'] = long_vw - long_cost
    out['value_short_log_return_with_cost'] = -(short_vw + short_cost)
    out['value_long_short_log_return_with_cost'] = (
        out['value_long_log_return_with_cost'] + out['value_short_log_return_with_cost']
    )

    # Value-weighted, gross
    out['value_long_log_return_without_cost'] = long_vw
    out['value_short_log_return_without_cost'] = -(short_vw)
    out['value_long_short_log_return_without_cost'] = (
        out['value_long_log_return_without_cost'] + out['value_short_log_return_without_cost']
    )

    return out

# Compute cumulative returns for each date with daily rebalancing
# Output column name -> key of the dict returned by compute_returns that feeds it.
# Order matters: it fixes the column order of the resulting DataFrame.
column_to_return_key = {
    f'cum_{tag}_return_5_{cost}': f'{leg}_log_return_{cost}'
    for cost in ('with_cost', 'without_cost')
    for tag, leg in (
        ('EL', 'equal_long'), ('ES', 'equal_short'), ('ELS', 'equal_long_short'),
        ('VL', 'value_long'), ('VS', 'value_short'), ('VLS', 'value_long_short'),
    )
}

# One list per output column, plus the date of each observation
cumulative_log_returns_by_date_timesfm25 = {'date': []}
cumulative_log_returns_by_date_timesfm25.update({column: [] for column in column_to_return_key})

# Running sums of the daily log returns for lag 5, all starting at 0
running_totals = dict.fromkeys(column_to_return_key, 0)

# Column holding the lag-5 predictions used to rank the stocks
predicted_col = 'timesfm2_5_predicted_excess_returns_lag5'

# Walk the dates in order of first appearance and accumulate each day's returns
for date in crsp_test_lagged['date'].unique():
    group = crsp_test_lagged[crsp_test_lagged['date'] == date]
    returns = compute_returns(group, predicted_col)

    cumulative_log_returns_by_date_timesfm25['date'].append(date)
    for column, return_key in column_to_return_key.items():
        running_totals[column] += returns[return_key]
        cumulative_log_returns_by_date_timesfm25[column].append(running_totals[column])

# Expose the final running sums under their individual names
(
    cum_EL_return_5_with_cost, cum_ES_return_5_with_cost, cum_ELS_return_5_with_cost,
    cum_VL_return_5_with_cost, cum_VS_return_5_with_cost, cum_VLS_return_5_with_cost,
    cum_EL_return_5_without_cost, cum_ES_return_5_without_cost, cum_ELS_return_5_without_cost,
    cum_VL_return_5_without_cost, cum_VS_return_5_without_cost, cum_VLS_return_5_without_cost,
) = running_totals.values()

# Convert to DataFrame for lag 5
cumulative_log_returns_timesfm2_lag_5 = pd.DataFrame(cumulative_log_returns_by_date_timesfm25)

# Display the cumulative returns DataFrame for lag 5
display(cumulative_log_returns_timesfm2_lag_5.head())

# Saving the DataFrame as a CSV file
cumulative_log_returns_timesfm2_lag_5.to_csv("cumulative_log_timesfm2_lag_5.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound a series of per-period simple returns into one total return.

    Args:
        daily_returns: Array-like of per-period returns as decimal fractions.

    Returns:
        ``prod(1 + r) - 1``; 0.0 for an empty input.
    """
    growth_factor = np.prod(1 + daily_returns)
    return growth_factor - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Annualise the compounded return of a per-period return series.

    Args:
        daily_returns: Array-like of per-period returns as decimal fractions.
            Must be non-empty (its length is used as a divisor).
        periods: Number of periods per year.

    Returns:
        ``(1 + total_return) ** (periods / n) - 1`` where ``n`` is the number
        of observations.
    """
    total_return = np.prod(1 + daily_returns) - 1
    exponent = periods / len(daily_returns)
    growth_factor = 1 + total_return
    return growth_factor ** exponent - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Annualised Sharpe ratio of a per-period return series.

    Args:
        daily_returns: Object supporting subtraction plus ``.mean()`` and
            ``.std()`` (e.g. a NumPy array).
        risk_free_rate: Annual risk-free rate; it is divided by ``periods``
            to obtain the per-period rate.
        periods: Number of periods per year.

    Returns:
        ``sqrt(periods) * mean(excess) / std(excess)``.
    """
    per_period_rf = risk_free_rate / periods  # Assuming 252 trading days
    excess = daily_returns - per_period_rf
    scaled_mean = np.sqrt(periods) * excess.mean()
    return scaled_mean / excess.std()

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    return np.std(daily_returns) * np.sqrt(periods)

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Largest peak-to-trough decline of the compounded return path.

    Args:
        daily_returns: Array-like of per-period simple returns.

    Returns:
        The minimum of ``(wealth - running_peak) / running_peak``: a value
        <= 0, e.g. -0.5 for a 50% drawdown.
    """
    wealth_path = np.cumprod(1 + daily_returns)
    running_peak = np.maximum.accumulate(wealth_path)
    relative_decline = (wealth_path - running_peak) / running_peak
    return np.min(relative_decline)

# Apply fixed transaction cost to the daily returns
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Deduct a constant cost from every per-period return.

    Args:
        daily_returns: Scalar or array-like of per-period returns.
        transaction_cost: Amount subtracted from each return.

    Returns:
        ``daily_returns - transaction_cost``.
    """
    net_returns = daily_returns - transaction_cost
    return net_returns

# Apply percentage-based transaction cost to the daily returns
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every per-period return down by a proportional cost.

    Args:
        daily_returns: Scalar or array-like of per-period returns.
        transaction_cost_percentage: Fraction of each return that is removed.

    Returns:
        ``daily_returns * (1 - transaction_cost_percentage)``.
    """
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Prepare portfolio names (with transaction cost)
portfolios_with_cost = [
    f'cum_{tag}_return_5_with_cost' for tag in ('EL', 'ES', 'ELS', 'VL', 'VS', 'VLS')
]

# Initialize metrics container: one list per reported column
metrics = {
    column: []
    for column in (
        'Portfolio', 'Annualized Return', 'Sharpe Ratio', 'Volatility',
        'Standard Deviation', 'Max Drawdown', 'Cumulative Return',
    )
}

# Calculate metrics for each portfolio with transaction cost
for portfolio in portfolios_with_cost:
    cumulative_returns_timesfm25_c = cumulative_log_returns_timesfm2_lag_5[portfolio].values

    # First differences of the cumulative series give the per-day increments
    # (np.diff yields one element fewer than its input).
    # NOTE(review): the increments are log returns, while the metric helpers
    # compound them as simple returns - confirm this approximation is intended.
    daily_returns = np.diff(cumulative_returns_timesfm25_c)

    # Fixed cost feeds return, Sharpe, drawdown and cumulative-return metrics.
    # NOTE(review): the *_with_cost columns look already net of transaction_cost
    # (see compute_returns) - confirm this second deduction is intended.
    daily_returns_after_cost_fixed = apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001)

    # Percentage cost feeds volatility and standard deviation.
    daily_returns_after_cost_percentage = apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001)

    portfolio_metrics = {
        'Portfolio': portfolio,
        'Annualized Return': annualized_return(daily_returns_after_cost_fixed),
        'Sharpe Ratio': sharpe_ratio(daily_returns_after_cost_fixed),
        'Volatility': calculate_volatility(daily_returns_after_cost_percentage),
        'Standard Deviation': np.std(daily_returns_after_cost_percentage),
        'Max Drawdown': maximum_drawdown(daily_returns_after_cost_fixed),
        'Cumulative Return': cumulative_return(daily_returns_after_cost_fixed),
    }

    # Store results
    for column, value in portfolio_metrics.items():
        metrics[column].append(value)

# Convert the results into a DataFrame for analysis
metrics_timesfm25_c = pd.DataFrame(metrics)
display(metrics_timesfm25_c)

# same calculations for portfolios without transaction cost
portfolios_without_cost = [
    f'cum_{tag}_return_5_without_cost' for tag in ('EL', 'ES', 'ELS', 'VL', 'VS', 'VLS')
]

# Initialize metrics container for portfolios without transaction cost
metrics_wc = {
    column: []
    for column in (
        'Portfolio', 'Annualized Return', 'Sharpe Ratio', 'Volatility',
        'Standard Deviation', 'Max Drawdown', 'Cumulative Return',
    )
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    # NOTE(review): the variable name says "timsfm15" although the data comes
    # from the TimesFM 2 lag-5 frame; name kept as-is.
    cumulative_returns_timsfm15_wc = cumulative_log_returns_timesfm2_lag_5[portfolio].values

    # First differences of the cumulative series give the per-day increments
    # (np.diff yields one element fewer than its input).
    daily_returns = np.diff(cumulative_returns_timsfm15_wc)

    portfolio_metrics = {
        'Portfolio': portfolio,
        'Annualized Return': annualized_return(daily_returns),
        'Sharpe Ratio': sharpe_ratio(daily_returns),
        'Volatility': calculate_volatility(daily_returns),
        'Standard Deviation': np.std(daily_returns),
        'Max Drawdown': maximum_drawdown(daily_returns),
        'Cumulative Return': cumulative_return(daily_returns),
    }

    # Store results
    for column, value in portfolio_metrics.items():
        metrics_wc[column].append(value)

# Convert the results into a DataFrame for analysis
metrics_timesfm25_wc = pd.DataFrame(metrics_wc)
display(metrics_timesfm25_wc)

### Window Size 21

In [None]:
# Flat transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the transaction cost charged for one row.

    Args:
        row: The row being priced. It is not inspected, so every row
            (small and large cap alike) gets the same cost.

    Returns:
        The constant 0.001, i.e. 10 basis points.
    """
    flat_cost = 0.001
    return flat_cost

# The cost does not depend on the row, so evaluate it once and broadcast the
# scalar to the whole column instead of calling the function for every row
# through DataFrame.apply (slow, and ill-defined on an empty frame).
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `rolling_forecast_tfm2` to predict excess returns
def timesfm2_21_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=None):
    """Run the rolling forecast and expose its predictions under model-specific names.

    Args:
        crsp_train_lagged: Training frame, passed through to `rolling_forecast_tfm2`.
        crsp_test_lagged: Test frame, passed through to `rolling_forecast_tfm2`.
        lags: Window sizes to forecast with. Defaults to ``[21]``.

    Returns:
        The test frame returned by `rolling_forecast_tfm2`, with one extra
        column ``timesfm2_21_predicted_excess_returns_lag{lag}`` for every lag
        whose ``predicted_excess_returns_lag{lag}`` column is present.
    """
    # Build the default per call so no list object is shared between calls
    if lags is None:
        lags = [21]

    # Only the second return value (the updated test frame) is needed here
    _, crsp_test_lagged = rolling_forecast_tfm2(crsp_train_lagged, crsp_test_lagged, lags)

    # Copy each available prediction column to its model-specific name;
    # lags without a prediction column are skipped silently
    for lag in lags:
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col in crsp_test_lagged.columns:
            crsp_test_lagged[f'timesfm2_21_predicted_excess_returns_lag{lag}'] = crsp_test_lagged[pred_col].values

    return crsp_test_lagged

# Get predicted excess returns using TimesFM2 model
crsp_test_lagged = timesfm2_21_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged)

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute one day's long, short and long-short portfolio log returns.

    Args:
        group: DataFrame holding the rows of a single date. Must contain the
            columns ``adjusted_ret``, ``transaction_cost``,
            ``market_cap_merged`` and `predicted_col`.
        predicted_col: Name of the column used to rank the rows.

    Returns:
        A dict with twelve entries: equal- and value-weighted returns for the
        long leg, the short leg and their sum, each with and without the
        transaction cost.

    NOTE(review): with fewer than 10 rows the leg size is 0 and the means
    below are taken over empty selections; confirm such dates cannot occur.
    """
    # Each leg holds 10% of the rows, rounded down
    leg_size = int(0.1 * len(group))
    longs = group.nlargest(leg_size, predicted_col)
    shorts = group.nsmallest(leg_size, predicted_col)

    # Per-stock log returns and the average cost of each leg
    long_log_ret = np.log1p(longs['adjusted_ret'])
    short_log_ret = np.log1p(shorts['adjusted_ret'])
    long_cost = longs['transaction_cost'].mean()
    short_cost = shorts['transaction_cost'].mean()

    # Equal-weighted legs; the short leg earns the negated return
    ew_long_gross = long_log_ret.mean()
    ew_short_gross = -short_log_ret.mean()
    ew_long_net = ew_long_gross - long_cost
    ew_short_net = ew_short_gross - short_cost

    # Value-weighted legs, weighted by market capitalisation
    long_caps = longs['market_cap_merged']
    short_caps = shorts['market_cap_merged']
    vw_long_gross = (long_log_ret * long_caps).sum() / long_caps.sum()
    vw_short_weighted = (short_log_ret * short_caps).sum() / short_caps.sum()
    vw_long_net = vw_long_gross - long_cost
    vw_short_gross = -(vw_short_weighted)
    vw_short_net = -(vw_short_weighted + short_cost)

    # Long-short is the sum of both legs, so the net version pays cost twice
    return {
        'equal_long_log_return_with_cost': ew_long_net,
        'equal_short_log_return_with_cost': ew_short_net,
        'equal_long_short_log_return_with_cost': ew_long_net + ew_short_net,
        'equal_long_log_return_without_cost': ew_long_gross,
        'equal_short_log_return_without_cost': ew_short_gross,
        'equal_long_short_log_return_without_cost': ew_long_gross + ew_short_gross,
        'value_long_log_return_with_cost': vw_long_net,
        'value_short_log_return_with_cost': vw_short_net,
        'value_long_short_log_return_with_cost': vw_long_net + vw_short_net,
        'value_long_log_return_without_cost': vw_long_gross,
        'value_short_log_return_without_cost': vw_short_gross,
        'value_long_short_log_return_without_cost': vw_long_gross + vw_short_gross
    }

# Compute cumulative returns for each date with daily rebalancing
# Collector for the output table: a 'date' list plus one list per portfolio
# column; every list receives the running total after each processed date.
cumulative_log_returns_by_date_timesfm221  = {
    'date': [],
    'cum_EL_return_21_with_cost': [],
    'cum_ES_return_21_with_cost': [],
    'cum_ELS_return_21_with_cost': [],
    'cum_VL_return_21_with_cost': [],
    'cum_VS_return_21_with_cost': [],
    'cum_VLS_return_21_with_cost': [],
    'cum_EL_return_21_without_cost': [],
    'cum_ES_return_21_without_cost': [],
    'cum_ELS_return_21_without_cost': [],
    'cum_VL_return_21_without_cost': [],
    'cum_VS_return_21_without_cost': [],
    'cum_VLS_return_21_without_cost': []
}

# Initialize cumulative returns for lag 21
# Running sums of the daily log returns. Naming: E/V = equal-/value-weighted,
# L/S/LS = long leg / short leg / long-short, 21 = window size.
cum_EL_return_21_with_cost = 0
cum_ES_return_21_with_cost = 0
cum_ELS_return_21_with_cost = 0
cum_VL_return_21_with_cost = 0
cum_VS_return_21_with_cost = 0
cum_VLS_return_21_with_cost = 0

cum_EL_return_21_without_cost = 0
cum_ES_return_21_without_cost = 0
cum_ELS_return_21_without_cost = 0
cum_VL_return_21_without_cost = 0
cum_VS_return_21_without_cost = 0
cum_VLS_return_21_without_cost = 0

# Iterate over each date to compute returns for lag 21 portfolios
# unique() yields dates in order of first appearance in the frame.
# NOTE(review): presumably crsp_test_lagged is already sorted by date - confirm.
for date in crsp_test_lagged['date'].unique():
    # All rows (stocks) observed on this date
    group = crsp_test_lagged[crsp_test_lagged['date'] == date]

    # Rank the day's stocks on the lag-21 TimesFM2 prediction column
    predicted_col = 'timesfm2_21_predicted_excess_returns_lag21'
    returns = compute_returns(group, predicted_col)

    # Update cumulative returns with daily values for lag 21
    # Log returns are additive, so the running total is a plain sum; a NaN
    # returned for any date carries over into every later total.
    cum_EL_return_21_with_cost += returns['equal_long_log_return_with_cost']
    cum_ES_return_21_with_cost += returns['equal_short_log_return_with_cost']
    cum_ELS_return_21_with_cost += returns['equal_long_short_log_return_with_cost']
    cum_VL_return_21_with_cost += returns['value_long_log_return_with_cost']
    cum_VS_return_21_with_cost += returns['value_short_log_return_with_cost']
    cum_VLS_return_21_with_cost += returns['value_long_short_log_return_with_cost']

    cum_EL_return_21_without_cost += returns['equal_long_log_return_without_cost']
    cum_ES_return_21_without_cost += returns['equal_short_log_return_without_cost']
    cum_ELS_return_21_without_cost += returns['equal_long_short_log_return_without_cost']
    cum_VL_return_21_without_cost += returns['value_long_log_return_without_cost']
    cum_VS_return_21_without_cost += returns['value_short_log_return_without_cost']
    cum_VLS_return_21_without_cost += returns['value_long_short_log_return_without_cost']

    # Append results for the day for lag 21 portfolios
    # (a snapshot of each running total, so row i holds the sum over dates 0..i)
    cumulative_log_returns_by_date_timesfm221['date'].append(date)
    cumulative_log_returns_by_date_timesfm221['cum_EL_return_21_with_cost'].append(cum_EL_return_21_with_cost)
    cumulative_log_returns_by_date_timesfm221['cum_ES_return_21_with_cost'].append(cum_ES_return_21_with_cost)
    cumulative_log_returns_by_date_timesfm221['cum_ELS_return_21_with_cost'].append(cum_ELS_return_21_with_cost)
    cumulative_log_returns_by_date_timesfm221['cum_VL_return_21_with_cost'].append(cum_VL_return_21_with_cost)
    cumulative_log_returns_by_date_timesfm221['cum_VS_return_21_with_cost'].append(cum_VS_return_21_with_cost)
    cumulative_log_returns_by_date_timesfm221['cum_VLS_return_21_with_cost'].append(cum_VLS_return_21_with_cost)
    cumulative_log_returns_by_date_timesfm221['cum_EL_return_21_without_cost'].append(cum_EL_return_21_without_cost)
    cumulative_log_returns_by_date_timesfm221['cum_ES_return_21_without_cost'].append(cum_ES_return_21_without_cost)
    cumulative_log_returns_by_date_timesfm221['cum_ELS_return_21_without_cost'].append(cum_ELS_return_21_without_cost)
    cumulative_log_returns_by_date_timesfm221['cum_VL_return_21_without_cost'].append(cum_VL_return_21_without_cost)
    cumulative_log_returns_by_date_timesfm221['cum_VS_return_21_without_cost'].append(cum_VS_return_21_without_cost)
    cumulative_log_returns_by_date_timesfm221['cum_VLS_return_21_without_cost'].append(cum_VLS_return_21_without_cost)

# Convert to DataFrame for lag 21 (one row per date, one column per portfolio)
cumulative_log_returns_timesfm2_lag_21 = pd.DataFrame(cumulative_log_returns_by_date_timesfm221)

# Display the cumulative returns DataFrame for lag 21
display(cumulative_log_returns_timesfm2_lag_21.head())

# Saving the DataFrame as a CSV file (written to the current working directory)
cumulative_log_returns_timesfm2_lag_21.to_csv("cumulative_log_timesfm2_lag_21.csv", index=False)

In [None]:
# Total compounded return over the whole sample
def cumulative_return(daily_returns):
    """Compound a series of per-period returns into a single total return.

    Every return r contributes a growth factor (1 + r); the product of all
    factors minus one is returned. An empty array gives 0.0.
    """
    growth_factors = 1 + daily_returns
    total_growth = np.prod(growth_factors)
    return total_growth - 1

# Compounded return rescaled to one year
def annualized_return(daily_returns, periods=252):
    """Annualize the compounded return of a series of per-period returns.

    Args:
        daily_returns: Array of per-period returns; must not be empty,
            because its length is used as a divisor.
        periods: Number of return observations that make up one year.

    Returns:
        The compounded total return raised to ``periods / len(daily_returns)``,
        expressed as a return (growth factor minus one).
    """
    compounded = np.prod(1 + daily_returns) - 1
    # Fraction of a year covered by the sample, inverted
    year_exponent = periods / len(daily_returns)
    return (1 + compounded) ** year_exponent - 1

# Annualized Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Return the annualized Sharpe ratio of a series of per-period returns.

    Args:
        daily_returns: Array-like supporting subtraction, ``.mean()`` and ``.std()``.
        risk_free_rate: Annual risk-free rate; it is divided evenly over `periods`.
        periods: Number of return observations per year (252 trading days by default).

    Returns:
        ``sqrt(periods) * mean(excess) / std(excess)``, where the standard
        deviation is whatever ``.std()`` of the input type computes.
    """
    per_period_rf = risk_free_rate / periods
    surplus = daily_returns - per_period_rf
    annualizer = np.sqrt(periods)
    return annualizer * surplus.mean() / surplus.std()

# Annualized volatility of the daily returns
def calculate_volatility(daily_returns, periods=252):
    """Scale the standard deviation of per-period returns to a yearly figure.

    Args:
        daily_returns: Array of per-period returns.
        periods: Number of return observations per year.

    Returns:
        ``np.std(daily_returns)`` multiplied by ``sqrt(periods)``.
    """
    per_period_std = np.std(daily_returns)
    scale = np.sqrt(periods)
    return per_period_std * scale

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Return the largest peak-to-trough loss of the compounded return path.

    The wealth path starts at 1.0 and is compounded with (1 + r) for every
    return. The drawdown at each step is the relative distance to the highest
    wealth seen so far, the starting wealth included.

    Args:
        daily_returns: Array-like of per-period returns (may be empty).

    Returns:
        The most negative drawdown as a fraction (e.g. -0.5 for a 50% loss),
        or 0.0 when the path never falls below an earlier high.
    """
    returns = np.asarray(daily_returns, dtype=float)
    # Prepend the starting wealth so that losses on the very first days are
    # measured against the initial capital instead of being ignored; this
    # also makes empty input well-defined (drawdown 0.0)
    wealth = np.concatenate(([1.0], np.cumprod(1 + returns)))
    running_peak = np.maximum.accumulate(wealth)
    drawdown = (wealth - running_peak) / running_peak
    return np.min(drawdown)

# Charge a fixed cost against every daily return
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Subtract a constant transaction cost from each return.

    Args:
        daily_returns: Array of per-period returns.
        transaction_cost: Amount deducted from every element (default 0.001).

    Returns:
        A new array with the cost removed from each return.
    """
    net_returns = daily_returns - transaction_cost
    return net_returns

# Scale every daily return down by a proportional cost
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Reduce each return by a fraction of itself.

    Args:
        daily_returns: Array of per-period returns.
        transaction_cost_percentage: Fraction of each return that is removed
            (default 0.001).

    Returns:
        A new array equal to ``daily_returns * (1 - transaction_cost_percentage)``.
    """
    retained_share = 1 - transaction_cost_percentage
    return daily_returns * retained_share

# Columns of the lag-21 table that hold the cost-adjusted cumulative returns
portfolios_with_cost = [
    'cum_EL_return_21_with_cost',
    'cum_ES_return_21_with_cost',
    'cum_ELS_return_21_with_cost',
    'cum_VL_return_21_with_cost',
    'cum_VS_return_21_with_cost',
    'cum_VLS_return_21_with_cost',
]

# Result table under construction: column label -> one entry per portfolio
metrics = {
    label: []
    for label in (
        'Portfolio',
        'Annualized Return',
        'Sharpe Ratio',
        'Volatility',
        'Standard Deviation',
        'Max Drawdown',
        'Cumulative Return',
    )
}

# Evaluate every cost-adjusted portfolio column in turn
for portfolio in portfolios_with_cost:
    # Running cumulative series of this portfolio as a plain array
    cumulative_returns_timesfm221_c = cumulative_log_returns_timesfm2_lag_21[portfolio].values

    # First differences give the per-day increments (one element fewer than the input)
    daily_returns = np.diff(cumulative_returns_timesfm221_c)

    # Two cost-adjusted variants: a fixed deduction (used for the return-based
    # metrics) and a proportional scaling (used for the dispersion metrics).
    # NOTE(review): the *_with_cost columns appear to be net of transaction_cost
    # already when they are built; confirm that charging 0.001 again is intended.
    daily_returns_after_cost_fixed = apply_transaction_cost_fixed(
        daily_returns, transaction_cost=0.001
    )
    daily_returns_after_cost_percentage = apply_transaction_cost_percentage(
        daily_returns, transaction_cost_percentage=0.001
    )

    # Return-based metrics on the fixed-cost series, dispersion on the scaled one
    cum_return_after_cost = cumulative_return(daily_returns_after_cost_fixed)
    ann_return = annualized_return(daily_returns_after_cost_fixed)
    sharpe = sharpe_ratio(daily_returns_after_cost_fixed)
    vol = calculate_volatility(daily_returns_after_cost_percentage)
    max_draw = maximum_drawdown(daily_returns_after_cost_fixed)
    std_dev = np.std(daily_returns_after_cost_percentage)

    # Collect this portfolio's row and append it column by column
    _row = {
        'Portfolio': portfolio,
        'Annualized Return': ann_return,
        'Sharpe Ratio': sharpe,
        'Volatility': vol,
        'Standard Deviation': std_dev,
        'Max Drawdown': max_draw,
        'Cumulative Return': cum_return_after_cost,
    }
    for _label, _entry in _row.items():
        metrics[_label].append(_entry)

# Turn the collected columns into a DataFrame and show it
metrics_timesfm221_c = pd.DataFrame(metrics)
display(metrics_timesfm221_c)

# Same calculations for the portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_21_without_cost', 'cum_ES_return_21_without_cost', 'cum_ELS_return_21_without_cost',
    'cum_VL_return_21_without_cost', 'cum_VS_return_21_without_cost', 'cum_VLS_return_21_without_cost',
]

# Initialize metrics container for portfolios without transaction cost
# (column label -> list with one entry per portfolio)
metrics_wc = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    # Running cumulative log return of this lag-21 portfolio as a plain array
    cumulative_returns_timsfm121_wc = cumulative_log_returns_timesfm2_lag_21[portfolio].values

    # Difference this portfolio's own series. Using an array left over from
    # another window's cell here would make all six rows report the same,
    # unrelated numbers.
    # NOTE(review): np.diff drops the first day's value, and the increments are
    # log returns that the helpers compound as simple returns - confirm intent.
    daily_returns = np.diff(cumulative_returns_timsfm121_wc)

    # Calculate cumulative returns without transaction cost
    cum_return_without_cost = cumulative_return(daily_returns)

    # Metrics calculations
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns without cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics_wc['Portfolio'].append(portfolio)
    metrics_wc['Annualized Return'].append(ann_return)
    metrics_wc['Sharpe Ratio'].append(sharpe)
    metrics_wc['Volatility'].append(vol)
    metrics_wc['Standard Deviation'].append(std_dev)
    metrics_wc['Max Drawdown'].append(max_draw)
    metrics_wc['Cumulative Return'].append(cum_return_without_cost)

# Convert the results into a DataFrame for analysis
metrics_timesfm221_wc = pd.DataFrame(metrics_wc)
display(metrics_timesfm221_wc)

### Window Size 252

In [None]:
# Flat transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the transaction cost charged for one row.

    Args:
        row: The row being priced. It is not inspected, so every row
            (small and large cap alike) gets the same cost.

    Returns:
        The constant 0.001, i.e. 10 basis points.
    """
    flat_cost = 0.001
    return flat_cost

# The cost does not depend on the row, so evaluate it once and broadcast the
# scalar to the whole column instead of calling the function for every row
# through DataFrame.apply (slow, and ill-defined on an empty frame).
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `rolling_forecast_tfm2` to predict excess returns
def timesfm2_252_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=None):
    """Run the rolling forecast and expose its predictions under model-specific names.

    Args:
        crsp_train_lagged: Training frame, passed through to `rolling_forecast_tfm2`.
        crsp_test_lagged: Test frame, passed through to `rolling_forecast_tfm2`.
        lags: Window sizes to forecast with. Defaults to ``[252]``.

    Returns:
        The test frame returned by `rolling_forecast_tfm2`, with one extra
        column ``timesfm2_252_predicted_excess_returns_lag{lag}`` for every lag
        whose ``predicted_excess_returns_lag{lag}`` column is present.
    """
    # Build the default per call so no list object is shared between calls
    if lags is None:
        lags = [252]

    # Only the second return value (the updated test frame) is needed here
    _, crsp_test_lagged = rolling_forecast_tfm2(crsp_train_lagged, crsp_test_lagged, lags)

    # Copy each available prediction column to its model-specific name;
    # lags without a prediction column are skipped silently
    for lag in lags:
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col in crsp_test_lagged.columns:
            crsp_test_lagged[f'timesfm2_252_predicted_excess_returns_lag{lag}'] = crsp_test_lagged[pred_col].values

    return crsp_test_lagged

# Get predicted excess returns using TimesFM2 model
crsp_test_lagged = timesfm2_252_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged)

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute one day's long, short and long-short portfolio log returns.

    Args:
        group: DataFrame holding the rows of a single date. Must contain the
            columns ``adjusted_ret``, ``transaction_cost``,
            ``market_cap_merged`` and `predicted_col`.
        predicted_col: Name of the column used to rank the rows.

    Returns:
        A dict with twelve entries: equal- and value-weighted returns for the
        long leg, the short leg and their sum, each with and without the
        transaction cost.

    NOTE(review): with fewer than 10 rows the leg size is 0 and the means
    below are taken over empty selections; confirm such dates cannot occur.
    """
    # Each leg holds 10% of the rows, rounded down
    leg_size = int(0.1 * len(group))
    longs = group.nlargest(leg_size, predicted_col)
    shorts = group.nsmallest(leg_size, predicted_col)

    # Per-stock log returns and the average cost of each leg
    long_log_ret = np.log1p(longs['adjusted_ret'])
    short_log_ret = np.log1p(shorts['adjusted_ret'])
    long_cost = longs['transaction_cost'].mean()
    short_cost = shorts['transaction_cost'].mean()

    # Equal-weighted legs; the short leg earns the negated return
    ew_long_gross = long_log_ret.mean()
    ew_short_gross = -short_log_ret.mean()
    ew_long_net = ew_long_gross - long_cost
    ew_short_net = ew_short_gross - short_cost

    # Value-weighted legs, weighted by market capitalisation
    long_caps = longs['market_cap_merged']
    short_caps = shorts['market_cap_merged']
    vw_long_gross = (long_log_ret * long_caps).sum() / long_caps.sum()
    vw_short_weighted = (short_log_ret * short_caps).sum() / short_caps.sum()
    vw_long_net = vw_long_gross - long_cost
    vw_short_gross = -(vw_short_weighted)
    vw_short_net = -(vw_short_weighted + short_cost)

    # Long-short is the sum of both legs, so the net version pays cost twice
    return {
        'equal_long_log_return_with_cost': ew_long_net,
        'equal_short_log_return_with_cost': ew_short_net,
        'equal_long_short_log_return_with_cost': ew_long_net + ew_short_net,
        'equal_long_log_return_without_cost': ew_long_gross,
        'equal_short_log_return_without_cost': ew_short_gross,
        'equal_long_short_log_return_without_cost': ew_long_gross + ew_short_gross,
        'value_long_log_return_with_cost': vw_long_net,
        'value_short_log_return_with_cost': vw_short_net,
        'value_long_short_log_return_with_cost': vw_long_net + vw_short_net,
        'value_long_log_return_without_cost': vw_long_gross,
        'value_short_log_return_without_cost': vw_short_gross,
        'value_long_short_log_return_without_cost': vw_long_gross + vw_short_gross
    }

# Compute cumulative returns for each date with daily rebalancing
# Collector for the output table: a 'date' list plus one list per portfolio
# column; every list receives the running total after each processed date.
cumulative_log_returns_by_date_timesfm2252  = {
    'date': [],
    'cum_EL_return_252_with_cost': [],
    'cum_ES_return_252_with_cost': [],
    'cum_ELS_return_252_with_cost': [],
    'cum_VL_return_252_with_cost': [],
    'cum_VS_return_252_with_cost': [],
    'cum_VLS_return_252_with_cost': [],
    'cum_EL_return_252_without_cost': [],
    'cum_ES_return_252_without_cost': [],
    'cum_ELS_return_252_without_cost': [],
    'cum_VL_return_252_without_cost': [],
    'cum_VS_return_252_without_cost': [],
    'cum_VLS_return_252_without_cost': []
}

# Initialize cumulative returns for lag 252
# Running sums of the daily log returns. Naming: E/V = equal-/value-weighted,
# L/S/LS = long leg / short leg / long-short, 252 = window size.
cum_EL_return_252_with_cost = 0
cum_ES_return_252_with_cost = 0
cum_ELS_return_252_with_cost = 0
cum_VL_return_252_with_cost = 0
cum_VS_return_252_with_cost = 0
cum_VLS_return_252_with_cost = 0

cum_EL_return_252_without_cost = 0
cum_ES_return_252_without_cost = 0
cum_ELS_return_252_without_cost = 0
cum_VL_return_252_without_cost = 0
cum_VS_return_252_without_cost = 0
cum_VLS_return_252_without_cost = 0

# Iterate over each date to compute returns for lag 252 portfolios
# unique() yields dates in order of first appearance in the frame.
# NOTE(review): presumably crsp_test_lagged is already sorted by date - confirm.
for date in crsp_test_lagged['date'].unique():
    # All rows (stocks) observed on this date
    group = crsp_test_lagged[crsp_test_lagged['date'] == date]

    # Rank the day's stocks on the lag-252 TimesFM2 prediction column
    predicted_col = 'timesfm2_252_predicted_excess_returns_lag252'
    returns = compute_returns(group, predicted_col)

    # Update cumulative returns with daily values for lag 252
    # Log returns are additive, so the running total is a plain sum; a NaN
    # returned for any date carries over into every later total.
    cum_EL_return_252_with_cost += returns['equal_long_log_return_with_cost']
    cum_ES_return_252_with_cost += returns['equal_short_log_return_with_cost']
    cum_ELS_return_252_with_cost += returns['equal_long_short_log_return_with_cost']
    cum_VL_return_252_with_cost += returns['value_long_log_return_with_cost']
    cum_VS_return_252_with_cost += returns['value_short_log_return_with_cost']
    cum_VLS_return_252_with_cost += returns['value_long_short_log_return_with_cost']

    cum_EL_return_252_without_cost += returns['equal_long_log_return_without_cost']
    cum_ES_return_252_without_cost += returns['equal_short_log_return_without_cost']
    cum_ELS_return_252_without_cost += returns['equal_long_short_log_return_without_cost']
    cum_VL_return_252_without_cost += returns['value_long_log_return_without_cost']
    cum_VS_return_252_without_cost += returns['value_short_log_return_without_cost']
    cum_VLS_return_252_without_cost += returns['value_long_short_log_return_without_cost']

    # Append results for the day for lag 252 portfolios
    # (a snapshot of each running total, so row i holds the sum over dates 0..i)
    cumulative_log_returns_by_date_timesfm2252['date'].append(date)
    cumulative_log_returns_by_date_timesfm2252['cum_EL_return_252_with_cost'].append(cum_EL_return_252_with_cost)
    cumulative_log_returns_by_date_timesfm2252['cum_ES_return_252_with_cost'].append(cum_ES_return_252_with_cost)
    cumulative_log_returns_by_date_timesfm2252['cum_ELS_return_252_with_cost'].append(cum_ELS_return_252_with_cost)
    cumulative_log_returns_by_date_timesfm2252['cum_VL_return_252_with_cost'].append(cum_VL_return_252_with_cost)
    cumulative_log_returns_by_date_timesfm2252['cum_VS_return_252_with_cost'].append(cum_VS_return_252_with_cost)
    cumulative_log_returns_by_date_timesfm2252['cum_VLS_return_252_with_cost'].append(cum_VLS_return_252_with_cost)
    cumulative_log_returns_by_date_timesfm2252['cum_EL_return_252_without_cost'].append(cum_EL_return_252_without_cost)
    cumulative_log_returns_by_date_timesfm2252['cum_ES_return_252_without_cost'].append(cum_ES_return_252_without_cost)
    cumulative_log_returns_by_date_timesfm2252['cum_ELS_return_252_without_cost'].append(cum_ELS_return_252_without_cost)
    cumulative_log_returns_by_date_timesfm2252['cum_VL_return_252_without_cost'].append(cum_VL_return_252_without_cost)
    cumulative_log_returns_by_date_timesfm2252['cum_VS_return_252_without_cost'].append(cum_VS_return_252_without_cost)
    cumulative_log_returns_by_date_timesfm2252['cum_VLS_return_252_without_cost'].append(cum_VLS_return_252_without_cost)

# Convert to DataFrame for lag 252 (one row per date, one column per portfolio)
cumulative_log_returns_timesfm2_lag_252 = pd.DataFrame(cumulative_log_returns_by_date_timesfm2252)

# Display the cumulative returns DataFrame for lag 252
display(cumulative_log_returns_timesfm2_lag_252.head())

# Saving the DataFrame as a CSV file (written to the current working directory)
cumulative_log_returns_timesfm2_lag_252.to_csv("cumulative_log_timesfm2_lag_252.csv", index=False)

In [None]:
# Total compounded return over the whole sample
def cumulative_return(daily_returns):
    """Compound a series of per-period returns into a single total return.

    Every return r contributes a growth factor (1 + r); the product of all
    factors minus one is returned. An empty array gives 0.0.
    """
    growth_factors = 1 + daily_returns
    total_growth = np.prod(growth_factors)
    return total_growth - 1

# Compounded return rescaled to one year
def annualized_return(daily_returns, periods=252):
    """Annualize the compounded return of a series of per-period returns.

    Args:
        daily_returns: Array of per-period returns; must not be empty,
            because its length is used as a divisor.
        periods: Number of return observations that make up one year.

    Returns:
        The compounded total return raised to ``periods / len(daily_returns)``,
        expressed as a return (growth factor minus one).
    """
    compounded = np.prod(1 + daily_returns) - 1
    # Fraction of a year covered by the sample, inverted
    year_exponent = periods / len(daily_returns)
    return (1 + compounded) ** year_exponent - 1

# Annualized Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Return the annualized Sharpe ratio of a series of per-period returns.

    Args:
        daily_returns: Array-like supporting subtraction, ``.mean()`` and ``.std()``.
        risk_free_rate: Annual risk-free rate; it is divided evenly over `periods`.
        periods: Number of return observations per year (252 trading days by default).

    Returns:
        ``sqrt(periods) * mean(excess) / std(excess)``, where the standard
        deviation is whatever ``.std()`` of the input type computes.
    """
    per_period_rf = risk_free_rate / periods
    surplus = daily_returns - per_period_rf
    annualizer = np.sqrt(periods)
    return annualizer * surplus.mean() / surplus.std()

# Annualized volatility of the daily returns
def calculate_volatility(daily_returns, periods=252):
    """Scale the standard deviation of per-period returns to a yearly figure.

    Args:
        daily_returns: Array of per-period returns.
        periods: Number of return observations per year.

    Returns:
        ``np.std(daily_returns)`` multiplied by ``sqrt(periods)``.
    """
    per_period_std = np.std(daily_returns)
    scale = np.sqrt(periods)
    return per_period_std * scale

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Return the largest peak-to-trough loss of the compounded return path.

    The wealth path starts at 1.0 and is compounded with (1 + r) for every
    return. The drawdown at each step is the relative distance to the highest
    wealth seen so far, the starting wealth included.

    Args:
        daily_returns: Array-like of per-period returns (may be empty).

    Returns:
        The most negative drawdown as a fraction (e.g. -0.5 for a 50% loss),
        or 0.0 when the path never falls below an earlier high.
    """
    returns = np.asarray(daily_returns, dtype=float)
    # Prepend the starting wealth so that losses on the very first days are
    # measured against the initial capital instead of being ignored; this
    # also makes empty input well-defined (drawdown 0.0)
    wealth = np.concatenate(([1.0], np.cumprod(1 + returns)))
    running_peak = np.maximum.accumulate(wealth)
    drawdown = (wealth - running_peak) / running_peak
    return np.min(drawdown)

# Charge a fixed cost against every daily return
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Subtract a constant transaction cost from each return.

    Args:
        daily_returns: Array of per-period returns.
        transaction_cost: Amount deducted from every element (default 0.001).

    Returns:
        A new array with the cost removed from each return.
    """
    net_returns = daily_returns - transaction_cost
    return net_returns

# Scale every daily return down by a proportional cost
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Reduce each return by a fraction of itself.

    Args:
        daily_returns: Array of per-period returns.
        transaction_cost_percentage: Fraction of each return that is removed
            (default 0.001).

    Returns:
        A new array equal to ``daily_returns * (1 - transaction_cost_percentage)``.
    """
    retained_share = 1 - transaction_cost_percentage
    return daily_returns * retained_share

# Columns of the lag-252 table that hold the cost-adjusted cumulative returns
portfolios_with_cost = [
    'cum_EL_return_252_with_cost',
    'cum_ES_return_252_with_cost',
    'cum_ELS_return_252_with_cost',
    'cum_VL_return_252_with_cost',
    'cum_VS_return_252_with_cost',
    'cum_VLS_return_252_with_cost',
]

# Result table under construction: column label -> one entry per portfolio
metrics = {
    label: []
    for label in (
        'Portfolio',
        'Annualized Return',
        'Sharpe Ratio',
        'Volatility',
        'Standard Deviation',
        'Max Drawdown',
        'Cumulative Return',
    )
}

# Evaluate every cost-adjusted portfolio column in turn
for portfolio in portfolios_with_cost:
    # Running cumulative series of this portfolio as a plain array
    cumulative_returns_timesfm2252_c = cumulative_log_returns_timesfm2_lag_252[portfolio].values

    # First differences give the per-day increments (one element fewer than the input)
    daily_returns = np.diff(cumulative_returns_timesfm2252_c)

    # Two cost-adjusted variants: a fixed deduction (used for the return-based
    # metrics) and a proportional scaling (used for the dispersion metrics).
    # NOTE(review): the *_with_cost columns appear to be net of transaction_cost
    # already when they are built; confirm that charging 0.001 again is intended.
    daily_returns_after_cost_fixed = apply_transaction_cost_fixed(
        daily_returns, transaction_cost=0.001
    )
    daily_returns_after_cost_percentage = apply_transaction_cost_percentage(
        daily_returns, transaction_cost_percentage=0.001
    )

    # Return-based metrics on the fixed-cost series, dispersion on the scaled one
    cum_return_after_cost = cumulative_return(daily_returns_after_cost_fixed)
    ann_return = annualized_return(daily_returns_after_cost_fixed)
    sharpe = sharpe_ratio(daily_returns_after_cost_fixed)
    vol = calculate_volatility(daily_returns_after_cost_percentage)
    max_draw = maximum_drawdown(daily_returns_after_cost_fixed)
    std_dev = np.std(daily_returns_after_cost_percentage)

    # Collect this portfolio's row and append it column by column
    _row = {
        'Portfolio': portfolio,
        'Annualized Return': ann_return,
        'Sharpe Ratio': sharpe,
        'Volatility': vol,
        'Standard Deviation': std_dev,
        'Max Drawdown': max_draw,
        'Cumulative Return': cum_return_after_cost,
    }
    for _label, _entry in _row.items():
        metrics[_label].append(_entry)

# Turn the collected columns into a DataFrame and show it
metrics_timesfm2252_c = pd.DataFrame(metrics)
display(metrics_timesfm2252_c)

# Same calculations for the portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_252_without_cost', 'cum_ES_return_252_without_cost', 'cum_ELS_return_252_without_cost',
    'cum_VL_return_252_without_cost', 'cum_VS_return_252_without_cost', 'cum_VLS_return_252_without_cost',
]

# Initialize metrics container for portfolios without transaction cost
# (column label -> list with one entry per portfolio)
metrics_wc = {
    'Portfolio': [],
    'Annualized Return': [],
    'Sharpe Ratio': [],
    'Volatility': [],
    'Standard Deviation': [],
    'Max Drawdown': [],
    'Cumulative Return': []
}

# Calculate metrics for each portfolio without transaction cost
for portfolio in portfolios_without_cost:
    # Running cumulative log return of this lag-252 portfolio as a plain array
    cumulative_returns_timsfm1252_wc = cumulative_log_returns_timesfm2_lag_252[portfolio].values

    # Difference this portfolio's own series. Using an array left over from
    # another window's cell here would make all six rows report the same,
    # unrelated numbers.
    # NOTE(review): np.diff drops the first day's value, and the increments are
    # log returns that the helpers compound as simple returns - confirm intent.
    daily_returns = np.diff(cumulative_returns_timsfm1252_wc)

    # Calculate cumulative returns without transaction cost
    cum_return_without_cost = cumulative_return(daily_returns)

    # Metrics calculations
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)

    # Standard Deviation of daily returns without cost
    std_dev = np.std(daily_returns)

    # Store results
    metrics_wc['Portfolio'].append(portfolio)
    metrics_wc['Annualized Return'].append(ann_return)
    metrics_wc['Sharpe Ratio'].append(sharpe)
    metrics_wc['Volatility'].append(vol)
    metrics_wc['Standard Deviation'].append(std_dev)
    metrics_wc['Max Drawdown'].append(max_draw)
    metrics_wc['Cumulative Return'].append(cum_return_without_cost)

# Convert the results into a DataFrame for analysis
metrics_timesfm2252_wc = pd.DataFrame(metrics_wc)
display(metrics_timesfm2252_wc)

### Window Size 512

In [None]:
# Flat transaction cost (10bps = 0.001)
def calculate_transaction_cost(row):
    """Return the transaction cost charged for one row.

    Args:
        row: The row being priced. It is not inspected, so every row
            (small and large cap alike) gets the same cost.

    Returns:
        The constant 0.001, i.e. 10 basis points.
    """
    flat_cost = 0.001
    return flat_cost

# The cost does not depend on the row, so evaluate it once and broadcast the
# scalar to the whole column instead of calling the function for every row
# through DataFrame.apply (slow, and ill-defined on an empty frame).
crsp_test_lagged.loc[:, 'transaction_cost'] = calculate_transaction_cost(None)

# Use the previously defined `rolling_forecast_tfm2` to predict excess returns
def timesfm2_512_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged, lags=None):
    """Run the rolling forecast and expose its predictions under model-specific names.

    Args:
        crsp_train_lagged: Training frame, passed through to `rolling_forecast_tfm2`.
        crsp_test_lagged: Test frame, passed through to `rolling_forecast_tfm2`.
        lags: Window sizes to forecast with. Defaults to ``[512]``.

    Returns:
        The test frame returned by `rolling_forecast_tfm2`, with one extra
        column ``timesfm2_512_predicted_excess_returns_lag{lag}`` for every lag
        whose ``predicted_excess_returns_lag{lag}`` column is present.
    """
    # Build the default per call so no list object is shared between calls
    if lags is None:
        lags = [512]

    # Only the second return value (the updated test frame) is needed here
    _, crsp_test_lagged = rolling_forecast_tfm2(crsp_train_lagged, crsp_test_lagged, lags)

    # Copy each available prediction column to its model-specific name;
    # lags without a prediction column are skipped silently
    for lag in lags:
        pred_col = f'predicted_excess_returns_lag{lag}'
        if pred_col in crsp_test_lagged.columns:
            crsp_test_lagged[f'timesfm2_512_predicted_excess_returns_lag{lag}'] = crsp_test_lagged[pred_col].values

    return crsp_test_lagged

# Get predicted excess returns using TimesFM2 model
crsp_test_lagged = timesfm2_512_predicted_excess_returns(crsp_train_lagged, crsp_test_lagged)

# Portfolio Construction (Top 10% Long, Bottom 10% Short)
def compute_returns(group, predicted_col):
    """Compute one cross-section's long, short and long-short log returns.

    Rows of ``group`` are ranked by ``predicted_col``: the top 10% form the
    long leg and the bottom 10% the short leg. Each leg is evaluated
    equal-weighted and value-weighted (weights from ``market_cap_merged``),
    both with and without the leg's mean ``transaction_cost``.

    Args:
        group: DataFrame for a single rebalancing date. Must contain
            ``predicted_col``, ``adjusted_ret``, ``market_cap_merged`` and
            ``transaction_cost``.
        predicted_col: Name of the column holding the predicted returns.

    Returns:
        dict with twelve entries named
        ``{equal|value}_{long|short|long_short}_log_return_{with|without}_cost``.
        Short-leg values are the negated log return of the shorted stocks;
        long-short values are the sum of the long and short legs.
    """
    # int() truncates, so a cross-section with fewer than 10 rows used to
    # produce empty legs and NaN returns (which then poison any running sum
    # built from them). Always keep at least one stock per leg.
    n_select = max(1, int(0.1 * len(group)))
    top_positive = group.nlargest(n_select, predicted_col)
    top_negative = group.nsmallest(n_select, predicted_col)

    def _leg_log_returns(leg, direction):
        """Return (equal-weighted, value-weighted, mean cost) for one leg.

        ``direction`` is +1 for a long leg and -1 for a short leg.
        """
        log_returns = np.log1p(leg['adjusted_ret'])
        market_caps = leg['market_cap_merged']
        equal_weighted = direction * log_returns.mean()
        value_weighted = direction * ((log_returns * market_caps).sum() / market_caps.sum())
        return equal_weighted, value_weighted, leg['transaction_cost'].mean()

    equal_long, value_long, long_cost = _leg_log_returns(top_positive, 1)
    equal_short, value_short, short_cost = _leg_log_returns(top_negative, -1)

    # The cost is charged on each leg separately, so a long-short portfolio
    # pays it twice (once per leg).
    equal_long_with_cost = equal_long - long_cost
    equal_short_with_cost = equal_short - short_cost
    value_long_with_cost = value_long - long_cost
    value_short_with_cost = value_short - short_cost

    return {
        'equal_long_log_return_with_cost': equal_long_with_cost,
        'equal_short_log_return_with_cost': equal_short_with_cost,
        'equal_long_short_log_return_with_cost': equal_long_with_cost + equal_short_with_cost,
        'equal_long_log_return_without_cost': equal_long,
        'equal_short_log_return_without_cost': equal_short,
        'equal_long_short_log_return_without_cost': equal_long + equal_short,
        'value_long_log_return_with_cost': value_long_with_cost,
        'value_short_log_return_with_cost': value_short_with_cost,
        'value_long_short_log_return_with_cost': value_long_with_cost + value_short_with_cost,
        'value_long_log_return_without_cost': value_long,
        'value_short_log_return_without_cost': value_short,
        'value_long_short_log_return_without_cost': value_long + value_short
    }

# Cumulative (running-sum) log returns per date with daily rebalancing, window size 512.
# Stored column-wise: one list per output column, one entry appended per date.
cumulative_log_returns_by_date_timesfm2512 = {
    column_name: []
    for column_name in (
        'date',
        'cum_EL_return_512_with_cost',
        'cum_ES_return_512_with_cost',
        'cum_ELS_return_512_with_cost',
        'cum_VL_return_512_with_cost',
        'cum_VS_return_512_with_cost',
        'cum_VLS_return_512_with_cost',
        'cum_EL_return_512_without_cost',
        'cum_ES_return_512_without_cost',
        'cum_ELS_return_512_without_cost',
        'cum_VL_return_512_without_cost',
        'cum_VS_return_512_without_cost',
        'cum_VLS_return_512_without_cost',
    )
}

# Running totals start at zero (E = equal-weighted, V = value-weighted;
# L = long, S = short, LS = long-short).
cum_EL_return_512_with_cost = cum_ES_return_512_with_cost = cum_ELS_return_512_with_cost = 0
cum_VL_return_512_with_cost = cum_VS_return_512_with_cost = cum_VLS_return_512_with_cost = 0

cum_EL_return_512_without_cost = cum_ES_return_512_without_cost = cum_ELS_return_512_without_cost = 0
cum_VL_return_512_without_cost = cum_VS_return_512_without_cost = cum_VLS_return_512_without_cost = 0

# Column with the lag-512 TimesFM2 predictions used to rank the stocks
predicted_col = 'timesfm2_512_predicted_excess_returns_lag512'

# Walk through the dates in order of first appearance and rebalance on each one
for date in crsp_test_lagged['date'].unique():
    group = crsp_test_lagged[crsp_test_lagged['date'] == date]
    returns = compute_returns(group, predicted_col)

    # Add the day's log returns to the running totals (with cost)
    cum_EL_return_512_with_cost = cum_EL_return_512_with_cost + returns['equal_long_log_return_with_cost']
    cum_ES_return_512_with_cost = cum_ES_return_512_with_cost + returns['equal_short_log_return_with_cost']
    cum_ELS_return_512_with_cost = cum_ELS_return_512_with_cost + returns['equal_long_short_log_return_with_cost']
    cum_VL_return_512_with_cost = cum_VL_return_512_with_cost + returns['value_long_log_return_with_cost']
    cum_VS_return_512_with_cost = cum_VS_return_512_with_cost + returns['value_short_log_return_with_cost']
    cum_VLS_return_512_with_cost = cum_VLS_return_512_with_cost + returns['value_long_short_log_return_with_cost']

    # ... and without cost
    cum_EL_return_512_without_cost = cum_EL_return_512_without_cost + returns['equal_long_log_return_without_cost']
    cum_ES_return_512_without_cost = cum_ES_return_512_without_cost + returns['equal_short_log_return_without_cost']
    cum_ELS_return_512_without_cost = cum_ELS_return_512_without_cost + returns['equal_long_short_log_return_without_cost']
    cum_VL_return_512_without_cost = cum_VL_return_512_without_cost + returns['value_long_log_return_without_cost']
    cum_VS_return_512_without_cost = cum_VS_return_512_without_cost + returns['value_short_log_return_without_cost']
    cum_VLS_return_512_without_cost = cum_VLS_return_512_without_cost + returns['value_long_short_log_return_without_cost']

    # Record the state of every running total after this date
    running_totals = {
        'date': date,
        'cum_EL_return_512_with_cost': cum_EL_return_512_with_cost,
        'cum_ES_return_512_with_cost': cum_ES_return_512_with_cost,
        'cum_ELS_return_512_with_cost': cum_ELS_return_512_with_cost,
        'cum_VL_return_512_with_cost': cum_VL_return_512_with_cost,
        'cum_VS_return_512_with_cost': cum_VS_return_512_with_cost,
        'cum_VLS_return_512_with_cost': cum_VLS_return_512_with_cost,
        'cum_EL_return_512_without_cost': cum_EL_return_512_without_cost,
        'cum_ES_return_512_without_cost': cum_ES_return_512_without_cost,
        'cum_ELS_return_512_without_cost': cum_ELS_return_512_without_cost,
        'cum_VL_return_512_without_cost': cum_VL_return_512_without_cost,
        'cum_VS_return_512_without_cost': cum_VS_return_512_without_cost,
        'cum_VLS_return_512_without_cost': cum_VLS_return_512_without_cost,
    }
    for history_column, running_total in running_totals.items():
        cumulative_log_returns_by_date_timesfm2512[history_column].append(running_total)

# One row per date, one column per portfolio
cumulative_log_returns_timesfm2_lag_512 = pd.DataFrame(cumulative_log_returns_by_date_timesfm2512)

# Preview the first rows (display is presumably provided by the notebook environment)
display(cumulative_log_returns_timesfm2_lag_512.head())

# Persist the cumulative series as CSV, without the index column
cumulative_log_returns_timesfm2_lag_512.to_csv("cumulative_log_timesfm2_lag_512.csv", index=False)

In [None]:
# Cumulative returns
def cumulative_return(daily_returns):
    """Compound a series of per-period simple returns into one total return.

    Args:
        daily_returns: Array-like supporting ``1 + daily_returns``
            (e.g. a NumPy array).

    Returns:
        ``prod(1 + r) - 1`` over all periods; 0.0 for an empty array.
    """
    growth_factors = 1 + daily_returns
    return np.prod(growth_factors) - 1

# Annualized returns
def annualized_return(daily_returns, periods=252):
    """Scale the compounded return of the sample to a ``periods``-long year.

    Args:
        daily_returns: Array of per-period simple returns.
        periods: Number of periods per year (252 by default).

    Returns:
        ``(1 + total_return) ** (periods / n) - 1`` where ``n`` is the
        number of observations.
    """
    total_return = np.prod(1 + daily_returns) - 1
    annualization_exponent = periods / len(daily_returns)
    annual_growth = (1 + total_return) ** annualization_exponent
    return annual_growth - 1

# Sharpe ratio
def sharpe_ratio(daily_returns, risk_free_rate=0.01, periods=252):
    """Annualized Sharpe ratio of a per-period return series.

    Args:
        daily_returns: Array of per-period returns; must provide ``.mean()``
            and ``.std()`` after subtracting a scalar.
        risk_free_rate: Annual risk-free rate, spread evenly over ``periods``.
        periods: Number of periods per year (252 by default).

    Returns:
        ``sqrt(periods) * mean(excess) / std(excess)``. The standard
        deviation is whatever the input's own ``.std()`` computes.
    """
    per_period_rf = risk_free_rate / periods  # Assuming 252 trading days
    surplus = daily_returns - per_period_rf
    annualization = np.sqrt(periods)
    return annualization * surplus.mean() / surplus.std()

# Calculate volatility (standard deviation) of daily returns
def calculate_volatility(daily_returns, periods=252):
    return np.std(daily_returns) * np.sqrt(periods)

# Calculate maximum drawdown
def maximum_drawdown(daily_returns):
    """Largest peak-to-trough loss of the compounded return path.

    Args:
        daily_returns: Array of per-period simple returns.

    Returns:
        The minimum of ``(wealth - running_peak) / running_peak``, i.e. a
        value <= 0 (0.0 when the path never falls below an earlier peak).
    """
    # Wealth path of 1 unit invested at the start
    wealth_path = np.cumprod(1 + daily_returns)
    # Highest wealth reached up to and including each period
    running_peak = np.maximum.accumulate(wealth_path)
    relative_shortfall = (wealth_path - running_peak) / running_peak
    return np.min(relative_shortfall)

# Apply fixed transaction cost to the daily returns
def apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001):
    """Subtract a constant cost from every per-period return.

    Args:
        daily_returns: Scalar or array of per-period returns.
        transaction_cost: Amount deducted from each return (0.001 by default).

    Returns:
        ``daily_returns - transaction_cost``, same shape as the input.
    """
    net_of_cost = daily_returns - transaction_cost
    return net_of_cost

# Apply percentage-based transaction cost to the daily returns
def apply_transaction_cost_percentage(daily_returns, transaction_cost_percentage=0.001):
    """Scale every per-period return by ``1 - transaction_cost_percentage``.

    Args:
        daily_returns: Scalar or array of per-period returns.
        transaction_cost_percentage: Fraction of each return given up to
            costs (0.001 by default).

    Returns:
        ``daily_returns * (1 - transaction_cost_percentage)``.
    """
    retained_fraction = 1 - transaction_cost_percentage
    return daily_returns * retained_fraction

# Portfolio columns to evaluate (with transaction cost)
portfolios_with_cost = [
    'cum_EL_return_512_with_cost',
    'cum_ES_return_512_with_cost',
    'cum_ELS_return_512_with_cost',
    'cum_VL_return_512_with_cost',
    'cum_VS_return_512_with_cost',
    'cum_VLS_return_512_with_cost',
]

# One list per metric; each portfolio contributes one entry to every list
metrics = {
    metric_name: []
    for metric_name in (
        'Portfolio',
        'Annualized Return',
        'Sharpe Ratio',
        'Volatility',
        'Standard Deviation',
        'Max Drawdown',
        'Cumulative Return',
    )
}

# Evaluate every with-cost portfolio
for portfolio in portfolios_with_cost:
    cumulative_returns_timesfm2512_c = cumulative_log_returns_timesfm2_lag_512[portfolio].values

    # Per-period returns as first differences of the cumulative series.
    # NOTE(review): np.diff yields one value fewer than the series, so the first
    # date's return is not part of the metrics - confirm this is intended.
    # NOTE(review): these are log returns, while the metric helpers compound
    # them as simple returns (prod(1 + r)) - confirm this is intended.
    daily_returns = np.diff(cumulative_returns_timesfm2512_c)

    # Fixed cost variant feeds return, Sharpe and drawdown metrics.
    # NOTE(review): the *_with_cost series already had transaction_cost subtracted
    # in compute_returns, so this looks like a second deduction - confirm.
    daily_returns_after_cost_fixed = apply_transaction_cost_fixed(daily_returns, transaction_cost=0.001)

    # Percentage variant feeds volatility and standard deviation
    daily_returns_after_cost_percentage = apply_transaction_cost_percentage(
        daily_returns, transaction_cost_percentage=0.001
    )

    # Return-based metrics (fixed-cost returns)
    cum_return_after_cost = cumulative_return(daily_returns_after_cost_fixed)
    ann_return = annualized_return(daily_returns_after_cost_fixed)
    sharpe = sharpe_ratio(daily_returns_after_cost_fixed)

    # Dispersion metrics (percentage-cost returns) and drawdown (fixed-cost returns)
    vol = calculate_volatility(daily_returns_after_cost_percentage)
    max_draw = maximum_drawdown(daily_returns_after_cost_fixed)
    std_dev = np.std(daily_returns_after_cost_percentage)

    # Append this portfolio's row to the column lists
    metrics_row = {
        'Portfolio': portfolio,
        'Annualized Return': ann_return,
        'Sharpe Ratio': sharpe,
        'Volatility': vol,
        'Standard Deviation': std_dev,
        'Max Drawdown': max_draw,
        'Cumulative Return': cum_return_after_cost,
    }
    for metric_name, metric_value in metrics_row.items():
        metrics[metric_name].append(metric_value)

# Tabulate the with-cost metrics and show them
metrics_timesfm2512_c = pd.DataFrame(metrics)
display(metrics_timesfm2512_c)

# Same calculations for the portfolios without transaction cost
portfolios_without_cost = [
    'cum_EL_return_512_without_cost',
    'cum_ES_return_512_without_cost',
    'cum_ELS_return_512_without_cost',
    'cum_VL_return_512_without_cost',
    'cum_VS_return_512_without_cost',
    'cum_VLS_return_512_without_cost',
]

# One list per metric; each portfolio contributes one entry to every list
metrics_wc = {
    metric_name: []
    for metric_name in (
        'Portfolio',
        'Annualized Return',
        'Sharpe Ratio',
        'Volatility',
        'Standard Deviation',
        'Max Drawdown',
        'Cumulative Return',
    )
}

# Evaluate every without-cost portfolio
for portfolio in portfolios_without_cost:
    cumulative_returns_timsfm1512_wc = cumulative_log_returns_timesfm2_lag_512[portfolio].values

    # Per-period returns as first differences of the cumulative log-return series.
    # NOTE(review): np.diff yields one value fewer than the series, so the first
    # date's return is not part of the metrics - confirm this is intended.
    # NOTE(review): these are log returns, while the metric helpers compound
    # them as simple returns (prod(1 + r)) - confirm this is intended.
    daily_returns = np.diff(cumulative_returns_timsfm1512_wc)

    # All metrics use the raw (cost-free) returns
    cum_return_without_cost = cumulative_return(daily_returns)
    ann_return = annualized_return(daily_returns)
    sharpe = sharpe_ratio(daily_returns)
    vol = calculate_volatility(daily_returns)
    max_draw = maximum_drawdown(daily_returns)
    std_dev = np.std(daily_returns)

    # Append this portfolio's row to the column lists
    metrics_row = {
        'Portfolio': portfolio,
        'Annualized Return': ann_return,
        'Sharpe Ratio': sharpe,
        'Volatility': vol,
        'Standard Deviation': std_dev,
        'Max Drawdown': max_draw,
        'Cumulative Return': cum_return_without_cost,
    }
    for metric_name, metric_value in metrics_row.items():
        metrics_wc[metric_name].append(metric_value)

# Tabulate the without-cost metrics and show them
metrics_timesfm2512_wc = pd.DataFrame(metrics_wc)
display(metrics_timesfm2512_wc)

# Write both metric tables to CSV (index column omitted):
# with transaction costs first, then without.
for metrics_table, csv_name in (
    (metrics_timesfm2512_c, 'metrics_timesfm512_with_cost.csv'),
    (metrics_timesfm2512_wc, 'metrics_timesfm512_without_cost.csv'),
):
    metrics_table.to_csv(csv_name, index=False)