In [2]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

def gmv_weights(Theta_hat):
    """
    Compute Global Minimum Variance (GMV) portfolio weights.

    Parameters:
    -----------
    Theta_hat : np.ndarray, shape (p, p)
        Precision matrix (Sigma^{-1})

    Returns:
    --------
    w_star : np.ndarray, shape (p,)
        (Theta 1) / (1' Theta 1); equal weights 1/p when the normalising
        constant 1' Theta 1 is numerically zero (|.| < 1e-10).
    """
    n_assets = Theta_hat.shape[0]
    unit = np.ones(n_assets)

    # Normalising constant 1' Theta 1, evaluated left to right.
    scale = unit @ Theta_hat @ unit

    # Degenerate precision matrix: fall back to the equal-weight portfolio.
    if np.abs(scale) < 1e-10:
        return unit / n_assets

    return (Theta_hat @ unit) / scale

def mv_weights(Theta_hat, mu, target_return=0.01):
    """
    Compute Mean-Variance portfolio weights with target return.

    Closed form used below, with A = 1' Theta 1, B = 1' Theta mu,
    C = mu' Theta mu and D = A*C - B^2:

        w = [(C - B*r) * Theta 1 + (A*r - B) * Theta mu] / D

    Parameters:
    -----------
    Theta_hat : np.ndarray, shape (p, p)
        Precision matrix (Sigma^{-1})
    mu : np.ndarray, shape (p,)
        Expected returns
    target_return : float
        Target portfolio return r (default: 0.01 = 1% monthly)

    Returns:
    --------
    w_star : np.ndarray, shape (p,)
        Portfolio weights. When D is numerically zero the GMV weights
        (Theta 1) / A are returned; when A is numerically zero as well,
        equal weights 1/p are returned.
    """
    n_assets = Theta_hat.shape[0]
    unit = np.ones(n_assets)

    # Scalar building blocks of the efficient frontier.
    unit_theta = unit @ Theta_hat
    A = unit_theta @ unit
    B = unit_theta @ mu
    C = mu @ Theta_hat @ mu
    D = A * C - B * B

    # Singular system: the target-return constraint cannot be separated
    # from the budget constraint, so drop it.
    if np.abs(D) < 1e-10:
        if np.abs(A) > 1e-10:
            return (Theta_hat @ unit) / A
        return unit / n_assets

    # Lagrange multipliers for the budget and the return constraints.
    budget_mult = (C - B * target_return) / D
    return_mult = (A * target_return - B) / D

    return budget_mult * (Theta_hat @ unit) + return_mult * (Theta_hat @ mu)


def msr_weights(Theta_hat, mu):
    """
    Compute Maximum Sharpe Ratio portfolio weights.

    Parameters:
    -----------
    Theta_hat : np.ndarray, shape (p, p)
        Precision matrix (Sigma^{-1})
    mu : np.ndarray, shape (p,)
        Expected excess returns

    Returns:
    --------
    w_star : np.ndarray, shape (p,)
        Theta mu rescaled to sum to 1; equal weights 1/p when the sum of
        Theta mu is numerically zero (|.| < 1e-10).
    """
    n_assets = Theta_hat.shape[0]

    # Direction of the tangency portfolio before rescaling.
    raw = Theta_hat @ mu
    total = np.sum(raw)

    # A vanishing sum cannot be rescaled to 1: use the equal-weight portfolio.
    if np.abs(total) < 1e-10:
        return np.ones(n_assets) / n_assets

    return raw / total


import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, LassoCV, LinearRegression
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
import time
from decimal import Decimal, ROUND_HALF_UP

# Import DNN-FM helper functions (assumed to be available)
from DNNFM_functions import *
from comp_m_functions import static_factor_obs, cov_e_poet

def round(x, ndigits=None):
    """
    Round half up (ties go away from zero), shadowing the builtin `round`.

    The builtin rounds ties to the nearest even value; this override is used
    where half-up behaviour is wanted. Because it replaces the builtin for the
    whole module, it accepts the builtin's optional second argument so that
    calls of the form ``round(value, 2)`` keep working.

    Parameters:
    -----------
    x : int, float or str
        Value to round; converted with ``Decimal(x)``, so a float is rounded
        according to its exact binary value.
    ndigits : int or None
        Number of decimal places to keep. ``None`` (default) rounds to an
        integer.

    Returns:
    --------
    int when ``ndigits`` is None, otherwise float.
    """
    value = Decimal(x)
    if ndigits is None:
        return int(value.to_integral_value(rounding=ROUND_HALF_UP))
    # Decimal(1).scaleb(-2) == Decimal('0.01'): the quantum for two decimals.
    quantum = Decimal(1).scaleb(-ndigits)
    return float(value.quantize(quantum, rounding=ROUND_HALF_UP))


def DNN_FM_main(data, data_factor, architecture=1, const_err_cov=2.5, use_CV_err=False, eval_type='frob'):
    """
    Main DNN-FM function. Simplified version focusing on core functionality.

    Runs three stages in order: hyper-parameter selection
    (`opt_hyper_parameters`), network fitting (`DNN_FM_core`) and covariance
    estimation (`DNN_FM_cov`, called with `check_eig=False`).

    Returns:
    --------
    dict
        The dictionary returned by `DNN_FM_core`, updated in place with the
        entries returned by `DNN_FM_cov`.
    """
    # Stage 1: tuning parameters (fixed defaults or cross-validated).
    tuned = opt_hyper_parameters(
        data=data,
        data_F=data_factor,
        architecture=architecture,
        const_err_cov=const_err_cov,
        use_CV_err=use_CV_err,
        eval_type=eval_type,
    )

    # Stage 2: fit the network with those parameters.
    fitted = DNN_FM_core(
        data=data,
        data_factor=data_factor,
        architecture=architecture,
        opt=tuned,
    )

    # Stage 3: covariance / precision estimates from the fitted network.
    cov_parts = DNN_FM_cov(
        data=data,
        data_factor=data_factor,
        DNN_model=fitted['neural_net'],
        c_err_cov=tuned['const_err_cov'],
        check_eig=False,
    )

    fitted.update(cov_parts)
    return fitted


def DNN_FM_core(data, data_factor, architecture, opt):
    """
    Core function for creating Neural Network.

    Builds a `sparse_nn` model, fits it on (data_factor -> data) with early
    stopping on the validation loss, and evaluates the Jacobian of the
    network output with respect to the last factor observation.

    Parameters:
    -----------
    data : 2-D array
        Fit targets; its second dimension is passed to `sparse_nn` as `num_s`.
    data_factor : 2-D np.ndarray
        Fit inputs; its second dimension is passed to `sparse_nn` as `num_f`.
        Must support ``data_factor[-1, :].reshape(1, -1)``.
    architecture : int
        5 selects three hidden layers (256, 128, 64); any other value selects
        a single hidden layer of 512 units.
    opt : dict
        Must provide 'learning_rate', 'reg_par_w', 'reg_par_b' and 'el_r_pro'.

    Returns:
    --------
    res : dict
        'neural_net' (the fitted `sparse_nn` object), 'opt' (as passed in) and
        'market_sensitivity', a tuple (jacobian slice, last factor row).
    """
    _, num_s = data.shape
    num_f = data_factor.shape[1]

    # Create neural network specifications based on architecture
    if architecture == 5:
        inter_layer = False
        n_layers = 3
        d_rate = 0.2

        hidden_layer_s = [256, 128, 64]
        dropout_rates_tr = [d_rate] * (n_layers + 1)
        activation_functions = ['relu'] * n_layers + [None]
    else:
        # Default architecture
        inter_layer = False
        n_layers = 1
        d_rate = 0.2

        hidden_layer_s = [512]
        dropout_rates_tr = [d_rate, d_rate]
        activation_functions = ['relu', None]

    # Optimization options
    optimizer = 'Adam'
    max_iter = 2000
    max_iter_nc = 50
    split_ratio = 0.3
    batch_s = 256
    use_bias = False

    # Stop once the validation loss has not improved for max_iter_nc epochs.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=max_iter_nc, mode='min')

    learning_rate = opt['learning_rate']
    reg_par_w = opt['reg_par_w']
    reg_par_b = opt['reg_par_b']
    el_r_pro = opt['el_r_pro']

    # Create sparse neural network
    neural_net = sparse_nn(hidden_layer_s, activation_functions, dropout_rates_tr, num_s, num_f,
                          reg_par_w, reg_par_b, el_r_pro, max_iter_nc, max_iter, optimizer,
                          learning_rate, use_bias, inter_layer)

    neural_net.build_neural_network()
    neural_net.compile_nn()

    # Keras documents `callbacks` as a list of Callback instances, so the
    # single callback is wrapped rather than passed bare.
    neural_net.model.fit(data_factor, data, epochs=max_iter,
                         validation_split=split_ratio, shuffle=True, batch_size=batch_s,
                         callbacks=[early_stopping], verbose=0)

    # Compute market sensitivity: Jacobian of the prediction at the last
    # factor observation (a single-row batch).
    with tf.GradientTape() as tape:
        data_factor_ts = tf.convert_to_tensor(data_factor[-1,:].reshape(1,-1), dtype=tf.float32)
        tape.watch(data_factor_ts)
        y_pred = neural_net.model(data_factor_ts)

    # The Jacobian is indexed as a rank-4 tensor; both batch axes are dropped.
    # NOTE(review): presumably the remaining axes are (outputs, factors) - confirm.
    market_sens = tape.jacobian(y_pred, data_factor_ts)[-1,:,-1,:].numpy()
    market_return_t = data_factor_ts.numpy()[-1, :].reshape(1,-1)
    market_sensitivity = (market_sens, market_return_t)

    res = {'neural_net': neural_net, 'opt': opt, 'market_sensitivity': market_sensitivity}
    return res


def DNN_FM_cov(data, data_factor, DNN_model, c_err_cov, check_eig=False):
    """
    Compute covariance matrix from DNN-FM model.

    Parameters:
    -----------
    data : 2-D array
        Observed returns; its shape is unpacked as (num_n, num_s).
    data_factor : array
        Factor inputs fed to `DNN_model.model`.
    DNN_model : object
        Fitted network wrapper exposing a callable `.model`.
    c_err_cov : float
        Thresholding constant passed to `thres_resd_new`.
    check_eig : bool
        If True, the thresholding constant is increased in steps of 0.01 until
        the residual covariance has a smallest eigenvalue that rounds (to two
        decimals) to at least 0.01 and a condition number of at most
        10 * num_s.

    Returns:
    --------
    res : dict
        'sigma_hat' (fitted-value covariance + residual covariance),
        'sigma_f_hat', 'sigma_e_hat' and 'inv_sigma_hat' (from
        `sig_inv_f_nnet`).
    """
    num_n, num_s = data.shape

    y_hat_nn = DNN_model.model(data_factor).numpy()
    resd_nn = data - y_hat_nn

    sig_hat_e = thres_resd_new(resd_nn, c_err_cov, num_s, num_n)
    cov_f_nnet = np.cov(y_hat_nn.T)

    if check_eig:
        cond = True
        const_c = 0
        while cond:
            sig_hat_e = thres_resd_new(resd_nn, c_err_cov+const_c, num_s, num_n)
            min_eig = min(np.linalg.eig(sig_hat_e)[0])
            # np.round is used explicitly: this file redefines `round` as a
            # one-argument half-up helper, so `round(min_eig, 2)` would raise
            # TypeError here.
            cond = (np.round(min_eig, 2) < 0.01) or (np.linalg.cond(sig_hat_e) > num_s*10)
            const_c+=0.01

    sigma_y_nnet = cov_f_nnet + sig_hat_e

    inv_sigma_y_nnet = sig_inv_f_nnet(cov_f_nnet, sig_hat_e)

    res = {'sigma_hat': sigma_y_nnet, 'sigma_f_hat': cov_f_nnet, 'sigma_e_hat': sig_hat_e,
           'inv_sigma_hat': inv_sigma_y_nnet}

    return res


def opt_hyper_parameters(data, data_F, architecture, const_err_cov, use_CV_err, eval_type):
    """
    Determine optimal regularization and learning rate.

    Architectures 1, 5 and 7 use a weight/bias penalty of 0.0005; every other
    architecture uses no penalty. The learning rate is fixed at 0.0005.

    When `use_CV_err` is true, the error-covariance constant is chosen by
    `cv_split` over the grid 0, 0.1, ... built with
    ``np.arange(0, const_err_cov + 0.6, 0.1)``; otherwise `const_err_cov` is
    used as given.

    Returns:
    --------
    opt : dict
        Without cross-validation: keys 'learning_rate', 'reg_par_w',
        'reg_par_b', 'el_r_pro', 'const_err_cov'. With cross-validation:
        whatever `cv_split` returns.
    """
    penalty = 0.0005 if architecture in (1, 5, 7) else 0.0
    step_size = 0.0005

    if not use_CV_err:
        return {
            'learning_rate': step_size,
            'reg_par_w': penalty,
            'reg_par_b': penalty,
            'el_r_pro': 1,
            'const_err_cov': const_err_cov,
        }

    # Candidate thresholding constants for the cross-validation search.
    grid = np.arange(0, const_err_cov+0.6, 0.1)
    return cv_split(data=data, data_F=data_F, architecture=architecture,
                    reg_par=penalty, lr=step_size, range_cov_err=grid,
                    eval_type=eval_type)


def cv_split(data, data_F, architecture, reg_par, lr, range_cov_err, eval_type):
    """
    Cross-validation for hyperparameter selection.

    Fits the network once on normalised data, then scores every candidate
    thresholding constant in `range_cov_err` on 10 rolling splits of the
    residuals: the thresholded covariance of the first half of each split is
    compared with the `cov_sfm` estimate of the second half.

    Parameters:
    -----------
    data, data_F : arrays
        Returns and factors; both are passed through `normalize_dat_sim`.
    architecture : int
        Forwarded to `DNN_FM_core`.
    reg_par : float
        Used for both 'reg_par_w' and 'reg_par_b'.
    lr : float
        Learning rate.
    range_cov_err : np.ndarray
        Candidate thresholding constants.
    eval_type : str
        'frob' (squared Frobenius norm) or 'spec' (squared spectral norm).

    Returns:
    --------
    opt : dict
        'learning_rate', 'reg_par_w', 'reg_par_b', 'el_r_pro' and the selected
        'const_err_cov'.

    Raises:
    -------
    ValueError
        If `eval_type` is not 'frob' or 'spec'. Checked before any training:
        an unknown value would otherwise leave every score NaN and fail with
        an IndexError only after the network has been fitted.
    """
    if eval_type not in ('frob', 'spec'):
        raise ValueError(f"eval_type must be 'frob' or 'spec', got {eval_type!r}")

    start = time.time()

    opt = {'learning_rate': lr, 'reg_par_w': reg_par, 'reg_par_b': reg_par, 'el_r_pro': 1}

    data_dm, _, _ = normalize_dat_sim(data)
    data_F_dm, _, _ = normalize_dat_sim(data_F)

    res_DNN_FM = DNN_FM_core(data_dm, data_F_dm, architecture, opt)
    neural_net = res_DNN_FM['neural_net']

    y_hat = neural_net.model(data_F_dm).numpy()
    resd_nn = data_dm - y_hat

    n_folds = 10
    res_mat = np.empty((n_folds, len(range_cov_err)))
    res_mat[:] = np.nan

    split_sample, _ = ts_train_test_split(resd_nn, n_folds, train_size=0.5)

    # Matrix norm matching the requested evaluation criterion.
    norm_order = 'fro' if eval_type == 'frob' else 2

    for m_idx in range(n_folds):
        resd_nn_s1 = split_sample[m_idx][0]
        resd_nn_s2 = split_sample[m_idx][1]

        num_n, num_s = resd_nn_s1.shape
        sigma_test = cov_sfm(resd_nn_s2)
        sig_e_samp, thet_par = thres_cov_resd_aux(resd_nn_s1, num_s)

        for c_idx in range(len(range_cov_err)):
            sig_hat_e = thres_cov_resd(sig_e_samp, thet_par, range_cov_err[c_idx], num_s, num_n)

            # A candidate with a negative eigenvalue is ruled out via an
            # infinite score.
            if min(np.linalg.eig(sig_hat_e)[0]) < 0:
                res_mat[m_idx, c_idx] = np.inf
            else:
                res_mat[m_idx, c_idx] = np.linalg.norm(sig_hat_e - sigma_test, ord=norm_order)**2

    # Pick the first candidate whose mean score over the folds is minimal.
    mean_score = res_mat.mean(axis=0)
    idx_opt = np.where(mean_score == np.nanmin(mean_score))
    opt.update({'const_err_cov': range_cov_err[idx_opt][0]})

    print(f"CV took {time.time() - start:.2f} seconds")
    return opt


def ts_train_test_split(X, n_folds=5, train_size=0.5):
    """
    Time series train-test split.

    Produces `n_folds` rolling splits. Fold j uses rows
    [j, j + n_train) for training and the next n_test rows for testing, where
    n_train and n_test are the rounded `train_size` and `1 - train_size`
    shares of ``X.shape[0] - n_folds`` rows.

    Returns:
    --------
    split_sample : list of (train_rows, test_rows) array pairs
    split_index : list of (train_idx, test_idx) pairs of integer lists
    """
    usable_rows = X.shape[0] - n_folds

    n_train = round(usable_rows * train_size)
    n_test = round(usable_rows * (1 - train_size))

    # Each fold is the base window shifted forward by one row.
    split_index = [
        (list(range(shift, shift + n_train)),
         list(range(shift + n_train, shift + n_train + n_test)))
        for shift in range(n_folds)
    ]
    split_sample = [(X[train_idx, :], X[test_idx, :]) for train_idx, test_idx in split_index]

    return split_sample, split_index


def load_finbert_signals(signals_path):
    """
    Load FinBERT monthly signals from CSV file.

    Parameters:
    -----------
    signals_path : str
        Path to monthly_signals.csv file

    Returns:
    --------
    signals_df : pd.DataFrame
        The CSV contents plus a 'date' column holding the month-end timestamp
        derived from 'year_month'. If the file does not exist, a warning is
        printed and an empty frame with columns symbol, company, year_month,
        signal, date is returned instead.
    """
    try:
        loaded = pd.read_csv(signals_path)
    except FileNotFoundError as e:
        print(f"  ⚠ Warning: Could not load FinBERT signals: {e}")
        empty_columns = ['symbol', 'company', 'year_month', 'signal', 'date']
        return pd.DataFrame(columns=empty_columns)

    # MonthEnd(0) rolls each parsed timestamp forward to the end of its month
    # (dates already on a month end are left unchanged).
    parsed_month = pd.to_datetime(loaded['year_month'])
    loaded['date'] = parsed_month + pd.offsets.MonthEnd(0)
    return loaded


def get_buy_signal_permnos_for_date(signals_df, ticker_to_permno, date):
    """
    Get set of permnos with 'buy' or 'sell' signals for a specific date.

    Parameters:
    -----------
    signals_df : pd.DataFrame
        FinBERT signals dataframe with 'date', 'signal' and 'symbol' columns
    ticker_to_permno : dict
        Mapping from ticker symbol to permno
    date : pd.Timestamp
        Date to get signals for

    Returns:
    --------
    permno_set : set
        Set of permnos with buy or sell signals on this date. Tickers missing
        from `ticker_to_permno` are silently skipped.
    """
    on_date = signals_df[signals_df['date'] == date]

    # Keep actionable signals only; anything else (e.g. hold) is ignored.
    actionable = on_date[on_date['signal'].isin(['buy', 'sell'])]

    return {
        ticker_to_permno[symbol]
        for symbol in actionable['symbol'].values
        if symbol in ticker_to_permno
    }


def create_ticker_to_permno_mapping(df):
    """
    Create a mapping from ticker to permno from the returns dataframe.

    Parameters:
    -----------
    df : pd.DataFrame
        Returns dataframe with 'ticker' and 'permno' columns

    Returns:
    --------
    ticker_to_permno : dict
        Mapping from ticker to permno. For a ticker that appears several
        times, the last permno in row order is used.

    Raises:
    -------
    ValueError
        If `df` has no 'ticker' column.
    """
    if 'ticker' not in df.columns:
        raise ValueError("DataFrame must have 'ticker' column for mapping")

    # Rows without a ticker cannot contribute to the mapping.
    has_ticker = df['ticker'].notna()
    last_permno = df.loc[has_ticker].groupby('ticker')['permno'].last()

    return last_permno.to_dict()


def calculate_exit_transaction_cost(prev_weights_dict, prev_oos_returns_dict, 
                                    prev_gross_return, transaction_cost, verbose=False):
    """
    Calculate transaction cost when exiting the market (liquidating all positions).
    Enforces Immediate Liquidation logic:
    - Next period return is 0.0 (Cash)
    - Cost is paid on current portfolio value

    Parameters:
    -----------
    prev_weights_dict : dict
        Asset -> weight held over the previous period
    prev_oos_returns_dict : dict
        Asset -> realised return over the previous period; assets missing
        here are drifted with a zero asset return
    prev_gross_return : float
        Previous-period portfolio gross return
    transaction_cost : float
        Proportional transaction cost
    verbose : bool
        If True, prints turnover and cost

    Returns:
    --------
    (turnover, tc, net_return) : tuple of float
        (0.0, 0.0, 0.0) when there are no previous positions.
    """
    if not prev_weights_dict:
        return 0.0, 0.0, 0.0

    # Step 1: drift last period's weights to the start of the current period.
    # If the portfolio lost (numerically) everything, nothing is left to sell.
    growth = 1 + prev_gross_return
    if abs(growth) > 1e-6:
        drifted = {}
        for asset, weight in prev_weights_dict.items():
            if asset in prev_oos_returns_dict:
                drifted[asset] = weight * (1 + prev_oos_returns_dict[asset]) / growth
            else:
                drifted[asset] = weight / growth
    else:
        drifted = {asset: 0.0 for asset in prev_weights_dict}

    # Step 2: everything is sold to cash, so turnover is the gross exposure.
    turnover = sum(abs(weight) for weight in drifted.values())

    # Step 3: paper formula c * (1 + R_next) * Turnover with R_next = 0.0 (cash).
    tc = transaction_cost * 1.0 * turnover

    # Step 4: cash earns 0.0, so the net return is minus the cost.
    net_return = -tc

    if verbose:
        print(f"  Liquidating positions | Turnover: {turnover:>6.4f} | TC: {tc:>8.6f}")

    return turnover, tc, net_return


def backtest_dnn_finbert(df, 
                         data_factor,
                         signals_path='monthly_signals.csv',
                         test_start_date='2020-01-31', 
                         test_end_date='2024-11-30',
                         lookback_window=180,
                         transaction_cost=0.001,
                         architecture=5,
                         const_err_cov=2.5,
                         use_CV_err=False,
                         target_return=0.01,
                         verbose=True):
    """
    Backtest DNN-FM + FinBERT signals using GMV, MV, and MSR strategies.
    Records zero returns and empty weights when skipping periods.
    
    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame with columns: permno, datadate, ticker, ret_fwd_1
    data_factor : pd.DataFrame
        Factor data for DNN-FM model (indexed by date)
    signals_path : str
        Path to monthly_signals.csv file
    test_start_date : str
        First date for out-of-sample returns (format: 'YYYY-MM-DD')
    test_end_date : str
        Last date for out-of-sample returns (format: 'YYYY-MM-DD')
    lookback_window : int
        Number of months in rolling training window (default: 180)
    transaction_cost : float
        Proportional transaction cost (default: 0.001 = 10 bps)
    architecture : int
        DNN architecture number (default: 5)
    const_err_cov : float
        Constant for error covariance thresholding (default: 2.5)
    use_CV_err : bool
        Whether to use cross-validation for hyperparameters
    target_return : float
        Target return for MV portfolio (default: 0.01 = 1% monthly)
    verbose : bool
        If True, prints detailed log at each time step.
    
    Returns:
    --------
    results_df : pd.DataFrame (GMV)
    metrics : dict (GMV)
    results_df_2 : pd.DataFrame (MV)
    metrics_2 : dict (MV)
    results_df_3 : pd.DataFrame (MSR)
    metrics_3 : dict (MSR)
    """
    # --- 1. Setup ---
    df = df.copy()
    if 'datadate' not in df.columns or 'permno' not in df.columns:
        raise ValueError("DataFrame must have 'datadate' and 'permno' columns")
    df['datadate'] = pd.to_datetime(df['datadate'])
    
    # Create ticker to permno mapping
    if verbose:
        print("Creating ticker to permno mapping...")
    ticker_to_permno = create_ticker_to_permno_mapping(df)
    if verbose:
        print(f"Mapped {len(ticker_to_permno)} unique tickers to permnos")
    
    # Load FinBERT signals
    if verbose:
        print(f"Loading FinBERT signals from {signals_path}...")
    signals_df = load_finbert_signals(signals_path)
    if len(signals_df) == 0:
        raise ValueError("No FinBERT signals loaded")
    
    if verbose:
        print(f"Loaded {len(signals_df)} monthly signals")
        print(f"Signal distribution:")
        print(signals_df['signal'].value_counts())
    
    # Get unique dates
    all_dates = sorted(df['datadate'].unique())
    
    # Convert test dates to datetime
    test_start_dt = pd.to_datetime(test_start_date)
    test_end_dt = pd.to_datetime(test_end_date)
    
    # Find date indices
    try:
        test_start_idx = all_dates.index(test_start_dt)
        test_end_idx = all_dates.index(test_end_dt)
    except ValueError as e:
        raise ValueError(f"Date not found in DataFrame: {e}")
    
    if test_start_idx < lookback_window:
        raise ValueError(f"Not enough data for lookback. Test start date {test_start_date} "
                         f"requires data back to {all_dates[test_start_idx - lookback_window]}, "
                         f"but only {test_start_idx} periods are available.")
    
    # Storage for results - GMV
    portfolio_returns = []
    portfolio_dates = []
    portfolio_weights_list = []
    portfolio_turnover_list = []
    portfolio_gross_returns = []
    
    # Storage for results - MV
    portfolio_returns_2 = []
    portfolio_dates_2 = []
    portfolio_weights_list_2 = []
    portfolio_turnover_list_2 = []
    portfolio_gross_returns_2 = []
    
    # Storage for results - MSR
    portfolio_returns_3 = []
    portfolio_dates_3 = []
    portfolio_weights_list_3 = []
    portfolio_turnover_list_3 = []
    portfolio_gross_returns_3 = []
    
    # Track weights by permno - GMV
    prev_weights_dict = {}
    prev_oos_returns_dict = {}
    prev_gross_return = 0.0
    
    # Track weights by permno - MV
    prev_weights_dict_2 = {}
    prev_oos_returns_dict_2 = {}
    prev_gross_return_2 = 0.0
    
    # Track weights by permno - MSR
    prev_weights_dict_3 = {}
    prev_oos_returns_dict_3 = {}
    prev_gross_return_3 = 0.0
    
    # --- 2. Rolling Window Backtest ---
    if verbose:
        print("="*60)
        print("STARTING BACKTEST WITH DNN-FM + FINBERT (GMV/MV/MSR)")
        print("="*60)
        
    for t in range(test_start_idx, test_end_idx + 1):
        current_date = all_dates[t]
        
        # Get buy/sell signal permnos for current date
        allowed_permnos = get_buy_signal_permnos_for_date(
            signals_df, ticker_to_permno, current_date
        )
        
        # ========================================
        # CRITICAL: Get OOS returns FIRST before any early exits
        # ========================================
        oos_data = df[(df['datadate'] == current_date) & (df['permno'].isin(allowed_permnos))]
        oos_returns_series = oos_data.set_index('permno')['ret_fwd_1']
        oos_returns_series = oos_returns_series.dropna()
        oos_returns_dict = oos_returns_series.to_dict()
        
        # Now handle early exit cases
        if len(allowed_permnos) == 0:
            if verbose:
                print(f"\n[{t - test_start_idx + 1}/{test_end_idx - test_start_idx + 1}] "
                      f"Date: {current_date.strftime('%Y-%m-%d')}")
                print(f"  ⚠ No signals for {current_date.strftime('%Y-%m-%d')}, recording zero return")
            
            # GMV
            turnover, tc, net_return = calculate_exit_transaction_cost(
                prev_weights_dict, prev_oos_returns_dict, prev_gross_return, 
                transaction_cost, verbose=verbose
            )
            portfolio_returns.append(net_return)
            portfolio_dates.append(current_date)
            portfolio_weights_list.append({})
            portfolio_turnover_list.append(turnover)
            portfolio_gross_returns.append(0.0)
            prev_weights_dict = {}
            prev_oos_returns_dict = {}
            prev_gross_return = 0.0
            
            # MV
            turnover_2, tc_2, net_return_2 = calculate_exit_transaction_cost(
                prev_weights_dict_2, prev_oos_returns_dict_2, prev_gross_return_2,
                transaction_cost, verbose=False
            )
            portfolio_returns_2.append(net_return_2)
            portfolio_dates_2.append(current_date)
            portfolio_weights_list_2.append({})
            portfolio_turnover_list_2.append(turnover_2)
            portfolio_gross_returns_2.append(0.0)
            prev_weights_dict_2 = {}
            prev_oos_returns_dict_2 = {}
            prev_gross_return_2 = 0.0
            
            # MSR
            turnover_3, tc_3, net_return_3 = calculate_exit_transaction_cost(
                prev_weights_dict_3, prev_oos_returns_dict_3, prev_gross_return_3,
                transaction_cost, verbose=False
            )
            portfolio_returns_3.append(net_return_3)
            portfolio_dates_3.append(current_date)
            portfolio_weights_list_3.append({})
            portfolio_turnover_list_3.append(turnover_3)
            portfolio_gross_returns_3.append(0.0)
            prev_weights_dict_3 = {}
            prev_oos_returns_dict_3 = {}
            prev_gross_return_3 = 0.0
            
            continue
        
        # Define the lookback window
        window_start_date = all_dates[t - lookback_window]
        window_end_date = all_dates[t - 1]
        
        # Get training data for this window, filtered by allowed permnos
        train_data = df[(df['datadate'] >= window_start_date) & 
                        (df['datadate'] <= window_end_date) &
                        (df['permno'].isin(allowed_permnos))]
        
        # Pivot to get returns matrix (time x assets)
        returns_pivot = train_data.pivot(index='datadate', columns='permno', values='ret_fwd_1')
        
        # Reindex to ensure all dates are present
        window_dates = all_dates[t - lookback_window : t]
        returns_pivot = returns_pivot.reindex(index=window_dates)
        
        # Filter assets with any NaNs in this window
        nan_assets = returns_pivot.columns[returns_pivot.isna().any()]
        filtered_pivot = returns_pivot.drop(columns=nan_assets)
        
        current_assets = filtered_pivot.columns.tolist()
        Y = filtered_pivot.values
        n_train, p_current = Y.shape

        if verbose:
            print(f"\n[{t - test_start_idx + 1}/{test_end_idx - test_start_idx + 1}] "
                  f"Date: {current_date.strftime('%Y-%m-%d')}")
            print(f"  Window: {window_start_date.strftime('%Y-%m-%d')} to "
                  f"{window_end_date.strftime('%Y-%m-%d')}")
            print(f"  FinBERT signals: {len(allowed_permnos)} | Assets w/ data: {p_current}")

        # Check for valid data
        if n_train < lookback_window or p_current < 2:
            if verbose:
                print(f"  ⚠ Insufficient data (n={n_train}, p={p_current}), recording zero return")
            
            # GMV
            turnover, tc, net_return = calculate_exit_transaction_cost(
                prev_weights_dict, prev_oos_returns_dict, prev_gross_return,
                transaction_cost, verbose=verbose
            )
            portfolio_returns.append(net_return)
            portfolio_dates.append(current_date)
            portfolio_weights_list.append({})
            portfolio_turnover_list.append(turnover)
            portfolio_gross_returns.append(0.0)
            prev_weights_dict = {}
            prev_oos_returns_dict = {}
            prev_gross_return = 0.0
            
            # MV
            turnover_2, tc_2, net_return_2 = calculate_exit_transaction_cost(
                prev_weights_dict_2, prev_oos_returns_dict_2, prev_gross_return_2,
                transaction_cost, verbose=False
            )
            portfolio_returns_2.append(net_return_2)
            portfolio_dates_2.append(current_date)
            portfolio_weights_list_2.append({})
            portfolio_turnover_list_2.append(turnover_2)
            portfolio_gross_returns_2.append(0.0)
            prev_weights_dict_2 = {}
            prev_oos_returns_dict_2 = {}
            prev_gross_return_2 = 0.0
            
            # MSR
            turnover_3, tc_3, net_return_3 = calculate_exit_transaction_cost(
                prev_weights_dict_3, prev_oos_returns_dict_3, prev_gross_return_3,
                transaction_cost, verbose=False
            )
            portfolio_returns_3.append(net_return_3)
            portfolio_dates_3.append(current_date)
            portfolio_weights_list_3.append({})
            portfolio_turnover_list_3.append(turnover_3)
            portfolio_gross_returns_3.append(0.0)
            prev_weights_dict_3 = {}
            prev_oos_returns_dict_3 = {}
            prev_gross_return_3 = 0.0
            
            continue
        
        try:
            # Get factor data for the same window
            train_factor = data_factor.loc[window_start_date:window_end_date]
            
            # Ensure factor data aligns with returns data
            train_factor = train_factor.reindex(index=window_dates)
            
            if train_factor.isna().any().any():
                raise ValueError("Factor data contains NaN values in training window")
            
            F = train_factor.values.astype(float)
            
            # Demean the returns
            Y_bar = Y.mean(axis=0)
            Y_star = Y - Y_bar
            
            if verbose:
                print(f"  Running DNN-FM (arch={architecture})...")
            
            # Run DNN-FM
            res_dnn_fm = DNN_FM_main(
                data=Y_star, 
                data_factor=F, 
                architecture=architecture, 
                const_err_cov=const_err_cov, 
                use_CV_err=use_CV_err, 
                eval_type='frob'
            )
            
            Theta_hat = res_dnn_fm['inv_sigma_hat']
            
            if verbose:
                print(f"  Computing GMV weights...")
            w_star = gmv_weights(Theta_hat)
            
            if verbose:
                print(f"  Computing MV weights...")
            w_star_2 = mv_weights(Theta_hat, Y_bar, target_return=target_return)
            
            if verbose:
                print(f"  Computing MSR weights...")
            w_star_3 = msr_weights(Theta_hat, Y_bar)
            
            # Create weights dictionaries
            new_weights_dict = {asset: w_star[i] for i, asset in enumerate(current_assets)}
            new_weights_dict_2 = {asset: w_star_2[i] for i, asset in enumerate(current_assets)}
            new_weights_dict_3 = {asset: w_star_3[i] for i, asset in enumerate(current_assets)}
            
        except Exception as e:
            if verbose:
                print(f"  ✗ Error: {e}")
                print(f"  Recording zero return")
            
            # GMV
            turnover, tc, net_return = calculate_exit_transaction_cost(
                prev_weights_dict, prev_oos_returns_dict, prev_gross_return,
                transaction_cost, verbose=verbose
            )
            portfolio_returns.append(net_return)
            portfolio_dates.append(current_date)
            portfolio_weights_list.append({})
            portfolio_turnover_list.append(turnover)
            portfolio_gross_returns.append(0.0)
            prev_weights_dict = {}
            prev_oos_returns_dict = {}
            prev_gross_return = 0.0
            
            # MV
            turnover_2, tc_2, net_return_2 = calculate_exit_transaction_cost(
                prev_weights_dict_2, prev_oos_returns_dict_2, prev_gross_return_2,
                transaction_cost, verbose=False
            )
            portfolio_returns_2.append(net_return_2)
            portfolio_dates_2.append(current_date)
            portfolio_weights_list_2.append({})
            portfolio_turnover_list_2.append(turnover_2)
            portfolio_gross_returns_2.append(0.0)
            prev_weights_dict_2 = {}
            prev_oos_returns_dict_2 = {}
            prev_gross_return_2 = 0.0
            
            # MSR
            turnover_3, tc_3, net_return_3 = calculate_exit_transaction_cost(
                prev_weights_dict_3, prev_oos_returns_dict_3, prev_gross_return_3,
                transaction_cost, verbose=False
            )
            portfolio_returns_3.append(net_return_3)
            portfolio_dates_3.append(current_date)
            portfolio_weights_list_3.append({})
            portfolio_turnover_list_3.append(turnover_3)
            portfolio_gross_returns_3.append(0.0)
            prev_weights_dict_3 = {}
            prev_oos_returns_dict_3 = {}
            prev_gross_return_3 = 0.0
            
            continue

        # Normalize weights to sum to 1 - GMV
        weight_sum = sum(new_weights_dict.values())
        if weight_sum > 1e-10:
            new_weights_dict = {k: v/weight_sum for k, v in new_weights_dict.items()}
        else:
            if verbose:
                print("  ⚠ GMV: Zero weight sum, recording zero return")
            
            # GMV
            turnover, tc, net_return = calculate_exit_transaction_cost(
                prev_weights_dict, prev_oos_returns_dict, prev_gross_return,
                transaction_cost, verbose=verbose
            )
            portfolio_returns.append(net_return)
            portfolio_dates.append(current_date)
            portfolio_weights_list.append({})
            portfolio_turnover_list.append(turnover)
            portfolio_gross_returns.append(0.0)
            prev_weights_dict = {}
            prev_oos_returns_dict = {}
            prev_gross_return = 0.0
            
            # MV
            turnover_2, tc_2, net_return_2 = calculate_exit_transaction_cost(
                prev_weights_dict_2, prev_oos_returns_dict_2, prev_gross_return_2,
                transaction_cost, verbose=False
            )
            portfolio_returns_2.append(net_return_2)
            portfolio_dates_2.append(current_date)
            portfolio_weights_list_2.append({})
            portfolio_turnover_list_2.append(turnover_2)
            portfolio_gross_returns_2.append(0.0)
            prev_weights_dict_2 = {}
            prev_oos_returns_dict_2 = {}
            prev_gross_return_2 = 0.0
            
            # MSR
            turnover_3, tc_3, net_return_3 = calculate_exit_transaction_cost(
                prev_weights_dict_3, prev_oos_returns_dict_3, prev_gross_return_3,
                transaction_cost, verbose=False
            )
            portfolio_returns_3.append(net_return_3)
            portfolio_dates_3.append(current_date)
            portfolio_weights_list_3.append({})
            portfolio_turnover_list_3.append(turnover_3)
            portfolio_gross_returns_3.append(0.0)
            prev_weights_dict_3 = {}
            prev_oos_returns_dict_3 = {}
            prev_gross_return_3 = 0.0
            
            continue
        
        # Normalize weights to sum to 1 - MV
        weight_sum_2 = sum(new_weights_dict_2.values())
        if weight_sum_2 > 1e-10:
            new_weights_dict_2 = {k: v/weight_sum_2 for k, v in new_weights_dict_2.items()}
        else:
            if verbose:
                print("  ⚠ MV: Zero weight sum, using GMV weights")
            new_weights_dict_2 = new_weights_dict.copy()
        
        # Normalize weights to sum to 1 - MSR
        weight_sum_3 = sum(new_weights_dict_3.values())
        if weight_sum_3 > 1e-10:
            new_weights_dict_3 = {k: v/weight_sum_3 for k, v in new_weights_dict_3.items()}
        else:
            if verbose:
                print("  ⚠ MSR: Zero weight sum, using GMV weights")
            new_weights_dict_3 = new_weights_dict.copy()
        
        # --- 3. OOS Returns & Transaction Costs ---
        
        # Find common assets between weights and returns
        common_assets = set(new_weights_dict.keys()) & set(oos_returns_dict.keys())
        common_assets_2 = set(new_weights_dict_2.keys()) & set(oos_returns_dict.keys())
        common_assets_3 = set(new_weights_dict_3.keys()) & set(oos_returns_dict.keys())
        
        if len(common_assets) == 0:
            if verbose:
                print("  ⚠ No common assets with valid returns, recording zero return")
            
            # GMV
            turnover, tc, net_return = calculate_exit_transaction_cost(
                prev_weights_dict, prev_oos_returns_dict, prev_gross_return,
                transaction_cost, verbose=verbose
            )
            portfolio_returns.append(net_return)
            portfolio_dates.append(current_date)
            portfolio_weights_list.append({})
            portfolio_turnover_list.append(turnover)
            portfolio_gross_returns.append(0.0)
            prev_weights_dict = {}
            prev_oos_returns_dict = {}
            prev_gross_return = 0.0
            
            # MV
            turnover_2, tc_2, net_return_2 = calculate_exit_transaction_cost(
                prev_weights_dict_2, prev_oos_returns_dict_2, prev_gross_return_2,
                transaction_cost, verbose=False
            )
            portfolio_returns_2.append(net_return_2)
            portfolio_dates_2.append(current_date)
            portfolio_weights_list_2.append({})
            portfolio_turnover_list_2.append(turnover_2)
            portfolio_gross_returns_2.append(0.0)
            prev_weights_dict_2 = {}
            prev_oos_returns_dict_2 = {}
            prev_gross_return_2 = 0.0
            
            # MSR
            turnover_3, tc_3, net_return_3 = calculate_exit_transaction_cost(
                prev_weights_dict_3, prev_oos_returns_dict_3, prev_gross_return_3,
                transaction_cost, verbose=False
            )
            portfolio_returns_3.append(net_return_3)
            portfolio_dates_3.append(current_date)
            portfolio_weights_list_3.append({})
            portfolio_turnover_list_3.append(turnover_3)
            portfolio_gross_returns_3.append(0.0)
            prev_weights_dict_3 = {}
            prev_oos_returns_dict_3 = {}
            prev_gross_return_3 = 0.0
            
            continue
        
        # Filter to common assets and renormalize - GMV
        common_weights = {a: new_weights_dict[a] for a in common_assets}
        common_weight_sum = sum(common_weights.values())
        if common_weight_sum > 1e-10:
            common_weights = {k: v/common_weight_sum for k, v in common_weights.items()}
        else:
            if verbose:
                print("  ⚠ GMV: Zero weight sum after filtering, recording zero return")
            
            # GMV
            turnover, tc, net_return = calculate_exit_transaction_cost(
                prev_weights_dict, prev_oos_returns_dict, prev_gross_return,
                transaction_cost, verbose=verbose
            )
            portfolio_returns.append(net_return)
            portfolio_dates.append(current_date)
            portfolio_weights_list.append({})
            portfolio_turnover_list.append(turnover)
            portfolio_gross_returns.append(0.0)
            prev_weights_dict = {}
            prev_oos_returns_dict = {}
            prev_gross_return = 0.0
            
            # MV
            turnover_2, tc_2, net_return_2 = calculate_exit_transaction_cost(
                prev_weights_dict_2, prev_oos_returns_dict_2, prev_gross_return_2,
                transaction_cost, verbose=False
            )
            portfolio_returns_2.append(net_return_2)
            portfolio_dates_2.append(current_date)
            portfolio_weights_list_2.append({})
            portfolio_turnover_list_2.append(turnover_2)
            portfolio_gross_returns_2.append(0.0)
            prev_weights_dict_2 = {}
            prev_oos_returns_dict_2 = {}
            prev_gross_return_2 = 0.0
            
            # MSR
            turnover_3, tc_3, net_return_3 = calculate_exit_transaction_cost(
                prev_weights_dict_3, prev_oos_returns_dict_3, prev_gross_return_3,
                transaction_cost, verbose=False
            )
            portfolio_returns_3.append(net_return_3)
            portfolio_dates_3.append(current_date)
            portfolio_weights_list_3.append({})
            portfolio_turnover_list_3.append(turnover_3)
            portfolio_gross_returns_3.append(0.0)
            prev_weights_dict_3 = {}
            prev_oos_returns_dict_3 = {}
            prev_gross_return_3 = 0.0
            
            continue
        
        # Filter to common assets and renormalize - MV
        common_weights_2 = {a: new_weights_dict_2[a] for a in common_assets_2}
        common_weight_sum_2 = sum(common_weights_2.values())
        if common_weight_sum_2 > 1e-10:
            common_weights_2 = {k: v/common_weight_sum_2 for k, v in common_weights_2.items()}
        else:
            if verbose:
                print("  ⚠ MV: Zero weight sum after filtering, using GMV")
            common_weights_2 = common_weights.copy()
        
        # Filter to common assets and renormalize - MSR
        common_weights_3 = {a: new_weights_dict_3[a] for a in common_assets_3}
        common_weight_sum_3 = sum(common_weights_3.values())
        if common_weight_sum_3 > 1e-10:
            common_weights_3 = {k: v/common_weight_sum_3 for k, v in common_weights_3.items()}
        else:
            if verbose:
                print("  ⚠ MSR: Zero weight sum after filtering, using GMV")
            common_weights_3 = common_weights.copy()
        
        # Compute gross portfolio returns
        gross_return = sum(common_weights[a] * oos_returns_dict[a] for a in common_assets)
        gross_return_2 = sum(common_weights_2[a] * oos_returns_dict[a] for a in common_assets_2)
        gross_return_3 = sum(common_weights_3[a] * oos_returns_dict[a] for a in common_assets_3)
        
        # Sanity checks
        if np.isnan(gross_return) or np.isinf(gross_return):
            if verbose:
                print(f"  ⚠ GMV: Invalid gross return: {gross_return}, recording zero return")
            
            # GMV
            turnover, tc, net_return = calculate_exit_transaction_cost(
                prev_weights_dict, prev_oos_returns_dict, prev_gross_return,
                transaction_cost, verbose=verbose
            )
            portfolio_returns.append(net_return)
            portfolio_dates.append(current_date)
            portfolio_weights_list.append({})
            portfolio_turnover_list.append(turnover)
            portfolio_gross_returns.append(0.0)
            prev_weights_dict = {}
            prev_oos_returns_dict = {}
            prev_gross_return = 0.0
            
            # MV
            turnover_2, tc_2, net_return_2 = calculate_exit_transaction_cost(
                prev_weights_dict_2, prev_oos_returns_dict_2, prev_gross_return_2,
                transaction_cost, verbose=False
            )
            portfolio_returns_2.append(net_return_2)
            portfolio_dates_2.append(current_date)
            portfolio_weights_list_2.append({})
            portfolio_turnover_list_2.append(turnover_2)
            portfolio_gross_returns_2.append(0.0)
            prev_weights_dict_2 = {}
            prev_oos_returns_dict_2 = {}
            prev_gross_return_2 = 0.0
            
            # MSR
            turnover_3, tc_3, net_return_3 = calculate_exit_transaction_cost(
                prev_weights_dict_3, prev_oos_returns_dict_3, prev_gross_return_3,
                transaction_cost, verbose=False
            )
            portfolio_returns_3.append(net_return_3)
            portfolio_dates_3.append(current_date)
            portfolio_weights_list_3.append({})
            portfolio_turnover_list_3.append(turnover_3)
            portfolio_gross_returns_3.append(0.0)
            prev_weights_dict_3 = {}
            prev_oos_returns_dict_3 = {}
            prev_gross_return_3 = 0.0
            
            continue
        
        # Calculate transaction costs - GMV
        if len(prev_weights_dict) > 0:
            # Step 1: Adjust ALL previous weights for returns
            adjusted_prev = {}
            
            for asset, prev_w in prev_weights_dict.items():
                if asset in prev_oos_returns_dict:
                    prev_r = prev_oos_returns_dict[asset]
                    if abs(1 + prev_gross_return) > 1e-6:
                        adjusted_prev[asset] = prev_w * (1 + prev_r) / (1 + prev_gross_return)
                    else:
                        adjusted_prev[asset] = 0.0
                else:
                    # Asset had weight but no return data
                    if abs(1 + prev_gross_return) > 1e-6:
                        adjusted_prev[asset] = prev_w / (1 + prev_gross_return)
                    else:
                        adjusted_prev[asset] = 0.0
            
            # Step 2: Calculate turnover
            all_assets = set(adjusted_prev.keys()) | set(common_weights.keys())
            
            turnover = 0.0
            for asset in all_assets:
                old_w = adjusted_prev.get(asset, 0.0)
                new_w = common_weights.get(asset, 0.0)
                turnover += abs(new_w - old_w)
            
            # Transaction cost on end-of-period portfolio value
            tc = transaction_cost * (1 + gross_return) * turnover
        
        else:
            # First period: no previous positions, buying into everything
            turnover = sum(abs(w) for w in common_weights.values())
            tc = transaction_cost * (1 + gross_return) * turnover
        
        # Calculate transaction costs - MV
        if len(prev_weights_dict_2) > 0:
            adjusted_prev_2 = {}
            
            for asset, prev_w in prev_weights_dict_2.items():
                if asset in prev_oos_returns_dict_2:
                    prev_r = prev_oos_returns_dict_2[asset]
                    if abs(1 + prev_gross_return_2) > 1e-6:
                        adjusted_prev_2[asset] = prev_w * (1 + prev_r) / (1 + prev_gross_return_2)
                    else:
                        adjusted_prev_2[asset] = 0.0
                else:
                    if abs(1 + prev_gross_return_2) > 1e-6:
                        adjusted_prev_2[asset] = prev_w / (1 + prev_gross_return_2)
                    else:
                        adjusted_prev_2[asset] = 0.0
            
            all_assets_2 = set(adjusted_prev_2.keys()) | set(common_weights_2.keys())
            
            turnover_2 = 0.0
            for asset in all_assets_2:
                old_w = adjusted_prev_2.get(asset, 0.0)
                new_w = common_weights_2.get(asset, 0.0)
                turnover_2 += abs(new_w - old_w)
            
            tc_2 = transaction_cost * (1 + gross_return_2) * turnover_2
        else:
            turnover_2 = sum(abs(w) for w in common_weights_2.values())
            tc_2 = transaction_cost * (1 + gross_return_2) * turnover_2
        
        # Calculate transaction costs - MSR
        if len(prev_weights_dict_3) > 0:
            adjusted_prev_3 = {}
            
            for asset, prev_w in prev_weights_dict_3.items():
                if asset in prev_oos_returns_dict_3:
                    prev_r = prev_oos_returns_dict_3[asset]
                    if abs(1 + prev_gross_return_3) > 1e-6:
                        adjusted_prev_3[asset] = prev_w * (1 + prev_r) / (1 + prev_gross_return_3)
                    else:
                        adjusted_prev_3[asset] = 0.0
                else:
                    if abs(1 + prev_gross_return_3) > 1e-6:
                        adjusted_prev_3[asset] = prev_w / (1 + prev_gross_return_3)
                    else:
                        adjusted_prev_3[asset] = 0.0
            
            all_assets_3 = set(adjusted_prev_3.keys()) | set(common_weights_3.keys())
            
            turnover_3 = 0.0
            for asset in all_assets_3:
                old_w = adjusted_prev_3.get(asset, 0.0)
                new_w = common_weights_3.get(asset, 0.0)
                turnover_3 += abs(new_w - old_w)
            
            tc_3 = transaction_cost * (1 + gross_return_3) * turnover_3
        else:
            turnover_3 = sum(abs(w) for w in common_weights_3.values())
            tc_3 = transaction_cost * (1 + gross_return_3) * turnover_3
        
        # Net returns
        net_return = gross_return - tc
        net_return_2 = gross_return_2 - tc_2
        net_return_3 = gross_return_3 - tc_3
        
        # Store results - GMV
        portfolio_returns.append(net_return)
        portfolio_dates.append(current_date)
        portfolio_weights_list.append(common_weights.copy())
        portfolio_turnover_list.append(turnover)
        portfolio_gross_returns.append(gross_return)
        
        # Store results - MV
        portfolio_returns_2.append(net_return_2)
        portfolio_dates_2.append(current_date)
        portfolio_weights_list_2.append(common_weights_2.copy())
        portfolio_turnover_list_2.append(turnover_2)
        portfolio_gross_returns_2.append(gross_return_2)
        
        # Store results - MSR
        portfolio_returns_3.append(net_return_3)
        portfolio_dates_3.append(current_date)
        portfolio_weights_list_3.append(common_weights_3.copy())
        portfolio_turnover_list_3.append(turnover_3)
        portfolio_gross_returns_3.append(gross_return_3)
        
        # Update previous values for next iteration
        prev_weights_dict = common_weights.copy()
        prev_oos_returns_dict = {a: oos_returns_dict[a] for a in common_assets}
        prev_gross_return = gross_return
        
        prev_weights_dict_2 = common_weights_2.copy()
        prev_oos_returns_dict_2 = {a: oos_returns_dict[a] for a in common_assets_2}
        prev_gross_return_2 = gross_return_2
        
        prev_weights_dict_3 = common_weights_3.copy()
        prev_oos_returns_dict_3 = {a: oos_returns_dict[a] for a in common_assets_3}
        prev_gross_return_3 = gross_return_3
        
        if verbose:
            print(f"  GMV  - Gross: {gross_return:>8.5f} | Turnover: {turnover:>6.4f} | "
                  f"TC: {tc:>8.6f} | Net: {net_return:>8.5f}")
            print(f"  MV   - Gross: {gross_return_2:>8.5f} | Turnover: {turnover_2:>6.4f} | "
                  f"TC: {tc_2:>8.6f} | Net: {net_return_2:>8.5f}")
            print(f"  MSR  - Gross: {gross_return_3:>8.5f} | Turnover: {turnover_3:>6.4f} | "
                  f"TC: {tc_3:>8.6f} | Net: {net_return_3:>8.5f}")

    if verbose:
        print("\n" + "="*60)
        print("BACKTEST COMPLETE")
        print("="*60)
    
    # --- 4. Compile Results ---
    results_df = pd.DataFrame({
        'date': portfolio_dates,
        'portfolio_return': portfolio_returns,
        'portfolio_gross_return': portfolio_gross_returns,
        'portfolio_weights': portfolio_weights_list,
        'portfolio_turnover': portfolio_turnover_list
    })
    results_df['cumulative_return'] = (1 + results_df['portfolio_return']).cumprod() - 1
    
    results_df_2 = pd.DataFrame({
        'date': portfolio_dates_2,
        'portfolio_return': portfolio_returns_2,
        'portfolio_gross_return': portfolio_gross_returns_2,
        'portfolio_weights': portfolio_weights_list_2,
        'portfolio_turnover': portfolio_turnover_list_2
    })
    results_df_2['cumulative_return'] = (1 + results_df_2['portfolio_return']).cumprod() - 1
    
    results_df_3 = pd.DataFrame({
        'date': portfolio_dates_3,
        'portfolio_return': portfolio_returns_3,
        'portfolio_gross_return': portfolio_gross_returns_3,
        'portfolio_weights': portfolio_weights_list_3,
        'portfolio_turnover': portfolio_turnover_list_3
    })
    results_df_3['cumulative_return'] = (1 + results_df_3['portfolio_return']).cumprod() - 1
    
    # Helper function to compute metrics
    def compute_metrics(returns_list, turnover_list, results_df):
        """
        Summarize one strategy's per-period returns and turnover.

        Parameters:
        -----------
        returns_list : sequence of float
            Per-period portfolio returns (treated as monthly when annualizing).
        turnover_list : sequence of float
            Per-period turnover values; their mean is reported.
        results_df : pd.DataFrame
            Must contain a 'cumulative_return' column; its last entry is
            reported as 'total_return'.

        Returns:
        --------
        metrics : dict
            Keys: 'mean_return', 'variance', 'sharpe_ratio', 'annual_return',
            'annual_volatility', 'annual_sharpe_ratio', 'total_return',
            'avg_turnover', 'n_periods', 'n_zero_periods'.
            All values are 0 when returns_list is empty.
        """
        n_periods = len(returns_list)

        # No observations at all: report an all-zero summary.
        if n_periods == 0:
            return {
                'mean_return': 0, 'variance': 0, 'sharpe_ratio': 0,
                'annual_return': 0, 'annual_volatility': 0, 'annual_sharpe_ratio': 0,
                'total_return': 0, 'avg_turnover': 0, 'n_periods': 0, 'n_zero_periods': 0
            }

        mean_return = np.mean(returns_list)

        # The sample variance (ddof=1) is undefined for a single observation:
        # np.var would return NaN, which then leaks into 'variance' and
        # 'annual_volatility'. Report zero dispersion instead, consistent with
        # the all-zero summary used for empty input.
        if n_periods > 1:
            variance = np.var(returns_list, ddof=1)
        else:
            variance = 0.0

        sharpe_ratio = mean_return / np.sqrt(variance) if variance > 0 else 0

        # Annualized metrics (monthly data): mean and variance both scale by 12
        annual_return = mean_return * 12
        annual_volatility = np.sqrt(variance * 12)
        annual_sharpe = annual_return / annual_volatility if annual_volatility > 0 else 0

        return {
            'mean_return': mean_return,
            'variance': variance,
            'sharpe_ratio': sharpe_ratio,
            'annual_return': annual_return,
            'annual_volatility': annual_volatility,
            'annual_sharpe_ratio': annual_sharpe,
            'total_return': results_df['cumulative_return'].iloc[-1],
            'avg_turnover': np.mean(turnover_list),
            'n_periods': n_periods,
            # Periods whose recorded return is exactly zero
            'n_zero_periods': sum(1 for r in returns_list if r == 0)
        }
    
    # Compute metrics for all three strategies
    metrics = compute_metrics(portfolio_returns, portfolio_turnover_list, results_df)
    metrics_2 = compute_metrics(portfolio_returns_2, portfolio_turnover_list_2, results_df_2)
    metrics_3 = compute_metrics(portfolio_returns_3, portfolio_turnover_list_3, results_df_3)
    
    return results_df, metrics, results_df_2, metrics_2, results_df_3, metrics_3

2025-12-05 15:11:45.535857: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
# Load the cleaned panel (ncusip read with the pandas string dtype) and attach
# each permno's next-row excess return as 'ret_fwd_1'.
# NOTE(review): shift(-1) is positional within each permno group, so this
# presumably relies on rows already being in date order -- confirm.
df = pd.read_csv('../green cleaned.csv', dtype={'ncusip': 'string'}).assign(
    ret_fwd_1=lambda panel: panel.groupby('permno')['ret_excess'].shift(-1)
)

# Factor file: parse the YYYYMM 'Date' column, move each date to its month end,
# index by it, and keep the four factor columns as floats.
data_f = (
    pd.read_csv('F-F_Research_Data_Factors.csv', sep=',')
    .assign(
        Date=lambda raw: pd.to_datetime(raw['Date'], format="%Y%m") + pd.offsets.MonthEnd(0)
    )
    .set_index('Date')
    .loc[:, ['Mkt-RF', 'SMB', 'HML', 'RF']]
    .astype(float)
)

# Run the backtest using the signals file named in 'signals_path'.
backtest_settings = {
    'test_start_date': '2020-01-31',
    'test_end_date': '2024-04-30',
    'lookback_window': 180,
    'transaction_cost': 0.001,
    'signals_path': '../examples/monthly_signals_decay.csv',
    'data_factor': data_f,
    'verbose': True,
}

# Three (results, metrics) pairs are returned; the report cells label the
# unsuffixed pair GMV, the *_2 pair MV and the *_3 pair MSR.
(
    results_df, metrics,
    results_df_2, metrics_2,
    results_df_3, metrics_3,
) = backtest_dnn_finbert(df, **backtest_settings)

# Print the same four headline metrics for each strategy in turn.
for section_header, strategy_metrics in (
    ("\n GMV", metrics),
    ("\n MV", metrics_2),
    ("\n MSR", metrics_3),
):
    print(section_header)
    print(f"\nSharpe Ratio: {strategy_metrics['sharpe_ratio']:.4f}")
    print(f"Annualized Sharpe Ratio: {strategy_metrics['annual_sharpe_ratio']:.4f}")
    print(f"Total Return: {strategy_metrics['total_return']:.4f}")
    print(f"Average Turnover: {strategy_metrics['avg_turnover']:.4f}")

Creating ticker to permno mapping...
Mapped 1664 unique tickers to permnos
Loading FinBERT signals from ../examples/monthly_signals_decay.csv...
Loaded 24780 monthly signals
Signal distribution:
signal
hold    23840
sell      529
buy       411
Name: count, dtype: int64
STARTING BACKTEST WITH DNN-FM + FINBERT (GMV/MV/MSR)

[1/52] Date: 2020-01-31
  Window: 2005-01-31 to 2019-12-31
  FinBERT signals: 8 | Assets w/ data: 7
  Running DNN-FM (arch=5)...
  Computing GMV weights...
  Computing MV weights...
  Computing MSR weights...
  GMV  - Gross: -0.11116 | Turnover: 1.0000 | TC: 0.000889 | Net: -0.11205
  MV   - Gross: -0.11200 | Turnover: 1.0000 | TC: 0.000888 | Net: -0.11289
  MSR  - Gross: -0.10422 | Turnover: 1.2171 | TC: 0.001090 | Net: -0.10531

[2/52] Date: 2020-02-29
  Window: 2005-02-28 to 2020-01-31
  FinBERT signals: 10 | Assets w/ data: 5
  Running DNN-FM (arch=5)...
  Computing GMV weights...
  Computing MV weights...
  Computing MSR weights...
  GMV  - Gross: -0.07984 | Turn

In [8]:
# Compact per-strategy report. The "Mean Return" and "Variance" lines show the
# per-period values multiplied by 12, i.e. annualized figures.
for section_header, strategy_metrics in (
    ("\n GMV", metrics),
    ("\n MV", metrics_2),
    ("\n MSR", metrics_3),
):
    print(section_header)
    print(f"Annualized Sharpe Ratio: {strategy_metrics['annual_sharpe_ratio']:.4f}")
    print(f"Mean Return: {strategy_metrics['mean_return']*12:.4f}")
    print(f"Variance: {strategy_metrics['variance']*12:.4f}")
    print(f"Avg Turnover: {strategy_metrics['avg_turnover']:.4f}")


 GMV
Annualized Sharpe Ratio: 0.7805
Mean Return: 0.1429
Variance: 0.0335
Avg Turnover: 1.7815

 MV
Annualized Sharpe Ratio: 0.6378
Mean Return: 0.1291
Variance: 0.0409
Avg Turnover: 1.9699

 MSR
Annualized Sharpe Ratio: 0.7582
Mean Return: 0.1299
Variance: 0.0294
Avg Turnover: 1.9693
