# MLE Evaluation Lab 

In [10]:
import numpy as np
import pandas as pd
from factor_analyzer.rotator import Rotator
import matplotlib.pyplot as plt

Parameters

In [22]:
# Universe sizes that were prepared, and for each of them the list of factor
# counts to use (one count per walk-forward period). Row i of `factors` belongs
# to `stock_samples[i]`; the previously hand-written value for 100 stocks,
# [10, 8, 7, 10], is exactly row 3.
stock_samples = [25, 50, 75, 100, 250]
factors = [[4, 3, 3, 3], [5, 4, 4, 5], [7, 6, 6, 8], [10, 8, 7, 10], [23, 17, 16, 20]]

# Universe evaluated in this run. Change only this value: the factor counts and
# the input directory below are derived from it.
stock_sample = 100
if stock_sample not in stock_samples:
    raise ValueError(
        f"stock_sample={stock_sample} is not one of the prepared sizes {stock_samples}"
    )
# Look the factor counts up instead of copying them by hand, so they can never
# disagree with `stock_sample`. A copy is taken so later edits to `n_factors`
# do not modify the `factors` table.
n_factors = list(factors[stock_samples.index(stock_sample)])

#excel_filename = f'Final_Data/Final_Results/mle_stability_results_w_obli_{stock_sample}.xlsx'
directory = f'Final_Data/Walkforward_Sets/{stock_sample}_stocks_seed42'

In [23]:
import os
import re
import pandas as pd

directory = f'Final_Data/Walkforward_Sets/{stock_sample}_stocks_seed42'
files = os.listdir(directory)

# Correlation files, e.g. df00a_varresid_is_corr_is.csv
pattern_corr = re.compile(r'^(df\d{2}[a-z])_varresid_(is|os)_corr_(is|os)\.csv$')
# Return/residual files, e.g. df00a_varresid_is.csv
pattern_resid = re.compile(r'^(df\d{2}[a-z])_varresid_(is|os)\.csv$')

for fname in files:
    # Covariance matrices (anything with 'cov_' in its name) are not loaded.
    if 'cov_' in fname:
        continue
    full_path = os.path.join(directory, fname)

    m = pattern_corr.match(fname)
    if m is not None:
        # df00a_varresid_is_corr_is.csv -> variable corr00a_is
        base, inout = m.group(1), m.group(2)
        new_key = f"corr{base[2:]}_{inout}"
    else:
        m = pattern_resid.match(fname)
        if m is None:
            # Neither naming scheme applies: leave the file alone.
            continue
        # df00a_varresid_is.csv -> variable df00a_is
        base, inout = m.group(1), m.group(2)
        new_key = f"{base}_{inout}"

    # Each table becomes a notebook-level variable; later cells look the
    # tables up by name through globals().
    df = pd.read_csv(full_path, index_col=0)
    globals()[new_key] = df

## Walk-forward Import

The import is done. Next, continue with the estimation and then finish with the evaluation. Afterwards, copy what has been done here over to the other notebooks.

## Evaluation

No rotation is needed here yet, because rotating the factors does not change the explained variance.

In [20]:
import os
import re
import pandas as pd
from sklearn.decomposition import PCA  # Only used for the residual PCA step
import numpy as np
import pandas as pd
from datetime import datetime
import scipy.stats as stats

# ---------------- Utility Functions ----------------

def congruence_coefficient(v1, v2):
    """
    Tucker's coefficient of congruence of two vectors:
    the sum of element-wise products divided by the product of the
    two Euclidean norms.
    """
    cross_product = np.sum(v1 * v2)
    norm_v1 = np.sqrt(np.sum(v1**2))
    norm_v2 = np.sqrt(np.sum(v2**2))
    return cross_product / (norm_v1 * norm_v2)

def compute_sparsity_index(loadings):
    """
    Average normalized sparsity of the columns of a loadings DataFrame.

    For every column v the ratio ||v||_1 / ||v||_2 is formed (NaN when the
    Euclidean norm is zero) and rescaled with n = number of rows:
        (sqrt(n) - ratio) / (sqrt(n) - 1)
    A column with a single non-zero entry scores 1, a column whose entries
    all have the same magnitude scores 0. The mean over all columns is returned.
    """
    root_n = np.sqrt(loadings.shape[0])

    def _column_score(column_name):
        weights = loadings[column_name].values
        abs_sum = np.linalg.norm(weights, 1)
        euclid = np.linalg.norm(weights, 2)
        if euclid != 0:
            ratio = abs_sum / euclid
        else:
            ratio = np.nan
        return (root_n - ratio) / (root_n - 1)

    return np.mean([_column_score(name) for name in loadings.columns])

def calc_proj_ev(df_std, loadings):
    """
    Explained variance of a least-squares projection onto the loadings.

    Factor scores are obtained by regressing each row of `df_std` on the
    loadings, (L'L)^-1 L' x, and mapped back through the loadings to get
    fitted values. The result is
        1 - mean residual variance / mean variance of df_std,
    with both variances taken per column.
    """
    gram_inverse = np.linalg.inv(loadings.T.dot(loadings))
    cross_term = loadings.T.dot(df_std.T)
    scores = gram_inverse.dot(cross_term).T
    fitted = scores.dot(loadings.T)
    unexplained = (df_std - fitted).var(axis=0).mean()
    overall = df_std.var(axis=0).mean()
    return 1 - unexplained / overall

def joreskog(cov, n_factors=None, max_iter=1000000, tol=1e-6, min_communal=1e-6):
    """
    Jöreskog-style iterative factor analysis of a covariance/correlation matrix.

    Starting from principal-component loadings, the loadings ``beta`` and the
    diagonal uniqueness matrix ``psi`` are updated until the relative change
    of both is at most ``tol`` or ``max_iter`` iterations have been run.

    Parameters
    ----------
    cov : pandas.DataFrame or numpy.ndarray
        Square, symmetric matrix. If its diagonal is numerically all ones it
        is treated as a correlation matrix and communalities are capped at 0.999.
    n_factors : int or None
        Number of factors. ``None`` uses the number of eigenvalues larger than
        1e-9 times the largest eigenvalue. Values above the matrix dimension
        are reduced to the dimension. NumPy integers are accepted.
    max_iter : int
        Maximum number of update iterations.
    tol : float
        Convergence threshold on the relative change of loadings and uniquenesses.
    min_communal : float
        Lower bound applied to every uniqueness (diagonal entry of ``psi``).

    Returns
    -------
    betas_df : pandas.DataFrame
        Loadings, one row per variable and one column ``PC1..PCk`` per factor.
        The index is taken from ``cov`` when it is a DataFrame, otherwise
        ``var_0..var_{n-1}``.
    factor_variances : numpy.ndarray
        Column sums of the squared loadings.
    explained_var : numpy.ndarray
        ``factor_variances`` divided by the trace of ``cov``.
    explained_cumulative : numpy.ndarray
        Cumulative sum of ``explained_var``.

    Raises
    ------
    ValueError
        If ``cov`` is not a square symmetric array/DataFrame, if ``n_factors``
        is not a positive integer, or if the model covariance becomes singular.
    """
    original_index = cov.index.tolist() if isinstance(cov, pd.DataFrame) else None
    cov = cov.to_numpy() if isinstance(cov, pd.DataFrame) else cov

    if not isinstance(cov, np.ndarray):
        raise ValueError("Covariance matrix must be numpy array or DataFrame")
    if cov.ndim != 2 or cov.shape[0] != cov.shape[1]:
        raise ValueError("Covariance matrix must be square")
    if not np.allclose(cov, cov.T, atol=1e-9):
        raise ValueError("Covariance matrix must be symmetric")

    n_vars = cov.shape[0]
    is_correlation = np.allclose(np.diag(cov), 1.0, atol=1e-5)

    # A single eigendecomposition serves both the default factor count and the
    # starting loadings.
    eigenvals, eigenvecs = np.linalg.eigh(cov)
    if n_factors is None:
        tol_eigen = 1e-9 * np.max(eigenvals)
        n_factors = int(np.sum(eigenvals > tol_eigen))
    else:
        # np.integer is accepted as well: factor counts often come out of
        # NumPy/pandas containers and are not plain Python ints.
        if not isinstance(n_factors, (int, np.integer)) or n_factors <= 0:
            raise ValueError("n_factors must be a positive integer")
        n_factors = min(int(n_factors), n_vars)

    # Keep the n_factors largest eigenpairs (eigh returns ascending order).
    order = np.argsort(eigenvals)[::-1]
    eigenvals = eigenvals[order][:n_factors]
    eigenvecs = eigenvecs[:, order][:, :n_factors]

    # Starting values: principal-component loadings and the implied uniquenesses.
    beta = eigenvecs @ np.diag(np.sqrt(np.maximum(eigenvals, 0)))
    communalities = np.sum(beta**2, axis=1)
    if is_correlation:
        communalities = np.clip(communalities, 0, 0.999)
    psi = np.diag(np.maximum(np.diag(cov) - communalities, min_communal))

    iter_num = 0
    beta_change = np.inf
    psi_change = np.inf
    warned_lower_bound = False

    while iter_num < max_iter and (beta_change > tol or psi_change > tol):
        sigma = beta @ beta.T + psi
        try:
            sigma_inv = np.linalg.inv(sigma)
        except np.linalg.LinAlgError:
            raise ValueError("Singular sigma matrix - reduce n_factors or increase min_communal")

        middle = np.linalg.inv(np.eye(n_factors) + beta.T @ sigma_inv @ beta)
        beta_new = cov @ sigma_inv @ beta @ middle

        communalities_new = np.sum(beta_new**2, axis=1)
        if is_correlation:
            communalities_new = np.clip(communalities_new, 0, 0.999)
        psi_new_diag = np.maximum(np.diag(cov) - communalities_new, min_communal)
        psi_new = np.diag(psi_new_diag)

        # Report the boundary condition once instead of on every iteration,
        # which could print the same line up to max_iter times.
        if not warned_lower_bound and np.any(psi_new_diag <= min_communal + 1e-6):
            print("Warning: Some uniquenesses at lower bound")
            warned_lower_bound = True

        beta_change = np.linalg.norm(beta_new - beta) / (np.linalg.norm(beta) + np.finfo(float).eps)
        psi_change = np.linalg.norm(psi_new_diag - np.diag(psi)) / (np.linalg.norm(np.diag(psi)) + np.finfo(float).eps)

        beta = beta_new
        psi = psi_new
        iter_num += 1

    # Only warn when the budget ran out AND the last step was still above the
    # tolerance; converging exactly on the final iteration is not a failure.
    if iter_num >= max_iter and (beta_change > tol or psi_change > tol):
        print("Warning: Maximum iterations reached without convergence")

    factor_variances = np.sum(beta**2, axis=0)
    total_variance = np.sum(np.diag(cov))
    explained_var = factor_variances / total_variance
    explained_cumulative = np.cumsum(explained_var)

    factor_columns = [f'PC{i+1}' for i in range(n_factors)]
    row_labels = (original_index if original_index is not None
                  else [f'var_{i}' for i in range(n_vars)])
    betas_df = pd.DataFrame(beta, index=row_labels, columns=factor_columns)

    return betas_df, factor_variances, explained_var, explained_cumulative

# ------------------ Evaluation Function ------------------

def evaluate_mle_os(corr_is, df_os, df_is, n_factors):
    """
    Fit the factor model on in-sample data and evaluate it in and out of sample.

    The three inputs are restricted to the assets they share (sorted). Both
    return tables are standardized with the IN-SAMPLE means and standard
    deviations. Loadings are estimated with `joreskog` from `corr_is`; a
    second set is estimated from the correlation of the standardized
    out-of-sample returns and is used only for the stability measures.

    Parameters:
        corr_is (DataFrame): In-sample correlation matrix, indexed by asset.
        df_os (DataFrame): Out-of-sample returns, one column per asset.
        df_is (DataFrame): In-sample returns, one column per asset.
        n_factors (int): Requested number of factors. `joreskog` reduces it
            to the number of common assets if it is larger.

    Returns:
        metrics (dict): 'n_factors' (number of factors actually used),
            in-/out-of-sample explained variance from `calc_proj_ev`, share of
            out-of-sample residual variance on the first residual principal
            component, average and per-factor absolute loading correlation and
            absolute Tucker congruence, and the sparsity index of the
            in-sample loadings.
        loadings_is (DataFrame): In-sample factor weights.

    Raises:
        ValueError: If the inputs have no asset in common.
    """
    common_assets = sorted(set(df_os.columns) & set(corr_is.index) & set(df_is.columns))
    if not common_assets:
        raise ValueError("No common assets between matrices")

    corr_is_aligned = corr_is.loc[common_assets, common_assets]
    df_os_aligned = df_os[common_assets]
    df_is_aligned = df_is[common_assets]

    # Out-of-sample data is scaled with in-sample moments on purpose, so no
    # out-of-sample information enters the standardization.
    is_means = df_is_aligned.mean()
    is_stds = df_is_aligned.std()
    df_is_std = (df_is_aligned - is_means) / is_stds
    df_os_std = (df_os_aligned - is_means) / is_stds

    # In-sample loadings; joreskog already labels the columns PC1..PCk.
    loadings_is, _, _, _ = joreskog(corr_is_aligned, n_factors=n_factors)

    in_sample_ev = calc_proj_ev(df_is_std, loadings_is)
    out_sample_ev = calc_proj_ev(df_os_std, loadings_is)

    # Loadings re-estimated on the out-of-sample window, for comparison only.
    loadings_os, _, _, _ = joreskog(df_os_std.corr(), n_factors=n_factors)

    # joreskog may return fewer factors than requested (it caps the count at
    # the number of assets), so iterate over what was actually produced.
    n_used = min(loadings_is.shape[1], loadings_os.shape[1])

    # NOTE(review): factors are paired by column position; confirm that the
    # factor order is comparable between the two estimations.
    correlations = []
    congruences = []
    for i in range(n_used):
        is_vec = loadings_is.iloc[:, i]
        os_vec = loadings_os.iloc[:, i]
        correlations.append(abs(np.corrcoef(is_vec, os_vec)[0, 1]))
        congruences.append(abs(congruence_coefficient(is_vec, os_vec)))
    avg_loading_corr = np.mean(correlations)
    avg_congruence = np.mean(congruences)

    # Out-of-sample residuals after projecting on the in-sample loadings, and
    # the share of their variance captured by their first principal component.
    inv_LL = np.linalg.inv(loadings_is.T.dot(loadings_is))
    factor_scores_os = (inv_LL.dot(loadings_is.T.dot(df_os_std.T))).T
    projected_returns_os = factor_scores_os.dot(loadings_is.T)
    resid = df_os_std - projected_returns_os
    pca_residuals = PCA(n_components=1)
    pca_residuals.fit(resid)
    residual_first_pc_ev = pca_residuals.explained_variance_ratio_[0]

    sparsity_index = compute_sparsity_index(loadings_is)

    metrics = {
        'n_factors': n_used,
        'in_sample_explained_variance': in_sample_ev,
        'explained_variance': out_sample_ev,
        'residual_first_pc_ev': residual_first_pc_ev,
        'avg_loading_correlation': avg_loading_corr,
        'avg_congruence': avg_congruence,
        'sparsity_index': sparsity_index,
    }
    for i, corr_val in enumerate(correlations):
        metrics[f'factor_{i+1}_correlation'] = round(corr_val, 3)
    for i, cong_val in enumerate(congruences):
        metrics[f'factor_{i+1}_congruence'] = round(cong_val, 3)

    return metrics, loadings_is

# --------------------- USAGE PART ---------------------

# Global parameter: stock_sample (assumed available)
periods = ["00", "05", "10", "15"]
# One factor count per period, taken positionally from `n_factors`.
n_factors_dict = dict(zip(periods, n_factors))

# Within a period, sample k is the in-sample set and sample k+1 the
# out-of-sample set: a-b, b-c, c-d, d-e.
pairings = [("a", "b"), ("b", "c"), ("c", "d"), ("d", "e")]

results_list = []
factor_weights_list = []

print("Using the following number of factors per period:")
for period, n in n_factors_dict.items():
    print(f"Period {period}: {n} factors")
print("\n")

for period in periods:
    for ins, oos in pairings:
        corr_key = f"corr{period}{ins}_is"   # in-sample correlation matrix
        is_key = f"df{period}{ins}_is"       # in-sample returns
        os_key = f"df{period}{oos}_os"       # out-of-sample returns
        required_keys = [corr_key, is_key, os_key]

        # The tables were stored as notebook-level variables by the import
        # cell; a pairing is skipped when any of them was not loaded.
        missing = [key for key in required_keys if key not in globals()]
        if missing:
            print(f"Skipping {period} pairing {ins}-{oos} – missing data: {missing}")
            continue

        try:
            metrics, loadings_is = evaluate_mle_os(
                globals()[corr_key],
                globals()[os_key],
                globals()[is_key],
                n_factors=n_factors_dict[period]
            )
            metrics.update(period=period, in_sample=ins, oos_sample=oos)
            results_list.append(metrics)

            # Keep the in-sample factor weights together with the asset labels.
            loadings_is_reset = (
                loadings_is.reset_index()
                .rename(columns={'index': 'Asset'})
                .assign(period=period, in_sample=ins)
            )
            factor_weights_list.append(loadings_is_reset)

            print(f"Completed {period}: in-sample '{ins}' vs out-of-sample '{oos}'")
        except Exception as e:
            # Best effort: one failing pairing must not stop the others.
            print(f"Error in {period} for pairing {ins}-{oos}: {str(e)}")

if results_list:
    results_df = pd.DataFrame(results_list)

    column_order = ['period', 'in_sample', 'oos_sample', 'n_factors',
                    'in_sample_explained_variance', 'explained_variance',
                    'residual_first_pc_ev', 'avg_loading_correlation',
                    'avg_congruence', 'sparsity_index']
    # Per-factor columns are named "factor_<k>_<metric>". Order them by the
    # numeric factor index; a plain string sort would place factor_10_* before
    # factor_1_*.
    factor_pattern = re.compile(r'^factor_(\d+)_')
    factor_cols = sorted(
        (col for col in results_df.columns if factor_pattern.match(col)),
        key=lambda col: (int(factor_pattern.match(col).group(1)), col),
    )
    column_order.extend(factor_cols)
    results_df = results_df[column_order]

    numeric_columns = results_df.select_dtypes(include=[np.number]).columns
    results_df[numeric_columns] = results_df[numeric_columns].round(3)

    results_df = results_df.sort_values(['period', 'in_sample', 'oos_sample'])

    excel_filename = f"Final_Data/Final_Results/mle_{stock_sample}.xlsx"
    # Create the output folder if needed so the writer does not fail on a
    # fresh checkout.
    os.makedirs(os.path.dirname(excel_filename), exist_ok=True)

    with pd.ExcelWriter(excel_filename) as writer:
        # Sheet 1: one row per (period, in-sample, out-of-sample) pairing.
        results_df.to_excel(writer, sheet_name='Results', index=False)

        # Sheet 2: mean and standard deviation of the headline metrics per period.
        summary_stats = results_df.groupby('period').agg({
            'in_sample_explained_variance': ['mean', 'std'],
            'explained_variance': ['mean', 'std'],
            'residual_first_pc_ev': ['mean', 'std'],
            'avg_loading_correlation': ['mean', 'std'],
            'avg_congruence': ['mean', 'std'],
            'sparsity_index': ['mean', 'std']
        }).round(3)
        summary_stats.columns = ['IS_EV_mean', 'IS_EV_std',
                                 'OOS_EV_mean', 'OOS_EV_std',
                                 'Residual_PC1_mean', 'Residual_PC1_std',
                                 'Corr_mean', 'Corr_std',
                                 'Congruence_mean', 'Congruence_std',
                                 'Sparsity_Index_mean', 'Sparsity_Index_std']
        summary_stats.to_excel(writer, sheet_name='Summary_Stats')

        # Sheet 3: per-factor correlation and congruence, summarized per period.
        factor_summary = results_df[['period'] + factor_cols].groupby('period').agg(['mean', 'std']).round(3)
        factor_summary.to_excel(writer, sheet_name='Factor_Correlations')

        # Sheet 4: stacked in-sample factor weights of every pairing.
        if factor_weights_list:
            factor_weights_df = pd.concat(factor_weights_list, ignore_index=True)
            factor_weight_cols = [col for col in factor_weights_df.columns if col.startswith('PC')]
            factor_weights_df = factor_weights_df[['period', 'in_sample', 'Asset'] + factor_weight_cols]
            factor_weights_df.to_excel(writer, sheet_name='Factor_Weights', index=False)

        # Sheet 5: every numeric metric aggregated over all rows of results_df,
        # with a two-sided 95% t-interval for the mean.
        agg_metrics = {}
        metric_columns = [col for col in results_df.columns
                          if col not in ['period', 'in_sample', 'oos_sample']
                          and np.issubdtype(results_df[col].dtype, np.number)]
        for col in metric_columns:
            data = results_df[col].dropna()
            n = len(data)
            mean_val = data.mean()
            if n > 1:
                std_val = data.std()
                sem = std_val / np.sqrt(n)
                t_stat = stats.t.ppf(1-0.025, df=n-1)
                ci_lower = mean_val - t_stat * sem
                ci_upper = mean_val + t_stat * sem
            else:
                # A spread cannot be estimated from fewer than two values.
                std_val = np.nan
                ci_lower = np.nan
                ci_upper = np.nan
            agg_metrics[col] = {
                "mean": round(mean_val, 3),
                "std": round(std_val, 3) if pd.notnull(std_val) else std_val,
                "count": n,
                "CI_lower": round(ci_lower, 3) if pd.notnull(ci_lower) else ci_lower,
                "CI_upper": round(ci_upper, 3) if pd.notnull(ci_upper) else ci_upper
            }
        agg_df = pd.DataFrame(agg_metrics).T.reset_index().rename(columns={'index': 'Metric'})
        agg_df = agg_df[['Metric', 'mean', 'std', 'count', 'CI_lower', 'CI_upper']]
        agg_df.to_excel(writer, sheet_name='Results_Aggregated', index=False)

    print(f"\nResults saved to: {excel_filename}")
    print("\nResults DataFrame:")
    print(results_df)
    print("\nSummary Statistics by Period:")
    print(summary_stats)
    print("\nAggregated Results with Confidence Intervals:")
    print(agg_df)
else:
    print("No results were generated. Check your input data.")

    

Using the following number of factors per period:
Period 00: 23 factors
Period 05: 17 factors
Period 10: 16 factors
Period 15: 20 factors


Completed 00: in-sample 'a' vs out-of-sample 'b'
Completed 00: in-sample 'b' vs out-of-sample 'c'
Completed 00: in-sample 'c' vs out-of-sample 'd'
Completed 00: in-sample 'd' vs out-of-sample 'e'
Completed 05: in-sample 'a' vs out-of-sample 'b'
Completed 05: in-sample 'b' vs out-of-sample 'c'


KeyboardInterrupt: 

New (stable?) code

In [24]:
from sklearn.decomposition import PCA  # Only used for the residual PCA step
import numpy as np
import pandas as pd
from datetime import datetime
import scipy.stats as stats
import os, re

# ---------------- Utility Functions ----------------
def congruence_coefficient(v1, v2):
    """
    Tucker's coefficient of congruence of two vectors:
    the sum of element-wise products divided by the product of the
    two Euclidean norms.
    """
    cross_product = np.sum(v1 * v2)
    norm_v1 = np.sqrt(np.sum(v1**2))
    norm_v2 = np.sqrt(np.sum(v2**2))
    return cross_product / (norm_v1 * norm_v2)

def compute_sparsity_index(loadings):
    """
    Average normalized sparsity of the columns of a loadings DataFrame.

    For every column v the ratio ||v||_1 / ||v||_2 is formed (NaN when the
    Euclidean norm is zero) and rescaled with n = number of rows:
        (sqrt(n) - ratio) / (sqrt(n) - 1)
    A column with a single non-zero entry scores 1, a column whose entries
    all have the same magnitude scores 0. The mean over all columns is returned.
    """
    root_n = np.sqrt(loadings.shape[0])

    def _column_score(column_name):
        weights = loadings[column_name].values
        abs_sum = np.linalg.norm(weights, 1)
        euclid = np.linalg.norm(weights, 2)
        if euclid != 0:
            ratio = abs_sum / euclid
        else:
            ratio = np.nan
        return (root_n - ratio) / (root_n - 1)

    return np.mean([_column_score(name) for name in loadings.columns])

def calc_proj_ev(df_std, loadings):
    """
    Explained variance of a least-squares projection onto the loadings.

    Factor scores are obtained by regressing each row of `df_std` on the
    loadings, (L'L)^-1 L' x, and mapped back through the loadings to get
    fitted values. The result is
        1 - mean residual variance / mean variance of df_std,
    with both variances taken per column.
    """
    gram_inverse = np.linalg.inv(loadings.T.dot(loadings))
    cross_term = loadings.T.dot(df_std.T)
    scores = gram_inverse.dot(cross_term).T
    fitted = scores.dot(loadings.T)
    unexplained = (df_std - fitted).var(axis=0).mean()
    overall = df_std.var(axis=0).mean()
    return 1 - unexplained / overall

def joreskog(cov, n_factors=None, max_iter=1000000, tol=1e-6, min_communal=1e-6):
    """
    Jöreskog-style iterative factor analysis of a covariance/correlation matrix.

    Starting from principal-component loadings, the loadings ``beta`` and the
    diagonal uniqueness matrix ``psi`` are updated until the relative change
    of both is at most ``tol`` or ``max_iter`` iterations have been run.

    Parameters
    ----------
    cov : pandas.DataFrame or numpy.ndarray
        Square, symmetric matrix. If its diagonal is numerically all ones it
        is treated as a correlation matrix and communalities are capped at 0.999.
    n_factors : int or None
        Number of factors. ``None`` uses the number of eigenvalues larger than
        1e-9 times the largest eigenvalue. Values above the matrix dimension
        are reduced to the dimension. NumPy integers are accepted.
    max_iter : int
        Maximum number of update iterations.
    tol : float
        Convergence threshold on the relative change of loadings and uniquenesses.
    min_communal : float
        Lower bound applied to every uniqueness (diagonal entry of ``psi``).

    Returns
    -------
    betas_df : pandas.DataFrame
        Loadings, one row per variable and one column ``PC1..PCk`` per factor.
        The index is taken from ``cov`` when it is a DataFrame, otherwise
        ``var_0..var_{n-1}``.
    factor_variances : numpy.ndarray
        Column sums of the squared loadings.
    explained_var : numpy.ndarray
        ``factor_variances`` divided by the trace of ``cov``.
    explained_cumulative : numpy.ndarray
        Cumulative sum of ``explained_var``.

    Raises
    ------
    ValueError
        If ``cov`` is not a square symmetric array/DataFrame, if ``n_factors``
        is not a positive integer, or if the model covariance becomes singular.
    """
    original_index = cov.index.tolist() if isinstance(cov, pd.DataFrame) else None
    cov = cov.to_numpy() if isinstance(cov, pd.DataFrame) else cov
    if not isinstance(cov, np.ndarray):
        raise ValueError("Covariance matrix must be numpy array or DataFrame")
    if cov.ndim != 2 or cov.shape[0] != cov.shape[1]:
        raise ValueError("Covariance matrix must be square")
    if not np.allclose(cov, cov.T, atol=1e-9):
        raise ValueError("Covariance matrix must be symmetric")
    n_vars = cov.shape[0]
    is_correlation = np.allclose(np.diag(cov), 1.0, atol=1e-5)
    # A single eigendecomposition serves both the default factor count and the
    # starting loadings.
    eigenvals, eigenvecs = np.linalg.eigh(cov)
    if n_factors is None:
        tol_eigen = 1e-9 * np.max(eigenvals)
        n_factors = int(np.sum(eigenvals > tol_eigen))
    else:
        # np.integer is accepted as well: factor counts often come out of
        # NumPy/pandas containers and are not plain Python ints.
        if not isinstance(n_factors, (int, np.integer)) or n_factors <= 0:
            raise ValueError("n_factors must be a positive integer")
        n_factors = min(int(n_factors), n_vars)
    # Keep the n_factors largest eigenpairs (eigh returns ascending order).
    order = np.argsort(eigenvals)[::-1]
    eigenvals = eigenvals[order][:n_factors]
    eigenvecs = eigenvecs[:, order][:, :n_factors]
    # Starting values: principal-component loadings and the implied uniquenesses.
    beta = eigenvecs @ np.diag(np.sqrt(np.maximum(eigenvals, 0)))
    communalities = np.sum(beta**2, axis=1)
    if is_correlation:
        communalities = np.clip(communalities, 0, 0.999)
    psi = np.diag(np.maximum(np.diag(cov) - communalities, min_communal))
    iter_num = 0
    beta_change = np.inf
    psi_change = np.inf
    warned_lower_bound = False
    while iter_num < max_iter and (beta_change > tol or psi_change > tol):
        sigma = beta @ beta.T + psi
        try:
            sigma_inv = np.linalg.inv(sigma)
        except np.linalg.LinAlgError:
            raise ValueError("Singular sigma matrix - reduce n_factors or increase min_communal")

        middle = np.linalg.inv(np.eye(n_factors) + beta.T @ sigma_inv @ beta)
        beta_new = cov @ sigma_inv @ beta @ middle

        communalities_new = np.sum(beta_new**2, axis=1)
        if is_correlation:
            communalities_new = np.clip(communalities_new, 0, 0.999)
        psi_new_diag = np.maximum(np.diag(cov) - communalities_new, min_communal)
        psi_new = np.diag(psi_new_diag)

        # Report the boundary condition once instead of on every iteration,
        # which could print the same line up to max_iter times.
        if not warned_lower_bound and np.any(psi_new_diag <= min_communal + 1e-6):
            print("Warning: Some uniquenesses at lower bound")
            warned_lower_bound = True

        beta_change = np.linalg.norm(beta_new - beta) / (np.linalg.norm(beta) + np.finfo(float).eps)
        psi_change = np.linalg.norm(psi_new_diag - np.diag(psi)) / (np.linalg.norm(np.diag(psi)) + np.finfo(float).eps)

        beta = beta_new
        psi = psi_new
        iter_num += 1
    # Only warn when the budget ran out AND the last step was still above the
    # tolerance; converging exactly on the final iteration is not a failure.
    if iter_num >= max_iter and (beta_change > tol or psi_change > tol):
        print("Warning: Maximum iterations reached without convergence")
    factor_variances = np.sum(beta**2, axis=0)
    total_variance = np.sum(np.diag(cov))
    explained_var = factor_variances / total_variance
    explained_cumulative = np.cumsum(explained_var)
    factor_columns = [f'PC{i+1}' for i in range(n_factors)]
    row_labels = (original_index if original_index is not None
                  else [f'var_{i}' for i in range(n_vars)])
    betas_df = pd.DataFrame(beta, index=row_labels, columns=factor_columns)
    return betas_df, factor_variances, explained_var, explained_cumulative

# ------------------ Evaluation Function ------------------
def evaluate_mle_os(corr_is, df_os, df_is, n_factors):
    """
    Fit the factor model on in-sample data and evaluate it in and out of sample.

    The three inputs are restricted to the assets they share (sorted). Both
    return tables are standardized with the IN-SAMPLE means and standard
    deviations. Loadings are estimated with `joreskog` from `corr_is`; a
    second set is estimated from the correlation of the standardized
    out-of-sample returns and is used only for the stability measures.

    Parameters:
        corr_is (DataFrame): In-sample correlation matrix, indexed by asset.
        df_os (DataFrame): Out-of-sample returns, one column per asset.
        df_is (DataFrame): In-sample returns, one column per asset.
        n_factors (int): Requested number of factors. `joreskog` reduces it
            to the number of common assets if it is larger.

    Returns:
        metrics (dict): 'n_factors' (number of factors actually used),
            in-/out-of-sample explained variance from `calc_proj_ev`, share of
            out-of-sample residual variance on the first residual principal
            component, average and per-factor absolute loading correlation and
            absolute Tucker congruence, and the sparsity index of the
            in-sample loadings.
        loadings_is (DataFrame): In-sample factor weights.
        residuals (DataFrame): Out-of-sample residuals, indexed like the
            out-of-sample returns with one column per common asset.

    Raises:
        ValueError: If the inputs have no asset in common.
    """
    common_assets = sorted(set(df_os.columns) & set(corr_is.index) & set(df_is.columns))
    if not common_assets:
        raise ValueError("No common assets between matrices")
    corr_is_aligned = corr_is.loc[common_assets, common_assets]
    df_os_aligned = df_os[common_assets]
    df_is_aligned = df_is[common_assets]
    # Out-of-sample data is scaled with in-sample moments on purpose, so no
    # out-of-sample information enters the standardization.
    is_means = df_is_aligned.mean()
    is_stds = df_is_aligned.std()
    df_is_std = (df_is_aligned - is_means) / is_stds
    df_os_std = (df_os_aligned - is_means) / is_stds
    # In-sample loadings; joreskog already labels the columns PC1..PCk.
    loadings_is, _, _, _ = joreskog(corr_is_aligned, n_factors=n_factors)
    in_sample_ev = calc_proj_ev(df_is_std, loadings_is)
    out_sample_ev = calc_proj_ev(df_os_std, loadings_is)
    # Loadings re-estimated on the out-of-sample window, for comparison only.
    loadings_os, _, _, _ = joreskog(df_os_std.corr(), n_factors=n_factors)
    # joreskog may return fewer factors than requested (it caps the count at
    # the number of assets), so iterate over what was actually produced.
    n_used = min(loadings_is.shape[1], loadings_os.shape[1])
    # NOTE(review): factors are paired by column position; confirm that the
    # factor order is comparable between the two estimations.
    correlations = []
    congruences = []
    for i in range(n_used):
        is_vec = loadings_is.iloc[:, i]
        os_vec = loadings_os.iloc[:, i]
        correlations.append(abs(np.corrcoef(is_vec, os_vec)[0, 1]))
        congruences.append(abs(congruence_coefficient(is_vec, os_vec)))
    avg_loading_corr = np.mean(correlations)
    avg_congruence = np.mean(congruences)
    # Out-of-sample factor scores and the returns they imply.
    inv_LL = np.linalg.inv(loadings_is.T.dot(loadings_is))
    factor_scores_os = (inv_LL.dot(loadings_is.T.dot(df_os_std.T))).T
    projected_returns_os = factor_scores_os.dot(loadings_is.T)
    # Residuals are wrapped in a DataFrame so that dates and asset names
    # travel with them to the caller.
    resid = pd.DataFrame(df_os_std - projected_returns_os, index=df_os_std.index,
                         columns=common_assets)
    # Share of residual variance captured by the first residual component.
    pca_residuals = PCA(n_components=1)
    pca_residuals.fit(resid)
    residual_first_pc_ev = pca_residuals.explained_variance_ratio_[0]
    sparsity_index = compute_sparsity_index(loadings_is)
    metrics = {
        'n_factors': n_used,
        'in_sample_explained_variance': in_sample_ev,
        'explained_variance': out_sample_ev,
        'residual_first_pc_ev': residual_first_pc_ev,
        'avg_loading_correlation': avg_loading_corr,
        'avg_congruence': avg_congruence,
        'sparsity_index': sparsity_index,
    }
    for i, corr_val in enumerate(correlations):
        metrics[f'factor_{i+1}_correlation'] = round(corr_val, 3)
    for i, cong_val in enumerate(congruences):
        metrics[f'factor_{i+1}_congruence'] = round(cong_val, 3)
    return metrics, loadings_is, resid

# --------------------- USAGE PART ---------------------
# Global parameter: stock_sample (assumed available)
periods = ["00", "05", "10", "15"]
# One factor count per period, taken positionally from `n_factors`.
n_factors_dict = dict(zip(periods, n_factors))
# Within a period, sample k is the in-sample set and sample k+1 the
# out-of-sample set: a-b, b-c, c-d, d-e.
pairings = [("a", "b"), ("b", "c"), ("c", "d"), ("d", "e")]
results_list = []
factor_weights_list = []
residuals_dict = {}
print("Using the following number of factors per period:")
for period, n in n_factors_dict.items():
    print(f"Period {period}: {n} factors")
print("\n")
for period in periods:
    for ins, oos in pairings:
        corr_key = f"corr{period}{ins}_is"
        is_key = f"df{period}{ins}_is"
        os_key = f"df{period}{oos}_os"
        required_keys = [corr_key, is_key, os_key]
        # The tables were stored as notebook-level variables by the import
        # cell; a pairing is skipped when any of them was not loaded.
        missing = [key for key in required_keys if key not in globals()]
        if missing:
            print(f"Skipping {period} pairing {ins}-{oos} – missing data: {missing}")
            continue
        try:
            metrics, loadings_is, residuals = evaluate_mle_os(
                globals()[corr_key],
                globals()[os_key],
                globals()[is_key],
                n_factors=n_factors_dict[period]
            )
            metrics.update(period=period, in_sample=ins, oos_sample=oos)
            results_list.append(metrics)
            # Keep the in-sample factor weights together with the asset labels.
            loadings_is_reset = (
                loadings_is.reset_index()
                .rename(columns={'index': 'Asset'})
                .assign(period=period, in_sample=ins)
            )
            factor_weights_list.append(loadings_is_reset)
            # File the residuals under a label that is unique per pairing.
            label = f"{period}_{ins}_vs_{oos}"
            residuals_dict[label] = residuals
            print(f"Completed {period}: in-sample '{ins}' vs out-of-sample '{oos}'")
        except Exception as e:
            # Best effort: one failing pairing must not stop the others.
            print(f"Error in {period} for pairing {ins}-{oos}: {str(e)}")
if results_list:
    results_df = pd.DataFrame(results_list)
    column_order = ['period', 'in_sample', 'oos_sample', 'n_factors',
                    'in_sample_explained_variance', 'explained_variance',
                    'residual_first_pc_ev', 'avg_loading_correlation',
                    'avg_congruence', 'sparsity_index']
    # Per-factor columns are named "factor_<k>_<metric>". Order them by the
    # numeric factor index; a plain string sort would place factor_10_* before
    # factor_1_*.
    factor_pattern = re.compile(r'^factor_(\d+)_')
    factor_cols = sorted(
        (col for col in results_df.columns if factor_pattern.match(col)),
        key=lambda col: (int(factor_pattern.match(col).group(1)), col),
    )
    column_order.extend(factor_cols)
    results_df = results_df[column_order]
    numeric_columns = results_df.select_dtypes(include=[np.number]).columns
    results_df[numeric_columns] = results_df[numeric_columns].round(3)
    results_df = results_df.sort_values(['period', 'in_sample', 'oos_sample'])
    excel_filename = f"Final_Data/Final_Results/mle_{stock_sample}.xlsx"
    # Create the output folder if needed so the writer does not fail on a
    # fresh checkout.
    os.makedirs(os.path.dirname(excel_filename), exist_ok=True)
    # Stays None when no residuals were collected; checked after writing.
    all_residuals = None
    with pd.ExcelWriter(excel_filename) as writer:
        # Sheet: one row per (period, in-sample, out-of-sample) pairing.
        results_df.to_excel(writer, sheet_name='Results', index=False)
        # Sheet: mean and standard deviation of the headline metrics per period.
        summary_stats = results_df.groupby('period').agg({
            'in_sample_explained_variance': ['mean', 'std'],
            'explained_variance': ['mean', 'std'],
            'residual_first_pc_ev': ['mean', 'std'],
            'avg_loading_correlation': ['mean', 'std'],
            'avg_congruence': ['mean', 'std'],
            'sparsity_index': ['mean', 'std']
        }).round(3)
        summary_stats.columns = ['IS_EV_mean', 'IS_EV_std',
                                 'OOS_EV_mean', 'OOS_EV_std',
                                 'Residual_PC1_mean', 'Residual_PC1_std',
                                 'Corr_mean', 'Corr_std',
                                 'Congruence_mean', 'Congruence_std',
                                 'Sparsity_Index_mean', 'Sparsity_Index_std']
        summary_stats.to_excel(writer, sheet_name='Summary_Stats')
        # Sheet: per-factor correlation and congruence, summarized per period.
        factor_summary = results_df[['period'] + factor_cols].groupby('period').agg(['mean', 'std']).round(3)
        factor_summary.to_excel(writer, sheet_name='Factor_Correlations')
        # Sheet: stacked in-sample factor weights of every pairing.
        if factor_weights_list:
            factor_weights_df = pd.concat(factor_weights_list, ignore_index=True)
            factor_weight_cols = [col for col in factor_weights_df.columns if col.startswith('PC')]
            factor_weights_df = factor_weights_df[['period', 'in_sample', 'Asset'] + factor_weight_cols]
            factor_weights_df.to_excel(writer, sheet_name='Factor_Weights', index=False)
        # --- Residuals Sheet ---
        # All residual tables side by side; columns are prefixed with the
        # pairing label and rows are the union of all row labels.
        if residuals_dict:
            for label, resid in residuals_dict.items():
                resid_copy = resid.copy()
                resid_copy.columns = [f"{label}_{col}" for col in resid_copy.columns]
                if all_residuals is None:
                    all_residuals = resid_copy
                else:
                    all_residuals = all_residuals.join(resid_copy, how='outer')
            all_residuals.to_excel(writer, sheet_name='Residuals', index=True)
        # --- Aggregated Results Sheet ---
        # Every numeric metric aggregated over all rows of results_df, with a
        # two-sided 95% t-interval for the mean.
        agg_metrics = {}
        metric_columns = [col for col in results_df.columns
                          if col not in ['period', 'in_sample', 'oos_sample']
                          and np.issubdtype(results_df[col].dtype, np.number)]
        for col in metric_columns:
            data = results_df[col].dropna()
            n = len(data)
            mean_val = data.mean()
            if n > 1:
                std_val = data.std()
                sem = std_val / np.sqrt(n)
                t_stat = stats.t.ppf(1-0.025, df=n-1)
                ci_lower = mean_val - t_stat * sem
                ci_upper = mean_val + t_stat * sem
            else:
                # A spread cannot be estimated from fewer than two values.
                std_val = np.nan
                ci_lower = np.nan
                ci_upper = np.nan
            agg_metrics[col] = {
                "mean": round(mean_val, 3),
                "std": round(std_val, 3) if pd.notnull(std_val) else std_val,
                "count": n,
                "CI_lower": round(ci_lower, 3) if pd.notnull(ci_lower) else ci_lower,
                "CI_upper": round(ci_upper, 3) if pd.notnull(ci_upper) else ci_upper
            }
        agg_df = pd.DataFrame(agg_metrics).T.reset_index().rename(columns={'index': 'Metric'})
        agg_df = agg_df[['Metric', 'mean', 'std', 'count', 'CI_lower', 'CI_upper']]
        agg_df.to_excel(writer, sheet_name='Results_Aggregated', index=False)
    print(f"\nResults saved to: {excel_filename}")
    print("\nResults DataFrame:")
    print(results_df)
    print("\nSummary Statistics by Period:")
    print(summary_stats)
    print("\nAggregated Results with Confidence Intervals:")
    print(agg_df)
    # Only announce the residual sheet when it was actually written.
    if all_residuals is not None:
        print("\nResiduals sheet saved with columns:")
        print(list(all_residuals.columns))
else:
    # Belongs to `if results_list:`. It used to hang off the residual check,
    # so it fired when residuals were missing and never when results were.
    print("No results were generated. Check your input data.")


Using the following number of factors per period:
Period 00: 10 factors
Period 05: 8 factors
Period 10: 7 factors
Period 15: 10 factors


Completed 00: in-sample 'a' vs out-of-sample 'b'
Completed 00: in-sample 'b' vs out-of-sample 'c'
Completed 00: in-sample 'c' vs out-of-sample 'd'
Completed 00: in-sample 'd' vs out-of-sample 'e'
Completed 05: in-sample 'a' vs out-of-sample 'b'
Completed 05: in-sample 'b' vs out-of-sample 'c'
Completed 05: in-sample 'c' vs out-of-sample 'd'
Completed 05: in-sample 'd' vs out-of-sample 'e'
Completed 10: in-sample 'a' vs out-of-sample 'b'
Completed 10: in-sample 'b' vs out-of-sample 'c'
Completed 10: in-sample 'c' vs out-of-sample 'd'
Completed 10: in-sample 'd' vs out-of-sample 'e'
Completed 15: in-sample 'a' vs out-of-sample 'b'
Completed 15: in-sample 'b' vs out-of-sample 'c'
Completed 15: in-sample 'c' vs out-of-sample 'd'
Completed 15: in-sample 'd' vs out-of-sample 'e'

Results saved to: Final_Data/Final_Results/mle_100.xlsx

Results DataFrame:
 