<a href="https://colab.research.google.com/github/rpjena/random_matrix/blob/main/Untitled3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns

# Function to scale time series to target volatility
def scale_to_target_vol(returns, target_vol=0.10, periods_per_year=252):
    """
    Scale a return series to a target volatility level (annualized)

    The realized volatility is the sample standard deviation (ddof=1,
    NaN values ignored) of ``returns`` multiplied by
    ``sqrt(periods_per_year)``. Every observation is then multiplied by
    ``target_vol / realized_vol``. The same estimator is used for pandas
    and numpy inputs so both containers give identical results.

    Parameters:
    -----------
    returns : pandas.Series or numpy.array
        Return series to scale
    target_vol : float
        Target annualized volatility level (default 0.10 or 10%)
    periods_per_year : int or float
        Number of return observations per year used to annualize the
        volatility (default 252 for daily data; use 12 for monthly,
        52 for weekly)

    Returns:
    --------
    pandas.Series or numpy.array : Scaled return series

    Raises:
    -------
    ValueError
        If the realized volatility is zero or not finite (constant series,
        fewer than two valid observations, all-NaN input), because the
        scaling factor would be undefined.
    """
    if isinstance(returns, pd.Series):
        # pandas defaults: ddof=1 and skipna=True
        period_vol = returns.std()
    else:
        returns = np.asarray(returns, dtype=float)
        # Mirror the pandas estimator (sample std, NaNs skipped) so the
        # result does not depend on the container type.
        period_vol = np.nanstd(returns, ddof=1)

    vol = period_vol * np.sqrt(periods_per_year)  # Annualized volatility

    # Fail loudly instead of returning an inf/NaN-filled series.
    if not np.isfinite(vol) or vol == 0:
        raise ValueError(
            "Cannot scale returns: realized volatility is zero or undefined"
        )

    return returns * (target_vol / vol)

# Function to run regression and format results with significance stars
def run_regression_with_significance(y, X, alpha_levels=(0.1, 0.05, 0.01)):
    """
    Run regression and add significance stars to coefficients

    Fits an OLS model of ``y`` on ``X`` plus an added constant, using HAC
    covariance with ``maxlags=5``, and renders each coefficient as a
    4-decimal string followed by one star per significance level that its
    p-value satisfies.

    Parameters:
    -----------
    y : pandas.Series
        Dependent variable
    X : pandas.DataFrame
        Independent variables
    alpha_levels : sequence of float
        Significance levels; a coefficient receives one star for every
        level ``a`` with ``pval <= a``. With the default (0.1, 0.05, 0.01)
        this yields *, **, *** at the 10%, 5% and 1% levels. The order of
        the levels does not matter.

    Returns:
    --------
    dict : Dictionary with keys 'model' (fitted statsmodels results),
        'formatted_coefs' (dict of variable name -> formatted string),
        'r_squared' and 'adj_r_squared'
    """
    # Add constant to independent variables
    X = sm.add_constant(X)

    # Run regression with heteroskedasticity- and autocorrelation-consistent
    # standard errors
    model = sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 5})

    # Format coefficients with significance stars
    formatted_coefs = {}
    for var in model.params.index:
        coef = model.params[var]
        pval = model.pvalues[var]

        # Counting satisfied thresholds is independent of the ordering and
        # length of alpha_levels; a NaN p-value satisfies none.
        n_stars = sum(1 for level in alpha_levels if pval <= level)

        formatted_coefs[var] = f"{coef:.4f}{'*' * n_stars}"

    return {
        'model': model,
        'formatted_coefs': formatted_coefs,
        'r_squared': model.rsquared,
        'adj_r_squared': model.rsquared_adj
    }

# Main analysis function
def analyze_macro_returns(systematic_returns, discretionary_returns, risk_factors, factor_subsets):
    """
    Regress systematic and discretionary macro returns on subsets of
    volatility-scaled risk factors

    Every column of ``risk_factors`` is first rescaled with
    ``scale_to_target_vol`` to a 10% target volatility. For each entry of
    ``factor_subsets`` the two return series are then regressed on the
    selected scaled factors via ``run_regression_with_significance``.

    Parameters:
    -----------
    systematic_returns : pandas.Series
        Time series of systematic macro returns
    discretionary_returns : pandas.Series
        Time series of discretionary macro returns
    risk_factors : pandas.DataFrame
        Time series of risk factor returns
    factor_subsets : dict
        Dictionary with subset names as keys and lists of factor names as values

    Returns:
    --------
    dict : Maps each subset name to a dict with keys 'systematic' and
        'discretionary', each holding the corresponding regression result
    """
    # Put all factors on a common 10% volatility footing
    scaled_factors = pd.DataFrame({
        name: scale_to_target_vol(risk_factors[name], target_vol=0.10)
        for name in risk_factors.columns
    })

    # Dependent variables, in the order the regressions are run
    targets = (
        ('systematic', systematic_returns),
        ('discretionary', discretionary_returns),
    )

    output = {}
    for label, names in factor_subsets.items():
        design = scaled_factors[names]
        output[label] = {
            key: run_regression_with_significance(series, design)
            for key, series in targets
        }

    return output

# Function to create a formatted results table
def create_results_table(results, factor_subsets):
    """
    Create a formatted table with regression results

    Rows are 'const', then every factor in the order it first appears
    across ``factor_subsets``, then 'R²' and 'Adj. R²'. Columns are a
    two-level index of (subset name, 'Systematic' / 'Discretionary').
    Cells for factors that are not part of a given subset stay as empty
    strings.

    Parameters:
    -----------
    results : dict
        Dictionary with regression results; for each subset name it holds
        'systematic' and 'discretionary' entries, each with the keys
        'formatted_coefs', 'r_squared' and 'adj_r_squared'
    factor_subsets : dict
        Dictionary with factor subsets

    Returns:
    --------
    pandas.DataFrame : Formatted results table
    """
    # De-duplicate factor names while keeping first-seen order so the row
    # order is deterministic (a set would give an arbitrary order that
    # can change between runs).
    ordered_factors = dict.fromkeys(['const'])
    for factors in factor_subsets.values():
        ordered_factors.update(dict.fromkeys(factors))

    # Create a multi-index dataframe for results
    columns = pd.MultiIndex.from_product([list(results), ['Systematic', 'Discretionary']])
    rows = list(ordered_factors) + ['R²', 'Adj. R²']

    # Initialize the table with empty strings
    table = pd.DataFrame('', index=rows, columns=columns)

    # Fill in the table: same procedure for both return series
    series_labels = (('systematic', 'Systematic'), ('discretionary', 'Discretionary'))
    for subset_name, subset_results in results.items():
        for key, label in series_labels:
            res = subset_results[key]
            column = (subset_name, label)

            for var, coef in res['formatted_coefs'].items():
                table.loc[var, column] = coef

            table.loc['R²', column] = f"{res['r_squared']:.4f}"
            table.loc['Adj. R²', column] = f"{res['adj_r_squared']:.4f}"

    return table

# Function to create panel plot of regression coefficients
def _significance_stars(pval):
    """Return '***', '**', '*' or '' for p-values at the 1%, 5%, 10% levels."""
    if pval <= 0.01:
        return '***'
    if pval <= 0.05:
        return '**'
    if pval <= 0.1:
        return '*'
    return ''


def _annotate_significance(ax, rects, pvals, offset=0.01):
    """Write significance stars just beyond the tip of each bar in ``rects``."""
    for pval, rect in zip(pvals, rects):
        stars = _significance_stars(pval)
        if not stars:
            continue

        x_mid = rect.get_x() + rect.get_width() / 2
        height = rect.get_height()
        if height >= 0:
            # Positive bar: place the marker above the top edge
            ax.text(x_mid, height + offset, stars,
                    ha='center', va='bottom', fontsize=10)
        else:
            # Negative bar: its height is negative, so the marker goes
            # below the bottom edge instead of inside the bar
            ax.text(x_mid, height - offset, stars,
                    ha='center', va='top', fontsize=10)


def plot_regression_coefficients(results, factor_subsets):
    """
    Create panel plot of regression coefficients with significance indicators

    One subplot is drawn per entry of ``factor_subsets``. Each subplot shows
    grouped bars of the systematic and discretionary coefficients (the
    constant is omitted) and marks bars with *, **, *** for p-values at
    or below 0.1, 0.05 and 0.01. Markers are placed beyond the bar tip,
    above positive bars and below negative bars.

    Parameters:
    -----------
    results : dict
        Dictionary with regression results; ``results[name]['systematic']
        ['model']`` and ``results[name]['discretionary']['model']`` must be
        fitted models exposing ``params`` and ``pvalues``
    factor_subsets : dict
        Dictionary with factor subsets

    Returns:
    --------
    matplotlib.figure.Figure : Figure with coefficient plots

    Raises:
    -------
    ValueError
        If ``factor_subsets`` is empty (there is nothing to plot)
    """
    # Determine number of subplots needed
    n_subplots = len(factor_subsets)
    if n_subplots == 0:
        raise ValueError("factor_subsets must contain at least one subset")

    fig, axes = plt.subplots(n_subplots, 1, figsize=(12, 4 * n_subplots))

    if n_subplots == 1:
        axes = [axes]  # Make it indexable like the multi-panel case

    for i, subset_name in enumerate(factor_subsets):
        ax = axes[i]

        # Get fitted models for systematic and discretionary
        sys_model = results[subset_name]['systematic']['model']
        disc_model = results[subset_name]['discretionary']['model']

        # Skip the constant for better visualization
        coef_names = [name for name in sys_model.params.index if name != 'const']
        sys_coefs = [sys_model.params[name] for name in coef_names]
        sys_pvals = [sys_model.pvalues[name] for name in coef_names]

        disc_coefs = [disc_model.params[name] for name in coef_names]
        disc_pvals = [disc_model.pvalues[name] for name in coef_names]

        # Set up bar positions
        x = np.arange(len(coef_names))
        width = 0.35

        # Plot bars side by side around each tick
        rects1 = ax.bar(x - width/2, sys_coefs, width, label='Systematic')
        rects2 = ax.bar(x + width/2, disc_coefs, width, label='Discretionary')

        # Add significance markers
        _annotate_significance(ax, rects1, sys_pvals)
        _annotate_significance(ax, rects2, disc_pvals)

        # Add labels and legend
        ax.set_xlabel('Risk Factors')
        ax.set_ylabel('Coefficient Value')
        ax.set_title(f'Panel A: Regression Coefficients for {subset_name} Factors')
        ax.set_xticks(x)
        ax.set_xticklabels(coef_names, rotation=45, ha='right')
        ax.legend(loc='best')
        ax.grid(True, linestyle='--', alpha=0.7)
        ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)

    plt.tight_layout()
    return fig

# Example usage
if __name__ == "__main__":
    # Demonstration on synthetic data - replace with actual data loading.
    # The seed is fixed, so the order of the random draws below determines
    # the generated numbers.
    np.random.seed(42)
    dates = pd.date_range(start='2015-01-01', end='2022-12-31', freq='B')
    n_obs = len(dates)

    # 1. Risk factor returns: (name, mean, standard deviation) per factor,
    #    drawn in this order
    factor_specs = [
        ('Equity', 0.0005, 0.01),
        ('Bonds', 0.0002, 0.005),
        ('Commodities', 0.0003, 0.012),
        ('FX', 0.0001, 0.006),
        ('Credit', 0.0002, 0.004),
        ('Volatility', -0.0001, 0.02),
    ]
    risk_factors = pd.DataFrame(
        {name: np.random.normal(mu, sigma, n_obs) for name, mu, sigma in factor_specs},
        index=dates,
    )

    # Blend the first factor into every other factor to induce correlation
    for position in range(1, risk_factors.shape[1]):
        risk_factors.iloc[:, position] = (
            0.3 * risk_factors.iloc[:, 0] + 0.7 * risk_factors.iloc[:, position]
        )

    # 2. Systematic and discretionary macro returns
    # Systematic: larger factor loadings, smaller noise
    systematic_noise = np.random.normal(0.0001, 0.004, n_obs)
    systematic_returns = (
        0.5 * risk_factors['Equity']
        + 0.3 * risk_factors['Bonds']
        + 0.2 * risk_factors['FX']
        + systematic_noise
    )

    # Discretionary: smaller factor loadings, larger noise
    discretionary_noise = np.random.normal(0.0002, 0.008, n_obs)
    discretionary_returns = (
        0.2 * risk_factors['Equity']
        + 0.1 * risk_factors['Bonds']
        + 0.1 * risk_factors['FX']
        + discretionary_noise
    )

    # Regression specifications: subset name -> factors included
    factor_subsets = {
        'All Factors': list(risk_factors.columns),
        'Traditional': ['Equity', 'Bonds', 'Commodities', 'FX'],
        'Alternative': ['Credit', 'Volatility'],
    }

    # Run the analysis
    results = analyze_macro_returns(
        systematic_returns, discretionary_returns, risk_factors, factor_subsets
    )

    # Build and print the coefficient table
    table = create_results_table(results, factor_subsets)
    print("\nPanel A: Regression Coefficients with Significance Indicators")
    print(table)

    # Build the coefficient bar charts
    fig = plot_regression_coefficients(results, factor_subsets)

    # In a real application, you might save the figure
    # fig.savefig('regression_coefficients.png', dpi=300, bbox_inches='tight')

    # Optional: print the coefficient table of each fitted model
    print("\nDetailed Regression Statistics:")
    report_sections = (
        ("\nSystematic Macro:", 'systematic'),
        ("\nDiscretionary Macro:", 'discretionary'),
    )
    for subset_name, subset_results in results.items():
        print(f"\n--- {subset_name} ---")
        for heading, key in report_sections:
            print(heading)
            print(subset_results[key]['model'].summary().tables[1])