<a href="https://colab.research.google.com/github/rpjena/random_matrix/blob/main/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy import stats

# Function to create synthetic data (replace with your actual data loading)
def create_sample_data(n_periods=500, seed=42):
    """Generate a synthetic daily return panel for two macro strategies.

    Eight independent, normally distributed factor return series are drawn,
    then two strategy return series are built as fixed linear combinations
    of those factors plus Gaussian noise with a non-zero mean (the "alpha").

    The draws come from a private ``RandomState`` seeded with ``seed``, so
    the numbers match what ``np.random.seed(seed)`` followed by the same
    ``np.random.normal`` calls would give, but the process-wide NumPy
    random state is left untouched.

    Args:
        n_periods: Number of rows (consecutive calendar days) to generate.
        seed: Seed for the private random generator.

    Returns:
        DataFrame indexed by date with columns ``systematic_macro``,
        ``discretionary_macro`` and the eight factor columns.
    """
    rng = np.random.RandomState(seed)

    # Calendar-daily index (pandas' default frequency) starting 2020-01-01.
    dates = pd.date_range(start='2020-01-01', periods=n_periods)

    # 10% annualized volatility expressed per day (approx. 0.63%),
    # using the 252-day convention applied throughout the analysis.
    daily_vol = 0.1 / np.sqrt(252)

    # (column name, daily mean return). The draw order is significant: it
    # fixes which random numbers each factor receives for a given seed.
    factor_means = (
        ('sp500', 0.0005),      # Traditional: S&P 500
        ('bonds', 0.0002),      # Traditional: bonds
        ('dollar', 0),          # Traditional: USD
        ('momentum', 0.0003),   # Dynamic: momentum
        ('value', 0.0001),      # Dynamic: value
        ('carry', 0.0002),      # Dynamic: carry
        ('vol_level', 0),       # Volatility level
        ('vol_slope', 0),       # Volatility curve slope
    )
    factors = {
        name: rng.normal(mean, daily_vol, n_periods)
        for name, mean in factor_means
    }

    # Strategy returns: assumed factor loadings + specific return with alpha.
    systematic_macro = (
        0.3 * factors['sp500'] + 0.2 * factors['bonds'] - 0.15 * factors['dollar'] +
        0.4 * factors['momentum'] + 0.25 * factors['value'] + 0.1 * factors['carry'] +
        0.15 * factors['vol_level'] - 0.05 * factors['vol_slope'] +
        rng.normal(0.0004, 0.005, n_periods)
    )

    # Smaller loadings, noisier, and a higher alpha than the systematic strategy.
    discretionary_macro = (
        0.1 * factors['sp500'] + 0.1 * factors['bonds'] - 0.05 * factors['dollar'] +
        0.15 * factors['momentum'] + 0.1 * factors['value'] + 0.05 * factors['carry'] +
        0.1 * factors['vol_level'] - 0.02 * factors['vol_slope'] +
        rng.normal(0.0006, 0.008, n_periods)
    )

    return pd.DataFrame(
        {
            'systematic_macro': systematic_macro,
            'discretionary_macro': discretionary_macro,
            **factors,
        },
        index=dates,
    )

# Function to run regressions with different factor subsets
def run_regressions(df):
    """Fit OLS regressions of each strategy on four nested factor sets.

    Args:
        df: DataFrame holding the ``systematic_macro`` and
            ``discretionary_macro`` return columns plus the eight factor
            columns named below.

    Returns:
        ``{'systematic': {...}, 'discretionary': {...}}`` where each inner
        dict maps a factor-set label to the fitted statsmodels OLS results
        (intercept included).
    """
    traditional = ['sp500', 'bonds', 'dollar']
    dynamic = ['momentum', 'value', 'carry']
    volatility = ['vol_level', 'vol_slope']

    # Label -> regressors, from the smallest model to the full one.
    subsets = {
        'Traditional': traditional,
        'Traditional + Dynamic': traditional + dynamic,
        'Traditional + Vol': traditional + volatility,
        'All Factors': traditional + dynamic + volatility,
    }

    # Result key -> dependent-variable column.
    targets = {
        'systematic': 'systematic_macro',
        'discretionary': 'discretionary_macro',
    }

    def fit_ols(target_column, factor_columns):
        # Prepend the intercept column, then estimate by ordinary least squares.
        design = sm.add_constant(df[factor_columns])
        return sm.OLS(df[target_column], design).fit()

    return {
        strategy: {
            label: fit_ols(target_column, columns)
            for label, columns in subsets.items()
        }
        for strategy, target_column in targets.items()
    }

# Function to create Panel A (regression coefficients with significance stars)
def create_panel_a(regression_results):
    """Build Panel A: slope coefficients with significance stars.

    Each regression becomes one column labelled
    ``"<Strategy> - <factor set>"``. Rows are the union of all factors seen
    (in order of first appearance) followed by ``R²`` and
    ``Alpha (annualized)`` (intercept times 252). A factor that is not part
    of a given regression is left as NaN in that column.

    The table is assembled in one step from all columns. Assigning columns
    one at a time into an empty frame would pin the row index to the first
    (smallest) regression and silently discard the rows for every factor
    that only appears in the larger models.

    Args:
        regression_results: Nested dict with keys ``'systematic'`` and
            ``'discretionary'``, each mapping a factor-set label to a fitted
            result exposing ``params``, ``pvalues`` (label-indexed Series
            whose first entry is the intercept) and ``rsquared``.

    Returns:
        DataFrame of formatted strings (4 decimals; ``***``/``**``/``*`` for
        p < 0.01 / 0.05 / 0.10). Empty if there are no regressions.
    """
    columns = {}
    factor_rows = []

    for strategy in ['systematic', 'discretionary']:
        for subset_name, results in regression_results[strategy].items():
            # Positional slicing: entry 0 is the intercept, the rest are slopes.
            slopes = results.params.iloc[1:]
            slope_pvals = results.pvalues.iloc[1:]

            cells = {
                factor: f"{coef:.4f}{_significance_stars(pval)}"
                for factor, coef, pval in zip(slopes.index, slopes, slope_pvals)
            }
            cells['R²'] = f"{results.rsquared:.4f}"
            cells['Alpha (annualized)'] = f"{results.params.iloc[0] * 252:.4f}"

            columns[f"{strategy.capitalize()} - {subset_name}"] = pd.Series(cells)

            # Remember factor order so the final table is not alphabetized.
            for factor in slopes.index:
                if factor not in factor_rows:
                    factor_rows.append(factor)

    if not columns:
        return pd.DataFrame()

    row_order = factor_rows + ['R²', 'Alpha (annualized)']
    return pd.DataFrame(columns).reindex(row_order)


def _significance_stars(pval):
    """Return the star suffix for a p-value ('' when p >= 0.10 or NaN)."""
    for threshold, stars in ((0.01, '***'), (0.05, '**'), (0.1, '*')):
        if pval < threshold:
            return stars
    return ''

# Function to create Panel B (annualized performance statistics)
def create_panel_b(regression_results, df):
    """Build Panel B: annualized performance and return attribution.

    For every regression, the strategy's annualized mean return (x252) is
    split into per-factor contributions (slope x annualized mean factor
    return) and the remainder. Each regression is one column labelled
    ``"<Strategy> - <factor set>"``. Rows are the seven summary metrics
    followed by one ``"<factor> Contribution"`` row per factor seen, in
    order of first appearance. Contributions for factors absent from a
    regression are NaN.

    The table is assembled in one step from all columns. Assigning columns
    one at a time into an empty frame would pin the row index to the first
    (smallest) regression and silently discard the contribution rows of
    factors that only appear in the larger models.

    Args:
        regression_results: Nested dict with keys ``'systematic'`` and
            ``'discretionary'``, each mapping a factor-set label to a fitted
            result exposing ``params`` (label-indexed Series whose first
            entry is the intercept) and ``rsquared``.
        df: Return data with ``systematic_macro``, ``discretionary_macro``
            and one column per factor used in the regressions.

    Returns:
        DataFrame of strings formatted to 4 decimals. Empty if there are no
        regressions.
    """
    metric_rows = [
        'Strategy Return',
        'Strategy Volatility',
        'Sharpe Ratio',
        'Alpha',
        'Total Attributed Return',
        'Unexplained Return',
        'R²',
    ]

    # Annualized mean return of every non-strategy column.
    factor_returns = df.drop(['systematic_macro', 'discretionary_macro'], axis=1).mean() * 252

    columns = {}
    contribution_rows = []

    for strategy in ['systematic', 'discretionary']:
        # Strategy-level statistics do not depend on the factor set.
        strategy_series = df[f"{strategy}_macro"]
        strategy_return = strategy_series.mean() * 252
        strategy_vol = strategy_series.std() * np.sqrt(252)

        for subset_name, results in regression_results[strategy].items():
            # Positional slicing: entry 0 is the intercept, the rest are slopes.
            betas = results.params.iloc[1:]

            # Attributed return = coefficient * annualized average factor return.
            attributed_returns = {
                factor: beta * factor_returns[factor]
                for factor, beta in betas.items()
            }
            total_attributed = sum(attributed_returns.values())

            cells = {
                'Strategy Return': f"{strategy_return:.4f}",
                'Strategy Volatility': f"{strategy_vol:.4f}",
                'Sharpe Ratio': f"{strategy_return / strategy_vol:.4f}",
                'Alpha': f"{results.params.iloc[0] * 252:.4f}",
                'Total Attributed Return': f"{total_attributed:.4f}",
                'Unexplained Return': f"{strategy_return - total_attributed:.4f}",
                'R²': f"{results.rsquared:.4f}",
            }
            for factor, attr_ret in attributed_returns.items():
                row = f"{factor} Contribution"
                cells[row] = f"{attr_ret:.4f}"
                if row not in contribution_rows:
                    contribution_rows.append(row)

            columns[f"{strategy.capitalize()} - {subset_name}"] = pd.Series(cells)

    if not columns:
        return pd.DataFrame()

    return pd.DataFrame(columns).reindex(metric_rows + contribution_rows)

# Function to create Panel C (cumulative return charts)
def create_panel_c(regression_results, df):
    """Plot Panel C: cumulative raw, risk-adjusted and factor-driven returns.

    One subplot per strategy, based on that strategy's ``'All Factors'``
    regression. Three compounded return paths are drawn:

    * the strategy's own returns,
    * the returns left after subtracting the fitted factor contribution,
    * the fitted factor contribution itself (prediction minus intercept).

    Args:
        regression_results: Nested dict as returned by ``run_regressions``;
            must contain an ``'All Factors'`` entry for both strategies.
        df: Return data with ``systematic_macro``, ``discretionary_macro``
            and the factor columns used by the regressions.

    Returns:
        The matplotlib Figure holding both subplots.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Use the full model for the adjustment.
    subset_name = 'All Factors'

    for ax, strategy in zip(axes, ['systematic', 'discretionary']):
        original_returns = df[f"{strategy}_macro"]

        # Compounded (geometric) cumulative return of the raw series.
        cum_returns = (1 + original_returns).cumprod() - 1

        results = regression_results[strategy][subset_name]

        # Rebuild the design matrix in the fitted column order (entry 0 of
        # params is the intercept), then strip the intercept from the
        # prediction so only the factor-driven part remains. `.iloc[0]` is
        # used because plain `[0]` on a label-indexed Series is a deprecated
        # positional lookup.
        X = df[results.params.index[1:]]
        X_with_const = sm.add_constant(X)
        factor_contribution = results.predict(X_with_const) - results.params.iloc[0]

        # Risk-adjusted returns: what is left once factor exposure is removed
        # (intercept plus residual).
        risk_adjusted = original_returns - factor_contribution
        cum_risk_adjusted = (1 + risk_adjusted).cumprod() - 1

        # Correction: the factor contribution compounded on its own.
        cum_correction = (1 + factor_contribution).cumprod() - 1

        ax.plot(cum_returns, 'b-', label='Unadjusted Returns')
        ax.plot(cum_risk_adjusted, 'k-', label='Risk-Adjusted Returns')
        ax.plot(cum_correction, 'g-', label='Factor Contribution')
        ax.set_title(f"{strategy.capitalize()} Macro Strategy")
        ax.legend()
        ax.grid(True, alpha=0.3)
        ax.set_ylabel('Cumulative Return')

    plt.tight_layout()
    return fig

# Main function to run the entire analysis
def run_macro_regression_analysis():
    """Run the whole pipeline: data, regressions, tables and chart.

    Prints Panel A and Panel B to stdout and shows the Panel C figure.

    Returns:
        Tuple ``(panel_a, panel_b, fig_panel_c)``: the two DataFrames and
        the matplotlib Figure.
    """
    # Synthetic input; swap in real data loading here.
    data = create_sample_data()
    fits = run_regressions(data)

    coef_table = create_panel_a(fits)
    perf_table = create_panel_b(fits, data)
    chart = create_panel_c(fits, data)

    # (heading, underline, table) for each printed panel, in output order.
    report = (
        (
            "PANEL A: Regression Coefficients with Significance",
            "-------------------------------------------------",
            coef_table,
        ),
        (
            "\n\nPANEL B: Annualized Performance Statistics",
            "-----------------------------------------",
            perf_table,
        ),
    )
    for heading, underline, table in report:
        print(heading)
        print(underline)
        print(table)

    # The Panel C figure is the current figure, so the title lands on it.
    plt.suptitle("PANEL C: Cumulative Returns")
    plt.show()

    return coef_table, perf_table, chart

# Run the analysis
if __name__ == "__main__":
    # Script entry point: run everything, keep the two tables, ignore the figure.
    panel_a, panel_b, _ = run_macro_regression_analysis()

    # Optionally persist both tables as CSV in the working directory.
    for _table, _csv_path in (
        (panel_a, "panel_a_coefficients.csv"),
        (panel_b, "panel_b_performance.csv"),
    ):
        _table.to_csv(_csv_path)