# Matrix Risk Engine - Research Notebook

This notebook demonstrates the research workflow:
1. Load and explore market data
2. Calculate factor signals
3. Analyze factor performance (IC, turnover)
4. Statistical significance tests
5. Run the complete factor analysis and print a summary

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date

# Add the project root to the import path so the `src.*` package used later
# in this notebook can be imported.
# NOTE: '..' is resolved against the kernel's current working directory, so
# this assumes the notebook is run from its own folder, one level below the
# project root.
import sys
sys.path.insert(0, '..')

## 1. Load Market Data

In [None]:
# Create sample market data for demonstration.
# The legacy global seed is kept so the generated numbers stay reproducible.
np.random.seed(42)

symbols = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META']
n_days = 504  # 2 x 252 business days; single source of truth for every shape below
dates = pd.date_range('2020-01-01', periods=n_days, freq='B')

# Generate correlated return data.
# The draw order (per-symbol noise first, then the market factor) determines
# the random stream, so it must not be swapped.
idiosyncratic = np.random.randn(n_days, len(symbols)) * 0.02
market_factor = np.random.randn(n_days) * 0.015

# Adding the same market factor to every symbol is what makes the columns
# correlated with each other.
returns = pd.DataFrame(
    idiosyncratic + market_factor[:, np.newaxis],
    index=dates,
    columns=symbols,
)

# Compound daily returns into a price path, scaled by 100.
prices = (1 + returns).cumprod() * 100
print(f"Loaded {len(prices)} days of data for {len(symbols)} symbols")
prices.tail()

## 2. Calculate Factor Signals

In [None]:
# Look-back windows, in business days.
lookback_12m = 252
lookback_1m = 21

# Momentum: trailing 12-month price change.
momentum = prices.pct_change(periods=lookback_12m)

# Mean reversion: trailing 1-month price change with the sign flipped
# (contrarian), so recent losers receive the highest scores.
mean_reversion = prices.pct_change(periods=lookback_1m).mul(-1)

# Volatility: rolling 1-month standard deviation of daily returns.
volatility = returns.rolling(window=lookback_1m).std()

# Show the most recent momentum scores, strongest first.
latest_momentum = momentum.iloc[-1]
print("Latest Momentum Signals:")
print(latest_momentum.sort_values(ascending=False))

## 3. Factor Analysis

In [None]:
from src.core.services.factor_analysis_service import FactorAnalysisService

# Initialize factor analysis service
factor_service = FactorAnalysisService()

# Calculate forward returns (5-day): the return from day t to day t+5,
# stored at day t. Shifting the daily `returns` frame would only give the
# single-day return observed five days later, not the 5-day holding return.
forward_horizon = 5
forward_returns = prices.shift(-forward_horizon) / prices - 1

# Momentum is undefined at the start of the sample and forward returns are
# undefined at the end, so dropping NaNs separately yields frames that cover
# different dates. Restrict both to the dates where each is fully defined so
# the service receives identically indexed inputs.
ic_dates = momentum.dropna().index.intersection(forward_returns.dropna().index)

# Calculate Information Coefficient for momentum factor
ic_series = factor_service.calculate_ic(
    factor_scores=momentum.loc[ic_dates],
    forward_returns=forward_returns.loc[ic_dates],
)

ic_mean = ic_series.mean()
ic_std = ic_series.std()
print(f"Mean IC: {ic_mean:.4f}")
print(f"IC Std: {ic_std:.4f}")
print(f"IC IR: {ic_mean / ic_std:.4f}")

In [None]:
# Two stacked panels: the raw IC series on top, its running sum below.
fig, axes = plt.subplots(2, 1, figsize=(12, 8))
ic_ax, cumulative_ax = axes

# Top panel: IC over time, with a dashed zero line and a solid line at the mean.
mean_ic = ic_series.mean()
ic_ax.plot(ic_series.index, ic_series.values)
ic_ax.axhline(y=0, color='r', linestyle='--', alpha=0.5)
ic_ax.axhline(y=mean_ic, color='g', linestyle='-', alpha=0.7, label=f'Mean: {mean_ic:.3f}')
ic_ax.set_title('Information Coefficient Over Time')
ic_ax.set_ylabel('IC')
ic_ax.legend()

# Bottom panel: cumulative IC.
cumulative_ic = ic_series.cumsum()
cumulative_ax.plot(ic_series.index, cumulative_ic)
cumulative_ax.set_title('Cumulative IC')
cumulative_ax.set_ylabel('Cumulative IC')

plt.tight_layout()
plt.show()

## 4. Statistical Tests

In [None]:
# Run the service's statistical tests on the IC series and print a short report.
stats = factor_service.statistical_tests(ic_series)

divider = "=" * 40
print("Factor Statistical Analysis")
print(divider)
print(f"T-Statistic: {stats['t_statistic']:.4f}")
print(f"P-Value: {stats['p_value']:.4f}")
print(f"Hit Rate: {stats['hit_rate']:.2%}")

# The factor counts as significant when the p-value is below the 5% level.
if stats['p_value'] < 0.05:
    verdict = 'Yes'
else:
    verdict = 'No'
print(f"Significant at 5%: {verdict}")

## 5. Complete Factor Analysis

In [None]:
# Run complete analysis.
# Momentum has NaNs at the start of the sample and forward returns at the end,
# so dropping NaNs separately gives frames that cover different dates. Pass the
# service only the dates on which both inputs are fully defined.
analysis_dates = momentum.dropna().index.intersection(forward_returns.dropna().index)
result = factor_service.analyze(
    factor_scores=momentum.loc[analysis_dates],
    forward_returns=forward_returns.loc[analysis_dates],
)

# Display summary: floats are rounded to four decimals, everything else is
# printed as-is.
summary = result.to_dict()
print("\nFactor Analysis Summary")
print("=" * 40)
for key, value in summary.items():
    if isinstance(value, float):
        print(f"{key}: {value:.4f}")
    else:
        print(f"{key}: {value}")