In [None]:
# Import libraries
import sys
# Add the parent directory to the import path so the `src` package imported
# below can be resolved. The relative '..' entry is interpreted against the
# kernel's current working directory, so this presumably expects the notebook
# to be launched from a folder one level below the project root — TODO confirm.
sys.path.append('..')

# Third-party numerical and plotting stack
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Project-local analytics classes used by the cells that follow
from src.analytics.statistical import StatisticalAnalytics
from src.analytics.spread_analysis import SpreadAnalysis
from src.analytics.kalman_filter import KalmanHedgeRatio

# Reaching this line means every import above resolved
print('✅ Imports successful')

## Generate Sample Data

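Create two synthetic price series for demonstration: Asset 1 is a random walk, and Asset 2 is a linear function of Asset 1 plus independent noise, so the pair moves together by construction.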

In [None]:
# Seed NumPy's global RNG so every run reproduces the same series
np.random.seed(42)

# Number of observations in each series
n = 200

# Asset 1 is a random walk: the running sum of Gaussian steps
# (scale 0.5), shifted up by 100
steps = np.random.randn(n) * 0.5
price1 = np.cumsum(steps) + 100

# Asset 2 is a linear function of Asset 1 (slope 2.0, offset 10)
# plus independent Gaussian noise (scale 2)
noise = np.random.randn(n) * 2
price2 = 2.0 * price1 + 10 + noise

# The analysis calls in later cells are given plain Python lists
price1_list = price1.tolist()
price2_list = price2.tolist()

# Quick sanity summary of what was generated
print(f'Generated {n} price points')
print(f'Asset 1: Mean={price1.mean():.2f}, Std={price1.std():.2f}')
print(f'Asset 2: Mean={price2.mean():.2f}, Std={price2.std():.2f}')

## 1. OLS Regression Analysis

Compute hedge ratio using Ordinary Least Squares

In [None]:
# Analytics helper instance; the ADF cell further down reuses it
analytics = StatisticalAnalytics()

# Fit the OLS regression on the two price lists
ols_result = analytics.ols_regression(price1_list, price2_list)

# Print each reported quantity to four decimal places, in a fixed order
print('=== OLS Regression Results ===')
for label, field in (
    ('Hedge Ratio (beta)', 'hedge_ratio'),
    ('Intercept (alpha)', 'alpha'),
    ('R-squared', 'r_squared'),
    ('P-value', 'p_value'),
    ('Residual Std', 'residual_std'),
):
    print(f"{label}: {ols_result[field]:.4f}")

## 2. Spread and Z-Score Analysis

Calculate trading spread and z-score for mean reversion

In [None]:
# Run the spread analysis on both price lists, passing the hedge ratio
# fitted by OLS and a window of 50
spread_options = {'hedge_ratio': ols_result['hedge_ratio'], 'window': 50}
spread_result = SpreadAnalysis.analyze_spread(price1_list, price2_list, **spread_options)

print('=== Spread Analysis ===')
# Top-level scalar fields of the result, four decimals each
for label, field in (
    ('Current Spread', 'current_spread'),
    ('Spread Mean', 'spread_mean'),
    ('Spread Std', 'spread_std'),
):
    print(f"{label}: {spread_result[field]:.4f}")
# The latest z-score sits one level deeper, under the 'zscore' entry
print(f"Current Z-Score: {spread_result['zscore']['current_zscore']:.4f}")

## 3. Trading Signal Generation

Generate buy/sell signals based on z-score thresholds

In [None]:
# Feed the most recent z-score into the mean-reversion rule with an
# entry threshold of 2.0 and an exit threshold of 0.5
latest_zscore = spread_result['zscore']['current_zscore']
signal = SpreadAnalysis.mean_reversion_signal(
    latest_zscore, entry_threshold=2.0, exit_threshold=0.5
)

print('=== Trading Signal ===')
# The signal name is shown upper-cased; the z-score to four decimals
decision = signal['signal'].upper()
print(f"Signal: {decision}")
print(f"Reason: {signal['reason']}")
print(f"Z-Score: {signal['zscore']:.4f}")

## 4. Kalman Filter - Dynamic Hedge Ratio

Use Kalman filter to estimate time-varying hedge ratio

In [None]:
# Best-effort cell: no later cell reads kalman_result, so a failure here is
# reported and the notebook keeps running.
try:
    # initial_state=2.0 equals the slope used to build the synthetic Asset 2
    # series; the two covariance arguments are passed through to the project's
    # KalmanHedgeRatio (presumably process and measurement noise — TODO confirm).
    kalman = KalmanHedgeRatio(
        initial_state=2.0,
        transition_covariance=0.01,
        observation_covariance=1.0
    )

    kalman_result = kalman.estimate(price1_list, price2_list)

    print('=== Kalman Filter Results ===')
    print(f"Current Hedge Ratio: {kalman_result['current_hedge_ratio']:.4f}")
    print(f"Mean Hedge Ratio: {kalman_result['mean_hedge_ratio']:.4f}")
    print(f"Std Hedge Ratio: {kalman_result['std_hedge_ratio']:.4f}")
    print(f"95% CI: [{kalman_result['confidence_interval_95'][0]:.4f}, {kalman_result['confidence_interval_95'][1]:.4f}]")
except ImportError as e:
    # Only a failed import means the optional dependency is missing, so the
    # install hint is shown for this case alone.
    print(f'Kalman filter not available: {e}')
    print('Install pykalman: pip install pykalman')
except Exception as e:
    # Anything else (bad key, numerical failure, API mismatch) is a real
    # error; report its type instead of telling the reader to install a
    # package that may already be present.
    print(f'Kalman filter estimation failed: {type(e).__name__}: {e}')

## 5. Stationarity Test (ADF)

Test if the spread is stationary (mean-reverting)

In [None]:
# Take the spread series from the earlier spread analysis as a NumPy array
spread = np.array(spread_result['spread'])

# The ADF test is handed the series as a plain Python list
spread_values = spread.tolist()
adf_result = analytics.adf_test(spread_values)

print('=== ADF Stationarity Test ===')
# Numeric fields, four decimals each
for label, field in (('ADF Statistic', 'adf_statistic'), ('P-value', 'p_value')):
    print(f"{label}: {adf_result[field]:.4f}")
# Verdict fields are printed as returned
print(f"Is Stationary: {adf_result['is_stationary']}")
print(f"Interpretation: {adf_result['interpretation']}")
print('\nCritical Values:')
# One indented line per entry of the critical-values mapping
for level, threshold in adf_result['critical_values'].items():
    print(f"  {level}: {threshold:.4f}")

## 6. Visualization

Plot prices, spread, and z-score

In [None]:
# Three stacked panels: prices, spread, z-score
fig, axes = plt.subplots(3, 1, figsize=(12, 10))
price_ax, spread_ax, zscore_ax = axes

# Panel 1: both price series on a shared axis
price_ax.plot(price1, label='Asset 1', color='blue')
price_ax.plot(price2, label='Asset 2', color='orange')
price_ax.set(title='Asset Prices', ylabel='Price')

# Panel 2: the spread, its mean as a dashed line, and a shaded band one
# standard deviation either side of the mean
spread_arr = np.array(spread_result['spread'])
spread_ax.plot(spread_arr, label='Spread', color='green')
band_center = spread_result['spread_mean']
spread_ax.axhline(y=band_center, color='red', linestyle='--', label='Mean')
band_halfwidth = spread_result['spread_std']
spread_ax.fill_between(
    range(len(spread_arr)),
    band_center - band_halfwidth,
    band_center + band_halfwidth,
    alpha=0.2,
    color='red',
    label='±1 Std Dev',
)
spread_ax.set(title='Spread', ylabel='Spread')

# Panel 3: the z-score series with the ±2 entry thresholds, the zero line,
# and the region between the thresholds shaded
zscore_arr = np.array(spread_result['zscore']['zscore'])
zscore_ax.plot(zscore_arr, label='Z-Score', color='purple')
for level, tag in ((2.0, '+2'), (-2.0, '-2')):
    zscore_ax.axhline(y=level, color='red', linestyle='--', label=f'Entry Threshold ({tag})')
zscore_ax.axhline(y=0, color='gray', linestyle=':', label='Mean')
zscore_ax.fill_between(range(len(zscore_arr)), -2, 2, alpha=0.1, color='green', label='Hold Zone')
zscore_ax.set(title='Z-Score (Mean Reversion Signal)', xlabel='Time', ylabel='Z-Score')

# Every panel gets a legend and a faint grid
for ax in axes:
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print('✅ Plots generated successfully')

## Summary

This notebook demonstrated:
- OLS regression for hedge ratio estimation
- Spread and z-score calculation
- Trading signal generation
- Kalman filter for dynamic hedge ratios
- Stationarity testing with ADF
- Visualization of results

These techniques form the foundation of statistical arbitrage and pairs trading strategies.