# Walk-Forward Validation Demo

This notebook demonstrates the walk-forward validation methodology used in our backtesting framework.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json

# Apply seaborn's 'whitegrid' style to the figures drawn below.
sns.set_style('whitegrid')
# Seed NumPy's global random state so the synthetic data is reproducible on a fresh run.
np.random.seed(1337)

## 1. Generate Synthetic Market Data

In [None]:
def generate_synthetic_data(n_days=1000, n_assets=10):
    """Generate synthetic daily prices for a basket of correlated assets.

    All random draws come from NumPy's global random state (``np.random``),
    so seed it beforehand for reproducible output.

    Parameters
    ----------
    n_days : int
        Number of daily rows, starting at 2020-01-01.
    n_assets : int
        Number of assets (columns), labelled ``SYN001``, ``SYN002``, ...

    Returns
    -------
    pandas.DataFrame
        Prices indexed by date with one column per asset. Each column is
        ``100 * cumprod(1 + daily_return)``.
    """
    dates = pd.date_range('2020-01-01', periods=n_days, freq='D')

    # Random symmetric matrix: unit diagonal, off-diagonals in [0.1, 0.5].
    correlation_matrix = np.random.uniform(0.1, 0.5, (n_assets, n_assets))
    np.fill_diagonal(correlation_matrix, 1.0)
    correlation_matrix = (correlation_matrix + correlation_matrix.T) / 2

    # A random symmetric matrix is not guaranteed to be positive definite.
    # eigh is the decomposition meant for symmetric input: it returns real
    # eigenvalues and orthonormal eigenvectors.
    min_eigenvalue = 0.01
    eigenvalues, eigenvectors = np.linalg.eigh(correlation_matrix)
    if eigenvalues.min() < min_eigenvalue:
        # Lift small/negative eigenvalues to the floor and rebuild the matrix.
        eigenvalues = np.clip(eigenvalues, min_eigenvalue, None)
        repaired = (eigenvectors * eigenvalues) @ eigenvectors.T
        # Rebuilding changes the diagonal; rescale so it is 1 again, otherwise
        # the simulated volatilities would no longer match std_returns.
        scale = np.sqrt(np.diag(repaired))
        correlation_matrix = repaired / np.outer(scale, scale)
        # Remove floating-point asymmetry introduced by the products above.
        correlation_matrix = (correlation_matrix + correlation_matrix.T) / 2
        np.fill_diagonal(correlation_matrix, 1.0)

    # Per-asset daily drift and volatility.
    mean_returns = np.random.uniform(-0.0001, 0.0003, n_assets)
    std_returns = np.random.uniform(0.005, 0.03, n_assets)

    # Covariance = D * Corr * D, with D = diag(volatilities).
    covariance = np.diag(std_returns) @ correlation_matrix @ np.diag(std_returns)
    returns = np.random.multivariate_normal(mean_returns, covariance, size=n_days)

    # Compound the simple returns into price paths.
    prices = pd.DataFrame(
        100 * (1 + returns).cumprod(axis=0),
        index=dates,
        columns=[f'SYN{i:03d}' for i in range(1, n_assets + 1)]
    )

    return prices

# Build the demo price panel with the function defaults (1000 days, 10 assets).
prices = generate_synthetic_data()
print(f"Generated {len(prices)} days of data for {len(prices.columns)} assets")
# Last expression in the cell, so the notebook renders the first five rows.
prices.head()

## 2. Implement Walk-Forward Validation

In [None]:
class WalkForwardValidator:
    """Rolling-window train/test splitter for walk-forward validation.

    Every split is ``train_window`` consecutive rows followed immediately by
    ``test_window`` rows; the window start then advances by ``step_size`` rows.
    When ``step_size`` is smaller than ``test_window`` the test windows of
    consecutive splits overlap.

    Parameters
    ----------
    train_window : int
        Number of rows in each training slice.
    test_window : int
        Number of rows in each test slice.
    step_size : int
        Number of rows the window start moves between splits.

    Raises
    ------
    ValueError
        If any of the three sizes is smaller than 1.
    """

    def __init__(self, train_window=252, test_window=63, step_size=21):
        self.train_window = train_window
        self.test_window = test_window
        self.step_size = step_size
        self._check_windows()

    def _check_windows(self):
        """Reject sizes that would give empty slices or a non-advancing window."""
        for name in ('train_window', 'test_window', 'step_size'):
            value = getattr(self, name)
            if value < 1:
                raise ValueError(f"{name} must be a positive integer, got {value!r}")

    def split(self, data):
        """Generate train/test splits for walk-forward validation.

        Parameters
        ----------
        data : pandas.DataFrame or pandas.Series
            Rows in chronological order; sliced positionally with ``.iloc``.

        Returns
        -------
        list of dict
            One dict per split with keys ``'train'`` and ``'test'`` (the
            slices) and ``'train_period'`` / ``'test_period'`` (tuples holding
            the first and last index label of each slice). The list is empty
            when ``data`` has fewer than ``train_window + test_window`` rows.
        """
        # Re-check here because the attributes may have been changed after construction.
        self._check_windows()

        n = len(data)
        span = self.train_window + self.test_window
        splits = []

        # Last valid start is n - span; the range is empty if data is too short.
        for start_idx in range(0, n - span + 1, self.step_size):
            train_end = start_idx + self.train_window
            test_end = train_end + self.test_window

            train_data = data.iloc[start_idx:train_end]
            test_data = data.iloc[train_end:test_end]

            splits.append({
                'train': train_data,
                'test': test_data,
                'train_period': (train_data.index[0], train_data.index[-1]),
                'test_period': (test_data.index[0], test_data.index[-1])
            })

        return splits

# Default windows: 252 training rows, 63 test rows, advancing 21 rows per split.
validator = WalkForwardValidator()
# Each element of `splits` is a dict with 'train', 'test', 'train_period' and 'test_period'.
splits = validator.split(prices)
print(f"Created {len(splits)} walk-forward periods")

## 3. Simple Momentum Strategy

In [None]:
def momentum_strategy(prices, lookback=20, n_positions=3):
    """Cross-sectional momentum: long the strongest assets, short the weakest.

    On every row the assets are ranked by their ``lookback``-row return.
    The top ``n_positions`` get weight ``+1 / n_positions`` and the bottom
    ``n_positions`` get ``-1 / n_positions``; everything else is flat. Rows
    without a full lookback history have no ranks and therefore no positions.

    Parameters
    ----------
    prices : pandas.DataFrame
        Price history, one column per asset, rows in chronological order.
    lookback : int
        Number of rows over which momentum is measured.
    n_positions : int
        Number of assets held on each side of the book.

    Returns
    -------
    positions : pandas.DataFrame
        Float weights with the same index and columns as ``prices``.
    strategy_returns : pandas.Series
        Per-row portfolio return, using the previous row's positions so the
        signal never sees the return it is applied to.

    Raises
    ------
    ValueError
        If ``lookback`` or ``n_positions`` is below 1, or if
        ``2 * n_positions`` exceeds the number of assets (the long and short
        books would overlap and the shorts would overwrite the longs).
    """
    n_assets = len(prices.columns)
    if lookback < 1:
        raise ValueError(f"lookback must be >= 1, got {lookback!r}")
    if n_positions < 1:
        raise ValueError(f"n_positions must be >= 1, got {n_positions!r}")
    if 2 * n_positions > n_assets:
        raise ValueError(
            f"n_positions={n_positions} needs at least {2 * n_positions} assets, "
            f"got {n_assets}"
        )

    momentum = prices.pct_change(lookback)

    # Rank 1 = strongest momentum on that row.
    ranks = momentum.rank(axis=1, ascending=False)

    # Start from a float frame: the weights are fractional, so an integer
    # frame would depend on implicit upcasting when they are assigned.
    weight = 1.0 / n_positions
    positions = pd.DataFrame(0.0, index=prices.index, columns=prices.columns)
    positions[ranks <= n_positions] = weight
    positions[ranks > n_assets - n_positions] = -weight

    # Yesterday's positions earn today's return.
    strategy_returns = (positions.shift(1) * prices.pct_change()).sum(axis=1)

    return positions, strategy_returns

## 4. Run Walk-Forward Backtest

In [None]:
# Lookback handed to momentum_strategy; also sizes the warm-up history below.
MOMENTUM_LOOKBACK = 20

results = []

for i, split in enumerate(splits):
    train_prices = split['train']
    test_prices = split['test']
    n_test = len(test_prices)

    # Seed the momentum signal with the tail of the training window.
    # Running the strategy on the test slice alone leaves the first
    # MOMENTUM_LOOKBACK test rows without a signal, so part of every
    # out-of-sample window would be flat. Only prices that precede each
    # test row are used, so this adds no look-ahead.
    warmup_prices = train_prices.iloc[-(MOMENTUM_LOOKBACK + 1):]
    history = pd.concat([warmup_prices, test_prices])
    all_positions, all_returns = momentum_strategy(history, lookback=MOMENTUM_LOOKBACK)

    # Keep only the out-of-sample rows for evaluation.
    positions = all_positions.iloc[-n_test:]
    returns = all_returns.iloc[-n_test:]

    # Position changes are measured against the previous row of the full
    # history, so the first test row is compared with the last training row
    # rather than with NaN (NaN != 0 would count as a trade in every column).
    position_changes = all_positions.diff().iloc[-n_test:]
    n_trades = int(position_changes.ne(0).sum().sum())

    # Annualised Sharpe ratio; 0 when the period has no return variation.
    volatility = returns.std()
    sharpe = returns.mean() / volatility * np.sqrt(252) if volatility > 0 else 0
    total_return = (1 + returns).prod() - 1

    results.append({
        'period': i + 1,
        'test_start': split['test_period'][0],
        'test_end': split['test_period'][1],
        'sharpe': sharpe,
        'return': total_return,
        'n_trades': n_trades
    })

results_df = pd.DataFrame(results)
print("\nWalk-Forward Results Summary:")
print(f"Average Sharpe: {results_df['sharpe'].mean():.2f}")
print(f"Average Return: {results_df['return'].mean():.2%}")
print(f"Total Trades: {results_df['n_trades'].sum()}")

## 5. Visualize Results

In [None]:
fig, axes = plt.subplots(2, 2, figsize=(15, 10))


def _sign_colors(values):
    """Green for strictly positive values, red otherwise."""
    return ['green' if x > 0 else 'red' for x in values]


# Sharpe ratios over time
ax = axes[0, 0]
ax.bar(results_df['period'], results_df['sharpe'], color=_sign_colors(results_df['sharpe']))
ax.set_title('Sharpe Ratio by Period')
ax.set_xlabel('Walk-Forward Period')
ax.set_ylabel('Sharpe Ratio')
ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
ax.grid(True, alpha=0.3)

# Returns by period
ax = axes[0, 1]
ax.bar(results_df['period'], results_df['return'] * 100, color=_sign_colors(results_df['return']))
ax.set_title('Returns by Period')
ax.set_xlabel('Walk-Forward Period')
ax.set_ylabel('Return (%)')
ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
ax.grid(True, alpha=0.3)

# Cumulative performance.
# Test windows of consecutive periods can overlap (the window start may move
# by fewer rows than a test window is long). Compounding every period would
# then count the same days more than once, so only chain periods whose test
# window starts after the previously chained one has ended.
chained_rows = []
last_end = None
for row_pos, (start, end) in enumerate(zip(results_df['test_start'], results_df['test_end'])):
    if last_end is None or start > last_end:
        chained_rows.append(row_pos)
        last_end = end
chained_df = results_df.iloc[chained_rows]
cumulative_returns = (1 + chained_df['return']).cumprod()

ax = axes[1, 0]
ax.plot(chained_df['period'], cumulative_returns, linewidth=2, marker='o')
ax.set_title('Cumulative Performance')
ax.set_xlabel('Walk-Forward Period')
ax.set_ylabel('Cumulative Return')
ax.grid(True, alpha=0.3)

# Trade frequency
ax = axes[1, 1]
ax.bar(results_df['period'], results_df['n_trades'])
ax.set_title('Number of Trades by Period')
ax.set_xlabel('Walk-Forward Period')
ax.set_ylabel('Number of Trades')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 6. Export Results

In [None]:
# Output folder for the exported artefacts; created if it does not exist yet.
evidence_dir = Path('../evidence/notebooks')
evidence_dir.mkdir(parents=True, exist_ok=True)

# Per-period results table.
results_csv_path = evidence_dir / 'walkforward_results.csv'
results_df.to_csv(results_csv_path, index=False)

# Headline metrics, converted to plain Python numbers so they serialise to JSON.
summary = dict(
    n_periods=len(results_df),
    avg_sharpe=float(results_df['sharpe'].mean()),
    avg_return=float(results_df['return'].mean()),
    total_trades=int(results_df['n_trades'].sum()),
    win_rate=float((results_df['return'] > 0).mean()),
    methodology='walk_forward',
    train_window=validator.train_window,
    test_window=validator.test_window,
    step_size=validator.step_size,
)

# Serialise once; the same text is written to disk and echoed below.
summary_json = json.dumps(summary, indent=2)
(evidence_dir / 'walkforward_summary.json').write_text(summary_json)

print("\nResults exported to evidence/notebooks/")
print(summary_json)