# Signal-Based Trading Strategy (Use Case 2)

This notebook demonstrates:
1. Converting alpha signals to positions
2. Signal scaling methods
3. Long/short portfolio construction
4. Beta hedging
5. Performance analysis and attribution

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from backtesting import Backtester, BacktestConfig, DataManager
from backtesting.input_processor import SignalProcessor

%matplotlib inline
plt.style.use('seaborn-v0_8-darkgrid')

## Step 1: Load Data

In [None]:
# Prices are loaded through the project's DataManager; the alpha signals are
# read directly from CSV, using the first column as the index with date
# parsing enabled.
data_manager = DataManager(use_float32=True, data_dir='../sample_data')
prices = data_manager.load_prices()

signals_df = pd.read_csv(
    '../sample_data/signals.csv',
    parse_dates=True,
    index_col=0,
)

# Quick sanity report: number of price columns and the signal frame's shape.
print(
    f"Loaded data for {len(prices.columns)} securities",
    f"Signal data shape: {signals_df.shape}",
    sep="\n",
)

## Step 2: Explore Signals

In [None]:
# Three side-by-side diagnostics of the raw signal data.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Panel 1: distribution of signal values across columns for the first row
# (first date) of the signal file.
axes[0].hist(signals_df.iloc[0], bins=50, edgecolor='black', alpha=0.7)
axes[0].set_title('Signal Distribution (Day 1)')
axes[0].set_xlabel('Signal Value')
axes[0].set_ylabel('Frequency')

# Panel 2: signal history for the first five columns of the signal file.
axes[1].plot(signals_df.iloc[:, :5])
axes[1].set_title('Signal Time Series (Sample Securities)')
axes[1].set_xlabel('Date')
axes[1].set_ylabel('Signal')
axes[1].axhline(y=0, color='black', linestyle='--', alpha=0.5)

# Panel 3: per-security correlation between a day's signal and the NEXT
# day's return. shift(-1) moves each return up one row so it sits on the
# same index label as the signal that preceded it.
returns = prices.pct_change()
correlations = []
for col in signals_df.columns:
    if col in returns.columns:
        corr = signals_df[col].corr(returns[col].shift(-1))
        # Series.corr can come back as NaN (e.g. too few overlapping
        # observations or a constant series). A single NaN would turn the
        # mean printed below into NaN, so undefined correlations are skipped.
        if pd.notna(corr):
            correlations.append(corr)

axes[2].hist(correlations, bins=30, edgecolor='black', alpha=0.7)
axes[2].set_title('Signal-Return Correlation')
axes[2].set_xlabel('Correlation')
axes[2].set_ylabel('Frequency')
axes[2].axvline(x=0, color='red', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

# Guard the empty case explicitly: np.mean([]) emits a RuntimeWarning.
mean_correlation = np.mean(correlations) if correlations else float('nan')
print(f"\nMean signal-return correlation: {mean_correlation:.4f}")

## Step 3: Test Signal Processing

Compare how the `linear`, `rank`, and `zscore` scaling methods convert a single day's signals into portfolio weights.

In [None]:
# Take one date's cross-section of signals and prices as plain dicts, the
# form handed to SignalProcessor.process_signals below.
sample_date = signals_df.index[50]
sample_signals = signals_df.loc[sample_date].to_dict()
sample_prices = prices.loc[sample_date].to_dict()

portfolio_value = 10_000_000

# Scaling methods to compare; one histogram panel is drawn per entry.
scaling_methods = ['linear', 'rank', 'zscore']

# Size the grid from the list so adding or removing a method cannot run past
# the available axes. squeeze=False + ravel() keeps `axes` a 1-D array for any
# number of methods; with the three methods above this is the same
# 1x3, 15x4-inch figure as a hard-coded layout.
n_methods = len(scaling_methods)
fig, axes = plt.subplots(1, n_methods, figsize=(5 * n_methods, 4), squeeze=False)
axes = axes.ravel()

for i, method in enumerate(scaling_methods):
    processor = SignalProcessor(
        scaling_method=method,
        target_gross_exposure=1.0,
        long_short=True
    )

    # Only `weights` is used below; shares and notional are unpacked because
    # process_signals returns all three.
    shares, notional, weights = processor.process_signals(
        sample_signals, sample_prices, portfolio_value
    )

    # Plot weight distribution
    axes[i].hist(list(weights.values()), bins=50, edgecolor='black', alpha=0.7)
    axes[i].set_title(f'{method.title()} Scaling')
    axes[i].set_xlabel('Weight')
    axes[i].set_ylabel('Frequency')
    axes[i].axvline(x=0, color='red', linestyle='--', alpha=0.5)

    # Annotate each panel with the sum of absolute weights (gross) and the
    # signed sum of weights (net).
    gross_exp = sum(abs(w) for w in weights.values())
    net_exp = sum(weights.values())
    axes[i].text(0.05, 0.95, f'Gross: {gross_exp:.2f}\nNet: {net_exp:.4f}',
                transform=axes[i].transAxes, va='top',
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

plt.tight_layout()
plt.show()

## Step 4: Configure and Run Signal-Based Backtest

In [None]:
# Inputs: one {column: signal value} dict per date in the signal file,
# wrapped under the 'signals' key expected by the run() call below.
signals_by_date = {
    date: signals_df.loc[date].to_dict()
    for date in signals_df.index
}
inputs = dict(signals=signals_by_date)

# Backtest settings, grouped by concern.
config = BacktestConfig(
    initial_cash=10_000_000,
    risk_free_rate=0.02,
    # Trading / transaction-cost parameters
    max_adv_participation=0.05,
    tc_coefficient=0.01,
    tc_power=1.5,
    # Hedging: beta hedge on (instrument SPY, target beta 0.0 = market
    # neutral); sector hedge off.
    enable_beta_hedge=True,
    beta_hedge_instrument='SPY',
    target_beta=0.0,
    enable_sector_hedge=False,
)

# Run the signal-based backtest (use_case=2) over calendar year 2023.
backtester = Backtester(config, data_manager)
results = backtester.run(
    use_case=2,
    inputs=inputs,
    start_date=pd.Timestamp('2023-01-01'),
    end_date=pd.Timestamp('2023-12-31'),
    show_progress=True,
)

## Step 5: Analyze Performance

In [None]:
# Built-in summary first, then a hand-picked subset of the metrics dict.
results.print_summary()
metrics = results.calculate_metrics()

print("\nStrategy Characteristics:")
# Each row is (display label, key in `metrics`, format spec).
for label, key, spec in (
    ('Sharpe Ratio', 'sharpe_ratio', '.2f'),
    ('Sortino Ratio', 'sortino_ratio', '.2f'),
    ('Max Drawdown', 'max_drawdown', '.2%'),
    ('Calmar Ratio', 'calmar_ratio', '.2f'),
    ('Win Rate', 'win_rate', '.2%'),
    ('Profit Factor', 'profit_factor', '.2f'),
):
    print(f"  {label}: {metrics[key]:{spec}}")

## Step 6: Detailed Visualization

In [None]:
# Performance dashboard: a 4x2 grid with full-width panels on the first and
# last rows. Reads the columns 'date', 'portfolio_value', 'daily_return',
# 'gross_exposure', 'net_exposure' and 'transaction_cost' from the results.
results_df = results.to_dataframe()

fig = plt.figure(figsize=(16, 12))
gs = fig.add_gridspec(4, 2, hspace=0.3, wspace=0.3)

# Cumulative returns, in percent, relative to the first recorded portfolio value
ax1 = fig.add_subplot(gs[0, :])
cum_returns = (results_df['portfolio_value'] / results_df['portfolio_value'].iloc[0] - 1) * 100
ax1.plot(results_df['date'], cum_returns, linewidth=2)
ax1.fill_between(results_df['date'], 0, cum_returns, alpha=0.3)
ax1.set_title('Cumulative Returns', fontsize=14, fontweight='bold')
ax1.set_ylabel('Return (%)')
ax1.grid(True, alpha=0.3)
ax1.axhline(y=0, color='black', linestyle='--', alpha=0.5)

# Drawdown: percentage distance of the portfolio value below its running peak
ax2 = fig.add_subplot(gs[1, 0])
values = results_df['portfolio_value'].values
running_max = np.maximum.accumulate(values)
drawdowns = (values - running_max) / running_max * 100
ax2.fill_between(results_df['date'], drawdowns, 0, alpha=0.7, color='red')
ax2.plot(results_df['date'], drawdowns, color='darkred', linewidth=1)
ax2.set_title('Drawdown')
ax2.set_ylabel('Drawdown (%)')
ax2.grid(True, alpha=0.3)

# Daily returns distribution (the column is scaled by 100 for display in percent)
ax3 = fig.add_subplot(gs[1, 1])
daily_returns = results_df['daily_return'].values * 100
ax3.hist(daily_returns, bins=50, edgecolor='black', alpha=0.7)
ax3.axvline(x=np.mean(daily_returns), color='red', linestyle='--',
            label=f'Mean: {np.mean(daily_returns):.3f}%')
ax3.set_title('Daily Return Distribution')
ax3.set_xlabel('Daily Return (%)')
ax3.set_ylabel('Frequency')
ax3.legend()
ax3.grid(True, alpha=0.3, axis='y')

# Gross vs Net exposure
ax4 = fig.add_subplot(gs[2, 0])
ax4.plot(results_df['date'], results_df['gross_exposure'], label='Gross', linewidth=2)
ax4.plot(results_df['date'], results_df['net_exposure'], label='Net', linewidth=2)
ax4.set_title('Portfolio Exposures')
ax4.set_ylabel('Exposure ($)')
ax4.legend()
ax4.grid(True, alpha=0.3)

# Transaction costs: running total, annotated with the final total and its
# ratio to the absolute dollar change in portfolio value.
ax5 = fig.add_subplot(gs[2, 1])
cum_costs = results_df['transaction_cost'].cumsum()
ax5.plot(results_df['date'], cum_costs, linewidth=2, color='purple')
ax5.set_title('Cumulative Transaction Costs')
ax5.set_ylabel('Cost ($)')
ax5.grid(True, alpha=0.3)
total_cost = cum_costs.iloc[-1]
total_return_dollars = results_df['portfolio_value'].iloc[-1] - results_df['portfolio_value'].iloc[0]
# A flat portfolio would make the ratio a division by zero; report 0 instead.
cost_ratio = total_cost / abs(total_return_dollars) if total_return_dollars != 0 else 0
ax5.text(0.05, 0.95, f'Total: ${total_cost:,.0f}\nCost/Return: {cost_ratio:.2%}',
        transform=ax5.transAxes, va='top',
        bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

# Rolling Sharpe: 60-row rolling mean over rolling std, annualised with sqrt(252)
ax6 = fig.add_subplot(gs[3, :])
# `daily_returns` has one entry per row of results_df, so it must be indexed
# by the full date column. Indexing by date.iloc[1:] (one element shorter)
# makes pd.Series raise a length-mismatch ValueError.
returns_series = pd.Series(daily_returns, index=results_df['date'])
rolling_sharpe = (returns_series.rolling(60).mean() / returns_series.rolling(60).std()) * np.sqrt(252)
ax6.plot(rolling_sharpe.index, rolling_sharpe.values, linewidth=2)
ax6.axhline(y=0, color='black', linestyle='--', alpha=0.5)
ax6.set_title('60-Day Rolling Sharpe Ratio')
ax6.set_xlabel('Date')
ax6.set_ylabel('Sharpe Ratio')
ax6.grid(True, alpha=0.3)

plt.show()

## Step 7: Factor Attribution Analysis

In [None]:
# Get factor attribution
factor_pnl = results.get_factor_attribution()

if factor_pnl is not None and not factor_pnl.empty:
    # Cumulative factor PnL
    cum_factor_pnl = factor_pnl.cumsum()
    
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    
    # Time series
    for col in cum_factor_pnl.columns:
        axes[0].plot(cum_factor_pnl.index, cum_factor_pnl[col], label=col, linewidth=2)
    axes[0].set_title('Cumulative Factor PnL')
    axes[0].set_xlabel('Date')
    axes[0].set_ylabel('Cumulative PnL ($)')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)
    
    # Contribution breakdown
    total_pnl_by_factor = factor_pnl.sum()
    total = total_pnl_by_factor.sum()
    contributions = (total_pnl_by_factor / total * 100) if total != 0 else total_pnl_by_factor
    
    axes[1].bar(range(len(contributions)), contributions.values)
    axes[1].set_title('Factor Contribution to Total PnL')
    axes[1].set_xlabel('Factor')
    axes[1].set_ylabel('Contribution (%)')
    axes[1].set_xticks(range(len(contributions)))
    axes[1].set_xticklabels(contributions.index, rotation=45, ha='right')
    axes[1].grid(True, alpha=0.3, axis='y')
    axes[1].axhline(y=0, color='black', linestyle='--', alpha=0.5)
    
    plt.tight_layout()
    plt.show()
    
    print("\nFactor Contributions:")
    for factor, contrib in contributions.items():
        print(f"  {factor}: {contrib:.2f}%")
else:
    print("No factor attribution data available.")

## Step 8: Generate Reports

In [None]:
# Export the backtest results in HTML, Excel and CSV form to the output
# directory, then confirm where they were written.
results.generate_full_report(
    formats=['html', 'excel', 'csv'],
    output_dir='../output/signal_strategy',
)
print("\nReports saved to '../output/signal_strategy'")

## Summary

In this notebook, we:
1. ✓ Loaded and explored alpha signals
2. ✓ Tested different signal scaling methods
3. ✓ Ran a signal-based long/short strategy with beta hedging
4. ✓ Analyzed comprehensive performance metrics
5. ✓ Examined factor attribution
6. ✓ Generated detailed reports

Key Takeaways:
- Beta hedging helps create market-neutral returns
- Signal scaling method affects portfolio construction
- Factor attribution helps understand return sources
- Transaction costs are an important consideration