# Statistical Arbitrage: Pairs Trading with Cointegration

This notebook demonstrates a professional workflow for identifying and trading mean-reverting pairs.

**Workflow:**
1.  **Selection:** Identify pairs with economic links (e.g., KO vs PEP, GLD vs GDX).
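2.  **Testing:** Test for cointegration with the Engle-Granger two-step approach: regress one price on the other to get the hedge ratio, then check the residual spread for stationarity with an Augmented Dickey-Fuller (ADF) test.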
3.  **Signal:** Calculate Z-Score of the spread.
4.  **Execution:** Buy spread when Z < -2, Sell spread when Z > 2.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import yfinance as yf
from statsmodels.tsa.stattools import adfuller, coint

# 1. Data Fetching
tickers = ['GLD', 'GDX']  # Gold ETF vs Gold Miners ETF
# Ask for adjusted prices explicitly and read 'Close'. Recent yfinance releases
# default to auto_adjust=True and no longer return an 'Adj Close' column, so
# indexing ['Adj Close'] raises KeyError there. With auto_adjust=True the
# 'Close' column already holds the split/dividend-adjusted price.
data = yf.download(
    tickers, start='2020-01-01', end='2023-01-01', auto_adjust=True
)['Close']
# Keep only dates on which both legs have a price; the regression and the
# stationarity test below cannot handle missing values.
data = data.dropna()

s1 = data['GLD']
s2 = data['GDX']

# 2. Cointegration Test (Engle-Granger)
def calculate_spread(series_y, series_x):
    """Estimate the OLS hedge ratio of Y on X and return the resulting spread.

    Args:
        series_y: Dependent price series (the leg being hedged).
        series_x: Independent price series (the hedging leg).

    Returns:
        A ``(spread, hedge_ratio)`` tuple. ``hedge_ratio`` is the slope from
        regressing ``series_y`` on ``series_x`` with an intercept, and
        ``spread`` is ``series_y - hedge_ratio * series_x``. The intercept is
        not subtracted, so the spread is not centred on zero.
    """
    # Regress Y on X (with a constant) to find the hedge ratio.
    result = sm.OLS(series_y, sm.add_constant(series_x)).fit()
    # For pandas inputs the fitted params are label-indexed ('const', <name>),
    # and integer-position lookup via [] on such a Series is deprecated.
    # Converting to an array makes the positional access explicit and also
    # works when the inputs are plain NumPy arrays.
    hedge_ratio = np.asarray(result.params)[1]
    spread = series_y - hedge_ratio * series_x
    return spread, hedge_ratio

spread, hedge_ratio = calculate_spread(s1, s2)

# Check stationarity of spread (Engle-Granger).
# A plain ADF test on the spread uses unit-root critical values that ignore the
# fact that the hedge ratio was estimated from the same data, which overstates
# significance. coint() runs the same residual-based test (s1 regressed on s2
# with a constant) but reports a p-value from cointegration critical values.
coint_stat, coint_pvalue, _ = coint(s1, s2)
print(f"ADF P-Value: {coint_pvalue:.5f}")
if coint_pvalue < 0.05:
    print("Spread is Stationary (Cointegrated). Valid for Pairs Trading.")
else:
    print("Spread is NOT Stationary. Do NOT trade.")

# 3. Signal Generation (Z-Score)
def z_score(series, window=30):
    """Return the rolling z-score of ``series``.

    Each point is measured against the mean and standard deviation of the
    trailing ``window`` observations (the current point included). The first
    ``window - 1`` entries are NaN because the window is not yet full.

    Args:
        series: pandas Series to standardise.
        window: Number of trailing observations in the rolling window.

    Returns:
        A pandas Series of z-scores aligned with ``series``.
    """
    trailing = series.rolling(window=window)
    deviation = series - trailing.mean()
    return deviation / trailing.std()

z_s = z_score(spread)

# Plotting: draw the z-score with the +/-2 entry thresholds on explicit axes.
fig, ax = plt.subplots(figsize=(12, 6))
z_s.plot(ax=ax, label='Spread Z-Score')
ax.axhline(2.0, color='red', linestyle='--')     # upper band: sell the spread
ax.axhline(-2.0, color='green', linestyle='--')  # lower band: buy the spread
ax.set_title('Pairs Trading Signal')
ax.legend()
plt.show()