In [None]:
import datetime
import pandas as pd
import hvplot.pandas
import holoviews as hv

from demo.backtest import CATALOG as catalog
from demo.util import bars_to_dataframe

# Pairs Trading

### Premise
- The prices of two (or more) assets are related in some way
- Their prices typically move together
- Opportunities arise when one asset deviates from the relationship
- Buy one, sell the other with the expectation they will return to their relationship at some point in the future

### Benefits
- Applicable to a wide range of markets & assets
- Market neutral (if the market suddenly tanks, you don't lose money)

### Challenges
- Modelling the relationship (drifting pairs)
- Transaction costs
- Getting "legged" (only one side of the pair gets filled, leaving an unhedged position)

# Example
- Two semiconductor ETFs `SMH` (VanEck Semiconductor ETF) and `SOXX` (iShares Semiconductor ETF)
- Fundamental reasons for a relationship (both ETFs of similar stocks)
- Visual analysis of price series confirms belief of relationship

In [None]:
# Pull the pre-loaded sample bars for both legs out of the nautilus DataCatalog.
# The date window is defined once so that both instruments are always loaded
# over exactly the same period.
src_id, tgt_id = 'SMH.NASDAQ', 'SOXX.NASDAQ'
data_start = pd.Timestamp('2020-01-01')
data_end = pd.Timestamp('2020-01-10')
src = catalog.bars(instrument_ids=[src_id], start=data_start, end=data_end, as_nautilus=True)
tgt = catalog.bars(instrument_ids=[tgt_id], start=data_start, end=data_end, as_nautilus=True)

In [None]:
# Combine both bar series into one DataFrame, make sure the index is a
# DatetimeIndex, then keep only the rows whose time of day lies between
# 14:30 and 21:00.
# NOTE(review): presumably regular US market hours expressed in UTC — confirm the index timezone.
session_open = datetime.time(14, 30)
session_close = datetime.time(21, 0)
combined = bars_to_dataframe(source_id=src_id, source_bars=src, target_id=tgt_id, target_bars=tgt)
combined.index = pd.to_datetime(combined.index)
df = combined.between_time(session_open, session_close)

In [None]:
# View the two series over time: the cumulative sum of per-bar percentage
# changes puts SMH and SOXX on a common scale, drawn as a step plot.
cumulative_returns = df.pct_change().cumsum()
cumulative_returns.hvplot.step(y=[src_id, tgt_id], title=f"Time Series {src_id} vs {tgt_id}")

In [None]:
# Scatter the source price (x-axis) against the target price (y-axis);
# a tight, straight cloud suggests a stable relationship between the two.
scatter_title = f"Price Scatter {src_id} vs {tgt_id}"
df.hvplot.scatter(x=src_id, y=tgt_id, title=scatter_title)

## Fit a (simple) model

In [None]:
# scikit-learn expects 2-D inputs, so each price column is cast to float and
# reshaped into a single-column array of shape (n_rows, 1).
single_column = (-1, 1)
X = df[src_id].astype(float).to_numpy().reshape(single_column)
Y = df[tgt_id].astype(float).to_numpy().reshape(single_column)

In [None]:
# Imports for the regression step (r2_score is imported but not used in this cell)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Least-squares fit of Y on X with no intercept term: the fitted line passes
# through the origin, so its single coefficient is a pure price ratio.
model = LinearRegression(fit_intercept=False).fit(X, Y)
model

In [None]:
# The fitted coefficient array is 2-D because Y was passed as a column;
# its only element is the hedge ratio (target price per unit of source price).
hedge_ratio = float(model.coef_[0, 0])

In [None]:
# Model-implied target price: the source price scaled by the hedge ratio
df["predicted"] = df[src_id].mul(hedge_ratio)
# Spread = implied minus actual target price; a positive value means the
# target trades below what the source price implies.
df["spread"] = df["predicted"].sub(df[tgt_id])

In [None]:
# Optional diagnostic, left disabled: histogram of the spread values (uncomment to render)
# df.hvplot.hist("spread", bins=100)

In [None]:
# Top panel: actual target price against the model prediction.
# Bottom panel: the spread, with a thin red horizontal line marking zero.
zero_line = hv.HLine(0).opts(color='r', line_width=1)
ts_plot = df.hvplot.step(y=[tgt_id, 'predicted'])
spread_plot = df['spread'].hvplot() * zero_line
(ts_plot + spread_plot).cols(1)

## Example Trade

In [None]:
# Cash amount used to size each leg of the example trade
notional = 10_000
# Timestamps at which the pair is opened and closed. The original note gives the
# entry direction as BUY SMH / SELL SOXX; the side actually used is derived
# from the sign of the spread at entry.
# NOTE: `exit` shadows the Python/IPython builtin of the same name within this notebook.
entry, exit = "2020-01-03 14:43:40", "2020-01-03 14:52:10"

In [None]:
# Lookup table giving the opposite side of an order
opp_side = {"BUY": "SELL", "SELL": "BUY"}
# Side applied to the source leg at entry: SELL when the spread is positive,
# otherwise BUY. The target leg takes the opposite side.
entry_spread = df.loc[entry, 'spread']
spread_side = "SELL" if entry_spread > 0 else "BUY"

### Entry

In [None]:
# Prices of both legs at the entry timestamp (row looked up once)
entry_row = df.loc[entry]
src_entry_price = entry_row[src_id]
tgt_entry_price = entry_row[tgt_id]

# Size each leg to roughly `notional` worth of shares; int() drops any fractional share
src_shares = int(notional / src_entry_price)
tgt_shares = int(notional / tgt_entry_price)

print(f"{spread_side} {src_id} @ {src_entry_price}\n{opp_side[spread_side]} {tgt_id} @ {tgt_entry_price}")

### Exit

In [None]:
# Prices of both legs at the exit timestamp (row looked up once)
exit_row = df.loc[exit]
src_exit_price = exit_row[src_id]
tgt_exit_price = exit_row[tgt_id]

# Closing the trade reverses the entry sides on each leg
print(f"{opp_side[spread_side]} {src_id} @ {src_exit_price}\n{spread_side} {tgt_id} @ {tgt_exit_price}")

### PNL

In [None]:
# Sign applied to (entry price - exit price) per leg: a leg opened with a SELL
# gains when the price falls (+1), a leg opened with a BUY gains when it rises (-1).
CASH_SIDE = {"SELL": 1.0, "BUY": -1.0}

# Entry-to-exit price move of each leg
src_move = src_entry_price - src_exit_price
tgt_move = tgt_entry_price - tgt_exit_price

# The source leg was opened on `spread_side`, the target leg on the opposite side
src_pnl = src_shares * src_move * CASH_SIDE[spread_side]
tgt_pnl = tgt_shares * tgt_move * CASH_SIDE[opp_side[spread_side]]

print(f"{src_id} ({src_shares=}): {spread_side} @ {src_entry_price}, {opp_side[spread_side]} @ {src_exit_price}, {src_pnl=:0.2f}")
print(f"{tgt_id} ({tgt_shares=}): {opp_side[spread_side]} @ {tgt_entry_price}, {spread_side} @ {tgt_exit_price}, {tgt_pnl=:0.2f}")

In [None]:
# Combined profit and loss of the two legs
total_pnl = tgt_pnl + src_pnl
# Return relative to `notional` (the amount used to size each individual leg)
roi = total_pnl / notional

In [None]:
# Multiplier converting a fractional return into basis points (1 bp = 1/10_000)
BPS = 10_000
# The trade size in the label is formatted from `notional` (with `_` as the
# thousands separator) instead of being hard-coded, so the text stays correct
# if the notional is changed; for notional = 10_000 the output is unchanged.
f"{total_pnl = :0.1f}, ROI (${notional:_}) = {roi * BPS:0.1f} bps"

In [None]:
# Holding period: parse both timestamp strings and take the difference
entry_ts, exit_ts = pd.Timestamp(entry), pd.Timestamp(exit)
trade_time = exit_ts - entry_ts
f"Trade time: {trade_time}"