In [36]:
%load_ext autoreload
%autoreload 2

import os
import sys
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
import warnings
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [37]:
import backtrader as bt
import numpy as np
np.int = int
import pandas as pd
import yfinance as yf

import common.util as util

In [38]:
def create_pair(df_1: str, df_2: str) -> pd.DataFrame:
    df_1.index = pd.DatetimeIndex(df_1.index)
    df_2.index = pd.DatetimeIndex(df_2.index)
    pair_df = df_1.join(df_2, lsuffix='_n', rsuffix='_d').dropna()
    pair_df.index = pair_df.index.tz_localize(None).astype('datetime64[ns]')
    return pair_df.reset_index()


In [42]:
class PairTradeMeanReversion(bt.Strategy):
    params = dict(window_size=30, outlier_threshold=2, ratio=pd.Series())

    def __init__(self):
        self.close_1 = self.datas[0].close
        self.close_2 = self.datas[1].close

    def next(self):
        i = len(self.close_1)-1
        if i < 2*self.p.window_size:
            return

        hist = pd.Series(self.p.ratio[:i])
        is_outlier = util.flag_outlier(self.p.ratio[i], hist, self.p.outlier_threshold)

        # ratio denom has much larger price than numerator, sell denominator buy numerator
        if is_outlier < 0:
            self.buy(data=self.datas[0])
            self.sell(data=self.datas[1])
        # ratio numerator has much larger price than denominator, sell numerator buy denominator
        if is_outlier > 0:
            self.sell(data=self.datas[0])
            self.buy(data=self.datas[1])
        # exit position when the ratio has reverted to within 1 standard deviation of the mean
        if abs(self.p.ratio[i]-hist.mean()) < hist.std():
            self.close(data=self.datas[0])
            self.close(data=self.datas[1])

In [40]:
def backtest_pair(tkr_1: str, tkr_2: str, start: str, end: str):
    print(f"{tkr_1} | {tkr_2}")
    df_1 = yf.Ticker(tkr_1).history(start=start, end=end)
    df_2 = yf.Ticker(tkr_2).history(start=start, end=end)
    pair_df = create_pair(df_1, df_2)
    ratio = util.norm_pairs(pair_df, 'Close_n', 'Close_d')

    cerebro = bt.Cerebro()
    cerebro.addstrategy(PairTradeMeanReversion, window_size=30, outlier_threshold=2, ratio=ratio)
    cerebro.adddata(bt.feeds.PandasData(dataname=df_1))
    cerebro.adddata(bt.feeds.PandasData(dataname=df_2))

    cerebro.broker.setcash(100000.0)
    start_cash = cerebro.broker.getvalue()
    cerebro.run()
    end_cash = cerebro.broker.getvalue()

    print(f"Pair return %: {end_cash/start_cash*100}")
    lng_1 = df_1['Close'][-1]-df_1['Close'][0]
    lng_2 = df_2['Close'][-1]-df_2['Close'][0]
    print(f"Long only return %: {(lng_1+lng_2)/(df_1['Close'][0]+df_2['Close'][0])*100}\n")

In [43]:
pairs = [
    ['AAPL', 'MSFT', ['2022-01-01', '2023-06-01']],
    ['META', 'GOOG', ['2020-06-01', '2022-01-01']],
    ['FDX', 'UPS', ['2021-01-01', '2022-06-01']],
    ['F', 'GM', ['2022-06-01', None]],
]

for pair in pairs:
    print('10y')
    backtest_pair(pair[0], pair[1], '2013-01-01', None)
    print(f'[{pair[2]}]')
    backtest_pair(pair[0], pair[1], pair[2][0], pair[2][1])

10y
AAPL | MSFT
Pair return %: 99.69290265518399
Long only return %: 1392.7736039217004

[['2022-01-01', '2023-06-01']]
AAPL | MSFT
Pair return %: 100.14266718151833
Long only return %: -0.957735401584699

10y
META | GOOG
Pair return %: 99.07067658233643
Long only return %: 1236.7314797612348

[['2020-06-01', '2022-01-01']]
META | GOOG
Pair return %: 100.04936753845215
Long only return %: 58.49354673512771

10y
FDX | UPS
Pair return %: 100.40862490834297
Long only return %: 181.84782049193714

[['2021-01-01', '2022-06-01']]
FDX | UPS
Pair return %: 100.11990216831043
Long only return %: -0.31885403574741855

10y
F | GM
Pair return %: 100.05949170042092
Long only return %: 70.48182669589441

[['2022-06-01', None]]
F | GM
Pair return %: 100.03263102694764
Long only return %: 3.2509663117546315

