In [21]:
import numpy as np
import pandas as pd
import pandas_datareader as dr
import matplotlib.pyplot as plt
import yfinance as yf
import seaborn as sn
import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
import bt
from get_all_tickers import get_tickers as gt
import copy
%matplotlib inline

In [22]:
"""Run cell you if want to hide warnings."""
import warnings
warnings.filterwarnings('ignore')

In [23]:
# Dataset to analyse; its name selects the price file under prices/.
chosen_dataset = 'finance'
path = f'prices/{chosen_dataset}.csv'

In [24]:
# Read the price table, using the first CSV column as the row index.
data = pd.read_csv(filepath_or_buffer=path, index_col=0)

# Convert the index labels to datetimes so rows can be compared against dates.
parsed_index = pd.to_datetime(data.index)
data.index = parsed_index

# Preview the first rows.
data.head()

Unnamed: 0_level_0,ABCB,ACGL,AINV,AMNB,AMSF,ANAT,ARCC,AROW,ASPS,ATAX,...,WHF,WLTW,WNEB,WRLD,WSBC,WSBF,WSFS,WTBA,WTFC,ZION
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-02,11.809612,14.793333,9.672423,15.793875,17.655058,53.86161,7.89184,16.431753,87.760002,3.539224,...,5.837776,77.111671,6.001124,74.360001,17.459869,5.21616,13.61966,8.170803,34.563049,19.219213
2013-01-03,11.772794,14.75,9.626857,15.846005,17.519011,54.126022,7.922996,16.386366,85.540001,3.574923,...,5.791134,77.899429,5.927036,74.75,17.498005,5.229025,13.715272,8.215169,34.917679,19.245436
2013-01-04,11.874044,14.876667,9.809139,15.816223,17.636507,54.23489,7.940801,16.490116,86.870003,3.620821,...,5.826115,77.876907,5.877643,75.849998,17.513266,5.164708,13.795461,8.377846,34.908581,19.796055
2013-01-07,11.846431,14.73,9.820533,15.771534,17.333496,54.102684,7.914094,16.159405,87.510002,3.580021,...,5.791134,77.651825,5.910573,75.32,17.261553,5.21616,13.644336,8.363055,34.772182,19.699917
2013-01-08,11.874044,14.75,9.968637,15.987481,17.253107,54.351551,7.909645,16.133469,91.529999,3.544325,...,5.791134,77.269203,5.951733,74.260002,17.032724,5.190435,13.524055,8.473972,34.981319,19.306614


In [25]:
import time
import datetime

# The split below operates on the loaded price table under the name `stocks`.
stocks = data

# Chronological split: rows dated strictly before `split_date` form the
# training set, rows dated on or after it form the test set.
split_date = '2019-01-01'
s = datetime.datetime.strptime(split_date, "%Y-%m-%d")

before_split = stocks.index < s
from_split = stocks.index >= s

train = stocks.loc[before_split]
test = stocks.loc[from_split]

In [26]:
"""Retrieve Optimal Pairs"""
import json

# The pairs file is a JSON object keyed by dataset name; keep only the
# entry that belongs to the dataset chosen above.
path = f"pairs/{chosen_dataset}_pairs.json"
with open(path) as pairs_file:
    pairs_by_dataset = json.load(pairs_file)
pairs = pairs_by_dataset[chosen_dataset]

print(pairs)

[['BOKF', 'ORRF'], ['CGO', 'CHW'], ['CVBF', 'FIBK'], ['ESGR', 'FRST'], ['FITB', 'PEBO'], ['GABC', 'IBCP'], ['MOFG', 'NFBK'], ['PNFP', 'TOWN']]


In [27]:
from skopt import gp_minimize
from ipynb.fs.full.Tester import *

# Find the optimal upper and lower spread thresholds for each pair:
    # spread > upper: short stockA, long stockB
    # spread < lower: long stockA, short stockB

def optimize_pairs(name, pairs, prices=None, bounds=None, n_calls=15,
                   n_random_starts=5, random_state=1234):
    """Search for the best (upper, lower) thresholds of every pair.

    For each pair, ``gp_minimize`` explores two parameters ``(upper, lower)``.
    A candidate is scored by building a ``Tester`` for that single pair and
    negating the value of ``run_backtest(False)``, so the search maximises
    whatever that method returns.

    NOTE(review): presumably ``run_backtest`` returns the strategy's backtest
    performance and the thresholds follow the convention noted in this
    notebook (spread > upper: short stock1 / long stock2; spread < lower:
    the reverse) -- confirm against ``Tester``.

    Parameters
    ----------
    name : hashable
        Key under which the list of results is stored in the returned dict.
    pairs : iterable of sequences
        Each item holds two column labels, ``[stock1, stock2]``.
    prices : DataFrame-like, optional
        Price table that is sliced by ``[stock1, stock2]`` for each
        evaluation. Defaults to the notebook-level ``train`` frame, which is
        looked up only when a pair is actually evaluated.
    bounds : sequence of (low, high), optional
        Search range for ``upper`` and ``lower`` (in that order). Defaults to
        ``[(0.0, 1.0), (-1.0, 0.0)]``.
    n_calls : int, optional
        Number of objective evaluations per pair (forwarded to ``gp_minimize``).
    n_random_starts : int, optional
        Number of random initial points (forwarded to ``gp_minimize``).
    random_state : int, optional
        Seed forwarded to ``gp_minimize``.

    Returns
    -------
    dict
        ``{name: [...]}`` with one dict per pair, in input order, holding the
        keys ``"stock1"``, ``"stock2"``, ``"upper"`` and ``"lower"``.
    """
    if bounds is None:
        bounds = [(0.0, 1.0), (-1.0, 0.0)]

    optimized = []
    for pair in pairs:
        stock1, stock2 = pair[0], pair[1]

        def objective(x):
            # Resolve the price table lazily so the notebook-level default is
            # only required when there is something to evaluate.
            source = train if prices is None else prices
            # Slice on every evaluation so each Tester receives its own
            # two-column selection rather than one shared across runs.
            tester = Tester([[stock1, stock2, x[0], x[1]]],
                            source[[stock1, stock2]])
            # gp_minimize minimises, hence the sign flip.
            return -tester.run_backtest(False)

        res = gp_minimize(objective,             # the function to minimize
                          list(bounds),          # the bounds on each dimension of x
                          acq_func="EI",         # the acquisition function
                          n_calls=n_calls,       # the number of evaluations
                          n_random_starts=n_random_starts,  # random initial points
                          random_state=random_state)        # the random seed
        optimized.append({
            "stock1": stock1,
            "stock2": stock2,
            "upper": res.x[0],
            "lower": res.x[1],
        })
    return {name: optimized}

In [28]:
# Tune the thresholds for every pair of the chosen dataset and show the result.
res_dict = optimize_pairs(name=chosen_dataset, pairs=pairs)
print(res_dict)

{'finance': [{'stock1': 'BOKF', 'stock2': 'ORRF', 'upper': 0.9367439982168405, 'lower': -0.0622256149197562}, {'stock1': 'CGO', 'stock2': 'CHW', 'upper': 0.3013185966696092, 'lower': -1.0}, {'stock1': 'CVBF', 'stock2': 'FIBK', 'upper': 0.0, 'lower': -1.0}, {'stock1': 'ESGR', 'stock2': 'FRST', 'upper': 0.008032341620988441, 'lower': -1.0}, {'stock1': 'FITB', 'stock2': 'PEBO', 'upper': 0.9749460293136809, 'lower': -0.6393549344629386}, {'stock1': 'GABC', 'stock2': 'IBCP', 'upper': 0.0, 'lower': -0.6932751092636371}, {'stock1': 'MOFG', 'stock2': 'NFBK', 'upper': 0.04672229522371602, 'lower': 0.0}, {'stock1': 'PNFP', 'stock2': 'TOWN', 'upper': 0.20798032289232873, 'lower': 0.0}]}


In [32]:
"""Save optimized results"""
import os

path = "optimized_pairs/{}_pairs.json".format(chosen_dataset)

# Serialise before touching the file so a serialisation error cannot leave a
# truncated results file behind.
payload = json.dumps(res_dict)

# Make sure the output folder exists; a missing folder would make open() fail.
os.makedirs(os.path.dirname(path), exist_ok=True)

# The context manager flushes the data to disk and releases the handle as
# soon as the write is done, instead of keeping it open for the kernel's life.
with open(path, "w") as resFile:
    resFile.write(payload)

684