In this Jupyter notebook we build an algorithm that takes a set of publicly traded securities of our choosing and finds pairs that are correlated or cointegrated. We then use those pairs to mark buy and sell signals, with the aim of profiting through pairs trading.

In [8]:
#imports
import numpy as np
import pandas as pd
import statsmodels
import statsmodels.api as sm
import yfinance as yf
from statsmodels.tsa.stattools import coint, adfuller
from pandas_datareader import data as pdr
pd.core.common.is_list_like = pd.api.types.is_list_like
import datetime

import matplotlib.pyplot as plt
import seaborn as sns; sns.set(style="whitegrid")

Choose which stocks to track:

In [9]:
# Keep pandas_datareader's Yahoo reader routed through yfinance for any code
# that still calls pdr.get_data_yahoo. The hook is deprecated upstream and may
# be absent from newer yfinance releases, so only call it when it exists.
if hasattr(yf, "pdr_override"):
    yf.pdr_override()

# Date window for the price history. `end` is evaluated at run time, so the
# downloaded data (and every result derived from it) changes from run to run.
start = datetime.datetime(2015, 1, 1)
end = datetime.datetime.now()
tickers = ['AAPL', 'ADBE', 'ORCL', 'EBAY', 'MSFT', 'QCOM', 'HPQ', 'JNPR', 'AMD', 'IBM', 'SPY']

# Call yfinance directly: this is the function the override installs behind
# pdr.get_data_yahoo, so the result is the same without relying on the
# monkey patch having been applied.
df = yf.download(tickers, start=start, end=end)['Close']
df.tail()

[*********************100%***********************]  11 of 11 completed


Unnamed: 0_level_0,AAPL,ADBE,AMD,EBAY,HPQ,IBM,JNPR,MSFT,ORCL,QCOM,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-01-31,144.289993,370.339996,75.150002,49.5,29.139999,134.729996,32.299999,247.809998,88.459999,133.210007,406.480011
2023-02-01,145.429993,383.920013,84.639999,50.400002,29.870001,135.089996,30.99,252.75,90.050003,138.460007,410.799988
2023-02-02,150.820007,392.230011,88.309998,51.66,30.790001,136.389999,31.450001,264.600006,89.379997,135.850006,416.779999
2023-02-03,154.5,379.329987,86.089996,50.66,30.51,136.940002,30.73,258.350006,89.620003,135.020004,412.350006
2023-02-06,151.729996,375.230011,83.68,49.98,29.77,136.179993,30.799999,256.769989,88.529999,132.929993,409.829987


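First we test each price series for stationarity using the Augmented Dickey-Fuller (ADF) test. For pairs trading we are interested in series that are individually non-stationary: cointegration looks for two non-stationary series whose combination is itself stationary.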

In [10]:
def stationarity_test(X, cutoff=0.01):
    """Run an Augmented Dickey-Fuller test on ``X`` and print the verdict.

    Parameters
    ----------
    X : pandas.Series or one-dimensional array-like
        Observations to test. Missing values are dropped before testing so
        that gaps in the data do not reach ``adfuller``.
    cutoff : float, default 0.01
        Significance level. A p-value strictly below it is reported as
        "likely stationary"; anything else as "likely non-stationary".

    Returns
    -------
    None
        The p-value and the verdict are printed, not returned.
    """
    # Build a printable label: the input may be unnamed, may carry a
    # non-string name (e.g. an integer column label), or may not be a
    # Series at all.
    name = getattr(X, 'name', None)
    label = 'X' if name is None else str(name)

    values = pd.Series(X).dropna()
    pvalue = adfuller(values)[1]  # element 1 of the result is the p-value

    verdict = 'stationary' if pvalue < cutoff else 'non-stationary'
    print('p-value = ' + str(pvalue) + ' The series ' + label + ' is likely ' + verdict + '.')

As a sanity check, we run the stationarity test on recent AAPL closing prices:

In [11]:
# Sanity check: run stationarity_test on a single stock's closing prices.
ticker = yf.Ticker('AAPL')
# period="60d" is passed straight to yfinance; presumably the most recent 60 days - confirm against the yfinance docs.
Stock_history = ticker.history(period = "60d")
Stock_close = Stock_history["Close"]
# The series takes its name from the column, so the printed message calls it "Close" rather than "AAPL".
stationarity_test(Stock_close)

p-value = 0.3591300369897942 The series Close is likely non-stationary.


Test every pair of stocks for cointegration:

In [12]:
def find_cointegrated_pairs(data, significance=0.05):
    """Run a cointegration test on every pair of columns in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per series (e.g. one ticker's closing prices), rows
        aligned on a shared index.
    significance : float, default 0.05
        A pair is reported as cointegrated when its p-value is strictly
        below this level.

    Returns
    -------
    score_matrix : numpy.ndarray, shape (n, n)
        Test statistic for each pair ``(i, j)`` with ``i < j``. The diagonal
        and lower triangle are left at 0.
    pvalue_matrix : numpy.ndarray, shape (n, n)
        Matching p-values. The diagonal and lower triangle are left at 1.
    pairs : list of tuple
        ``(column_i, column_j)`` labels of the pairs whose p-value is below
        ``significance``, in the order they were tested.

    Notes
    -----
    Every pair is tested independently with no multiple-comparison
    correction, so with many columns some pairs will pass by chance alone.
    """
    n = data.shape[1]
    score_matrix = np.zeros((n, n))
    pvalue_matrix = np.ones((n, n))
    keys = data.columns
    pairs = []
    for i in range(n):
        for j in range(i + 1, n):
            # Keep only the rows where both series have a value, so a gap in
            # either column does not reach the test as a missing value.
            both = data.iloc[:, [i, j]].dropna()
            # coint returns (statistic, p-value, critical values); only the
            # first two are used here.
            score, pvalue = coint(both.iloc[:, 0], both.iloc[:, 1])[:2]
            score_matrix[i, j] = score
            pvalue_matrix[i, j] = pvalue
            if pvalue < significance:
                pairs.append((keys[i], keys[j]))
    return score_matrix, pvalue_matrix, pairs

Now we run the cointegrated-pairs function on the downloaded closing prices:

In [13]:
# The result is the tuple (score_matrix, pvalue_matrix, pairs); it is not assigned, so the whole tuple is echoed as the cell output.
find_cointegrated_pairs(df)

(array([[ 0.        , -1.00633238, -2.4860263 , -1.59880236, -2.32191757,
         -1.64722064, -1.14347758, -2.76927613, -2.72937053, -3.21850549,
         -2.46313338],
        [ 0.        ,  0.        , -1.81587654, -2.69511782, -1.97301355,
         -2.75256532, -1.17168918, -1.32359666, -1.32041637, -2.31251184,
         -1.78068009],
        [ 0.        ,  0.        ,  0.        , -2.90255671, -2.34073352,
         -1.70431508, -1.2623993 , -4.40193264, -2.5818792 , -3.86089787,
         -3.17470807],
        [ 0.        ,  0.        ,  0.        ,  0.        , -2.6563779 ,
         -2.32881395, -1.57112993, -2.61017503, -1.84282934, -3.05272364,
         -2.55066604],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         -1.65950564, -2.13095287, -2.39880652, -2.51429461, -2.55310145,
         -2.71177746],
        [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
          0.        , -3.3148604 , -4.00716266, -3.74813059, -3.8135188

Now we will work on a model to mark whether to buy or sell based on the cointegration model. 