In this Jupyter notebook we created an algorithm that takes a set of public securities of our choosing and finds correlations or cointegrations between them. Using this, we then mark buy or sell signals accordingly allowing us to profit using pairwise trading.  

In [107]:
#imports
import numpy as np
import pandas as pd
import statsmodels
import statsmodels.api as sm
import yfinance as yf
from statsmodels.tsa.stattools import coint, adfuller
from pandas_datareader import data as pdr
pd.core.common.is_list_like = pd.api.types.is_list_like
import datetime

import matplotlib.pyplot as plt
import seaborn as sns; sns.set(style="whitegrid")

Area to choose what Stocks to track:

In [108]:
yf.pdr_override()
start = datetime.datetime(2015, 1, 1)
end = datetime.datetime.now()
tickers = ['TSLA', 'NKE', 'AMZN', 'WMT', 'GOOG', 'JPM', 'META', 'NVDA', 'ABBV', 'AAPL', 'ADBE', 'ORCL', 'EBAY', 'MSFT', 'QCOM', 'HPQ', 'JNPR', 'AMD', 'IBM', 'VOO']


df = pdr.get_data_yahoo(tickers, start, end)['Close']
df.tail()

[*********************100%***********************]  20 of 20 completed


Unnamed: 0_level_0,AAPL,ABBV,ADBE,AMD,AMZN,EBAY,GOOG,HPQ,IBM,JNPR,JPM,META,MSFT,NKE,NVDA,ORCL,QCOM,TSLA,VOO,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2023-02-06,151.729996,145.020004,375.230011,83.68,102.18,49.98,103.470001,29.77,136.179993,30.799999,141.919998,186.059998,256.769989,125.730003,210.889999,88.529999,132.929993,194.759995,376.660004,140.679993
2023-02-07,154.649994,145.139999,383.820007,85.910004,102.110001,50.169998,108.040001,30.0,135.839996,31.219999,143.649994,191.619995,267.559998,125.330002,221.729996,87.739998,136.630005,196.809998,381.519989,140.979996
2023-02-08,151.919998,144.610001,378.359985,84.690002,100.050003,49.16,100.0,29.610001,135.979996,31.059999,142.639999,183.429993,266.730011,122.910004,222.050003,86.690002,132.169998,201.289993,377.339996,140.220001
2023-02-09,150.869995,148.699997,375.809998,83.209999,98.239998,48.389999,95.459999,29.66,133.75,30.969999,140.419998,177.919998,263.619995,122.18,223.369995,86.650002,130.529999,207.320007,374.109985,141.520004
2023-02-10,151.009995,152.050003,370.98999,81.480003,97.610001,48.080002,94.860001,29.969999,135.600006,31.129999,141.039993,174.149994,263.100006,122.230003,212.649994,87.139999,128.990005,196.889999,375.019989,143.720001


First we need to test for stationarity using the Dickey-Fuller Test; we obviously only want to look at stocks that are not following stationarity.

In [109]:
def stationarity_test(X, cutoff=0.01):
    pvalue = adfuller(X)[1]
    if pvalue < cutoff:
        print('p-value = ' + str(pvalue) + ' The series ' + X.name +' is likely stationary.')
    else:
        print('p-value = ' + str(pvalue) + ' The series ' + X.name +' is likely non-stationary.')

This is to test if the Stationarity Test actually works:

In [110]:
ticker = yf.Ticker('AAPL')
Stock_history = ticker.history(period = "60d")
Stock_close = Stock_history["Close"]
stationarity_test(Stock_close)

p-value = 0.40673803423763844 The series Close is likely non-stationary.


Test for Cointegration/Correlated Pairs:

In [111]:
pairs = []
def find_cointegrated_pairs(data):
    n = data.shape[1]
    score_matrix = np.zeros((n, n))
    pvalue_matrix = np.ones((n, n))
    keys = data.keys()
    for i in range(n):
        for j in range(i+1, n):
            S1 = data[keys[i]]
            S2 = data[keys[j]]
            result = coint(S1, S2)
            score = result[0]
            pvalue = result[1]
            score_matrix[i, j] = score
            pvalue_matrix[i, j] = pvalue
            if pvalue < 0.05:
                pairs.append((keys[i], keys[j]))
    return pvalue_matrix, score_matrix, pairs

Now we will test the Cointegrated Pairs Function:

In [112]:
find_cointegrated_pairs(df)

(array([[1.        , 0.34650655, 0.90099935, 0.30298201, 0.98074716,
         0.757186  , 0.76959859, 0.35190065, 0.69402562, 0.86286511,
         0.69358409, 0.9777361 , 0.32199467, 0.47406462, 0.06819102,
         0.17475149, 0.0791732 , 0.16125589, 0.30049551, 0.2095674 ],
        [1.        , 1.        , 0.88142844, 0.61416749, 0.92501179,
         0.86963757, 0.73769829, 0.06524864, 0.8829608 , 0.61703456,
         0.77097077, 0.95548078, 0.58041932, 0.77875951, 0.26151596,
         0.29963398, 0.4863646 , 0.46231286, 0.45204901, 0.49246249],
        [1.        , 1.        , 1.        , 0.61455333, 0.00125093,
         0.20453359, 0.51675663, 0.54841513, 0.17904504, 0.86269287,
         0.22335051, 0.25743803, 0.90577644, 0.31284901, 0.92863976,
         0.7982605 , 0.35502595, 0.57456677, 0.64340079, 0.07410824],
        [1.        , 1.        , 1.        , 1.        , 0.48811315,
         0.14244032, 0.04193083, 0.35126066, 0.66752495, 0.83222995,
         0.40228042, 0.71730458

Now we will create a Pandas Dataframe to store all of the Cointegration values and the Correlations values. We have the option to store each pair twice or once in order to create iteration through the information easier for post-processing. 

In [116]:
def store_pairs(pairs, pairs_df, flip_pairs = False):
    temp_df = pd.DataFrame(pairs, columns=['Stock 1', 'Stock 2'])
    temp_df['Correlation'] = None
    if flip_pairs:
        flipped_df = pd.DataFrame(columns= ['Stock 1', 'Stock 2', 'Correlation'])
        flipped_df['Stock 1'] = temp_df['Stock 2']
        flipped_df['Stock 2'] = temp_df['Stock 1']
        flipped_df['Correlation'] = None
        
        pairs_df = pd.concat([pairs_df, temp_df, flipped_df], ignore_index=True)
    else:
        pairs_df = pd.concat([pairs_df, temp_df], ignore_index=True)
    return pairs_df

pairs_df = pd.DataFrame(columns= ['Stock 1', 'Stock 2', 'Correlation'])
pairs_df = store_pairs(pairs, pairs_df)
display(pairs_df)



Unnamed: 0,Stock 1,Stock 2,Correlation
0,ADBE,AMZN,
1,AMD,GOOG,
2,AMD,MSFT,
3,AMD,NKE,
4,AMD,QCOM,
5,AMD,WMT,
6,EBAY,META,
7,GOOG,ORCL,
8,GOOG,TSLA,
9,IBM,JPM,
