In this Jupyter notebook we build an algorithm that takes a set of publicly traded securities of our choosing and looks for correlated or cointegrated pairs among them. Based on those relationships, we then mark buy and sell signals, allowing us to profit through pairs trading.  

In [117]:
#imports
import numpy as np
import pandas as pd
import statsmodels
import statsmodels.api as sm
import yfinance as yf
from statsmodels.tsa.stattools import coint, adfuller
from pandas_datareader import data as pdr
pd.core.common.is_list_like = pd.api.types.is_list_like
import datetime
from scipy.stats.stats import pearsonr

import matplotlib.pyplot as plt
import seaborn as sns; sns.set(style="whitegrid")

Choose which stocks to track:

In [108]:
# Have pandas_datareader fetch Yahoo data through yfinance (see the yfinance docs for pdr_override).
yf.pdr_override()
# Download window: a fixed start date up to the moment this cell is executed,
# so the downloaded data (and every result derived from it) changes between runs.
start = datetime.datetime(2015, 1, 1)
end = datetime.datetime.now()
# Universe of symbols that will be screened pairwise.
tickers = ['TSLA', 'NKE', 'AMZN', 'WMT', 'GOOG', 'JPM', 'META', 'NVDA', 'ABBV', 'AAPL', 'ADBE', 'ORCL', 'EBAY', 'MSFT', 'QCOM', 'HPQ', 'JNPR', 'AMD', 'IBM', 'VOO']


# Keep only the 'Close' group of the download: one price column per ticker, indexed by date.
df = pdr.get_data_yahoo(tickers, start, end)['Close']
# Last expression of the cell: display the most recent rows as a quick check of the download.
df.tail()

[*********************100%***********************]  20 of 20 completed


Unnamed: 0_level_0,AAPL,ABBV,ADBE,AMD,AMZN,EBAY,GOOG,HPQ,IBM,JNPR,JPM,META,MSFT,NKE,NVDA,ORCL,QCOM,TSLA,VOO,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2023-02-06,151.729996,145.020004,375.230011,83.68,102.18,49.98,103.470001,29.77,136.179993,30.799999,141.919998,186.059998,256.769989,125.730003,210.889999,88.529999,132.929993,194.759995,376.660004,140.679993
2023-02-07,154.649994,145.139999,383.820007,85.910004,102.110001,50.169998,108.040001,30.0,135.839996,31.219999,143.649994,191.619995,267.559998,125.330002,221.729996,87.739998,136.630005,196.809998,381.519989,140.979996
2023-02-08,151.919998,144.610001,378.359985,84.690002,100.050003,49.16,100.0,29.610001,135.979996,31.059999,142.639999,183.429993,266.730011,122.910004,222.050003,86.690002,132.169998,201.289993,377.339996,140.220001
2023-02-09,150.869995,148.699997,375.809998,83.209999,98.239998,48.389999,95.459999,29.66,133.75,30.969999,140.419998,177.919998,263.619995,122.18,223.369995,86.650002,130.529999,207.320007,374.109985,141.520004
2023-02-10,151.009995,152.050003,370.98999,81.480003,97.610001,48.080002,94.860001,29.969999,135.600006,31.129999,141.039993,174.149994,263.100006,122.230003,212.649994,87.139999,128.990005,196.889999,375.019989,143.720001


First we need to test each price series for stationarity using the Augmented Dickey-Fuller (ADF) test; for the cointegration analysis we only want to look at stocks whose price series are non-stationary.

In [109]:
def stationarity_test(X, cutoff=0.01):
    """Print whether a series looks stationary according to the ADF test.

    Parameters
    ----------
    X : array-like
        Observations passed to ``adfuller``. If ``X`` has a ``name``
        attribute (e.g. a pandas Series) that name is used in the message;
        otherwise the placeholder ``<unnamed>`` is printed.
    cutoff : float, default 0.01
        p-values strictly below this threshold are reported as
        "likely stationary"; everything else as "likely non-stationary".

    Returns
    -------
    None
        The verdict is only printed.
    """
    pvalue = adfuller(X)[1]
    # X may be an unnamed Series (name is None), carry a non-string name
    # (e.g. an integer column label) or be a plain array without `.name`;
    # concatenating the raw attribute would raise in all of those cases.
    name = getattr(X, 'name', None)
    label = '<unnamed>' if name is None else str(name)
    verdict = 'stationary' if pvalue < cutoff else 'non-stationary'
    print('p-value = ' + str(pvalue) + ' The series ' + label + ' is likely ' + verdict + '.')

As a sanity check, we run the stationarity test on a single stock to confirm that it works:

In [110]:
# Smoke test of stationarity_test on a single symbol.
ticker = yf.Ticker('AAPL')
# period="60d" presumably requests the most recent 60 days of history -- confirm against the yfinance docs.
Stock_history = ticker.history(period = "60d")
Stock_close = Stock_history["Close"]
# The Series is named after its column ("Close"), which is why the printed
# message refers to "Close" rather than to the ticker symbol.
stationarity_test(Stock_close)

p-value = 0.40673803423763844 The series Close is likely non-stationary.


Test for Cointegration/Correlated Pairs:

In [111]:
# Module-level mirror of the pairs found by the most recent call to
# find_cointegrated_pairs(); kept so cells that read `pairs` directly keep working.
pairs = []
def find_cointegrated_pairs(data, significance=0.05):
    """Run a cointegration test on every unordered pair of columns.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per series; columns are paired as (i, j) with i < j.
    significance : float, default 0.05
        A pair is reported when its cointegration p-value is strictly
        below this threshold.

    Returns
    -------
    pvalue_matrix : numpy.ndarray, shape (n, n)
        p-values in the upper triangle; all other entries stay at 1.
    score_matrix : numpy.ndarray, shape (n, n)
        Test statistics in the upper triangle; all other entries stay at 0.
    pairs : list of tuple
        ``(column_i, column_j)`` labels of the pairs below ``significance``.

    Notes
    -----
    The module-level ``pairs`` list is overwritten (in place) with the
    result of each call. Previously the function appended to that list,
    so re-running the cell or calling it twice reported every pair
    multiple times.
    """
    n = data.shape[1]
    score_matrix = np.zeros((n, n))
    pvalue_matrix = np.ones((n, n))
    keys = data.keys()
    found = []
    for i in range(n):
        for j in range(i + 1, n):
            result = coint(data[keys[i]], data[keys[j]])
            score, pvalue = result[0], result[1]
            score_matrix[i, j] = score
            pvalue_matrix[i, j] = pvalue
            if pvalue < significance:
                found.append((keys[i], keys[j]))
    # Slice assignment mutates the existing global list instead of rebinding
    # it, so any reference to `pairs` held elsewhere sees the fresh result.
    pairs[:] = found
    return pvalue_matrix, score_matrix, found

In [168]:
def add_row(S1, S2, df, p_value_testing = True, significance = 0.05):
    """Append the cointegration/correlation statistics of one pair to a table.

    Parameters
    ----------
    S1, S2 : pandas.Series
        The two series to compare; their ``name`` attributes fill the
        'Stock 1' and 'Stock 2' columns.
    df : pandas.DataFrame or None
        Table built so far, or ``None`` when no row has been stored yet.
    p_value_testing : bool, default True
        When truthy, the pair is stored only if its cointegration p-value
        is strictly below ``significance``; when falsy, it is always stored.
    significance : float, default 0.05
        Threshold used when ``p_value_testing`` is enabled.

    Returns
    -------
    pandas.DataFrame or None
        ``df`` itself (possibly ``None``) when the pair is filtered out;
        otherwise a new frame with the extra row and a fresh 0..k-1 index.
        Columns: 'Stock 1', 'Stock 2', 'CoInt', 'P CoInt', 'Correlation'.
    """
    result = coint(S1, S2)
    score = result[0]
    p_value = result[1]
    # `not (p < s)` rather than `p >= s` so a NaN p-value is rejected,
    # exactly as the original `p_value < 0.05` check did.
    if p_value_testing and not p_value < significance:
        return df
    # Only pay for the correlation once we know the row is kept.
    corr = pearsonr(S1, S2)[0]
    # Build the row as a 1 x 5 frame directly so the numeric columns get a
    # float dtype (a transposed single-column frame makes everything object).
    row = pd.DataFrame(
        [[S1.name, S2.name, score, p_value, corr]],
        columns=['Stock 1', 'Stock 2', 'CoInt', 'P CoInt', 'Correlation'],
    )
    if df is None:
        return row
    return pd.concat([df, row], ignore_index=True)


In [164]:
def make_table(data, p_value_testing=True):
    """Build the pair-statistics table for every unordered pair of columns.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per series; columns are paired positionally as (i, j)
        with i < j.
    p_value_testing : bool, default True
        Forwarded to ``add_row``: when True only pairs that pass its
        p-value filter are kept, when False every pair is kept.

    Returns
    -------
    pandas.DataFrame or None
        One row per retained pair with a 0..k-1 index, or ``None`` when no
        pair was retained (including when ``data`` has fewer than two columns).
    """
    n = data.shape[1]
    rows = []
    for i in range(n):
        for j in range(i + 1, n):
            # Passing None asks add_row for just this pair's one-row frame
            # (or None if it was filtered out).
            row = add_row(data.iloc[:, i], data.iloc[:, j], None, p_value_testing)
            if row is not None:
                rows.append(row)
    if not rows:
        return None
    # Concatenate once at the end instead of once per pair, which copied the
    # whole table on every iteration.
    return pd.concat(rows, ignore_index=True)



Now we need a function that tests a new stock against all of the stocks we were already testing and stores the results in the dataframe.

In [None]:
def new_stock_test(df, new_stock, new_table, p_value_testing=True):
    """Test one additional stock against every stock already in ``df``.

    NOTE(review): the original cell was an unfinished stub (an invalid
    ``==`` in the default argument and no body). This body implements what
    the accompanying notebook text describes; confirm it matches the
    intended design.

    Parameters
    ----------
    df : pandas.DataFrame
        Price table of the stocks already under study, one column per stock.
    new_stock : pandas.Series
        Prices of the stock to add. Assumed to be named and to cover the
        same dates as ``df`` -- TODO confirm against the caller.
    new_table : pandas.DataFrame or None
        Pair-statistics table to extend, or ``None`` to start a new one.
    p_value_testing : bool, default True
        Forwarded to ``add_row`` (True keeps only pairs that pass its
        p-value filter).

    Returns
    -------
    pandas.DataFrame or None
        ``new_table`` extended with one row per retained pair.
    """
    for column in df.columns:
        if column == new_stock.name:
            # Never pair the new stock with itself if it is already in df.
            continue
        new_table = add_row(df[column], new_stock, new_table, p_value_testing)
    return new_table

Now we will test the Cointegrated Pairs Function and the make table function

In [169]:
# The return value is discarded here; the significant pairs remain reachable
# through the module-level `pairs` list that the function fills.
find_cointegrated_pairs(df)
# Last expression of the cell, so the resulting table of pairs is rendered as the cell output.
make_table(df)



Unnamed: 0,Stock 1,Stock 2,CoInt,P CoInt,Correlation
0,ADBE,AMZN,-4.494389,0.001251,0.97842
1,AMD,GOOG,-3.403794,0.041931,0.957829
2,AMD,MSFT,-4.332358,0.002278,0.97842
3,AMD,NKE,-3.616051,0.023403,0.952993
4,AMD,QCOM,-3.87071,0.010882,0.958742
5,AMD,WMT,-3.395299,0.042875,0.907513
6,EBAY,META,-3.861596,0.011198,0.921725
7,GOOG,ORCL,-3.57053,0.026635,0.953615
8,GOOG,TSLA,-3.398012,0.042572,0.932352
9,IBM,JPM,-3.946375,0.008551,-0.346826


Now we will create a pandas DataFrame to store all of the cointegration values and the correlation values. We have the option to store each pair either once or twice, in order to make iterating through the information easier during post-processing. 