In [1]:
# Check for Stationarity for each pair

In [2]:
import yfinance as yf
import random
import json
import statsmodels.api as sm
import datetime
import os 
import csv
import copy 
from statsmodels.tsa.stattools import adfuller
# Seed Python's built-in random module so any later random.* calls are repeatable.
random.seed(1)


In [3]:
# Function: Load data obtained from the identify pairs notebook

In [4]:
def load_pairs():
    """Read the candidate pairs from ``data/pairs_names.json``.

    Returns:
        The object decoded from the JSON file. ``main`` iterates it as a
        dictionary whose values are either one ticker string or a list of
        ticker strings.
    """
    with open("data/pairs_names.json", "r") as pairs_file:
        return json.load(pairs_file)


In [5]:
# Price-history window passed to every download: it opens 430 days before today
# and closes 60 days before today, i.e. it spans 430 - 60 = 370 days.
start = datetime.date.today() - datetime.timedelta(days=430)
end = start + datetime.timedelta(days=430 - 60)


In [6]:
# Function: Plotting price movement of two assets

In [7]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

def plot_assets(asset1, asset2, ticker1, ticker2):
    """Plot two price series as stacked subplots and save the figure as HTML.

    asset2 is drawn in the top panel and asset1 in the bottom panel. Each panel
    title and trace name is the corresponding ticker symbol, so the saved chart
    identifies which series is which.

    Args:
        asset1: Price series of the first asset; its ``.index`` supplies the
            x values.
        asset2: Price series of the second asset; its ``.index`` supplies the
            x values.
        ticker1: Symbol of the first asset (used in titles and the file name).
        ticker2: Symbol of the second asset (used in titles and the file name).

    Side effects:
        Creates the ``img/`` directory when it is missing and writes
        ``img/<ticker1>_<ticker2>.html``.
    """
    fig = make_subplots(rows=2,
                        cols=1,
                        subplot_titles=(ticker2, ticker1))

    panels = ((asset2, ticker2), (asset1, ticker1))
    for row, (series, ticker) in enumerate(panels, start=1):
        # add_trace with row/col is the supported replacement for the
        # deprecated append_trace.
        fig.add_trace(go.Scatter(x=series.index, y=series, name=ticker),
                      row=row, col=1)
        fig.update_yaxes(title_text="Price", row=row, col=1)
        fig.update_xaxes(title_text="Date", row=row, col=1)

    if not os.path.exists("img/"):
        # exist_ok guards against the directory appearing between the check
        # and the creation.
        os.makedirs("img/", exist_ok=True)
        print("Image Directory Created!")

    file_path = "img/" + ticker1 + "_" + ticker2 + ".html"
    fig.write_html(file_path)


In [8]:
# Function: Run OLS regression and calculate the spread

In [9]:
def regression_analysis(asset1, asset2, ticker1, ticker2):
    """Regress asset2 on asset1 with OLS and return the slope and the spread.

    The model is ``asset2 = beta * asset1 + const``. The spread is
    ``asset2 - beta * asset1``; the fitted intercept is not subtracted.

    Args:
        asset1: Price series of the independent variable (a pandas Series; its
            name may be anything, not only "Close").
        asset2: Price series of the dependent variable.
        ticker1: Symbol of asset1 (used in the chart title and file name).
        ticker2: Symbol of asset2 (used in the chart title and file name).

    Returns:
        A ``(beta, spread)`` tuple: the slope on asset1 and the spread series.

    Side effects:
        Prints "Completed Training" and writes
        ``img/Spread_<ticker1>_<ticker2>.html``, creating ``img/`` if needed.
    """
    # Add a constant column so the model fits an intercept. prepend=False puts
    # the constant last, so the first column is always the asset1 price.
    exog = sm.add_constant(asset1, prepend=False)
    regressor = exog.columns[0]

    # asset2 is the dependent variable, asset1 (plus constant) the independent one.
    output = sm.OLS(asset2, exog).fit()
    print("Completed Training")

    # Slope on the asset1 price, looked up by column name rather than a
    # hard-coded "Close".
    beta = output.params[regressor]

    # Calculating spread
    spread = asset2 - beta * exog[regressor]

    # Pass the index as x so the axis labelled "Date" really shows the dates.
    fig = go.Figure(go.Scatter(x=spread.index, y=spread))
    fig.update_layout(title="Spread between " + ticker2 + " and " + ticker1)
    fig.update_xaxes(title_text="Date")
    fig.update_yaxes(title_text="Spread")

    # Do not rely on plot_assets having created the output directory first.
    os.makedirs("img", exist_ok=True)
    file_path = "img/Spread_" + ticker1 + "_" + ticker2 + ".html"
    fig.write_html(file_path)

    return beta, spread


In [10]:
# Function: Perform Augmented Dickey Fuller Test

In [11]:
from statsmodels.tsa.stattools import adfuller
def run_adf_test(ticker1, ticker2, spread, pvalue_thres):
    """Run the Augmented Dickey-Fuller test on a spread and report the verdict.

    Args:
        ticker1: Symbol of the first asset (used only in the printed message).
        ticker2: Symbol of the second asset (used only in the printed message).
        spread: Series handed to ``adfuller``.
        pvalue_thres: The test passes when the p-value is strictly below this.

    Returns:
        Truthy when the p-value is below ``pvalue_thres``, falsy otherwise.
    """
    # The second element of the adfuller result is compared with the threshold
    # as the p-value.
    p_value = adfuller(spread)[1]
    does_pass = p_value < pvalue_thres

    verdict = "passed" if does_pass else "failed"
    print(f'spread between {ticker1} and {ticker2} {verdict} ADF stationarity test')
    return does_pass


In [12]:
#1. p-value > 0.05: Fail to reject the null hypothesis (H0), the data has a unit root and is non-stationary.
#2. p-value <= 0.05: Reject the null hypothesis (H0), the data does not have a unit root and is stationary.

#**Since the p-value is greater than 0.05, we fail to reject the null hypothesis: the time series is not stationary.**

#If the pair were accepted (null hypothesis rejected), the code would be as follows:
#```python
#accepted_pairs = []
#accepted_pairs.append(target_pair[0] + "_" + target_pair[1]) 
#```

#You don't need to print out the ADF values; just check whether the p-value is less than or equal to 0.05 and, if so, save the pair into the list.


#Once the list is complete for all pairs, save it to a file (main() writes data/final_pair.csv) for later use.

In [13]:
def get_data(ticker, start, end):
    """Download price history for ``ticker`` and return its 'Close' data.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start: Second positional argument forwarded to ``yf.download``.
        end: Third positional argument forwarded to ``yf.download``.

    Returns:
        The ``'Close'`` entry of whatever ``yf.download`` returns.
    """
    history = yf.download(ticker, start, end)
    return history['Close']

def main():
    """Test every candidate pair for a stationary spread and save the passers.

    For each pair from ``load_pairs()`` this downloads both price series over
    the module-level ``start``/``end`` window, plots them, computes the OLS
    spread, and runs the ADF test at a 0.05 p-value threshold. Pairs that pass
    are written to ``data/final_pair.csv``, one ``ticker1,ticker2`` row each.
    The pair dictionary, each pair name, and the number of passing pairs are
    printed along the way.
    """
    adf_pvalue_thres = 0.05
    # Passing pairs are kept as (ticker1, ticker2) tuples so that tickers
    # containing "_" survive unchanged into the CSV.
    final_pair_list = []
    # Each ticker is downloaded once, however many pairs it appears in.
    price_cache = {}

    pairs_dict = load_pairs()
    print(pairs_dict)

    for ticker1, partners in pairs_dict.items():
        # A value may be a single ticker string instead of a list.
        if isinstance(partners, str):
            partners = [partners]

        for ticker2 in partners:
            print(ticker1 + '_' + ticker2)

            for ticker in (ticker1, ticker2):
                if ticker not in price_cache:
                    price_cache[ticker] = get_data(ticker, start, end)
            asset1 = price_cache[ticker1]
            asset2 = price_cache[ticker2]

            plot_assets(asset1, asset2, ticker1, ticker2)
            # Only the spread is needed here; the regression slope is discarded.
            _beta, spread = regression_analysis(asset1, asset2, ticker1, ticker2)
            if run_adf_test(ticker1, ticker2, spread, adf_pvalue_thres):
                final_pair_list.append((ticker1, ticker2))

    print(len(final_pair_list))
    final_pair_file_path = "data/final_pair.csv"
    with open(final_pair_file_path, "w", newline="") as csv_file:
        csv.writer(csv_file).writerows(final_pair_list)
        
# Run the full pipeline when this cell executes.
main()


{'ICE': ['HDB'], 'ING': ['HDB'], 'ITUB': ['BX'], 'JPM': ['APO', 'HDB'], 'MA': ['HDB', 'JPM'], 'MMC': ['ICE'], 'MS': ['C'], 'PYPL': ['HDB'], 'RY': ['BNS', 'MS'], 'SMFG': ['BBVA', 'HDB'], 'UBS': ['HDB', 'PGR', 'SMFG'], 'USB': ['TD'], 'V': ['BBVA', 'HDB', 'PYPL', 'SAN', 'UBS']}
ICE_HDB
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
Completed Training
spread between ICE and HDB failed ADF stationarity test
ING_HDB
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
Completed Training
spread between ING and HDB failed ADF stationarity test
ITUB_BX
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
Completed Training
spread between ITUB and BX failed ADF stationarity test
JPM_APO
[*********************100%%**********************]  1