# Check for Stationarity for each pair

In [24]:
import yfinance as yf
import random
import json
import statsmodels.api as sm
import datetime
import os 
random.seed(1)

In [25]:
# Path to the JSON file that maps each ticker to its list of partner tickers
file_path = "data/pairs_names.json"

# Parse the file contents into a plain Python dictionary
with open(file_path, "r") as json_file:
    loaded_pairs_dic = json.loads(json_file.read())

# Show the loaded mapping so the available pairs can be inspected
print(loaded_pairs_dic)


{'CMS': ['CEG'], 'CNP': ['AEE'], 'DUK': ['AEP', 'CEG'], 'EIX': ['CEG'], 'ES': ['AEP', 'CEG'], 'EVRG': ['CEG', 'CNP'], 'LNT': ['DTE'], 'NI': ['CEG'], 'PCG': ['D'], 'PEG': ['CEG', 'NI'], 'PPL': ['CEG', 'FE'], 'SO': ['CEG'], 'SRE': ['AEP', 'DUK', 'PCG'], 'VST': ['CEG'], 'WEC': ['CEG', 'EVRG'], 'XEL': ['CEG', 'DUK', 'SRE']}


In [26]:
# Pair under study: CMS together with the first partner listed for it
base_ticker = "CMS"
target_pair = [base_ticker, loaded_pairs_dic[base_ticker][0]]
target_pair

['CMS', 'CEG']

In [27]:
# Enable to use start & end dates:
# Analysis window, in days counted back from today. The most recent
# HOLDOUT_DAYS are deliberately left out of the download.
LOOKBACK_DAYS = 430
HOLDOUT_DAYS = 60

# Read the clock once so both bounds are measured from the same day, even if
# the cell happens to run across midnight.
today = datetime.date.today()
start = today - datetime.timedelta(days=LOOKBACK_DAYS)
end = today - datetime.timedelta(days=HOLDOUT_DAYS)

In [28]:
# Closing prices for both tickers of the pair over the same date window,
# downloaded in the order they appear in target_pair.
asset1, asset2 = (
    yf.download(ticker, start=start, end=end)["Close"]
    for ticker in target_pair
)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


# Plotting

In [29]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Two stacked panels, one per ticker of the pair. The panels are titled with
# the actual tickers so the saved HTML file is self-explanatory.
fig = make_subplots(rows=2,
                    cols=1,
                    subplot_titles=(target_pair[1], target_pair[0]))

# add_trace(..., row=, col=) is the supported replacement for the deprecated
# append_trace method.
fig.add_trace(go.Scatter(x=asset2.index,
                         y=asset2,
                         name=target_pair[1],
                         ),
              row=1, col=1)

fig.add_trace(go.Scatter(x=asset1.index,
                         y=asset1,
                         name=target_pair[0],
                         ),
              row=2, col=1)

# Axis titles for both panels
fig.update_yaxes(title_text="Price", row=1, col=1)
fig.update_xaxes(title_text="Date",  row=1, col=1)
fig.update_yaxes(title_text="Price", row=2, col=1)
fig.update_xaxes(title_text="Date",  row=2, col=1)

# Create the output directory on first use. exist_ok=True keeps the call from
# failing if the directory appears between the check and the creation.
if not os.path.exists("img/"):
    os.makedirs("img/", exist_ok=True)
    print("Image Directory Created!")

# One HTML file per pair, named "<ticker1>_<ticker2>.html"
file_path = "img/" + target_pair[0] + "_" + target_pair[1] + ".html"
fig.write_html(file_path)

# Run OLS Regression and calculating spread
Because the most recent 60 days of data are already excluded from the download window, we do not split the data into training and testing sets here.

In [30]:
# Per-pair results, keyed by "<ticker1>_<ticker2>"
dictionary_spread = dict()

In [32]:
# Build the regressor matrix: asset1 plus a constant column so the model fits
# an intercept. A separate variable is used so asset1 itself is left untouched
# and the cell gives the same result when re-run.
asset1_with_const = sm.add_constant(asset1, prepend=False)

# Running the OLS function with asset2 as the dependent variable and asset1
# (plus the constant) as the independent variable
ols = sm.OLS(asset2, asset1_with_const)

# Saving the results of the OLS model into a variable
output = ols.fit()
print("Completed Training")


# Getting the beta (slope coefficient on asset1's "Close" column) from the OLS results
beta = output.params["Close"]

# Calculating spread: asset2 minus beta times asset1
spread = asset2 - beta * asset1_with_const["Close"]

# Plot the spread against its own index so the x axis, which is labelled
# "Date", shows the index values instead of integer positions.
fig = go.Figure(go.Scatter(x=spread.index, y=spread))

fig.update_layout(title = "Spread between " + target_pair[1] + " and " + target_pair[0])

fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Spread")

file_path = "img/Spread_" + target_pair[0] + "_" + target_pair[1] + ".html"
fig.write_html(file_path)


# Record the fitted beta for this pair under the key "<ticker1>_<ticker2>"
dictionary_spread[target_pair[0] + "_" + target_pair[1]] = beta

Completed Training


# Perform Augmented Dickey Fuller Test

In [33]:
from statsmodels.tsa.stattools import adfuller

# Run the Augmented Dickey-Fuller test on the spread series
result = adfuller(spread)

# The first two entries of the result are the test statistic and the p-value
adf_statistic, p_value = result[:2]
print('ADF Statistic: %f\n' % adf_statistic)
print('p-value: %f\n' % p_value)

ADF Statistic: -3.373731

p-value: 0.011888


1. p-value > 0.05: Fail to reject the null hypothesis (H0), the data has a unit root and is non-stationary.
2. p-value <= 0.05: Reject the null hypothesis (H0), the data does not have a unit root and is stationary.

**Since the p-value (0.0119) is less than 0.05, we reject the null hypothesis: the spread does not have a unit root and is stationary.**

When a pair passes the test (p-value <= 0.05, so the null hypothesis is rejected), record it as an accepted pair as follows:
```python
accepted_pairs = []
accepted_pairs.append(target_pair[0] + "_" + target_pair[1]) 
```

You don't need to print out the values from the ADF test; just check whether the p-value is less than or equal to 0.05 (the 5% significance level) and, if so, save the pair into the list.


Once the list is complete for all assets, save it to a file (for example a CSV, as shown below) so it can be read back for later use.

```python 
# How to save list
# Minimal example: writes one list as a single CSV row.

import csv

my_list = [1, 2, 3, 4, 5]

# Specify the file path
file_path = "my_list.csv"

# Save the list to a CSV file
# newline="" is the setting the csv module documentation recommends for files
# passed to csv.writer, so that line endings are handled by the writer itself.
with open(file_path, "w", newline="") as csv_file:
    writer = csv.writer(csv_file)
    # writerow emits the whole list as one comma-separated row
    writer.writerow(my_list)
```


Your pseudocode for the entire file would now look like this:
```
DECLARE ALL OVERALL VARIABLES - LOADED_PAIRS, DICTIONARY_TO_HOLD_SPREAD_FOR_EACH_PAIR, FINAL_PAIRS_LIST

DEFINE FUNCTION FOR GETTING YAHOO FINANCE DATA - GET_DATA(NAME)
DEFINE FUNCTION FOR PLOTTING AND SAVING EACH TIME SERIES - PLOT_ASSETS(ASSET1, ASSET2)
DEFINE FUNCTION FOR RUNNING THE REGRESSION, PLOTTING SPREAD AND RETURNING THE SPREAD - REGRESSION_ANALYSIS(ASSET1, ASSET2)
DEFINE FUNCTION FOR RUNNING ADF TEST AND RETURNING THE PAIR IF IT IS ACCEPTED (I.E. THE NULL HYPOTHESIS IS REJECTED) - RUN_ADF_TEST(SPREAD) # THE ADF TEST SHOULD ALSO PRINT FAILED OR PASSED FOR EACH TEST

FOR LOOP WITH KEYS FROM DICTIONARY
    FOR LOOP RUNNING THROUGH EACH VALUE OF THE LIST FROM EACH ELEMENT IN DICTIONARY
        ASSET1 = GET_DATA(ASSET1)
        PRINT("DOWNLOADED")
        ASSET2 = GET_DATA(ASSET2)
        PRINT("DOWNLOADED")
        PLOT_ASSETS(ASSET1, ASSET2)
        PRINT("PLOTTED")
        DICTIONARY_TO_HOLD_SPREAD_FOR_EACH_PAIR[ASSET1_ASSET2], SPREAD = REGRESSION_ANALYSIS(ASSET1, ASSET2)
        PRINT("COMPLETED REGRESSION ANALYSIS")
        FINAL_PAIRS_LIST.append(RUN_ADF_TEST(SPREAD))  # APPENDS "ASSET1_ASSET2" WHEN THE PAIR IS ACCEPTED
        PRINT("COMPLETED TEST")

SAVE DICTIONARY_TO_HOLD_SPREAD_FOR_EACH_PAIR IN DATA DIRECTORY
```