<a href="https://colab.research.google.com/github/timbyles/ASX-Cointegration/blob/main/Pairs_Trading_V2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install yfinance

In [202]:
import pandas as pd
import yfinance as yf
import numpy as np
import statsmodels
import statsmodels.api as sm
import math
import random
from statsmodels.tsa.stattools import coint

In [203]:
# Fetch the ASX300 constituents table from GitHub. Note: ASX300 data current as of 16/09/2021.
url = 'https://raw.githubusercontent.com/timbyles/ASX-Cointegration/main/ASX300.csv'
df = pd.read_csv(url)

# Build the yfinance symbols in one pass: every entry of the "Code" column
# gets the '.ax' suffix appended.
tickers = [code + '.ax' for code in df["Code"].tolist()]

In [None]:
# Download the price history of every symbol in `tickers` through yfinance.
# NOTE(review): the end date is written without zero padding ("2019-01-1");
# presumably it is parsed the same as "2019-01-01" — confirm.
data = yf.download(
    tickers,
    start="2018-01-01",
    end="2019-01-1",
)

In [205]:
# Take the close prices of the downloaded data (one column per ticker).
data_close = data["Close"]

# Drop every ticker (column) that contains at least one NaN close price, so
# only tickers with a complete price history remain.
# A single vectorised dropna replaces the previous cell-by-cell scan, which
# relied on DataFrame.iteritems() and a positional axis argument to drop();
# both were removed in pandas 2.0.
data_close = data_close.dropna(axis="columns", how="any")

In [209]:
# Looks for cointegrated pairs

def find_cointegrated_pairs(data, significance=0.05):
    """Run a cointegration test on every pair of columns in ``data``.

    Each unordered pair of columns is passed once to ``coint``. Results are
    written to the upper triangle (row index < column index) of the returned
    matrices; all other entries keep their initial values.

    Note that no multiple-comparison correction is applied, so the more pairs
    are tested, the more pairs can fall below the threshold by chance alone.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with one series per column. Columns are looked up by label, so
        labels are expected to be unique.
    significance : float, default 0.05
        A pair is reported when its p-value is strictly below this threshold.

    Returns
    -------
    score_matrix : numpy.ndarray, shape (n, n)
        First element of the ``coint`` result for each tested pair; 0
        everywhere else.
    pvalue_matrix : numpy.ndarray, shape (n, n)
        Second element of the ``coint`` result (the p-value) for each tested
        pair; 1 everywhere else.
    pairs : list of tuple
        ``(label_i, label_j)`` for every pair with p-value < ``significance``,
        in the order the pairs were tested.
    """
    # One row/column per series; p-values default to 1 so that untested
    # entries can never look significant.
    n = data.shape[1]
    score_matrix = np.zeros((n, n))
    pvalue_matrix = np.ones((n, n))
    keys = data.keys()
    pairs = []
    for i in range(n):
        # The first series of the pair only depends on i, so fetch it once.
        S1 = data[keys[i]]
        for j in range(i + 1, n):
            S2 = data[keys[j]]
            result = coint(S1, S2)
            score = result[0]
            pvalue = result[1]
            score_matrix[i, j] = score
            pvalue_matrix[i, j] = pvalue
            if pvalue < significance:
                pairs.append((keys[i], keys[j]))
    return score_matrix, pvalue_matrix, pairs

In [207]:
# Testing every pair of tickers is expensive, so only a random subset of
# `sample` tickers is tested for cointegration. Drawing the subset at random
# (rather than hand-picking tickers) may also help to avoid selection bias.
# Call random.seed(...) before this cell if a reproducible draw is needed.

sample = 10

# Never request more columns than exist; random.sample raises ValueError
# when asked for more items than the population holds.
n_columns = data_close.shape[1]
sample_size = min(sample, n_columns)

# random.sample draws distinct positions from 0 .. n_columns - 1, so every
# position is valid for iloc. (random.randint(0, n_columns) is inclusive at
# both ends and could return n_columns itself, an out-of-bounds position.)
random_index = random.sample(range(n_columns), sample_size)

# Select all sampled columns in one step instead of growing a frame with
# repeated concat calls.
data_close_random = data_close.iloc[:, random_index]

In [208]:
# Run the pairwise cointegration test on the randomly sampled tickers.
# `scores` and `pvalues` are the result matrices; `pairs` lists the ticker
# pairs whose p-value fell below the function's threshold.
scores, pvalues, pairs = find_cointegrated_pairs(data_close_random)

# Show the pairs flagged as cointegrated for this random draw.
print(pairs)

[('TAH.AX', 'WHC.AX'), ('ARF.AX', 'DXS.AX')]
