In [0]:
# Find price-correlated HK stocks
# that can be used for a pairs-trading strategy

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

from datetime import datetime

import numpy as np
import pandas as pd
import pandas_datareader.data as web

# webscraping library
import requests
from bs4 import BeautifulSoup

# Download the font to display Chinese
!wget https://github.com/kenwkliu/ideas/raw/master/colab/data/simhei.ttf
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
%matplotlib inline
CNFont = FontProperties(fname='/content/simhei.ttf')


In [0]:
# Get the stock information from Yahoo
# web.get_quote_yahoo is called for the single ticker '0700.HK'. The result is
# kept in stockInfo (it exposes .columns, so presumably a DataFrame) and is
# displayed as the cell output.
stockInfo = web.get_quote_yahoo('0700.HK')
stockInfo

In [0]:
# show the column names
# i.e. the fields carried by the Yahoo quote result stored in stockInfo
stockInfo.columns

In [0]:
# Get the pre-downloaded all HK stocks information as of 20200519
# The snapshot is a CSV read straight from GitHub, so this cell needs network access.
# Later cells read its 'code', 'quoteType', 'shortName', 'longName', 'marketCap',
# 'regularMarketPreviousClose' and 'averageDailyVolume10Day' columns.
stocksInfo = pd.read_csv('https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/data/20200519-HK-Stocks.csv')
stocksInfo

In [0]:
# Inspect the row(s) of the snapshot that belong to a single stock code
CODE = '0005.HK'
stocksInfo.loc[stocksInfo['code'] == CODE]

In [0]:
# Add a new column 'turnover': previous close multiplied by the 10-day average daily volume
stocksInfo['turnover'] = stocksInfo['regularMarketPreviousClose'].mul(stocksInfo['averageDailyVolume10Day'])

# Narrow the snapshot down to the useful columns and order the rows by turnover
SORT_BY = 'turnover'
IS_ASCENDING = False
SELECTED_COLUMNS = [
    'code',
    'quoteType',
    'shortName',
    'longName',
    'marketCap',
    'turnover',
    'regularMarketPreviousClose',
    'averageDailyVolume10Day',
]

stocksSubInfo = stocksInfo.loc[:, SELECTED_COLUMNS].sort_values(by=SORT_BY, ascending=IS_ASCENDING)
stocksSubInfo

In [0]:
# Only liquid equity names are worth trading:
# keep rows whose quote type is EQUITY and whose turnover exceeds the minimum
QUOTE_TYPE = 'EQUITY'
MIN_TURNOVER = 100000000  # 100 millions
SELECTED_COLUMNS = ['code', 'shortName', 'turnover']

stocksFilteredInfo = stocksSubInfo.loc[
    stocksSubInfo['quoteType'].eq(QUOTE_TYPE) & stocksSubInfo['turnover'].gt(MIN_TURNOVER),
    SELECTED_COLUMNS,
]
stocksFilteredInfo

In [0]:
# Use Webscraping to extract HK stock Chinese names from wiki web site
hk_url = 'https://zh-yue.wikipedia.org/wiki/%E9%A6%99%E6%B8%AF%E4%B8%8A%E5%B8%82%E5%85%AC%E5%8F%B8%E4%B8%80%E8%A6%BD'

# Without a timeout requests.get can block forever on a stalled connection
REQUEST_TIMEOUT = 30  # seconds
html = requests.get(hk_url, timeout=REQUEST_TIMEOUT)

# Fail here on an HTTP error (4xx/5xx) instead of silently parsing an error page
# and getting an empty or misleading name table further down
html.raise_for_status()

soup = BeautifulSoup(html.text, 'html.parser')

# Show just the page title as a sanity check; dumping the whole parsed page
# would flood the cell output with the full HTML
soup.title

In [0]:
# Read the list items one by one and scrape the stock code and Chinese name.
# Each usable item is expected to start with a 4-digit stock code followed by the name.
STOCK_SUFFIX = '.HK'
CODE_LENGTH = 4
ASCII_DIGITS = set('0123456789')

contentDiv = soup.find("div", attrs={"id": "mw-content-text"})
if contentDiv is None:
    # soup.find returns None when nothing matches; report that explicitly
    # rather than failing with an AttributeError on the next line
    raise ValueError("Cannot find the 'mw-content-text' block; the wiki page layout may have changed")

codes = []
names = []
for li in contentDiv.find_all("li", attrs={"class": ""}):
    content = li.text.strip()
    head, tail = content[:CODE_LENGTH], content[CODE_LENGTH:]

    # Skip list items that do not begin with a stock code (other lists on the page)
    if len(head) != CODE_LENGTH or not set(head) <= ASCII_DIGITS:
        continue
    # Skip codes longer than 4 digits: cutting them to 4 would alias a different stock
    if tail[:1] in ASCII_DIGITS:
        continue

    codes.append(head + STOCK_SUFFIX)
    names.append(tail.strip())

# One row per code: a repeated code would duplicate rows in the later left merge
chiNames = (
    pd.DataFrame({"code": codes, "chiName": names}, columns=["code", "chiName"])
    .drop_duplicates(subset="code", keep="first")
    .reset_index(drop=True)
)
chiNames

In [0]:
# Attach the Chinese names by stock code (similar to an Excel vlookup);
# the left join keeps every filtered stock even when no Chinese name was found
results = pd.merge(stocksFilteredInfo, chiNames, how='left', on='code')
results

In [0]:
# Show the first 20 rows of the merged table
results.head(20)

In [0]:
# download the latest stock price from Yahoo
SOURCE = 'yahoo'
start = '2020' # accepts strings
end = datetime.today()

columnNames = []      # display label of every successfully downloaded stock
stocksDownload = []   # price history per stock, in the same order as columnNames
downloadedCodes = set()

# Label each stock with its Chinese name (results.chiName). The left merge leaves
# chiName as NaN when the wiki page has no entry for a code, so fall back to the
# English results.shortName and finally to the code itself. This keeps every
# label a usable, unique column name for the later concat / corr / plot cells.
for code, shortName, chiName in zip(results.code, results.shortName, results.chiName):
  if code in downloadedCodes:
    # same code listed twice in results: do not download it again
    continue

  name = next(
    (label for label in (chiName, shortName) if isinstance(label, str) and label.strip()),
    code,
  )
  if name in columnNames:
    # two different codes sharing one display name: disambiguate with the code
    name = "{} ({})".format(name, code)

  try:
    stocksDownload.append(web.DataReader(code, SOURCE, start=start, end=end))
    columnNames.append(name)
    downloadedCodes.add(code)
    print("Completed:", code, name)
  except Exception as e:
    # Best effort: report the failure and carry on with the remaining stocks.
    # Nothing is appended for a failed code, so both lists stay aligned.
    print(code, name, ": error:", e)

print("All Completed")

In [0]:
# look at the downloaded stock price
# columnNames and stocksDownload are filled in step by the download loop, so the
# same position addresses both the label and the price history of one stock.
# Raises IndexError when nothing was downloaded.
index = 0
print(columnNames[index])
stocksDownload[index]

In [0]:
# Keep just the "Adj Close" series of every downloaded price history
adjCloses = [priceHistory.loc[:, 'Adj Close'] for priceHistory in stocksDownload]

# Line the series up side by side (one column per stock) in a single data frame
# and label the columns with the display names collected during the download
stocks = pd.concat(adjCloses, axis='columns')
stocks.columns = columnNames
stocks

In [0]:
# Export the data to csv file for downloading
# The file is written to the current working directory. 'utf_8_sig' is UTF-8
# with a leading byte-order mark - presumably chosen so spreadsheet tools pick
# up the encoding of the Chinese column names (TODO confirm).
stocks.to_csv("hkStocksAdjClosePx.csv", encoding='utf_8_sig')

In [0]:
# Run stocks correlation (pairwise correlation between the price columns)
# Only the first len(columnNames) columns of `stocks` are the price series built
# by the concat cell. Later cells add a helper 'priceRatio' column to `stocks`;
# selecting by position keeps the matrix aligned with columnNames - which the
# pair-selection cell indexes by position - even when this cell is re-run after them.
stocksCorr = stocks.iloc[:, :len(columnNames)].corr()
stocksCorr.style.background_gradient(cmap='coolwarm', axis=None)

In [0]:
# Filter the correlated stocks pair with the THRESHOLD
THRESHOLD = 0.99

# (row, column) positions whose correlation lies in [THRESHOLD, 1).
# The "< 1" bound is meant to leave out the diagonal (a stock against itself).
# The matrix is symmetric, so every pair appears twice: as (a, b) and as (b, a).
highCorr = np.where(((stocksCorr >= THRESHOLD) & (stocksCorr < 1)).values)

# Keep only the occurrence with a < b of each pair. Comparing the positions
# directly avoids building string keys such as str(a) + str(b), which are
# ambiguous: (1, 23) and (12, 3) both give "123", so a genuine pair could be
# dropped as a supposed duplicate. It also rules out self-pairs (a == b).
# Keys are (a, b) tuples; values stay [a, b] lists of positions into columnNames.
pairs = {}
for a, b in zip(*highCorr):
  a, b = int(a), int(b)
  if a < b:
    pairs[(a, b)] = [a, b]

print("Number of pairs found:", len(pairs))

# Print the Paired stocks
for a, b in pairs.values():
  print(columnNames[a], "<->", columnNames[b])

In [0]:
# Draw one chart per highly correlated pair; the legend uses the Chinese font
# so the stock names render correctly

for firstPos, secondPos in pairs.values():
  pairAxes = stocks[[columnNames[firstPos], columnNames[secondPos]]].plot()
  pairAxes.legend(prop=CNFont)

In [0]:
# Pick one of the discovered pairs by its position in `pairs`
INDEX = 2
stock_pair = [*pairs.values()][INDEX]

# Translate the two column positions into the stocks' display names
PAIR_STOCK_0, PAIR_STOCK_1 = (columnNames[position] for position in stock_pair)

stocks.loc[:, [PAIR_STOCK_0, PAIR_STOCK_1]]

In [0]:
# Pair-trading trigger: take the average price ratio of the pair over the
# downloaded history and trade when the current ratio falls well below it
PX_RATIO_THRESHOLD = 0.05

# Direction: Buy PAIR_STOCK_0 and Sell PAIR_STOCK_1
stocks['priceRatio'] = stocks[PAIR_STOCK_0].div(stocks[PAIR_STOCK_1])
avgPxRatio = stocks['priceRatio'].mean()
# Execute once the ratio drops below the average by the threshold fraction
execPxRatio = avgPxRatio * (1 - PX_RATIO_THRESHOLD)

print("Average BUY/SELL price ratio of", PAIR_STOCK_0, "/", PAIR_STOCK_1, ":", avgPxRatio)
print("Buy", PAIR_STOCK_0, "and Sell", PAIR_STOCK_1, "when the BUY/SELL price ratio is smaller than:", execPxRatio)

In [0]:
# Opposite direction: Buy PAIR_STOCK_1 and Sell PAIR_STOCK_0
# (reuses PX_RATIO_THRESHOLD set in the previous cell and overwrites 'priceRatio')
stocks['priceRatio'] = stocks[PAIR_STOCK_1].div(stocks[PAIR_STOCK_0])
avgPxRatio = stocks['priceRatio'].mean()
# Execute once the ratio drops below the average by the threshold fraction
execPxRatio = avgPxRatio * (1 - PX_RATIO_THRESHOLD)

print("Average BUY/SELL price ratio of", PAIR_STOCK_1, "/", PAIR_STOCK_0, ":", avgPxRatio)
print("Buy", PAIR_STOCK_1, "and Sell", PAIR_STOCK_0, "when the BUY/SELL price ratio is smaller than:", execPxRatio)