In [None]:
'''
Find the price correlated HK stocks in same sector for Pair strategy 
Buy the stock when it is relatively under-priced to the correlated stock 
and sell the corresponding over-priced stock
As they're price correlated (statistically), assume they will go back to the similar price ratio later
Hopefully, the under-priced stock will go up later while the over-priced one will drop

Pair strategy is a type of relative value strategies which rely on the mean-reversion of prices to the asset's fair value. 
They assume that prices only temporarily move away from fair value due to behavioral effects 
like overreaction or herding, or liquidity effects such as temporary market impact or long-term supply/demand friction. 
'''

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

from datetime import datetime
import numpy as np
import pandas as pd
import pandas_datareader.data as web

# Download the SimHei font so matplotlib legends can render Chinese stock names.
# wget saves the file into the current working directory, while FontProperties
# below loads it from /content/simhei.ttf.
# NOTE(review): this presumably assumes a Google Colab runtime whose working
# directory is /content (see the google.colab extension loaded below) - confirm
# before running elsewhere.
!wget https://github.com/kenwkliu/ideas/raw/master/colab/data/simhei.ttf
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Font handle passed as `prop=CNFont` to the plt.legend(...) calls in later cells
CNFont = FontProperties(fname='/content/simhei.ttf')

# Google Colab interactive table extension for DataFrame output
%load_ext google.colab.data_table 
# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
# Fetch quote information from Yahoo for three sample HK tickers through
# pandas_datareader (imported as `web`).
stockInfo = web.get_quote_yahoo(['0700.HK', '0005.HK', '0939.HK'])
# Bare expression: the notebook displays the returned table
stockInfo

In [None]:
# Show the column names of the quote table, i.e. the fields available per stock
stockInfo.columns

In [None]:
# Load the pre-downloaded Yahoo quote information for all HK stocks.
# The Chinese names and the stock sectors were sourced from
# http://www1.hket.com/finance/chart/industry-index.do

# pandas can read an Excel workbook directly from a URL
stocksInfo = pd.read_excel('https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/data/hkStocksQuotes.xlsx')
# Bare expression: display the loaded table
stocksInfo

In [None]:
# Inspect the row(s) of a single stock, selected by its Yahoo ticker code
CODE = '0005.HK'
stocksInfo.loc[stocksInfo['code'] == CODE]

In [None]:
USE_CHINESE = True
#USE_CHINESE = False  ###**** uncomment this line to display English

# Language-dependent settings, chosen in one step. Order of the values:
# name column, industry column, sector column, adjusted-close CSV file name,
# and the two sample stocks used for the first price plot.
(COL_NAME, COL_INDUSTRY, COL_SECTOR,
 AJD_CLOSE_FILE, TEST_STOCK_A, TEST_STOCK_B) = (
  ('nameChi', 'industryChi', 'sectorChi',
   'hkStocksAdjClosePxChi.csv', '恒大健康', '阿里健康')
  if USE_CHINESE else
  ('shortName', 'industry', 'sector',
   'hkStocksAdjClosePx.csv', 'EVERG HEALTH', 'ALI HEALTH')
)

In [None]:
# Distinct industry labels found in the stock table, and how many there are
industryNames = pd.unique(stocksInfo[COL_INDUSTRY])
print('Count:', industryNames.size)
print(industryNames)

In [None]:
# Distinct sector labels found in the stock table, and how many there are
sectorNames = pd.unique(stocksInfo[COL_SECTOR])
print('Count:', sectorNames.size)
print(sectorNames)

In [None]:
# Explore the data: keep only the columns that matter for this study
# and list the stocks from the largest market capitalisation downwards
SELECTED_COLUMNS = [
  'code', 'quoteType', COL_NAME, COL_INDUSTRY, COL_SECTOR,
  'marketCap', 'regularMarketPreviousClose', 'averageDailyVolume10Day',
]

stocksInfo.sort_values('marketCap', ascending=False)[SELECTED_COLUMNS]

In [None]:
# Only liquid equity names are tradable in practice, so derive a 'turnover'
# column (previous close x 10-day average daily volume, rounded) to filter on
stocksInfo['turnover'] = (
  stocksInfo['regularMarketPreviousClose']
  .mul(stocksInfo['averageDailyVolume10Day'])
  .round()
)

# Keep EQUITY quotes whose turnover exceeds the minimum
QUOTE_TYPE = 'EQUITY'
MIN_TURNOVER = 100_000_000  # 100 millions
SELECTED_COLUMNS = ['code', COL_NAME, COL_INDUSTRY, COL_SECTOR, 'turnover']

stocksFilteredInfo = (
  stocksInfo
  .loc[lambda df: df['quoteType'].eq(QUOTE_TYPE) & df['turnover'].gt(MIN_TURNOVER)]
  .reset_index()
)
stocksFilteredInfo[SELECTED_COLUMNS].sort_values('turnover', ascending=False)

In [None]:
# Industry distribution of the filtered stocks, shown as an interactive pie chart
import plotly.express as px

# Number of companies per industry
industryCount = (
  stocksFilteredInfo
  .groupby(COL_INDUSTRY)['code']
  .count()
  .reset_index(name='count')
)

# Comma-separated company names per industry
industryStocks = (
  stocksFilteredInfo
  .groupby(COL_INDUSTRY)[COL_NAME]
  .agg(', '.join)
  .reset_index()
)

# Total marketCap and turnover per industry
industryDetails = (
  stocksFilteredInfo
  .groupby(COL_INDUSTRY)[['marketCap', 'turnover']]
  .sum()
  .reset_index()
)

# One row per industry: count, names, marketCap, turnover
industryInfo = (
  industryCount
  .merge(industryStocks, on=COL_INDUSTRY, how='left')
  .merge(industryDetails, on=COL_INDUSTRY, how='left')
)

fig = px.pie(
  industryInfo,
  values='count',
  names=COL_INDUSTRY,
  hover_name=COL_NAME,
  title='Industry Distribution',
)
fig.show()

In [None]:
# Industry distribution as a table, most populated industry first
industryInfo.sort_values('count', ascending=False)

In [None]:
# Download the daily prices of every filtered stock from Yahoo;
# they are used afterwards to find the price correlated stocks
SOURCE = 'yahoo'
start = '2020' # accepts strings
end = datetime.today()

# Parallel lists: stocksDownload[i] holds the prices of the stock named columnNames[i]
columnNames = []
stocksDownload = []

for code, name in zip(stocksFilteredInfo['code'], stocksFilteredInfo[COL_NAME]):
  try:
    prices = web.DataReader(code, SOURCE, start=start, end=end)
  except Exception as e:
    # Best effort: report the failing stock and carry on with the others
    print(code, name, ": error:", e)
  else:
    stocksDownload.append(prices)
    columnNames.append(name)
    print("Completed:", code, name)

print("All Completed")

In [None]:
# Inspect one downloaded price table: `index` selects the position in the
# parallel lists columnNames / stocksDownload filled by the download loop
index = 0
# Name of the stock at that position
print(columnNames[index])
# Bare expression: display that stock's downloaded price table
stocksDownload[index]

In [None]:
# Work with the "Adj Close" series of each download rather than the raw close
adjCloses = [download['Adj Close'] for download in stocksDownload]

# Put all adjusted closes side by side in one data frame,
# one column per stock, labelled with the stock name
stocks = pd.concat(adjCloses, axis=1, keys=columnNames)
stocks

In [None]:
# Export the adjusted closes to a CSV file so they can be downloaded.
# The 'utf_8_sig' codec writes UTF-8 with a leading byte-order mark.
# NOTE(review): the output name is hard-coded and does not follow AJD_CLOSE_FILE,
# so the Chinese and English variants are written to the same file - confirm
# whether that is intended.
stocks.to_csv("hkStocksAdjClosePx.csv", encoding='utf_8_sig')

# In case the download failed, read from the prepared csv file instead.
# NOTE(review): as written this read sets no index column; presumably
# index_col=0 and parse_dates=True are needed to restore the date index - verify
# against the file before relying on it.
# stocks = pd.read_csv('https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/data/' + AJD_CLOSE_FILE)

In [None]:
# Plot the adjusted closes of the two sample stocks on one chart
# to eyeball how closely their prices move together
ax = stocks.loc[:, [TEST_STOCK_A, TEST_STOCK_B]].plot(figsize=(12, 8))
ax.legend(prop=CNFont)

In [None]:
# Pairwise price correlation between every pair of columns in `stocks`.
# NOTE(review): a later cell adds price-ratio columns to `stocks` in place; if this
# cell is re-run after that one, those ratio columns are correlated as well.
stocksCorr = stocks.corr()
# Colour the whole matrix with one shared gradient (axis=None) for display
stocksCorr.style.background_gradient(cmap='coolwarm', axis=None)

In [None]:
# Filter the correlated stock pairs with the THRESHOLD
THRESHOLD = 0.95

# Work on the raw matrix so that positions, not labels, identify each stock;
# the labels are taken from the same matrix so both always agree.
corrMatrix = stocksCorr.values
corrLabels = list(stocksCorr.columns)

# Only look at the strict upper triangle (row < column):
#  - every unordered pair is visited exactly once, so no (a,b)/(b,a) bookkeeping
#    is needed (the previous string keys str(a)+str(b) were ambiguous, e.g.
#    (1,23) and (12,3) both produced "123" and pairs could be lost);
#  - a stock is never paired with itself, even when a diagonal value is not
#    exactly 1 because of floating point rounding.
# Correlations of exactly 1 are still excluded, as before.
highCorr = np.where(np.triu((corrMatrix >= THRESHOLD) & (corrMatrix < 1), k=1))

# Sector of each stock name; the first entry is used if a name appears more than once
sectorByName = (
  stocksFilteredInfo
  .drop_duplicates(subset=COL_NAME)
  .set_index(COL_NAME)[COL_SECTOR]
)

# Find the price correlated pairs within the same sector
pairs = {}     # (a, b) -> [a, b]: positions of the two stocks in stocksCorr
pairRows = []  # one [corr, sector, stockA, stockB] record per kept pair

for a, b in zip(*highCorr):
  a, b = int(a), int(b)
  nameA, nameB = corrLabels[a], corrLabels[b]
  sectorA, sectorB = sectorByName.get(nameA), sectorByName.get(nameB)

  # Skip columns that have no sector information (not a filtered stock)
  if sectorA is None or sectorB is None:
    continue

  # Include the pair only when both stocks are in the same sector
  # (a missing/NaN sector never compares equal, so such stocks are left out)
  if sectorA == sectorB:
    pairs[(a, b)] = [a, b]
    pairRows.append([round(float(corrMatrix[a, b]), 4), sectorA, nameA, nameB])

# Build the result table once; row i describes the i-th entry of `pairs`
pairsDf = pd.DataFrame(pairRows, columns=['corr', 'sector', 'stockA', 'stockB'])
count = len(pairsDf)

# shows the Pairs sorted by correlations
pairsDf.sort_values(by=['corr'], ascending=False)

In [None]:
# Choose a correlated stock pair and plot their adjClose prices
INDEX = 10
pairList = list(pairs.values())

# The number of pairs found depends on the downloaded prices, so fail with a
# clear message instead of a bare "list index out of range"
# (negative values still count from the end, as with plain list indexing)
if not -len(pairList) <= INDEX < len(pairList):
  raise IndexError(f"INDEX={INDEX} is out of range: only {len(pairList)} correlated pair(s) were found")

# Each entry holds the positions of the two stocks; translate them to stock names
stock_pair = pairList[INDEX]
PAIR_STOCK_A = columnNames[stock_pair[0]]
PAIR_STOCK_B = columnNames[stock_pair[1]]

stocks[[PAIR_STOCK_A, PAIR_STOCK_B]].plot(figsize = (12, 8))
plt.legend(prop = CNFont)

### Uncomment the following block to plot all the highly correlated stock pairs 
#for k, pair in pairs.items():
#  stocks[[columnNames[pair[0]], columnNames[pair[1]]]].plot()
#  plt.legend(prop = CNFont)

In [None]:
# Daily price ratio of the pair in both directions (A over B, and B over A);
# the column labels spell out which stock is divided by which
AB_stock = " / ".join([PAIR_STOCK_A, PAIR_STOCK_B])
BA_stock = " / ".join([PAIR_STOCK_B, PAIR_STOCK_A])

stocks[AB_stock] = stocks[PAIR_STOCK_A].div(stocks[PAIR_STOCK_B])
stocks[BA_stock] = stocks[PAIR_STOCK_B].div(stocks[PAIR_STOCK_A])

stocks.loc[:, [PAIR_STOCK_A, PAIR_STOCK_B, AB_stock, BA_stock]]

In [None]:
# Calculate the average price ratio of the Pair 
# Trade if the current price ratio is significantly different 
PX_RATIO_THRESHOLD = 0.05

# Calculate the average Buy PAIR_STOCK_A / Sell PAIR_STOCK_B ratio
avgPxRatio = stocks[AB_stock].mean()
print("Average BUY/SELL price ratio of", AB_stock, ":", avgPxRatio)

execPxRatio = avgPxRatio * (1 - PX_RATIO_THRESHOLD)
print("Buy", PAIR_STOCK_A, "and Sell", PAIR_STOCK_B, "when the BUY/SELL price ratio is smaller than:", execPxRatio)

print(f'-'*100)
print("The ratio can be reached when relatively ... ")
print(PAIR_STOCK_A, "goes down and become under-valued and should buy it")
print(PAIR_STOCK_B, "goes up and become over-valued and should sell it")

In [None]:
# Calculate the average Buy PAIR_STOCK_B / Sell PAIR_STOCK_A ratio
avgPxRatio = stocks[BA_stock].mean()
print("Average BUY/SELL price ratio of", BA_stock, ":", avgPxRatio)

execPxRatio = avgPxRatio * (1 - PX_RATIO_THRESHOLD)
print("Buy", PAIR_STOCK_B, "and Sell", PAIR_STOCK_A, "when the BUY/SELL price ratio is smaller than:", execPxRatio)

print(f'-'*100)
print("The ratio can be reached when relatively ... ")
print(PAIR_STOCK_B, "goes down and become under-valued and should buy it")
print(PAIR_STOCK_A, "goes up and become over-valued and should sell it")

In [None]:
### Suggestions
# Web scrape HKEx web site to exclude non short sell eligible stocks
#   https://www.hkex.com.hk/Services/Trading/Securities/Securities-Lists/Designated-Securities-Eligible-for-Short-Selling?sc_lang=en

# Instead of using sector, run a clustering algorithm (such as k-means or DBSCAN) to group similar stocks together and trade Pairs only within the same cluster
#   https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html

# Use Co-integration rather than price correlation
#   https://blog.quantinsti.com/pairs-trading-basics/