# Import data 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%load_ext google.colab.data_table 
%matplotlib inline

# Download and import pairslib for calculating PnL
!wget https://github.com/kenwkliu/ideas/raw/master/colab/pairslib.py
import pairslib

# Load the three input tables straight from GitHub:
#   stocksInfo   - per-stock quote/info sheet (Excel)
#   researchData - CSV of prices for the research period; first CSV column becomes the index
#   testData     - CSV of prices for the test period; first CSV column becomes the index
stocksInfo = pd.read_excel('https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/data/hkStocksQuotes.xlsx')
researchData = pd.read_csv('https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/data/researchHKStocksAdjClosePx.csv', index_col=0)
testData = pd.read_csv('https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/data/testHKStocksAdjClosePx.csv', index_col=0)


The google.colab.data_table extension is already loaded. To reload it, use:
  %reload_ext google.colab.data_table
--2022-02-07 06:23:59--  https://github.com/kenwkliu/ideas/raw/master/colab/pairslib.py
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/pairslib.py [following]
--2022-02-07 06:24:00--  https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/pairslib.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2949 (2.9K) [text/plain]
Saving to: ‘pairslib.py.3’


2022-02-07 06:24:00 (41.4 MB/s) - ‘pairslib.py.3’ saved [2949/2949]



# Import libraries

In [None]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

from datetime import datetime
import numpy as np
import pandas as pd
import pandas_datareader.data as web

# Download the font to display Chinese
!wget https://github.com/kenwkliu/ideas/raw/master/colab/data/simhei.ttf
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
CNFont = FontProperties(fname='/content/simhei.ttf')

# Yahoo Finance
!pip install yfinance
import yfinance as yf

# Google colab interactive table
%load_ext google.colab.data_table 
%matplotlib inline


### Helper functions
# Display the stock info in Chinese or not
def useChinese(use=True):
  """Pick the data file names for the Chinese or the English data set.

  Args:
    use: truthy selects the Chinese-labelled files, falsy the English ones.

  Returns:
    A 3-tuple (stock info file, research adj-close file, test adj-close file).
  """
  englishFiles = ('hkStocksQuotes.xlsx', 'researchHKStocksAdjClosePx.csv', 'testHKStocksAdjClosePx.csv')
  chineseFiles = ('hkStocksQuotesChi.xlsx', 'researchHKStocksAdjClosePxChi.csv', 'testHKStocksAdjClosePxChi.csv')
  return chineseFiles if use else englishFiles


# File names of the English data set (useChinese(False) returns the non-"Chi" names).
STOCK_INFO_FILE, RESEARCH_AJD_CLOSE_FILE, TEST_AJD_CLOSE_FILE = useChinese(False)

# Figure sizes passed to pandas' plot(figsize=...): a default size and a smaller one used when plotting many pairs.
CHART_SIZE_X, CHART_SIZE_Y = 12, 8
SMALL_CHART_SIZE_X, SMALL_CHART_SIZE_Y = 8, 6


# Plot stock pair chart
def plotPair(df, stockA, stockB, sizeX, sizeY):
  """Chart two columns of df together: stockA on the primary axis, stockB on a secondary y-axis.

  Args:
    df: DataFrame holding one column per stock.
    stockA, stockB: column names to plot.
    sizeX, sizeY: figure width and height passed to pandas' plot(figsize=...).
  """
  figureSize = (sizeX, sizeY)
  seriesA, seriesB = df[stockA], df[stockB]

  # Each axis is clamped to the min/max of its own series.
  leftAxis = seriesA.plot(label=stockA, legend=True, figsize = figureSize)
  leftAxis.set_ylim(seriesA.min(), seriesA.max())

  rightAxis = seriesB.plot(secondary_y=True, label=stockB, legend=True, figsize = figureSize)
  rightAxis.set_ylim(seriesB.min(), seriesB.max())

  # Legends use the downloaded font so Chinese stock names render; loc=2 is upper left, loc=1 upper right.
  leftAxis.legend(prop=CNFont, loc=2)
  rightAxis.legend(prop=CNFont, loc=1)

  plt.show()


# Plot every pair in pairsDf, one chart per pair
def plotManyPair(pairsDf, pricesDf=None):
  """Plot one small chart per row of pairsDf.

  Args:
    pairsDf: DataFrame with 'stockA', 'stockB' and 'corr' columns (one row per pair).
    pricesDf: DataFrame with one price column per stock name. When omitted, the
      notebook-level `stocks` DataFrame is used, as in the original one-argument call.

  Raises:
    NameError: if pricesDf is omitted and no notebook-level `stocks` is defined.
  """
  if pricesDf is None:
    try:
      pricesDf = stocks
    except NameError:
      raise NameError("plotManyPair: pass pricesDf=<price DataFrame> or define a notebook-level `stocks` DataFrame") from None

  for index, row in pairsDf.iterrows():
    print('\n', index, ':', row['stockA'], 'vs', row['stockB'], '(', row['corr'], ')')
    plotPair(pricesDf, row['stockA'], row['stockB'], SMALL_CHART_SIZE_X, SMALL_CHART_SIZE_Y)


# Filter the correlated stock pairs with the THRESHOLD
def getCorrelatedPairs(stocksCorr, THRESHOLD=0.95):
  """List the stock pairs whose correlation is in [THRESHOLD, 1), with each stock's sector.

  Args:
    stocksCorr: square correlation DataFrame (stock names on both axes).
    THRESHOLD: minimum correlation for a pair to be kept.

  Returns:
    DataFrame with columns stockA, stockB, corr, sector_A, sector_B, sameSector,
    ordered by descending correlation. Sectors are looked up by 'shortName' in the
    notebook-level stocksFilteredInfo.
  """
  # Keep correlations inside [THRESHOLD, 1); everything else becomes NaN.
  inBand = ((stocksCorr >= THRESHOLD) & (stocksCorr < 1))
  ranked = stocksCorr[inBand].unstack().sort_values(ascending=False)
  # drop_duplicates() compares correlation VALUES: it removes the mirrored (B, A) entry of
  # each pair, and would also drop a different pair that has exactly the same value.
  ranked = ranked.drop_duplicates().dropna()

  columnNames = {'level_0':'stockA', 'level_1':'stockB', 0:'corr'}
  highCorrDf = ranked.to_frame().reset_index().rename(columns = columnNames)

  # Look up the sector of stockA, then of stockB; the suffixes disambiguate the two lookups.
  sectorLookup = stocksFilteredInfo[['shortName', 'sector']]
  withSectorA = highCorrDf.merge(sectorLookup, how='left', left_on='stockA', right_on='shortName')
  withBothSectors = withSectorA.merge(sectorLookup, how='left', left_on='stockB', right_on='shortName', suffixes=('_A', '_B'))

  pairsDf = withBothSectors[['stockA', 'stockB', 'corr', 'sector_A', 'sector_B']]
  pairsDf['sameSector'] = (pairsDf['sector_A'] == pairsDf['sector_B'])

  return pairsDf


### back test related functions

# Based on the research data, determine the trading params (Enter/Exit Points)
def researchTradingParams(researchData, stockA, stockB, threshold=0.05, dollarValue=10000):
  """Derive the entry/exit price-ratio levels for a pair from the research-period prices.

  Args:
    researchData: DataFrame with one price column per stock.
    stockA, stockB: column names of the pair.
    threshold: fractional distance of the entry bands from the average ratio.
    dollarValue: stored unchanged in the result under 'dollarValue'.

  Returns:
    dict with keys 'dollarValue', 'avgPxRatio' (mean of stockA / stockB, the exit level),
    'shortA_longB_ratio' (avgPxRatio * (1 + threshold), upper entry band) and
    'longA_shortB_ratio' (avgPxRatio * (1 - threshold), lower entry band).
  """
  # Only rows where both stocks have a price contribute to the average ratio.
  pairPrices = researchData[[stockA, stockB]].copy().dropna()
  avgPxRatio = (pairPrices[stockA] / pairPrices[stockB]).mean()

  return {
    'dollarValue': dollarValue,
    'avgPxRatio': avgPxRatio,
    'shortA_longB_ratio': avgPxRatio * (1 + threshold),
    'longA_shortB_ratio': avgPxRatio * (1 - threshold),
  }


# Determine the signal and dollarValue in the test data
# signal == -1: Long stockA Short stockB
# signal == 1: Short stockA Long stockB
# signal == 0: flat position
def backTest(testData, tradingParams, stockA, stockB):
  """Walk the test-period prices of a pair and mark the position signal on every row.

  Signal values: 1 = short stockA / long stockB, -1 = long stockA / short stockB, 0 = flat.
  A position is entered when the price ratio (stockA / stockB) leaves the band
  [longA_shortB_ratio, shortA_longB_ratio] and is held until the ratio comes back to
  avgPxRatio.

  Args:
    testData: DataFrame with one price column per stock.
    tradingParams: dict with 'dollarValue', 'avgPxRatio', 'shortA_longB_ratio' and
      'longA_shortB_ratio' (as produced by researchTradingParams).
    stockA, stockB: column names of the pair.

  Returns:
    A new DataFrame (testData is not modified) with columns
    stockA, stockB, 'pxRatio', 'dollarValue', 'signal'; rows with a missing price are dropped.
  """
  backTest_df = testData[[stockA, stockB]].copy()
  backTest_df.dropna(inplace = True)

  # Get the tradingParams
  dollarValue = tradingParams['dollarValue']
  avgPxRatio = tradingParams['avgPxRatio']
  shortA_longB_ratio = tradingParams['shortA_longB_ratio']
  longA_shortB_ratio = tradingParams['longA_shortB_ratio']

  # Calculate the Price ratio in backTest_df
  backTest_df['pxRatio'] = backTest_df[stockA] / backTest_df[stockB]
  backTest_df['dollarValue'] = dollarValue

  # initialize the signal to 0 (also fixes the column position and dtype when there are no rows)
  backTest_df['signal'] = 0
  signal = 0

  # Signals are collected positionally and written back once. A per-row
  # .loc[index, 'signal'] write is slow and, when the index holds duplicate labels,
  # overwrites every row sharing the label.
  signals = []
  for pxRatio in backTest_df['pxRatio']:
    if pxRatio > shortA_longB_ratio:
      # Diverged above the upper band
      signal = 1
    elif pxRatio < longA_shortB_ratio:
      # Diverged below the lower band
      signal = -1
    elif signal == 1 and pxRatio > avgPxRatio:
      # Short-A trade entered but not converged back yet: keep holding
      pass
    elif signal == -1 and pxRatio < avgPxRatio:
      # Long-A trade entered but not converged back yet: keep holding
      pass
    else:
      # Converged (or never entered): flat
      signal = 0

    signals.append(signal)

  if signals:
    backTest_df['signal'] = signals

  return backTest_df


# determine pSignal and nSignal for up/down markers in plot
# pSignal and nSignal is for displaying the up/down markers in plotting chart only, they're not required for backtest calculation
def addSignalMarker(backTest_df):
  """Add the 'pSignal' / 'nSignal' marker columns used when plotting.

  'pSignal' holds pxRatio on rows where signal == 1, 'nSignal' holds pxRatio on rows
  where signal == -1; every other row is NaN. The columns are added to backTest_df
  in place, and the same DataFrame is returned.
  """
  ratio = backTest_df['pxRatio']
  for markerColumn, signalValue in (('pSignal', 1), ('nSignal', -1)):
    backTest_df[markerColumn] = np.where(backTest_df['signal'] == signalValue, ratio, np.nan)

  return backTest_df


# Combine the research and backtest for a Portfolio of Pairs
def researchAndBackTestPortfolio(pairsDf, researchData, testData, printOutput=True):
  """Research the trading params and run the back test for every pair in pairsDf.

  Args:
    pairsDf: DataFrame with 'stockA' and 'stockB' columns, one row per pair.
    researchData: prices used to derive each pair's trading params.
    testData: prices the back test is run on.
    printOutput: when truthy, print "<stockA> vs <stockB>" for each pair.

  Returns:
    A list with one back-test DataFrame per pair, in pairsDf row order, restricted to
    the columns [stockA, stockB, 'signal', 'dollarValue'].
  """
  results = []

  for stockA, stockB in zip(pairsDf['stockA'], pairsDf['stockB']):
    if printOutput:
      print(stockA, 'vs', stockB)

    params = researchTradingParams(researchData, stockA, stockB)
    pairResult = backTest(testData, params, stockA, stockB)
    results.append(pairResult[[stockA, stockB, 'signal', 'dollarValue']])

  return results


# Download and import pairslib for calculating PnL
!wget  https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/pairslib.py
import pairslib

--2022-02-07 06:24:04--  https://github.com/kenwkliu/ideas/raw/master/colab/data/simhei.ttf
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/data/simhei.ttf [following]
--2022-02-07 06:24:05--  https://raw.githubusercontent.com/kenwkliu/ideas/master/colab/data/simhei.ttf
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9751960 (9.3M) [application/octet-stream]
Saving to: ‘simhei.ttf.2’


2022-02-07 06:24:05 (135 MB/s) - ‘simhei.ttf.2’ saved [9751960/9751960]

The google.colab.data_table extension is already loaded. To reload it, use:
  %reload_ext google.colab.data_tabl

# Check dataframe

In [None]:
# Display the raw stock info table (rendered by the Colab data_table extension loaded earlier)
stocksInfo




Unnamed: 0,code,shortName,industry,sector,language,region,quoteType,quoteSourceName,triggerable,currency,marketState,exchange,longName,messageBoardId,exchangeTimezoneName,exchangeTimezoneShortName,gmtOffSetMilliseconds,market,esgPopulated,firstTradeDateMilliseconds,priceHint,regularMarketChange,regularMarketChangePercent,regularMarketTime,regularMarketPrice,regularMarketDayHigh,regularMarketDayRange,regularMarketDayLow,regularMarketVolume,regularMarketPreviousClose,bid,ask,bidSize,askSize,fullExchangeName,financialCurrency,regularMarketOpen,averageDailyVolume3Month,averageDailyVolume10Day,fiftyTwoWeekLowChange,fiftyTwoWeekLowChangePercent,fiftyTwoWeekRange,fiftyTwoWeekHighChange,fiftyTwoWeekHighChangePercent,fiftyTwoWeekLow,fiftyTwoWeekHigh,trailingAnnualDividendRate,trailingAnnualDividendYield,sharesOutstanding,bookValue,fiftyDayAverage,fiftyDayAverageChange,fiftyDayAverageChangePercent,twoHundredDayAverage,twoHundredDayAverageChange,twoHundredDayAverageChangePercent,marketCap,priceToBook,sourceInterval,exchangeDataDelayedBy,tradeable,price,earningsTimestamp,earningsTimestampStart,earningsTimestampEnd,trailingPE,epsTrailingTwelveMonths,epsForward,forwardPE,dividendDate
0,4333.HK,CISCO-T,Information Technology,Telecommunications Equipment,en-US,US,EQUITY,Delayed Quote,False,HKD,REGULAR,HKG,"Cisco Systems, Inc.",finmb_19691,Asia/Hong_Kong,HKT,28800000,hk_market,False,9.597366e+11,3,0.000000,0.000000,1594885107,150.00,150.00,150.0 - 150.0,150.00,50.0,150.00,150.00,10.00,0.0,0.0,HKSE,USD,150.00,0.0,0.0,0.000000,0.000000,150.0 - 400.0,-250.000000,-0.625000,150.00,400.00,1.410,0.009400,4.222300e+09,8.462,321.428560,-171.428560,-0.533333,380.769230,-230.769230,-0.606061,1.528620e+12,17.726307,15,0,False,150.00,,,,,,,,
1,4335.HK,INTEL-T,Information Technology,Semi-conductor,en-US,US,EQUITY,Delayed Quote,False,HKD,REGULAR,HKG,Intel Corporation,finmb_21127,Asia/Hong_Kong,HKT,28800000,hk_market,False,9.597366e+11,3,0.000000,0.000000,1595224321,300.00,300.00,300.0 - 300.0,300.00,60.0,300.00,301.00,,,,HKSE,USD,300.00,0.0,0.0,50.000000,0.200000,250.0 - 400.0,-100.000000,-0.250000,250.00,400.00,1.290,0.004300,4.253000e+09,19.283,270.285700,29.714294,0.109937,289.930080,10.069916,0.034732,1.584567e+12,15.557744,15,0,False,300.00,,,,,,,,
2,4338.HK,MICROSOFT-T,Information Technology,Technology and Software,en-US,US,EQUITY,Delayed Quote,False,HKD,REGULAR,HKG,Microsoft Corporation,finmb_21835,Asia/Hong_Kong,HKT,28800000,hk_market,False,9.597366e+11,3,0.000000,0.000000,1589523065,500.00,500.00,500.0 - 500.0,500.00,20.0,500.00,800.00,,,,HKSE,USD,500.00,0.0,0.0,0.000000,0.000000,500.0 - 600.0,-100.000000,-0.166667,500.00,600.00,2.040,0.004080,7.571000e+09,15.626,500.000000,0.000000,0.000000,500.000000,0.000000,0.000000,1.197435e+13,31.997952,15,0,False,500.00,,,,,,,,
3,9988.HK,BABA-SW,Information Technology,E-commerce and Internet,en-US,US,EQUITY,Delayed Quote,False,HKD,REGULAR,HKG,Alibaba Group Holding Limited,finmb_42083601,Asia/Hong_Kong,HKT,28800000,hk_market,False,1.574732e+12,3,3.399994,1.388886,1596160874,248.20,249.00,246.6 - 249.0,246.60,3471005.0,244.80,248.20,248.40,,,HKSE,CNY,249.00,26842674.0,25690873.0,80.599990,0.480907,167.6 - 263.8,-15.599991,-0.059136,167.60,263.80,,,2.146180e+10,,230.531430,17.668564,0.076643,208.486010,39.713990,0.190488,5.370378e+12,,15,0,False,248.20,,,,,,,,
4,0700.HK,TENCENT,Information Technology,Online and Mobile Games,en-US,US,EQUITY,Delayed Quote,False,HKD,REGULAR,HKG,Tencent Holdings Limited,finmb_11042136,Asia/Hong_Kong,HKT,28800000,hk_market,False,1.087349e+12,3,13.000000,2.429907,1596160874,548.00,550.00,539.5 - 550.0,539.50,3691046.0,535.00,547.00,547.50,0.0,0.0,HKSE,CNY,540.00,21400492.0,20457768.0,235.799990,0.755285,312.2 - 564.0,-16.000000,-0.028369,312.20,564.00,0.111,0.000207,9.555170e+09,5.056,508.074280,39.925720,0.078582,426.177600,121.822390,0.285849,5.236233e+12,108.386070,15,0,False,548.00,1.597216e+09,1.597216e+09,1.597216e+09,49.754498,11.01408,10.94,50.091408,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2730,83186.HK,CICCKRANECNET-R,ETF,Equities-China,en-US,US,EQUITY,Delayed Quote,False,CNY,REGULAR,HKG,China Capital Corporation (Hong Kong) Limited ...,finmb_552823509,Asia/Hong_Kong,HKT,28800000,hk_market,False,,3,1.040001,1.314128,1595920002,80.18,79.08,79.04 - 79.08,79.04,4400.0,79.14,80.18,80.54,0.0,0.0,HKSE,,79.08,,,1.139999,0.014423,79.04 - 79.08,1.099999,0.013910,79.04,79.08,,,,,,,,,,,,,15,0,False,80.18,,,,,,,,
2731,83188.HK,CAM CSI300-R,ETF,Equities-China,en-US,US,EQUITY,Delayed Quote,False,CNY,REGULAR,HKG,ChinaAMC ETF Series - ChinaAMC CSI 300 Index ETF,finmb_217680427,Asia/Hong_Kong,HKT,28800000,hk_market,False,1.342489e+12,3,1.119999,2.367863,1596161574,48.42,48.50,47.64 - 48.5,47.64,235000.0,47.30,48.42,48.44,0.0,0.0,HKSE,,47.66,532859.0,661361.0,12.820000,0.360112,35.6 - 50.04,-1.620003,-0.032374,35.60,50.04,,,,,45.114285,3.305714,0.073274,41.090700,7.329300,0.178369,,,15,0,False,48.42,,,,,,,,
2732,83197.HK,CAM MSCI AINC-R,ETF,Equities-China,en-US,US,EQUITY,Delayed Quote,False,CNY,REGULAR,HKG,China Asset Management (Hong Kong) Limited - C...,finmb_549011400,Asia/Hong_Kong,HKT,28800000,hk_market,False,1.517967e+12,3,0.100000,0.883396,1596005390,11.42,11.32,11.32 - 11.32,11.32,38000.0,11.32,11.42,11.59,0.0,0.0,HKSE,,11.32,3849.0,10550.0,2.960000,0.349882,8.46 - 11.71,-0.290000,-0.024765,8.46,11.71,,,,,10.647715,0.772285,0.072531,9.632692,1.787308,0.185546,,,15,0,False,11.42,,,,,,,,
2733,83199.HK,CSOP CTPBBOND-R,ETF,Bonds,en-US,US,EQUITY,Delayed Quote,False,CNY,REGULAR,HKG,CSOP ETF Series II - CSOP China 5-Year Treasur...,finmb_254621421,Asia/Hong_Kong,HKT,28800000,hk_market,False,1.392773e+12,3,-0.200005,-0.194368,1596161101,102.70,102.70,102.7 - 102.7,102.70,400.0,102.90,102.55,102.85,0.0,0.0,HKSE,,102.70,694.0,892.0,0.000000,0.000000,102.7 - 118.75,-16.050003,-0.135158,102.70,118.75,,,,,103.916626,-1.216629,-0.011708,111.761390,-9.061394,-0.081078,,,15,0,False,102.70,,,,,,,,


# Create dataframe 


In [None]:
# Explore the data:
# show only a subset of useful columns, sorted by marketCap with the largest first
SELECTED_COLUMNS = ['code', 'quoteType', 'shortName', 'industry', 'sector', 'marketCap', 'regularMarketPreviousClose', 'averageDailyVolume10Day','fiftyDayAverage','twoHundredDayAverage']

stocksInfo[SELECTED_COLUMNS].sort_values(by=['marketCap'], ascending=False)

Unnamed: 0,code,quoteType,shortName,industry,sector,marketCap,regularMarketPreviousClose,averageDailyVolume10Day,fiftyDayAverage,twoHundredDayAverage
2,4338.HK,EQUITY,MICROSOFT-T,Information Technology,Technology and Software,1.197435e+13,500.00,0.0,500.000000,500.000000
3,9988.HK,EQUITY,BABA-SW,Information Technology,E-commerce and Internet,5.370378e+12,244.80,25690873.0,230.531430,208.486010
4,0700.HK,EQUITY,TENCENT,Information Technology,Online and Mobile Games,5.236233e+12,535.00,20457768.0,508.074280,426.177600
14,1398.HK,EQUITY,ICBC,Financial,State-owned China banks,2.752837e+12,4.65,161543498.0,4.922286,5.193873
11,2318.HK,EQUITY,PING AN,Financial,China Insurance,1.820020e+12,82.65,31290439.0,82.834290,82.879930
...,...,...,...,...,...,...,...,...,...,...
2729,83170.HK,EQUITY,ISHARESKS200-R,ETF,Equities-Asia (except Japan),,88.02,,,
2730,83186.HK,EQUITY,CICCKRANECNET-R,ETF,Equities-China,,79.14,,,
2731,83188.HK,EQUITY,CAM CSI300-R,ETF,Equities-China,,47.30,661361.0,45.114285,41.090700
2732,83197.HK,EQUITY,CAM MSCI AINC-R,ETF,Equities-China,,11.32,10550.0,10.647715,9.632692


# Check correlation




In [None]:
# Pairwise correlation between every pair of columns in researchData (pandas default method: Pearson)
stocksCorr = researchData.corr()


In [None]:
# Before looking at the stock price correlation, select only liquid equity names for trading purposes.
# Add a new column 'turnover' = previous close * 10-day average daily volume, rounded.
stocksInfo['turnover'] = round(stocksInfo['regularMarketPreviousClose'] * stocksInfo['averageDailyVolume10Day'])

# Keep only rows whose quoteType is EQUITY, whose industry is not 'ETF', and whose turnover exceeds MIN_TURNOVER
QUOTE_TYPE = 'EQUITY'
MIN_TURNOVER = 100000000  # 100 million
# NOTE: this reassigns SELECTED_COLUMNS, replacing the list defined in the earlier exploration cell
SELECTED_COLUMNS = ['code', 'shortName', 'industry', 'sector', 'turnover','fiftyDayAverage','twoHundredDayAverage']

# reset_index() without drop=True keeps the old row labels as an extra 'index' column
stocksFilteredInfo = stocksInfo[(stocksInfo.quoteType == QUOTE_TYPE) & (stocksInfo.turnover > MIN_TURNOVER) & (stocksInfo.industry != 'ETF')].reset_index()
stocksFilteredInfo.sort_values(by=['turnover'], ascending=False)[SELECTED_COLUMNS]

Unnamed: 0,code,shortName,industry,sector,turnover,fiftyDayAverage,twoHundredDayAverage
1,0700.HK,TENCENT,Information Technology,Online and Mobile Games,1.094491e+10,508.074280,426.177600
0,9988.HK,BABA-SW,Information Technology,E-commerce and Internet,6.289126e+09,230.531430,208.486010
27,0981.HK,SMIC,Information Technology,Semi-conductor,5.881712e+09,28.947430,18.711690
3,3690.HK,MEITUAN-W,Information Technology,E-commerce and Internet,4.727324e+09,184.345720,125.527115
8,0388.HK,HKEX,Financial,Other financial services,2.732908e+09,335.720000,276.300000
...,...,...,...,...,...,...,...
140,0136.HK,HENGTEN NET,Energy,Photovoltaic Solar,1.084515e+08,0.202171,0.127378
91,0880.HK,SJM HOLDINGS,Betting,Macau Gaming,1.077015e+08,8.842571,8.449296
158,3606.HK,FUYAO GLASS,Automobile production and distribution,Cars parts and maintenance,1.056159e+08,20.367714,19.890000
152,3933.HK,UNITED LAB,Medicine,Pharmaceutical,1.038379e+08,7.334571,6.485035


In [None]:
# Keep the pairs whose correlation is at least THRESHOLD (and below 1)
THRESHOLD = 0.96
pairsDf = getCorrelatedPairs(stocksCorr, THRESHOLD)

# Display with values rounded to 4 decimals; pairsDf itself is left unrounded
pairsDf.round(4)

Unnamed: 0,stockA,stockB,corr,sector_A,sector_B,sameSector
0,XINYI SOLAR,FLAT GLASS,0.9868,Photovoltaic Solar,Glass strands,False
1,MEITUAN-W,MEIDONG AUTO,0.9813,E-commerce and Internet,Auto sales,False
2,MAN WAH HLDGS,ZIJIN MINING,0.9769,Housewares,Precious metals,False
3,TENCENT,ZHONGSHENG HLDG,0.9737,Online and Mobile Games,Auto sales,False
4,XINYI GLASS,XINYI SOLAR,0.9733,Glass strands,Photovoltaic Solar,False
5,MEITUAN-W,ZHONGSHENG HLDG,0.9731,E-commerce and Internet,Auto sales,False
6,MAN WAH HLDGS,XIAOMI-W,0.9697,Housewares,Telecommunications Equipment,False
7,YIHAI INTL,CG SERVICES,0.9688,Food and beverage Production and Wholesale,Property Management and Agent,False
8,TECHTRONIC IND,MEITUAN-W,0.9684,Machinery,E-commerce and Internet,False
9,GREATWALL MOTOR,XINYI SOLAR,0.9679,Automobile,Photovoltaic Solar,False


# Back testing


In [None]:
# Select the pairs whose two stocks are in the same sector into the portfolio
selectedPairsDf = pairsDf[(pairsDf.sameSector == True)]
print("Total pairs with same sectors for backtesting:", len(selectedPairsDf))
print('----------------------------------------------------------')

# Research the trading params and back test the selected pairs in the test period
pairsPortfolioBackTest = researchAndBackTestPortfolio(selectedPairsDf, researchData, testData)

# Look at one of the pairs' backtest results.
# Each backtested pair is a DataFrame indexed by date with the columns (stockA, stockB, signal, dollarValue)
pairsPortfolioBackTest[0]  

Total pairs with same sectors for backtesting: 2
----------------------------------------------------------
XINYI GLASS vs FLAT GLASS
WUXI BIO vs PHARMARON


Unnamed: 0_level_0,XINYI GLASS,FLAT GLASS,signal,dollarValue
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-04,21.486885,34.817314,-1,10000
2021-01-05,21.055231,34.718121,-1,10000
2021-01-06,20.575613,34.122955,-1,10000
2021-01-07,20.719498,37.445972,-1,10000
2021-01-08,20.575613,36.206039,-1,10000
...,...,...,...,...
2021-09-21,25.500000,35.250000,-1,10000
2021-09-23,25.250000,36.950001,-1,10000
2021-09-24,23.400000,37.200001,-1,10000
2021-09-27,23.299999,35.099998,-1,10000


# Profit calculation


In [None]:
# calcPortfolio comes from the downloaded pairslib module (implementation not shown in this notebook).
# Its result is unpacked into two values; judging by the cell output it prints a PnL line per pair
# plus a portfolio total - presumably pnl is that total and pnlDf the per-pair table; verify in pairslib.py.
pnl, pnlDf = pairslib.calcPortfolio(pairsPortfolioBackTest)
pnlDf

XINYI GLASS vs FLAT GLASS ---> $ 1114.6040717257274
WUXI BIO vs PHARMARON ---> $ 2226.3952734009163
PortfolioPnl: $ 3340.9993451266437


Unnamed: 0,stockA,stocksB,Pnl
0,XINYI GLASS,FLAT GLASS,1114.604072
1,WUXI BIO,PHARMARON,2226.395273
