In [2]:
from ib_insync import *
from datetime import datetime
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.decomposition import PCA
from trading_stratigies import run_OPTICS, get_groups, visualize_TSNE, write_file, sort_first_by_second
%matplotlib inline

In [14]:
# The function to get contract historical data
def get_contract_data(contract, look_back, resolution, side, end_time):
    bar = ib.reqHistoricalData(contract, endDateTime = end_time, durationStr=look_back,
        barSizeSetting= resolution, whatToShow=side, useRTH=True, formatDate=1)
# use util in ib-insync to convert bar data into pandas dataframe    
    df = util.df(bar)
    return df 
# The function to get stock historical data
def get_stock_data(ticker, look_back, resolution, side, end_time):
    contract = Contract(symbol = ticker, secType = 'STK', exchange = 'SMART',currency = 'USD')   
    if len(ib.qualifyContracts(contract)) == 0:
        return pd.DataFrame()
    return get_contract_data(contract, look_back, resolution, side, end_time)

# The function to get stock return historical data
def get_return_history(ticker_lst, look_back, resolution, side, end_time):
    column_names = np.array(get_stock_data("ABC", look_back, resolution, side, end_time)['date'][1:])
    Stocks_return_history_tmp = pd.DataFrame(columns = column_names)
    for ticker_index in range(len(ticker_lst)):
        ticker = ticker_lst [ticker_index]
        stock_data_tmp = get_stock_data(ticker, look_back, resolution, side, end_time)
        if type(stock_data_tmp) != type(pd.DataFrame()) or not "close" in stock_data_tmp:
            print ("no history available for ticker " + ticker)
            continue 
        if len(stock_data_tmp) - 1 < len(column_names):
            print ("not long enough history for " + str(ticker))
        elif len(stock_data_tmp) - 1 > len(column_names):
            print ("excessive " + str(len(stock_data_tmp)-len(column_names)) + 
                   " trading days for " + str(ticker))
        else:
            equity_close_history_tmp = stock_data_tmp.close
            Stocks_return_history_tmp.loc[ticker_lst[ticker_index]] = np.diff(np.log(equity_close_history_tmp))
    return Stocks_return_history_tmp

In [15]:
# Connect to IB Server
ib=IB()
util.startLoop()
ib.connect('127.0.0.1',7497, clientId= 12)

<IB connected to 127.0.0.1:7497 clientId=12>

In [66]:
SectorNames = ("BasicMaterials, ConsumerCyclical, FinancialServices, RealEstate, ConsumerDefensive, Healthcare, \
Utilities, CommunicationServices, Energy, Industrials, Technology").split(', ')

In [5]:
BasicMaterials = ['RIO', 'FCX', 'X', 'NEM', 'NUE', 'DOW', 'ECL', 'AA', 'DOW', 'AEM', 'MLM', 'LYB', 'CTVA', 'MOS', 'IFF', 'CE', 'AXTA', 'STLD', 'ALB', 'VMC', 'BBL', 'AVTR', 'SMG', 'KGC', 'BHP', 'GGB', 'HUN', 'CF', 'EMN', 'AU', 'KL', 'RGLD', 'UFS', 'AG', 'ASH', 'AUY', 'FNV', 'CX', 'LAC', 'OLN', 'LTHM', 'SUM', 'CXP', 'CC', 'SID', 'BTG', 'WFG', 'RS', 'WLK', 'ESI', 'BCC', 'SWM', 'BVN', 'UFPI', 'SBSW', 'KWR', 'FSM', 'CBT', 'AGI', 'KALU', 'NGVT', 'CMC', 'SAND', 'MDU', 'TROX', 'PKX', 'TSE', 'EGO', 'UNVR', 'WDFC', 'SCHN', 'PVG', 'EQX', 'MTX', 'SILV', 'CSTM', 'KRA', 'GCP', 'IAG', 'CMP', 'OR', 'SVM', 'SUZ', 'SXC', 'SCL', 'NG', 'ASIX']
ConsumerCyclical = ['TSLA', 'AMZN', 'F', 'BABA', 'HD', 'GM', 'DKNG', 'CCL', 'ULTA', 'LOW', 'NCLH', 'JD', 'EBAY', 'BBY', 'GPS', 'TJX', 'MELI', 'LVS', 'W', 'LULU', 'EXPE', 'CMG', 'ETSY', 'RCL', 'CVNA', 'ROST', 'BLL', 'PENN', 'MAR', 'YUMC', 'CROX', 'BURL', 'WYNN', 'IP', 'DHI', 'TSCO', 'VFC', 'PHM', 'WHR', 'RH', 'AAP', 'CHWY', 'POOL', 'DRI', 'LEN', 'HLT', 'TXRH', 'FBHS', 'CCK', 'MGA', 'PVH', 'MHK', 'YETI', 'UA', 'TRIP', 'DECK', 'FIVE', 'BFAM', 'HTHT', 'RL', 'GPC', 'Z', 'VAC', 'LEVI', 'BYD', 'HOME', 'NVR', 'BG', 'THO', 'EAT', 'TPX', 'WH', 'WRK', 'GT', 'FND', 'ASO', 'KMX', 'BWA', 'CBRL', 'CHDN', 'CWH', 'TOL', 'PII', 'ARVL', 'LEA', 'CRI', 'VRM', 'GPK', 'BERY', 'CAKE', 'HBI', 'HRB', 'GNTX', 'SIX', 'MTN', 'FOXF', 'ARMK', 'CHH', 'BC', 'ALV', 'VSTO', 'LGIH', 'MDC', 'BLMN', 'ROL', 'ADNT', 'GPI', 'HAS', 'PLNT', 'KBH', 'COLM', 'KAR', 'ALSN', 'MTH', 'MAT', 'SKX', 'PLAY', 'WGO', 'MED', 'LOVE', 'BZUN', 'VC', 'QRTEA', 'CUK', 'GOOS', 'WING', 'HGV', 'HMC', 'RRR', 'MLHR', 'SHOO', 'CCS', 'MUSA', 'KTB', 'PZZA', 'TMHC', 'MMYT', 'ABG', 'TTM', 'KIRK', 'FUN', 'CNNE', 'GIL', 'LEG', 'DAN', 'GOLF', 'MHO', 'CTRN', 'MBUU', 'SBH', 'XPEL', 'AMWD', 'TPH', 'H', 'SEAS', 'BKE', 'LESL', 'LCII', 'WWW', 'EYE', 'OI', 'LL', 'GIII', 'MNRO', 'AIN', 'GAN', 'LZB', 'CVCO', 'AXL', 'SAH', 'ZUMZ', 'TUP', 'ETH', 'DRVN', 'PATK', 'DOOO', 'CBD', 'CHUY', 'CARS', 'VNE', 'RUSHA', 'OXM']
FinancialServices = ['PYPL', 'MA', 'GS', 'TRV', 'COF', 'TFC', 'ALL', 'MET', 'HBAN', 'DNB', 'PRU', 'PGR', 'SYF', 'TROW', 'CFG', 'BX', 'ALLY', 'ITUB', 'AIG', 'MKTX', 'CSR', 'SIVB', 'BAP', 'AMP', 'EQH', 'PNC', 'DFS', 'KKR', 'TW', 'RJF', 'BBD', 'KEY', 'RE', 'EBC', 'PFG', 'RNR', 'HDB', 'ADS', 'APO', 'IBN', 'NDAQ', 'SBNY', 'IBKR', 'UNM', 'LNC', 'OMF', 'TREE', 'BCS', 'MKL', 'CMA', 'ZION', 'ING', 'WU', 'FAF', 'VOYA', 'SLM', 'BEN', 'XP', 'EWBC', 'ARES', 'AIZ', 'LPLA', 'MTG', 'EVR', 'CBSH', 'FHB', 'CG', 'Y', 'PBCT', 'FRHC', 'ESNT', 'BPOP', 'WAL', 'JHG', 'AMG', 'FCNCA', 'SF', 'RILY', 'UNIT', 'ARCC', 'SNV', 'PFSI', 'TRUP', 'BXS', 'PBR', 'HLI', 'PNFP', 'FCBP', 'EBC', 'TCBI', 'FBP', 'CNO', 'COLB', 'PRI', 'VLY', 'CIT', 'ERIE', 'MORN', 'GSHD', 'RLI', 'AXS', 'LPRO', 'GDOT', 'WTFC', 'HLNE', 'LAZ', 'HTGC', 'FOCS', 'VRTS', 'ASB', 'PSEC', 'GOCO', 'FHI', 'CMT', 'LOB', 'BKU', 'ABCB', 'UMPQ', 'LC', 'ISBC', 'APAM', 'GBCI', 'CADE', 'BOH', 'CFR', 'SC', 'INDB', 'LYG', 'GNW', 'FFIN', 'FRME', 'EHTH', 'KNSL', 'CASH', 'SIGI', 'SAN', 'CATY', 'PLMR', 'UMBF', 'UBSI', 'CBU', 'BOKF', 'PDI', 'IBTX', 'CUBI', 'CLM', 'PCI', 'MCY', 'FMBI', 'FSKR', 'ENVA', 'AEL', 'NMIH', 'IBOC', 'EFC', 'EGBN', 'AC', 'WD', 'UCBI', 'FULT', 'AEG', 'WSBC', 'KB', 'DFIN', 'MC', 'AGO', 'BMEZ', 'MAIN', 'SFBS', 'HMN', 'PFS', 'PTY', 'PJT', 'BSTZ', 'HOMB', 'CNS', 'FIBK', 'WSFS', 'JRVR', 'BSBR', 'SHG', 'CLII', 'RA', 'PRAA', 'EIG', 'PIPR', 'OCFC', 'SASR', 'FFBC', 'BRP', 'TRMK', 'GWB', 'NTB', 'OFG', 'STC', 'TBK', 'CIB', 'BOWX']
RealEstate = ['SPG', 'O', 'WY', 'EQR', 'CBRE', 'VICI', 'RLGY', 'ESS', 'VC', 'AMH', 'BXP', 'KIM', 'REXR', 'OHI', 'COLD', 'RDFN', 'UDR', 'CUZ', 'PEAK', 'SRC', 'REG', 'STWD', 'VNO', 'ACC', 'PK', 'CONE', 'SLG', 'HTA', 'NRZ', 'ADC', 'STOR', 'SKT', 'IVR', 'FRT', 'NSA', 'KRC', 'LAMR', 'COR', 'EPR', 'NHI', 'BRX', 'DEI', 'RYN', 'APLE', 'EGP', 'NNN', 'SHO', 'PGRE', 'LTC', 'IIPR', 'ABR', 'ESRT', 'CLNY', 'BDN', 'HASI', 'CXP', 'HPP', 'WRI', 'JBGS', 'FCPT', 'LXP', 'AIRC', 'PEB', 'RLJ', 'PCH', 'CIM', 'HHC', 'AIV', 'DRH', 'WRE', 'INN', 'NTST', 'EPRT', 'AAT', 'PDM', 'PAX', 'PMT', 'IRT', 'LAND', 'ARI', 'PZN', 'CLI', 'NMRK', 'CWK', 'UE', 'XHR', 'RPAI', 'APT', 'NTR', 'ALEX', 'MFA', 'KRG', 'SAFE', 'ILPT', 'GMRE', 'DX', 'UMH', 'BRMK', 'LADR']
ConsumerDefensive = ['DG', 'DLTR', 'SAM', 'OLLI', 'KR', 'CNS', 'SYY', 'BG', 'DAR', 'NWL', 'LW', 'HRL', 'ABEV', 'CHGG', 'PFGC', 'USFD', 'FRPT', 'CASY', 'SFM', 'FMX', 'BJ', 'SAFM', 'CCEP', 'HLF', 'COTY', 'HELE', 'TPB', 'BGS', 'NOMD', 'UTZ', 'TWOU', 'CORE', 'UVV', 'LANC', 'UNFI', 'ACI', 'LOPE', 'COKE', 'SPB', 'SMPL', 'STRA', 'ELF', 'NUS', 'EPC', 'LAUR', 'LRN', 'CENTA', 'JJSF', 'HMHC', 'PPC', 'CVGW', 'CALM', 'KOF', 'CHEF', 'SPTN', 'TR', 'AFYA', 'NTCO', 'EEIQ', 'IMKTA', 'WMK', 'PSMT']
Healthcare = ['MRNA', 'VEEV', 'TMO', 'CVS', 'ABT', 'DHR', 'ISRG', 'TDOC', 'LLY', 'ILMN', 'CGC', 'DXCM', 'ZEN', 'BSX', 'ALGN', 'HCA', 'CRL', 'EW', 'SYK', 'STAA', 'IDXX', 'HZNP', 'EXAS', 'WST', 'SGEN', 'RCM', 'LH', 'TECH', 'PODD', 'NVCR', 'TXG', 'STRL', 'CRSP', 'DGX', 'EBS', 'CAH', 'ZLAB', 'ELAN', 'GDRX', 'WAT', 'ABMD', 'NTRA', 'BHVN', 'XRAY', 'PEN', 'HSIC', 'HQY', 'BGNE', 'ALC', 'MRTX', 'PGNY', 'NVTA', 'BEAM', 'MASI', 'GH', 'NBIX', 'THC', 'ARWR', 'TFX', 'PDCO', 'ALLO', 'NVST', 'SAGE', 'CTLT', 'BPMC', 'ALNY', 'QDEL', 'EXEL', 'PACB', 'SDGR', 'CDNA', 'ARGX', 'MOH', 'UTHR', 'VIR', 'NARI', 'RGEN', 'TGTX', 'PHG', 'TNDM', 'ACCD', 'INMD', 'RPRX', 'PRGO', 'ARVN', 'GTHX', 'INSP', 'LIVN', 'AMED', 'GMED', 'NVRO', 'AXNX', 'NSTG', 'PINC', 'OMCL', 'BLUE', 'MRVI', 'LHCG', 'TPTX', 'PCRX', 'KRTX', 'GBT', 'EDIT', 'RARE', 'ARNA', 'ALKS', 'ADPT', 'FOLD', 'BBIO', 'INOV', 'DRNA', 'XLRN', 'SGRY', 'RVMD', 'OCDX', 'ACAD', 'ICUI', 'INSM', 'SIBN', 'HCAT', 'GRFS', 'PHR', 'SNN', 'ENDP', 'LGND', 'ZNTL', 'PETQ', 'SILK', 'GKOS', 'MMSI', 'GLPG', 'SEM', 'IMGN', 'PTCT', 'MORF', 'AXSM', 'AHCO', 'MDRX', 'OM', 'KOD', 'ALLK', 'MDGL', 'MYOV', 'CERT', 'AGIO', 'CNMD', 'ENSG', 'SHC', 'TVTX', 'CLVS', 'NEO', 'FGEN', 'IART', 'ACHC', 'CYTK', 'SWTX', 'TBIO', 'ITCI', 'PTGX', 'KURA', 'NRIX', 'LUNG', 'RGNX', 'ATEC', 'NEOG', 'SRRK', 'DVAX', 'BTAI', 'TNXP', 'CDXS', 'IRWD', 'RLAY', 'DCPH', 'PBH', 'ALHC', 'ATRC', 'ZYME', 'HSKA', 'OCUL', 'OPCH', 'SGMO', 'QURE', 'EVH', 'ATRA', 'CHRS', 'CERS', 'SNDX', 'RCUS', 'AFMD', 'ADUS', 'RVNC', 'HCSG', 'VNDA', 'TRHC', 'KPTI', 'PAVM', 'ESPR', 'XNCR', 'OMER', 'YMAB', 'MYGN', 'FMTX', 'AKBA', 'SANA', 'PRQR', 'BKD', 'ANGO', 'SUPN', 'AERI', 'ARDX', 'ATRI', 'AKRO', 'TCMD', 'BLFS', 'ZGNX', 'EOLS', 'WVE', 'MRSN', 'RIGL', 'CPRX', 'VKTX', 'OPRX', 'ZIOP', 'TXMD', 'INGN', 'RYTM', 'GB', 'ENTA', 'RDUS', 'KIDS', 'SGFY', 'CARA', 'TMDX']
Utilities = ['NSP', 'PCG', 'CMS', 'NRG', 'VST', 'AES', 'UGI', 'EIX', 'PNW', 'NGG', 'ORA', 'NEP', 'BEPC', 'CWEN', 'NJR', 'BIPC', 'SWX', 'SJI', 'BKH', 'SBS', 'CPK', 'CIG', 'AZRE', 'BEP', 'ELP', 'CWT']
CommunicationServices = ['FB', 'GOOG', 'ABNB', 'ZM', 'ROKU', 'CMCSA', 'SNAP', 'VIAC', 'BIDU', 'PINS', 'TWTR', 'Z', 'TWLO', 'CHTR', 'DISCA', 'ATVI', 'MTCH', 'SPOT', 'NTES', 'FVRR', 'DISCK', 'TTWO', 'DISH', 'ZNGA', 'ATUS', 'YNDX', 'LYV', 'CABO', 'LSXMK', 'ATHM', 'LSXMA', 'GRUB', 'ZG', 'MSGS', 'VOD', 'NYT', 'LBTYK', 'LBTYA', 'NXST', 'TV', 'AMX', 'GCI', 'VG', 'CDLX', 'MSGE', 'CARG', 'YELP', 'GOGO', 'SBGI', 'SSTK', 'TEF', 'TDS', 'VIV', 'IHRT', 'GRPN', 'COUP', 'GCI', 'MDP', 'WMG', 'AVID', 'PLTK', 'MCS', 'SCPL', 'SSP', 'CNSL', 'TTGT', 'QNST', 'DLX', 'EVER']
Energy = ['MPC', 'PBR', 'BP', 'SLB', 'PXD', 'VLO', 'EOG', 'WMB', 'OKE', 'PSX', 'COG', 'AHC', 'FANG', 'SU', 'HAL', 'CCJ', 'APA', 'XEC', 'TPL', 'TOT', 'EQT', 'CVE', 'TRGP', 'AR', 'SHLX', 'DEN', 'ETRN', 'OVV', 'CHK', 'MUR', 'PAGP', 'CLR', 'TS', 'PAA', 'PSXP', 'FTI', 'DHT', 'INT', 'MTDR', 'HP', 'ARCH', 'PBA', 'CNX', 'STNG', 'IMO', 'HEP', 'WLL', 'DK', 'GLNG', 'BCEI', 'FRO', 'CRK', 'VNOM', 'ERF', 'NOG', 'EURN', 'EC', 'GEL', 'SNP', 'CQP', 'PTR', 'OAS', 'WHD', 'WTI', 'ENBL', 'UGP', 'DNOW']
Industrials = ['BA', 'GE', 'AAL', 'CNI', 'DE', 'KSU', 'TRU', 'MIDD', 'FDX', 'AUD', 'DAL', 'LUV', 'PH', 'CP', 'SWK', 'CARR', 'TDG', 'EXPD', 'GNRC', 'AZEK', 'WAB', 'ODFL', 'URI', 'JCI', 'EFX', 'AVY', 'CPRT', 'CTAS', 'AA', 'JBLU', 'PCAR', 'CHRW', 'SPR', 'LPX', 'JBHT', 'ALK', 'MAS', 'IAA', 'CD', 'SAVE', 'OC', 'BLDR', 'BLD', 'IR', 'VRT', 'ZTO', 'ALLE', 'SNA', 'MTZ', 'TXT', 'J', 'HEI', 'XYL', 'WSO', 'PWR', 'BE', 'AYI', 'TGH', 'NDSN', 'ABB', 'AOS', 'ACM', 'CNHI', 'AER', 'PNR', 'BAH', 'DCI', 'EAF', 'AG', 'ERJ', 'UHAL', 'TTC', 'CPA', 'LII', 'CFX', 'SITE', 'HA', 'LSTR', 'JELD', 'MAN', 'GGG', 'HXL', 'RCII', 'JBT', 'WMS', 'RBC', 'KRNT', 'RBA', 'PRLB', 'TKR', 'CW', 'GFL', 'KBR', 'CSL', 'TEX', 'MRCY', 'ENS', 'WTS', 'ACP', 'R', 'KMT', 'LECO', 'WERN', 'AAN', 'NVT', 'PRG', 'WCC', 'B', 'RYAAY', 'IIN', 'AWI', 'TPIC', 'SWBI', 'AIMC', 'ROLL', 'FIX', 'MIC', 'APG', 'CR', 'ALGT', 'IBP', 'FLS', 'GOL', 'REZI', 'FAIL', 'AAWW', 'TRN', 'ATKR', 'AL', 'ARNC', 'ENR', 'ASGN', 'ADT', 'AQUA', 'HI', 'AEIS', 'GBX', 'PRIM', 'CYRX', 'CRS', 'TNET', 'RXN', 'DOOR', 'SKYW', 'ABM', 'PSN', 'UNF', 'GVA', 'GFF', 'BECN', 'TFII', 'OMAB', 'ROCK', 'AVAV', 'PBI', 'KFY', 'AIR', 'SSD', 'CMCO', 'SFL', 'ALEX', 'MYRG', 'HAYW', 'MWA', 'AMRC', 'HUBG', 'KEX', 'CWST', 'RGR', 'MLI', 'ROAD', 'AZZ', 'SHYF', 'ASTE', 'BMI', 'POWW', 'MTW', 'ACA', 'AAON', 'FELE', 'FLOW', 'LNN', 'LAND', 'SNDR', 'SCS', 'DGLY', 'RAVN', 'NAT', 'EPAC', 'HTLD', 'WIRE', 'GMS', 'PGTI', 'NPO', 'MEG', 'HEES', 'DHC', 'HURN', 'HSC']
Technology = ['NVDA', 'CRM', 'MSFT', 'AAPL', 'AMD', 'SNOW', 'INTC', 'SHOP', 'ADBE', 'MU', 'NOW', 'QCOM', 'AVGO', 'AMAT', 'UBER', 'SQ', 'TSM', 'OKTA', 'TXN', 'CRWD', 'INTU', 'LYFT', 'ADSK', 'LRCX', 'ADI', 'NXPI', 'ZS', 'WDAY', 'RNG', 'TTD', 'NOK', 'DELL', 'VMW', 'KLAC', 'TEAM', 'ENPH', 'PANW', 'U', 'MRVL', 'XLNX', 'SPLK', 'ON', 'BILL', 'QRVO', 'TER', 'WDC', 'SNPS', 'CDW', 'HPE', 'NET', 'COUP', 'GLW', 'HUBS', 'DDOG', 'MCHP', 'FTNT', 'TEL', 'MXIM', 'NTAP', 'DBX', 'CREE', 'ZBRA', 'FTV', 'SEDG', 'STNE', 'KEYS', 'BKI', 'FLT', 'TDY', 'TRMB', 'ANSS', 'FSLR', 'AVLR', 'BOX', 'EPAM', 'AVGOP', 'PSTG', 'GDS', 'PAYC', 'MPWR', 'TYL', 'ZEN', 'ANET', 'CIEN', 'AZPN', 'SABR', 'GDDY', 'ESTC', 'BSY', 'IIVI', 'XRX', 'CDAY', 'G', 'PAGS', 'ARW', 'FIVN', 'NTNX', 'DT', 'FROG', 'DOMO', 'FAF', 'DXC', 'SAP', 'BRKS', 'IPGP', 'JNPR', 'FFIV', 'AYX', 'CYBR', 'MKSI', 'CGNX', 'FOUR', 'STM', 'CSIQ', 'SMAR', 'GWRE', 'NCNO', 'CRUS', 'IRBT', 'UMC', 'ST', 'ENTG', 'SONO', 'CRSR', 'SAIL', 'DLB', 'AMBA', 'GLOB', 'MANH', 'COMM', 'VSAT', 'PEGA', 'NCR', 'DOX', 'SWCH', 'PRO', 'LSCC', 'DCT', 'EEFT', 'MDLA', 'ZI', 'CVLT', 'LPL', 'ENV', 'AVT', 'EVBG', 'ASAN', 'UCTT', 'SSYS', 'BAND', 'BL', 'DNB', 'SMTC', 'PCTY', 'RPD', 'EB', 'KLIC', 'PRFT', 'QLYS', 'MIME', 'ATC', 'ONTO', 'VRNS', 'STMP', 'LPSN', 'MTSI', 'NEWR', 'CCMP', 'SLAB', 'ALRM', 'CNXC', 'POWI', 'ALGM', 'SUMO', 'WK', 'NTCT', 'VRNT', 'AMKR', 'CALX', 'SANM', 'TTEC', 'VSH', 'NTGR', 'ICHR', 'PING', 'SVMK', 'ITRI', 'PRGS', 'FORM', 'TENB', 'ALTR', 'AVYA', 'SITM', 'FN', 'SNX', 'ZUO', 'TDC', 'SYNA', 'SWI', 'NATI', 'SPSC', 'CNDT', 'EGHT', 'ASX', 'CSOD', 'WIT', 'COHU', 'SGH', 'PTC', 'UIS', 'VCRA', 'ACVA', 'CGNT', 'EVTC', 'APPF', 'CMTL', 'EXTR', 'EPAY', 'SATS', 'MCFE', 'ACLS', 'MXL', 'LFUS', 'RMBS', 'MODN', 'NVMI', 'ROG', 'INFN', 'MX', 'DOCN', 'ESE', 'PAYA', 'AOSL', 'VICR', 'MITK', 'DBD', 'CTS', 'SMCI', 'BCOV', 'PI', 'AKTS', 'NPTN', 'EVOP', 'IMMR', 'PLAB', 'BHE']
Stocks_final = [BasicMaterials, ConsumerCyclical, FinancialServices, RealEstate, ConsumerDefensive, Healthcare,\
           Utilities, CommunicationServices, Energy, Industrials, Technology]

In [6]:
look_back = '6 M'
resolution = '1 day'
side ='Trades' # last trade, or ASK, or Bid
end_time = datetime.now()
first_day_select = get_stock_data("ABC", look_back, resolution, side, end_time)['date'].values[1]
last_day_select = get_stock_data("ABC", look_back, resolution, side, end_time)['date'].values[-1]
print ("The first day in return series for pair selection is " + str (first_day_select))
print ("The last day in return series for pair selection is " + str (last_day_select))

tickers_grouped_all = []
scalar = preprocessing.StandardScaler()
for SectorInd in range(len(SectorNames)):
    ticker_lst = Stocks_final[SectorInd]
    Stocks_return_history = get_return_history(ticker_lst, look_back, resolution, side, end_time)
    data_std = scalar.fit_transform(Stocks_return_history)
    pca = PCA(n_components=15)
    principle_components = pca.fit_transform(data_std)
    Stocks_return_history_PCA = pd.DataFrame(principle_components, index = Stocks_return_history.index)
    history_comp_std = pd.DataFrame(scalar.fit_transform(Stocks_return_history_PCA), \
                                    index = Stocks_return_history_PCA.index)
    tickers_labeled = run_OPTICS(history_comp_std, min_samples = 2, visualization = False)
    tickers_grouped = get_groups (tickers_labeled)
    tickers_grouped_all.append(tickers_grouped)

The first day in return series for pair selection is 2020-12-14
The last day in return series for pair selection is 2021-06-08


Error 162, reqId 102: Historical Market Data Service error message:No market data permissions for AMEX, ARCA, BATS, BEX, BYX, CHX, DRCTEDGE, EDGEA, IEX, ISLAND, LTSE, MEMX, NYSE, NYSENAT, PEARL, PSX STK, contract: Contract(secType='STK', conId=75218890, symbol='WFG', exchange='SMART', primaryExchange='NYSE', currency='USD', localSymbol='WFG', tradingClass='WFG')


no history available for ticker WFG
excessive 6 trading days for ARVL
not long enough history for DRVN
excessive 6 trading days for CSR
excessive 6 trading days for SLG
excessive 5 trading days for AIRC
not long enough history for PAX
excessive 6 trading days for LRN
not long enough history for EEIQ
not long enough history for OCDX
not long enough history for CERT
not long enough history for ALHC
not long enough history for SANA
not long enough history for SGFY
excessive 6 trading days for ELP
not long enough history for PLTK
excessive 2 trading days for AHC
excessive 6 trading days for APA
not long enough history for TPL
not long enough history for CHK
excessive 6 trading days for PAGP
excessive 6 trading days for PAA
excessive 6 trading days for AUD
excessive 6 trading days for FAIL
not long enough history for HAYW
excessive 6 trading days for MRVL
not long enough history for ATC
not long enough history for ACVA
not long enough history for CGNT
not long enough history for DOCN


In [67]:
output_str = ""
for SectorInd in range(len(SectorNames)):
    curr_str = SectorNames[SectorInd] + "_groups = " + str(tickers_grouped_all[SectorInd]) + "\n"
    output_str += curr_str
print (output_str)
write_file ("stock_groups_OPTICS.txt", output_str, "w")

BasicMaterials_groups = [['RIO', 'BBL', 'BHP', 'PKX'], ['RS', 'NGVT'], ['NUE', 'STLD'], ['CE', 'EMN'], ['NEM', 'AEM', 'KL', 'RGLD', 'AGI'], ['KGC', 'AUY'], ['SAND', 'OR'], ['ECL', 'SCL'], ['CTVA', 'ASH'], ['MLM', 'VMC'], ['AG', 'FSM', 'SILV', 'SVM'], ['CC', 'TROX'], ['GGB', 'SID']]
ConsumerCyclical_groups = [['EBAY', 'GOLF'], ['IP', 'AAP', 'GPC', 'WRK', 'GPK', 'BERY', 'GNTX', 'LEG'], ['HD', 'LOW', 'TSCO'], ['ROST', 'BURL'], ['WH', 'ARMK', 'CHH', 'FUN'], ['MAR', 'HLT', 'H'], ['LVS', 'WYNN'], ['AMZN', 'CMG', 'YUMC'], ['BLL', 'CCK'], ['BYD', 'RRR'], ['DRI', 'TXRH', 'CBRL'], ['DHI', 'TOL'], ['MDC', 'TMHC'], ['PII', 'BC'], ['SIX', 'SEAS'], ['EAT', 'BLMN'], ['F', 'GM'], ['CCL', 'NCLH', 'RCL', 'CUK'], ['BABA', 'JD']]
FinancialServices_groups = [['MET', 'PRU', 'PFG'], ['TFC', 'HBAN', 'CFG', 'KEY', 'CMA'], ['ZION', 'EWBC'], ['VLY', 'BOKF'], ['FRME', 'UCBI'], ['COLB', 'EGBN'], ['UMBF', 'CBU'], ['COF', 'AIG'], ['ALL', 'RLI', 'SIGI', 'HMN'], ['TRV', 'Y'], ['ARCC', 'HTGC', 'MAIN', 'RA'], ['PDI', 'P

The hypothesis that undelrying the pairs-trading strategy is that the residual time series is mean-reverting. Dickey-Fuller Test is to test if residual is mean-reverting (reject null hypothesis)

In general, a p-value of less than 5% means you can reject the null hypothesis that there is a unit root. You can also compare the calculated DFT statistic with a tabulated critical value.

The more negative the DF test statistic, the stronger the evidence for rejecting the null hypothesis of a unit root.

The notations and math equations are:
${Y = \beta X + \alpha+\epsilon , \: where \: \beta \: is : the \: slope,\: \alpha \: is\: the \: intercept,\: \epsilon \: is \: the \: residual. \: \sigma = standard \: deviation \: of \: \epsilon, \: zscore =\mathbf{\frac{\epsilon}{\sigma}}} $

In [72]:
BasicMaterials_groups = [['RIO', 'BBL', 'BHP', 'PKX'], ['RS', 'NGVT'], ['NUE', 'STLD'], ['CE', 'EMN'], ['NEM', 'AEM', 'KL', 'RGLD', 'AGI'], ['KGC', 'AUY'], ['SAND', 'OR'], ['ECL', 'SCL'], ['CTVA', 'ASH'], ['MLM', 'VMC'], ['AG', 'FSM', 'SILV', 'SVM'], ['CC', 'TROX'], ['GGB', 'SID']]
ConsumerCyclical_groups = [['EBAY', 'GOLF'], ['IP', 'AAP', 'GPC', 'WRK', 'GPK', 'BERY', 'GNTX', 'LEG'], ['HD', 'LOW', 'TSCO'], ['ROST', 'BURL'], ['WH', 'ARMK', 'CHH', 'FUN'], ['MAR', 'HLT', 'H'], ['LVS', 'WYNN'], ['AMZN', 'CMG', 'YUMC'], ['BLL', 'CCK'], ['BYD', 'RRR'], ['DRI', 'TXRH', 'CBRL'], ['DHI', 'TOL'], ['MDC', 'TMHC'], ['PII', 'BC'], ['SIX', 'SEAS'], ['EAT', 'BLMN'], ['F', 'GM'], ['CCL', 'NCLH', 'RCL', 'CUK'], ['BABA', 'JD']]
FinancialServices_groups = [['MET', 'PRU', 'PFG'], ['TFC', 'HBAN', 'CFG', 'KEY', 'CMA'], ['ZION', 'EWBC'], ['VLY', 'BOKF'], ['FRME', 'UCBI'], ['COLB', 'EGBN'], ['UMBF', 'CBU'], ['COF', 'AIG'], ['ALL', 'RLI', 'SIGI', 'HMN'], ['TRV', 'Y'], ['ARCC', 'HTGC', 'MAIN', 'RA'], ['PDI', 'PCI', 'PTY'], ['RE', 'RNR', 'MKL'], ['TROW', 'CNS'], ['ARES', 'EFC'], ['MKTX', 'TW'], ['BMEZ', 'BSTZ'], ['ING', 'SAN'], ['BCS', 'LYG'], ['FCNCA', 'CIT'], ['MTG', 'ESNT', 'NMIH'], ['EVR', 'HLI', 'MC', 'PJT', 'PIPR'], ['BEN', 'FHI'], ['KB', 'SHG'], ['HDB', 'IBN'], ['ITUB', 'BBD', 'PBR', 'BSBR']]
RealEstate_groups = [['CUZ', 'KRC', 'DEI', 'HPP'], ['BDN', 'JBGS', 'PDM'], ['EQR', 'ESS', 'UDR'], ['O', 'NNN'], ['EGP', 'LXP', 'IRT'], ['PEAK', 'HTA'], ['NHI', 'LTC', 'UMH'], ['KIM', 'BRX'], ['REG', 'FRT'], ['RPAI', 'KRG'], ['CONE', 'COR'], ['STWD', 'CIM', 'ARI', 'DX', 'LADR'], ['NRZ', 'PMT', 'MFA'], ['APLE', 'XHR'], ['SHO', 'DRH'], ['WY', 'RYN', 'PCH']]
ConsumerDefensive_groups = [['DG', 'DLTR', 'CASY'], ['CCEP', 'NOMD', 'LANC', 'JJSF'], ['IMKTA', 'WMK'], ['SAFM', 'PPC'], ['FMX', 'KOF'], ['UVV', 'CVGW', 'PSMT'], ['HLF', 'NUS'], ['ABEV', 'NTCO']]
Healthcare_groups = [['DXCM', 'CRL', 'IDXX', 'TECH'], ['SYK', 'GMED', 'SNN'], ['ELAN', 'MOH'], ['XRAY', 'HSIC'], ['TMO', 'ABT', 'DHR', 'WST'], ['LH', 'DGX'], ['MASI', 'AMED', 'LHCG'], ['HQY', 'RPRX'], ['EXEL', 'ALKS'], ['EXAS', 'NEO'], ['NBIX', 'ALNY'], ['SGEN', 'PTCT'], ['MDGL', 'VNDA', 'ENTA'], ['ARWR', 'BPMC'], ['SAGE', 'ITCI'], ['RARE', 'SGMO'], ['XNCR', 'RYTM'], ['GTHX', 'ARDX'], ['CYTK', 'AKRO'], ['BEAM', 'EDIT']]
Utilities_groups = [['CMS', 'PNW', 'CWT'], ['BKH', 'CPK']]
CommunicationServices_groups = [['FB', 'GOOG'], ['CMCSA', 'CHTR', 'ATUS', 'CABO', 'MSGS', 'AMX'], ['LSXMK', 'LSXMA'], ['LBTYK', 'LBTYA'], ['ATVI', 'TTWO', 'ZNGA'], ['SBGI', 'SSP'], ['ROKU', 'TWLO'], ['Z', 'ZG'], ['VIAC', 'DISCA', 'DISCK']]
Energy_groups = [['MPC', 'VLO', 'PSX'], ['WMB', 'OKE', 'HEP'], ['SU', 'IMO'], ['PXD', 'EOG', 'XEC'], ['SLB', 'HAL'], ['SNP', 'PTR'], ['DHT', 'FRO', 'EURN'], ['PBR', 'UGP']]
Industrials_groups = [['BA', 'SPR', 'HXL', 'RYAAY'], ['NVT', 'CR'], ['PNR', 'WTS', 'LECO'], ['SWK', 'GGG'], ['HI', 'MLI', 'NPO'], ['RXN', 'FELE'], ['PWR', 'ACM'], ['SNA', 'SNDR'], ['FDX', 'AAWW'], ['GFF', 'AIR'], ['OC', 'PGTI'], ['TDG', 'HEI'], ['AER', 'AL'], ['PRIM', 'ABM', 'EPAC'], ['DAL', 'LUV', 'ALK'], ['BLD', 'IBP'], ['MRCY', 'AEIS'], ['SWBI', 'RGR'], ['ERJ', 'GOL']]
Technology_groups = [['AMAT', 'LRCX', 'KLAC', 'TER'], ['TXN', 'ADI', 'MXIM'], ['MSFT', 'ADBE'], ['CDW', 'TRMB'], ['NOW', 'GDDY'], ['ARW', 'AVT'], ['DELL', 'VMW'], ['SNX', 'SMCI'], ['SMTC', 'MTSI', 'POWI'], ['AMD', 'QCOM', 'XLNX'], ['PEGA', 'SPSC'], ['PAYC', 'GLOB'], ['CIEN', 'JNPR', 'NTCT'], ['ZEN', 'SMAR'], ['TEAM', 'FIVN'], ['ZS', 'AVLR'], ['DDOG', 'WK'], ['UCTT', 'ICHR'], ['PRO', 'MODN'], ['CNXC', 'EGHT'], ['ESTC', 'ASAN'], ['U', 'NEWR'], ['NET', 'FOUR'], ['UBER', 'LYFT', 'BSY'], ['FSLR', 'CSIQ'], ['ENPH', 'SEDG'], ['NOK', 'IRBT']]
Tickers_grouped_all = [BasicMaterials_groups, ConsumerCyclical_groups, FinancialServices_groups, RealEstate_groups, \
                      ConsumerDefensive_groups, Healthcare_groups, Utilities_groups, CommunicationServices_groups, \
                      Energy_groups, Industrials_groups, Technology_groups]

In [68]:
# Form stock pairs froup groups
def form_pairs(groups):
    pairs = []
    for group in groups:
        for ind1 in range(len(group)):
            for ind2 in range(len(group))[ind1+1:]:
                pairs.append([group[ind1], group[ind2]])
    return pairs

In [87]:
# ADF test
def adf_test(x, y):
    import statsmodels.api as sm
    from statsmodels.tsa.stattools import adfuller
    x = sm.add_constant(x)
    model = sm.OLS(y,x)
    results = model.fit()
    res = results.resid
    #print(results.params[0])
    test  = adfuller(res)
    #return test
    return test[:2] # test stats, p value, smaller p value means more stationary

In [81]:
Pairs_all_sectorwise = []
for SectorInd in range(len(SectorNames)):
    Pairs_all_sectorwise.append (form_pairs(Tickers_grouped_all[SectorInd]))
Pairs_all = []
for SectorInd in range(len(SectorNames)):
    Pairs_all += Pairs_all_sectorwise[SectorInd]

In [91]:
look_back = '6 M'
resolution = '1 day'
side ='Trades' # last trade, or ASK, or Bid
end_time = datetime.now()
Pairs_all_adf_p = []
adf_stats = []
for pair_curr in Pairs_all:
    history_pair_curr = get_return_history(pair_curr, look_back, resolution, side, end_time)
    if len (history_pair_curr) != 2:
        print ("The stock pair " + str(pair_curr) + " fails to meet history requirement")
        continue
    [adf_stats_curr, adf_p_curr] = adf_test(history_pair_curr.iloc[0,:], history_pair_curr.iloc[1,:])
    if adf_p_curr >0 and adf_p_curr <0.05:
        Pairs_all_adf_p.append(pair_curr)
        adf_stats.append(adf_stats_curr)
    else:
        print ("The stock pair " + str(pair_curr) + " fails the ADF test with p value of " + str(adf_p_curr))

The stock pair ['WH', 'FUN'] fails the ADF test with p value of 0.21114276778919178
The stock pair ['PDI', 'PTY'] fails the ADF test with p value of 0.3451200801825793


In [1]:
Pairs_all_adf_p = [['RIO', 'BBL'], ['RIO', 'BHP'], ['RIO', 'PKX'], ['BBL', 'BHP'], ['BBL', 'PKX'], ['BHP', 'PKX'], ['RS', 'NGVT'], ['NUE', 'STLD'], ['CE', 'EMN'], ['NEM', 'AEM'], ['NEM', 'KL'], ['NEM', 'RGLD'], ['NEM', 'AGI'], ['AEM', 'KL'], ['AEM', 'RGLD'], ['AEM', 'AGI'], ['KL', 'RGLD'], ['KL', 'AGI'], ['RGLD', 'AGI'], ['KGC', 'AUY'], ['SAND', 'OR'], ['ECL', 'SCL'], ['CTVA', 'ASH'], ['MLM', 'VMC'], ['AG', 'FSM'], ['AG', 'SILV'], ['AG', 'SVM'], ['FSM', 'SILV'], ['FSM', 'SVM'], ['SILV', 'SVM'], ['CC', 'TROX'], ['GGB', 'SID'], ['EBAY', 'GOLF'], ['IP', 'AAP'], ['IP', 'GPC'], ['IP', 'WRK'], ['IP', 'GPK'], ['IP', 'BERY'], ['IP', 'GNTX'], ['IP', 'LEG'], ['AAP', 'GPC'], ['AAP', 'WRK'], ['AAP', 'GPK'], ['AAP', 'BERY'], ['AAP', 'GNTX'], ['AAP', 'LEG'], ['GPC', 'WRK'], ['GPC', 'GPK'], ['GPC', 'BERY'], ['GPC', 'GNTX'], ['GPC', 'LEG'], ['WRK', 'GPK'], ['WRK', 'BERY'], ['WRK', 'GNTX'], ['WRK', 'LEG'], ['GPK', 'BERY'], ['GPK', 'GNTX'], ['GPK', 'LEG'], ['BERY', 'GNTX'], ['BERY', 'LEG'], ['GNTX', 'LEG'], ['HD', 'LOW'], ['HD', 'TSCO'], ['LOW', 'TSCO'], ['ROST', 'BURL'], ['WH', 'ARMK'], ['WH', 'CHH'], ['ARMK', 'CHH'], ['ARMK', 'FUN'], ['CHH', 'FUN'], ['MAR', 'HLT'], ['MAR', 'H'], ['HLT', 'H'], ['LVS', 'WYNN'], ['AMZN', 'CMG'], ['AMZN', 'YUMC'], ['CMG', 'YUMC'], ['BLL', 'CCK'], ['BYD', 'RRR'], ['DRI', 'TXRH'], ['DRI', 'CBRL'], ['TXRH', 'CBRL'], ['DHI', 'TOL'], ['MDC', 'TMHC'], ['PII', 'BC'], ['SIX', 'SEAS'], ['EAT', 'BLMN'], ['F', 'GM'], ['CCL', 'NCLH'], ['CCL', 'RCL'], ['CCL', 'CUK'], ['NCLH', 'RCL'], ['NCLH', 'CUK'], ['RCL', 'CUK'], ['BABA', 'JD'], ['MET', 'PRU'], ['MET', 'PFG'], ['PRU', 'PFG'], ['TFC', 'HBAN'], ['TFC', 'CFG'], ['TFC', 'KEY'], ['TFC', 'CMA'], ['HBAN', 'CFG'], ['HBAN', 'KEY'], ['HBAN', 'CMA'], ['CFG', 'KEY'], ['CFG', 'CMA'], ['KEY', 'CMA'], ['ZION', 'EWBC'], ['VLY', 'BOKF'], ['FRME', 'UCBI'], ['COLB', 'EGBN'], ['UMBF', 'CBU'], ['COF', 'AIG'], ['ALL', 'RLI'], ['ALL', 'SIGI'], ['ALL', 'HMN'], ['RLI', 'SIGI'], ['RLI', 'HMN'], ['SIGI', 'HMN'], ['TRV', 'Y'], ['ARCC', 'HTGC'], ['ARCC', 'MAIN'], ['ARCC', 'RA'], ['HTGC', 'MAIN'], ['HTGC', 'RA'], ['MAIN', 'RA'], ['PDI', 'PCI'], ['PCI', 'PTY'], ['RE', 'RNR'], ['RE', 'MKL'], ['RNR', 'MKL'], ['TROW', 'CNS'], ['ARES', 'EFC'], ['MKTX', 'TW'], ['BMEZ', 'BSTZ'], ['ING', 'SAN'], ['BCS', 'LYG'], ['FCNCA', 'CIT'], ['MTG', 'ESNT'], ['MTG', 'NMIH'], ['ESNT', 'NMIH'], ['EVR', 'HLI'], ['EVR', 'MC'], ['EVR', 'PJT'], ['EVR', 'PIPR'], ['HLI', 'MC'], ['HLI', 'PJT'], ['HLI', 'PIPR'], ['MC', 'PJT'], ['MC', 'PIPR'], ['PJT', 'PIPR'], ['BEN', 'FHI'], ['KB', 'SHG'], ['HDB', 'IBN'], ['ITUB', 'BBD'], ['ITUB', 'PBR'], ['ITUB', 'BSBR'], ['BBD', 'PBR'], ['BBD', 'BSBR'], ['PBR', 'BSBR'], ['CUZ', 'KRC'], ['CUZ', 'DEI'], ['CUZ', 'HPP'], ['KRC', 'DEI'], ['KRC', 'HPP'], ['DEI', 'HPP'], ['BDN', 'JBGS'], ['BDN', 'PDM'], ['JBGS', 'PDM'], ['EQR', 'ESS'], ['EQR', 'UDR'], ['ESS', 'UDR'], ['O', 'NNN'], ['EGP', 'LXP'], ['EGP', 'IRT'], ['LXP', 'IRT'], ['PEAK', 'HTA'], ['NHI', 'LTC'], ['NHI', 'UMH'], ['LTC', 'UMH'], ['KIM', 'BRX'], ['REG', 'FRT'], ['RPAI', 'KRG'], ['CONE', 'COR'], ['STWD', 'CIM'], ['STWD', 'ARI'], ['STWD', 'DX'], ['STWD', 'LADR'], ['CIM', 'ARI'], ['CIM', 'DX'], ['CIM', 'LADR'], ['ARI', 'DX'], ['ARI', 'LADR'], ['DX', 'LADR'], ['NRZ', 'PMT'], ['NRZ', 'MFA'], ['PMT', 'MFA'], ['APLE', 'XHR'], ['SHO', 'DRH'], ['WY', 'RYN'], ['WY', 'PCH'], ['RYN', 'PCH'], ['DG', 'DLTR'], ['DG', 'CASY'], ['DLTR', 'CASY'], ['CCEP', 'NOMD'], ['CCEP', 'LANC'], ['CCEP', 'JJSF'], ['NOMD', 'LANC'], ['NOMD', 'JJSF'], ['LANC', 'JJSF'], ['IMKTA', 'WMK'], ['SAFM', 'PPC'], ['FMX', 'KOF'], ['UVV', 'CVGW'], ['UVV', 'PSMT'], ['CVGW', 'PSMT'], ['HLF', 'NUS'], ['ABEV', 'NTCO'], ['DXCM', 'CRL'], ['DXCM', 'IDXX'], ['DXCM', 'TECH'], ['CRL', 'IDXX'], ['CRL', 'TECH'], ['IDXX', 'TECH'], ['SYK', 'GMED'], ['SYK', 'SNN'], ['GMED', 'SNN'], ['ELAN', 'MOH'], ['XRAY', 'HSIC'], ['TMO', 'ABT'], ['TMO', 'DHR'], ['TMO', 'WST'], ['ABT', 'DHR'], ['ABT', 'WST'], ['DHR', 'WST'], ['LH', 'DGX'], ['MASI', 'AMED'], ['MASI', 'LHCG'], ['AMED', 'LHCG'], ['HQY', 'RPRX'], ['EXEL', 'ALKS'], ['EXAS', 'NEO'], ['NBIX', 'ALNY'], ['SGEN', 'PTCT'], ['MDGL', 'VNDA'], ['MDGL', 'ENTA'], ['VNDA', 'ENTA'], ['ARWR', 'BPMC'], ['SAGE', 'ITCI'], ['RARE', 'SGMO'], ['XNCR', 'RYTM'], ['GTHX', 'ARDX'], ['CYTK', 'AKRO'], ['BEAM', 'EDIT'], ['CMS', 'PNW'], ['CMS', 'CWT'], ['PNW', 'CWT'], ['BKH', 'CPK'], ['FB', 'GOOG'], ['CMCSA', 'CHTR'], ['CMCSA', 'ATUS'], ['CMCSA', 'CABO'], ['CMCSA', 'MSGS'], ['CMCSA', 'AMX'], ['CHTR', 'ATUS'], ['CHTR', 'CABO'], ['CHTR', 'MSGS'], ['CHTR', 'AMX'], ['ATUS', 'CABO'], ['ATUS', 'MSGS'], ['ATUS', 'AMX'], ['CABO', 'MSGS'], ['CABO', 'AMX'], ['MSGS', 'AMX'], ['LSXMK', 'LSXMA'], ['LBTYK', 'LBTYA'], ['ATVI', 'TTWO'], ['ATVI', 'ZNGA'], ['TTWO', 'ZNGA'], ['SBGI', 'SSP'], ['ROKU', 'TWLO'], ['Z', 'ZG'], ['VIAC', 'DISCA'], ['VIAC', 'DISCK'], ['DISCA', 'DISCK'], ['MPC', 'VLO'], ['MPC', 'PSX'], ['VLO', 'PSX'], ['WMB', 'OKE'], ['WMB', 'HEP'], ['OKE', 'HEP'], ['SU', 'IMO'], ['PXD', 'EOG'], ['PXD', 'XEC'], ['EOG', 'XEC'], ['SLB', 'HAL'], ['SNP', 'PTR'], ['DHT', 'FRO'], ['DHT', 'EURN'], ['FRO', 'EURN'], ['PBR', 'UGP'], ['BA', 'SPR'], ['BA', 'HXL'], ['BA', 'RYAAY'], ['SPR', 'HXL'], ['SPR', 'RYAAY'], ['HXL', 'RYAAY'], ['NVT', 'CR'], ['PNR', 'WTS'], ['PNR', 'LECO'], ['WTS', 'LECO'], ['SWK', 'GGG'], ['HI', 'MLI'], ['HI', 'NPO'], ['MLI', 'NPO'], ['RXN', 'FELE'], ['PWR', 'ACM'], ['SNA', 'SNDR'], ['FDX', 'AAWW'], ['GFF', 'AIR'], ['OC', 'PGTI'], ['TDG', 'HEI'], ['AER', 'AL'], ['PRIM', 'ABM'], ['PRIM', 'EPAC'], ['ABM', 'EPAC'], ['DAL', 'LUV'], ['DAL', 'ALK'], ['LUV', 'ALK'], ['BLD', 'IBP'], ['MRCY', 'AEIS'], ['SWBI', 'RGR'], ['ERJ', 'GOL'], ['AMAT', 'LRCX'], ['AMAT', 'KLAC'], ['AMAT', 'TER'], ['LRCX', 'KLAC'], ['LRCX', 'TER'], ['KLAC', 'TER'], ['TXN', 'ADI'], ['TXN', 'MXIM'], ['ADI', 'MXIM'], ['MSFT', 'ADBE'], ['CDW', 'TRMB'], ['NOW', 'GDDY'], ['ARW', 'AVT'], ['DELL', 'VMW'], ['SNX', 'SMCI'], ['SMTC', 'MTSI'], ['SMTC', 'POWI'], ['MTSI', 'POWI'], ['AMD', 'QCOM'], ['AMD', 'XLNX'], ['QCOM', 'XLNX'], ['PEGA', 'SPSC'], ['PAYC', 'GLOB'], ['CIEN', 'JNPR'], ['CIEN', 'NTCT'], ['JNPR', 'NTCT'], ['ZEN', 'SMAR'], ['TEAM', 'FIVN'], ['ZS', 'AVLR'], ['DDOG', 'WK'], ['UCTT', 'ICHR'], ['PRO', 'MODN'], ['CNXC', 'EGHT'], ['ESTC', 'ASAN'], ['U', 'NEWR'], ['NET', 'FOUR'], ['UBER', 'LYFT'], ['UBER', 'BSY'], ['LYFT', 'BSY'], ['FSLR', 'CSIQ'], ['ENPH', 'SEDG'], ['NOK', 'IRBT']]
adf_stats = [-13.190580231552945, -11.712674561471074, -11.757190820654975, -11.297991267249751, -12.091131750547651, -11.897998769181532, -11.18265771297178, -12.407600292725999, -10.61390161066923, -10.104384865388958, -12.088874873758428, -9.429068308883377, -12.258386048205947, -9.516751830086735, -8.255481204766975, -10.57431568392421, -9.642476118351443, -11.849269880875918, -11.414473606214218, -12.060147959506967, -11.69250724776466, -7.490580227066561, -11.958298503893376, -11.888674666233273, -5.756537326961991, -11.400763657537144, -9.702665683660646, -11.249915550173935, -10.791248357967246, -6.15266949729346, -10.806262187084846, -12.319310265171335, -10.823905229902195, -10.635742333487443, -10.641789281879415, -9.843607826749283, -4.703001956210073, -7.773571494185491, -11.152478279545887, -5.979002468692133, -11.06813527309235, -12.25127992131991, -12.066709797249816, -9.276366553409854, -11.537605029525526, -11.022107861690367, -6.882878101357894, -11.937061702291729, -9.929059154450723, -11.230918772277562, -11.11873170596361, -12.088425634122057, -8.454541638836556, -6.446779986808757, -5.60648491754964, -8.822482427279441, -13.082445364035078, -4.879550338706508, -10.855396321012773, -11.324265939271928, -11.316846374085651, -5.578608386876449, -10.727408924993979, -10.900902156755503, -12.04077465474847, -11.587817527331465, -10.366242493803677, -7.114744956862134, -4.588359996802949, -11.701062568531432, -13.351670819885813, -13.2121477925786, -13.426443961945612, -9.75049408444593, -10.00785320112333, -10.591706936723195, -10.045712989867887, -11.089943337023424, -11.57718731617334, -11.92779563806203, -7.036529920805705, -12.904560389184793, -9.593139886082804, -10.423245321126954, -11.589637543051191, -10.45577450175973, -11.211085343786651, -11.151343782803963, -11.742563014767098, -7.088397656644767, -15.488583132318771, -9.895744556662242, -10.423932183417806, -10.435020956207184, -8.84567327128175, -9.911884671573755, -8.797248558618305, -7.621251779831355, -10.392397179994525, -7.562241127722953, -12.265981518848196, -12.154685986655528, -8.90211432425229, -9.230714921216281, -8.051060642335658, -11.202856527338483, -11.401598762558843, -5.216161595623122, -4.851963373073829, -12.375395277787444, -12.360387974078662, -10.014902150163302, -9.941341267504784, -5.861769060553022, -9.86553229682036, -10.5510393136557, -4.272319161141277, -10.846985163303225, -4.544804308193583, -4.537473191170182, -11.00401383103042, -6.336799669920977, -11.671421631157193, -9.726123842208898, -7.485040652863122, -9.3829921604552, -9.281783340914691, -11.341880636097962, -11.452250251621946, -11.40408984434244, -4.234930674360034, -13.258635964292827, -12.328601719939575, -11.84215686600087, -10.09624459209518, -10.639862095426308, -6.406798097465959, -11.153730861178053, -17.252288329898505, -12.31633654764559, -11.583137214752746, -11.681664189675395, -11.41316211671991, -10.459198501721552, -7.97693133466675, -13.403717848770222, -10.898648941937319, -11.94110106287212, -13.561993115369656, -5.030129023472405, -11.979099172062668, -12.47893713732274, -10.656285720817587, -9.677173984867094, -7.448512400183802, -11.516411158961164, -8.708205993944931, -10.529117896332329, -8.613851693333313, -10.197327150729242, -9.067850009183255, -11.339474789182512, -9.192802559687284, -12.95669848334467, -9.632480449364344, -6.067311027060788, -10.091109127842218, -12.12759036695839, -12.260532113346693, -8.122128404690546, -11.539995334115225, -9.815586095777297, -14.60762199737429, -9.82899487584163, -11.36197189900506, -12.051682573985476, -10.103769478031868, -9.541559664642786, -9.732401257080534, -10.347942804583623, -11.15222307314169, -12.042805118444766, -10.069088806896067, -9.452081240288358, -9.234665571285397, -6.475557179068789, -12.663272922636567, -3.717660957813318, -12.534352035217676, -4.569774334799832, -6.7673241722614055, -13.231511694675683, -3.99884525033645, -12.267302250630035, -12.774397815159173, -12.71043115739292, -10.722316728989128, -12.790394862415411, -10.93668656483127, -9.325317753171738, -9.743810040863044, -11.872635856386482, -12.9147100666266, -10.116542164871566, -11.56645597494142, -11.474064473493387, -11.325939022775977, -11.729223138332811, -10.398605189033043, -7.46595575933761, -7.580219134473158, -7.500549961621744, -9.708772734359668, -10.684521753816439, -12.265433344595902, -14.106814842389028, -10.94367247185663, -10.853295145359237, -6.813198323557533, -15.121228322073664, -10.116361804404912, -8.724966018227873, -11.322623893112645, -9.55516780847377, -12.115517845178475, -11.243580779220878, -7.6943959426825295, -7.340268964637949, -11.323015683352414, -9.682049093827334, -10.74998449788012, -11.700407327691098, -10.855912052242283, -10.375565646825832, -6.487459692574364, -10.048596912800614, -7.016756204355863, -11.486875670532216, -9.270685096645316, -10.47070119866162, -12.820284990507119, -10.02099200466887, -10.202463523020501, -12.291343560455504, -5.776129657115846, -10.89202658066214, -12.255138503995479, -8.89213117839821, -10.87092840030663, -12.669612087410055, -9.857321524189942, -9.675365511574476, -8.640654608269328, -7.277451995566056, -10.100474388574034, -12.202717697155887, -10.50064259619668, -9.750820055255987, -9.674928869650412, -12.829318580249833, -11.790034213576188, -10.642036410326078, -10.950153308315775, -10.421066917428435, -6.029375790610995, -9.495322717825578, -11.847160590152754, -10.854790517666332, -6.343591687299202, -9.926861119030855, -11.52595339254848, -6.259270025457048, -9.55806476821135, -6.063754553268409, -9.81329677048191, -10.259899010512145, -11.181136004878143, -10.86601620093989, -6.819968438075603, -10.997073118357477, -11.897453545222772, -11.879835596303545, -9.447989268545566, -5.567642365993908, -10.085865605560485, -9.699444536715992, -9.89863042391828, -12.12241546124547, -11.868186151076168, -5.603448332018725, -14.249135067724303, -12.004039587486217, -12.535786475387978, -10.655790183704948, -7.686158129755923, -10.266593229871093, -9.559255712032648, -12.20068546438965, -3.2900688664926876, -10.674108759727131, -12.97784818287318, -13.463351393801739, -9.61760434207353, -13.400177627812955, -11.35261910429558, -8.08120081771559, -7.245262219892561, -4.7493586234286, -7.597379861163393, -9.121091028943624, -4.183680228881772, -7.599260846242192, -11.484779284807658, -5.6538619690023015, -6.689302336090904, -4.188173683045791, -6.901564063305047, -7.0962164179424985, -11.179470094488899, -9.1920650656299, -8.978857497178124, -12.65183373132546, -10.670780303266561, -9.141852893742278, -9.940269570327931, -11.12547655872172, -9.515411844801093, -10.801615794787963, -10.159225940391492, -4.811614893564808, -12.708607436365439, -8.046946501663623, -12.92679364327035, -9.91018861162357, -4.481565205800368, -5.9441006518609445, -11.98293863329966, -10.145811973344728, -11.47342506257576, -10.597816079750677, -10.122694424503612, -10.503961056796589, -10.552559182148658, -11.817084003250491, -7.108938301726886, -12.070095653556436, -3.309686061955524, -5.136032464682316, -6.358005105461266, -8.920979483176986, -11.060025226321997, -10.404469550598177, -11.049172264628083, -10.437282702761037, -7.52369080475389, -6.007601743716033, -11.701577171503391, -7.15515556245966, -4.821748481296939, -6.321524938008446, -5.13644835080425, -10.047211812235734, -11.45429946027708, -11.537435672586339, -12.936275395321717, -10.52532716788778, -8.93595492409632, -11.452942146329782, -6.086222467649371, -10.527991096008906, -11.242738773219624, -10.170559876226088, -4.433205610790743, -10.480633928441824, -7.151699005688689, -11.198287250041965, -9.44587967577393]

In [4]:
[Pairs_all_adf_p_sorted, adf_stats_sorted] = sort_first_by_second (Pairs_all_adf_p, adf_stats)

In [5]:
pd.DataFrame([Pairs_all_adf_p_sorted, adf_stats_sorted])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,367,368,369,370,371,372,373,374,375,376
0,"[FCNCA, CIT]","[CCL, CUK]","[ABEV, NTCO]","[ESS, UDR]","[WMB, OKE]","[UVV, CVGW]","[HLI, PIPR]","[FRO, EURN]","[HLT, H]","[EVR, PIPR]",...,"[ERJ, GOL]","[UBER, BSY]","[ALL, HMN]","[RE, MKL]","[HI, NPO]","[PNR, WTS]","[ARI, DX]","[STWD, DX]","[NOW, GDDY]","[SNP, PTR]"
1,-17.252288,-15.488583,-15.121228,-14.607622,-14.249135,-14.106815,-13.561993,-13.463351,-13.426444,-13.403718,...,-4.481565,-4.433206,-4.272319,-4.234931,-4.188174,-4.18368,-3.998845,-3.717661,-3.309686,-3.290069


In [6]:
# Store data
output_str = "Pairs_all_adf_p = " + str(Pairs_all_adf_p_sorted) + "\n" + \
             "adf_stats_sorted = " + str(adf_stats_sorted)
print (output_str)
write_file ("Pairs_all_adf_p.txt", output_str)

Pairs_all_adf_p = [['FCNCA', 'CIT'], ['CCL', 'CUK'], ['ABEV', 'NTCO'], ['ESS', 'UDR'], ['WMB', 'OKE'], ['UVV', 'CVGW'], ['HLI', 'PIPR'], ['FRO', 'EURN'], ['HLT', 'H'], ['EVR', 'PIPR'], ['BA', 'SPR'], ['MAR', 'HLT'], ['RNR', 'MKL'], ['CIM', 'LADR'], ['MAR', 'H'], ['RIO', 'BBL'], ['GPK', 'GNTX'], ['DHT', 'EURN'], ['CUZ', 'HPP'], ['DDOG', 'WK'], ['MRCY', 'AEIS'], ['RYN', 'PCH'], ['TXRH', 'CBRL'], ['BKH', 'CPK'], ['AMED', 'LHCG'], ['PMT', 'MFA'], ['DX', 'LADR'], ['NRZ', 'PMT'], ['LUV', 'ALK'], ['ARWR', 'BPMC'], ['STWD', 'ARI'], ['GFF', 'AIR'], ['OKE', 'HEP'], ['STWD', 'LADR'], ['PJT', 'PIPR'], ['NUE', 'STLD'], ['VLY', 'BOKF'], ['FRME', 'UCBI'], ['TROW', 'CNS'], ['GGB', 'SID'], ['MTG', 'ESNT'], ['EXAS', 'NEO'], ['ARI', 'LADR'], ['TFC', 'KEY'], ['FMX', 'KOF'], ['BDN', 'PDM'], ['NEM', 'AGI'], ['MDGL', 'VNDA'], ['AAP', 'WRK'], ['BEAM', 'EDIT'], ['SLB', 'HAL'], ['TFC', 'CMA'], ['BDN', 'JBGS'], ['MPC', 'VLO'], ['CRL', 'TECH'], ['BBL', 'PKX'], ['NEM', 'KL'], ['WRK', 'GPK'], ['CDW', 'TRMB'], ['AAP