correct tickers from CRSP

# 1) Setup

## 1a) Library functions

In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from time import time
from datetime import datetime, timedelta
from copy import copy

print("done")

done


## 1b) Helper functions

In [2]:
def ListUnion(l1, l2):
    union = list(set(l1) | set(l2))
    return union

# 2) CRSP Permnos
- https://wrds-www.wharton.upenn.edu/data-dictionary/crsp_a_indexes/dsp500list/
- Log in to the SAS cloud and navigate to: /wrds/crsp/sasdata/a_indexes/dsp500list.sas7bdat
- convert permnos to IBES tickers here https://wrds-www.wharton.upenn.edu/pages/get-data/linking-suite-wrds/ibes-crsp-link/

In [3]:
#crsp sas
wrds_tickers = pd.read_sas("CorrectMembers/dsp500list.sas7bdat")
wrds_tickers

Unnamed: 0,PERMNO,start,ending
0,10006.0,1957-03-01,1984-07-18
1,10030.0,1957-03-01,1969-01-08
2,10049.0,1925-12-31,1932-10-01
3,10057.0,1957-03-01,1992-07-02
4,10078.0,1992-08-20,2010-01-28
...,...,...,...
2007,93159.0,2012-07-31,2016-03-29
2008,93246.0,2021-03-22,2021-12-31
2009,93422.0,2010-07-01,2015-06-30
2010,93429.0,2017-03-01,2021-12-31


In [4]:
relevant_tickers = wrds_tickers[(wrds_tickers["ending"] >= "2000-01-01")]
tickers = relevant_tickers["PERMNO"]
relevant_tickers

Unnamed: 0,PERMNO,start,ending
4,10078.0,1992-08-20,2010-01-28
6,10104.0,1989-08-03,2021-12-31
7,10107.0,1994-06-07,2021-12-31
8,10108.0,2002-07-22,2005-08-11
10,10137.0,2000-12-11,2011-02-25
...,...,...,...
2007,93159.0,2012-07-31,2016-03-29
2008,93246.0,2021-03-22,2021-12-31
2009,93422.0,2010-07-01,2015-06-30
2010,93429.0,2017-03-01,2021-12-31


In [5]:

with open("CorrectMembers/SPX_CRSPMembers.txt", 'w') as f:
    for item in tickers:
        item = int(item)
        f.write("%s\n" % item)
print("done")

done


## 2b) more comprehensive ticker names
- All from WRDS: https://wrds-www.wharton.upenn.edu/data-dictionary/crsp_a_indexes/dsp500list/
- Go to wrds > crsp > a_stock, a_index, a_cc
- Get dsp500list.sas7bdat, dsenames.sas7bdat, ccmxpf_linktable.sas7bdat
- or monthly equivalent
- Note: using the TICKERS - although non-unique, will get you more complete data from WRDS/Compustat etc

In [4]:
#Get permnos for sp500 index members
msp500list = pd.read_sas("CorrectMembers/msp500list.sas7bdat")

#Get company identifiers
msenames = pd.read_sas("CorrectMembers/msenames.sas7bdat")
# if nameendt is missing then set to today date
msenames['NAMEENDT']=msenames['NAMEENDT'].fillna(pd.to_datetime('today'))
# Merge with SP500 data
msp500 = pd.merge(msp500list, msenames, how = 'left', on = 'PERMNO')

#link to compustat
ccmxpf = pd.read_sas("CorrectMembers/ccmxpf_linktable.sas7bdat")
# if linkenddt is missing then set to today date
ccmxpf['linkenddt']=ccmxpf['linkenddt'].fillna(pd.to_datetime('today'))
#link to CCM
msp500_ccmxpf = pd.merge(msp500, ccmxpf, how='left', left_on=['PERMNO'], right_on = "lpermno")
msp500_ccmxpf

Unnamed: 0,PERMNO,start,ending,NAMEDT,NAMEENDT,SHRCD,EXCHCD,SICCD,NCUSIP,TICKER,...,CUSIP,gvkey,linkprim,liid,linktype,lpermno,lpermco,USEDFLAG,linkdt,linkenddt
0,10006.0,1957-03-01,1984-07-18,1925-12-31,1954-05-31,10.0,1.0,3740.0,,,...,b'00080010',b'001010',b'C',b'00X',b'LU',10006.0,22156.0,1.0,1950-05-01,1962-01-30 00:00:00.000000
1,10006.0,1957-03-01,1984-07-18,1925-12-31,1954-05-31,10.0,1.0,3740.0,,,...,b'00080010',b'001010',b'P',b'01',b'LU',10006.0,22156.0,1.0,1962-01-31,1984-06-28 00:00:00.000000
2,10006.0,1957-03-01,1984-07-18,1954-06-01,1962-07-01,10.0,1.0,3740.0,,,...,b'00080010',b'001010',b'C',b'00X',b'LU',10006.0,22156.0,1.0,1950-05-01,1962-01-30 00:00:00.000000
3,10006.0,1957-03-01,1984-07-18,1954-06-01,1962-07-01,10.0,1.0,3740.0,,,...,b'00080010',b'001010',b'P',b'01',b'LU',10006.0,22156.0,1.0,1962-01-31,1984-06-28 00:00:00.000000
4,10006.0,1957-03-01,1984-07-18,1962-07-02,1968-01-01,10.0,1.0,3743.0,,b'ACF',...,b'00080010',b'001010',b'C',b'00X',b'LU',10006.0,22156.0,1.0,1950-05-01,1962-01-30 00:00:00.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15991,93429.0,2017-03-01,2021-12-31,2017-10-18,2018-09-16,11.0,3.0,9999.0,b'12503M10',b'CBOE',...,b'12503M10',b'184500',b'P',b'01',b'LC',93429.0,53447.0,1.0,2010-06-15,2022-02-12 11:00:10.709512
15992,93429.0,2017-03-01,2021-12-31,2018-09-17,2021-03-28,11.0,5.0,6231.0,b'12503M10',b'CBOE',...,b'12503M10',b'184500',b'P',b'01',b'LC',93429.0,53447.0,1.0,2010-06-15,2022-02-12 11:00:10.709512
15993,93429.0,2017-03-01,2021-12-31,2021-03-29,2021-12-31,11.0,5.0,6211.0,b'12503M10',b'CBOE',...,b'12503M10',b'184500',b'P',b'01',b'LC',93429.0,53447.0,1.0,2010-06-15,2022-02-12 11:00:10.709512
15994,93436.0,2020-12-21,2021-12-31,2010-06-29,2017-02-01,11.0,3.0,9999.0,b'88160R10',b'TSLA',...,b'88160R10',b'184996',b'P',b'01',b'LC',93436.0,53453.0,1.0,2010-06-29,2022-02-12 11:00:10.709512


In [11]:
df_toby = msp500_ccmxpf[['PERMNO', 'start', 'ending', 'NAMEDT', 'NAMEENDT','TICKER', 'COMNAM','CUSIP', 'gvkey']]
df_toby = df_toby[df_toby["ending"] >= "2000-01-01"]
#convert cusip, gvkey, liid to str
df_toby["COMNAM"] = df_toby["COMNAM"].astype(str).str[2:-1]
df_toby["CUSIP"] = df_toby["CUSIP"].astype(str).str[2:-1]
df_toby["gvkey"] = df_toby["gvkey"].astype(str).str[2:-1]
df_toby["TICKER"] = df_toby["TICKER"].astype(str).str[2:-1]
df_toby.to_pickle("link_table.pkl")
df_toby

Unnamed: 0,PERMNO,start,ending,NAMEDT,NAMEENDT,TICKER,COMNAM,CUSIP,gvkey
29,10078.0,1992-08-20,2010-01-28,1986-03-04,2004-06-09,SUNW,SUN MICROSYSTEMS INC,86681020,012136
30,10078.0,1992-08-20,2010-01-28,2004-06-10,2007-08-26,SUNW,SUN MICROSYSTEMS INC,86681020,012136
31,10078.0,1992-08-20,2010-01-28,2007-08-27,2007-11-11,JAVA,SUN MICROSYSTEMS INC,86681020,012136
32,10078.0,1992-08-20,2010-01-28,2007-11-12,2007-12-10,JAVA,SUN MICROSYSTEMS INC,86681020,012136
33,10078.0,1992-08-20,2010-01-28,2007-12-11,2010-01-26,JAVA,SUN MICROSYSTEMS INC,86681020,012136
...,...,...,...,...,...,...,...,...,...
15991,93429.0,2017-03-01,2021-12-31,2017-10-18,2018-09-16,CBOE,C B O E GLOBAL MARKETS INC,12503M10,184500
15992,93429.0,2017-03-01,2021-12-31,2018-09-17,2021-03-28,CBOE,C B O E GLOBAL MARKETS INC,12503M10,184500
15993,93429.0,2017-03-01,2021-12-31,2021-03-29,2021-12-31,CBOE,C B O E GLOBAL MARKETS INC,12503M10,184500
15994,93436.0,2020-12-21,2021-12-31,2010-06-29,2017-02-01,TSLA,TESLA MOTORS INC,88160R10,184996


In [128]:
#remove unnecessary columns - including ticker, since that is non-unique
df_sp500 = msp500_ccmxpf[['PERMNO', "TICKER", 'COMNAM', 'NCUSIP','gvkey', 'liid', 'start', 'ending']]
df_sp500 = df_sp500[df_sp500["ending"] >= "2000-01-01"]
df_sp500_tickers = df_sp500.drop_duplicates("TICKER")
df_sp500 = df_sp500.drop_duplicates("PERMNO")

#convert cusip, gvkey, liid to str
df_sp500["COMNAM"] = df_sp500["COMNAM"].astype(str).str[2:-1]
df_sp500["NCUSIP"] = df_sp500["NCUSIP"].astype(str).str[2:-1]
df_sp500["gvkey"] = df_sp500["gvkey"].astype(str).str[2:-1]
df_sp500["liid"] = df_sp500["liid"].astype(str).str[2:-1]
df_sp500["PERMNO"] = df_sp500["PERMNO"].astype(int)
df_sp500.rename(columns={'PERMNO':'permno', "NCUSIP":"CUSIP","liid":"iid"}, inplace=True)

df_sp500_tickers["TICKER"] = df_sp500_tickers["TICKER"].astype(str).str[2:-1]
df_sp500_tickers["PERMNO"] = df_sp500_tickers["PERMNO"].astype(int)
df_sp500_tickers["gvkey"] = df_sp500_tickers["gvkey"].astype(str).str[2:-1]
df_sp500_tickers.rename(columns={'PERMNO':'permno', "NCUSIP":"CUSIP","liid":"iid"}, inplace=True)


df_sp500


Unnamed: 0,permno,TICKER,COMNAM,CUSIP,gvkey,iid,start,ending
29,10078,b'SUNW',SUN MICROSYSTEMS INC,86681010,012136,01,1992-08-20,2010-01-28
42,10104,b'ORCL',ORACLE SYSTEMS CORP,68389X10,012142,01,1989-08-03,2021-12-31
46,10107,b'MSFT',MICROSOFT CORP,59491810,012141,01,1994-06-07,2021-12-31
48,10108,b'SNDT',SUNGARD DATA SYSTEMS INC,86736310,012144,01,2002-07-22,2005-08-11
68,10137,,AMERICAN WATER WORKS & ELEC INC,,001279,00X,2000-12-11,2011-02-25
...,...,...,...,...,...,...,...,...
15981,93159,b'ESV',E N S C O PLC NEW,G3157S10,002270,01,2012-07-31,2016-03-29
15984,93246,b'GNRC',GENERAC HOLDINGS INC,36873610,183736,01,2021-03-22,2021-12-31
15985,93422,b'QEP',Q E P RESOURCES INC,74733V10,154357,01,2010-07-01,2015-06-30
15990,93429,b'CBOE',C B O E HOLDINGS INC,12503M10,184500,01,2017-03-01,2021-12-31


In [99]:
#save identifiers
save_list = ["permno","COMNAM", "CUSIP", "gvkey", "iid"]
for name in save_list:
    save_name = "CorrectMembers/SPX_CRSPMembers_"+ name +".txt"
    with open(save_name, 'w') as f:
        for item in df_sp500[name]:
            f.write("%s\n" % item)
print("done")

done


In [100]:
#tickers for analyst estimates
with open("CorrectMembers/SPX_CRSPMembers_TICKERS.txt", 'w') as f:
    for item in df_sp500_tickers["TICKER"]:
        f.write("%s\n" % item)
print("done")

done


# 3) Financial Ratios
- https://wrds-www.wharton.upenn.edu/pages/get-data/financial-ratios-suite-wrds/financial-ratios-with-ibes-subscription/financial-ratios-firm-level-ibes/
- Also use this to get conversion from PERMNO to TICKER
- create month label to merge with macro data


In [138]:
df_FundamentalRatios = pd.read_csv("CorrectMembers/FundamentalRatios.csv")
df_FundamentalRatios["month"] = pd.to_datetime(df_FundamentalRatios["public_date"]).dt.to_period('M')
df_FundamentalRatios.rename(columns={'cusip':'CUSIP'}, inplace=True)
df_FundamentalRatios = df_FundamentalRatios.drop(labels = ["permno","gvkey"],axis = 1)
df_FundamentalRatios = df_FundamentalRatios.merge(df_sp500_tickers[["permno","gvkey","TICKER"]],on = "TICKER", how = "inner")
df_FundamentalRatios.sort_values("month")

Unnamed: 0,adate,qdate,public_date,bm,evm,pe_exi,ps,pcf,dpr,npm,...,accrual,ptb,PEG_trailing,PEG_1yrforward,PEG_ltgforward,TICKER,CUSIP,month,permno,gvkey
11697,1998/12/31,1999/09/30,2000/01/31,0.266,23.916,36.936,11.366,80.683,0.000,0.102,...,0.028,8.229,,0.221,1.404,NVLS,67000810,2000-01,12067,014623
98538,1998/12/31,1999/09/30,2000/01/31,0.965,9.300,6.171,0.624,6.477,1.563,0.016,...,0.040,0.715,0.065,-0.187,0.606,CSX,12640810,2000-01,62148,002574
129841,1999/03/31,1999/09/30,2000/01/31,0.164,12.591,15.270,7.454,20.693,0.000,0.151,...,0.170,7.460,0.098,0.424,0.593,MCHP,59501710,2000-01,78987,027965
51535,1998/12/31,1999/09/30,2000/01/31,0.074,26.381,32.261,7.216,35.376,0.343,0.226,...,-0.024,13.598,,1.655,2.044,SGP,80660510,2000-01,25013,009459
51654,1998/12/31,1999/09/30,2000/01/31,0.393,15.086,10.965,2.104,3.232,0.027,0.199,...,0.041,2.220,9.462,1.056,0.960,CMA,20034010,2000-01,25081,003231
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132842,2019/12/31,2020/09/30,2020/12/31,0.658,8.582,,0.958,4.099,0.040,0.079,...,0.144,1.785,,,,UHS,91390310,2020-12,79637,011032
15996,2019/12/31,2020/09/30,2020/12/31,0.208,20.909,82.756,3.764,21.788,0.826,0.046,...,0.078,5.824,,,,XYL,98419M10,2020-12,13035,189491
48330,2019/12/31,2020/09/30,2020/12/31,0.793,10.918,,1.959,6.514,0.563,0.138,...,0.032,1.278,,,,ETR,29364G10,2020-12,24010,007366
163597,2020/09/30,2020/09/30,2020/12/31,0.298,14.586,35.118,4.610,16.400,0.000,0.131,...,0.088,4.854,,,,FFIV,31561610,2020-12,86964,121077


In [102]:
# get tickers
tickers = df_FundamentalRatios["TICKER"].unique()
with open("CorrectMembers/SPX_CRSPMembers_Tickers.txt", 'w') as f:
    for item in tickers:
        f.write("%s\n" % item)
print("done")

done


In [103]:
cusips = df_FundamentalRatios["CUSIP"].unique()
with open("CorrectMembers/SPX_CRSPMembers_Cusips.txt", 'w') as f:
    for item in cusips:
        f.write("%s\n" % item)
print("done")

done


# 4) Price Data
- compustat: https://wrds-www.wharton.upenn.edu/pages/get-data/compustat-capital-iq-standard-poors/compustat/north-america-daily/security-monthly/
- Optionmetrics: https://wrds-www.wharton.upenn.edu/pages/get-data/optionmetrics/ivy-db-us/securities/security-prices/

In [139]:
df_PriceData = pd.read_csv("CorrectMembers/PriceData.csv")
df_PriceData["month"] = pd.to_datetime(df_PriceData["datadate"]).dt.to_period('M')
df_PriceData.rename(columns={'tic':'TICKER'}, inplace=True)
df_PriceData["cusip"] = df_PriceData["cusip"].str[:-1]
df_PriceData.rename(columns={'cusip':'CUSIP'}, inplace=True)
df_PriceData = df_PriceData.drop(labels = ["gvkey"],axis = 1)
df_PriceData = df_PriceData.merge(df_sp500_tickers[["permno","gvkey","TICKER"]],on = "TICKER", how = "inner")
df_PriceData.sort_values(["month","TICKER"])

Unnamed: 0,iid,datadate,TICKER,CUSIP,conm,ajexm,cshtrm,curcdm,prccm,trfm,trt1m,cshom,ggroup,gind,gsector,gsubind,month,permno,gvkey
164902,01,2000/01/31,A,00846U10,AGILENT TECHNOLOGIES INC,1.0,32107900.0,USD,66.1875,1.0000,-14.3897,4.520000e+08,3520.0,352030.0,35.0,35203010.0,2000-01,87432,126554
5409,01,2000/01/31,AAPL,03783310,APPLE INC,112.0,111989500.0,USD,103.7500,1.0962,0.9119,1.611590e+08,4520.0,452020.0,45.0,45202030.0,2000-01,14593,001690
139070,01,2000/01/31,ABC,03073E10,AMERISOURCEBERGEN CORP,4.0,9007900.0,USD,18.1250,1.0000,19.3416,5.118800e+07,3510.0,351020.0,35.0,35102010.0,2000-01,81540,031673
98825,01,2000/01/31,ABMD,00365410,ABIOMED INC,2.0,1865200.0,USD,58.5000,1.0000,59.1837,8.708000e+06,3510.0,351010.0,35.0,35101010.0,2000-01,75107,013619
361,01,2000/01/31,ABT,00282410,ABBOTT LABORATORIES,1.0,124540200.0,USD,32.5625,2.2311,-9.8589,1.537311e+09,3510.0,351010.0,35.0,35101010.0,2000-01,20482,001078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171876,01,2021/12/31,ZBH,98956P10,ZIMMER BIOMET HOLDINGS INC,1.0,41701268.0,USD,127.0400,1.0875,6.4214,2.089080e+08,3510.0,351010.0,35.0,35101010.0,2021-12,89070,144559
116931,01,2021/12/31,ZBRA,98920710,ZEBRA TECHNOLOGIES CP -CL A,1.0,6187041.0,USD,595.2000,1.0000,1.0904,5.344100e+07,4520.0,452030.0,45.0,45203010.0,2021-12,76795,024405
88903,01,2021/12/31,ZION,98970110,ZIONS BANCORPORATION NA,1.0,27241303.0,USD,63.1600,3.5754,0.1268,1.564630e+08,4010.0,401010.0,40.0,40101015.0,2021-12,84129,011687
99976,01,2021/12/31,ZTS,98978V10,ZOETIS INC,1.0,30337184.0,USD,244.0300,1.0621,9.9036,4.731260e+08,3520.0,352020.0,35.0,35202010.0,2021-12,13788,013721


# 5) Analyst estimates
-  https://wrds-www.wharton.upenn.edu/pages/get-data/ibes-thomson-reuters/ibes-academic/unadjusted-summary/price-target/

In [140]:
df_AnalystEstimates = pd.read_csv("CorrectMembers/AnalystPredictions.csv")
df_AnalystEstimates["month"] = pd.to_datetime(df_AnalystEstimates["STATPERS"]).dt.to_period('M')
df_AnalystEstimates.rename(columns={'TICKER':'tic'}, inplace=True)
df_AnalystEstimates.rename(columns={'OFTIC':'TICKER'}, inplace=True)
df_AnalystEstimates = df_AnalystEstimates.merge(df_sp500_tickers[["permno","gvkey","TICKER"]],on = "TICKER", how = "inner")
df_AnalystEstimates.sort_values(["TICKER","month"])

Unnamed: 0,TICKER,tic,CUSIP,CNAME,STATPERS,NUMEST,NUMUP4W,NUMDOWN4W,MEDPTG,STDEV,PTGHIGH,PTGLOW,CURR,month,permno,gvkey
0,A,AT1,00846U10,AGILENT TECHNOLOGIES INC,2000/01/20,7,4,0,80.0,16.547,90.0,55.0,USD,2000-01,87432,126554
1,A,AT1,00846U10,AGILENT TECHNOLOGIES INC,2000/02/17,7,2,0,85.0,16.036,90.0,55.0,USD,2000-02,87432,126554
2,A,AT1,00846U10,AGILENT TECHNOLOGIES INC,2000/03/16,8,6,0,172.5,28.504,180.0,110.0,USD,2000-03,87432,126554
3,A,AT1,00846U10,AGILENT TECHNOLOGIES INC,2000/04/20,8,0,1,160.0,28.031,175.0,110.0,USD,2000-04,87432,126554
4,A,AT1,00846U10,AGILENT TECHNOLOGIES INC,2000/05/18,8,0,3,125.0,28.715,175.0,110.0,USD,2000-05,87432,126554
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210358,ZY,03GA,98985X10,ZYMERGEN INC,2021/08/19,5,0,1,42.0,19.950,56.0,12.0,USD,2021-08,40539,011672
210359,ZY,03GA,98985X10,ZYMERGEN INC,2021/09/16,5,0,2,12.0,14.328,43.0,8.0,USD,2021-09,40539,011672
210360,ZY,03GA,98985X10,ZYMERGEN INC,2021/10/14,5,0,0,12.0,14.328,43.0,8.0,USD,2021-10,40539,011672
210361,ZY,03GA,98985X10,ZYMERGEN INC,2021/11/18,5,0,3,9.0,2.345,13.0,7.0,USD,2021-11,40539,011672


In [106]:
print(df_AnalystEstimates["tic"].nunique())
print(df_AnalystEstimates["TICKER"].nunique())
print(df_PriceData["TICKER"].nunique())
print(df_FundamentalRatios["TICKER"].nunique())

1331
1247
1378
1320


# 6) Merge dataframes + Macro data

In [141]:
len(ListUnion(df_FundamentalRatios["TICKER"].unique(),df_AnalystEstimates["TICKER"].unique()))

1277

In [197]:
#merge
df_Merge = copy(df_FundamentalRatios)
df_Merge = df_Merge.merge(df_AnalystEstimates, on = ["gvkey","month"], how = "inner")
df_Merge = df_Merge.merge(df_PriceData, on = ["gvkey","month"], how = "inner")
df_Merge.sort_values("month")

Unnamed: 0,adate,qdate,public_date,bm,evm,pe_exi,ps,pcf,dpr,npm,...,curcdm,prccm,trfm,trt1m,cshom,ggroup,gind,gsector,gsubind,permno
98419,1998/12/31,1999/09/30,2000/01/31,0.709,5.262,2.261,0.247,5.717,0.852,0.026,...,USD,38.2500,2.8956,-20.8279,4.140000e+07,2010.0,201060.0,20.0,20106010.0,41080
48205,1998/12/31,1999/09/30,2000/01/31,0.616,7.213,14.037,1.072,5.235,0.003,0.077,...,USD,17.1875,12.6394,1.1029,5.699200e+07,5510.0,551020.0,55.0,55102010.0,15553
48204,1999/01/31,1999/10/31,2000/01/31,1.660,6.179,12.586,0.167,2.363,1.329,0.013,...,USD,17.1875,12.6394,1.1029,5.699200e+07,5510.0,551020.0,55.0,55102010.0,15553
48203,1999/01/31,1999/10/31,2000/01/31,1.660,6.179,12.586,0.167,2.363,1.329,0.013,...,USD,17.1875,12.6394,1.1029,5.699200e+07,5510.0,551020.0,55.0,55102010.0,15553
48202,1999/09/30,1999/09/30,2000/01/31,0.967,7.128,13.324,0.917,3.669,0.835,0.075,...,USD,17.1875,12.6394,1.1029,5.699200e+07,5510.0,551020.0,55.0,55102010.0,15553
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135990,2019/12/31,2020/09/30,2020/12/31,0.314,18.364,29.265,5.281,17.475,0.216,0.180,...,USD,11.2300,1.7893,17.3459,7.442454e+09,1010.0,101020.0,10.0,10102010.0,63467
34780,2019/12/31,2020/09/30,2020/12/31,0.513,5.274,,0.367,5.437,0.339,0.032,...,USD,47.8000,1.1302,10.5202,3.920100e+07,2010.0,201070.0,20.0,20107010.0,42024
135991,2019/12/31,2020/09/30,2020/12/31,0.314,18.364,29.265,5.281,17.475,0.216,0.180,...,USD,47.4100,1.8267,5.2854,2.840590e+08,4030.0,403010.0,40.0,40301010.0,63467
59765,2019/12/31,2020/09/30,2020/12/31,0.404,-2.398,,,-80.644,,,...,USD,21.0000,1.0000,4.1667,4.544500e+07,3520.0,352010.0,35.0,35201010.0,90352


In [198]:
#add in index inclusion
df_sp500.rename(columns={'PERMNO':'permno'}, inplace=True)
df_Merge = df_Merge.merge(df_sp500, on = ["permno"], how = "left")
df_Merge["is_member"] = ( (df_Merge["public_date"] >= df_Merge["start"]) & (df_Merge["public_date"] <= df_Merge["ending"]) )
df_Merge

  return merge(


Unnamed: 0,adate,qdate,public_date,bm,evm,pe_exi,ps,pcf,dpr,npm,...,gsubind,permno,TICKER_y,COMNAM,CUSIP_y,gvkey_y,iid_y,start,ending,is_member
0,2008/12/31,2009/09/30,2009/11/30,0.850,5.945,11.708,0.498,3.113,0.000,0.042,...,55102010.0,32986,b'EGAS',ENERGAS CO,29293410,004383,01,2019-02-15,2021-12-31,False
1,2008/12/31,2009/09/30,2009/11/30,0.850,5.945,11.708,0.498,3.113,0.000,0.042,...,55102010.0,32986,b'EGAS',ENERGAS CO,29293410,004383,01,2019-02-15,2021-12-31,False
2,2009/09/30,2009/09/30,2009/11/30,1.054,7.298,13.168,0.510,2.759,0.636,0.038,...,55102010.0,32986,b'EGAS',ENERGAS CO,29293410,004383,01,2019-02-15,2021-12-31,False
3,2009/09/30,2009/09/30,2009/11/30,1.054,7.298,13.168,0.510,2.759,0.636,0.038,...,55102010.0,32986,b'EGAS',ENERGAS CO,29293410,004383,01,2019-02-15,2021-12-31,False
4,2008/12/31,2009/09/30,2009/12/31,0.850,5.945,13.553,0.577,3.604,0.000,0.042,...,55102010.0,32986,b'EGAS',ENERGAS CO,29293410,004383,01,2019-02-15,2021-12-31,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220014,2019/12/31,2020/06/30,2020/08/31,0.049,35.254,258.197,18.062,171.660,0.000,0.014,...,25102010.0,93436,b'TSLA',TESLA MOTORS INC,88160R10,184996,01,2020-12-21,2021-12-31,False
220015,2019/12/31,2020/06/30,2020/09/30,0.049,35.254,222.285,15.820,150.352,0.000,0.014,...,25102010.0,93436,b'TSLA',TESLA MOTORS INC,88160R10,184996,01,2020-12-21,2021-12-31,False
220016,2019/12/31,2020/06/30,2020/10/31,0.049,35.254,201.057,14.308,135.979,0.000,0.014,...,25102010.0,93436,b'TSLA',TESLA MOTORS INC,88160R10,184996,01,2020-12-21,2021-12-31,False
220017,2019/12/31,2020/09/30,2020/11/30,0.039,53.275,494.500,19.095,123.713,0.000,0.020,...,25102010.0,93436,b'TSLA',TESLA MOTORS INC,88160R10,184996,01,2020-12-21,2021-12-31,False


In [203]:
#(df_Merge.count()/len(df_Merge)).sort_values(ascending = False)[:20]
np.sort(df_Merge.columns)

array(['CNAME', 'COMNAM', 'CURR', 'CUSIP_x', 'CUSIP_x', 'CUSIP_y',
       'CUSIP_y', 'MEDPTG', 'NUMDOWN4W', 'NUMEST', 'NUMUP4W',
       'PEG_1yrforward', 'PEG_ltgforward', 'PEG_trailing', 'PTGHIGH',
       'PTGLOW', 'STATPERS', 'STDEV', 'TICKER_x', 'TICKER_x', 'TICKER_y',
       'TICKER_y', 'accrual', 'adate', 'aftret_invcapx', 'ajexm',
       'at_turn', 'bm', 'cash_conversion', 'conm', 'cshom', 'cshtrm',
       'curcdm', 'curr_ratio', 'datadate', 'de_ratio', 'debt_assets',
       'dpr', 'ending', 'evm', 'ggroup', 'gind', 'gsector', 'gsubind',
       'gvkey_x', 'gvkey_y', 'iid_x', 'iid_y', 'is_member', 'month',
       'npm', 'pcf', 'pe_exi', 'permno', 'permno_x', 'permno_y', 'prccm',
       'ps', 'ptb', 'public_date', 'qdate', 'quick_ratio', 'roe',
       'sale_nwc', 'start', 'tic', 'trfm', 'trt1m'], dtype=object)

In [204]:
temp = df_Merge.pop('month')
df_Merge.insert(0, 'month', temp)
df_Merge = df_Merge.loc[:,~df_Merge.columns.duplicated()]
df_Merge.insert(0, 'TICKER', df_Merge["TICKER_x"])
df_Merge.insert(0, 'gvkey', df_Merge["gvkey_x"])
df_Merge.insert(0, 'XUSIP', df_Merge["CUSIP_x"])
df_Merge = df_Merge.drop(labels = ["sale_nwc", "cash_conversion", "curr_ratio", "quick_ratio", "PEG_ltgforward", "dpr", "PEG_1yrforward", "PEG_trailing",
                        "qdate", "public_date", "TICKER_x", "TICKER_y","CUSIP_x", "CUSIP_y", "datadate", "iid_x","iid_y","permno_x","permno_y","gvkey_x","gvkey_y","STATPERS","tic", "adate"],axis = 1)
df_Merge

Unnamed: 0,XUSIP,gvkey,TICKER,month,bm,evm,pe_exi,ps,pcf,npm,...,cshom,ggroup,gind,gsector,gsubind,permno,COMNAM,start,ending,is_member
0,29269V10,004383,EGAS,2009-11,0.850,5.945,11.708,0.498,3.113,0.042,...,92600000.0,5510.0,551020.0,55.0,55102010.0,32986,ENERGAS CO,2019-02-15,2021-12-31,False
1,29269V10,004383,EGAS,2009-11,0.850,5.945,11.708,0.498,3.113,0.042,...,4361000.0,5510.0,551020.0,55.0,55102010.0,32986,ENERGAS CO,2019-02-15,2021-12-31,False
2,04956010,004383,ATO,2009-11,1.054,7.298,13.168,0.510,2.759,0.038,...,92600000.0,5510.0,551020.0,55.0,55102010.0,32986,ENERGAS CO,2019-02-15,2021-12-31,False
3,04956010,004383,ATO,2009-11,1.054,7.298,13.168,0.510,2.759,0.038,...,4361000.0,5510.0,551020.0,55.0,55102010.0,32986,ENERGAS CO,2019-02-15,2021-12-31,False
4,29269V10,004383,EGAS,2009-12,0.850,5.945,13.553,0.577,3.604,0.042,...,92932000.0,5510.0,551020.0,55.0,55102010.0,32986,ENERGAS CO,2019-02-15,2021-12-31,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220014,88160R10,184996,TSLA,2020-08,0.049,35.254,258.197,18.062,171.660,0.014,...,931810000.0,2510.0,251020.0,25.0,25102010.0,93436,TESLA MOTORS INC,2020-12-21,2021-12-31,False
220015,88160R10,184996,TSLA,2020-09,0.049,35.254,222.285,15.820,150.352,0.014,...,933540000.0,2510.0,251020.0,25.0,25102010.0,93436,TESLA MOTORS INC,2020-12-21,2021-12-31,False
220016,88160R10,184996,TSLA,2020-10,0.049,35.254,201.057,14.308,135.979,0.014,...,947901000.0,2510.0,251020.0,25.0,25102010.0,93436,TESLA MOTORS INC,2020-12-21,2021-12-31,False
220017,88160R10,184996,TSLA,2020-11,0.039,53.275,494.500,19.095,123.713,0.020,...,947901000.0,2510.0,251020.0,25.0,25102010.0,93436,TESLA MOTORS INC,2020-12-21,2021-12-31,False


In [205]:
macro = ["CPI", "FedFundsTargetRate", "GDP", "MedianHomeSalesPrice", 
         "NonFarmPayrolls", "PMI", "PPI", "PrivateHousingStarts", "Unemployment"]

for ratio in macro:
    readname = "MacroData/df_US_" + ratio + ".pkl"
    df_temp = pd.read_pickle(readname)
    if ratio == "PPI":   #PPI had different format
        df_temp = df_temp[1:]
        df_temp.columns = ["year", "m", "month", "PPI"]
        df_temp = df_temp[["month", "PPI"]]
        df_temp["month"] = pd.to_datetime(df_temp["month"]).dt.to_period('M')
    elif ratio == "GDP":
        df_temp.columns = ["date", ratio]
        df_temp["date"] = pd.to_datetime(df_temp["date"])   #format date
        df_temp["month"] = df_temp['date'].dt.to_period('M')   #nearest month
        df_temp = df_temp[["month", ratio]]
        df_temp = df_temp.set_index('month').resample('M').interpolate().reset_index()
    else:
        df_temp.columns = ["date", ratio]
        df_temp["date"] = pd.to_datetime(df_temp["date"])   #format date
        df_temp["month"] = df_temp['date'].dt.to_period('M')   #nearest month
        df_temp = df_temp[["month", ratio]]
    df_Merge = df_Merge.merge(df_temp, on = ["month"], how = "left")

df_Merge.to_pickle("df_Merge.pkl")
df_Merge

Unnamed: 0,XUSIP,gvkey,TICKER,month,bm,evm,pe_exi,ps,pcf,npm,...,is_member,CPI,FedFundsTargetRate,GDP,MedianHomeSalesPrice,NonFarmPayrolls,PMI,PPI,PrivateHousingStarts,Unemployment
0,29269V10,004383,EGAS,2009-11,0.850,5.945,11.708,0.498,3.113,0.042,...,False,1.84,0.25,1.532499e+07,218800,130057000,54.4,177.4,0.64,9.9
1,29269V10,004383,EGAS,2009-11,0.850,5.945,11.708,0.498,3.113,0.042,...,False,1.84,0.25,1.532499e+07,218800,130057000,54.4,177.4,0.64,9.9
2,04956010,004383,ATO,2009-11,1.054,7.298,13.168,0.510,2.759,0.038,...,False,1.84,0.25,1.532499e+07,218800,130057000,54.4,177.4,0.64,9.9
3,04956010,004383,ATO,2009-11,1.054,7.298,13.168,0.510,2.759,0.038,...,False,1.84,0.25,1.532499e+07,218800,130057000,54.4,177.4,0.64,9.9
4,29269V10,004383,EGAS,2009-12,0.850,5.945,13.553,0.577,3.604,0.042,...,False,2.72,0.25,1.537916e+07,222600,129788000,55.3,178.1,0.63,9.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220014,88160R10,184996,TSLA,2020-08,0.049,35.254,258.197,18.062,171.660,0.014,...,False,1.31,0.25,1.812658e+07,325500,141149000,55.6,194.3,1.47,8.4
220015,88160R10,184996,TSLA,2020-09,0.049,35.254,222.285,15.820,150.352,0.014,...,False,1.37,0.25,1.856077e+07,344400,141865000,55.7,195.5,1.54,7.9
220016,88160R10,184996,TSLA,2020-10,0.049,35.254,201.057,14.308,135.979,0.014,...,False,1.18,0.25,1.862978e+07,346900,142545000,58.8,196.5,1.61,6.9
220017,88160R10,184996,TSLA,2020-11,0.039,53.275,494.500,19.095,123.713,0.020,...,False,1.17,0.25,1.869878e+07,350800,142809000,57.7,198.3,1.65,6.7


In [17]:
df_Merge = pd.read_pickle("df_Merge.pkl")

df_Merge.sort_values("month")[['TICKER', 'month', 'CURR', 'conm', 'ajexm', 'cshtrm',
       'curcdm', 'prccm', 'trfm', 'trt1m', 'cshom', 'ggroup', 'gind',
       'gsector', 'gsubind', 'permno','is_member']]

Unnamed: 0,TICKER,month,CURR,conm,ajexm,cshtrm,curcdm,prccm,trfm,trt1m,cshom,ggroup,gind,gsector,gsubind,permno,is_member
98419,CUM,2000-01,USD,CUMMINS INC,4.0,5.719100e+06,USD,38.2500,2.8956,-20.8279,4.140000e+07,2010.0,201060.0,20.0,20106010.0,41080,True
48205,GAS,2000-01,USD,SOUTHERN CO GAS,1.0,2.584300e+06,USD,17.1875,12.6394,1.1029,5.699200e+07,5510.0,551020.0,55.0,55102010.0,15553,False
48204,AGL,2000-01,USD,SOUTHERN CO GAS,1.0,2.584300e+06,USD,17.1875,12.6394,1.1029,5.699200e+07,5510.0,551020.0,55.0,55102010.0,15553,False
48203,AGL,2000-01,USD,SOUTHERN CO GAS,1.0,2.584300e+06,USD,17.1875,12.6394,1.1029,5.699200e+07,5510.0,551020.0,55.0,55102010.0,15553,False
48202,ATG,2000-01,USD,SOUTHERN CO GAS,1.0,2.584300e+06,USD,17.1875,12.6394,1.1029,5.699200e+07,5510.0,551020.0,55.0,55102010.0,15553,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135990,BRO,2020-12,USD,PETROLEO BRASILEIRO SA- PETR,1.0,4.462285e+08,USD,11.2300,1.7893,17.3459,7.442454e+09,1010.0,101020.0,10.0,10102010.0,63467,False
34780,BCC,2020-12,USD,BOISE CASCADE CO,1.0,6.580090e+06,USD,47.8000,1.1302,10.5202,3.920100e+07,2010.0,201070.0,20.0,20107010.0,42024,False
135991,BRO,2020-12,USD,BROWN & BROWN INC,1.0,2.206736e+07,USD,47.4100,1.8267,5.2854,2.840590e+08,4030.0,403010.0,40.0,40301010.0,63467,False
59765,DYN,2020-12,USD,DYNE THERAPEUTICS INC,1.0,7.221982e+06,USD,21.0000,1.0000,4.1667,4.544500e+07,3520.0,352010.0,35.0,35201010.0,90352,False


In [14]:
df_Merge.columns

Index(['XUSIP', 'gvkey', 'TICKER', 'month', 'bm', 'evm', 'pe_exi', 'ps', 'pcf',
       'npm', 'roe', 'aftret_invcapx', 'debt_assets', 'de_ratio', 'at_turn',
       'accrual', 'ptb', 'CNAME', 'NUMEST', 'NUMUP4W', 'NUMDOWN4W', 'MEDPTG',
       'STDEV', 'PTGHIGH', 'PTGLOW', 'CURR', 'conm', 'ajexm', 'cshtrm',
       'curcdm', 'prccm', 'trfm', 'trt1m', 'cshom', 'ggroup', 'gind',
       'gsector', 'gsubind', 'permno', 'COMNAM', 'start', 'ending',
       'is_member', 'CPI', 'FedFundsTargetRate', 'GDP', 'MedianHomeSalesPrice',
       'NonFarmPayrolls', 'PMI', 'PPI', 'PrivateHousingStarts',
       'Unemployment'],
      dtype='object')

In [207]:
(df_Merge.count()/len(df_Merge)).sort_values(ascending = False)

XUSIP                   1.000000
NUMDOWN4W               1.000000
PTGHIGH                 1.000000
PTGLOW                  1.000000
CURR                    1.000000
gvkey                   1.000000
permno                  1.000000
COMNAM                  1.000000
start                   1.000000
ending                  1.000000
is_member               1.000000
CPI                     1.000000
FedFundsTargetRate      1.000000
GDP                     1.000000
MedianHomeSalesPrice    1.000000
NonFarmPayrolls         1.000000
PMI                     1.000000
PPI                     1.000000
PrivateHousingStarts    1.000000
MEDPTG                  1.000000
conm                    1.000000
NUMUP4W                 1.000000
month                   1.000000
TICKER                  1.000000
Unemployment            1.000000
NUMEST                  1.000000
CNAME                   1.000000
ajexm                   0.999845
trfm                    0.999714
curcdm                  0.999568
prccm     

# 7) Format data for ML
- something wrong with prices here!

In [213]:
df_all = copy(df_Merge)
company_name = "TICKER"
# returns
df_all["mcap"] = df_all["prccm"] * df_all["cshom"]
df_all["price_adjusted"] = df_all["prccm"] /df_all["ajexm"]

#melt, then pivot to create price df
prices = df_all.melt(id_vars = [company_name, "month"], value_vars = "price_adjusted", var_name = "price_adjusted")
prices = prices.pivot_table(values = "value", index = "month", columns = company_name)

#rets over diff periods
periods = [-1, 1,3,6,9,12]
label = ["pred_target","return_1M","return_3M","return_6M","return_9M","return_12M"]
for j in range(len(periods)):
    if j> 0:
        i = periods[j]
        ret = prices/prices.shift(i) - 1
        df_melt = ret.melt(var_name = company_name, value_name = label[j], ignore_index = False)
        df_all = df_all.merge(df_melt, on = [company_name, "month"],how = "left")
    else:
        ret = (prices/prices.shift() - 1).shift(-1)
        df_melt = ret.melt(var_name = company_name, value_name = label[j], ignore_index = False)
        df_all = df_all.merge(df_melt, on = [company_name, "month"],how = "left")
        
df_all

Unnamed: 0,XUSIP,gvkey,TICKER,month,bm,evm,pe_exi,ps,pcf,npm,...,PrivateHousingStarts,Unemployment,mcap,price_adjusted,pred_target,return_1M,return_3M,return_6M,return_9M,return_12M
0,29269V10,004383,EGAS,2009-11,0.850,5.945,11.708,0.498,3.113,0.042,...,0.64,9.9,2.536314e+09,27.390,0.094026,,,,,
1,29269V10,004383,EGAS,2009-11,0.850,5.945,11.708,0.498,3.113,0.042,...,0.64,9.9,3.880418e+07,8.898,0.094026,,,,,
2,04956010,004383,ATO,2009-11,1.054,7.298,13.168,0.510,2.759,0.038,...,0.64,9.9,2.536314e+09,27.390,0.094026,-0.012034,0.014198,0.117241,0.179331,0.130115
3,04956010,004383,ATO,2009-11,1.054,7.298,13.168,0.510,2.759,0.038,...,0.64,9.9,3.880418e+07,8.898,0.094026,-0.012034,0.014198,0.117241,0.179331,0.130115
4,29269V10,004383,EGAS,2009-12,0.850,5.945,13.553,0.577,3.604,0.042,...,0.63,9.9,2.732201e+09,29.400,-0.050882,0.094026,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220014,88160R10,184996,TSLA,2020-08,0.049,35.254,258.197,18.062,171.660,0.014,...,1.47,8.4,4.643396e+11,498.320,-0.139087,0.741452,1.983952,2.729996,6.551676,10.043837
220015,88160R10,184996,TSLA,2020-09,0.049,35.254,222.285,15.820,150.352,0.014,...,1.54,7.9,4.004980e+11,429.010,-0.095499,-0.139087,0.986507,3.093607,4.127650,7.905426
220016,88160R10,184996,TSLA,2020-10,0.049,35.254,201.057,14.308,135.979,0.014,...,1.61,6.9,3.678235e+11,388.040,0.462736,-0.095499,0.356063,1.481455,1.982308,5.160930
220017,88160R10,184996,TSLA,2020-11,0.039,53.275,494.500,19.095,123.713,0.020,...,1.65,6.7,5.380286e+11,567.600,0.243252,0.462736,0.139027,2.398802,3.248567,7.601564


In [214]:
df_nn = copy(df_all)
#Convert indices to 0-2
df_nn["PMI"] = df_nn["PMI"]/100
df_nn["PPI"] = df_nn["PPI"].astype(float)/100

#one hot encode sectors
df_nn["gsector"] = df_nn["gsector"]/5 - 2

#growth measures
df_nn.rename(columns={'cshtrm':'volume'}, inplace=True)
growth_measures = ["GDP", "MedianHomeSalesPrice", "PrivateHousingStarts","NonFarmPayrolls", "volume"]
for ratio in growth_measures:
    if ratio == "volume":  #no need for percentage. Normalise to a fraction
        df_nn[ratio] = df_nn.groupby(company_name)[ratio].apply(lambda x: (x.diff()[1:]/x.shift(1)))
    else:
        df_nn[ratio] = df_nn.groupby(company_name)[ratio].apply(lambda x: (100* x.diff()[1:]/x.shift(1)))

# medptg - adjusted to a return: slightly weird denominator for normalisation
df_nn["MEDPTG"] = np.exp((df_nn["MEDPTG"] - df_nn["price_adjusted"])/df_nn["MEDPTG"])

# convert analyst up/down to percentage
df_nn["PCTUP4W"] = df_nn["NUMUP4W"]/df_nn["NUMEST"]   #use 4 weeks since there are granularity issues with 1M (e.g. NUMDOWN1M > NUMEST for index 180834)
df_nn["PCTDOWN4W"] = df_nn["NUMDOWN4W"]/df_nn["NUMEST"]

#remove first 12 months & last month
df_nn = df_nn[(df_nn["month"]>= "2001-01") & (df_nn["month"]<= "2020-11")].reset_index(drop = True)

#remove index non-members
df_nn = df_nn[df_nn["is_member"] == True]

#remove unnecessary columns
valid_cols = [company_name, "month", "pred_target","price_adjusted","mcap",
              "debt_assets", "de_ratio", "evm", "pe_exi", 
              "roe", "npm","ps", "ptb", "pcf", "aftret_invcapx",
              "CPI", "PMI", "PPI", "FedFundsTargetRate", "GDP", 
              "MedianHomeSalesPrice", "PrivateHousingStarts","NonFarmPayrolls", "Unemployment",
            "MEDPTG", "PCTUP4W", "PCTDOWN4W", "volume",
             "return_1M", "return_3M", "return_6M", "return_9M", "return_12M"]

df_nn = df_nn[valid_cols]
#fill nans
df_nn.iloc[:,2:]=df_nn.iloc[:,2:].replace(np.nan, df_nn.iloc[:,2:].mean(axis = 0))

#save
df_nn.to_pickle("df_NeuralNetworkFeatures.pkl")
df_nn



Unnamed: 0,TICKER,month,pred_target,price_adjusted,mcap,debt_assets,de_ratio,evm,pe_exi,roe,...,Unemployment,MEDPTG,PCTUP4W,PCTDOWN4W,volume,return_1M,return_3M,return_6M,return_9M,return_12M
688,SUNW,2006-02,-0.173913,1150.00,3.234794e+10,0.537,1.158,31.570,-166.800,-0.049,...,4.8,3.755182e-125,0.111111,0.111111,74.750154,0.009367,0.023967,0.047332,0.068906,0.094519
689,SUNW,2006-03,0.047368,950.00,3.234794e+10,0.537,1.158,31.570,-205.200,-0.049,...,4.7,1.946951e-103,0.111111,0.000000,0.391174,-0.173913,0.023967,0.047332,0.068906,0.094519
690,SUNW,2006-04,-0.120603,995.00,3.170707e+07,0.537,1.158,31.570,-200.000,-0.049,...,4.7,2.532457e-108,0.200000,0.000000,-0.671308,0.047368,0.023967,0.047332,0.068906,0.094519
691,SUNW,2006-05,0.000000,875.00,2.788310e+07,0.543,1.188,36.801,-123.470,-0.078,...,4.6,2.269322e-73,0.500000,0.000000,1.051449,-0.120603,-0.239130,0.047332,0.068906,0.094519
692,SUNW,2006-06,-0.200000,875.00,2.788310e+07,0.543,1.188,36.801,-110.670,-0.078,...,4.6,2.269322e-73,0.222222,0.111111,-0.780028,0.000000,-0.078947,0.047332,0.068906,0.094519
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209711,CBOE,2020-07,0.046636,87.70,9.622444e+09,0.351,0.541,14.910,22.430,0.117,...,10.2,1.140747e+00,0.000000,0.500000,-0.163128,-0.059820,-0.117529,-0.288265,-0.238385,-0.197695
209712,CBOE,2020-08,-0.044122,91.79,9.982897e+09,0.360,0.561,14.185,22.065,0.124,...,8.4,1.055180e+00,0.000000,0.500000,0.071894,0.046636,-0.137798,-0.194825,-0.228007,-0.229691
209713,CBOE,2020-09,-0.073513,87.74,9.542427e+09,0.360,0.561,14.185,21.091,0.124,...,7.9,1.079417e+00,0.133333,0.066667,0.091796,-0.044122,-0.059391,-0.016919,-0.268833,-0.236446
209714,CBOE,2020-10,0.123385,81.29,8.782734e+09,0.360,0.561,14.185,19.541,0.124,...,6.9,1.144780e+00,0.062500,0.375000,0.130047,-0.073513,-0.073090,-0.182029,-0.340286,-0.294051
