This file briefly explains how the data was formatted

# 1) Setup

## 1a) Library functions

In [95]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from time import time
from datetime import datetime, timedelta

print("done")

done


## 1b) Helper functions

In [33]:
def ListUnion(l1, l2):
    union = list(set(l1) | set(l2))
    return union

# 2) SPX Index Members
Uses a web scraper from https://github.com/leosmigel/analyzingalpha/tree/master/2019-09-18-sp500-historical-components-and-changes
- Note: only data until Sep 2021 was collected
- Did not rerun scraper since we only consider prices up to Dec 2021

In [40]:
df_CurrentMembers = pd.read_csv("MarketData/SPX_CurrentMembers.txt", encoding = "utf-16")
df_CurrentMembers = df_CurrentMembers.iloc[:,1:]
df_HistoricalMembers = pd.read_csv("MarketData/SPX_HistoricalMembers.txt", encoding = "utf-16")
df_HistoricalMembers = df_HistoricalMembers.iloc[:,1:]

tickers_current = list(df_CurrentMembers["ticker"])
tickers_historical = list(df_HistoricalMembers["ticker"])
tickers_all = ListUnion(tickers_current,tickers_historical)
print(len(tickers_all))
with open("MarketData/SPX_AllMembers.txt", 'w') as f:
    for item in tickers_all:
        f.write("%s\n" % item)
print("done")

774
done


In [41]:
df_CurrentMembers

Unnamed: 0,ticker,name,date,cik
0,MMM,3M,1976-08-09,66740
1,ABT,Abbott Laboratories,1964-03-31,1800
2,ABBV,AbbVie,2012-12-31,1551152
3,ABMD,Abiomed,2018-05-31,815094
4,ACN,Accenture,2011-07-06,1467373
...,...,...,...,...
500,YUM,Yum! Brands,1997-10-06,1041061
501,ZBRA,Zebra Technologies,2019-12-23,877212
502,ZBH,Zimmer Biomet,2001-08-07,1136869
503,ZION,Zions Bancorp,2001-06-22,109380


# 3) Analyst Estimates
- Downloaded from WRDS: https://wrds-www.wharton.upenn.edu/pages/get-data/ibes-thomson-reuters/ibes-academic/unadjusted-summary/price-target/
- Note: sometimes few analyst estimates, so median may be more reliable than direction

In [75]:
df_AnalystEstimates = pd.read_csv("FundamentalData/AnalystEstimates.csv")
df_AnalystEstimates

Unnamed: 0,OFTIC,TICKER,CUSIP,CNAME,STATPERS,NUMEST,NUMUP4W,NUMDOWN4W,NUMUP1M,NUMDOWN1M,MEANPTG,MEDPTG,STDEV,PTGHIGH,PTGLOW,CURR
0,A,@F9I,SI496526,ALFESCA,2006/03/16,1,0,0,0,0,5.100,5.1,,5.1,5.1,ISK
1,A,@F9I,SI496526,ALFESCA,2006/04/20,1,1,0,1,0,5.400,5.4,,5.4,5.4,ISK
2,A,@F9I,SI496526,ALFESCA,2006/05/18,1,0,0,0,0,5.400,5.4,,5.4,5.4,ISK
3,A,@F9I,SI496526,ALFESCA,2006/06/15,1,0,0,0,0,5.400,5.4,,5.4,5.4,ISK
4,A,@F9I,SI496526,ALFESCA,2006/07/20,1,0,0,0,0,5.400,5.4,,5.4,5.4,ISK
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
228342,ZTS,ZOTS,98978V10,ZOETIS INC,2021/08/19,13,9,0,9,0,225.000,227.0,14.259,240.0,184.0,USD
228343,ZTS,ZOTS,98978V10,ZOETIS INC,2021/09/16,13,0,0,0,0,225.000,227.0,14.259,240.0,184.0,USD
228344,ZTS,ZOTS,98978V10,ZOETIS INC,2021/10/14,12,0,0,0,0,224.583,226.0,14.811,240.0,184.0,USD
228345,ZTS,ZOTS,98978V10,ZOETIS INC,2021/11/18,11,7,0,7,0,236.545,245.0,21.398,258.0,184.0,USD


In [50]:
#median price targets
df_temp = df_AnalystEstimates[["OFTIC", "STATPERS","MEDPTG"]]
df_AnalystMedian = pd.pivot_table(df_temp, values='MEDPTG', index=["STATPERS"],
                    columns=['OFTIC'])
df_AnalystMedian

OFTIC,A,AA,AAL,AAP,AAPL,ABBV,ABC,ABK,ABMD,ABS,...,XRAY,XRX,XTO,XYL,YHOO,YUM,ZBH,ZBRA,ZION,ZTS
STATPERS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000/01/20,80.0,100.0,,,135.00,,,77.0,,45.50,...,36.5,38.0,17.0,,495.0,61.0,,60.0,74.0,
2000/02/17,85.0,100.0,,,137.00,,,71.0,,45.00,...,36.5,35.0,17.0,,247.5,61.0,,75.0,72.0,
2000/03/16,172.5,97.5,,,139.00,,,74.0,100.0,44.00,...,36.5,35.0,17.5,,250.0,61.0,,77.5,69.5,
2000/04/20,160.0,95.2,,,140.00,,,75.0,100.0,41.00,...,36.0,35.0,17.0,,250.0,60.0,,80.0,58.0,
2000/05/18,125.0,95.0,,,140.00,,,75.0,100.0,35.00,...,37.0,32.5,20.0,,247.0,50.0,,80.0,58.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021/08/19,170.0,50.0,35.8435,230.0,168.00,126.0,526.100000,,415.0,23.65,...,74.5,20.0,,130.0,,138.0,193.5,595.0,57.0,227.0
2021/09/16,170.0,51.0,35.6565,242.5,169.64,127.5,526.500000,,415.0,23.65,...,74.5,20.0,,130.0,,138.0,193.5,605.0,59.0,227.0
2021/10/14,176.0,56.0,33.9365,240.5,169.64,129.5,526.500000,,415.0,23.65,...,72.0,18.0,,125.0,,138.0,189.0,617.5,64.0,226.0
2021/11/18,176.0,60.0,32.2420,265.5,170.00,130.0,541.333333,,415.0,23.65,...,69.0,17.0,,130.0,,135.5,172.0,630.0,67.0,245.0


In [54]:
# %up
df_temp = df_AnalystEstimates[["OFTIC", "STATPERS","NUMUP1M","NUMEST"]]
df_temp["NUMUP1M"] = df_temp["NUMUP1M"]/df_temp["NUMEST"]
df_AnalystUp = pd.pivot_table(df_temp, values='NUMUP1M', index=["STATPERS"],
                    columns=['OFTIC'])
df_AnalystUp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temp["NUMUP1M"] = df_temp["NUMUP1M"]/df_temp["NUMEST"]


OFTIC,A,AA,AAL,AAP,AAPL,ABBV,ABC,ABK,ABMD,ABS,...,XRAY,XRX,XTO,XYL,YHOO,YUM,ZBH,ZBRA,ZION,ZTS
STATPERS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000/01/20,0.428571,0.583333,,,0.384615,,,0.0,,0.00,...,0.000000,0.000000,0.0,,0.636364,0.000000,,0.000000,0.142857,
2000/02/17,0.285714,0.000000,,,0.000000,,,0.0,,0.00,...,0.000000,0.142857,0.0,,0.000000,0.000000,,0.500000,0.000000,
2000/03/16,0.750000,0.000000,,,0.000000,,,0.0,0.0,0.00,...,0.000000,0.000000,0.0,,0.000000,0.000000,,0.250000,0.000000,
2000/04/20,0.000000,0.071429,,,0.214286,,,0.2,0.0,0.00,...,0.000000,0.000000,0.0,,0.090909,0.000000,,0.250000,0.285714,
2000/05/18,0.000000,0.076923,,,0.000000,,,0.0,0.0,0.00,...,0.000000,0.000000,0.6,,0.000000,0.000000,,0.000000,0.000000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021/08/19,0.812500,0.454545,0.353535,0.266667,0.600000,0.375000,0.351140,,0.5,0.25,...,0.083333,0.600000,,0.833333,,0.826087,0.038462,0.700000,0.095238,0.692308
2021/09/16,0.117647,0.545455,0.052139,0.750000,0.142857,0.090909,0.215456,,0.0,0.00,...,0.000000,0.000000,,0.000000,,0.045455,0.000000,0.200000,0.285714,0.000000
2021/10/14,0.125000,0.454545,0.000000,0.000000,0.000000,0.045455,0.051282,,0.0,0.00,...,0.000000,0.000000,,0.090909,,0.000000,0.000000,0.200000,0.409091,0.000000
2021/11/18,0.000000,0.500000,0.023810,0.875000,0.200000,0.260870,0.284799,,0.0,0.00,...,0.000000,0.000000,,0.363636,,0.200000,0.000000,0.727273,0.636364,0.636364


In [55]:
# %down
df_temp = df_AnalystEstimates[["OFTIC", "STATPERS","NUMDOWN1M","NUMEST"]]
df_temp["NUMDOWN1M"] = df_temp["NUMDOWN1M"]/df_temp["NUMEST"]
df_AnalystDown = pd.pivot_table(df_temp, values='NUMDOWN1M', index=["STATPERS"],
                    columns=['OFTIC'])
df_AnalystDown

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temp["NUMDOWN1M"] = df_temp["NUMDOWN1M"]/df_temp["NUMEST"]


OFTIC,A,AA,AAL,AAP,AAPL,ABBV,ABC,ABK,ABMD,ABS,...,XRAY,XRX,XTO,XYL,YHOO,YUM,ZBH,ZBRA,ZION,ZTS
STATPERS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000/01/20,0.000,0.000000,,,0.000,,,0.000000,,0.000000,...,0.000000,0.0,0.000000,,0.000000,0.000000,,0.0,0.000000,
2000/02/17,0.000,0.083333,,,0.000,,,0.200000,,0.000000,...,0.000000,0.0,0.222222,,0.000000,0.166667,,0.0,0.000000,
2000/03/16,0.000,0.142857,,,0.000,,,0.000000,0.0,0.222222,...,0.000000,0.0,0.000000,,0.090909,0.000000,,0.0,0.285714,
2000/04/20,0.125,0.142857,,,0.000,,,0.000000,0.0,0.111111,...,0.000000,0.0,0.090909,,0.000000,0.000000,,0.0,0.142857,
2000/05/18,0.375,0.076923,,,0.000,,,0.333333,0.0,0.000000,...,0.000000,0.0,0.000000,,0.000000,0.000000,,0.0,0.000000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021/08/19,0.000,0.090909,0.113636,0.0000,0.025,0.041667,0.000000,,0.0,0.000000,...,0.416667,0.2,,0.000000,,0.000000,0.346154,0.0,0.428571,0.0
2021/09/16,0.000,0.000000,0.149733,0.0625,0.000,0.136364,0.111111,,0.0,0.000000,...,0.000000,0.0,,0.000000,,0.000000,0.000000,0.0,0.000000,0.0
2021/10/14,0.000,0.000000,0.313492,0.0625,0.000,0.000000,0.000000,,0.0,0.000000,...,0.181818,0.2,,0.636364,,0.000000,0.192308,0.0,0.000000,0.0
2021/11/18,0.000,0.000000,0.253968,0.0000,0.075,0.043478,0.000000,,0.0,0.000000,...,0.500000,0.6,,0.090909,,0.100000,0.791667,0.0,0.181818,0.0


In [56]:
# num_estimates
df_temp = df_AnalystEstimates[["OFTIC", "STATPERS","NUMEST"]]
df_AnalystNum = pd.pivot_table(df_temp, values='NUMEST', index=["STATPERS"],
                    columns=['OFTIC'])
df_AnalystNum

OFTIC,A,AA,AAL,AAP,AAPL,ABBV,ABC,ABK,ABMD,ABS,...,XRAY,XRX,XTO,XYL,YHOO,YUM,ZBH,ZBRA,ZION,ZTS
STATPERS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000/01/20,7.0,12.0,,,13.0,,,5.0,,8.0,...,2.0,9.0,10.0,,11.0,6.0,,3.0,7.0,
2000/02/17,7.0,12.0,,,14.0,,,5.0,,9.0,...,2.0,7.0,9.0,,11.0,6.0,,4.0,8.0,
2000/03/16,8.0,14.0,,,15.0,,,4.0,1.0,9.0,...,2.0,7.0,8.0,,11.0,6.0,,4.0,7.0,
2000/04/20,8.0,14.0,,,14.0,,,5.0,1.0,9.0,...,3.0,7.0,11.0,,11.0,7.0,,4.0,7.0,
2000/05/18,8.0,13.0,,,14.0,,,3.0,1.0,9.0,...,2.0,6.0,10.0,,13.0,5.0,,4.0,5.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021/08/19,16.0,11.0,20.0,15.0,40.0,24.0,10.000000,,6.0,1.5,...,12.0,5.0,,12.0,,23.0,26.0,10.0,21.0,13.0
2021/09/16,17.0,11.0,19.5,16.0,42.0,22.0,10.000000,,6.0,1.5,...,12.0,5.0,,12.0,,22.0,26.0,10.0,21.0,13.0
2021/10/14,16.0,11.0,19.5,16.0,42.0,22.0,10.000000,,6.0,1.5,...,11.0,5.0,,11.0,,22.0,26.0,10.0,22.0,12.0
2021/11/18,16.0,12.0,19.5,16.0,40.0,23.0,9.333333,,6.0,1.5,...,12.0,5.0,,11.0,,20.0,24.0,11.0,22.0,11.0


In [57]:
#save to pickle
df_AnalystEstimates.to_pickle("FundamentalData/df_AnalystEstimates.pkl")
df_AnalystMedian.to_pickle("FundamentalData/df_AnalystMedian.pkl")
df_AnalystUp.to_pickle("FundamentalData/df_AnalystUp.pkl")
df_AnalystDown.to_pickle("FundamentalData/df_AnalystDown.pkl")
df_AnalystNum.to_pickle("FundamentalData/df_AnalystNum.pkl")
print("done")

done


# 4) Macro Data
- Downloaded from Capital IQ
- Except for PPI (credit @ Joseph Garcia Ben)

In [76]:
#convert xlsx to txt
#  open with encoding, sep & header
df_CPI = pd.read_csv("MacroData/US_CPI.txt", encoding = "utf-16", sep = "\t", header = None)
df_CPI

Unnamed: 0,0,1
0,Dec-31-2021,7.04
1,Nov-30-2021,6.81
2,Oct-31-2021,6.22
3,Sep-30-2021,5.39
4,Aug-31-2021,5.25
...,...,...
260,Apr-30-2000,3.07
261,Mar-31-2000,3.76
262,Feb-29-2000,3.22
263,Jan-31-2000,2.74


In [78]:
filenames = ["Dates_USAll",
            "Dates_USFed",
            "US_CPI",
            "US_FedFundsTargetRate",
            "US_GDP",
            "US_MedianHomeSalesPrice",
            "US_NonFarm",
            "US_NonFarmPayrolls",
            "US_PMI",
            "US_PPI",
            "US_PrivateHousingStarts",
            "US_Unemployment"]
for file in filenames:
    readname = "MacroData/" + file + ".txt"
    savename = "MacroData/df_" + file + ".pkl"
    df_temp = pd.read_csv(readname, encoding = "utf-16", sep = "\t", header = None)
    print(readname)
    print(df_temp[:3])
    df_temp.to_pickle(savename)
print("DONE")

MacroData/Dates_USAll.txt
                      0                              1
0  Nov-03-2004 10:00 AM      US: ISM Non-Manufacturing
1   Nov-10-2004 2:15 PM  US: FOMC - Fed Funds Rate (%)
2   Nov-16-2004 8:30 AM       US: PPI ex Food & Energy
MacroData/Dates_USFed.txt
                     0                              1
0  Nov-10-2004 2:15 PM  US: FOMC - Fed Funds Rate (%)
1  Dec-14-2004 2:15 PM  US: FOMC - Fed Funds Rate (%)
2  Feb-02-2005 2:15 PM  US: FOMC - Fed Funds Rate (%)
MacroData/US_CPI.txt
             0     1
0  Dec-31-2021  7.04
1  Nov-30-2021  6.81
2  Oct-31-2021  6.22
MacroData/US_FedFundsTargetRate.txt
             0     1
0  Feb-01-2022  0.25
1  Jan-01-2022  0.25
2  Dec-01-2021  0.25
MacroData/US_GDP.txt
             0         1
0  Dec-31-2021  19805962
1  Sep-30-2021  19478893
2  Jun-30-2021  19368310
MacroData/US_MedianHomeSalesPrice.txt
             0       1
0  Dec-31-2021  377700
1  Nov-30-2021  416100
2  Oct-31-2021  421500
MacroData/US_NonFarm.txt
           

# 5) Fundamental Ratios

## 5a) Initial ratios
- some were poorly filled, especially divyield, PEG_trailing, sale_nwc, quick_ratio
- 5b will replace these with alternative ratios

In [79]:
df_FundamentalRatios = pd.read_csv("FundamentalData/SPX_FundamentalRatios.csv")
df_FundamentalRatios

Unnamed: 0,permno,adate,qdate,public_date,evm,pe_exi,ps,npm,roe,debt_assets,de_ratio,quick_ratio,sale_nwc,ptb,PEG_trailing,divyield,TICKER,cusip
0,10104,19990531.0,19991130,20000131,23.669,25.486,15.247,0.155,0.413,0.470,0.888,1.834,4.325,40.279,0.158,,ORCL,68389X10
1,10104,19990531.0,19991130,20000229,23.669,37.883,22.592,0.155,0.413,0.470,0.888,1.834,4.325,59.683,0.234,,ORCL,68389X10
2,10104,19990531.0,19991130,20000331,23.669,39.828,23.752,0.155,0.413,0.470,0.888,1.834,4.325,62.748,0.246,,ORCL,68389X10
3,10104,19990531.0,20000229,20000430,34.204,62.451,23.393,0.197,0.529,0.471,0.891,1.819,4.357,54.508,0.244,,ORCL,68389X10
4,10104,19990531.0,20000229,20000531,34.204,56.152,20.805,0.197,0.529,0.471,0.891,1.819,4.357,30.870,0.219,,ORCL,68389X10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
156627,93436,20191231.0,20200630,20200831,35.254,258.197,18.062,0.014,0.054,0.736,3.310,0.835,16.310,47.117,46.705,,TSLA,88160R10
156628,93436,20191231.0,20200630,20200930,35.254,222.285,15.820,0.014,0.054,0.736,3.310,0.835,16.310,41.269,40.209,,TSLA,88160R10
156629,93436,20191231.0,20200630,20201031,35.254,201.057,14.308,0.014,0.054,0.736,3.310,0.835,16.310,37.324,36.369,,TSLA,88160R10
156630,93436,20191231.0,20200930,20201130,53.275,494.500,19.095,0.020,0.070,0.694,2.586,0.991,7.110,33.562,,,TSLA,88160R10


In [84]:
ratios = ["evm","pe_exi","ps", "npm", "roe", "debt_assets", "de_ratio","quick_ratio", "sale_nwc", "ptb", "PEG_trailing"]
for ratio in ratios:
    savename = "FundamentalData/df_" + ratio + ".pkl"
    df_temp = df_FundamentalRatios[["TICKER", "public_date",ratio]]
    df_save = pd.pivot_table(df_temp, values=ratio, index=["public_date"],
                    columns=['TICKER'])
    print(savename)
    print(df_save[:2])
    df_save.to_pickle(savename)
print("DONE")



FundamentalData/df_evm.pkl
TICKER        A      AA    AAG  AAL  AAP    AAPL    AAS  ABBV  ABC  ABI  ...  \
public_date                                                              ...   
20000131    NaN   8.936  9.348  NaN  NaN  22.570  9.263   NaN  NaN  NaN  ...   
20000229    NaN  12.501  8.473  NaN  NaN  19.266  9.256   NaN  NaN  NaN  ...   

TICKER         XTO  XYL     YHOO     YUM       Z  ZBH    ZBRA    ZION  ZMH  \
public_date                                                                  
20000131     9.082  NaN  302.645  10.211  12.439  NaN  10.355  14.710  NaN   
20000229     7.706  NaN  586.309   6.732  12.439  NaN  15.435  17.097  NaN   

TICKER       ZTS  
public_date       
20000131     NaN  
20000229     NaN  

[2 rows x 926 columns]
FundamentalData/df_pe_exi.pkl
TICKER        A      AA     AAG  AAL  AAP    AAPL     AAS  ABBV  ABC  ABI  \
public_date                                                                 
20000131    NaN  41.813  10.117  NaN  NaN  28.740  13.1

In [88]:
for ratio in ratios:
    df_temp = df_FundamentalRatios[["TICKER", "public_date",ratio]]
    filled = df_temp.count() / len(df_temp)
    print(ratio, filled[2])

evm 0.9947201082792788
pe_exi 0.981549108738955
ps 0.9981102201338169
npm 0.9981102201338169
roe 0.9655434393993565
debt_assets 0.9985315899688442
de_ratio 0.9985124367945247
quick_ratio 0.8704287757290975
sale_nwc 0.710404004290311
ptb 0.9672416875223454
PEG_trailing 0.6075067674549262


## 5b) Alternative ratios
- replace trailing PEGs with forward-looking
- quick ratio does not need replacing
- replace sale_nwc with cash_conversion

In [89]:
df_FundamentalRatios2 = pd.read_csv("FundamentalData/SPX_FundamentalRatios2.csv")
df_FundamentalRatios2

Unnamed: 0,permno,adate,qdate,public_date,curr_ratio,cash_conversion,PEG_1yrforward,PEG_ltgforward,TICKER,cusip
0,10104,19990531.0,19991130,20000131,1.834,58.838,0.801,1.045,ORCL,68389X10
1,10104,19990531.0,19991130,20000229,1.834,58.838,1.191,1.537,ORCL,68389X10
2,10104,19990531.0,19991130,20000331,1.834,58.838,0.876,1.584,ORCL,68389X10
3,10104,19990531.0,20000229,20000430,1.819,58.838,1.374,2.447,ORCL,68389X10
4,10104,19990531.0,20000229,20000531,1.819,58.838,1.235,2.242,ORCL,68389X10
...,...,...,...,...,...,...,...,...,...,...
156627,93436,20191231.0,20200630,20200831,1.182,22.445,,,TSLA,88160R10
156628,93436,20191231.0,20200630,20200930,1.182,22.445,,,TSLA,88160R10
156629,93436,20191231.0,20200630,20201031,1.182,22.445,,,TSLA,88160R10
156630,93436,20191231.0,20200930,20201130,1.329,18.662,,,TSLA,88160R10


In [91]:
ratios2 = ["curr_ratio","cash_conversion","PEG_1yrforward", "PEG_ltgforward"]
for ratio in ratios2:
    savename = "FundamentalData/df_" + ratio + ".pkl"
    df_temp = df_FundamentalRatios2[["TICKER", "public_date",ratio]]
    df_save = pd.pivot_table(df_temp, values=ratio, index=["public_date"],
                    columns=['TICKER'])
    print(savename)
    print(df_save[:2])
    df_save.to_pickle(savename)
print("DONE")



FundamentalData/df_curr_ratio.pkl
TICKER        A     AA  AAL  AAP   AAPL    AAS  ABBV  ABC  ABI   ABMD  ...  \
public_date                                                            ...   
20000131    NaN  1.539  NaN  NaN  2.766  1.447   NaN  NaN  NaN  4.976  ...   
20000229    NaN  1.598  NaN  NaN  2.665  1.498   NaN  NaN  NaN  4.226  ...   

TICKER         XRX    XTO  XYL   YHOO    YUM      Z  ZBH   ZBRA  ZMH  ZTS  
public_date                                                                
20000131     1.605  1.577  NaN  4.852  0.493  1.216  NaN  7.490  NaN  NaN  
20000229     1.508  1.529  NaN  4.919  0.374  1.216  NaN  8.027  NaN  NaN  

[2 rows x 823 columns]
FundamentalData/df_cash_conversion.pkl
TICKER        A      AA     AAG  AAL  AAP     AAS  ABBV  ABC  ABI  ABK  ...  \
public_date                                                             ...   
20000131    NaN  69.851  74.157  NaN  NaN  21.188   NaN  NaN  NaN  NaN  ...   
20000229    NaN  70.511  62.873  NaN  NaN  24.201

In [92]:
for ratio in ratios2:
    df_temp = df_FundamentalRatios2[["TICKER", "public_date",ratio]]
    filled = df_temp.count() / len(df_temp)
    print(ratio, filled[2])

curr_ratio 0.87034577864038
cash_conversion 0.7951376474794423
PEG_1yrforward 0.8867600490321262
PEG_ltgforward 0.8420948465192298


In [94]:
for ratio in ["quick_ratio","sale_nwc","PEG_trailing"]:
    df_temp = df_FundamentalRatios[["TICKER", "public_date",ratio]]
    filled = df_temp.count() / len(df_temp)
    print(ratio, filled[2])

quick_ratio 0.8704287757290975
sale_nwc 0.710404004290311
PEG_trailing 0.6075067674549262
