In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import csv
import requests
import os
import time
import json

In [3]:
#Source of the S&P 500 constituent list: https://stockmarketmba.com/stocksinthesp500.php
#Only the 'Symbol' column is kept; the per-ticker loops below iterate over it.
sp500_index_df = pd.read_csv('./datasets/SP500_index.csv')
ticker_list = sp500_index_df['Symbol']

In [33]:
def income_puller(ticker, timeout=30):
    """Download one ticker's StockRow income-statement workbook to disk.

    Requests the income-statement export for ``ticker`` and saves the response
    body as ``StockRow_financials/Excel/{ticker}_inc.xlsx``, replacing any file
    already stored under that name.

    Parameters
    ----------
    ticker : str
        Symbol interpolated into the StockRow URL and the output file name.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot hang a loop over many tickers.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Nothing is written in that
        case, so an error page is never saved under an ``.xlsx`` name.
    requests.RequestException
        For connection failures and timeouts.
    """
    url=f'https://stockrow.com/api/companies/{ticker}/financials.xlsx?dimension=T&section=Income%20Statement&sort=desc'
    excel_dir = os.path.join("StockRow_financials", "Excel")
    #create the target folder on first use instead of failing on a fresh checkout
    os.makedirs(excel_dir, exist_ok=True)
    try:
        response = requests.get(url, timeout=timeout)
        #fail loudly on HTTP errors rather than writing the error body to disk
        response.raise_for_status()
        with open(os.path.join(excel_dir, f"{ticker}_inc.xlsx"), 'wb') as f:
            f.write(response.content)
    finally:
        #pause after every attempt (successful or not) to keep the request rate throttled
        time.sleep(0.25)

def csv_maker(ticker):
    """Convert a downloaded StockRow workbook into a per-ticker CSV file.

    Reads ``StockRow_financials/Excel/{ticker}_inc.xlsx``, transposes it so that
    each workbook column becomes a row, promotes the first transposed row to the
    column headers, adds a constant ``Ticker`` column and writes the result to
    ``StockRow_financials/CSV/{ticker}_inc.csv`` with the index labelled ``Date``.

    Parameters
    ----------
    ticker : str
        Symbol used to build the input and output file names and to fill the
        ``Ticker`` column.

    Raises
    ------
    Exception
        Whatever ``pd.read_excel`` raises when the workbook is missing or cannot
        be parsed; callers that want best-effort behaviour should catch it.
    """
    #read the Excel file and flip it so the workbook's columns become rows
    transposed_df = pd.read_excel(f'StockRow_financials/Excel/{ticker}_inc.xlsx').T
    #the first transposed row holds the labels that become the column names
    header_row = transposed_df.iloc[0]
    #.copy() detaches the slice from transposed_df, so adding 'Ticker' below
    #writes to an independent frame instead of a view (no chained-assignment warning)
    ticker_df = transposed_df.iloc[1:].copy()
    ticker_df.columns = header_row

    ticker_df['Ticker'] = f"{ticker}"

    #make sure the output folder exists before writing
    os.makedirs('StockRow_financials/CSV', exist_ok=True)
    ticker_df.to_csv(f'StockRow_financials/CSV/{ticker}_inc.csv',index_label="Date")

#### This code pulls financial information from StockRow.com

In [151]:
#do not run this cell unless you have 15 minutes to kill
#also note that it overwrites all the previously downloaded Excel workbooks
#start_time = time.time()
#
#for ticker in ticker_list:
#    try:
#        income_puller(ticker)
#    except:
#        print(f"Ticker {ticker} throws an error")
#    
#print(f"Code took {np.round(time.time() - start_time,2)} seconds to run")

#### This code converts the StockRow workbooks from Excel files to CSV files

In [34]:
#convert every downloaded workbook to CSV; a failing ticker is reported and skipped
start_time = time.time()

for ticker in ticker_list:
    try:
        csv_maker(ticker)
    except Exception as err:
        #catch Exception rather than using a bare except so that interrupting the
        #kernel still stops the loop, and report why the ticker failed
        print(f"Ticker {ticker} throws an error: {type(err).__name__}: {err}")

print(f"Code took {np.round(time.time() - start_time,2)} seconds to run")

Ticker META throws an error
Ticker ELV throws an error
Ticker BALL throws an error
Ticker TRGP throws an error
Ticker GEN throws an error
Code took 15.84 seconds to run


#### This code combines all downloaded CSV files (for which we have a ticker) into one Pandas DataFrame

In [100]:
start_time = time.time()

#read every per-ticker CSV first and concatenate once at the end;
#calling pd.concat inside the loop re-copies the accumulated frame on each iteration
ticker_frames = []
for ticker in ticker_list:
    try:
        ticker_frames.append(pd.read_csv(f'StockRow_financials/CSV/{ticker}_inc.csv'))
    except Exception as err:
        #a ticker without a readable CSV is reported and skipped (including the
        #first one, which previously crashed the cell because it was read outside the try)
        print(f'Ticker {ticker} threw an error: {type(err).__name__}: {err}')

#each file keeps its own row index here, exactly as with the incremental concat
starter_df = pd.concat(ticker_frames)

print(f"This code took {time.time() - start_time} seconds")

Ticker META threw an error
Ticker ELV threw an error
Ticker BALL threw an error
Ticker TRGP threw an error
Ticker GEN threw an error
This code took 2.4510436058044434 seconds


In [152]:
#write the combined frame (all tickers, all columns) to a single CSV, without the row index
starter_df.to_csv(path_or_buf='SP500_allFinancials.csv', index=False)

#### Narrow the financial columns, convert "Date" to a DateTime object, create a "Year" field

In [103]:
#keep the two identifier columns plus five income-statement line items
kept_columns = ['Ticker', 'Date', 'Revenue', 'Gross Profit',
                'Operating Income', 'Income after Tax', 'Net Income Common']
starter_df = starter_df.loc[:, kept_columns].reset_index(drop=True)

#parse 'Date' into datetimes, then derive the calendar year from it
starter_df['Date'] = pd.to_datetime(starter_df['Date'])
starter_df['Year'] = starter_df['Date'].dt.year

In [153]:
#example of grouping the data to find the average financial values for each calendar year
#numeric_only=True restricts the mean to the numeric columns; without it pandas >= 2.0
#raises a TypeError when it tries to average the string 'Ticker' column
starter_df[starter_df['Ticker']=='AAPL'].groupby('Year').mean(numeric_only=True)

Unnamed: 0_level_0,Revenue,Gross Profit,Operating Income,Income after Tax,Net Income Common
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012,164687000000.0,69019000000.0,55111000000.0,41747000000.0,41747000000.0
2013,170852500000.0,65258000000.0,50112250000.0,37872000000.0,37872000000.0
2014,184193500000.0,71014000000.0,53361000000.0,40058500000.0,40058500000.0
2015,226301000000.0,90290500000.0,68532000000.0,51417500000.0,51417500000.0
2016,220395000000.0,86462500000.0,62246500000.0,47344750000.0,47344750000.0
2017,228093500000.0,87715500000.0,61227500000.0,47814250000.0,47814250000.0
2018,257474500000.0,98559000000.0,68206000000.0,57100000000.0,57100000000.0
2019,261345200000.0,99122500000.0,64999250000.0,56412000000.0,56412000000.0
2020,277622000000.0,106432500000.0,68317500000.0,59245000000.0,59245000000.0
2021,354175200000.0,146928500000.0,103673200000.0,89587000000.0,89587000000.0


In [154]:
#grouping the data to find average values per year for every ticker
#numeric_only=True averages only the numeric financial columns, so the non-numeric
#'Date' column is left out of the result regardless of the pandas version
condensed_finances_df = starter_df.groupby(['Ticker','Year']).mean(numeric_only=True)
condensed_finances_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Revenue,Gross Profit,Operating Income,Income after Tax,Net Income Common
Ticker,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A,2013,6.854500e+09,3.569250e+09,9.845000e+08,9.442500e+08,9.442500e+08
A,2014,5.252250e+09,2.680750e+09,6.885000e+08,4.702500e+08,6.605000e+08
A,2015,4.619250e+09,2.308250e+09,5.257500e+08,3.070000e+08,3.390000e+08
A,2016,4.116000e+09,2.130250e+09,5.877500e+08,4.672500e+08,4.650000e+08
A,2017,4.357750e+09,2.319750e+09,7.500000e+08,6.020000e+08,6.020000e+08
...,...,...,...,...,...,...
ZTS,2018,5.644000e+09,3.819000e+09,1.673500e+09,1.166000e+09,1.171250e+09
ZTS,2019,6.092500e+09,4.097500e+09,1.727250e+09,1.430500e+09,1.431000e+09
ZTS,2020,6.474000e+09,4.500250e+09,1.967500e+09,1.631250e+09,1.632250e+09
ZTS,2021,7.454000e+09,5.169500e+09,2.354000e+09,1.922500e+09,1.925500e+09


In [155]:
#sanity check: the AAPL cross-section of the grouped frame, indexed by year
condensed_finances_df.xs('AAPL', level='Ticker')

Unnamed: 0_level_0,Revenue,Gross Profit,Operating Income,Income after Tax,Net Income Common
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012,164687000000.0,69019000000.0,55111000000.0,41747000000.0,41747000000.0
2013,170852500000.0,65258000000.0,50112250000.0,37872000000.0,37872000000.0
2014,184193500000.0,71014000000.0,53361000000.0,40058500000.0,40058500000.0
2015,226301000000.0,90290500000.0,68532000000.0,51417500000.0,51417500000.0
2016,220395000000.0,86462500000.0,62246500000.0,47344750000.0,47344750000.0
2017,228093500000.0,87715500000.0,61227500000.0,47814250000.0,47814250000.0
2018,257474500000.0,98559000000.0,68206000000.0,57100000000.0,57100000000.0
2019,261345200000.0,99122500000.0,64999250000.0,56412000000.0,56412000000.0
2020,277622000000.0,106432500000.0,68317500000.0,59245000000.0,59245000000.0
2021,354175200000.0,146928500000.0,103673200000.0,89587000000.0,89587000000.0


In [156]:
#load the pay file and keep only the rows whose ticker appears in the S&P 500 list
execpay_raw_df = pd.read_csv('main_SCT_pay_file.csv')
in_sp500 = execpay_raw_df['ticker'].isin(ticker_list)
execpay_df = execpay_raw_df.loc[in_sp500]

execpay_df

Unnamed: 0,id,cik,ticker,name,position,year,salary,bonus,stockAwards,optionAwards,nonEquityIncentiveCompensation,otherCompensation,total,changeInPensionValueAndDeferredEarnings,CEO,CFO,Interim
0,73b3a60ba203743c008330c96b7d8b66,1090872,A,Sam Raha,"Senior Vice President, President Diagnostics a...",2021,563500,0,1541332,0,738536,33534,3262887,,False,False,False
1,97393f60cd9f321650e472673daaa70c,1090872,A,Michael R. McMullen,Chief Executive Officer,2021,1280000,0,9165390,0,3149714,77512,15967631,,True,False,False
2,83b9cc2bca477fe8ce23e0ab56e70c66,1090872,A,Jacob Thaysen,"Senior Vice President, President Life Sciences...",2021,625000,0,1812285,0,897589,13684,3802349,,False,False,False
3,7c60bb804071675ae15ec930f6dea190,1090872,A,Robert McMahon,"Senior Vice President, Chief Financial Officer",2021,663500,0,2291271,0,1007000,176196,4711733,,False,True,False
4,259b710a8befe67c61a2c3dec14f344a,1090872,A,Padraig McDonnell,"Senior Vice President, President Cross-Lab Group",2021,495000,0,1249771,0,686716,499472,3243927,,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140505,509ce14e56d662f93a33c221556bee61,829224,SBUX,Clifford Burrows,"president, Starbucks Coffee Americas and US",2011,678942,0,2050337,909284,633822,53391,4325776,,False,False,False
140509,ca5ec5821d6a445f90716a3d9695b490,1393612,DFS,Harit Talwar,"EVP, President—US Cards",2010,1750000,875000,0,9292,0,0,3531442,17150.0,False,False,False
140517,5182311313ab849b40a9a5063ec9b2da,1175454,FLT,Todd W. House,"President, US Direct Business & Chief Operatin...",2010,275000,40000,787500,1293779,110000,22487,2528766,0.0,False,False,False
140520,1c4e649f102fc4f3523b36f94390bd72,14272,BMY,Anthony C. Hooper,"SVP Commercial Operations & President US, Japa...",2010,800000,0,2649962,0,1276236,267872,6148512,,False,False,False


In [157]:
#inner join: each pay row is matched to the per-ticker, per-year averages on (ticker, year)
main_df = pd.merge(
    execpay_df,
    condensed_finances_df,
    how='inner',
    left_on=['ticker', 'year'],
    right_on=['Ticker', 'Year'],
)

In [158]:
#display the merged frame (pay columns followed by the averaged financial columns)
main_df

Unnamed: 0,id,cik,ticker,name,position,year,salary,bonus,stockAwards,optionAwards,...,total,changeInPensionValueAndDeferredEarnings,CEO,CFO,Interim,Revenue,Gross Profit,Operating Income,Income after Tax,Net Income Common
0,73b3a60ba203743c008330c96b7d8b66,1090872,A,Sam Raha,"Senior Vice President, President Diagnostics a...",2021,563500,0,1541332,0,...,3262887,,False,False,False,5.952000e+09,3.193250e+09,1.175500e+09,9.837500e+08,983750000.0
1,97393f60cd9f321650e472673daaa70c,1090872,A,Michael R. McMullen,Chief Executive Officer,2021,1280000,0,9165390,0,...,15967631,,True,False,False,5.952000e+09,3.193250e+09,1.175500e+09,9.837500e+08,983750000.0
2,83b9cc2bca477fe8ce23e0ab56e70c66,1090872,A,Jacob Thaysen,"Senior Vice President, President Life Sciences...",2021,625000,0,1812285,0,...,3802349,,False,False,False,5.952000e+09,3.193250e+09,1.175500e+09,9.837500e+08,983750000.0
3,7c60bb804071675ae15ec930f6dea190,1090872,A,Robert McMahon,"Senior Vice President, Chief Financial Officer",2021,663500,0,2291271,0,...,4711733,,False,True,False,5.952000e+09,3.193250e+09,1.175500e+09,9.837500e+08,983750000.0
4,259b710a8befe67c61a2c3dec14f344a,1090872,A,Padraig McDonnell,"Senior Vice President, President Cross-Lab Group",2021,495000,0,1249771,0,...,3243927,,False,False,False,5.952000e+09,3.193250e+09,1.175500e+09,9.837500e+08,983750000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28435,7b803e9ff0f142d5b1c36b5fdbaced15,1564708,NWS,Gerson Zweifach,General Counsel,2014,1500000,0,0,0,...,1500000,,False,False,False,8.548750e+09,3.453500e+09,4.040000e+08,1.545000e+08,-29750000.0
28436,473bf52703cdbffdc9b311d8dee9101a,1564708,NWS,Bedi Ajay Singh,Chief Financial Officer,2013,655769,713425,496499,0,...,2087442,,False,True,False,8.822667e+09,3.454667e+09,2.525000e+08,2.126667e+08,169000000.0
28437,c36a1d9990a2218f8d2a09b25fb7cd65,1564708,NWS,Robert J. Thomson,Chief Executive Officer,2013,992308,1000000,0,0,...,2661463,,True,False,False,8.822667e+09,3.454667e+09,2.525000e+08,2.126667e+08,169000000.0
28438,7d64a65ecf544ffd0ce4d4ada1607c73,1564708,NWS,Gerson Zweifach,General Counsel,2013,0,0,0,0,...,0,,False,False,False,8.822667e+09,3.454667e+09,2.525000e+08,2.126667e+08,169000000.0


In [160]:
#write the merged pay + financials frame to CSV, without the row index
main_df.to_csv(path_or_buf='execpay_financials_sp500.csv', index=False)