In [1]:
# packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import datetime as dt
import re 
import yfinance as yf
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import requests
from get_all_tickers import get_tickers as gt
import bs4 as bs  # beautiful soups for web-scraping
import os
import pandas_datareader.data as pdr

In [149]:
# choose stocks to target 
# html = requests.get('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
# soup = bs.BeautifulSoup(html.text, 'lxml')
# table = soup.find('table', {'class': 'wikitable sortable'})

# tickers = []
# for row in table.findAll('tr')[1:]:
#         ticker = row.findAll('td')[0].text
#         ticker = ticker[:-1]
#         tickers.append(ticker)
# tickers = pd.DataFrame(tickers).replace('.', '-')
# tickers = tickers[0]

In [3]:
# choose stocks to target
SP_500 = requests.get("https://financialmodelingprep.com/api/v3/sp500_constituent?apikey=f0060c7a7b275396c21cb98f4985f3ae")
SP_500 = SP_500.json()

tickers = []
for i in range(len(SP_500)):
    tickers.append(SP_500[i]['symbol'])
tickers = pd.Series(tickers)

In [214]:
tickers.head()

0     MMM
1     ABT
2    ABBV
3    ABMD
4     ACN
dtype: object

The above tickers are the current tickers in the S&P 500...
- Ideally I would like to include all tickers that have been in the S&P 500 over the past 5 years or so, including those that have delisted.
- How to deal with stock ticker name changes?

In [76]:
# fundamental data of companies in S&P 500 sourced from Bloomberg Terminal
SP_500_data = pd.read_excel('SP500_fundamental_data.xlsx', index_col = 0, header=[0,1])
SP_500_data = SP_500_data.rename(str.strip, axis='columns')  # strip trailing whitespace from column names
SP_500_data.head()

Unnamed: 0_level_0,A,A,A,A,A,A,A,AAL,AAL,AAL,...,ZION,ZION,ZION,ZTS,ZTS,ZTS,ZTS,ZTS,ZTS,ZTS
dates,RETURN_ON_ASSET,CF_CASH_FROM_OPER,BS_CUR_ASSET_REPORT,BS_CUR_LIAB,TOT_DEBT_TO_TOT_ASSET,ASSET_TURNOVER,NET_INCOME,RETURN_ON_ASSET,CF_CASH_FROM_OPER,BS_CUR_ASSET_REPORT,...,TOT_DEBT_TO_TOT_ASSET,ASSET_TURNOVER,NET_INCOME,RETURN_ON_ASSET,CF_CASH_FROM_OPER,BS_CUR_ASSET_REPORT,BS_CUR_LIAB,TOT_DEBT_TO_TOT_ASSET,ASSET_TURNOVER,NET_INCOME
2015-01-31,4.0976,-20.0,3654.0,928.0,22.0508,0.4479,63.0,7.3709,2494.0,14129.0,...,2.3893,0.0409,92.025,9.2418,60.0,3110.0,1370.0,56.6874,0.7465,165.0
2015-04-30,3.4711,183.0,3620.0,930.0,22.3421,0.4383,87.0,9.0096,2347.0,14109.0,...,2.3268,0.0385,13.96,6.4024,122.0,3316.0,1572.0,55.2743,0.7328,-37.0
2015-07-31,3.2216,108.0,3478.0,853.0,22.8244,0.459,111.0,10.6063,1180.0,13706.0,...,2.2231,0.0385,100.999,6.7055,204.0,3450.0,1658.0,54.3524,0.7282,189.0
2015-10-31,4.384,237.0,3686.0,976.0,22.1286,0.4415,140.0,16.6085,228.0,9985.0,...,2.0767,0.0376,102.487,4.6755,278.0,3830.0,1781.0,61.519,0.6572,22.0
2016-01-31,6.1939,111.0,3399.0,947.0,23.7332,0.5452,121.0,15.2654,2620.0,10802.0,...,1.8649,0.0381,90.437,5.4455,51.0,3307.0,1163.0,59.949,0.6951,204.0


In [93]:
# price per earnings ratio 
SP500_PE_ratio = pd.read_excel('SP500_PE_ratio.xlsx', index_col = 0, header = [0,1])
SP500_PE_ratio = SP500_PE_ratio.rename(str.strip, axis='columns')  # strip trailing whitespace from column names
SP500_PE_ratio.head()

Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABMD,ABT,ACN,ADBE,...,XEL,XLNX,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Dates,Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),...,Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E),Price Earnings Ratio (P/E)
1/31/2015,32.4291,7.5969,21.427,15.2538,17.0764,,111.2864,27.3643,19.1146,132.3404,...,17.706,17.5937,12.883,22.7057,17.8535,27.0005,20.9933,42.1366,14.6633,28.8274
4/30/2015,31.2193,5.0079,19.7095,14.6436,18.0994,,74.4221,26.2355,20.2285,110.005,...,16.3683,19.1748,14.9968,23.1947,19.3937,31.3389,20.693,223.1307,25.0345,29.2976
7/31/2015,28.9413,4.2792,21.9251,12.4545,13.809,,98.0262,21.2289,19.5672,81.7653,...,17.0569,19.063,15.9281,21.3564,17.5525,26.0998,20.1882,223.1307,24.3438,24.51
10/31/2015,25.8573,4.7224,24.2232,11.5201,14.4875,55.7635,103.0267,23.5027,22.3015,76.6375,...,17.1327,22.6965,20.2468,27.5991,19.7377,22.9846,20.1646,223.1307,22.0026,27.702
1/31/2016,24.9317,4.7503,19.9937,11.8064,13.5396,12.5232,112.869,27.2393,19.8359,56.2225,...,19.8199,23.0457,26.8778,26.7839,21.8922,26.3477,22.5734,223.1307,19.2738,25.0387


In [None]:
# gross margin 

In [87]:
# compute Piotroski F-score given S&P 500 ticker
ROA = SP_500_data["A"]["RETURN_ON_ASSET"]
CFO = SP_500_data["A"]["CF_CASH_FROM_OPER"]
NET_INCOME = SP_500_data["A"]["NET_INCOME"]
TOT_CURR_ASSET = SP_500_data["A"]["BS_CUR_ASSET_REPORT"]

dROA = ROA.diff(1)
ACCRUAL = (NET_INCOME - CFO)/TOT_CURR_ASSET.shift(1)


In [88]:
ACCRUAL

2015-01-31         NaN
2015-04-30   -0.026273
2015-07-31    0.000829
2015-10-31   -0.027890
2016-01-31    0.002713
2016-04-30   -0.047955
2016-07-31   -0.020069
2016-10-31   -0.030595
2017-01-31    0.014305
2017-04-30   -0.025585
2017-07-31   -0.013947
2017-10-31   -0.027778
2018-01-31   -0.128328
2018-04-30   -0.022288
2018-07-31    0.008619
2018-10-31   -0.048268
2019-01-31    0.075624
2019-04-30   -0.018858
2019-07-31   -0.013379
2019-10-31   -0.034712
2020-01-31    0.080276
2020-04-30   -0.068343
2020-07-31   -0.028698
2020-10-31   -0.047766
NaT                NaN
dtype: float64

In [215]:
# compute Piotroski F-score

def F_SCORE(ticker, start_date, end_date):

    #Balance Sheet
        url_1 = "https://financialmodelingprep.com/api/v3/financials/"
        balance_sheet = "balance-sheet-statement/"
        url_2 = f"{ticker}?period=quarter&apikey=f0060c7a7b275396c21cb98f4985f3ae"
        BS = requests.get(url_1 + balance_sheet + url_2)
        BS = BS.json()
        N = len(bs['financials'])

        for i in range(N):
            if re.search(year+"-1\d-\d\d", bs["financials"][i]['date']):
                a,b,c = i,i+1,i+2
                break

        print("year of study is "+bs["financials"][a]['date'])

        # target year
        long_term_debt = float(bs["financials"][a]['Long-term debt'])
        total_assets = float(bs["financials"][a]['Total assets'])
        current_assets = float(bs["financials"][a]['Total current assets'])
        current_assets = current_assets if current_assets != 0 else 1
        current_liabilities = float(bs["financials"][a]['Total current liabilities'])
        current_liabilities = current_liabilities if current_liabilities != 0 else 1

        # prior year
        long_term_debt_py = float(bs["financials"][b]['Long-term debt'])
        total_assets_py = float(bs["financials"][b]['Total assets'])
        current_assets_py = float(bs["financials"][b]['Total current assets'])
        current_assets_py = current_assets_py if current_assets_py != 0 else 10
        current_liabilities_py = float(bs["financials"][b]['Total current liabilities'])
        current_liabilities_py = current_liabilities_py if current_liabilities_py != 0 else 1

        # prior year * 2 
        total_assets_py2 = float(bs["financials"][c]['Total assets'])

        # Income statement
        income_statement = "income-statement/"
        IS = requests.get(url_1 + income_statement + url_2)
        IS = IS.json()

        # Year 2018
        revenue = float(IS['financials'][a]['Revenue'])
        gross_profit = float(IS['financials'][a]['Gross Profit'])
        net_income = float(IS['financials'][a]['Net Income'])

        # Previous year (2017)
        revenue_py = float(IS['financials'][b]['Revenue'])
        gross_profit_py = float(IS['financials'][b]['Gross Profit'])
        net_income_py = float(IS['financials'][b]['Net Income'])
        
        # Cashflow
        cashflow = "cash-flow-statement/"
        CF = requests.get(url_1 + cashflow + url_2)
        CF = CF.json()
        cashflow_op = float(CF['financials'][a]["Operating Cash Flow"])

        #Piotroski F-score

        ROA_FS = int(net_income/((total_assets + total_assets_py)/2)>0)
        CFO_FS = int(cashflow_op>0)
        ROA_D_FS = int((net_income/((total_assets + total_assets_py)/2))>(net_income_py/((total_assets_py + total_assets_py2))))
        CFO_ROA_FS = int((cashflow_op/total_assets)>(net_income/((total_assets + total_assets_py)/2)))
        LTD_FS = int(long_term_debt <= long_term_debt_py)
        CR_FS = int((current_assets/current_liabilities)>(current_assets_py/current_liabilities_py))
        DILUTION_FS = int(float(IS['financials'][a]['Weighted Average Shs Out'])<=float(IS['financials'][b]['Weighted Average Shs Out']))
        GM_FS = int(gross_profit/revenue>gross_profit_py/revenue_py)
        ATO_FS = int((revenue/((total_assets + total_assets_py)/2))>(revenue_py/((total_assets_py + total_assets_py2))))
        f_score = ROA_FS + CFO_FS + ROA_D_FS + CFO_ROA_FS + LTD_FS + CR_FS + DILUTION_FS + GM_FS + ATO_FS     
        print(f'{ticker} F_Score is {f_score}')
        F_score_tgt_yr.append(f_score)
        
    F_score["F_score"] = F_score_tgt_yr
        
    return F_score
    

In [216]:
year = "2020"
F_SCORE(tickers, year)

Analyzing MMM
year of study is 2020-12-31
MMM F_Score is 7
Analyzing ABT
year of study is 2020-12-31
ABT F_Score is 6
Analyzing ABBV
year of study is 2020-12-31
ABBV F_Score is 4
Analyzing ABMD
year of study is 2021-06-30
ABMD F_Score is 8
Analyzing ACN
year of study is 2020-08-31
ACN F_Score is 8
Analyzing ATVI
year of study is 2020-12-31
ATVI F_Score is 6
Analyzing ADBE
year of study is 2020-11-27
ADBE F_Score is 8
Analyzing AMD
year of study is 2020-12-26
AMD F_Score is 7
Analyzing AAP
year of study is 2021-01-02
AAP F_Score is 8
Analyzing AES
year of study is 2020-12-31
AES F_Score is 6
Analyzing AFL
year of study is 2020-12-31
AFL F_Score is 8
Analyzing A
year of study is 2020-10-31
A F_Score is 7
Analyzing APD
year of study is 2020-09-30
APD F_Score is 7
Analyzing AKAM
year of study is 2020-12-31
AKAM F_Score is 6
Analyzing ALK
year of study is 2020-12-31
ALK F_Score is 2
Analyzing ALB
year of study is 2020-12-31
ALB F_Score is 6
Analyzing ARE
year of study is 2020-12-31
ARE F_Sc

KeyError: 'financials'

In [20]:
ticker = "AAPL"
end_date = "2020-12-31"
start_date = "2015-01-01"
url = f"https://financialmodelingprep.com/api/v3/balance-sheet-statement/{ticker}?period=quarter&from=2020-12-31&to=2015-01-01&apikey=f0060c7a7b275396c21cb98f4985f3ae"
BS = requests.get(url)
BS = BS.json()