In [84]:
# packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import datetime as dt
import re 
import yfinance as yf
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import requests
from get_all_tickers import get_tickers as gt
import bs4 as bs  # beautiful soups for web-scraping
import os
import pandas_datareader.data as pdr

The below tickers are the current tickers in the S&P 500...
- Ideally I would like to include all tickers that have been in the S&P 500 over the past 5 years or so, including those that have delisted.
- How to deal with stock ticker name changes?

In [85]:
# fundamental data of companies in S&P 500 sourced from Bloomberg Terminal
SP_500_data = pd.read_excel('SP500_fundamental_data.xlsx', index_col = 0, header=[0,1])
SP_500_data = SP_500_data.rename(str.strip, axis='columns')  # strip trailing whitespace from column names
SP_500_data.head()

Unnamed: 0_level_0,A,A,A,A,A,A,A,AAL,AAL,AAL,...,ZION,ZION,ZION,ZTS,ZTS,ZTS,ZTS,ZTS,ZTS,ZTS
dates,RETURN_ON_ASSET,CF_CASH_FROM_OPER,BS_CUR_ASSET_REPORT,BS_CUR_LIAB,TOT_DEBT_TO_TOT_ASSET,ASSET_TURNOVER,NET_INCOME,RETURN_ON_ASSET,CF_CASH_FROM_OPER,BS_CUR_ASSET_REPORT,...,TOT_DEBT_TO_TOT_ASSET,ASSET_TURNOVER,NET_INCOME,RETURN_ON_ASSET,CF_CASH_FROM_OPER,BS_CUR_ASSET_REPORT,BS_CUR_LIAB,TOT_DEBT_TO_TOT_ASSET,ASSET_TURNOVER,NET_INCOME
2015-01-31,4.0976,-20.0,3654.0,928.0,22.0508,0.4479,63.0,7.3709,2494.0,14129.0,...,2.3893,0.0409,92.025,9.2418,60.0,3110.0,1370.0,56.6874,0.7465,165.0
2015-04-30,3.4711,183.0,3620.0,930.0,22.3421,0.4383,87.0,9.0096,2347.0,14109.0,...,2.3268,0.0385,13.96,6.4024,122.0,3316.0,1572.0,55.2743,0.7328,-37.0
2015-07-31,3.2216,108.0,3478.0,853.0,22.8244,0.459,111.0,10.6063,1180.0,13706.0,...,2.2231,0.0385,100.999,6.7055,204.0,3450.0,1658.0,54.3524,0.7282,189.0
2015-10-31,4.384,237.0,3686.0,976.0,22.1286,0.4415,140.0,16.6085,228.0,9985.0,...,2.0767,0.0376,102.487,4.6755,278.0,3830.0,1781.0,61.519,0.6572,22.0
2016-01-31,6.1939,111.0,3399.0,947.0,23.7332,0.5452,121.0,15.2654,2620.0,10802.0,...,1.8649,0.0381,90.437,5.4455,51.0,3307.0,1163.0,59.949,0.6951,204.0


In [None]:
# S&P quarterly returns over the same period


In [91]:
# price per earnings ratio 
SP500_PE_ratio = pd.read_excel('SP500_PE_ratio_monthly_10-20.xlsx', index_col=0, header = [0,1])
SP500_PE_ratio = SP500_PE_ratio.rename(str.strip, axis='columns')  # strip trailing whitespace from column names
SP500_PE_ratio = SP500_PE_ratio.iloc[::3, :]  # quarterly data
SP500_PE_ratio.index = [str(x.year) + "_" + str(x.quarter) for x in SP500_PE_ratio.index]  # clean date index

# create multi-level quarterly returns df
N = SP500_PE_ratio.shape[0]
tickers = list(SP500_PE_ratio.columns.get_level_values(0))  # level 1
tickers = list(set(tickers))  # keep only unique ticker names
ret_col = ["qtr_ret"]*len(tickers)  # level 2
tuples = list(zip(tickers, ret_col))
index = pd.MultiIndex.from_tuples(tuples, names=[None, "Dates"])

qtr_rets = pd.DataFrame(np.zeros((N, len(tickers))), index = SP500_PE_ratio.index, columns = index)

for i in range(len(tickers)):
    qtr_rets.iloc[:,i] = SP500_PE_ratio.iloc[:,i] / SP500_PE_ratio.iloc[:,i].shift(1) - 1

qtr_rets = qtr_rets.shift(-1)
qtr_rets

# merge PE ratio dataset to future qtr return dataset
PE_ratio_qtr_rets_df = pd.merge(SP500_PE_ratio, qtr_rets, left_index=True, right_index=True)
PE_ratio_qtr_rets_df = PE_ratio_qtr_rets_df.sort_index(axis=1)
PE_ratio_qtr_rets_df = PE_ratio_qtr_rets_df.drop('Last Price', axis=1, level=1)  # drop "Last Price" col
PE_ratio_qtr_rets_df

# create stacked dataframe and clean
PE_ratio_qtr_rets_df = PE_ratio_qtr_rets_df.stack(0)
PE_ratio_qtr_rets_df['EP_RATIO'] = np.reciprocal(PE_ratio_qtr_rets_df['PE_RATIO'])  # create Earnings/Price Ratio col
PE_ratio_qtr_rets_df.drop(PE_ratio_qtr_rets_df.columns[0], axis=1, inplace=True)  # drop PE_RATIO col
PE_ratio_qtr_rets_df['EP_RATIO'].fillna(0, inplace=True)  # change NaN values to 0 in EP_RATIO col
PE_ratio_qtr_rets_df = PE_ratio_qtr_rets_df[["EP_RATIO", "qtr_ret"]]
PE_ratio_qtr_rets_df

Unnamed: 0,Dates,EP_RATIO,qtr_ret
2010_1,A,0.031703,0.264903
2010_1,AAL,0.000000,0.044679
2010_1,AAP,0.071499,0.181218
2010_1,AAPL,0.053300,0.049298
2010_1,ABBV,0.000000,0.083056
...,...,...,...
2020_4,YUM,0.032222,
2020_4,ZBH,0.026667,
2020_4,ZBRA,0.035215,
2020_4,ZION,0.080480,


In [None]:
# compute Piotroski F-score given S&P 500 ticker
ROA = SP_500_data["A"]["RETURN_ON_ASSET"]
CFO = SP_500_data["A"]["CF_CASH_FROM_OPER"]
NET_INCOME = SP_500_data["A"]["NET_INCOME"]
TOT_CURR_ASSET = SP_500_data["A"]["BS_CUR_ASSET_REPORT"]

dROA = ROA.diff(1)
ACCRUAL = (NET_INCOME - CFO)/TOT_CURR_ASSET.shift(1)


In [None]:
ACCRUAL

In [None]:
# compute Piotroski F-score

def F_SCORE(ticker, start_date, end_date):

    #Balance Sheet
        url_1 = "https://financialmodelingprep.com/api/v3/financials/"
        balance_sheet = "balance-sheet-statement/"
        url_2 = f"{ticker}?period=quarter&apikey=f0060c7a7b275396c21cb98f4985f3ae"
        BS = requests.get(url_1 + balance_sheet + url_2)
        BS = BS.json()
        N = len(bs['financials'])

        for i in range(N):
            if re.search(year+"-1\d-\d\d", bs["financials"][i]['date']):
                a,b,c = i,i+1,i+2
                break

        print("year of study is "+bs["financials"][a]['date'])

        # target year
        long_term_debt = float(bs["financials"][a]['Long-term debt'])
        total_assets = float(bs["financials"][a]['Total assets'])
        current_assets = float(bs["financials"][a]['Total current assets'])
        current_assets = current_assets if current_assets != 0 else 1
        current_liabilities = float(bs["financials"][a]['Total current liabilities'])
        current_liabilities = current_liabilities if current_liabilities != 0 else 1

        # prior year
        long_term_debt_py = float(bs["financials"][b]['Long-term debt'])
        total_assets_py = float(bs["financials"][b]['Total assets'])
        current_assets_py = float(bs["financials"][b]['Total current assets'])
        current_assets_py = current_assets_py if current_assets_py != 0 else 10
        current_liabilities_py = float(bs["financials"][b]['Total current liabilities'])
        current_liabilities_py = current_liabilities_py if current_liabilities_py != 0 else 1

        # prior year * 2 
        total_assets_py2 = float(bs["financials"][c]['Total assets'])

        # Income statement
        income_statement = "income-statement/"
        IS = requests.get(url_1 + income_statement + url_2)
        IS = IS.json()

        # Year 2018
        revenue = float(IS['financials'][a]['Revenue'])
        gross_profit = float(IS['financials'][a]['Gross Profit'])
        net_income = float(IS['financials'][a]['Net Income'])

        # Previous year (2017)
        revenue_py = float(IS['financials'][b]['Revenue'])
        gross_profit_py = float(IS['financials'][b]['Gross Profit'])
        net_income_py = float(IS['financials'][b]['Net Income'])
        
        # Cashflow
        cashflow = "cash-flow-statement/"
        CF = requests.get(url_1 + cashflow + url_2)
        CF = CF.json()
        cashflow_op = float(CF['financials'][a]["Operating Cash Flow"])

        #Piotroski F-score

        ROA_FS = int(net_income/((total_assets + total_assets_py)/2)>0)
        CFO_FS = int(cashflow_op>0)
        ROA_D_FS = int((net_income/((total_assets + total_assets_py)/2))>(net_income_py/((total_assets_py + total_assets_py2))))
        CFO_ROA_FS = int((cashflow_op/total_assets)>(net_income/((total_assets + total_assets_py)/2)))
        LTD_FS = int(long_term_debt <= long_term_debt_py)
        CR_FS = int((current_assets/current_liabilities)>(current_assets_py/current_liabilities_py))
        DILUTION_FS = int(float(IS['financials'][a]['Weighted Average Shs Out'])<=float(IS['financials'][b]['Weighted Average Shs Out']))
        GM_FS = int(gross_profit/revenue>gross_profit_py/revenue_py)
        ATO_FS = int((revenue/((total_assets + total_assets_py)/2))>(revenue_py/((total_assets_py + total_assets_py2))))
        f_score = ROA_FS + CFO_FS + ROA_D_FS + CFO_ROA_FS + LTD_FS + CR_FS + DILUTION_FS + GM_FS + ATO_FS     
        print(f'{ticker} F_Score is {f_score}')
        F_score_tgt_yr.append(f_score)
        
    F_score["F_score"] = F_score_tgt_yr
        
    return F_score
    