# HW #3 Financial Ratio Quantile Strategies
[FINM 33150] Regression Analysis and Quantitative Trading Strategies\
Winter 2022 | Professor Brian Boonstra

_**Due:** Thursday, February 3rd, at 11:00pm\
**Name:** Ashley Tsoi (atsoi, Student ID: 12286230)_

### 1. Fetch and clean data

#### 1-1. Import packages

In [1]:
import quandl
import json
import pandas as pd
pd.set_option("display.precision", 4)
pd.set_option('display.float_format', lambda x: '%.4f' % x)
from pandas.core.common import SettingWithCopyWarning
import warnings
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

import math
import numpy as np
import datetime as dt
import functools
from itertools import permutations

# let plot display in the notebook instead of in a different window
%matplotlib inline 
from matplotlib import pyplot as plt
plt.rcParams['figure.figsize'] = [21, 8]

#### 1-2. Define the functions to fetch data from Quandl

**1-2-1. Get my personal keys** from ../data/APIs.json

In [2]:
f = open('../data/APIs.json')
APIs = json.load(f)
f.close()

**1-2-2. Define date-format helper function**

In [3]:
def assertCorrectDateFormat(date_text):
    try:
        dt.datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError:
        raise ValueError("Incorrect date format, should be YYYY-MM-DD")

**1-2-3. Define function** to retrieve raw data from Quandl

**Documentation:**
```
Zacks Fundamentals Collection B (ZFB)
https://data.nasdaq.com/databases/ZFB/documentation
https://data.nasdaq.com/databases/ZFB/usage/quickstart/python

End of Day US Stock Prices (EOD)
https://data.nasdaq.com/databases/EOD/documentation
https://data.nasdaq.com/databases/EOD/usage/quickstart/python
```

In [4]:
# Define function that retrieves ZFB data from Quandl
@functools.lru_cache(maxsize=16) # Cache the function output
def getQuandlZFBData(from_table,secs,start_date,end_date,columns):
    # Get data fom Quandl using quandl.get_table
    # NOTE: missing data for the inputted date will NOT return a row.

    # INPUT         | DATA TYPE                 | DESCRIPTION
    # from_table    | string                    | FC, FR, MT, MKTV, SHRS, or HDM
    # secs          | string / tuple of string  | security name(s)
    # start_date    | string (YYYY-MM-DD)       | start date of data
    # end_date      | string (YYYY-MM-DD)       | end date of data (same as or after start_date)
    # columns       | string / tuple of string  | names of the columns to return
    
    if secs=='all': secs = list(pd.read_csv('../data/zacks-tickers.csv').ticker.unique()) # import all tickers from zacks-tickers

    if type(secs)==str: seclen = 1
    else: seclen=len(secs)
    print("Quandl | START | Retriving Quandl data for {:d} securities from the ZACKS/{} table: \n".format(seclen,from_table))
    
    # Retrieve data using quandl.get_table
    quandl.ApiConfig.api_key = APIs['Quandl']

    if from_table in ['FC','FR','MKTV','SHRS','HDM']:
        data = quandl.get_table('ZACKS/'+from_table,
                                ticker = secs, 
                                per_end_date = {'gte':start_date, 'lte':end_date},
                                qopts = {'columns':list(columns)},
                                paginate = True)
        
        data['per_end_date'] = pd.to_datetime(data['per_end_date'])
        if 'filing_date' in data.columns:
            data['filing_date'] = pd.to_datetime(data['filing_date'])

    elif from_table == 'MT':
        data = quandl.get_table('ZACKS/MT',
                                ticker = secs, 
                                qopts = {'columns':list(columns)},
                                paginate = True)

    else:
        print("from_table is limited to FC, FR, MT, MKTV, SHRS and HDM")
        
    print("Quandl | DONE  | Returning {:d} rows of data from the ZACKS/{} table.\n".format(len(data),from_table))

    return data


@functools.lru_cache(maxsize=16) # Cache the function output
def _getZFBData(secs,start_date,end_date):
    # Merged Zacks data in five tables: FC, FR, MT, MKTV, and SHRS
    # NOTE: missing data for the inputted date will NOT return a row.

    # INPUT         | DATA TYPE                 | DESCRIPTION
    # secs          | string / tuple of string  | security name(s)
    # start_date    | string (YYYY-MM-DD)       | start date of data
    # end_date      | string (YYYY-MM-DD)       | end date of data (same as or after start_date)
    
    # Retrieve data using quandl.get_table
    fc = getQuandlZFBData('FC',secs,start_date,end_date,('ticker','exchange','per_end_date','per_type','zacks_sector_code','basic_net_eps','diluted_net_eps','tot_lterm_debt','net_lterm_debt','filing_date'))
    fr = getQuandlZFBData('FR',secs,start_date,end_date,('ticker','exchange','per_end_date','per_type','ret_invst','tot_debt_tot_equity'))
    mt = getQuandlZFBData('MT',secs,start_date,end_date,('ticker','ticker_type','asset_type'))
    mktv = getQuandlZFBData('MKTV',secs,start_date,end_date,('ticker','per_end_date','per_type','mkt_val'))
    shrs = getQuandlZFBData('SHRS',secs,start_date,end_date,('ticker','per_end_date','per_type','shares_out','avg_d_shares'))

    # Merge the tables
    print("MERGE  | START | \n")

    zacks_1 = fc.merge(fr, how='outer', on=['ticker','exchange','per_end_date','per_type'])
    zacks_2 = mktv.merge(shrs, how='outer', on=['ticker','per_end_date','per_type'])
    zacks_3 = zacks_1.merge(zacks_2, how='outer', on=['ticker','per_end_date','per_type'])
    zacks = zacks_3.merge(mt, how='outer', on='ticker')

    print("MERGE  | DONE  | Returning {:d} rows of ZACKS data.\n".format(len(zacks)))
    
    return zacks


In [None]:
# Define function that retrieves EOD data from Quandl
@functools.lru_cache(maxsize=16) # Cache the function output
def getQuandlEODData(secs,start_date,end_date):
    # Get one security (sec)'s data fom Quandl using quandl.get_table
    # NOTE: missing data for the inputted date will NOT return a row.

    # INPUT         | DATA TYPE                 | DESCRIPTION
    # sec           | string / list of string   | security name(s)
    # start_date    | string (YYYY-MM-DD)       | start date of data
    # end_date      | string (YYYY-MM-DD)       | end date of data (same as or after start_date)
    
    print("Quandl | START | Retriving Quandl data for securities: \n",secs)
    
    # Retrieve data using quandl.get_table
    quandl.ApiConfig.api_key = APIs['Quandl']
    data = quandl.get_table('QUOTEMEDIA/PRICES',
                            ticker = secs, 
                            date = {'gte':start_date, 'lte':end_date})

    print("Quandl | DONE  | Returning {:d} dates of data for {}.\n".format(len(data),secs))
    return data

**1-2-4. Define function** to filter / clean raw data

**Requirements:**
```
- US Equities
- not in the automotive, financial or insurance sector
- end-of-day adjusted closing prices are available
- debt/market cap ratio is greater than 0.1
- has feasible calculation of the ratios: 
  - debt to market cap, 
  - return on investment, and 
  - price to earnings. 
  Including for at least one PER END DATE no more than one year old. Debt ratio of zero is OK.
```

In [9]:
def getCleanZFBData(secs,start_date,end_date):

    # === GET RAW DATA ============================================
    raw_zacks = _getZFBData(secs,start_date,end_date)
    
    # === FILTER ==================================================
    # US Equities only
    zacks = raw_zacks[raw_zacks['exchange'].isin(('NYSE','NASDAQ'))]  # select US stock exchanges
    zacks = zacks[zacks['ticker_type']=='S']                          # S = Securities
    zacks = zacks[zacks['asset_type']=='COM'][zacks.columns]          # COM = Common stocks

    # remove tickers without filing dates (tickers without filing dates are impossible to join on)
    filingDate_filter = zacks[pd.isnull(zacks['filing_date'])]['ticker'].unique()
    zacks = zacks[(~zacks['ticker'].isin(filingDate_filter))]
    
    # not in the automotive, financial or insurance sector for any date (since there might be sector changes)
    sector_filter = zacks[zacks['zacks_sector_code'].isin((5,13))]['ticker'].unique() # 5 = finance (includes insurance), 13 = Autumotive
    zacks = zacks[(~zacks['ticker'].isin(sector_filter))]
    
    # debt-to-market-cap ratio greater than 0.1 AND not null (filter all since we will have enough tickers)
    badDebtToMC_filter = zacks[(zacks['tot_debt_tot_equity']<=0.1) | (pd.isnull(zacks['tot_debt_tot_equity']))]['ticker'].unique()
    zacks = zacks[(~zacks['ticker'].isin(badDebtToMC_filter))]

    # other ratios are not null
    nullRatio_filter = zacks[pd.isnull(zacks['mkt_val'])]
    nullRatio_filter.append(zacks[(pd.isnull(zacks['basic_net_eps']) & pd.isnull(zacks['diluted_net_eps']))]['ticker'].unique())
    nullRatio_filter.append(zacks[(pd.isnull(zacks['tot_lterm_debt'])) & (pd.isnull(zacks['net_lterm_debt']))]['ticker'].unique())
    zacks = zacks[(~zacks['ticker'].isin(nullRatio_filter.unique()))]

    zacks['basic_net_eps'] = [x if x>=0 else 0 for x in zacks['basic_net_eps']]       # make all negative eps 0
    zacks['diluted_net_eps'] = [x if x>=0 else 0 for x in zacks['diluted_net_eps']]   # make all negative eps 0

    print(f'remaining number of tickers: {len(zacks.ticker.unique())}')

    # === FINAL CLEAN-UP ==========================================
    # drop these columns as they are no longer needed
    zacks.drop(['ticker_type','asset_type','zacks_sector_code'], axis=1, inplace=True)

    return zacks

SyntaxError: closing parenthesis ']' does not match opening parenthesis '(' (Temp/ipykernel_6068/1611796597.py, line 25)

In [8]:
getCleanZFBData('all','2013-07-01','2021-01-31')

remaining number of tickers: 911


Unnamed: 0,ticker,exchange,per_end_date,per_type,basic_net_eps,diluted_net_eps,tot_lterm_debt,net_lterm_debt,filing_date,ret_invst,tot_debt_tot_equity,mkt_val,shares_out,avg_d_shares
10,A,NYSE,2013-10-31,A,2.1200,2.1000,2699.0000,347.0000,2013-12-19,2.8167,0.5103,,,
11,A,NYSE,2014-10-31,A,1.4900,1.4700,1663.0000,99.0000,2014-12-22,3.3300,0.3135,,,
12,A,NYSE,2015-10-31,A,1.2000,1.1900,1655.0000,,2015-12-21,7.5193,0.3969,,,
13,A,NYSE,2016-10-31,A,1.4100,1.4000,1904.0000,262.0000,2016-12-20,7.5122,0.4484,,,
14,A,NYSE,2017-10-31,A,2.1200,2.1000,1801.0000,110.0000,2017-12-21,10.3074,0.4159,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222923,ZTS,NYSE,2019-12-31,Q,0.8100,0.8000,5947.0000,,2020-02-13,4.4367,2.3807,63038.1800,476.3000,480.2000
222924,ZTS,NYSE,2020-03-31,Q,0.8900,0.8800,5963.0000,,2020-05-06,4.8531,2.3476,55892.1600,474.9100,479.0000
222925,ZTS,NYSE,2020-06-30,Q,0.7900,0.7900,7194.0000,1240.0000,2020-08-06,3.6932,2.5762,65085.9100,474.9400,478.1000
222926,ZTS,NYSE,2020-09-30,Q,1.0100,1.0000,6595.0000,1240.0000,2020-11-05,4.6952,2.1336,78595.0300,475.2700,478.5000


#### 1-3. Fetch cleaned data using the functions above

**Dates:**
```
January 1, 2014 - January 31, 2021*
```
**Note: fetch data from July 1, 2013 to get all data reported by January 1, 2014*

**1-3-1. Fetch data** 

In [None]:
# zacks_fc = getQuandlZFBData('FC','all','2013-07-01','2021-01-31',('ticker','exchange','per_end_date','per_type','zacks_sector_code','basic_net_eps','diluted_net_eps','tot_lterm_debt','net_lterm_debt','filing_date'))
# zacks_fr = getQuandlZFBData('FR','all','2013-07-01','2021-01-31',('ticker','exchange','per_end_date','per_type','ret_invst','tot_debt_tot_equity'))
# zacks_mt = getQuandlZFBData('MT','all','2013-07-01','2021-01-31',('ticker','ticker_type','asset_type'))
# zacks_mktv = getQuandlZFBData('MKTV','all','2013-07-01','2021-01-31',('ticker','per_end_date','per_type','mkt_val'))
# zacks_shrs = getQuandlZFBData('SHRS','all','2013-07-01','2021-01-31',('ticker','per_end_date','per_type','shares_out','avg_d_shares'))

In [11]:
l = [1,0,-1,-2]
[x if x>=0 else 0 for x in l ]

[1, 0, 0, 0]