In [52]:
import pandas as pd
import numpy as np
import yfinance as yf

In [2]:
firm_data = pd.read_csv('input_data/Firm_data.csv')
data_desc = pd.read_csv('data_description/Firm_data_descriptions.csv')

In [3]:
data_desc['Variable Name'] = data_desc['Variable Name'].str.lower()
data_desc = data_desc[['Variable Name','Description']]
column_names = pd.DataFrame(firm_data.columns)
column_names = column_names.rename(columns = {column_names.columns[0]: 'Variable Name'})
data_definitions = column_names.merge(data_desc, 
                how='left',
                on='Variable Name',
                indicator=False,
                validate='1:1')

In [4]:
data_definitions

Unnamed: 0,Variable Name,Description
0,gvkey,GVKEY -- Global Company Key (GVKEY)
1,datadate,
2,fyear,FYEAR -- Data Year - Fiscal (FYEAR)
3,indfmt,
4,consol,
5,popsrc,
6,datafmt,
7,tic,Ticker Symbol (TIC)
8,cusip,CUSIP (CUSIP)
9,conm,Company Name (CONM)


# NOTE: EDA IS NEEDED! There are a ton of missing values throughout this dataset

### Investment Opportunities

**Market-to-book**

In [5]:
def mtb(tic, year):
    market_value = firm_data.query('tic == @tic & fyear == @year')['mkvalt'].iloc[0]
    book_value = firm_data.query('tic == @tic & fyear == @year')['ceql'].iloc[0]
    value = market_value/book_value
    return value

**Tobin's Q**

For Tobin's Q -- is "debt" just liabilities? or just debt? (I included both in the dataset)

In [45]:
def tobinsQ(tic, year):
    price = firm_data.query('tic == @tic & fyear == @year')['prcc_f'].iloc[0]
    so = firm_data.query('tic == @tic & fyear == @year')['csho'].iloc[0]
    assets = firm_data.query('tic == @tic & fyear == @year')['at'].iloc[0]
    equity = firm_data.query('tic == @tic & fyear == @year')['ceq'].iloc[0]
    return ((price * so) + assets - equity) / assets

**R&D Scaled**

In [7]:
# def rdScaled(tic, year):
#     rd = firm_data.query('tic == @tic & fyear == @year')['xrd']
#     assets = firm_data.query('tic == @tic & fyear == @year')['at']
#     return rd/assets

**Advertising Scaled**

In [8]:
# def advScaled(tic, year):
#     adv = firm_data.query('tic == @tic & fyear == @year')['xad']
#     assets = firm_data.query('tic == @tic & fyear == @year')['at']
#     return adv/assets

**Liquidity**

In [9]:
def liquidity(tic, year):
    currA = firm_data.query('tic == @tic & fyear == @year')['act'].iloc[0]
    currL = firm_data.query('tic == @tic & fyear == @year')['lct'].iloc[0]
    assets = firm_data.query('tic == @tic & fyear == @year')['at'].iloc[0]
    return (currA-currL)/assets

In [10]:
liquidity('AAPL',2012)

0.10854575608869502

### Firm Complexity

**Sales per Firm per Year**

In [11]:
sales = firm_data[['tic','fyear','ni']].copy()
sales # In millions per year

Unnamed: 0,tic,fyear,ni
0,AAL,2010.0,-471.0
1,AAL,2011.0,-1979.0
2,AAL,2012.0,-1876.0
3,AAL,2013.0,-1834.0
4,AAL,2014.0,2882.0
...,...,...,...
5762,ALLE,2015.0,153.9
5763,ALLE,2016.0,229.1
5764,ALLE,2017.0,273.3
5765,ALLE,2018.0,434.9


In [12]:
# To query:
def netIncome(tic, year):
    return firm_data.query('tic == @tic & fyear == @year')['ni'].iloc[0]

In [13]:
netIncome('AAL', 2012)

-1876.0

**Number of Employees per Firm per Year**

In [14]:
employees = firm_data[['tic','fyear','emp']].copy()
employees # In thousands per year

Unnamed: 0,tic,fyear,emp
0,AAL,2010.0,78.25
1,AAL,2011.0,80.10
2,AAL,2012.0,77.75
3,AAL,2013.0,110.40
4,AAL,2014.0,113.30
...,...,...,...
5762,ALLE,2015.0,9.40
5763,ALLE,2016.0,9.40
5764,ALLE,2017.0,10.00
5765,ALLE,2018.0,11.00


In [15]:
# To query:
def numEmployees(tic, year):
    return firm_data.query('tic == @tic & fyear == @year')['emp'].iloc[0]

In [16]:
numEmployees('AAL', 2012)

77.75

**Market Value per Firm per Year**

In [17]:
mv = firm_data[['tic','fyear','mkvalt']].copy()
mv # In millions per year

Unnamed: 0,tic,fyear,mkvalt
0,AAL,2010.0,2597.5755
1,AAL,2011.0,117.3438
2,AAL,2012.0,266.5571
3,AAL,2013.0,6591.9923
4,AAL,2014.0,37405.5843
...,...,...,...
5762,ALLE,2015.0,6327.7267
5763,ALLE,2016.0,6097.5360
5764,ALLE,2017.0,7563.1327
5765,ALLE,2018.0,7543.5153


In [18]:
# To query:
def marketValue(tic, year):
    return firm_data.query('tic == @tic & fyear == @year')['mkvalt'].iloc[0]

In [19]:
marketValue('AAL', 2012)

266.5571

### Monitoring

**Leverage (Debt / Equity)**

In [20]:
def dte(tic, year):
    debt = firm_data.query('tic == @tic & fyear == @year')['lt'].iloc[0]
    equity = firm_data.query('tic == @tic & fyear == @year')['seq'].iloc[0]
    return debt/equity

**Assets in Place**

In [21]:
def assetsInPlace(tic, year):
    ppe = firm_data.query('tic == @tic & fyear == @year')['ppegt'].iloc[0]
    assets = firm_data.query('tic == @tic & fyear == @year')['at'].iloc[0]
    return ppe/assets

**CapEx Scaled by Assets**

In [22]:
def capexAssets(tic, year):
    capx = firm_data.query('tic == @tic & fyear == @year')['capx'].iloc[0]
    assets = firm_data.query('tic == @tic & fyear == @year')['at'].iloc[0]
    return capx/assets

In [23]:
# All variables but Tobin's Q

firm_date_data = firm_data[['tic','fyear']].copy()
firm_date_data = firm_date_data.dropna()

**Return on Assets**

In [24]:
def roa(tic, year):
    ni = firm_data.query('tic == @tic & fyear == @year')['ni'].iloc[0]
    assets = firm_data.query('tic == @tic & fyear == @year')['at'].iloc[0]
    return ni/assets

In [25]:
firm_date_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5762 entries, 0 to 5766
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   tic     5762 non-null   object 
 1   fyear   5762 non-null   float64
dtypes: float64(1), object(1)
memory usage: 135.0+ KB


In [26]:
null_mask = firm_date_data.isnull().any(axis=1)
print(firm_date_data[null_mask])
firm_date_data

Empty DataFrame
Columns: [tic, fyear]
Index: []


Unnamed: 0,tic,fyear
0,AAL,2010.0
1,AAL,2011.0
2,AAL,2012.0
3,AAL,2013.0
4,AAL,2014.0
...,...,...
5762,ALLE,2015.0
5763,ALLE,2016.0
5764,ALLE,2017.0
5765,ALLE,2018.0


In [27]:
# commented for runs

# firm_date_data['liquidity'] = firm_date_data.apply(lambda x: liquidity(x['tic'], x['fyear']), axis=1)
# firm_date_data['net_income'] = firm_date_data.apply(lambda x: netIncome(x['tic'], x['fyear']), axis=1)
# firm_date_data['num_employees'] = firm_date_data.apply(lambda x: numEmployees(x['tic'], x['fyear']), axis=1)
# firm_date_data['market_value'] = firm_date_data.apply(lambda x: marketValue(x['tic'], x['fyear']), axis=1)
# firm_date_data['debt_to_equity'] = firm_date_data.apply(lambda x: dte(x['tic'], x['fyear']), axis=1)
# firm_date_data['assets_in_place'] = firm_date_data.apply(lambda x: assetsInPlace(x['tic'], x['fyear']), axis=1)
# firm_date_data['capex_by_assets'] = firm_date_data.apply(lambda x: capexAssets(x['tic'], x['fyear']), axis=1)
# firm_date_data['return_on_equity'] = firm_date_data.apply(lambda x: roa(x['tic'], x['fyear']), axis=1)

In [None]:
# commented for runs

# corr_measure = firm_date_data[['tic','fyear']]
# corr_measure['market_to_book'] = corr_measure.apply(lambda x: mtb(x['tic'], x['fyear']), axis=1)
# corr_measure['TobinsQ'] = corr_measure.apply(lambda x: tobinsQ(x['tic'], x['fyear']), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  corr_measure['market_to_book'] = corr_measure.apply(lambda x: mtb(x['tic'], x['fyear']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  corr_measure['TobinsQ'] = corr_measure.apply(lambda x: tobinsQ(x['tic'], x['fyear']), axis=1)


In [29]:
# Data is reported on 12/31 for the year in question, so +1 sets it to year start
firm_date_data['fyear'] = firm_date_data['fyear']+1
corr_measure['fyear'] = corr_measure['fyear']+1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  corr_measure['fyear'] = corr_measure['fyear']+1


In [30]:
firm_date_data.to_csv('input_data_clean/director_comp_deter.csv')

In [31]:
firm_date_data

Unnamed: 0,tic,fyear,liquidity,net_income,num_employees,market_value,debt_to_equity,assets_in_place,capex_by_assets,return_on_equity
0,AAL,2011.0,-0.077408,-471.0,78.25,2597.5755,-7.359442,1.064931,0.078205,-0.018774
1,AAL,2012.0,-0.078539,-1979.0,80.10,117.3438,-4.353677,1.042184,0.067511,-0.082984
2,AAL,2013.0,-0.094938,-1876.0,77.75,266.5571,-3.943533,1.030753,0.080306,-0.079796
3,AAL,2014.0,0.012229,-1834.0,110.40,6591.9923,-16.480776,0.718861,0.073655,-0.043380
4,AAL,2015.0,-0.030225,2882.0,113.30,37405.5843,20.658090,0.807452,0.121336,0.065843
...,...,...,...,...,...,...,...,...,...,...
5762,ALLE,2016.0,0.126023,153.9,9.40,6327.7267,88.109375,0.269768,0.015403,0.067343
5763,ALLE,2017.0,0.177850,229.1,9.40,6097.5360,18.808473,0.284951,0.018911,0.101940
5764,ALLE,2018.0,0.224980,273.3,10.00,7563.1327,5.319970,0.278482,0.019394,0.107514
5765,ALLE,2019.0,0.146182,434.9,11.00,7543.5153,3.312135,0.266458,0.017472,0.154758


### Firm Performance

In [32]:
firm_data

Unnamed: 0,gvkey,datadate,fyear,indfmt,consol,popsrc,datafmt,tic,cusip,conm,...,xad,xrd,cik,costat,dvpsp_f,mkvalt,prcc_f,gsector,naics,ipodate
0,1045,2010-12-31,2010.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,165.0,,6201,A,0.00,2597.5755,7.790,20,481111,
1,1045,2011-12-31,2011.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,186.0,,6201,A,0.00,117.3438,0.350,20,481111,
2,1045,2012-12-31,2012.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,153.0,,6201,A,0.00,266.5571,0.795,20,481111,
3,1045,2013-12-31,2013.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,166.0,,6201,A,0.00,6591.9923,25.250,20,481111,
4,1045,2014-12-31,2014.0,INDL,C,D,STD,AAL,02376R102,AMERICAN AIRLINES GROUP INC,...,100.0,,6201,A,0.20,37405.5843,53.630,20,481111,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5762,316056,2015-12-31,2015.0,INDL,C,D,STD,ALLE,G0176J109,ALLEGION PLC,...,,45.2,1579241,A,0.40,6327.7267,65.920,20,332510,
5763,316056,2016-12-31,2016.0,INDL,C,D,STD,ALLE,G0176J109,ALLEGION PLC,...,,47.3,1579241,A,0.48,6097.5360,64.000,20,332510,
5764,316056,2017-12-31,2017.0,INDL,C,D,STD,ALLE,G0176J109,ALLEGION PLC,...,,48.3,1579241,A,0.64,7563.1327,79.560,20,332510,
5765,316056,2018-12-31,2018.0,INDL,C,D,STD,ALLE,G0176J109,ALLEGION PLC,...,,54.4,1579241,A,0.84,7543.5153,79.710,20,332510,


### Profitability Performance

In [35]:
def EBITDAMargin(tic, year):
    ebitda = firm_data.query('tic == @tic & fyear == @year')['ebitda'].iloc[0]
    revenue = firm_data.query('tic == @tic & fyear == @year')['revt'].iloc[0]
    return ebitda/revenue

In [37]:
def niRev(tic, year):
    netIncome = firm_data.query('tic == @tic & fyear == @year')['ni'].iloc[0]
    revenue = firm_data.query('tic == @tic & fyear == @year')['revt'].iloc[0]
    return netIncome/revenue

In [38]:
def roe(tic, year):
    netIncome = firm_data.query('tic == @tic & fyear == @year')['ni'].iloc[0]
    equity = firm_data.query('tic == @tic & fyear == @year')['seq'].iloc[0]
    return netIncome/equity

In [36]:
def niRev(tic, year):
    netIncome = firm_data.query('tic == @tic & fyear == @year')['ni'].iloc[0]
    revenue = firm_data.query('tic == @tic & fyear == @year')['revt'].iloc[0]
    return netIncome/revenue

### Market Value Performance

In [39]:
def eps(tic, year):
    netIncome = firm_data.query('tic == @tic & fyear == @year')['ni'].iloc[0]
    prefDiv = firm_data.query('tic == @tic & fyear == @year')['dvp'].iloc[0]
    so = firm_data.query('tic == @tic & fyear == @year')['csho'].iloc[0]
    return (netIncome-prefDiv)/so

In [56]:
def stockPrice(tic, year):
    start_date = str(year) + "-01-01"
    end_date = str(year) + "-12-31"
    ticker_data = yf.download(tic, start=start_date, end=end_date)
    start_price = ticker_data['Adj Close'][0]
    end_price = ticker_data['Adj Close'][-1]
    change = (end_price/start_price)-1
    return 

In [41]:
def divYield(tic, year):
    dividend = firm_data.query('tic == @tic & fyear == @year')['dvpsp_f'].iloc[0]
    price = firm_data.query('tic == @tic & fyear == @year')['prcc_f'].iloc[0]
    return dividend/price

In [54]:
def volatility(tic, year):
    start_date = year + "-01-01"
    end_date = year + "-12-31"
    ticker_data = yf.download(tic, start=start_date, end=end_date)
    daily_ret = ticker_data['Adj Close'].pct_change()[1:]
    vol = np.sqrt(252)*daily_ret.std()
    return vol

In [44]:
def mva(tic, year):
    marketVal = firm_data.query('tic == @tic & fyear == @year')['mkvalt'].iloc[0]
    equity = firm_data.query('tic == @tic & fyear == @year')['seq'].iloc[0]
    return dividend/price

### Growth Performance

In [47]:
def assetg(tic, year):
    prior_year = year-1
    yrminus1 = firm_data.query('tic == @tic & fyear == @prior_year')['at'].iloc[0]
    yr = firm_data.query('tic == @tic & fyear == @year')['at'].iloc[0]
    return (yr/yrminus1)-1

In [48]:
def revenueg(tic, year):
    prior_year = year-1
    yrminus1 = firm_data.query('tic == @tic & fyear == @prior_year')['revt'].iloc[0]
    yr = firm_data.query('tic == @tic & fyear == @year')['revt'].iloc[0]
    return (yr/yrminus1)-1

In [49]:
def incomeg(tic, year):
    prior_year = year-1
    yrminus1 = firm_data.query('tic == @tic & fyear == @prior_year')['ni'].iloc[0]
    yr = firm_data.query('tic == @tic & fyear == @year')['ni'].iloc[0]
    return (yr/yrminus1)-1

In [50]:
def employeeg(tic, year):
    prior_year = year-1
    yrminus1 = firm_data.query('tic == @tic & fyear == @prior_year')['emp'].iloc[0]
    yr = firm_data.query('tic == @tic & fyear == @year')['emp'].iloc[0]
    return (yr/yrminus1)-1

### Building the dataframe

In [33]:
firm_performance = firm_data[['tic','fyear']].copy()
firm_performance = firm_date_data.dropna()

In [57]:
# Profitability
# ROA = NI / assets
firm_performance['roa'] = firm_performance.apply(lambda x: roa(x['tic'], x['fyear']), axis=1)
# EBITDA Margin = EBITDA/total revenue
firm_performance['EBITDA_margin'] = firm_performance.apply(lambda x: EBITDAMargin(x['tic'], x['fyear']), axis=1)
# Net income / Revenue
# ROE = Net income / Shareholders equity

# Market value
# EPS = (net income - preferred dividends)/ shares outstanding
# Changes in stock price - pull from Yahoo Finance
# Dividend yield = dividends per share / price per share
# Volatility = standard deviation multiplied by the square root of the number of periods of time (252 trading days in US)
# MVA = Market Value of Shares - Shareholders equity (https://corporatefinanceinstitute.com/resources/valuation/market-value-added-mva/)
# Tobin's Q = (Market Value of common stock + book value of total assets - book value of common equity)/book value total assets (https://funginstitute.berkeley.edu/wp-content/uploads/2013/12/Younge-Marx-2012-Working-Paper-The-Market-Value-of-Knowledge-Protection-Jan-10-2012.pdf)
    # ((PRCC_F * CSHO) + AT – CEQ ) / AT
    
# Growth Performance
# Asset growth
# Total revenue growth
# Net income growth
# Employee growth

IndexError: single positional indexer is out-of-bounds