In [85]:
import pandas as pd

In [86]:
# Load the firm-level dataset and the CSV describing its variables.
# Both paths are relative to the notebook's working directory.
firm_data = pd.read_csv('input_data/Firm_data.csv')
data_desc = pd.read_csv('data_description/Firm_data_descriptions.csv')

In [87]:
# Lower-case the documented variable names so they can be matched against
# the firm_data column headers, and keep only the name/description pair.
data_desc['Variable Name'] = data_desc['Variable Name'].str.lower()
data_desc = data_desc[['Variable Name', 'Description']]

# One row per firm_data column, in the order the columns appear.
column_names = pd.DataFrame({'Variable Name': firm_data.columns})

# Left join keeps every firm_data column even when no description exists;
# validate='1:1' raises if a variable name is duplicated on either side.
data_definitions = pd.merge(
    column_names,
    data_desc,
    how='left',
    on='Variable Name',
    validate='1:1',
)

In [88]:
# Display the column-name-to-description lookup table built above.
data_definitions

Unnamed: 0,Variable Name,Description
0,gvkey,GVKEY -- Global Company Key (GVKEY)
1,datadate,
2,fyear,FYEAR -- Data Year - Fiscal (FYEAR)
3,indfmt,
4,consol,
5,popsrc,
6,datafmt,
7,tic,Ticker Symbol (TIC)
8,cusip,CUSIP (CUSIP)
9,conm,Company Name (CONM)


# NOTE: EDA is needed! There are many missing values throughout this dataset.

### Investment Opportunities

**Market-to-book**

In [89]:
def mtb(tic, year):
    """Return the market-to-book ratio for one firm-year.

    Args:
        tic: Ticker symbol matched against the ``tic`` column.
        year: Fiscal year matched against the ``fyear`` column.

    Returns:
        A pandas Series holding market value (``mkvalt``) divided by book
        value (``ceql``), one entry per matching row of ``firm_data``.
        The Series is empty when no row matches.
    """
    # Filter once and reuse the rows rather than re-running the same
    # query for every column.
    firm_year = firm_data.query('tic == @tic & fyear == @year')
    return firm_year['mkvalt'] / firm_year['ceql']

**Tobin's Q**

Open question for Tobin's Q: should "debt" mean all liabilities, or only debt? Both are included in the dataset; the function below currently uses the `lt` column.

In [56]:
def tobinsQ(tic, year):
    """Return Tobin's Q for one firm-year.

    Computed as ``(mkvalt + pstkl + lt) / at``: market value plus
    liquidation preference plus debt, scaled by assets.

    Args:
        tic: Ticker symbol matched against the ``tic`` column.
        year: Fiscal year matched against the ``fyear`` column.

    Returns:
        A pandas Series with one entry per matching row of ``firm_data``
        (empty when no row matches). A missing value in any of the four
        inputs yields NaN for that row.
    """
    # Filter once and reuse the rows rather than re-running the same
    # query for each of the four columns.
    firm_year = firm_data.query('tic == @tic & fyear == @year')
    # NOTE(review): `lt` is used as the "debt" term here; confirm this is
    # the intended definition.
    numerator = firm_year['mkvalt'] + firm_year['pstkl'] + firm_year['lt']
    return numerator / firm_year['at']

**R&D Scaled**

In [58]:
def rdScaled(tic, year):
    """Return R&D (``xrd``) scaled by assets (``at``) for one firm-year.

    Args:
        tic: Ticker symbol matched against the ``tic`` column.
        year: Fiscal year matched against the ``fyear`` column.

    Returns:
        A pandas Series of ``xrd / at``, one entry per matching row of
        ``firm_data`` (empty when no row matches).
    """
    # Filter once and reuse the rows rather than querying per column.
    firm_year = firm_data.query('tic == @tic & fyear == @year')
    return firm_year['xrd'] / firm_year['at']

**Advertising Scaled**

In [60]:
def advScaled(tic, year):
    """Return advertising (``xad``) scaled by assets (``at``) for one firm-year.

    Args:
        tic: Ticker symbol matched against the ``tic`` column.
        year: Fiscal year matched against the ``fyear`` column.

    Returns:
        A pandas Series of ``xad / at``, one entry per matching row of
        ``firm_data`` (empty when no row matches).
    """
    # Filter once and reuse the rows rather than querying per column.
    firm_year = firm_data.query('tic == @tic & fyear == @year')
    return firm_year['xad'] / firm_year['at']

**Liquidity**

In [62]:
def liquidity(tic, year):
    """Return the liquidity ratio for one firm-year.

    Computed as ``(act - lct) / at``: current assets minus current
    liabilities, scaled by assets.

    Args:
        tic: Ticker symbol matched against the ``tic`` column.
        year: Fiscal year matched against the ``fyear`` column.

    Returns:
        A pandas Series with one entry per matching row of ``firm_data``
        (empty when no row matches).
    """
    # Filter once and reuse the rows rather than querying per column.
    firm_year = firm_data.query('tic == @tic & fyear == @year')
    net_current = firm_year['act'] - firm_year['lct']
    return net_current / firm_year['at']

In [63]:
# Example: liquidity ratio for ticker 'AAPL' in fiscal year 2012.
liquidity('AAPL',2012)

202    0.108546
dtype: float64

### Firm Complexity

**Sales per Firm per Year**

In [67]:
# Per-firm, per-year slice holding ticker, fiscal year and the `ni` column.
# NOTE(review): the section heading says "Sales", but `ni` is the column the
# netIncome() helper below returns -- confirm whether a sales column was
# intended here.
sales = firm_data[['tic','fyear','ni']]
sales # In millions per year

Unnamed: 0,tic,fyear,ni
0,AAL,2010.0,-471.0
1,AAL,2011.0,-1979.0
2,AAL,2012.0,-1876.0
3,AAL,2013.0,-1834.0
4,AAL,2014.0,2882.0
...,...,...,...
5762,ALLE,2015.0,153.9
5763,ALLE,2016.0,229.1
5764,ALLE,2017.0,273.3
5765,ALLE,2018.0,434.9


In [73]:
# To query:
def netIncome(tic, year):
    """Look up the ``ni`` value for one firm-year.

    Args:
        tic: Ticker symbol matched against the ``tic`` column.
        year: Fiscal year matched against the ``fyear`` column.

    Returns:
        A pandas Series named ``ni`` with one entry per matching row of
        ``firm_data`` (empty when no row matches).
    """
    firm_year = firm_data.query('tic == @tic & fyear == @year')
    return firm_year['ni']

In [74]:
# Example: `ni` for ticker 'AAL' in fiscal year 2012.
netIncome('AAL', 2012)

2   -1876.0
Name: ni, dtype: float64

**Number of Employees per Firm per Year**

In [65]:
# Per-firm, per-year slice holding ticker, fiscal year and employee count.
employees = firm_data[['tic','fyear','emp']]
employees # In thousands per year

Unnamed: 0,tic,fyear,emp
0,AAL,2010.0,78.25
1,AAL,2011.0,80.10
2,AAL,2012.0,77.75
3,AAL,2013.0,110.40
4,AAL,2014.0,113.30
...,...,...,...
5762,ALLE,2015.0,9.40
5763,ALLE,2016.0,9.40
5764,ALLE,2017.0,10.00
5765,ALLE,2018.0,11.00


In [75]:
# To query:
def numEmployees(tic, year):
    """Look up the ``emp`` value for one firm-year.

    Args:
        tic: Ticker symbol matched against the ``tic`` column.
        year: Fiscal year matched against the ``fyear`` column.

    Returns:
        A pandas Series named ``emp`` with one entry per matching row of
        ``firm_data`` (empty when no row matches).
    """
    firm_year = firm_data.query('tic == @tic & fyear == @year')
    return firm_year['emp']

In [76]:
# Example: `emp` for ticker 'AAL' in fiscal year 2012.
numEmployees('AAL', 2012)

2    77.75
Name: emp, dtype: float64

**Market Value per Firm per Year**

In [90]:
# Per-firm, per-year slice holding ticker, fiscal year and market value.
mv = firm_data[['tic','fyear','mkvalt']]
mv # In millions per year

Unnamed: 0,tic,fyear,mkvalt
0,AAL,2010.0,2597.5755
1,AAL,2011.0,117.3438
2,AAL,2012.0,266.5571
3,AAL,2013.0,6591.9923
4,AAL,2014.0,37405.5843
...,...,...,...
5762,ALLE,2015.0,6327.7267
5763,ALLE,2016.0,6097.5360
5764,ALLE,2017.0,7563.1327
5765,ALLE,2018.0,7543.5153


In [91]:
# To query:
def marketValue(tic, year):
    """Look up the ``mkvalt`` value for one firm-year.

    Args:
        tic: Ticker symbol matched against the ``tic`` column.
        year: Fiscal year matched against the ``fyear`` column.

    Returns:
        A pandas Series named ``mkvalt`` with one entry per matching row
        of ``firm_data`` (empty when no row matches).
    """
    firm_year = firm_data.query('tic == @tic & fyear == @year')
    return firm_year['mkvalt']

In [92]:
# Example: `mkvalt` for ticker 'AAL' in fiscal year 2012.
marketValue('AAL', 2012)

2    266.5571
Name: mkvalt, dtype: float64

### Monitoring

**Leverage (Debt / Equity)**

In [77]:
def dte(tic, year):
    """Return leverage (debt / equity) for one firm-year.

    Computed as ``lt / seq``.

    Args:
        tic: Ticker symbol matched against the ``tic`` column.
        year: Fiscal year matched against the ``fyear`` column.

    Returns:
        A pandas Series with one entry per matching row of ``firm_data``
        (empty when no row matches).
    """
    # Filter once and reuse the rows rather than querying per column.
    firm_year = firm_data.query('tic == @tic & fyear == @year')
    return firm_year['lt'] / firm_year['seq']

**Assets in Place**

In [78]:
def assetsInPlace(tic, year):
    """Return assets in place for one firm-year.

    Computed as ``ppegt / at``: the PP&E figure scaled by assets.

    Args:
        tic: Ticker symbol matched against the ``tic`` column.
        year: Fiscal year matched against the ``fyear`` column.

    Returns:
        A pandas Series with one entry per matching row of ``firm_data``
        (empty when no row matches).
    """
    # Filter once and reuse the rows rather than querying per column.
    firm_year = firm_data.query('tic == @tic & fyear == @year')
    return firm_year['ppegt'] / firm_year['at']

**CapEx Scaled by Assets**

In [79]:
def capexScaled(tic, year):
    """Return capital expenditure (``capx``) scaled by assets (``at``).

    Named ``capexScaled`` (in line with ``rdScaled`` / ``advScaled``) so
    that defining it does not overwrite ``assetsInPlace``, which computes
    a different ratio.

    Args:
        tic: Ticker symbol matched against the ``tic`` column.
        year: Fiscal year matched against the ``fyear`` column.

    Returns:
        A pandas Series of ``capx / at``, one entry per matching row of
        ``firm_data`` (empty when no row matches).
    """
    # Filter once and reuse the rows rather than querying per column.
    firm_year = firm_data.query('tic == @tic & fyear == @year')
    return firm_year['capx'] / firm_year['at']

**Return on Assets**

In [80]:
def RoA(tic, year):
    """Return the return on assets for one firm-year.

    Computed as ``ni / at``.

    Args:
        tic: Ticker symbol matched against the ``tic`` column.
        year: Fiscal year matched against the ``fyear`` column.

    Returns:
        A pandas Series with one entry per matching row of ``firm_data``
        (empty when no row matches).
    """
    # Filter once and reuse the rows rather than querying per column.
    firm_year = firm_data.query('tic == @tic & fyear == @year')
    # The numerator is `ni`; the earlier version divided an undefined
    # `capx` name by assets instead.
    return firm_year['ni'] / firm_year['at']