In [1]:
import pandas as pd
import numpy as np

In [10]:
# filename = "wrds_data.csv"
filename = "i6phkajlrvf1t8gd.csv"
df = pd.read_csv(filename)
# df.columns = ["gvkey","datadate","fyear","indfmt","consol","popsrc","datafmt","curcd","ceq","csho","dlc","dltt","costat","sich","prcc_f","sic"]
print("Dimensions:",df.shape)
df.head()

Dimensions: (138867, 16)


Unnamed: 0,gvkey,datadate,fyear,indfmt,consol,popsrc,datafmt,curcd,ceq,csho,dlc,dltt,costat,sich,prcc_f,sic
0,1004,31/05/2010,2009.0,INDL,C,D,STD,USD,746.906,39.484,100.833,336.191,A,5080.0,19.7,5080.0
1,1004,31/05/2011,2010.0,INDL,C,D,STD,USD,835.845,39.781,114.075,329.802,A,5080.0,26.39,5080.0
2,1004,31/05/2012,2011.0,INDL,C,D,STD,USD,864.649,40.273,122.865,669.489,A,5080.0,12.05,5080.0
3,1004,31/05/2013,2012.0,INDL,C,D,STD,USD,918.6,39.382,86.4,622.2,A,5080.0,20.06,5080.0
4,1004,31/05/2014,2013.0,INDL,C,D,STD,USD,999.5,39.56,69.7,564.3,A,5080.0,24.3,5080.0


# Replace NULL SICH with SIC codes

### WHAT ABOUT ROWS WITH NULL SIC


In [5]:
df["sich"] = df.apply(lambda x: (x["sic"] if np.isnan(x["sich"]) else x["sich"]), axis = 1)
# df.isnull().sum()

# Book and Market Leverage

$$\text{BookLeverage} = \frac{\text{DLTT} + \text{DLC}}{\text{DLTT} + \text{DLC} + \text{CEQ}}$$

$$\text{MarketLeverage} = \frac{\text{DLTT} + \text{DLC}}{\text{DLTT} + \text{DLC} + (\text{PRCC\_F} * \text{CSHO})} $$


In [8]:
df["book_leverage"] = (df["dltt"]+df["dlc"])/(df["dltt"]+df["dlc"]+df["ceq"])
df["market_leverage"] = (df["dltt"]+df["dlc"])/(df["dltt"]+df["dlc"]+df["prcc_f"]*df["csho"])
df["asset_tangibility"] = df["ppent"]/df["at"]
df["market_to_book"] = (df["dltt"]+df["dlc"]+df["prcc_f"]*df["csho"])/(df["dltt"]+df["dlc"]+df["ceq"])
df["profitability"] = df["ebitda"]/df["at"]
df.head()

KeyError: 'ppent'

Before Dropping NULL Values

In [5]:
df[["book_leverage","market_leverage"]].isnull().sum()

book_leverage      102469
market_leverage    217156
dtype: int64

After dropping NULL Values

In [6]:
df.dropna(subset=["book_leverage","market_leverage"], inplace=True)
df[["book_leverage","market_leverage"]].isnull().sum()

book_leverage      0
market_leverage    0
dtype: int64

# Winsorize Outliers

In [7]:
from scipy.stats.mstats import winsorize
df['book_leverage'] = winsorize(df['book_leverage'], limits=[0.01,0.01])
df['market_leverage'] = winsorize(df['market_leverage'], limits=[0.01,0.01])

print("Dimensions:",df.shape)
df.head()

Dimensions: (359496, 18)


Unnamed: 0,gvkey,datadate,fyear,indfmt,consol,popsrc,datafmt,curcd,ceq,csho,dlc,dltt,costat,sich,prcc_f,sic,book_leverage,market_leverage
9,1000,31-Dec-70,1970.0,INDL,C,D,STD,USD,10.544,2.446,12.378,0.917,I,3089.0,10.0,3089.0,0.5577,0.352139
10,1000,31-Dec-71,1971.0,INDL,C,D,STD,USD,8.381,2.995,2.857,10.318,I,3089.0,5.75,3089.0,0.611199,0.433442
11,1000,31-Dec-72,1972.0,INDL,C,D,STD,USD,7.021,2.902,0.0,7.0,I,3089.0,5.125,3089.0,0.499251,0.320033
12,1000,31-Dec-73,1973.0,INDL,C,D,STD,USD,8.567,2.84,0.0,7.0,I,3089.0,1.75,3089.0,0.449669,0.584795
13,1000,31-Dec-74,1974.0,INDL,C,D,STD,USD,9.843,2.15,0.5,7.0,I,3089.0,2.125,3089.0,0.432451,0.62144


# Report: Mean and number of observations of BookLeverage and MarketLeverage for

## All firm-years in Compustat

In [9]:
tdf = df
print("Book Leverage:\n\tCount: {0}\n\tMean: {1}".format(tdf.shape[0], np.average(tdf["book_leverage"])))
print("Market Leverage:\n\tCount: {0}\n\tMean: {1}".format(tdf.shape[0], np.average(tdf["market_leverage"])))

Book Leverage:
	Count: 359496
	Mean: 0.3420995375016838
Market Leverage:
	Count: 359496
	Mean: 0.2789952778284136


## All firm-years for which SICH is in the range 4900-4999 (utilities)


In [11]:
tdf = df[df["sich"].apply(lambda x: (x in range(4900,5000)))]
print("Book Leverage:\n\tCount: {0}\n\tMean: {1}".format(tdf.shape[0], np.average(tdf["book_leverage"])))
print("Market Leverage:\n\tCount: {0}\n\tMean: {1}".format(tdf.shape[0], np.average(tdf["market_leverage"])))

Book Leverage:
	Count: 14316
	Mean: 0.5365861115750526
Market Leverage:
	Count: 14316
	Mean: 0.45515856686018163


## All firm-years for which SICH is in the range 2000-3999 (manufacturing)


In [13]:
tdf = df[df["sich"].apply(lambda x: (x in range(2000,4000)))]
print("Book Leverage:\n\tCount: {0}\n\tMean: {1}".format(tdf.shape[0], np.average(tdf["book_leverage"])))
print("Market Leverage:\n\tCount: {0}\n\tMean: {1}".format(tdf.shape[0], np.average(tdf["market_leverage"])))

Book Leverage:
	Count: 142188
	Mean: 0.3003636241293289
Market Leverage:
	Count: 142188
	Mean: 0.23334500680401427


## All firm-years for which SICH is in the range 6000-6199 (banks and credit unions)


In [15]:
tdf = df[df["sich"].apply(lambda x: (x in range(6000,6200)))]
print("Book Leverage:\n\tCount: {0}\n\tMean: {1}".format(tdf.shape[0], np.average(tdf["book_leverage"])))
print("Market Leverage:\n\tCount: {0}\n\tMean: {1}".format(tdf.shape[0], np.average(tdf["market_leverage"])))

Book Leverage:
	Count: 29217
	Mean: 0.5081158558190922
Market Leverage:
	Count: 29217
	Mean: 0.4667814357464671


## All firm-years for which SICH is 7372 (computer software)


In [17]:
tdf = df[df["sich"].apply(lambda x: (x == 7372))]
print("Book Leverage:\n\tCount: {0}\n\tMean: {1}".format(tdf.shape[0], np.average(tdf["book_leverage"])))
print("Market Leverage:\n\tCount: {0}\n\tMean: {1}".format(tdf.shape[0], np.average(tdf["market_leverage"])))

Book Leverage:
	Count: 9307
	Mean: 0.11427250395383656
Market Leverage:
	Count: 9307
	Mean: 0.08716271015546005


***
***

# Fitting Regression Curves

## Book Leverage


In [7]:
from patsy import dmatrices

A = dmatrices('book_leverage ~ asset_tangibility + market_to_book + profitability',
        data = df
    )
A

PatsyError: Error evaluating factor: NameError: name 'asset_tangibility' is not defined
    book_leverage ~ asset_tangibility + market_to_book + profitability
                    ^^^^^^^^^^^^^^^^^