### Install, import, and connect

In [21]:
!pip install wrds



In [2]:
import numpy as np
import pandas as pd
import wrds       
# Open the WRDS connection; every query in this notebook goes through conn.raw_sql.
conn = wrds.Connection() 

WRDS recommends setting up a .pgpass file.
You can create this file yourself at any time
with the create_pgpass_file() function.
Loading library list...
Done


### CRSP

Get monthly stock returns and market equity from CRSP (Center for Research in Security Prices).  We use standard filters.  See http://www.crsp.com/files/data_descriptions_guide_0.pdf for a complete set of variable definitions.  CRSP uses PERMCO as a permanent company identifier and PERMNO as a permanent security identifier.  Some companies have multiple classes of common stock, which means multiple common stock PERMNOs can be associated with a single PERMCO.  Both monthly and daily data are available.

In [4]:
# Pull the monthly stock file joined to the name history, so each row carries
# the exchange/share codes that were in effect on that date.
#   - exchcd in (1,2,3) and shrcd in (10,11) are the filters applied here
#   - me = abs(prc) * shrout (abs() removes the sign of prc)
#   - exchcd is selected because the delisting-return step reads crsp.exchcd
crsp = conn.raw_sql(
    """
    select a.permno, a.permco, a.date, a.ret, abs(a.prc)*a.shrout as me, b.exchcd
    from crsp.msf a inner join crsp.msenames b 
    on a.permno=b.permno and a.date between b.namedt and b.nameendt
    and b.exchcd in (1,2,3) and b.shrcd in (10,11)
    where a.date >= '2000-01-01' 
    order by a.permno, a.date 
    """, 
    date_cols=['date']
)

# change strings or floats to integers
# (exchcd cannot be null here because the join filters on it)
for col in ['permno', 'permco', 'exchcd']:
    crsp[col] = crsp[col].astype(int)

# define market equity as sum of market equities of all permnos associated with a permco
# ('sum' by name: passing the builtin `sum` to transform is deprecated in pandas)
crsp['me'] = crsp.groupby(['date', 'permco'])['me'].transform('sum')

### Define delisting returns

Adjusting returns for delisting is standard practice, but the exact procedure varies.  Here, we follow some of the literature: when the delisting return is missing and the stock was delisted for poor performance, we assign a lower delisting return to Nasdaq stocks (-55%) than to NYSE/AMEX stocks (-35%).

In [25]:
# Delisting events (dlstcd > 100) with their delisting return, if any.
mse = conn.raw_sql(
    """
    select permno, dlret, dlstcd
    from crsp.mse
    where event='DELIST' and dlstcd>100
    order by permno
    """
)

# change string or float to int
mse['permno'] = mse.permno.astype(int)

# merge with crsp, keeping all rows of crsp
# NOTE(review): assumes at most one DELIST row per permno; more than one would
# duplicate crsp rows in this left merge -- confirm against the data.
crsp = crsp.merge(mse, how='left', on='permno')
del mse

# True on the last monthly observation of each permno
# (relies on crsp being ordered by permno, date)
LastObs = crsp.permno != crsp.permno.shift(-1)

# True if delisted for poor performance (code 500 or 520-584)
DLCode = (crsp.dlstcd == 500) | ((crsp.dlstcd >= 520) & (crsp.dlstcd <= 584))

# performance-related delisting with no reported delisting return;
# the two fills below are on disjoint exchanges, so this mask serves both
# (requires an exchcd column on crsp)
MissingDLRet = DLCode & crsp.dlret.isnull()

# -35% if no delisting return, delisted for poor performance and NYSE/AMEX
crsp['dlret'] = np.where(MissingDLRet & crsp.exchcd.isin([1, 2]), -0.35, crsp.dlret)

# -55% if no delisting return, delisted for poor performance and Nasdaq
crsp['dlret'] = np.where(MissingDLRet & (crsp.exchcd == 3), -0.55, crsp.dlret)

# if delisting return exists and < -1, change to -1
# (the comparison must be parenthesized: `&` binds tighter than `<`)
crsp['dlret'] = np.where(crsp.dlret.notnull() & (crsp.dlret < -1), -1, crsp.dlret)

# if last month and return exists, define return by compounding with delisting return (if exists)
crsp['ret'] = np.where(
    LastObs & crsp.ret.notnull(),
    (1 + crsp.ret) * (1 + crsp.dlret.fillna(0)) - 1,
    crsp.ret,
)

# if last month and return does not exist, define return as delisting return
crsp['ret'] = np.where(LastObs & crsp.ret.isnull(), crsp.dlret, crsp.ret)

crsp = crsp.drop(columns=['dlstcd', 'dlret'])

### Annual Compustat

An example of pulling data from the annual Compustat table.  datadate is the end of the fiscal year.  We impose standard filters.  See https://wrds-web.wharton.upenn.edu/wrds/demo/demoform_compustat.cfm for a full list of Compustat variable definitions. Quarterly data is also available.

In [26]:
# Annual fundamentals: one row per (gvkey, datadate) with total assets `at`,
# restricted to positive assets, fiscal year ends from 2000 on, and the
# INDFMT/DATAFMT/POPSRC/CONSOL filters below.
# Only comp.FUNDA is queried: no column or filter here needs another table.
comp = conn.raw_sql(
    """
    select a.gvkey, a.datadate, a.at
    from comp.FUNDA a
    where a.datadate >= '2000-01-01' and a.at>0 
    and INDFMT='INDL' and DATAFMT='STD' and POPSRC='D' and CONSOL='C'
    order by a.gvkey, datadate
    """, 
    date_cols=['datadate']
)

# convert string or float to int
comp['gvkey'] = comp['gvkey'].astype(int)

### Lag data to reflect reporting lag

Fama and French shift all annual reports in a calendar year to June 30 of the following year.  Many people follow them.  Others shift 6 months.

In [None]:
# True: date every annual report June 30 of the calendar year after datadate.
# False: date every annual report 6 months after datadate.
SHIFT_TO_JUNE = True

if SHIFT_TO_JUNE:

    # define date as June 30 of year following datadate (vectorized, no row-wise apply)
    comp['date'] = pd.to_datetime(
        pd.DataFrame({'year': comp.datadate.dt.year + 1, 'month': 6, 'day': 30})
    )

    # if two annual reports in one calendar year (due to change of fiscal year), keep last one
    # (relies on comp being ordered by gvkey, datadate)
    comp = comp.drop_duplicates(subset=['gvkey', 'date'], keep='last')

else:

    # define date to be 6 months after datadate
    # (pd.DateOffset is the public calendar-month offset; pd.offsets.MonthOffset
    # is not available in current pandas)
    comp['date'] = comp.datadate + pd.DateOffset(months=6)

### Ratios and growth rates

Define ratios, growth rates, etc.

In [None]:
# Growth rate of total assets, computed within each firm: grouping by gvkey
# makes the first report of every firm NaN instead of a change measured
# against the previous firm's last row.
# (relies on comp being ordered by datadate within gvkey)
comp['inv'] = comp.groupby('gvkey')['at'].pct_change()

### Assign permnos if merging with CRSP

In [None]:
# CRSP/Compustat link table: maps gvkey to permno over [linkdt, linkenddt].
link = conn.raw_sql(
    """
    select distinct gvkey, lpermno as permno, linkdt, linkenddt
    from crsp.Ccmxpf_linktable
    where linktype in ('LU', 'LC')
    and LINKPRIM in ('P', 'C')
    """
)

# convert strings or floats to ints
link['gvkey'] = link.gvkey.astype(int)
link['permno'] = link.permno.astype(int)

# make both link dates datetime64 so they compare cleanly with comp.datadate
link['linkdt'] = pd.to_datetime(link.linkdt)

# fill in missing end dates with a future date
link['linkenddt'] = pd.to_datetime(link.linkenddt).fillna(pd.Timestamp('21000101'))

# merge with Compustat data and keep rows with Compustat datadate between link date and link end date
comp = comp.merge(link, on='gvkey', how='inner')
comp = comp[(comp.datadate >= comp.linkdt) & (comp.datadate <= comp.linkenddt)]

# only permno and date are needed as keys from here on
comp = comp.drop(columns=['gvkey', 'datadate', 'linkdt', 'linkenddt'])

### Merge CRSP with Compustat

* Convert dates to monthly periods before merging, because the Compustat date is the last calendar day of the month, while the CRSP date is the last trading day of the month.
* Merge keeping all rows of CRSP data.  There will be NaNs for Compustat data for 11 months each year.

In [None]:
# Express both date columns as monthly periods so that a month-end calendar
# date and a last-trading-day date in the same month become the same key.
crsp['date'] = crsp['date'].dt.to_period('M')
comp['date'] = comp['date'].dt.to_period('M')

# Left merge: every CRSP row is kept; Compustat columns are NaN where no
# report is dated in that month.
df = pd.merge(crsp, comp, how='left', on=['permno', 'date'])

### Fill Compustat data into months

* Group by permno when filling forward so we don't fill from one stock into another
* A limit of 11 months on the forward fill is the right limit if we have shifted to June 30, but it should probably be longer otherwise, because a firm might change its fiscal year and go more than 12 months between annual reports.

In [None]:
# Carry each annual value forward within a permno, for at most 11 further
# months, so that values never spill from one stock into another.
annual_cols = ['at', 'inv']
df[annual_cols] = df.groupby('permno')[annual_cols].ffill(limit=11)