# wrds_compa

> Retrieve and process data from WRDS Compustat Annual

Because this is a proprietary dataset, I cannot show any of the retrieved or generated data in the documentation below (only column names are shown).

In [None]:
#| default_exp wrds.compa

In [None]:
#|exports
from __future__ import annotations
from pathlib import Path
from typing import List
import os

import pandas as pd
import numpy as np

import pandasmore as pdm
from finsets.wrds import wrds_api
from finsets import RESOURCES

In [None]:
#| export 
def raw_metadata(rawfile: str|Path=RESOURCES/'compa_variable_descriptions.csv', # location of the raw variable labels file
             ) -> pd.DataFrame:
    "Reads the raw variable labels file and returns it as a pd.DataFrame with columns `name`, `label`, `output_of`, `type`"

    def _label_without_name(row):
        # Remove the leading "<NAME> -- " prefix, then any "(<NAME>)" echo of the variable name
        raw_name = row['Variable Name'].strip()
        without_prefix = row['Description'].replace(raw_name + ' -- ', '')
        return without_prefix.replace('(' + raw_name + ')', '')

    raw = pd.read_csv(rawfile)
    raw['output_of'] = 'wrds.compa.clean'
    raw['Variable Label'] = raw.apply(_label_without_name, axis=1)

    # Names are normalized only after the labels are built, since the labels rely on the original spelling
    raw['Variable Name'] = raw['Variable Name'].str.strip().str.lower()

    tidy = raw[['Variable Name', 'Variable Label', 'output_of', 'Type']].copy()
    tidy.columns = ['name','label','output_of','type']
    return tidy

In [None]:
raw_metadata()

Unnamed: 0,name,label,output_of,type
0,gvkey,Global Company Key,wrds.compa.clean,string
1,conm,Company Name,wrds.compa.clean,string
2,tic,Ticker Symbol,wrds.compa.clean,string
3,cusip,CUSIP,wrds.compa.clean,string
4,cik,CIK Number,wrds.compa.clean,string
...,...,...,...,...
969,prch_c,Price High - Annual - Calendar,wrds.compa.clean,double
970,prch_f,Price High - Annual - Fiscal,wrds.compa.clean,double
971,prcl_c,Price Low - Annual - Calendar,wrds.compa.clean,double
972,prcl_f,Price Low - Annual - Fiscal,wrds.compa.clean,double


The following function gives more detailed metadata but requires connecting to WRDS. If all you want is variable names and labels, then `raw_metadata` is sufficient.

In [None]:
#| export
def raw_metadata_extra(wrds_username: str=None # If None, looks for WRDS_USERNAME with `os.getenv`, then prompts you if needed
             ) -> pd.DataFrame:
    "Collects metadata from WRDS `comp.funda` table and merges it with the variable labels from `raw_metadata`."

    if wrds_username is None:
        wrds_username = os.getenv('WRDS_USERNAME')
        if wrds_username is None: wrds_username = input("Enter your WRDS username: ") 

    # Connect outside the `try`: if connecting fails there is nothing to close, and touching
    # `db` in `finally` would raise an UnboundLocalError that hides the real connection error.
    db = wrds_api.Connection(wrds_username = wrds_username)
    try:
        funda = db.describe_table('comp','funda')
        nr_rows = db.get_row_count('comp','funda')
    finally:
        db.close()

    meta = funda[['name','type']].copy()
    meta['nr_rows'] = nr_rows
    meta['wrds_library'] = 'comp'
    meta['wrds_table'] = 'funda'

    # Left merge keeps every `comp.funda` column, even those without a label in `raw_metadata`
    meta = meta.merge(raw_metadata()[['name','label']], how='left', on='name')
    
    meta['output_of'] = 'wrds.compa.download'
    meta = pdm.order_columns(meta,these_first=['name','label','output_of'])
    # Every column (including `nr_rows`) is stored with pandas 'string' dtype
    for col in meta.columns:
        meta[col] = meta[col].astype('string')
    
    return meta

In [None]:
#| eval: false
raw_metadata_extra()

Loading library list...
Done
Approximately 881223 rows in comp.funda.


Unnamed: 0,name,label,output_of,type,nr_rows,wrds_library,wrds_table
0,gvkey,Global Company Key,wrds.compa.download,VARCHAR(6),881223,comp,funda
1,datadate,,wrds.compa.download,DATE,881223,comp,funda
2,fyear,Data Year - Fiscal,wrds.compa.download,DOUBLE_PRECISION,881223,comp,funda
3,indfmt,,wrds.compa.download,VARCHAR(12),881223,comp,funda
4,consol,,wrds.compa.download,VARCHAR(2),881223,comp,funda
...,...,...,...,...,...,...,...
943,au,Auditor,wrds.compa.download,VARCHAR(8),881223,comp,funda
944,auop,Auditor Opinion,wrds.compa.download,VARCHAR(8),881223,comp,funda
945,auopic,Auditor Opinion - Internal Control,wrds.compa.download,VARCHAR(1),881223,comp,funda
946,ceoso,Chief Executive Officer SOX Certification,wrds.compa.download,VARCHAR(1),881223,comp,funda


In [None]:
#| export
def default_raw_vars():
    """Default variables used in `download` if none are specified."""

    # One whitespace-separated block; `split` turns it into a fresh list on every call
    names = """
        datadate gvkey cusip cik tic fyear fyr naicsh sich exchg
        lt at txditc pstkl pstkrv pstk csho ajex rdip
        act dvc xad seq che lct dlc ib dvp txdi dp
        txp oancf ivncf fincf dltt mib ceq invt cogs revt
        sale capx xrd txdb prcc_f sstk prstkc dltis dltr emp
        dd1 ppegt ppent xint txt sppe gdwl xrent re dvpsx_f
        tstk wcap rect xsga aqc oibdp dpact fic ni ivao ivst
        dv intan pi txfo pifo xpp drc drlt ap xacc itcb
    """
    return names.split()

In [None]:
print(default_raw_vars())

['datadate', 'gvkey', 'cusip', 'cik', 'tic', 'fyear', 'fyr', 'naicsh', 'sich', 'exchg', 'lt', 'at', 'txditc', 'pstkl', 'pstkrv', 'pstk', 'csho', 'ajex', 'rdip', 'act', 'dvc', 'xad', 'seq', 'che', 'lct', 'dlc', 'ib', 'dvp', 'txdi', 'dp', 'txp', 'oancf', 'ivncf', 'fincf', 'dltt', 'mib', 'ceq', 'invt', 'cogs', 'revt', 'sale', 'capx', 'xrd', 'txdb', 'prcc_f', 'sstk', 'prstkc', 'dltis', 'dltr', 'emp', 'dd1', 'ppegt', 'ppent', 'xint', 'txt', 'sppe', 'gdwl', 'xrent', 're', 'dvpsx_f', 'tstk', 'wcap', 'rect', 'xsga', 'aqc', 'oibdp', 'dpact', 'fic', 'ni', 'ivao', 'ivst', 'dv', 'intan', 'pi', 'txfo', 'pifo', 'xpp', 'drc', 'drlt', 'ap', 'xacc', 'itcb']


In [None]:
#| export
def download(vars: List[str]=None, # If None, downloads `default_raw_vars`; `gvkey` and `datadate` are always included, and `permno`, `permco`, `iid` are added from the CCM link table
             obs_limit: int=None, #Number of rows to download. If None, full dataset will be downloaded
             wrds_username: str=None, #If None, looks for WRDS_USERNAME with `os.getenv`, then prompts you if needed
             start_date: str="01/01/1900", # Start date in MM/DD/YYYY format
             end_date: str=None #End date in MM/DD/YYYY format; if None, defaults to current date
             ) -> pd.DataFrame:
    """Downloads `vars` from `start_date` to `end_date` from WRDS `comp.funda` library and adds PERMNO and PERMCO as in CCM"""

    if vars is None: vars = default_raw_vars()
    # NOTE: column names cannot be bound as query parameters, so they are interpolated into the
    # SQL text; `vars` must therefore come from a trusted source.
    select_cols = ','.join(['a.gvkey', 'a.datadate'] + 
                           [f'a.{x}' for x in vars if x not in ['datadate', 'gvkey']])

    # `int()` guarantees that nothing but a number can end up in the LIMIT clause
    limit_clause = f"LIMIT {int(obs_limit)}" if obs_limit is not None else ""
    # Both dates are passed through `params` (previously only `end_date` was) rather than
    # pasted into the SQL text.
    sql_string=f"""SELECT b.lpermno as permno, b.lpermco as permco, b.liid as iid, {select_cols}
                    FROM comp.funda AS a
                    INNER JOIN crsp.ccmxpf_lnkhist AS b ON a.gvkey = b.gvkey
                    WHERE datadate BETWEEN b.linkdt AND COALESCE(b.linkenddt, CURRENT_DATE)
                            AND b.linktype IN ('LU','LC') AND b.linkprim IN ('P','C')
                            AND indfmt='INDL' AND datafmt='STD' AND popsrc='D' AND consol='C'
                            AND datadate BETWEEN %(start)s AND COALESCE(%(end)s, CURRENT_DATE)
                    {limit_clause}
                """
    return wrds_api.download(sql_string, wrds_username=wrds_username,
                             params={'start': start_date, 'end': end_date})

In [None]:
#| eval: false
raw = download(obs_limit=100)

Loading library list...
Done


In [None]:
#| eval: false
raw

Unnamed: 0,permno,permco,iid,gvkey,datadate,cusip,cik,tic,fyear,fyr,...,intan,pi,txfo,pifo,xpp,drc,drlt,ap,xacc,itcb
0,25881.0,23369.0,01,001000,1970-12-31,000032102,,AE.2,1970.0,12.0,...,0.226,3.620,,,0.579,,,6.114,0.763,0.0
1,25881.0,23369.0,01,001000,1971-12-31,000032102,,AE.2,1971.0,12.0,...,0.198,0.138,0.0,,0.546,,,4.326,1.195,0.0
2,25881.0,23369.0,01,001000,1972-12-31,000032102,,AE.2,1972.0,12.0,...,0.170,2.702,,,0.370,,,2.768,1.172,0.0
3,25881.0,23369.0,01,001000,1973-12-31,000032102,,AE.2,1973.0,12.0,...,0.152,2.879,,,0.325,,,2.789,0.826,0.0
4,25881.0,23369.0,01,001000,1974-12-31,000032102,,AE.2,1974.0,12.0,...,0.177,2.339,,,0.484,,,4.183,1.665,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,10074.0,5271.0,01,001009,1985-10-31,000781104,0000313368,ABSI.1,1985.0,10.0,...,0.000,3.231,,,,,,1.463,,0.0
96,10074.0,5271.0,01,001009,1986-10-31,000781104,0000313368,ABSI.1,1986.0,10.0,...,0.000,3.187,,,,,,1.239,,0.0
97,10074.0,5271.0,01,001009,1987-10-31,000781104,0000313368,ABSI.1,1987.0,10.0,...,0.000,1.492,,,,,,2.041,,0.0
98,10074.0,5271.0,01,001009,1988-10-31,000781104,0000313368,ABSI.1,1988.0,10.0,...,0.000,1.945,,,0.305,,,3.244,1.301,0.0


In [None]:
#| export
def clean(df: pd.DataFrame=None,        # If None, downloads `vars` using `download` function; else, must contain `permno` and `datadate` columns
          vars: List[str]=None,         # If None, downloads `default_raw_vars`
          obs_limit: int=None, #Number of rows to download. If None, full dataset will be downloaded
          wrds_username: str=None,      # If None, looks for WRDS_USERNAME with `os.getenv`, then prompts you if needed
          start_date: str="01/01/1900", # Start date in MM/DD/YYYY format
          end_date: str=None,           # End date. Default is current date          
          clean_kwargs: dict=None,      # Params to pass to `pdm.setup_panel` other than `panel_ids`, `time_var`, and `freq`; None means no extra params
          ) -> pd.DataFrame:
    """Applies `pandasmore.setup_panel` to `df`. If `df` is None, downloads `vars` using `download` function."""

    # A `None` default avoids sharing one mutable dict object across all calls
    if clean_kwargs is None: clean_kwargs = {}

    # The download arguments are only used when no `df` is supplied
    if df is None: df = download(vars=vars, obs_limit=obs_limit,  wrds_username=wrds_username, start_date=start_date, end_date=end_date)
    return pdm.setup_panel(df, panel_ids='permno', time_var='datadate', freq='Y', **clean_kwargs)

In [None]:
#| eval: false
df = clean(raw)

In [None]:
#| eval: false
df

Unnamed: 0_level_0,Unnamed: 1_level_0,datadate,dtdate,permco,iid,gvkey,cusip,cik,tic,fyear,fyr,...,intan,pi,txfo,pifo,xpp,drc,drlt,ap,xacc,itcb
permno,Ydate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10015,1983,1983-12-31,1983-12-31,6398.0,01,001001,000165100,0000723576,AMFD.,1983.0,12.0,...,0.612,1.574,0.0,,0.084,,,0.650,0.689,0.0
10015,1984,1984-12-31,1984-12-31,6398.0,01,001001,000165100,0000723576,AMFD.,1984.0,12.0,...,0.631,1.806,0.0,,0.138,,,1.344,0.750,0.0
10015,1985,1985-12-31,1985-12-31,6398.0,01,001001,000165100,0000723576,AMFD.,1985.0,12.0,...,13.077,4.724,0.0,,0.137,,,2.193,2.137,0.0
10023,1972,1972-12-31,1972-12-31,22159.0,01,001002,000352104,0001306124,AAIC.1,1972.0,12.0,...,0.000,1.466,,,,,,2.423,,0.0
10031,1983,1983-12-31,1983-12-31,6672.0,01,001003,000354100,0000730052,ANTQ,1983.0,12.0,...,0.000,1.959,0.0,,0.000,,,0.443,0.695,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61903,1977,1977-10-31,1977-10-31,11.0,01,001005,000370106,,ABA.2,1977.0,10.0,...,0.000,0.527,0.0,,,,,0.303,0.362,0.0
61903,1978,1978-10-31,1978-10-31,11.0,01,001005,000370106,,ABA.2,1978.0,10.0,...,0.000,0.354,0.0,,,,,1.031,0.207,0.0
61903,1979,1979-10-31,1979-10-31,11.0,01,001005,000370106,,ABA.2,1979.0,10.0,...,0.000,0.680,0.0,,,,,1.130,0.389,0.0
61903,1980,1980-10-31,1980-10-31,11.0,01,001005,000370106,,ABA.2,1980.0,10.0,...,0.107,1.387,0.0,,,,,2.806,0.801,0.0


We can ask to download a small sample of data and clean it in one step:

In [None]:
#| eval: false
df = clean(obs_limit=100)
df

Loading library list...
Done


Unnamed: 0_level_0,Unnamed: 1_level_0,datadate,dtdate,permco,iid,gvkey,cusip,cik,tic,fyear,fyr,...,intan,pi,txfo,pifo,xpp,drc,drlt,ap,xacc,itcb
permno,Ydate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
10015,1983,1983-12-31,1983-12-31,6398.0,01,001001,000165100,0000723576,AMFD.,1983.0,12.0,...,0.612,1.574,0.0,,0.084,,,0.650,0.689,0.0
10015,1984,1984-12-31,1984-12-31,6398.0,01,001001,000165100,0000723576,AMFD.,1984.0,12.0,...,0.631,1.806,0.0,,0.138,,,1.344,0.750,0.0
10015,1985,1985-12-31,1985-12-31,6398.0,01,001001,000165100,0000723576,AMFD.,1985.0,12.0,...,13.077,4.724,0.0,,0.137,,,2.193,2.137,0.0
10023,1972,1972-12-31,1972-12-31,22159.0,01,001002,000352104,0001306124,AAIC.1,1972.0,12.0,...,0.000,1.466,,,,,,2.423,,0.0
10031,1983,1983-12-31,1983-12-31,6672.0,01,001003,000354100,0000730052,ANTQ,1983.0,12.0,...,0.000,1.959,0.0,,0.000,,,0.443,0.695,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61903,1977,1977-10-31,1977-10-31,11.0,01,001005,000370106,,ABA.2,1977.0,10.0,...,0.000,0.527,0.0,,,,,0.303,0.362,0.0
61903,1978,1978-10-31,1978-10-31,11.0,01,001005,000370106,,ABA.2,1978.0,10.0,...,0.000,0.354,0.0,,,,,1.031,0.207,0.0
61903,1979,1979-10-31,1979-10-31,11.0,01,001005,000370106,,ABA.2,1979.0,10.0,...,0.000,0.680,0.0,,,,,1.130,0.389,0.0
61903,1980,1980-10-31,1980-10-31,11.0,01,001005,000370106,,ABA.2,1980.0,10.0,...,0.107,1.387,0.0,,,,,2.806,0.801,0.0


In [None]:
#| export
def book_equity(df: pd.DataFrame=None, # If None, downloads (and cleans) only required vars
                add_itcb=False,
                return_metadata: bool=False # If true, just returns metadata dictionary
                ) -> pd.DataFrame:
    """Computes `bookeq`, `shreq` and `pref_stock`; if `add_itcb` is True, `itcb` (missing treated as 0) is added to `bookeq`."""

    inputs = ['at', 'lt', 'seq', 'ceq', 'txditc', 'pstk', 'pstkrv', 'pstkl', 'itcb']
    metadata = {'inputs': {'wrds.compa.clean': inputs},
                'outputs': ['bookeq','shreq','pref_stock'],
                'labels': {'bookeq': 'Book equity', 'shreq': 'Shareholder equity', 'pref_stock': 'Preferred stock'}
    }
    if return_metadata: return metadata

    if df is None: df = clean(vars=inputs)
    panel = df[inputs].copy()   # work on a copy so the caller's frame is never modified
    panel['pstk'] = panel['pstk'].fillna(0)

    # Preferred stock: `pstkrv` if available, else `pstkl`, else `pstk` (already 0 where missing)
    panel['pref_stock'] = panel['pstkrv']
    for fallback in ('pstkl', 'pstk'):
        still_missing = panel['pref_stock'].isnull()
        panel['pref_stock'] = np.where(still_missing, panel[fallback], panel['pref_stock'])

    # Shareholder equity: `seq` if available, else `ceq` + `pstk`, else `at` - `lt`
    panel['shreq'] = panel['seq']
    for fallback in (panel['ceq'] + panel['pstk'], panel['at'] - panel['lt']):
        still_missing = panel['shreq'].isnull()
        panel['shreq'] = np.where(still_missing, fallback, panel['shreq'])

    bookeq = panel['shreq'] + panel['txditc'].fillna(0) - panel['pref_stock']
    if add_itcb:
        bookeq = bookeq + panel['itcb'].fillna(0)
    panel['bookeq'] = bookeq

    return panel[metadata['outputs']].copy()

In [None]:
#| eval: false
book_equity(return_metadata=True)

{'inputs': {'wrds.compa.clean': ['at',
   'lt',
   'seq',
   'ceq',
   'txditc',
   'pstk',
   'pstkrv',
   'pstkl',
   'itcb']},
 'outputs': ['bookeq', 'shreq', 'pref_stock'],
 'labels': {'bookeq': 'Book equity',
  'shreq': 'Shareholder equity',
  'pref_stock': 'Preferred stock'}}

In [None]:
#| eval: false
beq = book_equity(df)

In [None]:
#| eval: false
beq.head(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,bookeq,shreq,pref_stock
permno,Ydate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [None]:
#| eval: false
pdm.wins(beq).describe()

Unnamed: 0,bookeq,shreq,pref_stock
count,99.0,99.0,100.0
mean,221.978419,205.264409,0.05707
std,341.880151,319.721299,0.328719
min,-0.58248,-0.58248,0.0
25%,5.515,4.4,0.0
50%,15.527,15.675,0.0
75%,318.4505,297.919,0.0
max,1133.28,1035.792,2.069


In [None]:
#| eval: false
beq_from_scratch = book_equity()

Loading library list...
Done


In [None]:
#| eval: false
beq_from_scratch.head(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,bookeq,shreq,pref_stock
permno,Ydate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [None]:
#| export 
def tobin_q(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
            return_metadata: bool=False # If True, just returns the metadata dictionary
            ) -> pd.DataFrame:
    """Computes `tobinq` = (`at` - `bookeq` + `prcc_f` * `csho`) / `at`, with `bookeq` taken from `book_equity`."""

    inputs = ['at', 'lt', 'seq', 'ceq', 'txditc', 'pstk', 'pstkrv', 'pstkl', 'itcb','prcc_f','csho']
    metadata = {'inputs': {'wrds.compa.clean': inputs},
                'outputs':  ['tobinq'],
                'labels': {'tobinq': 'Tobin Q'}
    }
    if return_metadata: return metadata

    if df is None: df = clean(vars=inputs)
    panel = df[inputs].copy()
    panel = panel.join(book_equity(panel)[['bookeq']])

    price_times_shares = panel['prcc_f'] * panel['csho']
    panel['tobinq'] = (panel['at'] - panel['bookeq'] + price_times_shares) / panel['at']

    # Division by zero `at` yields +/-inf; report those observations as missing
    return panel[metadata['outputs']].replace([np.inf, -np.inf], np.nan).copy()

In [None]:
#| eval: false
tobin_q(return_metadata=True)

{'inputs': {'wrds.compa.clean': ['at',
   'lt',
   'seq',
   'ceq',
   'txditc',
   'pstk',
   'pstkrv',
   'pstkl',
   'itcb',
   'prcc_f',
   'csho']},
 'outputs': ['tobinq'],
 'labels': {'tobinq': 'Tobin Q'}}

In [None]:
#| eval: false
q = tobin_q(df)

In [None]:
#| eval: false
q.head(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,tobinq
permno,Ydate,Unnamed: 2_level_1


In [None]:
#| eval: false
pdm.wins(q).describe()

count    94.000000
mean      1.256989
std       0.600131
min       0.693168
25%       0.912091
50%       1.075441
75%       1.413881
max       4.777662
Name: tobinq, dtype: float64

In [None]:
#| export
def issuance_vars(df: pd.DataFrame=None,        # If None, downloads (and cleans) only required vars
                    return_metadata: bool=False # If True, just returns the metadata dictionary
                    ) -> pd.DataFrame:
    """Computes equity and debt issuance measures, each scaled by lagged `at`."""

    inputs = ['at', 'lt', 'seq', 'ceq', 'txditc', 'pstk', 'pstkrv', 'pstkl', 'itcb',
              'sstk','prstkc','dltis','dltr', 're', 'dlc','dltt']
    metadata = {'inputs': {'wrds.compa.clean': inputs},
                'outputs': ['equityiss_tot','equityiss_cfs', 'debtiss_tot', 'debtiss_cfs', 'debtiss_bs'],
                'labels': {'equityiss_tot':'Equity issuance','equityiss_cfs':'Equity issuance', 
                           'debtiss_tot':'Debt issuance', 'debtiss_cfs':'Debt issuance', 'debtiss_bs':'Debt issuance'}
    }
    if return_metadata: return metadata

    if df is None: df = clean(vars=inputs)
    panel = df[inputs].copy()
    panel = panel.join(book_equity(panel)[['bookeq']])
    panel['lag_at'] = pdm.lag(panel['at'])

    # Numerators: *_cfs use `sstk`/`prstkc`/`dltis`/`dltr`, *_bs and *_tot use `pdm.rdiff` of balance-sheet items
    net_equity_cfs = panel['sstk'].fillna(0) - panel['prstkc'].fillna(0)
    net_debt_cfs = panel['dltis'].fillna(0) - panel['dltr'].fillna(0)
    debt_change_bs = pdm.rdiff(panel['dltt']) + pdm.rdiff(panel['dlc'].fillna(0))
    bookeq_change = pdm.rdiff(panel['bookeq'])

    panel['equityiss_cfs'] = net_equity_cfs / panel['lag_at']
    panel['debtiss_cfs'] = net_debt_cfs / panel['lag_at']
    panel['debtiss_bs'] = debt_change_bs / panel['lag_at']
    panel['equityiss_tot'] = (bookeq_change - pdm.rdiff(panel['re'])) / panel['lag_at']
    panel['debtiss_tot'] = (pdm.rdiff(panel['at']) - bookeq_change) / panel['lag_at']

    # Zero lagged assets produce +/-inf; report those observations as missing
    return panel.replace([np.inf, -np.inf], np.nan)[metadata['outputs']].copy()

In [None]:
#| eval: false
iss = issuance_vars(df)

In [None]:
#| eval: false
pdm.wins(iss).describe()

Unnamed: 0,equityiss_tot,equityiss_cfs,debtiss_tot,debtiss_cfs,debtiss_bs
count,89.0,89.0,89.0,89.0,89.0
mean,0.024752,0.013238,0.058586,0.032378,0.041292
std,0.084305,0.074493,0.183283,0.098437,0.14436
min,-0.126677,-0.126537,-0.33093,-0.181757,-0.248653
25%,-0.002975,-0.003645,-0.038075,-0.011577,-0.029312
50%,0.002689,0.0,0.025316,0.0,0.005799
75%,0.020709,0.00044,0.166115,0.082257,0.098624
max,0.437305,0.415715,0.612256,0.345409,0.582878


In [None]:
#| export 
def investment_vars(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
                    return_metadata: bool=False # If True, just returns the metadata dictionary
                    ) -> pd.DataFrame:
    """Computes `ppentpch` (`pdm.rpct_change` of `ppent`) and `capx2la` (`capx` divided by `pdm.lag` of `at`)."""

    inputs = ['ppent','capx','at']
    metadata = {'inputs': {'wrds.compa.clean': inputs},
                'outputs': ['ppentpch','capx2la'],
                'labels': {'ppentpch':'Pct change in net PPE','capx2la': 'CAPX to lagged assets'}
    }
    if return_metadata: return metadata

    if df is None: df = clean(vars=inputs)
    panel = df[inputs].copy()

    panel['capx2la'] = panel['capx'] / pdm.lag(panel['at'])
    panel['ppentpch'] = pdm.rpct_change(panel['ppent'])

    # Infinite ratios are reported as missing
    return panel.replace([np.inf, -np.inf], np.nan)[metadata['outputs']].copy()

In [None]:
#| eval: false
inv = investment_vars(df)

In [None]:
#| eval: false
pdm.wins(inv).describe()

Unnamed: 0,ppentpch,capx2la
count,89.0,89.0
mean,0.130206,0.061352
std,0.310574,0.079814
min,-0.48252,0.000541
25%,-0.057994,0.017257
50%,0.064402,0.035409
75%,0.249182,0.066603
max,1.258939,0.418355


In [None]:
#| export 
def profitability_vars(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
                        return_metadata: bool=False # If True, just returns the metadata dictionary
                        ) -> pd.DataFrame:
    """Computes `roa` = `ib` / `at`; infinite values (zero `at`) are returned as NaN."""

    inputs = ['ib','at']
    metadata = {'inputs': {'wrds.compa.clean': inputs},
                'outputs': ['roa'],
                'labels': {'roa':'Return on assets'}
    }
    if return_metadata: return metadata

    if df is None: df = clean(vars=inputs)
    panel = df[inputs].copy()   # copy so the caller's frame is left untouched
    panel['roa'] = panel['ib'] / panel['at']

    no_infs = panel.replace([np.inf, -np.inf], np.nan)
    return no_infs[metadata['outputs']].copy()

In [None]:
#| eval: false
prof = profitability_vars(df)

In [None]:
#| eval: false
pdm.wins(prof).describe()

count    99.000000
mean      0.010533
std       0.130836
min      -0.579638
25%       0.019789
50%       0.043407
75%       0.059093
max       0.153198
Name: roa, dtype: float64

In [None]:
#| export 
def cashflow_vars(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
                        return_metadata: bool=False # If True, just returns the metadata dictionary
                        ) -> pd.DataFrame:
    """Computes cash flows scaled by lagged `at`: `cflow2la_is` from `ib` + `dp`, `cflow2la_cfs` from `oancf`, and `cflow2la_full` combining the two."""

    metadata = {'inputs': {'wrds.compa.clean': ['dtdate','oancf','ib','dp','at']},
                'outputs': ['cflow2la_is', 'cflow2la_cfs', 'cflow2la_full'],
                'labels': {'cflow2la_is':'Cash flows to lagged assets', 
                           'cflow2la_cfs':'Cash flows to lagged assets', 
                           'cflow2la_full':'Cash flows to lagged assets'}
    }      
    if return_metadata: return metadata

    reqs = metadata['inputs']['wrds.compa.clean']
    # `dtdate` shows up only in the output of `clean`, not in the raw download, so it must not be
    # requested from WRDS (`download` would try to select `a.dtdate` from `comp.funda`).
    # NOTE(review): assumes `pdm.setup_panel` is what creates `dtdate` - confirm.
    if df is None: df = clean(vars=[v for v in reqs if v != 'dtdate'])
    df = df[reqs].copy()
    
    lag_at = pdm.lag(df['at'])
    df['cflow2la_is'] = (df['ib']+df['dp']) / lag_at
    df['cflow2la_cfs'] = df['oancf'] / lag_at
    # Observations dated before 1987 use the `ib` + `dp` measure; later ones use the `oancf` measure
    df['cflow2la_full'] = np.where(df.dtdate.dt.year<1987, df['cflow2la_is'], df['cflow2la_cfs'])

    # Zero lagged assets produce +/-inf; report those observations as missing
    df = df.replace([np.inf, -np.inf], np.nan)    
    return df[metadata['outputs']].copy()

In [None]:
#| eval: false
cflow = cashflow_vars(df)

In [None]:
#| eval: false
pdm.wins(cflow).describe()

Unnamed: 0,cflow2la_is,cflow2la_cfs,cflow2la_full
count,89.0,39.0,85.0
mean,0.06329,0.031109,0.051232
std,0.114185,0.048238,0.101109
min,-0.482895,-0.106876,-0.387374
25%,0.04785,0.01496,0.015854
50%,0.078823,0.042539,0.056279
75%,0.109669,0.056724,0.103573
max,0.264514,0.127353,0.265303


In [None]:
#| export 
def liquidity_vars(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
                        return_metadata: bool=False # If True, just returns the metadata dictionary
                        ) -> pd.DataFrame:
    """Computes `cash2a` = `che` / `at`; infinite values (zero `at`) are returned as NaN."""

    inputs = ['che','at']
    metadata = {'inputs': {'wrds.compa.clean': inputs},
                'outputs': ['cash2a'],
                'labels': {'cash2a':'Cash holdings to assets'}
    }
    if return_metadata: return metadata

    if df is None: df = clean(vars=inputs)
    panel = df[inputs].copy()   # copy so the caller's frame is left untouched
    panel['cash2a'] = panel['che'] / panel['at']

    no_infs = panel.replace([np.inf, -np.inf], np.nan)
    return no_infs[metadata['outputs']].copy()

In [None]:
#| eval: false
liq = liquidity_vars(df)

In [None]:
#| eval: false
pdm.wins(liq).describe()

count    99.000000
mean      0.080199
std       0.101530
min       0.000353
25%       0.023544
50%       0.042460
75%       0.081949
max       0.436071
Name: cash2a, dtype: float64

In [None]:
#| export 
def leverage_vars(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
                        return_metadata: bool=False # If True, just returns the metadata dictionary
                        ) -> pd.DataFrame:
    """Computes `booklev` = (`dltt` + `dlc`) / `at`, limited to [0, 1]; infinite ratios (zero `at`) are returned as NaN."""

    metadata = {'inputs': {'wrds.compa.clean': ['dltt','dlc','at']},
                'outputs': ['booklev'],
                'labels': {'booklev':'Book leverage'}
    }      
    if return_metadata: return metadata

    reqs = metadata['inputs']['wrds.compa.clean']
    if df is None: df = clean(vars=reqs)
    df = df[reqs].copy()

    ratio = (df['dltt'] + df['dlc']) / df['at']
    # Drop +/-inf BEFORE limiting to [0, 1]. Doing it afterwards has no effect, because
    # +inf has already been turned into 1 and -inf into 0 by then.
    df['booklev'] = ratio.replace([np.inf, -np.inf], np.nan)
    df.loc[df.booklev<0, 'booklev'] = 0
    df.loc[df.booklev>1, 'booklev'] = 1
    
    return df[metadata['outputs']].copy()

In [None]:
#| eval: false
lev = leverage_vars(df)

In [None]:
#| eval: false
pdm.wins(lev).describe()

count    99.000000
mean      0.285207
std       0.170456
min       0.000000
25%       0.182050
50%       0.285096
75%       0.360982
max       0.738850
Name: booklev, dtype: float64

In [None]:
#| export 
def payout_vars(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
                        return_metadata: bool=False # If True, just returns the metadata dictionary
                        ) -> pd.DataFrame:
    """Computes `div2la` (`dvc`) and `rep2la` (`prstkc`), each with missing values set to 0 and divided by lagged `at`."""

    inputs = ['dvc','prstkc','at']
    metadata = {'inputs': {'wrds.compa.clean': inputs},
                'outputs': ['div2la','rep2la'],
                'labels': {'div2la': 'Dividends to lagged assets',
                           'rep2la': 'Repurchases to lagged assets'}
    }
    if return_metadata: return metadata

    if df is None: df = clean(vars=inputs)
    panel = df[inputs].copy()

    lagged_assets = pdm.lag(panel['at'])
    for out_col, src_col in (('div2la', 'dvc'), ('rep2la', 'prstkc')):
        panel[out_col] = panel[src_col].fillna(0) / lagged_assets

    # Zero lagged assets produce +/-inf; report those observations as missing
    return panel.replace([np.inf, -np.inf], np.nan)[metadata['outputs']].copy()

In [None]:
#| eval: false
payout = payout_vars(df)

In [None]:
#| eval: false
pdm.wins(payout).describe()

Unnamed: 0,div2la,rep2la
count,89.0,89.0
mean,0.009004,0.008829
std,0.011841,0.023134
min,0.0,0.0
25%,0.0,0.0
50%,0.005753,0.001323
75%,0.015195,0.00665
max,0.05017,0.143524


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()