# wrds_api

> Convenience wrappers around the `wrds` package (<https://github.com/wharton/wrds>), plus a couple of utilities

In [None]:
#| default_exp wrds.wrds_api

In [None]:
#| hide
#| export 
from typing import Sequence, List
import os 
import pandas as pd 
import wrds 
from datetime import datetime

In [None]:
#|export
def Connection():
    """Open a `wrds.Connection` using credentials from the environment.

    The username and password are read from the `WRDS_USERNAME` and
    `WRDS_PASSWORD` environment variables (either is `None` when unset) and
    forwarded, together with `dtype_backend="pyarrow"`, to `wrds.Connection`.
    """
    credentials = {
        "wrds_username": os.getenv("WRDS_USERNAME"),
        "wrds_password": os.getenv("WRDS_PASSWORD"),
    }
    return wrds.Connection(**credentials, dtype_backend="pyarrow")

In [None]:
#|exports
def download(sql_string: str=None,
             params: Sequence=None # Params cited in the `sql_string`
             ) -> pd.DataFrame:
    """Downloads data from WRDS using the given PostgreSQL `sql_string`

    Opens a fresh connection via `Connection()`, runs the query with
    `raw_sql`, and closes the connection whether or not the query succeeds.
    Any exception raised while connecting or querying propagates unchanged.
    """

    # Connect *before* entering the `try`: if the connection itself fails there
    # is nothing to close, and the original error must not be masked by an
    # UnboundLocalError from `db.close()` in the `finally` clause.
    db = Connection()
    try:
        return db.raw_sql(sql=sql_string, params=params)
    finally:
        # Runs before the return value is handed back, and also on errors.
        db.close()

In [None]:
#| eval: false
# Example: pull every column of the `ff.factors_monthly` table into a DataFrame.
download("SELECT * from ff.factors_monthly")

Loading library list...
Done


Unnamed: 0,date,mktrf,smb,hml,rf,year,month,umd,dateff
0,1926-07-01,0.0289,-0.0255,-0.0239,0.0022,1926.0,7.0,,1926-07-31
1,1926-08-01,0.0264,-0.0114,0.0381,0.0025,1926.0,8.0,,1926-08-31
2,1926-09-01,0.0038,-0.0136,0.0005,0.0023,1926.0,9.0,,1926-09-30
3,1926-10-01,-0.0327,-0.0014,0.0082,0.0032,1926.0,10.0,,1926-10-30
4,1926-11-01,0.0254,-0.0011,-0.0061,0.0031,1926.0,11.0,,1926-11-30
...,...,...,...,...,...,...,...,...,...
1183,2025-02-01,-0.0244,-0.0579,0.0491,0.0033,2025.0,2.0,-0.0081,
1184,2025-03-01,-0.0639,-0.0276,0.029,0.0034,2025.0,3.0,-0.0284,
1185,2025-04-01,-0.0084,-0.0059,-0.034,0.0035,2025.0,4.0,0.0497,
1186,2025-05-01,0.0606,0.007,-0.0288,0.0038,2025.0,5.0,0.0221,


## Examples of useful features of the `Connection` class

In [None]:
#| eval: false
# Open a connection that the following example cells reuse (closed at the end).
db = Connection()

Loading library list...
Done


In [None]:
#| eval: false
# First five library names available through this connection.
db.list_libraries()[:5]

['aha', 'aha_common', 'aha_hcris_3years', 'aha_it_survey_3years', 'aha_sample']

In [None]:
#| eval: false
# First five table names in the `crsp` library.
db.list_tables(library='crsp')[:5]

['acti', 'asia', 'asib', 'asic', 'asio']

In [None]:
#| eval: false
# Column metadata (name, nullable, type, comment) for the first five columns of `comp.funda`.
db.describe_table(library='comp',table='funda').iloc[:5]

Approximately 921202 rows in comp.funda.


Unnamed: 0,name,nullable,type,comment
0,gvkey,True,VARCHAR(6),Global Company Key
1,datadate,True,DATE,Data Date
2,fyear,True,INTEGER,Data Year - Fiscal
3,indfmt,True,VARCHAR(12),Industry Format
4,consol,True,VARCHAR(2),Level of Consolidation - Company Annual Descri...


In [None]:
#| eval: false
# Fetch only the listed columns of `ff.factors_monthly`, limited to 5 observations.
db.get_table(library='ff', 
             table='factors_monthly',
             columns=['date','mktrf','smb','hml','rf'],
             obs=5)

Unnamed: 0,date,mktrf,smb,hml,rf
0,1926-07-01,0.0289,-0.0255,-0.0239,0.0022
1,1926-08-01,0.0264,-0.0114,0.0381,0.0025
2,1926-09-01,0.0038,-0.0136,0.0005,0.0023
3,1926-10-01,-0.0327,-0.0014,0.0082,0.0032
4,1926-11-01,0.0254,-0.0011,-0.0061,0.0031


In [None]:
#| eval: false
# Close the connection opened for the examples above.
db.close()

In [None]:
#| export
def validate_dates(date_strings: List[str]) -> bool:
    """Check that every non-`None` entry of `date_strings` is formatted as 'mm/dd/yyyy'.

    `None` entries are skipped, so optional dates can be passed through as-is.

    Returns `True` when all entries pass (including when `date_strings` is empty).

    Raises `ValueError` naming the first entry that `datetime.strptime` cannot
    parse with the '%m/%d/%Y' format.
    """
    for date in date_strings:
        if date is None:
            continue
        try:
            datetime.strptime(date, '%m/%d/%Y')
        except ValueError as err:
            raise ValueError(f"Date '{date}' is not in the correct format. Should be 'mm/dd/yyyy'") from err
    # Honor the declared `-> bool` return type instead of implicitly returning None.
    return True

In [None]:
#| hide
# Trigger nbdev's export step for this notebook.
import nbdev; nbdev.nbdev_export()