# fred

> Retrieve and process data from FRED (Federal Reserve Economic Data), the database maintained by the Federal Reserve Bank of St. Louis

This module is a wrapper around the `fred_api` module, which itself is a fork of [fredapi](https://github.com/mortada/fredapi).

To use the functions in the `fred` module, you'll need a FRED API key from the Federal Reserve Bank of St. Louis.

Get one [here](https://fred.stlouisfed.org/docs/api/api_key.html) and store it in your environment variables under the name `FRED_API_KEY`.

Alternatively, you can supply the API key directly as the `api_key` parameter in each function in the `fred` module.

In [None]:
#| default_exp fred.fred

In [None]:
#| exports
from typing import List, Dict
import time
import copy

import pandas as pd

import pandasmore as pdm
from finsets.fred.fred_api import Fred

In [None]:
#| exports
# Human-readable name of the data source wrapped by this module
PROVIDER = 'Federal Reserve Economic Data (FRED)'
# Home page of the FRED website
URL = 'https://fred.stlouisfed.org/'

In [None]:
#| export 
def get_series_info(series: str, # FRED series name
                    api_key: str=None # FRED api key. If None, will use os.getenv("FRED_API_KEY")
                    ) -> pd.Series:
    """Fetch the FRED metadata record for `series` through a `Fred` client built with `api_key`"""

    client = Fred(api_key=api_key)
    metadata = client.get_series_info(series)
    return metadata

In [None]:
#| eval: false
get_series_info('TB3MS')

id                                                                       TB3MS
realtime_start                                                      2023-11-12
realtime_end                                                        2023-11-12
title                        3-Month Treasury Bill Secondary Market Rate, D...
observation_start                                                   1934-01-01
observation_end                                                     2023-10-01
frequency                                                              Monthly
frequency_short                                                              M
units                                                                  Percent
units_short                                                                  %
seasonal_adjustment                                    Not Seasonally Adjusted
seasonal_adjustment_short                                                  NSA
last_updated                                        

In [None]:
#| export
def default_raw_vars() -> List[str]:
    """List of FRED series names that will be used in `get_raw_data()` if none are specified"""

    # The list literal is rebuilt on every call, so callers can modify the result
    # without affecting later calls.
    return [
        'TB3MS', 'DTB3', 'GS10', 'DGS10', 'GS1', 'DGS1',
        'AAA', 'BAA', 'DAAA', 'DBAA',
        'FEDFUNDS', 'DFF',
        'CPIAUCSL', 'CPIAUCNS',
        'INDPRO', 'IPB50001SQ', 'UNRATE',
        'GDP', 'GDPC1', 'GNP', 'GNPC96', 'GDPPOT',
        'USREC', 'RECPROUSM156N', 'CFNAI',
        'UMCSENT', 'MICH',
        'USEPUINDXM', 'USEPUNEWSINDXM', 'USEPUINDXD',
        'VIXCLS', 'VXOCLS',
    ]


In [None]:
print(default_raw_vars())

['TB3MS', 'DTB3', 'GS10', 'DGS10', 'GS1', 'DGS1', 'AAA', 'BAA', 'DAAA', 'DBAA', 'FEDFUNDS', 'DFF', 'CPIAUCSL', 'CPIAUCNS', 'INDPRO', 'IPB50001SQ', 'UNRATE', 'GDP', 'GDPC1', 'GNP', 'GNPC96', 'GDPPOT', 'USREC', 'RECPROUSM156N', 'CFNAI', 'UMCSENT', 'MICH', 'USEPUINDXM', 'USEPUNEWSINDXM', 'USEPUINDXD', 'VIXCLS', 'VXOCLS']


In [None]:
#| export
def parse_varlist(vars: List[str]=None, #list of variables requested by user; if None, will use `default_raw_vars()`
                  api_key: str=None # FRED api key. If None, will use os.getenv("FRED_API_KEY")
                  ) -> Dict[str, list]:
    "Looks up the frequency of each series in `vars`; returns dict with the metadata table under 'info' plus one list of series names per frequency code"

    if vars is None:
        vars = default_raw_vars()
    if isinstance(vars, str):
        vars = [vars]

    # One metadata lookup per series; keep its short frequency code next to its name
    names, freq_codes, info_rows = [], [], []
    for name in vars:
        meta = get_series_info(name, api_key)
        info_rows.append(meta.to_frame().T)
        names.append(name)
        freq_codes.append(meta['frequency_short'])

    info_table = pd.concat(info_rows, ignore_index=True)
    lookup = pd.DataFrame({'series': names, 'freq': freq_codes})

    # Frequencies are visited in `value_counts()` order, which also fixes the key order of the result
    result = {'info': info_table}
    for code in lookup['freq'].value_counts().index:
        result[code] = list(lookup.loc[lookup['freq'] == code, 'series'])

    return result

In [None]:
#| eval: false
varlist = parse_varlist(['AAA','GDP', 'INDPRO'])

In [None]:
#| eval: false
varlist.keys()

dict_keys(['info', 'M', 'Q'])

In [None]:
#| eval: false
varlist['info']

Unnamed: 0,id,realtime_start,realtime_end,title,observation_start,observation_end,frequency,frequency_short,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,popularity,notes
0,AAA,2023-11-12,2023-11-12,Moody's Seasoned Aaa Corporate Bond Yield,1919-01-01,2023-10-01,Monthly,M,Percent,%,Not Seasonally Adjusted,NSA,2023-11-01 10:17:02-05,79,These instruments are based on bonds with matu...
1,GDP,2023-11-12,2023-11-12,Gross Domestic Product,1947-01-01,2023-07-01,Quarterly,Q,Billions of Dollars,Bil. of $,Seasonally Adjusted Annual Rate,SAAR,2023-10-26 07:55:01-05,92,BEA Account Code: A191RC Gross domestic produ...
2,INDPRO,2023-11-12,2023-11-12,Industrial Production: Total Index,1919-01-01,2023-09-01,Monthly,M,Index 2017=100,Index 2017=100,Seasonally Adjusted,SA,2023-10-17 08:29:02-05,78,The industrial production (IP) index measures ...


In [None]:
#| eval: false
varlist['M']

['AAA', 'INDPRO']

In [None]:
#| eval: false
varlist['Q']

['GDP']

In [None]:
#| export 
def get_raw_data(vars: List[str]=None, # FRED series names; if None, will use `default_raw_vars()`
               api_key: str=None # FRED api key. If None, will use os.getenv("FRED_API_KEY")
               ) -> Dict[str, pd.DataFrame]: 
    """Retrieves `vars` from FRED, splits them by frequency and returns dict with one pd.DataFrame per frequency

    The returned dict also carries the series metadata table under the key 'info'.
    """

    api = Fred(api_key=api_key)

    # Forward `api_key` so the metadata lookups use the same key as the downloads below
    varlist = parse_varlist(vars, api_key=api_key)

    out = {'info': varlist.pop('info')}
    for freq, series_names in varlist.items():
        # One column per series, joined side by side on their index;
        # rows where every series is missing are dropped
        columns = [api.get_series(name).to_frame(name=name) for name in series_names]
        out[freq] = pd.concat(columns, axis=1).dropna(how='all')

    return out

In [None]:
#| eval: false
rdat = get_raw_data(['AAA','GDP', 'INDPRO'])

In [None]:
#| eval: false
rdat.keys()

dict_keys(['info', 'M', 'Q'])

In [None]:
#| eval: false
rdat['M']

Unnamed: 0,AAA,INDPRO
1919-01-01,5.35,4.8665
1919-02-01,5.35,4.6514
1919-03-01,5.39,4.5170
1919-04-01,5.44,4.5976
1919-05-01,5.39,4.6245
...,...,...
2023-06-01,4.65,102.2924
2023-07-01,4.66,103.2895
2023-08-01,4.95,103.3170
2023-09-01,5.13,103.6115


In [None]:
#| eval: false
rdat['Q']

Unnamed: 0,GDP
1947-01-01,243.164
1947-04-01,245.968
1947-07-01,249.585
1947-10-01,259.745
1948-01-01,265.742
...,...
2022-07-01,25994.639
2022-10-01,26408.405
2023-01-01,26813.601
2023-04-01,27063.012


In [None]:
#| export 
def process_raw_data(
        data: dict=None,  # keys are frequency str literals; same format as the output of `get_raw_data`
) -> Dict[str, pd.DataFrame]: 
    """Processes data from FRED: cleans the date and sets it as index using `pdm.setup_tseries`

    Returns a new dict with the same keys as `data` ('info' plus one entry per frequency);
    `data` itself is not modified. Raises TypeError if `data` is None.
    """

    if data is None:
        # Same exception type the unguarded code raised, but with an actionable message
        raise TypeError("`data` is required: pass the dict returned by `get_raw_data`")

    # Work on a deep copy so the caller's dict and its contents are left untouched
    data = copy.deepcopy(data)
    out = {'info': data.pop('info')}
    for freq, df in data.items():
        # Move the index into a regular 'date' column before handing the frame to `pdm.setup_tseries`
        df = df.reset_index().rename({'index':'date'},axis=1)
        df = pdm.setup_tseries(df, freq=freq).drop('date', axis=1)
        out[freq] = df

    return out

In [None]:
#| eval: false
cdat = process_raw_data(rdat)

In [None]:
#| eval: false
cdat.keys()

dict_keys(['info', 'M', 'Q'])

In [None]:
#| eval: false
cdat['M']

Unnamed: 0_level_0,dtdate,AAA,INDPRO
Mdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1919-01,1919-01-01,5.35,4.8665
1919-02,1919-02-01,5.35,4.6514
1919-03,1919-03-01,5.39,4.5170
1919-04,1919-04-01,5.44,4.5976
1919-05,1919-05-01,5.39,4.6245
...,...,...,...
2023-06,2023-06-01,4.65,102.2924
2023-07,2023-07-01,4.66,103.2895
2023-08,2023-08-01,4.95,103.3170
2023-09,2023-09-01,5.13,103.6115


In [None]:
#| export 
def search(search_text: str=None, # What to search for
              order_by: str='popularity', # How to order search results; try `search_rank` if you don't find what you were looking for
              nr_results: int=10, # How many results to output
              api_key: str=None # FRED api key. If None, will use os.getenv("FRED_API_KEY")
              ) -> pd.DataFrame:
    """Query FRED for `search_text`, ordered by `order_by`, and return a copy of the first `nr_results` rows"""

    # Columns handed to `pdm.order_columns`
    lead_columns = ['title', 'popularity','frequency_short', 'observation_start', 'observation_end']

    client = Fred(api_key=api_key)
    matches = client.search(search_text, order_by=order_by)
    matches = pdm.order_columns(matches, lead_columns)
    return matches.iloc[:nr_results].copy()
              

In [None]:
#| eval: false
search('industrial production', nr_results=1)

Unnamed: 0_level_0,title,popularity,frequency_short,observation_start,observation_end,id,realtime_start,realtime_end,frequency,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,notes
series id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
INDPRO,Industrial Production: Total Index,78,M,1919-01-01,2023-09-01,INDPRO,2023-11-12,2023-11-12,Monthly,Index 2017=100,Index 2017=100,Seasonally Adjusted,SA,2023-10-17 08:29:02-05:00,The industrial production (IP) index measures ...


In [None]:
#| eval: false
search("three month treasury bill", order_by='popularity',nr_results=3)

Unnamed: 0_level_0,title,popularity,frequency_short,observation_start,observation_end,id,realtime_start,realtime_end,frequency,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,notes
series id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
TB3MS,"3-Month Treasury Bill Secondary Market Rate, D...",78,M,1934-01-01,2023-10-01,TB3MS,2023-11-12,2023-11-12,Monthly,Percent,%,Not Seasonally Adjusted,NSA,2023-11-01 15:21:08-05:00,"Averages of Business Days, Discount Basis"
DTB3,"3-Month Treasury Bill Secondary Market Rate, D...",75,D,1954-01-04,2023-11-08,DTB3,2023-11-12,2023-11-12,Daily,Percent,%,Not Seasonally Adjusted,NSA,2023-11-09 15:20:01-06:00,Discount Basis
DGS3MO,Market Yield on U.S. Treasury Securities at 3-...,73,D,1981-09-01,2023-11-08,DGS3MO,2023-11-12,2023-11-12,Daily,Percent,%,Not Seasonally Adjusted,NSA,2023-11-09 15:19:09-06:00,For further information regarding treasury con...


In [None]:
#| eval: false
search("three month treasury bill", order_by='search_rank',nr_results=3)

Unnamed: 0_level_0,title,popularity,frequency_short,observation_start,observation_end,id,realtime_start,realtime_end,frequency,units,units_short,seasonal_adjustment,seasonal_adjustment_short,last_updated,notes
series id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
TB3MS,"3-Month Treasury Bill Secondary Market Rate, D...",78,M,1934-01-01,2023-10-01,TB3MS,2023-11-12,2023-11-12,Monthly,Percent,%,Not Seasonally Adjusted,NSA,2023-11-01 15:21:08-05:00,"Averages of Business Days, Discount Basis"
DTB3,"3-Month Treasury Bill Secondary Market Rate, D...",75,D,1954-01-04,2023-11-08,DTB3,2023-11-12,2023-11-12,Daily,Percent,%,Not Seasonally Adjusted,NSA,2023-11-09 15:20:01-06:00,Discount Basis
WTB3MS,"3-Month Treasury Bill Secondary Market Rate, D...",37,W,1954-01-08,2023-11-03,WTB3MS,2023-11-12,2023-11-12,"Weekly, Ending Friday",Percent,%,Not Seasonally Adjusted,NSA,2023-11-06 15:22:08-06:00,"Averages of Business Days, Discount Basis"


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()