# Pulling Inflation Data from BLS

In [None]:
from datetime import datetime as dt
from pathlib import Path

import pandas as pd

from api_bls import get_bls_data

# Data-source switch: True pulls the series through get_bls_data;
# False loads the CSV snapshot at output/inflation_data.csv instead.
hit_api = False

In [11]:
# Query window: 1979 through the current calendar year, inclusive.
current_year = dt.now().year
years = list(range(1979, current_year + 1))

# Lookup tables mapping each fragment of a series ID to a readable label.
# A full series ID is the concatenation: prefix + region code + item code.
base_series_ids = {"CUUR": "CPI (Unadjusted)"}

regions = {
    "0000": "National",
    "0100": "Northeast",
    "0200": "Midwest",
    "0300": "South",
    "0400": "West",
}

items = {
    "SA0": "All items",
    "SA0L1E": "Core CPI (excludes food & energy)",
    "SAA": "Apparel",
    "SAE": "Education and communication",
    "SAF": "Food and beverages",
    "SAG": "Other goods and services",
    "SAH": "Housing",
    "SAM": "Medical care",
    "SAR": "Recreation",
    "SAT": "Transportation",
}

# Every prefix/region/item combination; the prefix varies slowest and the
# item code fastest, following the insertion order of the dicts above.
series_ids = [
    f"{prefix}{region_code}{item_code}"
    for prefix in base_series_ids
    for region_code in regions
    for item_code in items
]

In [12]:
# Choose the data source: a fresh pull through get_bls_data when hit_api is
# set, otherwise the CSV snapshot under output/.
df = (
    get_bls_data(series_ids=series_ids, years=years)
    if hit_api
    else pd.read_csv('output/inflation_data.csv')
)

df.head()

Unnamed: 0,series id,year,period,value,region,item,date
0,CUUR0000SA0,1998,M12,163.9,National,All items,1998-12-01
1,CUUR0000SA0,1998,M11,164.0,National,All items,1998-11-01
2,CUUR0000SA0,1998,M10,164.0,National,All items,1998-10-01
3,CUUR0000SA0,1998,M09,163.6,National,All items,1998-09-01
4,CUUR0000SA0,1998,M08,163.4,National,All items,1998-08-01


In [13]:
# Decode the series ID (4-character prefix, 4-character region code, item code
# in the remainder) into readable labels. The lookup dicts are indexed directly
# on purpose: an unrecognised code raises KeyError instead of becoming NaN.
series_id = df['series id']
df['data_type'] = series_id.str[0:4].apply(lambda code: base_series_ids[code])
df['region'] = series_id.str[4:8].apply(lambda code: regions[code])
df['item'] = series_id.str[8:].apply(lambda code: items[code])

# The date is built by dropping the first character of the period code, which
# is only meaningful for monthly codes M01..M12. Any other code with two digits
# in 01..12 after its first letter would otherwise be parsed silently as a
# calendar month, so reject it up front.
# NOTE(review): confirm whether get_bls_data can return non-monthly periods
# (e.g. annual or semiannual averages); if so, filter them before this cell.
is_monthly = df['period'].str.fullmatch(r'M(0[1-9]|1[0-2])', na=False)
if not is_monthly.all():
    unexpected = sorted(df.loc[~is_monthly, 'period'].astype(str).unique())
    raise ValueError(
        f"Cannot derive a monthly date from period codes: {unexpected}"
    )

# An explicit format keeps parsing independent of pandas' format inference.
df['date'] = pd.to_datetime(
    df['year'].astype(str) + '-' + df['period'].str[1:],
    format='%Y-%m',
)
df.head()

Unnamed: 0,series id,year,period,value,region,item,date,data_type
0,CUUR0000SA0,1998,M12,163.9,National,All items,1998-12-01,CPI (Unadjusted)
1,CUUR0000SA0,1998,M11,164.0,National,All items,1998-11-01,CPI (Unadjusted)
2,CUUR0000SA0,1998,M10,164.0,National,All items,1998-10-01,CPI (Unadjusted)
3,CUUR0000SA0,1998,M09,163.6,National,All items,1998-09-01,CPI (Unadjusted)
4,CUUR0000SA0,1998,M08,163.4,National,All items,1998-08-01,CPI (Unadjusted)


In [14]:
# Check the column dtypes; `date` should be datetime64 after the conversion above.
df.dtypes

series id            object
year                  int64
period               object
value               float64
region               object
item                 object
date         datetime64[ns]
data_type            object
dtype: object

In [15]:
# Summary statistics as a quick sanity check on the year, value and date ranges.
df.describe()

Unnamed: 0,year,value,date
count,23751.0,23751.0,23751
mean,2004.291314,197.422206,2004-09-30 09:39:22.258431104
min,1979.0,64.0,1979-01-01 00:00:00
25%,1994.0,122.4225,1994-10-01 00:00:00
50%,2005.0,161.7,2005-05-01 00:00:00
75%,2015.0,239.947,2015-06-01 00:00:00
max,2025.0,645.605,2025-07-01 00:00:00
std,12.5625,103.872037,


In [16]:
# Persist the enriched frame to the same path the load cell reads when
# hit_api is False. Create the output directory first: to_csv does not create
# missing parent directories, so without this a fresh checkout would fail
# here after the data has already been fetched.
output_path = Path('output/inflation_data.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)