In [8]:
import json
import os

import altair as alt
import pandas as pd
import requests

# API Pull

In [9]:
# Read the spreadsheet listing the targeted CPI-U categories
# (first row of the sheet is used as the column header).
targets = pd.read_excel(io="CPI_Category_Tree_Final.xlsx", header=0)
targets

Unnamed: 0,Category Name,Series ID,Level,Parent Series ID,Leaf
0,CPI - All items,CUSR0000SA0,0,,0
1,CPI - Food and Beverages,CUSR0000SAF,1,CUSR0000SA0,0
2,CPI - Food at home,CUSR0000SAF11,2,CUSR0000SAF,0
3,CPI - Cereals and bakery products,CUSR0000SAF111,3,CUSR0000SAF11,1
4,"CPI - Meats, poultry, fish, and eggs",CUSR0000SAF112,3,CUSR0000SAF11,1
...,...,...,...,...,...
141,CPI - Legal services,CUSR0000SEGD01,3,CUSR0000SEGD,1
142,CPI - Funeral expenses,CUSR0000SEGD02,3,CUSR0000SEGD,1
143,CPI - Laundry and dry cleaning services,CUSR0000SEGD03,3,CUSR0000SEGD,1
144,CPI* - Apparel services other than laundry and...,CUUR0000SEGD04,3,CUSR0000SEGD,1


In [10]:
def API_call(series_ids, start_year, end_year, api_key=None, timeout=30):
    '''
    Calls the BLS API to return data. Returns a DataFrame with the combined results.

    Parameters
    ----------
    series_ids : list of str
        Series IDs to request in this single query (sent as "seriesid").
    start_year, end_year : int or str
        Year bounds, passed through unchanged as "startyear" / "endyear".
    api_key : str, optional
        BLS registration key. When omitted, the BLS_API_KEY environment
        variable is used. The key is never hard-coded and is only sent in
        the request body, not in the URL.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per observation returned by the API, with the API's own
        fields as columns plus a 'series' column holding the series ID.
        Empty (with only a 'series' column) when no observations come back.

    Raises
    ------
    ValueError
        If no API key is supplied and BLS_API_KEY is not set.
    requests.HTTPError
        If the server answers with an HTTP error status.
    RuntimeError
        If the API reports anything other than REQUEST_SUCCEEDED.
    '''
    # Resolve the credential from the caller or the environment.
    if api_key is None:
        api_key = os.environ.get('BLS_API_KEY')
    if not api_key:
        raise ValueError(
            "No BLS API key available: pass api_key=... or set the "
            "BLS_API_KEY environment variable."
        )

    # Build message to send to API.
    headers = {'Content-type': 'application/json'}
    payload = json.dumps({
        "seriesid": series_ids,
        "startyear": start_year,
        "endyear": end_year,
        "registrationkey": api_key,
    })
    response = requests.post(
        'https://api.bls.gov/publicAPI/v2/timeseries/data/',
        data=payload,
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly on transport-level errors instead of parsing an error page.
    response.raise_for_status()

    # Parse results.
    body = response.json()
    if body.get('status') != 'REQUEST_SUCCEEDED':
        # Surface the API's own explanation (e.g. throttling, bad parameters).
        raise RuntimeError(
            "BLS API request failed with status {!r}: {}".format(
                body.get('status'), body.get('message')
            )
        )

    frames = []
    for series in (body.get('Results') or {}).get('series', []):
        observations = series.get('data') or []
        if not observations:
            # A series with no data in this year range contributes no rows.
            continue
        frame = pd.DataFrame(observations)
        frame['series'] = series['seriesID']
        frames.append(frame)

    if not frames:
        # pd.concat raises on an empty list; return an empty frame instead.
        return pd.DataFrame(columns=['series'])

    return pd.concat(frames)

In [11]:
# Split query inputs into chunks that fit in the API limit.
# NOTE(review): 50 series and 20 years per request are the limits this
# notebook was written against - confirm against the current BLS API docs.
SERIES_PER_REQUEST = 50
YEARS_PER_REQUEST = 20

# Year windows: (1970, 1989), (1990, 2009), (2010, 2029).
year_ranges = [
    (first_year, first_year + YEARS_PER_REQUEST - 1)
    for first_year in range(1970, 2021, YEARS_PER_REQUEST)
]
series_ids = list(targets['Series ID'])

# Derive the groups from the actual number of series so that none are
# silently dropped past the 150th and no empty group is ever requested.
series_id_groups = [
    series_ids[start:start + SERIES_PER_REQUEST]
    for start in range(0, len(series_ids), SERIES_PER_REQUEST)
]

# Send API requests and combine into a single DataFrame
# (one request per year window and series group, year windows outermost).
df = pd.concat([
    API_call(group, first_year, last_year)
    for first_year, last_year in year_ranges
    for group in series_id_groups
])

In [12]:
# Attach category metadata from `targets` to every API row, keyed on the
# series ID. Each lookup is a plain {series ID: value} dict.
targets_by_id = targets.set_index('Series ID')

series_names = targets_by_id['Category Name'].to_dict()
parent_IDs = targets_by_id['Parent Series ID'].to_dict()
levels = targets_by_id['Level'].to_dict()
leaves = targets_by_id['Leaf'].to_dict()

series_column = df['series']
df['Category'] = series_column.map(series_names)
df['Parent Series ID'] = series_column.map(parent_IDs)
df['Level'] = series_column.map(levels)
df['Leaf'] = series_column.map(leaves)

# Build a datetime column from year and period: the period code's first
# character is dropped and the rest is used as the month, day fixed to 01.
year_text = df.year.astype(str)
month_text = df.period.str[1:]
df['date'] = pd.to_datetime(year_text + '/' + month_text + '/01')

In [13]:
# Make sure values are stored as numbers and not as strings.
# NOTE(review): the API may return a non-numeric placeholder (e.g. "-") for
# unavailable observations - confirm. Such entries become NaN here instead of
# aborting the whole conversion; the final astype keeps the dtype float.
df['value'] = pd.to_numeric(df['value'], errors='coerce').astype(float)

# Save DataFrame as pickle.
df.to_pickle("data_final.pkl")

# Load DataFrame from pickle (only unpickle files you produced yourself).
#df = pd.read_pickle("data_final.pkl")

# Check to see that format looks correct.
df.head()

Unnamed: 0,year,period,periodName,value,footnotes,series,latest,Category,Parent Series ID,Level,Leaf,date
0,1989,M12,December,126.3,[{}],CUSR0000SA0,,CPI - All items,,0,0,1989-12-01
1,1989,M11,November,125.9,[{}],CUSR0000SA0,,CPI - All items,,0,0,1989-11-01
2,1989,M10,October,125.4,[{}],CUSR0000SA0,,CPI - All items,,0,0,1989-10-01
3,1989,M09,September,124.8,[{}],CUSR0000SA0,,CPI - All items,,0,0,1989-09-01
4,1989,M08,August,124.5,[{}],CUSR0000SA0,,CPI - All items,,0,0,1989-08-01
