In [1]:
import requests
import json
import pandas as pd
import altair as alt

# API Pull

In [2]:
# Import the list of targeted BLS series categories (one row per Series ID).
targets = pd.read_excel("Other_BLS_Data.xlsx", header=0)
targets

Unnamed: 0,Category Name,Series ID,Type,Bucket
0,Earnings - All People,LEU0252881500,Earnings,Top
1,Earnings - Men,LEU0252881800,Earnings,Gender
2,Earnings - Women,LEU0252882700,Earnings,Gender
3,Earnings - White People,LEU0252883600,Earnings,Race
4,Earnings - White Men,LEU0252883900,Earnings,Race and Gender
5,Earnings - White Women,LEU0252884200,Earnings,Race and Gender
6,Earnings - Black People,LEU0252884500,Earnings,Race
7,Earnings - Black Men,LEU0252884800,Earnings,Race and Gender
8,Earnings - Black Women,LEU0252885100,Earnings,Race and Gender
9,Earnings - Asian People,LEU0254468400,Earnings,Race


In [3]:
def API_call(series_ids, start_year, end_year, api_key=None, timeout=30):
    '''
    Calls the BLS API to return data. Returns a DataFrame with the combined results.

    Parameters
    ----------
    series_ids : list of str
        Series IDs to request in a single query.
    start_year, end_year : int or str
        First and last year of the requested range (sent to the API as given).
    api_key : str, optional
        BLS registration key. When omitted, the BLS_API_KEY environment
        variable is used so the key never has to be hard-coded in the notebook.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        The 'data' records of every returned series stacked together, with an
        added 'series' column holding the series ID each row came from. The
        index is not reset, so row labels repeat across series.

    Raises
    ------
    ValueError
        If no API key is passed and BLS_API_KEY is not set.
    requests.HTTPError
        If the server answers with an HTTP error status.
    RuntimeError
        If the response body does not contain a list of series.
    '''
    import os  # Local import: only needed for the environment lookup below.

    # Resolve the registration key without embedding it in the source.
    if api_key is None:
        api_key = os.environ.get('BLS_API_KEY')
    if not api_key:
        raise ValueError(
            'No BLS API key available: pass api_key=... or set the '
            'BLS_API_KEY environment variable.'
        )

    # Build message to send to API.
    headers = {'Content-type': 'application/json'}
    data = json.dumps({
        "seriesid": series_ids,
        "startyear": start_year,
        "endyear": end_year,
        "registrationkey": api_key,
    })
    p = requests.post(
        'https://api.bls.gov/publicAPI/v2/timeseries/data/',
        data=data,
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    p.raise_for_status()

    # Parse results.
    j = json.loads(p.text)
    results = j.get('Results')
    series_list = results.get('series') if isinstance(results, dict) else None
    if series_list is None:
        # Surface whatever the API reported rather than an opaque KeyError.
        raise RuntimeError(
            'BLS API returned no series (status={!r}, message={!r})'.format(
                j.get('status'), j.get('message')
            )
        )

    dfs = []
    for series in series_list:
        t_df = pd.DataFrame(series['data'])
        t_df['series'] = series['seriesID']
        dfs.append(t_df)

    # pd.concat raises on an empty list, so return an empty frame explicitly.
    if not dfs:
        return pd.DataFrame(columns=['series'])

    return pd.concat(dfs)

In [4]:
# Split query inputs into chunks that fit in the API limit.
# The chunk sizes are the ones this notebook already used: 20 years and
# 50 series per request.
MAX_YEARS_PER_QUERY = 20
MAX_SERIES_PER_QUERY = 50

year_ranges = [(x, x + MAX_YEARS_PER_QUERY - 1) for x in range(1970, 2021, MAX_YEARS_PER_QUERY)]
series_ids = list(targets['Series ID'])
# Chunk the full list so that series beyond the first 50 are not silently dropped.
series_id_groups = [
    series_ids[i:i + MAX_SERIES_PER_QUERY]
    for i in range(0, len(series_ids), MAX_SERIES_PER_QUERY)
]

# Send API requests and combine into a single DataFrame.
# Iterate over every (year range, series group) pair instead of hard-coded counts.
df = pd.concat([
    API_call(id_group, first_year, last_year)
    for first_year, last_year in year_ranges
    for id_group in series_id_groups
])

In [5]:
# Merge other data into results from API pull.
# Index the targets table by Series ID once and reuse it for every lookup.
target_lookup = targets.set_index('Series ID')
series_names = target_lookup['Category Name'].to_dict()
df['Category'] = df['series'].map(series_names)
types = target_lookup['Type'].to_dict()
df['Type'] = df['series'].map(types)
buckets = target_lookup['Bucket'].to_dict()
df['Bucket'] = df['series'].map(buckets)

# Convert quarter names to months (first month of each quarter).
# Assign the result back rather than calling replace(inplace=True) on the
# column selection: the chained in-place form warns on recent pandas and
# does not update df when Copy-on-Write is enabled.
quarter_to_month = {'Q01': 'M01', 'Q02': 'M04', 'Q03': 'M07', 'Q04': 'M10'}
df['period'] = df['period'].replace(quarter_to_month)

# Convert month and year to a datetime column (first day of the month).
# An explicit format avoids per-value format inference.
df['date'] = pd.to_datetime(
    df['year'].astype(str) + '/' + df['period'].str[1:] + '/01',
    format='%Y/%m/%d',
)

In [6]:
# Cast the value and year columns to numeric dtypes so they are not kept as strings.
df['value'] = df['value'].astype(float)
df['year'] = df['year'].astype(int)

# Persist the DataFrame to disk as a pickle.
df.to_pickle("other_data_final.pkl")

# Load DataFrame from pickle.
#df = pd.read_pickle("data.pkl")

# Display the frame to confirm the format looks correct.
df

Unnamed: 0,year,period,periodName,value,footnotes,series,latest,Category,Type,Bucket,date
0,1989,M10,4th Quarter,408.0,[{}],LEU0252881500,,Earnings - All People,Earnings,Top,1989-10-01
1,1989,M07,3rd Quarter,395.0,[{}],LEU0252881500,,Earnings - All People,Earnings,Top,1989-07-01
2,1989,M04,2nd Quarter,398.0,[{}],LEU0252881500,,Earnings - All People,Earnings,Top,1989-04-01
3,1989,M01,1st Quarter,397.0,[{}],LEU0252881500,,Earnings - All People,Earnings,Top,1989-01-01
4,1988,M10,4th Quarter,392.0,[{}],LEU0252881500,,Earnings - All People,Earnings,Top,1988-10-01
...,...,...,...,...,...,...,...,...,...,...,...
145,2010,M05,May,4.6,[{}],LNS14027662,,Unemployment Rate - Bachelor's and Higher,Unemployment,Education,2010-05-01
146,2010,M04,April,4.8,[{}],LNS14027662,,Unemployment Rate - Bachelor's and Higher,Unemployment,Education,2010-04-01
147,2010,M03,March,4.9,[{}],LNS14027662,,Unemployment Rate - Bachelor's and Higher,Unemployment,Education,2010-03-01
148,2010,M02,February,4.9,[{}],LNS14027662,,Unemployment Rate - Bachelor's and Higher,Unemployment,Education,2010-02-01


In [7]:
# Get stock data.
stocks = pd.read_excel("Stock_Markets_Data.xlsx", header=0)

# Adjust format of stocks data to align with other data: average each
# category per calendar month and label every month by its first day.
# 'MS' (month start) produces the first-of-month label directly, which
# replaces the deprecated 'M' alias followed by a MonthBegin(-1) shift.
stocks = stocks.set_index('date').groupby('Category').resample('MS').mean().reset_index()
stocks['year'] = stocks['date'].dt.year

# Add type label.
stocks['Type'] = 'Stocks'

In [9]:
# Read the previously pickled main dataset and tag every row with Type 'CPI'.
main_df = pd.read_pickle("data_final.pkl").assign(Type='CPI')

# Stack the main, API and stocks frames into one DataFrame.
combo_df = pd.concat(objs=[main_df, df, stocks])

# Display the combined frame to check its format.
combo_df

Unnamed: 0,year,period,periodName,value,footnotes,series,latest,Category,Parent Series ID,Level,Leaf,date,Type,Bucket
0,1989,M12,December,126.300000,[{}],CUSR0000SA0,,CPI - All items,,0.0,0.0,1989-12-01,CPI,
1,1989,M11,November,125.900000,[{}],CUSR0000SA0,,CPI - All items,,0.0,0.0,1989-11-01,CPI,
2,1989,M10,October,125.400000,[{}],CUSR0000SA0,,CPI - All items,,0.0,0.0,1989-10-01,CPI,
3,1989,M09,September,124.800000,[{}],CUSR0000SA0,,CPI - All items,,0.0,0.0,1989-09-01,CPI,
4,1989,M08,August,124.500000,[{}],CUSR0000SA0,,CPI - All items,,0.0,0.0,1989-08-01,CPI,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1872,2022,,,4435.980526,,,,Stocks - S&P,,,,2022-02-01,Stocks,
1873,2022,,,4391.265217,,,,Stocks - S&P,,,,2022-03-01,Stocks,
1874,2022,,,4391.296000,,,,Stocks - S&P,,,,2022-04-01,Stocks,
1875,2022,,,4040.360000,,,,Stocks - S&P,,,,2022-05-01,Stocks,


In [17]:
# Store year as an integer, then write the combined DataFrame to a pickle.
combo_df['year'] = combo_df['year'].astype(int)
combo_df.to_pickle("combined_data.pkl")