In [1]:
import requests
import pandas as pd 
import json
import pymongo
from pymongo import MongoClient

In [3]:
# Base name for this dataset (matches the CSV / collection naming used further down).
table_name = "CPI_RegionUrban"

In [4]:
# Load the state lookup sheet (FIPS, State, Region, Division) and preview it.
# Every FIPS cell is passed through str() so the column holds text, not numbers.
# NOTE(review): the preview shows values such as 9 rather than 09 — confirm
# whether zero-padded FIPS codes are needed downstream.
df_region = pd.read_excel(
    '../StateRegionDivision.xlsx',
    converters={'FIPS': str},
)
df_region.head()

Unnamed: 0,FIPS,State,Region,Division
0,9,Connecticut,1,1
1,23,Maine,1,1
2,25,Massachusetts,1,1
3,33,New Hampshire,1,1
4,44,Rhode Island,1,1


In [5]:
# Four-character area codes, one per region, used to build the series ids.
# NOTE(review): presumably these map to regions 1-4 of the lookup sheet — confirm.
regions = [
    '0100',
    '0200',
    '0300',
    '0400',
]
regions

['0100', '0200', '0300', '0400']

In [6]:
# Empty results frame that add_to_df() fills in, one row per observation.
df = pd.DataFrame(columns=["series id","year","period","period_name","value"])
# One series id per region: the prefix 'CUUR' + the region's area code + the suffix 'SA0'.
series = ['CUUR' + area_code + 'SA0' for area_code in regions]

In [11]:
# Display the generated series ids to check them before requesting data.
series

['CUUR0100SA0', 'CUUR0200SA0', 'CUUR0300SA0', 'CUUR0400SA0']

In [13]:
# function to request data from the BLS api
def request_series(series_list, start_year="2010", end_year="2019",
                   registration_key="<key here>", timeout=30):
    """Request the given series from the BLS public API (v2) and store the rows.

    Sends a single JSON POST for every id in ``series_list``. When the API
    answers with status ``REQUEST_SUCCEEDED`` the decoded payload is handed
    to ``add_to_df``; otherwise the status and any messages the API returned
    are printed so the reason for the failure is visible.

    Parameters
    ----------
    series_list : list of str
        Series ids to request.
    start_year, end_year : str, optional
        Values sent as ``startyear`` / ``endyear``. Defaults keep the
        original 2010-2019 window.
    registration_key : str, optional
        Value sent as ``registrationkey``. Pass a real key here rather than
        editing it into the notebook.
    timeout : float, optional
        Seconds to wait for the server; without it a stalled connection
        would block the cell indefinitely.

    Raises
    ------
    requests.exceptions.RequestException
        On connection failure, timeout, or a non-success HTTP status.
    ValueError
        If the response body is not valid JSON.
    """
    headers = {'Content-type': 'application/json'}
    payload = json.dumps({
        "seriesid": series_list,
        "startyear": start_year,
        "endyear": end_year,
        "registrationkey": registration_key,
    })
    response = requests.post(
        'https://api.bls.gov/publicAPI/v2/timeseries/data/',
        data=payload,
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()
    json_data = json.loads(response.text)

    if json_data['status'] == 'REQUEST_SUCCEEDED':
        add_to_df(json_data)
        # One summary line instead of dumping the whole payload into the output.
        print('REQUEST_SUCCEEDED:', len(json_data['Results']['series']), 'series received')
    else:
        print(json_data['status'])
        # Surface whatever explanation the API attached to the failure, if any.
        for message in json_data.get('message') or []:
            print(message)

In [9]:
# function to add data to the dataframe
def add_to_df(data):
    """Append observations from a decoded API response to the global ``df``.

    Walks ``data['Results']['series']`` and, for every entry in each series'
    ``data`` list, appends one row of
    ``[seriesID, year, period, periodName, value]`` to ``df``.
    Only entries whose ``period`` string compares between ``'M01'`` and
    ``'M12'`` (inclusive, plain string comparison) are kept; everything else
    is skipped.

    Parameters
    ----------
    data : dict
        Decoded JSON response containing ``Results -> series``.

    Returns
    -------
    None
        ``df`` is modified in place.
    """
    for entry in data['Results']['series']:
        sid = entry['seriesID']
        for obs in entry['data']:
            # Read every field up front so a malformed entry fails the same
            # way whether or not it would have been kept.
            yr, code, label, val = obs['year'], obs['period'], obs["periodName"], obs['value']
            if not ('M01' <= code <= 'M12'):
                continue
            # len(df) is the next free integer label, so this adds a new row.
            df.loc[len(df)] = [sid, yr, code, label, val]

In [12]:
# Print each series id on its own line as a quick visual check.
for series_id in series:
    print(series_id)

CUUR0100SA0
CUUR0200SA0
CUUR0300SA0
CUUR0400SA0


In [2]:
# Load the previously saved regional CPI table from disk and display it.
# This rebinds `df`, replacing whatever frame the name held before.
# NOTE(review): request_series() is never called in the cells visible here, so
# the data appears to come from this CSV rather than a fresh API pull — confirm.
df = pd.read_csv('../CSVs/CPI_RegionUrban.csv')
df

Unnamed: 0,CBSA,series id,year,period,period_name,value
0,100SA,CUUR0100SA0,2019,M12,December,270.429
1,100SA,CUUR0100SA0,2019,M11,November,270.643
2,100SA,CUUR0100SA0,2019,M10,October,270.348
3,100SA,CUUR0100SA0,2019,M09,September,270.563
4,100SA,CUUR0100SA0,2019,M08,August,270.548
...,...,...,...,...,...,...
475,400SA,CUUR0400SA0,2010,M05,May,221.417
476,400SA,CUUR0400SA0,2010,M04,April,221.202
477,400SA,CUUR0400SA0,2010,M03,March,220.809
478,400SA,CUUR0400SA0,2010,M02,February,220.179


In [3]:
# Remove the literal text '00SA' from the CBSA values so only the leading
# region digit remains (e.g. '100SA' -> '1'). The result is still a string.
# regex=False pins plain-substring matching: the default for `regex` is not the
# same across pandas versions, so stating it keeps the result version-independent.
df['CBSA'] = df['CBSA'].str.replace('00SA', '', regex=False)
df

Unnamed: 0,CBSA,series id,year,period,period_name,value
0,1,CUUR0100SA0,2019,M12,December,270.429
1,1,CUUR0100SA0,2019,M11,November,270.643
2,1,CUUR0100SA0,2019,M10,October,270.348
3,1,CUUR0100SA0,2019,M09,September,270.563
4,1,CUUR0100SA0,2019,M08,August,270.548
...,...,...,...,...,...,...
475,4,CUUR0400SA0,2010,M05,May,221.417
476,4,CUUR0400SA0,2010,M04,April,221.202
477,4,CUUR0400SA0,2010,M03,March,220.809
478,4,CUUR0400SA0,2010,M02,February,220.179


In [4]:
# The stripped CBSA values are region numbers, so label the column 'Region'.
df = df.rename(columns={'CBSA': 'Region'})
df

Unnamed: 0,Region,series id,year,period,period_name,value
0,1,CUUR0100SA0,2019,M12,December,270.429
1,1,CUUR0100SA0,2019,M11,November,270.643
2,1,CUUR0100SA0,2019,M10,October,270.348
3,1,CUUR0100SA0,2019,M09,September,270.563
4,1,CUUR0100SA0,2019,M08,August,270.548
...,...,...,...,...,...,...
475,4,CUUR0400SA0,2010,M05,May,221.417
476,4,CUUR0400SA0,2010,M04,April,221.202
477,4,CUUR0400SA0,2010,M03,March,220.809
478,4,CUUR0400SA0,2010,M02,February,220.179


In [6]:
# Save the cleaned table as CSV, leaving out the dataframe index.
# NOTE(review): this is the same path the read_csv cell loads from, and by this
# point 'CBSA' has been renamed to 'Region' — so once the file is overwritten the
# earlier df['CBSA'] cell cannot be re-run against it. Confirm the overwrite is intended.
df.to_csv('../CSVs/CPI_RegionUrban.csv', index=False)

In [7]:
# Connect to MongoDB through the connection URI. A single client is enough:
# creating a default MongoClient() first would only leave an extra, unclosed
# client behind.
client = MongoClient('<conn string>')
# Select database
db = client['MSA']
# Collection that receives the raw regional CPI rows
collection = db.CPI_RegionUrban_raw
# Turn the dataframe into a list of per-row dicts, the format insert_many accepts
df_dict = df.to_dict(orient='records')
# Write the rows to the CPI_RegionUrban_raw collection.
# NOTE(review): presumably re-running this cell inserts the same rows a second
# time — confirm whether the collection should be cleared or upserted instead.
collection.insert_many(df_dict)

<pymongo.results.InsertManyResult at 0x1d0a543cfc0>