In [78]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import datetime
import os
import json

# Import API keys
from config import blsapi

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Location of the CSV this notebook writes: <output_directory>/<output_data_file>
output_directory = "output_data"
output_data_file = "bls_region.csv"
output_full_path = "/".join((output_directory, output_data_file))

# Every BLS API call must name the series being requested.  A series ID is a
# fixed-width code assembled from several fields, and each BLS data set has its
# own layout for it.  The layouts are documented at
# https://www.bls.gov/help/hlpforma.htm#OE
# This analysis uses the Occupational Employment statistics, whose collection
# and purpose are described at
# https://www.bls.gov/oes/oes_emp.htm
#
# Example series ID:  OEUN000000011100011000001
#   Positions   Value     Field Name
#   1-2         OE        Prefix
#   3           U         Seasonal Adjustment Code
#   4           N         Area Type Code
#   5-11        0000000   Area Code
#   12-17       111000    Industry Code
prefix = 'OE'
season_ad_cd = 'U'

# Area codes come from
# https://download.bls.gov/pub/time.series/oe/oe.area
#
#   Area Code  Type  Area Name
#   0011700    M     Asheville, NC
#   0015500    M     Burlington, NC
#   0016740    M     Charlotte-Concord-Gastonia, NC-SC
#   0020500    M     Durham-Chapel Hill, NC
#   0022180    M     Fayetteville, NC
#   0024140    M     Goldsboro, NC
#   0024660    M     Greensboro-High Point, NC
#   0024780    M     Greenville, NC
#   0025860    M     Hickory-Lenoir-Morganton, NC
#   0027340    M     Jacksonville, NC
#   0035100    M     New Bern, NC
#   0039580    M     Raleigh, NC
#   0040580    M     Rocky Mount, NC
#   0048900    M     Wilmington, NC
#   0049180    M     Winston-Salem, NC
#   3700000    S     North Carolina
#   3700001    M     Southeast Coastal North Carolina nonmetropolitan area
#   3700002    M     Northeast Coastal North Carolina nonmetropolitan area
#   3700003    M     Piedmont North Carolina nonmetropolitan area
#   3700004    M     Mountain North Carolina nonmetropolitan area

# One row per area of interest: (area type code, area code, display name).
area_data = pd.DataFrame(
    [
        ("N", '0000000', 'National'),
        ("S", '3700000', 'North Carolina'),
        ("M", '0011700', 'Asheville, NC'),
    ],
    columns=["area_type_code", "area_code", "area_name"],
)

# The series built below use the national area, which is the first row of the
# table above ('N' / '0000000').
_national_area = area_data.iloc[0]
area_type_code = _national_area["area_type_code"]
area_code = _national_area["area_code"]

# Industry codes come from
# https://download.bls.gov/pub/time.series/oe/oe.industry
# They fill positions 12-17 of the series ID (111000 in the example above).
ind_code = '000000'

# Occupations this analysis focuses on, as (occupation code, OCC_TITLE) pairs.
# The code is the OCC_CODE with its hyphen removed (13-1141 -> '131141') and
# fills positions 18-23 of the series ID (Occupation Code field).
_occupation_table = [
    ('131141', 'Compensation, Benefits, and Job Analysis Specialists'),
    ('131161', 'Market Research Analysts and Marketing Specialists'),
    ('132031', 'Budget Analysts'),
    ('132041', 'Credit Analysts'),
    ('132051', 'Financial Analysts'),
    ('151121', 'Computer Systems Analysts'),
    ('151122', 'Information Security Analysts'),
    ('151141', 'Database Administrators'),
    ('152011', 'Actuaries'),
    ('152031', 'Operations Research Analysts'),
    ('152041', 'Statisticians'),
]
occ_code = [code for code, _title in _occupation_table]
occ_title = [title for _code, title in _occupation_table]

# Base dataframe that the occupation data is collected into.
data_set = pd.DataFrame({
    "Occupation Code": occ_code,
    "Occupation Title": occ_title,
})

# Data type codes fill positions 24-25 of the series ID.  The full list is at
# https://download.bls.gov/pub/time.series/oe/oe.datatype
#   01  Employment
#   04  Annual mean wage
#   13  Annual median wage
#   16  Employment per 1,000 jobs
#   17  Location Quotient
# Because of the data limitations, one API call is made for each data type.
# Only the types listed here are requested, as (code, column name) pairs.
_data_type_table = [
    ('01', 'Employment'),
    ('04', 'Annual mean wage'),
    ('13', 'Annual median wage'),
]
data_types, data_type_name = (list(column) for column in zip(*_data_type_table))



In [79]:
# Build the series IDs, query the BLS API and merge the returned values into data_set.
#
# Per API restrictions a single call cannot contain more than 25 series IDs.  The limit is
# lowered here so that each query carries the series of a single data type: the API returns
# one data value per series, and each data type becomes one column of data_set.
apiserieslimit = 12
# Number of whole data types whose series fit in one request.  Floor division (rather than
# round) keeps a batch from growing past apiserieslimit, and the clamp to 1 avoids a
# modulo-by-zero below when the occupation list is longer than the limit.
maxocc = max(1, apiserieslimit // (len(occ_code) + 1))

# data type code (last two characters of a series ID) -> column name in data_set
column_names = dict(zip(data_types, data_type_name))

series_list = []
for run_number, apirun in enumerate(data_types, start=1):
    series_list.extend(
        prefix + season_ad_cd + area_type_code + area_code + ind_code + occup + apirun
        for occup in occ_code
    )
    # Keep accumulating series until the batch is full or this is the last data type.
    # Everything below runs only when a request is actually sent, so a response is never
    # processed twice and json_data is never read before it exists.
    if run_number % maxocc != 0 and run_number != len(data_types):
        continue

    # the following code came from the BLS code sample page that shows how to call the API
    # my series data only has data from 2017
    headers = {'Content-type': 'application/json'}
    data = json.dumps({"seriesid": series_list, "startyear": "2017", "endyear": "2017",
                       "registrationkey": blsapi})
    # A timeout keeps the cell from hanging forever on a stalled connection.
    p = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/',
                      data=data, headers=headers, timeout=30)
    p.raise_for_status()
    json_data = json.loads(p.text)
    series_list = []
    if 'Results' not in json_data:
        raise RuntimeError("BLS API returned no results: {}".format(json_data.get('message')))

    # Group the returned values by data type so a response that carries more than one
    # data type still produces one correctly named column per type.
    # Series ID layout: [-8:-2] is the occupation code, [-2:] is the data type code.
    values_by_type = {}
    for series in json_data['Results']['series']:
        series_id = series['seriesID']
        observations = series['data']
        # A series with no observations gets a missing value instead of raising IndexError.
        value = observations[0]['value'] if observations else None
        values_by_type.setdefault(series_id[-2:], {})[series_id[-8:-2]] = value

    # add the data collected to the data frame
    for keyvalue_header, values in values_by_type.items():
        seriesid = list(values)
        keyvalue = list(values.values())

        data_collector = pd.DataFrame({"Occupation Code": seriesid,
                                       column_names[keyvalue_header]: keyvalue})

        data_set = pd.merge(data_set, data_collector, on="Occupation Code")

print(data_set)

   Occupation Code                                   Occupation Title  \
0           131141  Compensation, Benefits, and Job Analysis Speci...   
1           131161  Market Research Analysts and Marketing Special...   
2           132031                                    Budget Analysts   
3           132041                                    Credit Analysts   
4           132051                                 Financial Analysts   
5           151121                          Computer Systems Analysts   
6           151122                      Information Security Analysts   
7           151141                            Database Administrators   
8           152011                                          Actuaries   
9           152031                       Operations Research Analysts   
10          152041                                      Statisticians   

   Employment Annual mean wage  
0       80530            67160  
1      596450            71450  
2       54550           

In [76]:
# Print the occupation table (data_set) as it currently stands.
print(data_set)

   Occupation Code                                   Occupation Title  \
0           131141  Compensation, Benefits, and Job Analysis Speci...   
1           131161  Market Research Analysts and Marketing Special...   
2           132031                                    Budget Analysts   
3           132041                                    Credit Analysts   
4           132051                                 Financial Analysts   
5           151121                          Computer Systems Analysts   
6           151122                      Information Security Analysts   
7           151141                            Database Administrators   
8           152011                                          Actuaries   
9           152031                       Operations Research Analysts   
10          152041                                      Statisticians   

   Employment  
0       80530  
1      596450  
2       54550  
3       74850  
4      294110  
5      581960  
6      1052

In [73]:
# Turn the API response held in json_data into a one-column table keyed by occupation code
# and join it onto data_set to produce summary_table.
series_results = json_data['Results']['series']
if not series_results:
    # Without any series there is no data type to name the column after.
    raise ValueError("BLS response contains no series to summarize")

# Series ID layout: [-8:-2] is the occupation code, [-2:] is the data type code.
seriesid = [series['seriesID'][-8:-2] for series in series_results]
# A series with no observations gets a missing value instead of raising IndexError.
keyvalue = [series['data'][0]['value'] if series['data'] else None
            for series in series_results]

# The column is named after the data type of the last series in the response.
keyvalue_header = series_results[-1]['seriesID'][-2:]

data_collector = pd.DataFrame({"Occupation Code": seriesid,
                               data_type_name[data_types.index(keyvalue_header)]: keyvalue})

summary_table = pd.merge(data_set, data_collector, on="Occupation Code")

# Bare expression so the notebook displays the table as the cell output.
summary_table

Unnamed: 0,Occupation Code,Occupation Title,Red
0,131141,"Compensation, Benefits, and Job Analysis Speci...",80530
1,131161,Market Research Analysts and Marketing Special...,596450
2,132031,Budget Analysts,54550
3,132041,Credit Analysts,74850
4,132051,Financial Analysts,294110
5,151121,Computer Systems Analysts,581960
6,151122,Information Security Analysts,105250
7,151141,Database Administrators,113690
8,152011,Actuaries,19210
9,152031,Operations Research Analysts,106050
