In [1]:
# Dependencies
import json
from urllib.parse import quote

import pandas as pd
import requests
from sqlalchemy import create_engine, inspect

In [2]:
# Get PostgreSql password from config file
from apikeys_config import pgAdmin_pw

ModuleNotFoundError: No module named 'apikeys_config'

In [3]:
# Base endpoint of the BLS public API (v2) time-series service;
# a series ID plus query string is appended to it for each call
bls_api_url = (
    'https://api.bls.gov/publicAPI/v2/'
    'timeseries/data/'
)

In [4]:
# Extract Unemployment data

# Extract data from the Bureau of Labor Statistics Unemployment database
# No registration key is required if optional parameters are not requested
# Go to this URL for help to construct the Series number: bls.gov/help/hlpforma.htm

# For unemployment data, get monthly unemployment rate, for 10 years (maximum allowed): 2011-2020
# Series ID for National Unemployment Rate by month is LNS14000000:
#     Series Title: (Seas) Unemployment Rate
#     Series ID: LNS14000000
#     Seasonality: Seasonally Adjusted
#     Survey Name: Percent or rate
#     Measure Data Type: Seasonally
#     Industry: All Industries
#     Occupation: All Occupations
#     Labor Force Status: Unemployment rate
#     Age: 16 years and over
#     Ethnicity: All Origins
#     Race: All Races
#     Gender: Both Sexes
#     Marital Status: All marital statuses
#     Education or Training: All education levels

# Set the call variables
unemployment_data_series = 'LNS14000000?startyear=2011&endyear=2020'

# Make the API call
# The timeout keeps the notebook from hanging indefinitely if the server stalls,
# and raise_for_status() stops the run on an HTTP error instead of letting a
# later cell fail while digging through an error payload
raw_unemployment_data = requests.post(f"{bls_api_url}{unemployment_data_series}", timeout=30)
raw_unemployment_data.raise_for_status()
json_unemployment_data = raw_unemployment_data.json()

# The payload carries its own 'status' field ('REQUEST_SUCCEEDED' on success);
# fail here with the API's message rather than with a KeyError further down
if json_unemployment_data.get('status') != 'REQUEST_SUCCEEDED':
    raise RuntimeError(
        f"BLS unemployment request was not successful: "
        f"{json_unemployment_data.get('status')} {json_unemployment_data.get('message')}"
    )

In [5]:
# Confirm call was successful
raw_unemployment_data

<Response [200]>

In [6]:
# View json results
json_unemployment_data

{'status': 'REQUEST_SUCCEEDED',
 'responseTime': 208,
 'message': [],
 'Results': {'series': [{'seriesID': 'LNS14000000',
    'data': [{'year': '2020',
      'period': 'M12',
      'periodName': 'December',
      'latest': 'true',
      'value': '6.7',
      'footnotes': [{}]},
     {'year': '2020',
      'period': 'M11',
      'periodName': 'November',
      'value': '6.7',
      'footnotes': [{}]},
     {'year': '2020',
      'period': 'M10',
      'periodName': 'October',
      'value': '6.9',
      'footnotes': [{}]},
     {'year': '2020',
      'period': 'M09',
      'periodName': 'September',
      'value': '7.8',
      'footnotes': [{}]},
     {'year': '2020',
      'period': 'M08',
      'periodName': 'August',
      'value': '8.4',
      'footnotes': [{}]},
     {'year': '2020',
      'period': 'M07',
      'periodName': 'July',
      'value': '10.2',
      'footnotes': [{}]},
     {'year': '2020',
      'period': 'M06',
      'periodName': 'June',
      'value': '11.1',
     

In [7]:
# Transform the Unemployment data
# Isolate the list of monthly observations from the API payload
unemployment_rates = json_unemployment_data['Results']['series'][0]['data']

# Record how many observations were returned
number_records = len(unemployment_rates)

# Split each observation into parallel column lists (values stay as the strings the API returned)
year = [observation['year'] for observation in unemployment_rates]
month_code = [observation['period'] for observation in unemployment_rates]
month_name = [observation['periodName'] for observation in unemployment_rates]
rate = [observation['value'] for observation in unemployment_rates]

# Assemble the columns under their output names
unemployment_dict = dict(
    year=year,
    month_code=month_code,
    month_name=month_name,
    unemployment_rate=rate,
)

# Build the Pandas dataframe from the column dictionary
unemployment_dict_df = pd.DataFrame(unemployment_dict)

# Display the dataframe
unemployment_dict_df

Unnamed: 0,year,month_code,month_name,unemployment_rate
0,2020,M12,December,6.7
1,2020,M11,November,6.7
2,2020,M10,October,6.9
3,2020,M09,September,7.8
4,2020,M08,August,8.4
...,...,...,...,...
115,2011,M05,May,9.0
116,2011,M04,April,9.1
117,2011,M03,March,9.0
118,2011,M02,February,9.0


In [8]:
# Extract Earnings data

# Extract data from the Bureau of Labor Statistics Current Employment Statistics (CES) database
# No registration key is required if optional parameters are not requested
# Go to this URL for help to construct the Series number: bls.gov/help/hlpforma.htm

# For earnings data, get monthly average hourly earnings for all private employees, for 10 years (maximum allowed): 2011-2020
# Series ID for National Private Average Hourly Earnings of All Employees is CES0500000003:
#     Series Title: Average hourly earnings of all employees, total private, seasonally adjusted
#     Series ID: CES0500000003
#     Seasonality: Seasonally Adjusted
#     Survey Name: Employment, Hours, and Earnings from the Current Employment Statistics survey (National)
#     Measure Data Type: Average Hourly Earnings of All Employees
#     Industry: Total private
#     Sector: Total private

# Set the call variables
earnings_data_series = 'CES0500000003?startyear=2011&endyear=2020'

# Make the API call
# The timeout keeps the notebook from hanging indefinitely if the server stalls,
# and raise_for_status() stops the run on an HTTP error instead of letting a
# later cell fail while digging through an error payload
raw_earnings_data = requests.post(f"{bls_api_url}{earnings_data_series}", timeout=30)
raw_earnings_data.raise_for_status()
json_earnings_data = raw_earnings_data.json()

# The payload carries its own 'status' field ('REQUEST_SUCCEEDED' on success);
# fail here with the API's message rather than with a KeyError further down
if json_earnings_data.get('status') != 'REQUEST_SUCCEEDED':
    raise RuntimeError(
        f"BLS earnings request was not successful: "
        f"{json_earnings_data.get('status')} {json_earnings_data.get('message')}"
    )

In [9]:
# Confirm call was successful
raw_earnings_data

<Response [200]>

In [10]:
# View json results
json_earnings_data

{'status': 'REQUEST_SUCCEEDED',
 'responseTime': 223,
 'message': [],
 'Results': {'series': [{'seriesID': 'CES0500000003',
    'data': [{'year': '2020',
      'period': 'M12',
      'periodName': 'December',
      'latest': 'true',
      'value': '29.81',
      'footnotes': [{'code': 'P', 'text': 'preliminary'}]},
     {'year': '2020',
      'period': 'M11',
      'periodName': 'November',
      'value': '29.58',
      'footnotes': [{'code': 'P', 'text': 'preliminary'}]},
     {'year': '2020',
      'period': 'M10',
      'periodName': 'October',
      'value': '29.49',
      'footnotes': [{}]},
     {'year': '2020',
      'period': 'M09',
      'periodName': 'September',
      'value': '29.47',
      'footnotes': [{}]},
     {'year': '2020',
      'period': 'M08',
      'periodName': 'August',
      'value': '29.45',
      'footnotes': [{}]},
     {'year': '2020',
      'period': 'M07',
      'periodName': 'July',
      'value': '29.35',
      'footnotes': [{}]},
     {'year': '2020'

In [11]:
# Transform the Earnings data
# Isolate the list of monthly observations from the API payload
earnings_data = json_earnings_data['Results']['series'][0]['data']

# Record how many observations were returned
number_records = len(earnings_data)

# Split each earnings observation into parallel column lists (values stay as the strings the API returned)
year = [observation['year'] for observation in earnings_data]
month_code = [observation['period'] for observation in earnings_data]
month_name = [observation['periodName'] for observation in earnings_data]
wage = [observation['value'] for observation in earnings_data]

# Assemble the columns under their output names
earnings_dict = dict(
    year=year,
    month_code=month_code,
    month_name=month_name,
    hourly_wage=wage,
)

# Build the Pandas dataframe from the column dictionary
earnings_dict_df = pd.DataFrame(earnings_dict)

# Display the dataframe
earnings_dict_df

Unnamed: 0,year,month_code,month_name,hourly_wage
0,2020,M12,December,29.81
1,2020,M11,November,29.58
2,2020,M10,October,29.49
3,2020,M09,September,29.47
4,2020,M08,August,29.45
...,...,...,...,...
115,2011,M05,May,22.99
116,2011,M04,April,22.92
117,2011,M03,March,22.87
118,2011,M02,February,22.87


In [12]:
# Combine the two monthly series into one dataframe: every unemployment row is kept
# and the earnings value for the same year / month is attached to it (left join)
employment_df = unemployment_dict_df.merge(
    earnings_dict_df,
    how='left',
    on=['year', 'month_code', 'month_name'],
)
employment_df

Unnamed: 0,year,month_code,month_name,unemployment_rate,hourly_wage
0,2020,M12,December,6.7,29.81
1,2020,M11,November,6.7,29.58
2,2020,M10,October,6.9,29.49
3,2020,M09,September,7.8,29.47
4,2020,M08,August,8.4,29.45
...,...,...,...,...,...
115,2011,M05,May,9.0,22.99
116,2011,M04,April,9.1,22.92
117,2011,M03,March,9.0,22.87
118,2011,M02,February,9.0,22.87


In [13]:
# Change the month code to a 2-digit month code
# Build the lookup of period codes to 2-digit months: 'M01' -> '01', ..., 'M12' -> '12'
month_code_dict = {f'M{month:02d}': f'{month:02d}' for month in range(1, 13)}

# Recode month_code using the lookup
# replace() leaves values that are not keys of the lookup untouched, so re-running this cell
# keeps the already-converted codes (map() would turn them into NaN on a second run)
employment_df['month_code'] = employment_df['month_code'].replace(month_code_dict)

# Create a new field, year_month, containing the year and 2-digit month (YYYY-MM),
# for ease of sorting during plotting and charting
employment_df['year_month'] = employment_df['year'] + '-' + employment_df['month_code']
employment_df

Unnamed: 0,year,month_code,month_name,unemployment_rate,hourly_wage,year_month
0,2020,12,December,6.7,29.81,2020-12
1,2020,11,November,6.7,29.58,2020-11
2,2020,10,October,6.9,29.49,2020-10
3,2020,09,September,7.8,29.47,2020-09
4,2020,08,August,8.4,29.45,2020-08
...,...,...,...,...,...,...
115,2011,05,May,9.0,22.99,2011-05
116,2011,04,April,9.1,22.92,2011-04
117,2011,03,March,9.0,22.87,2011-03
118,2011,02,February,9.0,22.87,2011-02


In [14]:
# Create Postgres database connection
# The password is percent-encoded before it goes into the URL so that characters such as
# '@', ':', '/' or '%' are not misread as URL delimiters; a password made only of
# unreserved characters is left exactly as it was
connection_string = f"postgres:{quote(pgAdmin_pw, safe='')}@localhost:5432/Real_Estate_Analysis_db"
engine = create_engine(f'postgresql://{connection_string}')

NameError: name 'pgAdmin_pw' is not defined

In [None]:
# Confirm tables
# Engine.table_names() is deprecated as of SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names and works on both
inspect(engine).get_table_names()

In [None]:
# Load the merged dataframe into the 'employment' table
# Rows are appended to whatever the table already holds; the dataframe index is not written
employment_df.to_sql(
    'employment',
    engine,
    index=False,
    if_exists='append',
)

In [None]:
# Read the employment table back to confirm the rows were loaded
pd.read_sql_query(sql='select * from employment', con=engine)

In [15]:
# Export the dataframe as JSON so the data can be plotted in JavaScript
# (temporary step until Heroku is working on the website)
# orient='records' writes a list with one object per row; the other valid orientations are
# 'split', 'index', 'values', 'table' and 'columns' (default)
employment_df.to_json(
    path_or_buf=r'../data/employmentrecords.json',
    orient='records',
)

In [16]:
# Export the dataframe as CSV so the data can be plotted in Tableau
# (the dataframe index is written as the first, unnamed column)
employment_df.to_csv(path_or_buf=r'../data/newemploymentrecords.csv')