**How to Query the Bureau of Labor Statistics Dataset (BigQuery)**

In [1]:
import bq_helper
from bq_helper import BigQueryHelper
# https://www.kaggle.com/sohier/introduction-to-the-bq-helper-package
BLS = bq_helper.BigQueryHelper(active_project="bigquery-public-data",
                                   dataset_name="bls")

In [2]:
bq_assistant = BigQueryHelper("bigquery-public-data", "bls")
bq_assistant.list_tables()

['c_cpi_u',
 'cpi_u',
 'employment_hours_earnings',
 'employment_hours_earnings_series',
 'unemployment_cps',
 'unemployment_cps_series',
 'wm',
 'wm_series']

In [3]:
bq_assistant.head('cpi_u', num_rows=3)

Unnamed: 0,series_id,year,period,value,footnote_codes,survey_abbreviation,seasonal_code,periodicity_code,area_code,area_name,item_code,item_name,date
0,CUUR0000AA0,1997,M13,480.8,,CU,U,R,0,U.S. city average,AA0,All items - old base,1997-12-31
1,CUUR0000AA0,2001,M01,524.5,,CU,U,R,0,U.S. city average,AA0,All items - old base,2001-01-01
2,CUUR0000AA0,2003,M02,548.5,,CU,U,R,0,U.S. city average,AA0,All items - old base,2003-02-01


In [4]:
bq_assistant.table_schema("cpi_u")

[SchemaField('series_id', 'STRING', 'REQUIRED', 'Code identifying the specific series', ()),
 SchemaField('year', 'INTEGER', 'NULLABLE', 'Identifies year of observation', ()),
 SchemaField('period', 'STRING', 'NULLABLE', 'Identifies period for which data is observed. M01 = January, M02 = February….M13 = Annual Average', ()),
 SchemaField('value', 'FLOAT', 'NULLABLE', 'Price index for item', ()),
 SchemaField('footnote_codes', 'STRING', 'NULLABLE', 'Identifies footnote for the data series', ()),
 SchemaField('survey_abbreviation', 'STRING', 'NULLABLE', 'Code identifying the survey', ()),
 SchemaField('seasonal_code', 'STRING', 'NULLABLE', 'Code identifying whether the data are seasonally adjusted. S = Seasonally Adjusted, U = Unadjusted', ()),
 SchemaField('periodicity_code', 'STRING', 'NULLABLE', 'Frequency of data observation. R = Monthly, S = Semi-Annual', ()),
 SchemaField('area_code', 'STRING', 'NULLABLE', 'Unique code used to identify a specific geographic area. Full area codes fo

What is the average annual inflation across all US Cities?

In [5]:
query1 = """SELECT *, ROUND((100*(value-prev_year)/value), 1) rate
FROM (
  SELECT
    year,
    LAG(value) OVER(ORDER BY year) prev_year,
    ROUND(value, 1) AS value,
    area_name
  FROM
    `bigquery-public-data.bls.cpi_u`
  WHERE
    period = "S03"
    AND item_code = "SA0"
    AND area_name = "U.S. city average"
)
ORDER BY year
        """
response1 = BLS.query_to_pandas_safe(query1)
response1.head(10)

Unnamed: 0,year,prev_year,value,area_name,rate
0,1997,,160.5,U.S. city average,
1,1998,160.5,163.0,U.S. city average,1.5
2,1999,163.0,166.6,U.S. city average,2.2
3,2000,166.6,172.2,U.S. city average,3.3
4,2001,172.2,177.1,U.S. city average,2.8
5,2002,177.1,179.9,U.S. city average,1.6
6,2003,179.9,184.0,U.S. city average,2.2
7,2004,184.0,188.9,U.S. city average,2.6
8,2005,188.9,195.3,U.S. city average,3.3
9,2006,195.3,201.6,U.S. city average,3.1


What was the monthly unemployment rate (U3) in 2016?

In [6]:
query2 = """SELECT
  year,
  date,
  period,
  value,
  series_title
FROM
  `bigquery-public-data.bls.unemployment_cps`
WHERE
  series_id = "LNS14000000"
  AND year = 2016
ORDER BY date
        """
response2 = BLS.query_to_pandas_safe(query2)
response2.head(10)

Unnamed: 0,year,date,period,value,series_title
0,2016,2016-01-01,M01,4.9,(Seas) Unemployment Rate
1,2016,2016-02-01,M02,4.9,(Seas) Unemployment Rate
2,2016,2016-03-01,M03,5.0,(Seas) Unemployment Rate
3,2016,2016-04-01,M04,5.0,(Seas) Unemployment Rate
4,2016,2016-05-01,M05,4.7,(Seas) Unemployment Rate
5,2016,2016-06-01,M06,4.9,(Seas) Unemployment Rate
6,2016,2016-07-01,M07,4.9,(Seas) Unemployment Rate
7,2016,2016-08-01,M08,4.9,(Seas) Unemployment Rate
8,2016,2016-09-01,M09,5.0,(Seas) Unemployment Rate
9,2016,2016-10-01,M10,4.9,(Seas) Unemployment Rate


What are the top 10 hourly-waged types of work in Pittsburgh, PA for 2016?

In [7]:
query3 = """SELECT
  year,
  period,
  value,
  series_title
FROM
  `bigquery-public-data.bls.wm`
WHERE
  series_title LIKE '%Pittsburgh, PA%'
  AND year = 2016
ORDER BY
  value DESC
LIMIT
  10
        """
response3 = BLS.query_to_pandas_safe(query3, max_gb_scanned=10)
response3.head(10)

Unnamed: 0,year,period,value,series_title
0,2016,A01,81.27,"Hourly mean wage for financial managers, in Pi..."
1,2016,A01,79.09,"Hourly mean wage for education, training, and ..."
2,2016,A01,79.07,"Hourly mean wage for education, training, and ..."
3,2016,A01,68.2,"Hourly mean wage for management occupations, i..."
4,2016,A01,66.97,"Hourly mean wage for management occupations, i..."
5,2016,A01,66.05,Hourly mean wage for computer and mathematical...
6,2016,A01,61.39,"Hourly mean wage for education, training, and ..."
7,2016,A01,61.19,"Hourly mean wage for education, training, and ..."
8,2016,A01,61.13,"Hourly mean wage for financial managers, in Pi..."
9,2016,A01,59.86,Hourly mean wage for business and financial op...
