**How to Query the USA Census Dataset (BigQuery)**

In [1]:
import bq_helper
from bq_helper import BigQueryHelper
# https://www.kaggle.com/sohier/introduction-to-the-bq-helper-package
census_data = bq_helper.BigQueryHelper(active_project="bigquery-public-data",
                                   dataset_name="census_bureau_usa")

In [2]:
bq_assistant = BigQueryHelper("bigquery-public-data", "census_bureau_usa")
# this query breaks but fortunately we don't need it to proceed
# bq_assistant.list_tables()

In [3]:
bq_assistant.head("population_by_zip_2010", num_rows=3)

Unnamed: 0,geo_id,zipcode,population,minimum_age,maximum_age,gender
0,8600000US25647,25647,768,,,
1,8600000US49336,49336,4352,,,
2,8600000US72106,72106,5376,,,


In [4]:
bq_assistant.table_schema("population_by_zip_2010")

Unnamed: 0,name,type,mode,description
0,geo_id,STRING,NULLABLE,Geo code
1,zipcode,STRING,REQUIRED,Five digit ZIP Code Tabulation Area Census Code
2,population,INTEGER,NULLABLE,The total count of the population for this seg...
3,minimum_age,INTEGER,NULLABLE,"The minimum age in the age range. If null, thi..."
4,maximum_age,INTEGER,NULLABLE,"The maximum age in the age range. If null, thi..."
5,gender,STRING,NULLABLE,"male or female. If empty, the row is a total p..."


What are the ten most populous zip codes in the US in the 2010 census?

In [5]:
query1 = """SELECT
  zipcode,
  population
FROM
  `bigquery-public-data.census_bureau_usa.population_by_zip_2010`
WHERE
  gender = ''
ORDER BY
  population DESC
LIMIT
  10
        """
response1 = census_data.query_to_pandas_safe(query1)
response1.head(10)

Unnamed: 0,zipcode,population


What are the top 10 zip codes that experienced the greatest change in population between the 2000 and 2010 censuses?

In [6]:
query2 = """SELECT
  zipcode,
  pop_2000,
  pop_2010,
  pop_chg,
  pop_pct_chg
FROM (
  SELECT
    r1.zipcode AS zipcode,
    r2.population AS pop_2000,
    r1.population AS pop_2010,
    r1.population - r2.population AS pop_chg,
    ROUND((r1.population - r2.population)/NULLIF(r2.population,0) * 100, 2) AS pop_pct_chg,
    ABS((r1.population - r2.population)/NULLIF(r2.population,0)) AS abs_pct_chg
  FROM
    `bigquery-public-data.census_bureau_usa.population_by_zip_2010` AS r1
  INNER JOIN
    `bigquery-public-data.census_bureau_usa.population_by_zip_2000` AS r2
  ON
    r1.zipcode = r2.zipcode WHERE --following criteria selects total population without breaking down by age/gender
    r1.minimum_age IS NULL
    AND r2.minimum_age IS NULL
    AND r1.maximum_age IS NULL
    AND r2.maximum_age IS NULL
    AND r1.gender = ''
    AND r2.gender = '' )
ORDER BY
  abs_pct_chg DESC
LIMIT
  10
        """
response2 = census_data.query_to_pandas_safe(query2)
response2.head(10)

Unnamed: 0,zipcode,pop_2000,pop_2010,pop_chg,pop_pct_chg


![https://cloud.google.com/bigquery/images/census-population-map.png](https://cloud.google.com/bigquery/images/census-population-map.png)
https://cloud.google.com/bigquery/images/census-population-map.png

Credit: Many functions are adapted from https://cloud.google.com/bigquery/public-data/us-census