**How to Query the USA Census Dataset (BigQuery)**

In [1]:
import bq_helper
from bq_helper import BigQueryHelper
# Helper handle for the census_bureau_usa dataset in the bigquery-public-data project.
# bq_helper introduction: https://www.kaggle.com/sohier/introduction-to-the-bq-helper-package
census_data = BigQueryHelper(
    active_project="bigquery-public-data",
    dataset_name="census_bureau_usa",
)

In [2]:
# Reuse the helper built in the first cell rather than constructing a second,
# identical BigQueryHelper for the same project and dataset. The name
# `bq_assistant` is kept as an alias because the exploration cells that follow
# refer to the helper by that name.
bq_assistant = census_data
# List the tables available in the census_bureau_usa dataset.
bq_assistant.list_tables()

['population_by_zip_2000', 'population_by_zip_2010']

In [3]:
# Peek at the first three rows of the 2010 table.
bq_assistant.head(
    "population_by_zip_2010",
    num_rows=3,
)

Unnamed: 0,zipcode,geo_id,minimum_age,maximum_age,gender,population
0,99776,8600000US99776,,,,124
1,38305,8600000US38305,,,,49808
2,37086,8600000US37086,,,,31513


In [4]:
# Show each column's name, type, mode and description for the 2010 table.
bq_assistant.table_schema(
    "population_by_zip_2010"
)

[SchemaField('zipcode', 'STRING', 'REQUIRED', 'Five digit ZIP Code Tabulation Area Census Code', ()),
 SchemaField('geo_id', 'STRING', 'NULLABLE', 'Geo code', ()),
 SchemaField('minimum_age', 'INTEGER', 'NULLABLE', 'The minimum age in the age range. If null, this indicates the row as a total for male, female, or overall population.', ()),
 SchemaField('maximum_age', 'INTEGER', 'NULLABLE', 'The maximum age in the age range. If null, this indicates the row as having no maximum (such as 85 and over) or the row is a total of the male, female, or overall population.', ()),
 SchemaField('gender', 'STRING', 'NULLABLE', 'male or female. If empty, the row is a total population summary.', ()),
 SchemaField('population', 'INTEGER', 'NULLABLE', 'The total count of the population for this segment.', ())]

What are the ten most populous zip codes in the US in the 2010 census?

In [5]:
# Ten most populous ZIP codes in the 2010 census.
# Only the overall-total row of each ZIP code is wanted. Per the table schema,
# total rows carry an empty gender and NULL minimum_age / maximum_age, so all
# three conditions are applied here. This is the same total-population filter
# the 2000-vs-2010 comparison query in this notebook uses.
query1 = """SELECT
  zipcode,
  population
FROM
  `bigquery-public-data.census_bureau_usa.population_by_zip_2010`
WHERE
  gender = ''
  AND minimum_age IS NULL
  AND maximum_age IS NULL
ORDER BY
  population DESC
LIMIT
  10
        """
response1 = census_data.query_to_pandas_safe(query1)
# The SQL LIMIT already caps the result at 10 rows; head(10) just displays them.
response1.head(10)

Unnamed: 0,zipcode,population
0,60629,113916
1,79936,111086
2,11368,109931
3,926,108862
4,90650,105549
5,90011,103892
6,91331,103689
7,11226,101572
8,90201,101279
9,11373,100820


What are the top 10 zip codes that experienced the greatest percentage change in population (increase or decrease) between the 2000 and 2010 censuses?

In [6]:
# Rank ZIP codes by the absolute percentage change in total population between
# the 2000 and 2010 censuses and keep the ten largest.
#
# The inner query joins the 2010 table (r1) to the 2000 table (r2) on zipcode,
# restricted to total-population rows (NULL age bounds and empty gender on both
# sides). It computes the raw change, the percentage change rounded to two
# decimals, and the absolute fractional change that the outer query sorts by.
# NULLIF(r2.population, 0) turns a zero 2000 population into NULL so the
# division produces NULL rather than dividing by zero.
#
# Note: with this ordering, ZIP codes that had only a handful of residents in
# 2000 dominate the results, as the output below shows.
query2 = """SELECT
  zipcode,
  pop_2000,
  pop_2010,
  pop_chg,
  pop_pct_chg
FROM (
  SELECT
    r1.zipcode AS zipcode,
    r2.population AS pop_2000,
    r1.population AS pop_2010,
    r1.population - r2.population AS pop_chg,
    ROUND((r1.population - r2.population)/NULLIF(r2.population,0) * 100, 2) AS pop_pct_chg,
    ABS((r1.population - r2.population)/NULLIF(r2.population,0)) AS abs_pct_chg
  FROM
    `bigquery-public-data.census_bureau_usa.population_by_zip_2010` AS r1
  INNER JOIN
    `bigquery-public-data.census_bureau_usa.population_by_zip_2000` AS r2
  ON
    r1.zipcode = r2.zipcode WHERE --following criteria selects total population without breaking down by age/gender
    r1.minimum_age IS NULL
    AND r2.minimum_age IS NULL
    AND r1.maximum_age IS NULL
    AND r2.maximum_age IS NULL
    AND r1.gender = ''
    AND r2.gender = '' )
ORDER BY
  abs_pct_chg DESC
LIMIT
  10
        """
# Run the query through the helper and display the (at most 10) result rows.
response2 = census_data.query_to_pandas_safe(query2)
response2.head(10)

Unnamed: 0,zipcode,pop_2000,pop_2010,pop_chg,pop_pct_chg
0,60654,7,14875,14868,212400.0
1,90263,2,1612,1610,80500.0
2,70373,19,7141,7122,37484.21
3,95937,4,1491,1487,37175.0
4,98164,1,141,140,14000.0
5,25644,4,536,532,13300.0
6,89011,175,19550,19375,11071.43
7,76177,45,4891,4846,10768.89
8,89141,262,25150,24888,9499.24
9,52235,8,698,690,8625.0


![Census population map](https://cloud.google.com/bigquery/images/census-population-map.png)
Image source: https://cloud.google.com/bigquery/images/census-population-map.png