**How to Query the International Census Dataset (BigQuery)**

In [1]:
import bq_helper
from bq_helper import BigQueryHelper
# Helper bound to the `census_bureau_international` dataset in the
# `bigquery-public-data` project. Background on the helper package:
# https://www.kaggle.com/sohier/introduction-to-the-bq-helper-package
international_census = bq_helper.BigQueryHelper(
    active_project="bigquery-public-data",
    dataset_name="census_bureau_international",
)

In [2]:
# Reuse the helper built in the first cell rather than constructing a second
# client for the same project and dataset. The `bq_assistant` name is kept
# because the exploration cells below call it by that name.
bq_assistant = international_census

# List every table available in the dataset.
bq_assistant.list_tables()

['age_specific_fertility_rates',
 'birth_death_growth_rates',
 'country_names_area',
 'midyear_population',
 'midyear_population_5yr_age_sex',
 'midyear_population_age_sex',
 'midyear_population_agespecific',
 'mortality_life_expectancy']

In [3]:
# Preview three rows of the mortality / life-expectancy table.
bq_assistant.head(
    "mortality_life_expectancy",
    num_rows=3,
)

Unnamed: 0,country_code,country_name,year,infant_mortality,infant_mortality_male,infant_mortality_female,life_expectancy,life_expectancy_male,life_expectancy_female,mortality_rate_under5,mortality_rate_under5_male,mortality_rate_under5_female,mortality_rate_1to4,mortality_rate_1to4_male,mortality_rate_1to4_female
0,AA,Aruba,2047,5.29,6.43,4.13,81.27,78.2,84.41,5.99,7.32,4.63,0.7,0.9,0.5
1,AA,Aruba,2038,6.28,7.82,4.72,80.25,77.17,83.4,7.13,8.93,5.3,0.85,1.12,0.58
2,AA,Aruba,2008,14.26,18.92,9.51,75.06,72.03,78.14,16.48,22.06,10.79,2.25,3.2,1.29


In [4]:
# Show the column names, types, modes and descriptions of the same table.
bq_assistant.table_schema(
    "mortality_life_expectancy"
)

[SchemaField('country_code', 'STRING', 'REQUIRED', 'Federal Information Processing Standard (FIPS) country/area code', ()),
 SchemaField('country_name', 'STRING', 'NULLABLE', 'Country or area name', ()),
 SchemaField('year', 'INTEGER', 'REQUIRED', 'Year', ()),
 SchemaField('infant_mortality', 'FLOAT', 'NULLABLE', 'Both sexes infant mortality rate (infant deaths per 1,000 population)', ()),
 SchemaField('infant_mortality_male', 'FLOAT', 'NULLABLE', 'Male infant mortality rate (infant deaths per 1,000 population)', ()),
 SchemaField('infant_mortality_female', 'FLOAT', 'NULLABLE', 'Female infant mortality rate (infant deaths per 1,000 population)', ()),
 SchemaField('life_expectancy', 'FLOAT', 'NULLABLE', 'Both sexes life expectancy at birth (years)', ()),
 SchemaField('life_expectancy_male', 'FLOAT', 'NULLABLE', 'Male life expectancy at birth (years)', ()),
 SchemaField('life_expectancy_female', 'FLOAT', 'NULLABLE', 'Female life expectancy at birth (years)', ()),
 SchemaField('mortality_

Which countries had the longest life expectancy in 2016? (Only countries whose `country_area` exceeds 25,000 are considered.)


In [5]:
# Top 10 countries by 2016 life expectancy, restricted to countries whose
# country_area is greater than 25000.
#
# The two sub-selects are joined on country_code: the schema marks it REQUIRED
# while country_name is NULLABLE, and the other queries in this notebook join
# on the same key. The sort column is named explicitly instead of by ordinal
# so that reordering the SELECT list cannot silently change the ranking.
query1 = """SELECT
  age.country_name,
  age.life_expectancy,
  size.country_area
FROM (
  SELECT
    country_code,
    country_name,
    life_expectancy
  FROM
    `bigquery-public-data.census_bureau_international.mortality_life_expectancy`
  WHERE
    year = 2016) age
INNER JOIN (
  SELECT
    country_code,
    country_area
  FROM
    `bigquery-public-data.census_bureau_international.country_names_area`
  WHERE
    country_area > 25000) size
ON
  age.country_code = size.country_code
ORDER BY
  age.life_expectancy DESC
/* Remove the LIMIT clause for Data Studio Visualization */
LIMIT
  10
        """
response1 = international_census.query_to_pandas_safe(query1)
response1.head(10)

Unnamed: 0,country_name,life_expectancy,country_area
0,Japan,85.0,364485.0
1,Iceland,83.02,100250.0
2,Switzerland,82.57,39997.0
3,"Korea, South",82.43,96920.0
4,Australia,82.23,7682300.0
5,Italy,82.2,294140.0
6,Sweden,82.06,410335.0
7,Canada,81.85,9093507.0
8,France,81.84,640427.0
9,Norway,81.78,304282.0


Which countries have the largest proportion of their population under 25?


In [6]:
# Top 10 countries by share of the 2017 population younger than 25.
#
# The `age` sub-select keeps the per-age population rows with age < 25 for
# 2017; the `pop` sub-select supplies each country's 2017 midyear total. They
# are joined on country_code, the under-25 rows are summed per country, and
# the share is expressed as a percentage rounded to two decimals.
#
# GROUP BY / ORDER BY name their columns explicitly (rather than using
# ordinals) so the query keeps its meaning if the SELECT list is reordered.
query2 = """SELECT
  age.country_name,
  SUM(age.population) AS under_25,
  pop.midyear_population AS total,
  ROUND((SUM(age.population) / pop.midyear_population) * 100,2) AS pct_under_25
FROM (
  SELECT
    country_name,
    population,
    country_code
  FROM
    `bigquery-public-data.census_bureau_international.midyear_population_agespecific`
  WHERE
    year = 2017
    AND age < 25) age
INNER JOIN (
  SELECT
    midyear_population,
    country_code
  FROM
    `bigquery-public-data.census_bureau_international.midyear_population`
  WHERE
    year = 2017) pop
ON
  age.country_code = pop.country_code
GROUP BY
  age.country_name,
  pop.midyear_population
ORDER BY
  pct_under_25 DESC
/* Remove limit for visualization */
LIMIT
  10
        """
response2 = international_census.query_to_pandas_safe(query2)
response2.head(10)

Unnamed: 0,country_name,under_25,total,pct_under_25
0,Uganda,27361075,39570125,69.15
1,Niger,13108162,19245344,68.11
2,Mali,11985934,17885245,67.02
3,Malawi,12840006,19196246,66.89
4,Angola,19453005,29310273,66.37
5,Mozambique,17614963,26573706,66.29
6,Gaza Strip,1185377,1795183,66.03
7,Zambia,10544838,15972000,66.02
8,Burkina Faso,13060451,20107509,64.95
9,South Sudan,8457121,13026129,64.92


Which countries are seeing the largest net migration?


In [7]:
# Top 10 countries by 2017 net_migration, restricted to countries whose
# country_area is greater than 500. The growth-rate rows are joined to the
# area table on country_code, and the area is cast to an integer for display.
query3 = """SELECT
  growth.country_name,
  growth.net_migration,
  CAST(area.country_area as INT64) as country_area
FROM (
  SELECT
    country_name,
    net_migration,
    country_code
  FROM
    `bigquery-public-data.census_bureau_international.birth_death_growth_rates`
  WHERE
    year = 2017) growth
INNER JOIN (
  SELECT
    country_area,
    country_code
  FROM
    `bigquery-public-data.census_bureau_international.country_names_area`
  WHERE
    country_area > 500) area
ON
  growth.country_code = area.country_code
ORDER BY
  net_migration DESC
LIMIT
  10
        """

# This call passes an explicit scan cap of 10 GB, unlike the earlier queries.
response3 = international_census.query_to_pandas_safe(
    query3,
    max_gb_scanned=10,
)
response3.head(10)

Unnamed: 0,country_name,net_migration,country_area
0,Syria,61.46,183630
1,Luxembourg,15.52,2586
2,Qatar,14.61,11586
3,Singapore,13.1,687
4,Bahrain,12.07,760
5,South Sudan,10.6,644329
6,United Arab Emirates,10.48,83600
7,Turks and Caicos Islands,9.51,948
8,Cyprus,8.7,9241
9,Spain,7.76,498980
