In [35]:
import os
from google.cloud import bigquery

# Only supply a default project id when the environment has not already chosen
# one, then build the BigQuery client used by the query cells.
if "GCLOUD_PROJECT" not in os.environ:
    os.environ["GCLOUD_PROJECT"] = "enduring-rush-410922"
client = bigquery.Client()

In [38]:
# BigQuery SQL for the combined indicator table (one row per country, indicator, year).
#
# - country_data CTE: country code, short name (aliased to `country`), region and
#   income group from the WDI country_summary table.
# - Main SELECT: reads indicators_data, maps seven indicator codes to readable
#   labels via CASE, and keeps only those seven codes via the WHERE ... IN list.
# - growth_rate: percentage change of `value` versus the previous row for the same
#   country and indicator in year order, rounded to 2 decimals. LAG looks at the
#   previous available row, so it is year-over-year only where no years are missing.
#   NULLIF(..., 0) turns a zero denominator into NULL instead of a division error.
# - The LEFT JOIN followed by the region / income_group IS NOT NULL filters drops
#   every row whose country code has no region or income group in country_summary.
QUERY = """
WITH country_data AS (
  SELECT country_code, 
  short_name country,
  region, 
  income_group 
  FROM bigquery-public-data.world_bank_wdi.country_summary
)
SELECT data.country_code, country,
CASE
    WHEN indicator_code = "NY.GDP.MKTP.KD" THEN "GDP"
    WHEN indicator_code = "NE.CON.TOTL.KD" THEN "Consumption"
    WHEN indicator_code = "NE.GDI.FTOT.KD" THEN "Investment"
    WHEN indicator_code = "NE.EXP.GNFS.KD" THEN "Exports"
    WHEN indicator_code = "NE.IMP.GNFS.KD" THEN "Imports"
    WHEN indicator_code = "SP.POP.TOTL" THEN "Population"
    WHEN indicator_code = "SP.DYN.CBRT.IN" THEN "Birth_Rate"
END AS indicator,
year,
value, 
ROUND(
  (100*(value - LAG(value, 1) OVER(PARTITION BY data.country_code, indicator_code ORDER BY year ASC)))
  / (
    NULLIF(LAG(value, 1) OVER(PARTITION BY data.country_code, indicator_code ORDER BY year ASC), 0)
    )
, 2) AS growth_rate,
region, 
income_group
FROM bigquery-public-data.world_bank_wdi.indicators_data data 
LEFT JOIN country_data
ON data.country_code = country_data.country_code
WHERE indicator_code IN (
    "NY.GDP.MKTP.KD", --GDP
    "NE.CON.TOTL.KD", --Consumption
    "NE.GDI.FTOT.KD", --Investment
    "NE.EXP.GNFS.KD", --Exports
    "NE.IMP.GNFS.KD",  --Imports
    "SP.POP.TOTL", --Population
    "SP.DYN.CBRT.IN" --Birth Rate
  ) 
AND country_data.region IS NOT NULL
AND country_data.income_group IS NOT NULL
ORDER BY country_code, indicator, year
"""

In [39]:
# Submit the combined indicator query to BigQuery, then pull the complete
# result set into a pandas DataFrame for the later export cells.
query_job = client.query(QUERY)  # API request
df = query_job.to_dataframe()

In [41]:
df

Unnamed: 0,country_code,country,indicator,year,value,growth_rate,region,income_group
0,ABW,Aruba,Birth_Rate,1960,3.567900e+01,,Latin America & Caribbean,High income
1,ABW,Aruba,Birth_Rate,1961,3.452900e+01,-3.22,Latin America & Caribbean,High income
2,ABW,Aruba,Birth_Rate,1962,3.332000e+01,-3.50,Latin America & Caribbean,High income
3,ABW,Aruba,Birth_Rate,1963,3.205000e+01,-3.81,Latin America & Caribbean,High income
4,ABW,Aruba,Birth_Rate,1964,3.073700e+01,-4.10,Latin America & Caribbean,High income
...,...,...,...,...,...,...,...,...
60509,ZWE,Zimbabwe,Population,2016,1.403034e+07,1.56,Sub-Saharan Africa,Lower middle income
60510,ZWE,Zimbabwe,Population,2017,1.423660e+07,1.47,Sub-Saharan Africa,Lower middle income
60511,ZWE,Zimbabwe,Population,2018,1.443881e+07,1.42,Sub-Saharan Africa,Lower middle income
60512,ZWE,Zimbabwe,Population,2019,1.464547e+07,1.43,Sub-Saharan Africa,Lower middle income


In [42]:
import sqlalchemy

# Read the Postgres connection string from the WDI_DATABASE_URL environment
# variable when it is set, so real credentials can live outside the notebook.
# The fallback is the original local-development URL, kept so existing setups
# continue to work unchanged.
# NOTE(review): the fallback still embeds a password; remove it once
# WDI_DATABASE_URL is configured wherever this notebook runs.
engine = sqlalchemy.create_engine(
    os.environ.get('WDI_DATABASE_URL', 'postgresql://mitch:password@localhost:5432/wdi')
)
# Rewrite the 'main' table from scratch on every run; index=False keeps the
# DataFrame's positional index out of the table.
db = df.to_sql('main', engine, if_exists='replace', index=False)

## Population

In [29]:
# BigQuery SQL for total population (indicator SP.POP.TOTL) per country code and year.
#
# - population: the indicator value cast to INTEGER.
# - population_growth: percentage change versus the previous row for the same
#   country code in year order, rounded to 2 decimals. LAG looks at the previous
#   available row, so it is year-over-year only where no years are missing.
#   NULLIF(..., 0) turns a zero denominator into NULL instead of a division error.
# - There is no join or region filter here, so every country_code present in
#   indicators_data for this indicator is returned.
QUERY_POP = """
SELECT data.country_code, 
year,
CAST(value AS INTEGER) population, 
ROUND(
  (100*(value - LAG(value, 1) OVER(PARTITION BY data.country_code ORDER BY year ASC)))
  / (
    NULLIF(LAG(value, 1) OVER(PARTITION BY data.country_code ORDER BY year ASC), 0)
    )
, 2) AS population_growth
FROM bigquery-public-data.world_bank_wdi.indicators_data data 
WHERE indicator_code IN (
    "SP.POP.TOTL" --Population
  ) 
ORDER BY country_code, year
"""

In [30]:
# Submit the population query; this rebinds query_job, which the next cell
# converts to a DataFrame.
query_job = client.query(QUERY_POP)  # API request

In [31]:
# Download the population query result into a pandas DataFrame and display it.
df_pop = query_job.to_dataframe()
df_pop

Unnamed: 0,country_code,year,population,population_growth
0,ABW,1960,54208,
1,ABW,1961,55434,2.26
2,ABW,1962,56234,1.44
3,ABW,1963,56699,0.83
4,ABW,1964,57029,0.58
...,...,...,...,...
16118,ZWE,2016,14030338,1.56
16119,ZWE,2017,14236599,1.47
16120,ZWE,2018,14438812,1.42
16121,ZWE,2019,14645473,1.43


In [43]:
# Read the Postgres connection string from the WDI_DATABASE_URL environment
# variable when it is set, so real credentials can live outside the notebook.
# The fallback is the original local-development URL, kept so existing setups
# continue to work unchanged.
# NOTE(review): the fallback still embeds a password; remove it once
# WDI_DATABASE_URL is configured wherever this notebook runs.
engine = sqlalchemy.create_engine(
    os.environ.get('WDI_DATABASE_URL', 'postgresql://mitch:password@localhost:5432/wdi')
)
# Rewrite the 'pop' table from scratch on every run; index=False keeps the
# DataFrame's positional index out of the table.
db_pop = df_pop.to_sql('pop', engine, if_exists='replace', index=False)

In [44]:
# Save the combined indicator table as CSV (row index omitted) under ../data,
# relative to the notebook's working directory, then display the frame.
df.to_csv('../data/main.csv', index=False)
df

Unnamed: 0,country_code,country,indicator,year,value,growth_rate,region,income_group
0,ABW,Aruba,Birth_Rate,1960,3.567900e+01,,Latin America & Caribbean,High income
1,ABW,Aruba,Birth_Rate,1961,3.452900e+01,-3.22,Latin America & Caribbean,High income
2,ABW,Aruba,Birth_Rate,1962,3.332000e+01,-3.50,Latin America & Caribbean,High income
3,ABW,Aruba,Birth_Rate,1963,3.205000e+01,-3.81,Latin America & Caribbean,High income
4,ABW,Aruba,Birth_Rate,1964,3.073700e+01,-4.10,Latin America & Caribbean,High income
...,...,...,...,...,...,...,...,...
60509,ZWE,Zimbabwe,Population,2016,1.403034e+07,1.56,Sub-Saharan Africa,Lower middle income
60510,ZWE,Zimbabwe,Population,2017,1.423660e+07,1.47,Sub-Saharan Africa,Lower middle income
60511,ZWE,Zimbabwe,Population,2018,1.443881e+07,1.42,Sub-Saharan Africa,Lower middle income
60512,ZWE,Zimbabwe,Population,2019,1.464547e+07,1.43,Sub-Saharan Africa,Lower middle income
