In [4]:
import os
from google.cloud import bigquery

# Only supply a fallback project id when the environment has not set one already;
# an existing GCLOUD_PROJECT value is left untouched.
if "GCLOUD_PROJECT" not in os.environ:
    os.environ["GCLOUD_PROJECT"] = "enduring-rush-410922"

# No project or credentials are passed explicitly to the client here.
client = bigquery.Client()



In [20]:
# Per-country, per-indicator series with the fractional change from the
# previous row of the same series.
#
# Notes:
# * The first row of every (country_code, indicator_code) series has no
#   predecessor, so its `Growth Rate` is NULL.
# * LAG looks at the previous *row* ordered by year, not the previous calendar
#   year; if a series has gaps, the rate spans the gap.
# * SAFE_DIVIDE yields NULL instead of aborting the whole query with a
#   division-by-zero error when the previous value is 0.
QUERY = """
SELECT
  country_code,
  indicator_code,
  year AS Year,
  value,
  SAFE_DIVIDE(
    value - LAG(value, 1) OVER w,
    LAG(value, 1) OVER w
  ) AS `Growth Rate`
FROM bigquery-public-data.world_bank_wdi.indicators_data data
WHERE indicator_code IN (
    "NY.GDP.MKTP.KD", --GDP
    "NE.CON.TOTL.KD", --Consumption
    "NE.GDI.FTOT.KD", --Investment
    "NE.EXP.GNFS.KD", --Exports
    "NE.IMP.GNFS.KD", --Imports
    "SP.POP.TOTL" --Population
  )
WINDOW w AS (PARTITION BY country_code, indicator_code ORDER BY year ASC)
ORDER BY country_code, indicator_code, year
"""

In [21]:
# Hand the SQL text in QUERY to the BigQuery client (this is the API request).
query_job = client.query(query=QUERY)
# Pull the finished job's rows into a DataFrame bound to `df`.
df = query_job.to_dataframe()

In [22]:
# Show the query result as this cell's output.
df

Unnamed: 0,country_code,indicator_code,Year,value,Growth Rate
0,ABW,NE.CON.TOTL.KD,2015,2.328514e+09,
1,ABW,NE.EXP.GNFS.KD,2015,2.158564e+09,
2,ABW,NE.IMP.GNFS.KD,2015,2.162352e+09,
3,ABW,NY.GDP.MKTP.KD,1986,1.079460e+09,
4,ABW,NY.GDP.MKTP.KD,1987,1.253020e+09,0.160784
...,...,...,...,...,...
60138,ZWE,SP.POP.TOTL,2016,1.403034e+07,0.015614
60139,ZWE,SP.POP.TOTL,2017,1.423660e+07,0.014701
60140,ZWE,SP.POP.TOTL,2018,1.443881e+07,0.014204
60141,ZWE,SP.POP.TOTL,2019,1.464547e+07,0.014313


In [17]:
# Preview (up to 20 rows) of the total-population series for the USA.
df.loc[
    df["indicator_code"].eq("SP.POP.TOTL") & df["country_code"].eq("USA")
].head(20)

Unnamed: 0,country_code,indicator_code,Year,value,value_lag
57754,USA,SP.POP.TOTL,1960,180671000.0,
57755,USA,SP.POP.TOTL,1961,183691000.0,180671000.0
57756,USA,SP.POP.TOTL,1962,186538000.0,183691000.0
57757,USA,SP.POP.TOTL,1963,189242000.0,186538000.0
57758,USA,SP.POP.TOTL,1964,191889000.0,189242000.0
57759,USA,SP.POP.TOTL,1965,194303000.0,191889000.0
57760,USA,SP.POP.TOTL,1966,196560000.0,194303000.0
57761,USA,SP.POP.TOTL,1967,198712000.0,196560000.0
57762,USA,SP.POP.TOTL,1968,200706000.0,198712000.0
57763,USA,SP.POP.TOTL,1969,202677000.0,200706000.0


In [6]:
# Long-format extract of the selected indicators with human-readable labels.
#
# Changes from the earlier version of this cell:
# * Removed the trailing comma after the last element of the IN (...) list,
#   which BigQuery rejects as a syntax error.
# * `Name` is now produced explicitly as `country_name AS Name`.
#   NOTE(review): assumes the WDI table exposes `country_name` (as in the
#   public dataset schema) - confirm against the table before relying on it.
# * The CASE is written in its simple form; the mapping itself is unchanged.
#   Codes outside the mapping would yield NULL, but the WHERE clause restricts
#   rows to exactly the mapped codes.
QUERY = """
SELECT
  country_code,
  country_name AS Name,
  CASE indicator_code
    WHEN "NY.GDP.MKTP.KD" THEN "GDP"
    WHEN "NE.CON.TOTL.KD" THEN "Consumption"
    WHEN "NE.GDI.FTOT.KD" THEN "Investment"
    WHEN "NE.EXP.GNFS.KD" THEN "Exports"
    WHEN "NE.IMP.GNFS.KD" THEN "Imports"
    WHEN "SP.POP.TOTL" THEN "Population"
  END AS Indicator,
  year AS Year,
  value
FROM bigquery-public-data.world_bank_wdi.indicators_data data
WHERE indicator_code IN (
    "NY.GDP.MKTP.KD", --GDP
    "NE.CON.TOTL.KD", --Consumption
    "NE.GDI.FTOT.KD", --Investment
    "NE.EXP.GNFS.KD", --Exports
    "NE.IMP.GNFS.KD", --Imports
    "SP.POP.TOTL" --Population
  )
ORDER BY country_code, indicator, year
"""

In [7]:
# Run the SQL currently stored in QUERY; client.query issues the API request.
query_job = client.query(query=QUERY)
# Load the job's result rows into the DataFrame `df`.
df = query_job.to_dataframe()

In [10]:
# Write the query result to ../data/main.csv (without the index column), then
# show the frame as the cell output.
# The target directory is created first so that a fresh checkout without a
# ../data folder does not fail inside to_csv; exist_ok makes re-runs a no-op.
os.makedirs('../data', exist_ok=True)
df.to_csv('../data/main.csv', index=False)
df

Unnamed: 0,country_code,Name,Indicator,Year,value,Region,Income Group
0,ABW,Aruba,Birth Rate,1960,3.567900e+01,Latin America & Caribbean,High income
1,ABW,Aruba,Birth Rate,1961,3.452900e+01,Latin America & Caribbean,High income
2,ABW,Aruba,Birth Rate,1962,3.332000e+01,Latin America & Caribbean,High income
3,ABW,Aruba,Birth Rate,1963,3.205000e+01,Latin America & Caribbean,High income
4,ABW,Aruba,Birth Rate,1964,3.073700e+01,Latin America & Caribbean,High income
...,...,...,...,...,...,...,...
75025,ZWE,Zimbabwe,Population,2016,1.403034e+07,Sub-Saharan Africa,Lower middle income
75026,ZWE,Zimbabwe,Population,2017,1.423660e+07,Sub-Saharan Africa,Lower middle income
75027,ZWE,Zimbabwe,Population,2018,1.443881e+07,Sub-Saharan Africa,Lower middle income
75028,ZWE,Zimbabwe,Population,2019,1.464547e+07,Sub-Saharan Africa,Lower middle income
