In [1]:
# Dependencies and Setup
import json
from pathlib import Path

import pandas as pd
import requests

## Query Building

In [2]:
# Region folder names. The order must match the order of the code lists gathered
# in `countries` below, because the two are paired position by position.
regions = ["africa", "australia-oceania", "central-america-n-caribbean", "central-asia",
           "east-n-southeast-asia", "europe", "middle-east", "north-america", "south-america", "south-asia"]

# Two-letter country codes, one list per region (same order as `regions`).
afr = ["ag","ao","bc","bn","by","cd","cf","cg","cm","cn","ct","cv","dj","eg","ek","er","et","ga","gb",
       "gh","gv","iv","ke","li","lt","ly","ma","mi","ml","mo","mp","mr","mz","ng","ni","od","pu","rw",
       "se","sf","sg","sh","sl","so","su","to","tp","ts","tz","ug","uv","wa","wi","wz","za","zi"]
# NOTE(review): "ck" used to appear twice in this list, which made the same
# endpoint get queried twice. The out-of-order duplicate was dropped; if it was
# a typo for some other code, add that code back here.
aus = ["as","aq","at","bp","ck","cq","cr","cw","fj","fm","fp","gq","kr","kt","nc","ne","nf",
       "nh","nr","nz","pc","ps","rm","tl","tn","tv","um","wf","wq","ws"]
cenam = ["aa","ac","av","bb","bf","bh","bq","cj","cs","cu","do","dr","es","gj","gt","ha","ho","jm","mh",
         "nn","nu","pm","rn","rq","sc","st","tb","td","tk","uc","vc","vi","vq"]
cenas = ["kg","kz","rs","ti","tx","uz"]
easas = ["bm","bx","cb","ch","hk","id","ja","kn","ks","la","mc","mg","my","pf","pg","pp","rp","sn","th",
         "tt","tw","vm"]
eur = ["al","an","au","ax","be","bk","bo","bu","cy","da","dx","ee","ei","en","ez","fi","fo","fr","gi",
       "gk","gm","gr","hr","hu","ic","im","it","je","jn","kv","lg","lh","lo","ls","lu","md","mj","mk","mn","mt",
       "nl","no","pl","po","ri","ro","si","sm","sp","sv","sw","sz","uk","up","vt"]
mideas = ["ae","aj","am","ba","gg","gz","ir","is","iz","jo","ku","le","mu","qa","sa","sy","tu","we","ym"]
noram = ["bd", "ca", "gl", "ip", "mx", "sb", "us"]
souam = ["ar","bl","br","ci","co","ec","fk","gy","ns","pa","pe","sx","uy","ve"]
souas = ["af","bg","bt","ce","in","io","mv","np","pk"]

countries = [afr, aus, cenam, cenas, easas, eur, mideas, noram, souam, souas]

# zip() silently stops at the shorter input, so fail loudly if a region or a
# code list was added without its counterpart.
assert len(regions) == len(countries), "each region needs exactly one country-code list"

# Build every "<region>/<code>" fragment, region by region, keeping list order.
complete = [
    f'{region}/{code}'
    for region, codes in zip(regions, countries)
    for code in codes
]

## API Data Retrieval

In [3]:
# Set the API base URL
url = "https://github.com/factbook/factbook.json/raw/master/"

# Seconds to wait on a single HTTP request before giving up on that country
REQUEST_TIMEOUT = 30

# Number of countries per "set" in the progress log
SET_SIZE = 50

# Shared path prefixes inside a country's JSON document
_WASTE = ('Environment', 'Waste and recycling')
_AIR = ('Environment', 'Air pollutants')
_AGREEMENTS = ('Environment', 'Environment - international agreements')
_ELECTRICITY = ('Energy', 'Electricity generation sources')

# (output column, key path into the country's JSON document), in output column order.
# The two "Real GDP" entries stop one level above any 'text' leaf, so whatever
# nested value sits under that key is stored as-is.
COUNTRY_FIELDS = [
    # Base country info
    ("Country", ('Government', 'Country name', 'conventional short form', 'text')),
    ("Geographical Coordinates", ('Geography', 'Geographic coordinates', 'text')),
    # Economic Data
    ("GDP (Adjusted to $US)", ('Economy', 'GDP (official exchange rate)', 'text')),
    ("Real GDP (Purchasing Power Parity) 2020", ('Economy', 'Real GDP (purchasing power parity)')),
    ("Real GDP Per Capita 2020", ('Economy', 'Real GDP per capita')),
    # Recycling Data
    ("Waste Generated Annually", _WASTE + ('municipal solid waste generated annually', 'text')),
    ("Waste Recycled Annually", _WASTE + ('municipal solid waste recycled annually', 'text')),
    ("% of Waste Recycled Annually", _WASTE + ('percent of municipal solid waste recycled', 'text')),
    # Environmental Data
    ("Particulate Matter Emissions", _AIR + ('particulate matter emissions', 'text')),
    ("C02 Emissions", _AIR + ('carbon dioxide emissions', 'text')),
    ("Methane Emissions", _AIR + ('methane emissions', 'text')),
    ("Environmental Agreements (party to)", _AGREEMENTS + ('party to', 'text')),
    ("Environmental Agreements (signed but not ratified)", _AGREEMENTS + ('signed, but not ratified', 'text')),
    # Energy Data
    ("Electricity by Fossil Fuels", _ELECTRICITY + ('fossil fuels', 'text')),
    ("Electricity by Nuclear", _ELECTRICITY + ('nuclear', 'text')),
    ("Electricity by Solar", _ELECTRICITY + ('solar', 'text')),
    ("Electricity by Wind", _ELECTRICITY + ('wind', 'text')),
    ("Electricity by Hydroelectricty", _ELECTRICITY + ('hydroelectricity', 'text')),
    ("Electricity by Tide and Wave", _ELECTRICITY + ('tide and wave', 'text')),
    ("Electricity by Geothermal", _ELECTRICITY + ('geothermal', 'text')),
    ("Electricity by Biomass and Waste", _ELECTRICITY + ('biomass and waste', 'text')),
    # People and Society Data
    ("Urban Population (%)", ('People and Society', 'Urbanization', 'urban population', 'text')),
    ("Net Migration", ('People and Society', 'Net migration rate', 'text')),
]

# Columns left out when a country cannot supply the full field set
RECYCLING_COLUMNS = {"Waste Recycled Annually", "% of Waste Recycled Annually"}

# Reduced scope: every field except the two recycling columns
REDUCED_COUNTRY_FIELDS = [
    (column, path) for column, path in COUNTRY_FIELDS if column not in RECYCLING_COLUMNS
]


def _dig(node, path):
    """Follow `path` (a sequence of keys) down through nested containers.

    Raises KeyError when a key is missing, or TypeError when an intermediate
    value cannot be indexed by the next key.
    """
    for key in path:
        node = node[key]
    return node


def _build_row(country_info, fields):
    """Return {column: value} for every (column, path) pair in `fields`.

    All-or-nothing: the first path that cannot be resolved raises
    (KeyError / TypeError) and no partial row is produced.
    """
    return {column: _dig(country_info, path) for column, path in fields}


# Define an empty list to fetch the data for each country
country_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the countries and their respective regions in our list to fetch data
for i, country in enumerate(complete):
    # Group countries in sets of SET_SIZE for logging purposes.
    # Reset to 1 (not 0) so every set is numbered the same way as the first one.
    if i % SET_SIZE == 0 and i >= SET_SIZE:
        set_count += 1
        record_count = 1

    # Add query block to create endpoint URL for each country
    country_url = f'{url}{country}.json'

    # Log the url, record, and set numbers
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, country))
    print(country_url)

    # Add 1 to the record count
    record_count += 1

    # Download and decode the document once; it is reused for both extraction
    # attempts below. A failed download skips this country instead of aborting
    # the whole run.
    try:
        response = requests.get(country_url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        country_info = response.json()
    except (requests.exceptions.RequestException, ValueError) as err:
        # ValueError covers a body that is not valid JSON
        print("Request failed (%s), skipping" % err)
        continue

    try:
        # First choice: every field, including the recycling columns
        country_row = _build_row(country_info, COUNTRY_FIELDS)
    except (KeyError, TypeError):
        print("Incomplete Dataset, Reducing Scope")
        try:
            # Second choice: same document, without the recycling columns
            country_row = _build_row(country_info, REDUCED_COUNTRY_FIELDS)
        except (KeyError, TypeError):
            # Still missing a required field: leave this country out
            print("Pass")
            continue

    # Append the Country information into country_data list
    country_data.append(country_row)

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | africa/ag
https://github.com/factbook/factbook.json/raw/master/africa/ag.json
Processing Record 2 of Set 1 | africa/ao
https://github.com/factbook/factbook.json/raw/master/africa/ao.json
Incomplete Dataset, Reducing Scope
Processing Record 3 of Set 1 | africa/bc
https://github.com/factbook/factbook.json/raw/master/africa/bc.json
Processing Record 4 of Set 1 | africa/bn
https://github.com/factbook/factbook.json/raw/master/africa/bn.json
Processing Record 5 of Set 1 | africa/by
https://github.com/factbook/factbook.json/raw/master/africa/by.json
Incomplete Dataset, Reducing Scope
Processing Record 6 of Set 1 | africa/cd
https://github.com/factbook/factbook.json/raw/master/africa/cd.json
Incomplete Dataset, Reducing Scope
Processing Record 7 of Set 1 | africa/cf
https://github.com/factbook/factbook.json/raw/master/africa/cf.json
Processing Record 8 of Set 1 | africa/cg
https://github.com/factbook/fac

Incomplete Dataset, Reducing Scope
Processing Record 11 of Set 2 | australia-oceania/ck
https://github.com/factbook/factbook.json/raw/master/australia-oceania/ck.json
Incomplete Dataset, Reducing Scope
Pass
Processing Record 12 of Set 2 | australia-oceania/cq
https://github.com/factbook/factbook.json/raw/master/australia-oceania/cq.json
Incomplete Dataset, Reducing Scope
Pass
Processing Record 13 of Set 2 | australia-oceania/cr
https://github.com/factbook/factbook.json/raw/master/australia-oceania/cr.json
Incomplete Dataset, Reducing Scope
Pass
Processing Record 14 of Set 2 | australia-oceania/cw
https://github.com/factbook/factbook.json/raw/master/australia-oceania/cw.json
Incomplete Dataset, Reducing Scope
Pass
Processing Record 15 of Set 2 | australia-oceania/fj
https://github.com/factbook/factbook.json/raw/master/australia-oceania/fj.json
Processing Record 16 of Set 2 | australia-oceania/fm
https://github.com/factbook/factbook.json/raw/master/australia-oceania/fm.json
Incomplete Da

Incomplete Dataset, Reducing Scope
Processing Record 9 of Set 3 | central-america-n-caribbean/rn
https://github.com/factbook/factbook.json/raw/master/central-america-n-caribbean/rn.json
Incomplete Dataset, Reducing Scope
Pass
Processing Record 10 of Set 3 | central-america-n-caribbean/rq
https://github.com/factbook/factbook.json/raw/master/central-america-n-caribbean/rq.json
Incomplete Dataset, Reducing Scope
Pass
Processing Record 11 of Set 3 | central-america-n-caribbean/sc
https://github.com/factbook/factbook.json/raw/master/central-america-n-caribbean/sc.json
Incomplete Dataset, Reducing Scope
Processing Record 12 of Set 3 | central-america-n-caribbean/st
https://github.com/factbook/factbook.json/raw/master/central-america-n-caribbean/st.json
Incomplete Dataset, Reducing Scope
Processing Record 13 of Set 3 | central-america-n-caribbean/tb
https://github.com/factbook/factbook.json/raw/master/central-america-n-caribbean/tb.json
Incomplete Dataset, Reducing Scope
Pass
Processing Recor

Processing Record 12 of Set 4 | europe/ez
https://github.com/factbook/factbook.json/raw/master/europe/ez.json
Processing Record 13 of Set 4 | europe/fi
https://github.com/factbook/factbook.json/raw/master/europe/fi.json
Processing Record 14 of Set 4 | europe/fo
https://github.com/factbook/factbook.json/raw/master/europe/fo.json
Incomplete Dataset, Reducing Scope
Pass
Processing Record 15 of Set 4 | europe/fr
https://github.com/factbook/factbook.json/raw/master/europe/fr.json
Incomplete Dataset, Reducing Scope
Pass
Processing Record 16 of Set 4 | europe/gi
https://github.com/factbook/factbook.json/raw/master/europe/gi.json
Incomplete Dataset, Reducing Scope
Pass
Processing Record 17 of Set 4 | europe/gk
https://github.com/factbook/factbook.json/raw/master/europe/gk.json
Incomplete Dataset, Reducing Scope
Pass
Processing Record 18 of Set 4 | europe/gm
https://github.com/factbook/factbook.json/raw/master/europe/gm.json
Processing Record 19 of Set 4 | europe/gr
https://github.com/factbook/

Incomplete Dataset, Reducing Scope
Pass
Processing Record 26 of Set 5 | north-america/mx
https://github.com/factbook/factbook.json/raw/master/north-america/mx.json
Processing Record 27 of Set 5 | north-america/sb
https://github.com/factbook/factbook.json/raw/master/north-america/sb.json
Incomplete Dataset, Reducing Scope
Pass
Processing Record 28 of Set 5 | north-america/us
https://github.com/factbook/factbook.json/raw/master/north-america/us.json
Processing Record 29 of Set 5 | south-america/ar
https://github.com/factbook/factbook.json/raw/master/south-america/ar.json
Processing Record 30 of Set 5 | south-america/bl
https://github.com/factbook/factbook.json/raw/master/south-america/bl.json
Processing Record 31 of Set 5 | south-america/br
https://github.com/factbook/factbook.json/raw/master/south-america/br.json
Processing Record 32 of Set 5 | south-america/ci
https://github.com/factbook/factbook.json/raw/master/south-america/ci.json
Processing Record 33 of Set 5 | south-america/co
htt

## Dataframe Construction

In [4]:
# Turn the list of per-country records into a table (one row per country)
country_data_df = pd.DataFrame(data=country_data)

# Display how many non-null entries each column holds
country_data_df.count()

Country                                               182
Geographical Coordinates                              182
GDP (Adjusted to $US)                                 182
Real GDP (Purchasing Power Parity) 2020               182
Real GDP Per Capita 2020                              182
Waste Generated Annually                              182
Waste Recycled Annually                               103
% of Waste Recycled Annually                          103
Particulate Matter Emissions                          182
C02 Emissions                                         182
Methane Emissions                                     182
Environmental Agreements (party to)                   182
Environmental Agreements (signed but not ratified)    182
Electricity by Fossil Fuels                           182
Electricity by Nuclear                                182
Electricity by Solar                                  182
Electricity by Wind                                   182
Electricity by

In [5]:
# Export the country_data into a csv
output_path = Path("../output_data/countries.csv")

# to_csv() does not create missing folders, so make sure the target directory
# exists first (e.g. on a fresh checkout where output_data/ is not present).
output_path.parent.mkdir(parents=True, exist_ok=True)

country_data_df.to_csv(output_path)