In [1]:
# download "wbgapi" package from Python Package Index using -pip- command line command
!pip install wbgapi

Defaulting to user installation because normal site-packages is not writeable
Collecting wbgapi
  Downloading wbgapi-1.0.12-py3-none-any.whl.metadata (13 kB)
Collecting tabulate (from wbgapi)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Downloading wbgapi-1.0.12-py3-none-any.whl (36 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
[33mDEPRECATION: distro-info 1.1build1 has a non-standard version number. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of distro-info or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063[0m[33m
[0m[33mDEPRECATION: pdfminer-six -VERSION- has a non-standard version number. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pdfminer-six or contact the author to suggest that they release a version with a conforming ver

In [5]:
# load the pandas and numpy packages to work with data
import pandas as pd
import numpy as np
# load the "wbgapi" package to download WDI data series
import wbgapi as wb
help(wb)

Help on package wbgapi:

NAME
    wbgapi

DESCRIPTION
    wbgapi provides a comprehensive interface to the World Bank's data and
    metadata API with built-in pandas integration

PACKAGE CONTENTS
    __version__
    data
    economy
    economy_coder
    economy_metadata
    income
    lending
    region
    series
    series_metadata
    source
    time
    topic
    utils

CLASSES
    builtins.Exception(builtins.BaseException)
        APIError
            APIResponseError
        URLError
    builtins.dict(builtins.object)
        Coder
    builtins.object
        Featureset
        Metadata
        MetadataCollection

    class APIError(builtins.Exception)
     |  APIError(url, msg, code=None)
     |
     |  Method resolution order:
     |      APIError
     |      builtins.Exception
     |      builtins.BaseException
     |      builtins.object
     |
     |  Methods defined here:
     |
     |  __init__(self, url, msg, code=None)
     |      Initialize self.  See help(type(self))



In [6]:
inicators = wb.get_indicator(source=1)

AttributeError: module 'wbgapi' has no attribute 'get_indicator'

In [None]:
# examples for -wbgapi- commands at https://pypi.org/project/wbgapi/
# download WDI data series for national CO2 emmissions per person & population into a dataframe
df = wb.data.DataFrame(['EN.ATM.CO2E.PC','SP.POP.TOTL'], time=2020, skipAggs=True)
# skipAggs=True skips entries for regional aggregates (i.e. only country observations)
# columns='series' would put multiple years in rows rather than in separate columns
df

In [None]:
# rename variables
# inplace=True saves changes to df
df.rename(columns={"EN.ATM.CO2E.PC": "CO2", "SP.POP.TOTL": "pop"}, inplace=True)

# rename index column from "economy" to "countrycode"
df.index.names = ['countrycode']
df

In [None]:
# download country names 
regions = wb.economy.info(skipAggs=True)
# this is a 'Featureset' object (GIS format for geographical features)
# convert to a DataFrame by converting to a Python 'dictionary' first
reg_dict = vars(regions)
# then use pandas .DataFrame function to assemble items into a dataframe
df_reg = pd.DataFrame(reg_dict.get('items'))

In [None]:
# rename some of the columns
df_reg = df_reg.rename(columns={"id": "countrycode", "value": "country"})
# create region name column from region codes
regions = ["EAS", "ECS", "LCN", "MEA", "NAC", "SAS", "SSF"]
regionnames = ["East Asia and Pacific", "Europe and Central Asia", "Latin America and Caribbean", "Middle East and North Africa", "North America", "South Asia", "Sub-Saharan Africa"]
df_reg['regionname'] = df_reg['region'].replace(regions,regionnames)
# create income level name for income level codes
inc_codes = ["HIC", "UMC", "LMC", "LIC"]
inc_names = ["High Income", "Upper Mid Income", "Lower Mid Income", "Low Income"]
df_reg['incomelevel'] = df_reg['incomeLevel'].replace(inc_codes,inc_names)

# keep country codes, country names, region names, income levels
df_reg = df_reg[['countrycode', 'country', 'regionname', 'incomelevel']]
df_reg

In [None]:
# merge CO2 & population variables onto region variables
df = pd.merge(df_reg, df, on='countrycode')
df
# delete df_reg
#del df_reg

In [None]:
pd.pivot_table(df, index='regionname', columns='incomelevel', values='CO2', aggfunc='mean')

In [None]:
# find and change INX (unclassified income)
df[df.incomelevel=='INX']

In [None]:
# find and change INX (unclassified income)
df.loc[df.incomelevel=='INX', 'incomelevel'] = "Low Income"
df[df.countrycode=='VEN']

In [None]:
# recalculate pivot_table
pd.pivot_table(df, index='regionname', columns='incomelevel', values='CO2', aggfunc='mean')

In [None]:
df.info()

In [None]:
# reorder categories of incomelevel
inc_cat = pd.CategoricalDtype(categories=["Low Income", "Lower Mid Income", "Upper Mid Income", "High Income"], ordered=True)
df['incomelevel'] = df['incomelevel'].astype(inc_cat)
pd.pivot_table(df, index='regionname', columns='incomelevel', values='CO2', aggfunc='mean')
# break to start Task 1

In [None]:
# problem: these are the means of country averages, not the means of people in the region
#  e.g. China and Singapore count equally
# we need the mean, weighted by the population in each country
#   Python does not use weights, and there is no weigthed average -aggfunc- (though we could write one)
#   So, we'll do it by hand: sum(pop*C02)/sum(pop)
# subsequent problem: if CO2 is missing, but pop isn't, we divide by the wrong sum
#   therefore get rid of any row of df with missing values:
df.dropna()
# calculate sum(pop*C02) and sum(pop)
df['CO2_pop'] = df['CO2'] * df['pop']
numerator = pd.pivot_table(df, index='regionname', columns='incomelevel', values='CO2_pop', aggfunc='sum', margins=True)
denominator = pd.pivot_table(df, index='regionname', columns='incomelevel', values='pop', aggfunc='sum', margins=True)
# element by element division of 2 tables
avCO2 = numerator/denominator  
avCO2

In [None]:
# round to 1 decimal place and change missing values to "None" with .replace()
avCO2 = round(avCO2,1).replace({np.nan:None})
# remove "incomelevel" and "regionname" headers
avCO2.index.names = ['']
avCO2.columns.names = ['']
# save to HTML file (e.g. for loading into Word)
avCO2.to_html(open('CO2_table.html', 'w'))
avCO2

In [None]:
# add a title with Styler (but messes up precision and missing values)
avCO2.style.set_caption("CO2 Emissions Per Person By Region and Income Level")

In [None]:
# add more styling elements:
#  - .background_gradient styling to highlight row maxima
#  - .format(precision=) to change numerical format
#  - .set_properties(**{'text-align':'center'}) to center textin columns
#  - .set_table_styles([{'selector': 'tr', 'props':'font-family:...}]) to change table font
#  - .map() to give NaN values a white (#ffffff) foreground and background color
# note that "\" is the line continuation character
#   more pandas styling information at https://pandas.pydata.org/docs/user_guide/style.html

avCO2 = avCO2.astype(float)  # required for background_gradient to work for some reason (?)
avCO2.style.set_caption("CO2 Emissions Per Person By Region and Income Level") \
  .background_gradient(cmap='Reds', axis=1) \
  .format(precision=1) \
  .set_properties(**{'text-align':'center'}) \
  .set_table_styles([{'selector': 'tr', 'props':'font-family: "Times New Roman", Times, serif'}]) \
  .map(lambda x: 'color: #ffffff' if pd.isnull(x) else '') \
  .map(lambda x: 'background: #ffffff' if pd.isnull(x) else '')