In [None]:
import io

import pandas as pd
import zipfile
import requests

# Render floats with thousands separators and two decimals (e.g. 1,234,567.89).
pd.set_option('display.float_format', '{:,.2f}'.format)

## GDP Per Country

In [None]:
# Row labels in the World Bank file that are aggregate groups (regions, income
# bands, the whole world) rather than individual countries. One label per line
# so additions and removals show up as single-line diffs.
world_bank_aggregates = [
    'Arab World',
    'Caribbean small states',
    'Central Europe and the Baltics',
    'Early-demographic dividend',
    'East Asia & Pacific',
    'East Asia & Pacific (excluding high income)',
    'East Asia & Pacific (IDA & IBRD countries)',
    'Euro area',
    'Europe & Central Asia',
    'Europe & Central Asia (excluding high income)',
    'Europe & Central Asia (IDA & IBRD countries)',
    'European Union',
    'Fragile and conflict affected situations',
    'Heavily indebted poor countries (HIPC)',
    'High income',
    'IBRD only',
    'IDA & IBRD total',
    'IDA blend',
    'IDA only',
    'IDA total',
    'Late-demographic dividend',
    'Latin America & Caribbean',
    'Latin America & Caribbean (excluding high income)',
    'Latin America & the Caribbean (IDA & IBRD countries)',
    'Least developed countries: UN classification',
    'Low & middle income',
    'Low income',
    'Lower middle income',
    'Middle East & North Africa',
    'Middle East & North Africa (excluding high income)',
    'Middle East & North Africa (IDA & IBRD countries)',
    'Middle income',
    'North America',
    'Not classified',
    'OECD members',
    'Other small states',
    'Pacific island small states',
    'Post-demographic dividend',
    'Pre-demographic dividend',
    'Small states',
    'South Asia',
    'South Asia (IDA & IBRD)',
    'Sub-Saharan Africa',
    'Sub-Saharan Africa (excluding high income)',
    'Sub-Saharan Africa (IDA & IBRD countries)',
    'Upper middle income',
    'World',
]


# Download zip archive from The World Bank
# HTTPS protects the download from tampering in transit; the timeout (seconds)
# keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get('https://api.worldbank.org/v2/en/indicator/NY.GDP.MKTP.CD?downloadformat=csv',
                        timeout=60)
# Stop here on an HTTP error instead of passing an error page on to ZipFile.
response.raise_for_status()

# Read the CSV file contained in the zip archive.
# Both the archive and the member handle are closed as soon as the frame is built.
with zipfile.ZipFile(io.BytesIO(response.content)) as downloaded_zipfile:
    # The archive holds several files; only the one with this prefix is read.
    csv_filename = [filename for filename in downloaded_zipfile.namelist()
                    if filename.startswith('API_NY.GDP.MKTP.CD_DS2_en_csv_v2')]
    if not csv_filename:
        raise FileNotFoundError(
            'No file starting with "API_NY.GDP.MKTP.CD_DS2_en_csv_v2" in the downloaded archive; '
            f'archive contains: {downloaded_zipfile.namelist()}'
        )
    with downloaded_zipfile.open(csv_filename[0]) as csv_file:
        # The first three lines are skipped so that the fourth is used as the header row
        # (presumably a preamble precedes the table - confirm against the file).
        gdp_per_c = pd.read_csv(csv_file, skiprows=3)


# Cleanup the dataframe
# pandas labels header-less columns "Unnamed: <position>", so the exact label
# depends on how many columns the file has; match by prefix instead of
# hard-coding one position, which breaks whenever the column count changes.
unnamed_columns = [column for column in gdp_per_c.columns if str(column).startswith('Unnamed:')]

gdp_per_c = (
    gdp_per_c
    .drop(columns=['Indicator Name', 'Indicator Code', 'Country Code', '2018', *unnamed_columns])
    # Index by country first: while 'Country Name' is still a regular column no
    # row can be entirely NaN, which would make the dropna below a no-op.
    .set_index('Country Name')
    # Remove countries that have no value in any remaining column.
    .dropna(how='all')
    # Keep individual countries only; tolerate aggregate labels that are absent
    # from the downloaded file instead of failing the whole cell with a KeyError.
    .drop(index=world_bank_aggregates, errors='ignore')
)

### Top 20 GDP Countries 2011

In [None]:
# Twenty largest 2011 values, as a one-column frame; rows without a 2011 value
# are excluded before ranking.
(
    gdp_per_c
    .loc[gdp_per_c['2011'].notna(), ['2011']]
    .sort_values(by='2011', ascending=False)
    .head(20)
)

## Top 10 GDP Countries (by Average GDP, All Available Years)

In [None]:
%matplotlib inline

# Rank countries by their mean across all year columns, keep the first ten, and
# draw one line per country with the years along the x-axis.
plot = (
    gdp_per_c
    .assign(avg=lambda frame: frame.mean(axis=1))  # temporary ranking key
    .sort_values(by='avg', ascending=False)
    .drop(columns='avg')                           # key must not be plotted
    .head(10)
    .T                                             # years become the index
    .plot(figsize=(20, 10))
)

## GDP Country Distribution 2017

In [None]:
%matplotlib inline
# Rank the 2017 column once, then split it into the ten largest entries and a
# single 'Other' slice holding the sum of everything after them.
gdp_2017_ranked = gdp_per_c['2017'].sort_values(ascending=False)
top_10 = gdp_2017_ranked.head(10)
top_10.loc['Other'] = gdp_2017_ranked.iloc[10:].sum()
plot = top_10.plot.pie(subplots=True, figsize=(20, 10), colormap='Paired')