/
World population.pct.py
111 lines (89 loc) · 2.82 KB
/
World population.pct.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# ---
# jupyter:
# jupytext:
# cell_markers: region,endregion
# formats: ipynb,.pct.py:percent,.lgt.py:light,.spx.py:sphinx,md,Rmd,.pandoc.md:pandoc
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.2'
# jupytext_version: 1.1.0
# kernelspec:
# display_name: Python 3
# language: python
# name: python3
# ---
# %% [markdown]
# # A quick look at world population
#
# ## Collecting population data
#
# Below we retrieve population data from the
# [World Bank](http://www.worldbank.org/)
# using the [wbdata](https://github.com/OliverSherouse/wbdata) Python package.
# %%
import pandas as pd
import wbdata as wb
# Limit how much of a DataFrame pandas prints: at most 6 rows and 20 columns.
pd.set_option('display.max_rows', 6)
pd.set_option('display.max_columns', 20)
# %% [markdown]
# The corresponding indicator can be found with the `search_indicators` method
# or, more directly, on the World Bank site.
# %%
# Look up indicators by name; the one wanted here is SP.POP.TOTL, which is
# the code requested for 'Population, total' when the data is downloaded.
wb.search_indicators('Population, total') # SP.POP.TOTL
# The same kind of search could be run for the area indicators:
# wb.search_indicators('area')
# => https://data.worldbank.org/indicator is easier to use
# %% [markdown]
# Now we download the population data
# %%
# World Bank indicator code -> human-readable label. The labels are the names
# later used to select columns of the downloaded data ('Population, total').
indicators = {
    'SP.POP.TOTL': 'Population, total',
    'AG.SRF.TOTL.K2': 'Surface area (sq. km)',
    'AG.LND.TOTL.K2': 'Land area (sq. km)',
    'AG.LND.ARBL.ZS': 'Arable land (% of land area)',
}
# Fetch the requested indicators into a DataFrame, then sort its index.
data = wb.get_dataframe(indicators, convert_date=True)
data = data.sort_index()
# A bare expression at the end of the cell displays the table.
data
# %% [markdown]
# World is one of the countries
# %%
# Show the rows labelled 'World', which is reported alongside the countries.
data.loc['World']
# %% [markdown]
# Can we classify over continents?
# %%
# Names (from the 'country' index level) of the 60 largest non-missing
# 2017 populations, in increasing order of population.
(
    data.loc[pd.IndexSlice[:, '2017-01-01'], :]['Population, total']
    .dropna()
    .sort_values()
    .tail(60)
    .index
    .get_level_values('country')
)
# %% [markdown]
# Extract zones manually (in order of increasing population)
# %%
# Zones written out in order of increasing population ...
zones = [
    'North America',
    'Middle East & North Africa',
    'Latin America & Caribbean',
    'Europe & Central Asia',
    'Sub-Saharan Africa',
    'South Asia',
    'East Asia & Pacific',
]
# ... then flipped in place, so the list ends up in the reverse order.
zones.reverse()
# %% [markdown]
# And extract population information (and check total is right)
# %%
# Keep only the zones' population figures, pivot them so that each zone
# becomes a column, and put the columns in the same order as `zones`.
population = data.loc[zones]['Population, total']
population = population.swaplevel().unstack()
population = population[zones]
# Sanity check: summed across zones, each row must equal the 'World' figure.
assert (data.loc['World']['Population, total'] == population.sum(axis=1)).all()
# %% [markdown]
# ## Stacked area plot with matplotlib
# %%
import matplotlib.pyplot as plt
# %%
# Start directly from a new, explicitly sized figure. There is deliberately no
# `plt.clf()` beforehand: with no current figure, pyplot would first create
# an extra default-sized figure just to clear it, and that empty figure would
# be left behind next to the real plot.
plt.figure(figsize=(10, 5), dpi=100)
# One stacked band per column of `population`, scaled to billions.
plt.stackplot(population.index, population.values.T / 1e9)
plt.legend(population.columns, loc='upper left')
plt.ylabel('Population count (B)')
plt.show()
# %% [markdown]
# ## Stacked area plot with plotly
# %%
import plotly.offline as offline
import plotly.graph_objs as go
# Set up plotly's offline notebook mode before any figure is shown with iplot.
offline.init_notebook_mode()
# %%
# One trace per zone; sharing the same `stackgroup` stacks them on top of
# each other. The list is deliberately not called `data`: that name holds the
# World Bank DataFrame, and overwriting it would break re-running the earlier
# cells of this notebook.
traces = [
    go.Scatter(x=population.index, y=population[zone], name=zone,
               stackgroup='World')
    for zone in zones
]
fig = go.Figure(data=traces, layout=go.Layout(title='World population'))
offline.iplot(fig)