
# Exploration of a DataSet World's Population


### 1. Importing the libraries and necessary DataSets (World's population, Countries geographically for Folium)

In [1]:
import pandas as pd
import folium
import numpy as np
import matplotlib.pyplot as plt
import json
data = pd.read_csv('./data_population.csv', sep=',', skiprows = 1, header = 1)
country_geo = json.dumps(json.load(open('world-countries.json', 'r')))

data.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,Unnamed: 61
0,Aruba,ABW,"Population, total",SP.POP.TOTL,54211.0,55438.0,56225.0,56695.0,57032.0,57360.0,...,101353.0,101453.0,101669.0,102053.0,102577.0,103187.0,103795.0,104341.0,104822.0,
1,Afghanistan,AFG,"Population, total",SP.POP.TOTL,8996351.0,9166764.0,9345868.0,9533954.0,9731361.0,9938414.0,...,27294031.0,28004331.0,28803167.0,29708599.0,30696958.0,31731688.0,32758020.0,33736494.0,34656032.0,
2,Angola,AGO,"Population, total",SP.POP.TOTL,5643182.0,5753024.0,5866061.0,5980417.0,6093321.0,6203299.0,...,21759420.0,22549547.0,23369131.0,24218565.0,25096150.0,25998340.0,26920466.0,27859305.0,28813463.0,
3,Albania,ALB,"Population, total",SP.POP.TOTL,1608800.0,1659800.0,1711319.0,1762621.0,1814135.0,1864791.0,...,2947314.0,2927519.0,2913021.0,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,
4,Andorra,AND,"Population, total",SP.POP.TOTL,13411.0,14375.0,15370.0,16412.0,17469.0,18549.0,...,83861.0,84462.0,84449.0,83751.0,82431.0,80788.0,79223.0,78014.0,77281.0,


### 2. Transforming data for the visualisation

In [2]:
def plotting_data(year):
      
    #data_to_plot.dropna(axis = 0, how = "any")
    data_to_plot = data[["Country Code", year]]
    #data_to_plot[year] = pd.to_numeric(data_to_plot[year]) 
    return data_to_plot

data_to_plot = plotting_data("2010")

data_to_plot.head()

Unnamed: 0,Country Code,2010
0,ABW,101669.0
1,AFG,28803167.0
2,AGO,23369131.0
3,ALB,2913021.0
4,AND,84449.0


### 3. Data visualisation using Folium

In [3]:
threshold = np.linspace(data_to_plot["2010"].min(), 1500000000, 6, dtype = int).tolist()
map = folium.Map(location=[0, 0], zoom_start=1)
map.choropleth(geo_data=country_geo, name='choropleth', data=data_to_plot, columns=['Country Code', '2010'], key_on='feature.id', fill_color='YlGnBu', fill_opacity=0.7, line_opacity=0.2, threshold_scale = threshold)


map.save('plot_data.html')
from IPython.display import HTML
HTML('<iframe src=https://github.com/mlynckat/World-s-population-with-folium/blob/master/plot_data.html width=700 height=450></iframe>')


Unfortunately this way of visualisation is not meaningful. This map is only providing the information that China and India have the most population, followed by the USA. All the other countries are still in the same segment 10'000-300'000'000. To get more understanding of the figures the data should be presented more specified. For this purpose let's consider only Europe. But in the provided database there is no information of what countries are Europe. This information will be extracted from a Wikipedia page as followed.

### 5. Extracting info from a Wikipedia page

In [4]:
import wikipedia
import requests
from bs4 import BeautifulSoup

wikipedia.search("Europe")

europe = wikipedia.page("Europe")
req = requests.get(europe.url)
soup = BeautifulSoup(req.content, "lxml") 
wikitables = soup.findAll("table", {"class": "sortable"} )


names = []
for row in wikitables[0].findAll("tr"):
    cells = row.findAll("td")
    if len(cells) == 8:
        name = cells[2].find(text=True)
        names.append(name)

print (names)

['Albania', 'Andorra', 'Armenia', 'Austria', 'Azerbaijan', 'Belarus', 'Belgium', 'Bosnia and Herzegovina', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Georgia', 'Germany', 'Greece', 'Hungary', 'Iceland', 'Ireland', 'Italy', 'Kazakhstan', 'Latvia', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Macedonia', 'Malta', 'Moldova', 'Monaco', 'Montenegro', 'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania', 'Russia', 'San Marino', 'Serbia', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'Turkey', 'Ukraine', 'United Kingdom', 'Vatican City']


### 5. Restricting the initial database for European countries only

In [5]:
print("Number of countries in Europe: ", len(names))


country_europe = []

for name in names:
    for i in range(0, len(data["Country Name"])):
        if name in data["Country Name"][i]:
            country_europe.append(data["Country Name"][i])
        
print("Number of found european countries: ", len(country_europe))


'''
not_found = []

for name in names:
    if name not in country_europe:
        not_found.append(name)
        
        
print("Not found: ", not_found)
'''

print("Slovakia in data: ", data["Country Name"][data["Country Code"]=="SVK"])
#print("Vatican in data ", data["Country Name"][data["Country Code"]=="VA"])

country_europe.append("Slovak Republic")


bool_europe = []
for index, row in data.iterrows():
    if data["Country Name"][index] in country_europe:
        bool_europe.append(True)
    else:
        bool_europe.append(False)
data["bool_europe"] = bool_europe


data_to_plot_europe = data[data["bool_europe"]]
data_to_plot_europe


Number of countries in Europe:  50
Number of found european countries:  48
Slovakia in data:  219    Slovak Republic
Name: Country Name, dtype: object


Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2009,2010,2011,2012,2013,2014,2015,2016,Unnamed: 61,bool_europe
3,Albania,ALB,"Population, total",SP.POP.TOTL,1608800.0,1659800.0,1711319.0,1762621.0,1814135.0,1864791.0,...,2927519.0,2913021.0,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,,True
4,Andorra,AND,"Population, total",SP.POP.TOTL,13411.0,14375.0,15370.0,16412.0,17469.0,18549.0,...,84462.0,84449.0,83751.0,82431.0,80788.0,79223.0,78014.0,77281.0,,True
8,Armenia,ARM,"Population, total",SP.POP.TOTL,1874120.0,1941491.0,2009526.0,2077575.0,2144998.0,2211316.0,...,2888584.0,2877311.0,2875581.0,2881922.0,2893509.0,2906220.0,2916950.0,2924816.0,,True
12,Austria,AUT,"Population, total",SP.POP.TOTL,7047539.0,7086299.0,7129864.0,7175811.0,7223801.0,7270889.0,...,8343323.0,8363404.0,8391643.0,8429991.0,8479375.0,8541575.0,8633169.0,8747358.0,,True
13,Azerbaijan,AZE,"Population, total",SP.POP.TOTL,3895396.0,4030320.0,4171425.0,4315128.0,4456689.0,4592610.0,...,8947243.0,9054332.0,9173082.0,9295784.0,9416801.0,9535079.0,9649341.0,9762274.0,,True
15,Belgium,BEL,"Population, total",SP.POP.TOTL,9153489.0,9183948.0,9220578.0,9289770.0,9378113.0,9463667.0,...,10796493.0,10895586.0,11047744.0,11128246.0,11182817.0,11209057.0,11274196.0,11348159.0,,True
19,Bulgaria,BGR,"Population, total",SP.POP.TOTL,7867374.0,7943118.0,8012946.0,8078145.0,8144340.0,8204168.0,...,7444443.0,7395599.0,7348328.0,7305888.0,7265115.0,7223938.0,7177991.0,7127822.0,,True
22,Bosnia and Herzegovina,BIH,"Population, total",SP.POP.TOTL,3225668.0,3288602.0,3353226.0,3417574.0,3478995.0,3535640.0,...,3746561.0,3722084.0,3688865.0,3648200.0,3604999.0,3566002.0,3535961.0,3516816.0,,True
23,Belarus,BLR,"Population, total",SP.POP.TOTL,8198000.0,8271216.0,8351928.0,8437232.0,8524224.0,8610000.0,...,9506765.0,9490583.0,9473172.0,9464495.0,9465997.0,9474511.0,9489616.0,9507120.0,,True
35,Switzerland,CHE,"Population, total",SP.POP.TOTL,5327827.0,5434294.0,5573815.0,5694247.0,5789228.0,5856472.0,...,7743831.0,7824909.0,7912398.0,7996861.0,8089346.0,8188649.0,8282396.0,8372098.0,,True


### 6. Redrawing the map only for Europe area

In [6]:
threshold = np.linspace(data_to_plot_europe["2010"].min(), data_to_plot_europe["2010"].max(), 6, dtype = int).tolist()
map = folium.Map(location=[60, 30], tiles='cartodbpositron', zoom_start=3)
map.choropleth(geo_data=country_geo, name='choropleth', data=data_to_plot_europe, columns=['Country Code', '2010'], key_on='feature.id', fill_color='YlGnBu', fill_opacity=0.7, line_opacity=0.2, threshold_scale = threshold, legend_name='Population')


map.save('plot_data.html')
from IPython.display import HTML
HTML('<iframe src=plot_data.html width=700 height=450></iframe>')

gJson_layer_1 = L.geoJson(gjson_1, {
  style: style_1,
  onEachFeature: function (feature, layer) {
    layer.bindPopup(feature.properties.PCON13NM);}}).addTo(map)

SyntaxError: invalid syntax (<ipython-input-6-d2029eed940a>, line 12)

In [7]:
from ipywidgets import widgets, Layout, Label, interact
from IPython.display import display, HTML
import traitlets

columns = data.columns.tolist()
columns = columns[4:-2]


selection = widgets.SelectionSlider(description = "Which year should be graphed?", options = columns, value="2010", continuous_update=False, layout=Layout(width='70%', height='80px'))

year = selection.value

def draw_the_map(year):
    threshold = np.linspace(data_to_plot_europe[year].min(), data_to_plot_europe[year].max(), 6, dtype = int).tolist()
    map_europe = folium.Map(location=[60, 30], tiles='cartodbpositron', zoom_start=3)
    map_europe.choropleth(geo_data=country_geo, name='choropleth', data=data_to_plot_europe, columns=['Country Code', year], key_on='feature.id', fill_color='YlGnBu', fill_opacity=0.7, line_opacity=0.2, threshold_scale = threshold, legend_name='Population')
    map_europe = map_europe.save('plot_data.html')
    return HTML('<iframe src=plot_data.html width=700 height=450></iframe>')

interact(draw_the_map, year=selection)
    



In [97]:

'''
#initialize a map.
map1 = folium.Map(location=[40,10], zoom_start=4, control_scale=True, prefer_canvas=True)

#Iterate through the data. For each row, we add a separate geoJSON layer.
#Using the Parameter highlight_function = .... allows us to control the behaviour at mouse over.
for index, row in data.iterrows():
    c = folium.GeoJson(row['geojson'], name = (row['dep']+ row['dest']),overlay=False, 
                       style_function = lambda feature: {'fillColor': '#ffaf00','color': 'blue', 'weight': 1.5,'dashArray': '5, 5'},
                       highlight_function = lambda feature: {'fillColor': '#ffaf00','color': 'green', 'weight': 3,'dashArray': '5, 5'})
    c.add_child(folium.Popup(row['dep'] +'\n' + row['dest']))
    c.add_to(map1)

folium.LayerControl().add_to(map1)
map1.save(outfile='map1.html')

map1'''



#df = pd.read_json(country_geo)

country_geo[feature.id]




NameError: name 'feature' is not defined