In [None]:
pip install geopandas

In [None]:
pip install bokeh

In [None]:
pip install country_converter

In [None]:
pip install fuzzy_pandas

In [None]:
pip install selenium

In [None]:
pip install phantomjs

In [None]:
conda install -c conda-forge Phantomjs

In [1]:
import pandas as pd
import geopandas as gpd
import fiona
import numpy as np
import functools
import country_converter
import pycountry


In [2]:
# Location of the filtered protest CSV (given relative to the user's home directory).
data_path = '~/Documents/Uni/qm2g1/data/processed_data/protests_data_filtered.csv'

# Read the table, using the CSV's second column (position 1) as the row index.
protest_data = pd.read_csv(filepath_or_buffer=data_path, index_col=1)

In [3]:
# Helper: map a country name to its ISO 3166-1 alpha-3 code via pycountry's fuzzy search

@functools.lru_cache(maxsize=None)
def do_fuzzy_search(country):
    """Return the ISO 3166-1 alpha-3 code of the best fuzzy match for ``country``.

    Results are memoised with an unbounded ``lru_cache``, so each distinct
    name is looked up at most once per kernel session.

    Parameters
    ----------
    country : str
        Free-text country name. Must be hashable (required by ``lru_cache``).

    Returns
    -------
    str or float
        ``alpha_3`` of the first result returned by
        ``pycountry.countries.search_fuzzy``, or ``np.nan`` when ``country``
        is not a string, is blank, or has no match.
    """
    # Missing values (NaN / None) and blank strings cannot name a country;
    # return NaN up front instead of handing them to the fuzzy matcher.
    if not isinstance(country, str) or not country.strip():
        return np.nan
    try:
        matches = pycountry.countries.search_fuzzy(country)
    except LookupError:
        # search_fuzzy reports "no candidate found" by raising LookupError.
        # Any other exception is a real error and is allowed to propagate.
        return np.nan
    # NOTE(review): the first hit is taken as-is; fuzzy matching can pick the
    # wrong country for ambiguous names -- spot-check the resulting mapping.
    return matches[0].alpha_3

In [4]:
#Resolve every distinct country name to its alpha-3 code once, then spread
#that lookup table over all rows of the protest table.
unique_countries = protest_data["country"].unique()
iso_map = dict(zip(unique_countries, map(do_fuzzy_search, unique_countries)))
protest_data["country_code"] = protest_data["country"].map(iso_map)

#Summary statistics of the table (displayed as the cell output).
protest_data.describe()

Unnamed: 0.1,Unnamed: 0,ccode,year,protest,protestnumber,startday,startmonth,startyear,endday,endmonth,endyear,protesterviolence
count,11400.0,11400.0,11400.0,11400.0,11400.0,10205.0,10205.0,10205.0,10205.0,10205.0,10205.0,10471.0
mean,5699.5,446.28886,2010.200351,0.895175,7.920965,15.426948,6.100735,2010.255659,15.559334,6.116022,2010.258991,0.24076
std,3291.040869,232.086247,5.597616,0.306341,13.034151,8.870341,3.439721,5.556188,8.858835,3.440436,5.55748,0.427565
min,0.0,20.0,2000.0,0.0,0.0,1.0,1.0,2000.0,1.0,1.0,2000.0,0.0
25%,2849.75,230.0,2005.0,1.0,1.0,8.0,3.0,2005.0,8.0,3.0,2005.0,0.0
50%,5699.5,438.0,2011.0,1.0,4.0,15.0,6.0,2011.0,15.0,6.0,2011.0,0.0
75%,8549.25,652.0,2015.0,1.0,9.0,23.0,9.0,2015.0,23.0,9.0,2015.0,0.0
max,11399.0,910.0,2019.0,1.0,143.0,31.0,12.0,2019.0,31.0,12.0,2019.0,1.0


In [5]:
#Sum the `protest` column within each country code and keep the totals as a
#one-column frame named "protests" (indexed by country_code).
protest_data_grouped = (
    protest_data
    .groupby(['country_code'])['protest']
    .sum()
    .to_frame(name="protests")
)
protest_data_grouped.head()

Unnamed: 0_level_0,protests
country_code,Unnamed: 1_level_1
AFG,35
AGO,20
ALB,33
ARE,3
ARG,72


In [6]:
#Fetch the Natural Earth admin-0 country polygons (GeoJSON) over HTTP.
url = "https://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_50m_admin_0_countries.geojson"

#Keep only the three fields needed for the map and rename the two attribute columns.
#NOTE(review): 'sovereignt' supplies the `country` label, so dependencies carry
#the sovereign state's name (e.g. ABW is labelled Netherlands) -- confirm intended.
shapefile_data = gpd.read_file(url)[['sovereignt', 'adm0_a3', 'geometry']].rename(
    columns={'sovereignt': 'country', 'adm0_a3': 'country_code'}
)
shapefile_data.head()

Unnamed: 0,country,country_code,geometry
0,Netherlands,ABW,"POLYGON ((-69.89912 12.45200, -69.89570 12.423..."
1,Afghanistan,AFG,"POLYGON ((74.89131 37.23164, 74.84023 37.22505..."
2,Angola,AGO,"MULTIPOLYGON (((14.19082 -5.87598, 14.39863 -5..."
3,United Kingdom,AIA,"POLYGON ((-63.00122 18.22178, -63.16001 18.171..."
4,Albania,ALB,"POLYGON ((20.06396 42.54727, 20.10352 42.52466..."


In [7]:
#Attach the per-country protest totals to the country polygons.
#`country_code` is a column of shapefile_data and the index of
#protest_data_grouped; merge's default inner join keeps only the codes present
#in both, so countries without protest rows are absent from `merged`.
merged = shapefile_data.merge(protest_data_grouped, on = 'country_code')

#Alias retained for any cell that refers to the merged table as `df`.
df = merged
df.head()

Unnamed: 0,country,country_code,geometry,protests
0,Afghanistan,AFG,"POLYGON ((74.89131 37.23164, 74.84023 37.22505...",35
1,Angola,AGO,"MULTIPOLYGON (((14.19082 -5.87598, 14.39863 -5...",20
2,Albania,ALB,"POLYGON ((20.06396 42.54727, 20.10352 42.52466...",33
3,United Arab Emirates,ARE,"MULTIPOLYGON (((53.92783 24.17720, 53.92813 24...",3
4,Argentina,ARG,"MULTIPOLYGON (((-64.54917 -54.71621, -64.43882...",72


In [8]:
#(source: https://towardsdatascience.com/a-complete-guide-to-an-interactive-geographical-map-using-python-f4c5197e23e0)

import json

#Serialise the merged frame to GeoJSON text and parse it into plain Python objects.
geojson_text = merged.to_json()
merged_json = json.loads(geojson_text)

#Dump those objects back to a JSON string, the form later given to the plot's data source.
json_data = json.dumps(merged_json)

In [9]:
#(source: https://towardsdatascience.com/a-complete-guide-to-an-interactive-geographical-map-using-python-f4c5197e23e0)

from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.palettes import brewer

#Input GeoJSON source that contains features for plotting.
geosource = GeoJSONDataSource(geojson = json_data)

#Define a sequential multi-hue color palette.
palette = brewer['YlGnBu'][8]

#Reverse color order so that dark blue marks the highest number of protests.
palette = palette[::-1]

#Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
#NOTE(review): the 0-400 range is hard-coded; confirm it still covers the largest per-country total.
color_mapper = LinearColorMapper(palette = palette, low = 0, high = 400)

#Define custom tick labels for color bar.
tick_labels = {'0': '0', '100': '100', '200':'200', '300':'300', '400':'400'}

#Create color bar.
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 20,
border_line_color=None,location = (0,0), orientation = 'horizontal', major_label_overrides = tick_labels)

#Create figure object.
#`height`/`width` are used instead of `plot_height`/`plot_width`, which bokeh 3.0 removed.
p = figure(title = 'Number of total protests per country', height = 600 , width = 950, toolbar_location = None)
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

#Add patch renderer to figure; each country's fill color is looked up from its `protests` value.
p.patches('xs','ys', source = geosource,fill_color = {'field' :'protests', 'transform' : color_mapper},
          line_color = 'black', line_width = 0.25, fill_alpha = 1)

#Specify figure layout: place the color bar underneath the map.
p.add_layout(color_bar, 'below')

#Display figure inline in Jupyter Notebook.
#Without this call, bokeh's show() writes an HTML file and opens it in a browser instead.
output_notebook()




In [10]:
#Display figure: render the choropleth `p`. Where it appears (inline or in a
#browser tab) depends on the bokeh output mode configured before this call.
show(p)