#### Gender equality

In [1]:
import folium
import json
import pandas as pd

In [2]:
with open('input/world-countries.json') as data_file:
    country_geo = json.load(data_file)

In [3]:
data = pd.read_csv('input/Indicators.csv')

In [4]:
data.head()

Unnamed: 0,CountryName,CountryCode,IndicatorName,IndicatorCode,Year,Value
0,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1960,133.5609
1,Arab World,ARB,Age dependency ratio (% of working-age populat...,SP.POP.DPND,1960,87.7976
2,Arab World,ARB,"Age dependency ratio, old (% of working-age po...",SP.POP.DPND.OL,1960,6.634579
3,Arab World,ARB,"Age dependency ratio, young (% of working-age ...",SP.POP.DPND.YG,1960,81.02333
4,Arab World,ARB,Arms exports (SIPRI trend indicator values),MS.MIL.XPRT.KD,1960,3000000.0


In [5]:
countries = data['CountryName'].unique().tolist()
indicators = data['IndicatorName'].unique().tolist()
print(len(countries))
print(len(indicators))

247
1344


In [6]:
# Find usefule features
for i, x in enumerate(indicators):
    if 'female (%)' in x:
        print('index = %d, Indicator name is '%i + x)

index = 343, Indicator name is Adjusted net enrolment rate, primary, female (%)
index = 364, Indicator name is Gross enrolment ratio, pre-primary, female (%)
index = 368, Indicator name is Gross enrolment ratio, primary, female (%)
index = 372, Indicator name is Gross enrolment ratio, secondary, female (%)
index = 376, Indicator name is Gross enrolment ratio, tertiary, female (%)
index = 383, Indicator name is Net enrolment rate, primary, female (%)
index = 386, Indicator name is Net enrolment rate, secondary, female (%)
index = 395, Indicator name is Percentage of repeaters in primary education, all grades, female (%)
index = 397, Indicator name is Percentage of students in primary education who are female (%)
index = 398, Indicator name is Percentage of students in secondary education who are female (%)
index = 399, Indicator name is Percentage of students in secondary general education who are female (%)
index = 400, Indicator name is Percentage of students in secondary vocational e

In [7]:
# hist_indicator = 'Proportion of seats held by women in national parliaments (%)'
hist_indicator = 'Proportion of seats held by women in national parliaments (%)'

hist_year = 2011
mask1 = data['IndicatorName'].str.contains(hist_indicator,na=False, regex=False) 
mask2 = data['Year'].isin([hist_year])
# apply our mask
stage = data[mask1 & mask2]

In [8]:
# num of countries with this indicator
print('%d countries have this indicator'%stage.shape[0])

219 countries have this indicator


In [9]:
data_to_plot = stage[['CountryCode','Value']]
data_to_plot.head()

Unnamed: 0,CountryCode,Value
5026667,ARB,11.335266
5027161,CSS,15.849777
5027701,CEB,18.421243
5028302,EAS,17.725096
5029073,EAP,17.873388


In [21]:
hist_indicator = stage.iloc[0]['IndicatorName']
map0 = folium.Map(location=[100, 0], zoom_start=1.5)
map0.choropleth(geo_data=country_geo, data=data_to_plot,
             columns=['CountryCode', 'Value'],
             key_on='feature.id',
             fill_color='YlGnBu', fill_opacity=0.7, line_opacity=0.2, nan_fill_color='White',
             legend_name=hist_indicator)

In [23]:
# Create Folium plot
x = map0.save('plot/plot_gender.html')
# Import the Folium interactive html file
from IPython.display import IFrame
IFrame(src= 'plot/plot_gender.html', width=1000 ,height=450)

#### GINI index
The countries with indicator GINI index in Indicator.csv file are incomplete. Therefore, we use gini.csv from https://www.gapminder.org instead. Restart the kernel here.

In [1]:
import folium
import json
import pandas as pd

In [2]:
with open('input/world-countries.json') as data_file:
    country_geo = json.load(data_file)
    
# read gini data
gini = pd.read_csv('input/gini.csv')
countries = pd.read_csv('input/country.csv')
countries_code = countries[['ShortName','CountryCode']]

In [3]:
gini.head()

Unnamed: 0,country,1800,1801,1802,1803,1804,1805,1806,1807,1808,...,2031,2032,2033,2034,2035,2036,2037,2038,2039,2040
0,Afghanistan,30.5,30.5,30.5,30.5,30.5,30.5,30.5,30.5,30.5,...,36.8,36.8,36.8,36.8,36.8,36.8,36.8,36.8,36.8,36.8
1,Albania,38.9,38.9,38.9,38.9,38.9,38.9,38.9,38.9,38.9,...,29.0,29.0,29.0,29.0,29.0,29.0,29.0,29.0,29.0,29.0
2,Algeria,56.2,56.2,56.2,56.2,56.2,56.2,56.2,56.2,56.2,...,27.6,27.6,27.6,27.6,27.6,27.6,27.6,27.6,27.6,27.6
3,Andorra,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,...,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0
4,Angola,57.2,57.2,57.2,57.2,57.2,57.2,57.2,57.2,57.2,...,42.6,42.6,42.6,42.6,42.6,42.6,42.6,42.6,42.6,42.6


In [4]:
# Create a country name to country code mapping to use the gini data
code_mapping = countries_code.to_dict(orient='list')

keys = code_mapping['ShortName']
values = code_mapping['CountryCode']
code_mapping_dict = dict(zip(keys,values))

In [5]:
data_to_plot1 = gini[['country','2019']]

In [6]:
# Add a new column Country Code to the dataframe
data_to_plot1['CountryCode']= data_to_plot1['country'].map(code_mapping_dict)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [7]:
data_to_plot1.head()

Unnamed: 0,country,2019,CountryCode
0,Afghanistan,36.8,AFG
1,Albania,29.0,ALB
2,Algeria,27.6,DZA
3,Andorra,40.0,ADO
4,Angola,42.6,AGO


In [8]:
hist_indicator1 = 'GINI index at 2019'

In [9]:
hist_indicator1

'GINI index at 2019'

In [13]:
map1 = folium.Map(location=[100, 100], zoom_start=1)
map1.choropleth(geo_data=country_geo, data=data_to_plot1,
             columns=['CountryCode', '2019'],
             key_on='feature.id',
             fill_color='YlGnBu', fill_opacity=0.7, line_opacity=0.2,nan_fill_color='White',
             legend_name=hist_indicator1)

# Create Folium plot
y = map1.save('plot/plot_GINI.html')
# Import the Folium interactive html file
from IPython.display import IFrame
IFrame(src= 'plot/plot_GINI.html', width=1000 ,height=450)