# Chloropleth map

Creating chloropleth map of total Female R&D personnel (in head count percentages) in 2015.  

In [1]:
import pandas as pd 
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go

In [2]:
data = pd.read_csv('../data/total_rd.csv')

In [3]:
data.head()

Unnamed: 0,INDICATOR,Indicator,LOCATION,Country,TIME,Time,Value,Flag Codes,Flags
0,20000,Total R&D personnel (FTE) - Total,AUT,Austria,2011,2011,61170.5,,
1,20000,Total R&D personnel (FTE) - Total,AUT,Austria,2012,2012,65088.2,,
2,20000,Total R&D personnel (FTE) - Total,AUT,Austria,2013,2013,66186.1,,
3,20000,Total R&D personnel (FTE) - Total,AUT,Austria,2014,2014,69842.0,,
4,20000,Total R&D personnel (FTE) - Total,AUT,Austria,2015,2015,71396.0,,


In [4]:
set(data.Indicator)

{'Total R&D personnel (FTE) - % Female',
 'Total R&D personnel (FTE) - Female',
 'Total R&D personnel (FTE) - Total ',
 'Total R&D personnel (HC) - % Female',
 'Total R&D personnel (HC) - Female',
 'Total R&D personnel (HC) - Total ',
 'Total R&D personnel per million inhabitants (FTE)',
 'Total R&D personnel per million inhabitants (HC)'}

In [5]:
# filter out rows that are Total R&D personnel (HC) - % Female 
hc_female = data[data['Indicator'] == 'Total R&D personnel (HC) - % Female']

In [6]:
# 2015 has the highest count value 
hc_female['Time'].value_counts()

2015    78
2013    71
2011    71
2014    60
2012    60
2016    31
2017    17
Name: Time, dtype: int64

In [7]:
hc_female_2015 = hc_female[hc_female['Time'] == 2015]

In [8]:
hc_female_2015.head()

Unnamed: 0,INDICATOR,Indicator,LOCATION,Country,TIME,Time,Value,Flag Codes,Flags
1765,FPERSP_THC,Total R&D personnel (HC) - % Female,AUT,Austria,2015,2015,30.18443,,
1768,FPERSP_THC,Total R&D personnel (HC) - % Female,BEL,Belgium,2015,2015,36.36156,,
1773,FPERSP_THC,Total R&D personnel (HC) - % Female,CZE,Czechia,2015,2015,31.08191,,
1777,FPERSP_THC,Total R&D personnel (HC) - % Female,DNK,Denmark,2015,2015,37.13062,,
1782,FPERSP_THC,Total R&D personnel (HC) - % Female,FIN,Finland,2015,2015,33.36838,,


In [9]:
world = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')

In [10]:
addition = set(world['CODE']) - set(hc_female_2015['LOCATION'])

In [11]:
world = world[world['CODE'].isin(addition)]

In [12]:
world.drop(columns="GDP (BILLIONS)", inplace=True)

In [13]:
world['INDICATOR'] = np.nan
world['Indicator'] = np.nan
world['TIME'] = 2015
world['Time'] = 2015
world['Value'] = 0
world['Flag Codes'] = np.nan
world['Flags'] = np.nan
world.rename(columns={'CODE':'LOCATION'}, inplace=True)
world.rename(columns={'COUNTRY':'Country'}, inplace=True)

In [14]:
# add countries so that they populate on the map
hc_female_2015 = pd.concat([hc_female_2015, world])


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.





In [15]:
hc_female_2015.tail()

Unnamed: 0,Country,Flag Codes,Flags,INDICATOR,Indicator,LOCATION,TIME,Time,Value
217,Virgin Islands,,,,,VGB,2015,2015,0.0
218,West Bank,,,,,WBG,2015,2015,0.0
219,Yemen,,,,,YEM,2015,2015,0.0
220,Zambia,,,,,ZMB,2015,2015,0.0
221,Zimbabwe,,,,,ZWE,2015,2015,0.0


In [16]:
hc_female_2015.sort_values('Value')

Unnamed: 0,Country,Flag Codes,Flags,INDICATOR,Indicator,LOCATION,TIME,Time,Value
40,Chad,,,,,TCD,2015,2015,0.00000
86,Guyana,,,,,GUY,2015,2015,0.00000
87,Haiti,,,,,HTI,2015,2015,0.00000
89,Hong Kong,,,,,HKG,2015,2015,0.00000
92,India,,,,,IND,2015,2015,0.00000
93,Indonesia,,,,,IDN,2015,2015,0.00000
94,Iran,,,,,IRN,2015,2015,0.00000
97,Isle of Man,,,,,IMN,2015,2015,0.00000
98,Israel,,,,,ISR,2015,2015,0.00000
100,Jamaica,,,,,JAM,2015,2015,0.00000


In [16]:
fem_map = [go.Choropleth(
    locations = hc_female_2015['LOCATION'],
    z = hc_female_2015['Value'],
    text = hc_female_2015['Country'],
    colorscale = [
        [0, "rgb(5, 10, 172)"],
        [0.35, "rgb(40, 60, 190)"],
        [0.5, "rgb(70, 100, 245)"],
        [0.6, "rgb(90, 120, 245)"],
        [0.7, "rgb(106, 137, 247)"],
        [1, "rgb(220, 220, 220)"]
    ],
    autocolorscale = False,
    reversescale = True,
    marker = go.choropleth.Marker(
        line = go.choropleth.marker.Line(
            color = 'rgb(180,180,180)',
            width = 0.5
        )),
    colorbar = go.choropleth.ColorBar(
        tickprefix = '%',
        title = 'Total Female R&D personnel <br> (Head count %)'),
)]

layout = go.Layout(
    title = go.layout.Title(
        text = 'Head Count Percentages of Female R&D personnel (2015)'
    ),
    geo = go.layout.Geo(
        showframe = False,
        showcoastlines = False,
        projection = go.layout.geo.Projection(
            type = 'equirectangular'
        )
    ),
    annotations = [go.layout.Annotation(
        x = 0.55,
        y = 0.1,
        xref = 'paper',
        yref = 'paper',
        text = 'Source: <a href="http://data.uis.unesco.org/">\
            UNESCO Institute for Statistics</a>',
        showarrow = False
    )]
)

fig = go.Figure(data = fem_map, layout = layout)
py.iplot(fig, filename = 'd3-world-map')


Consider using IPython.display.IFrame instead

