# World Maternity Leave Data

A quick exploration of data on maternity leave policies around the world. Leave is measured in days, and pay is the percentage of working salary covered. For details on the data sources, see [the readme](README.md).

In [4]:
import pandas as pd

# Both CSVs use their first column as the row index.
wiki = pd.read_csv('matleave-wikipedia.csv', index_col=0)
un = pd.read_csv('matleave-un.csv', index_col=0)

# Build one table from the two sources:
#   1. Wikipedia values take precedence; UN values only fill the gaps.
#   2. Rows with no data at all are discarded.
#   3. Pay and Leave are forced to numbers; any cell that is not a number
#      (some cells hold a textual note instead of a value) becomes NaN.
df = (
    wiki.combine_first(un)
    .dropna(how='all')
    .assign(
        Pay=lambda merged: pd.to_numeric(merged['Pay'], errors='coerce'),
        Leave=lambda merged: pd.to_numeric(merged['Leave'], errors='coerce'),
    )
)


In [52]:
import bqplot.pyplot as plt
from bqplot.interacts import PanZoom

# Interactive scatter plot: days of leave on x, percent of pay on y, one
# point per row of df, each labelled with that row's index value.

# The mark, the axes and the pan/zoom interaction all reference the same two
# scale objects, so panning or zooming moves points and axes together.
sc_x = plt.LinearScale()
sc_y = plt.LinearScale()

panzoom = PanZoom(scales={'x': [sc_x], 'y': [sc_y]})

scatter = plt.Scatter(
    x=df['Leave'],
    y=df['Pay'],
    names=df.index.values,
    scales={'x': sc_x, 'y': sc_y})

ax_x = plt.Axis(label='Days of Leave', scale=sc_x)
ax_y = plt.Axis(label='Percent of Pay', orientation='vertical', scale=sc_y)

# Deliberately not called `figure`: a later cell runs
# `from bokeh.plotting import figure`, which would rebind that name to a
# function and leave this widget unreachable.
bq_figure = plt.Figure(
    title='World Maternity Leave Data',
    marks=[scatter],
    axes=[ax_x, ax_y],
    interaction=panzoom)

# Last expression of the cell, so the widget is rendered as the cell output.
bq_figure

In [78]:
from bokeh.models import Title, LabelSet, ColumnDataSource
from bokeh.plotting import figure, output_file, show

# Static HTML version of the chart: percent of pay on x, days of leave on y,
# with every point labelled by its df index value.

# show() at the end of the cell writes the chart to this file.
output_file("chart.html")

# Use real axis labels instead of extra Title objects placed beside the plot,
# so the text is attached to the axes themselves.
p = figure(x_axis_label="Percent of Pay", y_axis_label="Days of Leave")
p.title.text = "World Maternity Leave Data"
p.title.align = "right"
p.title.text_font_size = "20px"

# A row missing either value has no position on the chart, so keep only the
# rows where both Leave and Pay are present rather than passing NaN
# coordinates to the glyphs and labels.
plottable = df[['Leave', 'Pay']].dropna()

source = ColumnDataSource(data=dict(
    days=plottable['Leave'],
    pay=plottable['Pay'],
    country=plottable.index.values))

p.scatter(x='pay', y='days', size=5, source=source)

# One text label per point, nudged 5 screen units right and up so that it
# does not sit on top of the marker.
p.add_layout(LabelSet(
        x='pay',
        y='days',
        text='country',
        x_offset=5,
        y_offset=5,
        level='glyph',
        source=source))

show(p)