In [2]:
from bokeh.plotting import figure
from bokeh.io import output_file, output_notebook, show

import pandas as pd
import matplotlib.pyplot as plt

# Configure Bokeh to render plots inline in the notebook's output cells,
# so that later show() calls display the figure directly below the cell.
output_notebook()

## Dataset
This notebook makes scatter plots of female literacy vs. fertility using data from the [European Environment Agency](http://www.eea.europa.eu/data-and-maps/figures/correlation-between-fertility-and-female-education). The dataset highlights that countries with low female literacy tend to have high birth rates.

In [3]:
# Load the raw literacy / birth-rate table, then replace the header names
# from the file with short identifiers that are convenient for attribute
# access and for query() expressions.
column_names = [
    'country',
    'continent',
    'female_literacy',
    'fertility',
    'population',
]
df = pd.read_csv('datasets/literacy_birth_rate.csv')
df.columns = column_names

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,country,continent,female_literacy,fertility,population
0,Chine,ASI,90.5,1.769,1324655000.0
1,Inde,ASI,50.8,2.682,1139965000.0
2,USA,NAM,99.0,2.077,304060000.0
3,Indonésie,ASI,88.8,2.132,227345100.0
4,Brésil,LAT,90.2,1.827,191971500.0


In [4]:
# Cleansing: drop every row that contains a missing value and make sure the
# three numeric columns are stored as floats.
#
# This is written as a single non-mutating chain (no ``inplace=True``) so the
# frame object produced by the loading cell is never modified behind the
# reader's back; ``df`` is simply rebound to the cleansed result.
numeric_columns = ['female_literacy', 'fertility', 'population']
df = df.dropna().astype({name: float for name in numeric_columns})

# Summarise row count, dtypes and non-null counts of the cleansed frame.
df.info()

# Persist the cleansed data (without the index column) for future use.
df.to_csv('datasets/literacy_birth_rate_cleansed.csv', index=False)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 162 entries, 0 to 161
Data columns (total 5 columns):
country            162 non-null object
continent          162 non-null object
female_literacy    162 non-null float64
fertility          162 non-null float64
population         162 non-null float64
dtypes: float64(3), object(2)
memory usage: 7.6+ KB


### Scatter plots

In [5]:
# Basic scatter plot: one circle per row, fertility on the x-axis and
# female literacy on the y-axis.
p = figure(
    x_axis_label='fertility (children per woman)',
    y_axis_label='female_literacy (% population)',
)
p.circle(x=df['fertility'], y=df['female_literacy'])

# Register the HTML file this figure is written to, then render it.
# NOTE(review): notebook output was enabled earlier as well, so show() is
# presumably expected to both display inline and save the file — confirm.
output_file('output/fert_lit.html')
show(p)

### Glyph markers and colors

In [7]:
# Plot two continents on the same axes, distinguished by marker and colour.
p = figure(
    x_axis_label='fertility (children per woman)',
    y_axis_label='female_literacy (% population)',
)

# Select each continent's rows once and reuse the subset for both axes.
# NOTE(review): "LAT" / "AF" are presumably the codes for Latin America and
# Africa — confirm against the dataset's documentation.
latin_america = df.query('continent == "LAT"')
africa = df.query('continent == "AF"')

# Styling shared by both glyph renderers.
marker_size = 10
marker_alpha = 0.8

# Blue circles for the "LAT" rows.
p.circle(
    latin_america['fertility'],
    latin_america['female_literacy'],
    color='blue',
    size=marker_size,
    alpha=marker_alpha,
)

# Red "x" markers for the "AF" rows.
p.x(
    africa['fertility'],
    africa['female_literacy'],
    color='red',
    size=marker_size,
    alpha=marker_alpha,
)

# Register the HTML file this figure is written to, then render it.
output_file('output/fert_lit_separate_colors.html')
show(p)