# Histogram

This notebook creates a histogram of the total head counts (HC) of female researchers in 2015.

In [4]:
import pandas as pd 
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
from collections import Counter

In [2]:
# Load the researchers dataset; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='../data/total_researchers.csv')

In [5]:
# Count the rows available for each year to pick the best-covered one.
# 2015 has the most rows, so the rest of the notebook uses 2015.
Counter(data['Time'].tolist())

Counter({2011: 608,
         2012: 524,
         2013: 590,
         2014: 526,
         2015: 638,
         2016: 354,
         2017: 178})

In [12]:
# Keep only the rows used for the histogram:
#   Time:      2015
#   Indicator: 'Researchers (HC) - Total ' (exact match; note the literal ends with a space)
#   Value:     not missing
data_2015 = data.loc[
    data['Time'].eq(2015)
    & data['Indicator'].eq('Researchers (HC) - Total ')
    & data['Value'].notna()
]

In [17]:
# Smallest 'Value' among the filtered 2015 rows (sanity check on the range).
min(data_2015['Value'])

34.0

In [13]:
# Preview the first five filtered rows.
data_2015.head(5)

Unnamed: 0,INDICATOR,Indicator,LOCATION,Country,TIME,Time,Value,Flag Codes,Flags
412,21001,Researchers (HC) - Total,AUT,Austria,2015,2015,78051.0,,
415,21001,Researchers (HC) - Total,BEL,Belgium,2015,2015,73709.0,,
420,21001,Researchers (HC) - Total,CZE,Czechia,2015,2015,56604.54,,
425,21001,Researchers (HC) - Total,DNK,Denmark,2015,2015,59985.0,,
430,21001,Researchers (HC) - Total,FIN,Finland,2015,2015,55728.0,,


In [25]:
# Histogram of the 'Value' column of the filtered 2015 rows.
# NOTE(review): the filter that builds data_2015 selects the
# 'Researchers (HC) - Total ' indicator, while the title and file name below
# say "Female" — confirm which one is intended.
data_hist = [
    go.Histogram(
        x=data_2015['Value'],
        marker=dict(color='purple'),
        opacity=0.75,
    )
]

layout = go.Layout(
    title=go.layout.Title(
        text='Total Female Researchers (Head count) in 2015'
    ),
    xaxis=dict(title='Head Count Value'),
    yaxis=dict(title='Count'),
    annotations=[
        go.layout.Annotation(
            # 'paper' references are relative to the plotting area, so
            # (1, 1) pins the source credit to its top-right corner.
            x=1,
            y=1,
            xref='paper',
            yref='paper',
            # Adjacent literals are concatenated by Python. A backslash
            # continuation inside a single literal would keep the next
            # line's indentation as part of the link text.
            text=(
                'Source: <a href="http://data.uis.unesco.org/">'
                'UNESCO Institute for Statistics</a>'
            ),
            showarrow=False,
        )
    ],
)
fig = go.Figure(data=data_hist, layout=layout)

# Render inline through plotly's online mode; `filename` names the saved plot.
py.iplot(fig, filename='Histogram: 2015 Female Researchers (HC)')