# Bar Chart

Bar chart of the total number of female researchers (head count, 2015) across different sectors:
- Business enterprise  
- Government  
- Higher education  
- Private non-profit  
- Not specified

In [1]:
import pandas as pd 
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
from collections import Counter

In [2]:
# Read the source CSV into a DataFrame (path is relative to the notebook's working directory).
csv_path = '../data/section_researchers.csv'
data = pd.read_csv(csv_path)

In [3]:
# Count rows per year; 2015 has the most rows, so the rest of the notebook uses 2015.
Counter(data['Time'])

Counter({2011: 2537,
         2012: 2044,
         2013: 2490,
         2014: 2450,
         2015: 2876,
         2016: 1484,
         2017: 729})

In [4]:
# Filtering: keep the 2015 rows of the five female head-count sector
# indicators, and discard rows that have no Value.
sectors = [
    'Researchers (HC) - Female - Business enterprise',
    'Researchers (HC) - Female - Government',
    'Researchers (HC) - Female - Higher education',
    'Researchers (HC) - Female - Private non-profit',
    'Researchers (HC) - Female - Not specified sectors',
]
is_sector_indicator = data['Indicator'].isin(sectors)
is_2015 = data['Time'] == 2015
has_value = data['Value'].notna()
data_sectors = data[is_sector_indicator & is_2015 & has_value]

In [5]:
# Preview the first five filtered rows.
data_sectors.head(n=5)

Unnamed: 0,INDICATOR,Indicator,LOCATION,Country,TIME,Time,Value,Flag Codes,Flags
8878,21726,Researchers (HC) - Female - Business enterprise,AUT,Austria,2015,2015,6320.0,,
8881,21726,Researchers (HC) - Female - Business enterprise,BEL,Belgium,2015,2015,10030.0,,
8886,21726,Researchers (HC) - Female - Business enterprise,CZE,Czechia,2015,2015,2886.54,,
8890,21726,Researchers (HC) - Female - Business enterprise,DNK,Denmark,2015,2015,6860.0,,
8895,21726,Researchers (HC) - Female - Business enterprise,FIN,Finland,2015,2015,4849.0,,


In [6]:
# Sum head counts per sector (one Indicator per sector) across countries.
# Only 'Value' is selected: indexing a GroupBy with a bare tuple of column
# names (['Indicator', 'Value']) is deprecated and raises in pandas >= 2.0,
# and the grouping key does not need to be summed anyway.
# reset_index() turns the Indicator group key back into a regular column.
data_sector_agg = data_sectors.groupby('Indicator')['Value'].sum().reset_index()

In [7]:
# Add a short sector name for cleaner plotting.
# The label is derived from each row's own Indicator rather than from a
# hand-ordered list, so it cannot be mismatched if the row order changes or
# a sector has no rows after filtering.
indicator_prefix = 'Researchers (HC) - Female - '
data_sector_agg['Sector'] = (
    data_sector_agg['Indicator']
    .str.replace(indicator_prefix, '', regex=False)         # literal prefix, not a regex
    .replace({'Not specified sectors': 'Not specified'})    # keep the short label used downstream
)

In [8]:
# Drop the 'Not specified' sector because it is so insignificant compared to the other sectors.
# Filtering by label (instead of hard-coded row positions) keeps this cell
# safe to re-run; .copy() gives an independent frame so the assignment below
# does not write into a slice of the previous one (SettingWithCopyWarning).
data_sector_agg = data_sector_agg[data_sector_agg['Sector'] != 'Not specified'].copy()

# Round the summed head counts to whole numbers (vectorised, no row-wise apply).
data_sector_agg['Value'] = data_sector_agg['Value'].round()



In [9]:
# Display the aggregated table (one row per remaining sector) that feeds the bar chart.
data_sector_agg

Unnamed: 0,Indicator,Value,Sector
0,Researchers (HC) - Female - Business enterprise,444241.0,Business enterprise
1,Researchers (HC) - Female - Government,266885.0,Government
2,Researchers (HC) - Female - Higher education,1052424.0,Higher education
4,Researchers (HC) - Female - Private non-profit,16194.0,Private non-profit


In [10]:
# Bar chart: one bar per sector, with the head count also shown as the bar's text label.
x = data_sector_agg['Sector']
y = data_sector_agg['Value']

# Amber fill with a thin dark-blue outline.
bar_marker = dict(
    color='rgb(255, 191, 0)',
    line=dict(color='rgb(8,48,107)', width=1.5),
)
sector_bars = go.Bar(
    x=x,
    y=y,
    text=y,
    textposition='auto',
    marker=bar_marker,
    opacity=0.6,
)
data_bar = [sector_bars]

layout = go.Layout(
    title=go.layout.Title(text='Total Female Researchers (Head count) across Sectors (2015)'),
    xaxis=dict(title='Sectors'),
    yaxis=dict(title='Total Head Count'),
)
fig = go.Figure(data=data_bar, layout=layout)

# NOTE(review): `plotly.plotly` was moved to the separate `chart_studio`
# package in plotly 4 - confirm the installed plotly version before re-running.
py.iplot(fig, filename='Female Researchers HC by Sector')


Consider using IPython.display.IFrame instead

