# Graphs

## Aggregated Number of Degrees in Education (Award Levels)

Will cover years: 1984-2016. These graphs will include the counts by award level (Bachelor's, Master's, PhD). The goal is to create a button to select the award type that you want to focus on. This may require structuring the data differently.

In [1]:
%matplotlib inline

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

### Import Files from Stata

In [2]:
# Folder holding the Stata extracts behind these graphs. The original H: drive
# location remains the default; set the IPEDS_DATA_DIR environment variable to
# run the notebook against a copy of the data stored elsewhere.
IPEDS_DATA_DIR = os.environ.get(
    "IPEDS_DATA_DIR",
    r"H:\Teacher Labor MRKT Shortage\IPEDS\Completions_CIP codes\New CIP data\Data files used for graphs",
)
# The working directory is still switched, exactly as before, in case other
# cells open files by relative name.
os.chdir(IPEDS_DATA_DIR)
df = pd.read_stata("aggregate num of degrees by cipcode(4) awlevel.dta")

In [4]:
df.head(5)

Unnamed: 0,year,stem_ba,sped_ba,elem_ba,other_ba,stem_ma,sped_ma,elem_ma,other_ma,stem_phd,sped_phd,elem_phd,other_phd,aggtotal
0,1984.0,4079.0,10687.0,40072.0,39988.0,2919.0,10575.0,10821.0,51627.0,449.0,239.0,151.0,6072.0,177679.0
1,1985.0,3764.0,9529.0,38582.0,38547.0,2927.0,9942.0,10400.0,51643.0,369.0,235.0,160.0,5848.0,171946.0
2,1986.0,4244.0,8708.0,40186.0,36652.0,2972.0,9342.0,10876.0,51994.0,375.0,245.0,143.0,5845.0,171582.0
3,1987.0,4456.0,7503.0,41054.0,36828.0,2760.0,8863.0,11679.0,51080.0,337.0,231.0,130.0,5739.0,170660.0
4,1988.0,4655.0,6946.0,44938.0,37312.0,3695.0,8593.0,12558.0,52033.0,315.0,218.0,121.0,5446.0,176830.0


### Clean

* Create New variables
* Find max values for annotations

In [3]:
# Add totals for BA, MA, PhD as new columns
def add_total(dfname, award, frame=None):
    """Add a column with the sum of the four major groups for one award level.

    Parameters
    ----------
    dfname : str
        Name of the column to create (despite the name, this is not a DataFrame).
    award : str
        Column suffix selecting the award level; the columns read are
        'stem_<award>', 'sped_<award>', 'elem_<award>' and 'other_<award>'.
    frame : pandas.DataFrame, optional
        Frame to modify in place. When omitted, the notebook-level ``df`` is
        used, which is what every existing call relies on.

    Returns
    -------
    None
        The column is written into the frame. Missing values are not filled,
        so a NaN in any component yields a NaN total.
    """
    target = df if frame is None else frame
    target[dfname] = (
        target['stem_' + award]
        + target['sped_' + award]
        + target['elem_' + award]
        + target['other_' + award]
    )
    

In [4]:
# One '<award>_total' column per award level, created in the order BA, MA, PhD
for award_level in ('ba', 'ma', 'phd'):
    add_total(award_level + '_total', award_level)
df.head(5)

Unnamed: 0,year,stem_ba,sped_ba,elem_ba,other_ba,stem_ma,sped_ma,elem_ma,other_ma,stem_phd,sped_phd,elem_phd,other_phd,aggtotal,ba_total,ma_total,phd_total
0,1984.0,6978.0,10687.0,40072.0,37089.0,3751.0,10575.0,10821.0,50795.0,532.0,239.0,151.0,5989.0,177679.0,94826.0,75942.0,6911.0
1,1985.0,6451.0,9529.0,38582.0,35860.0,3549.0,9942.0,10400.0,51021.0,421.0,235.0,160.0,5796.0,171946.0,90422.0,74912.0,6612.0
2,1986.0,6627.0,8708.0,40186.0,34269.0,3620.0,9342.0,10876.0,51346.0,428.0,245.0,143.0,5792.0,171582.0,89790.0,75184.0,6608.0
3,1987.0,6795.0,7503.0,41054.0,34489.0,3371.0,8863.0,11679.0,50469.0,376.0,231.0,130.0,5700.0,170660.0,89841.0,74382.0,6437.0
4,1988.0,6812.0,6946.0,44938.0,35155.0,4340.0,8593.0,12558.0,51388.0,356.0,218.0,121.0,5405.0,176830.0,93851.0,76879.0,6100.0


In [5]:
# Create variable for sum of SPED, STEM, Elementary, Other as new columns
def major_total(dfname, major, frame=None):
    """Add a column with the BA + MA + PhD sum for one major group.

    Parameters
    ----------
    dfname : str
        Name of the column to create (despite the name, this is not a DataFrame).
    major : str
        Column prefix selecting the major; the columns read are
        '<major>_ba', '<major>_ma' and '<major>_phd'.
    frame : pandas.DataFrame, optional
        Frame to modify in place. When omitted, the notebook-level ``df`` is
        used, which is what every existing call relies on.

    Returns
    -------
    None
        The column is written into the frame. Missing components count as 0.
    """
    target = df if frame is None else frame
    # Fill only the three columns that are summed, once, instead of copying the
    # entire frame for every term.
    parts = target[[major + '_ba', major + '_ma', major + '_phd']].fillna(0)
    target[dfname] = parts[major + '_ba'] + parts[major + '_ma'] + parts[major + '_phd']

In [6]:
# One '<major>_total' column per major group, created in the same order as before
for major_group in ('sped', 'stem', 'elem', 'other'):
    major_total(major_group + '_total', major_group)

In [7]:
[col for col in df.columns if 'total' in col]

['aggtotal',
 'ba_total',
 'ma_total',
 'phd_total',
 'sped_total',
 'stem_total',
 'elem_total',
 'other_total']

In [8]:
# Print the peak value of every total series together with the year it occurred in
test = df[['year','aggtotal', 'ba_total', 'ma_total', 'phd_total']].set_index('year')
for column in test.columns:
    series = test[column]
    # With 'year' as the index, idxmax() is the year of the maximum
    print("Var:", column, "\n", series.max(), series.idxmax())

Var: aggtotal 
 304530.0 2011.0
Var: ba_total 
 113092.0 1991.0
Var: ma_total 
 188304.0 2011.0
Var: phd_total 
 11834.0 2016.0


## Merge in SASS data

Data from SASS (Schools and Staffing Survey) on the number of new teachers entering the workforce.

In [9]:
# Folder holding the raw SASS workbook. The original H: drive location remains
# the default; set SASS_DATA_DIR to read it from somewhere else.
path = os.environ.get("SASS_DATA_DIR", r"H:\Teacher Labor MRKT Shortage\SASS\Raw data")
# os.path.join supplies the separator instead of a hand-written "\\"
sass = pd.read_excel(os.path.join(path, "SASS New teachers - IPEDS Completion data.xls"))

In [11]:
sass.head(3)

Unnamed: 0,year,total_teachers,recentgraduates,DeleyedEntrants,new_teachers
0,1987,170660,36800,22600,59400
1,1990,199148,51600,39100,90700
2,1993,211457,58100,44400,102500


In [10]:
# Merge with IPEDS data set.
# Any SASS columns already present (from an earlier run of this cell) are left
# out before merging, so re-running the cell gives the same frame instead of
# piling up '_x' / '_y' duplicates.
sass_only_cols = [col for col in sass.columns if col != 'year']
ipeds_cols = [col for col in df.columns if col not in sass_only_cols]
df = df[ipeds_cols].merge(sass, on='year', how='outer')

In [11]:
df.head(3)

Unnamed: 0,year,stem_ba,sped_ba,elem_ba,other_ba,stem_ma,sped_ma,elem_ma,other_ma,stem_phd,...,ma_total,phd_total,sped_total,stem_total,elem_total,other_total,total_teachers,recentgraduates,DeleyedEntrants,new_teachers
0,1984,6978.0,10687.0,40072.0,37089.0,3751.0,10575.0,10821.0,50795.0,532.0,...,75942.0,6911.0,21501.0,11261.0,51044.0,93873.0,,,,
1,1985,6451.0,9529.0,38582.0,35860.0,3549.0,9942.0,10400.0,51021.0,421.0,...,74912.0,6612.0,19706.0,10421.0,49142.0,92677.0,,,,
2,1986,6627.0,8708.0,40186.0,34269.0,3620.0,9342.0,10876.0,51346.0,428.0,...,75184.0,6608.0,18295.0,10675.0,51205.0,91407.0,,,,


In [12]:
df[['aggtotal', 'total_teachers', 'new_teachers']].head(5)

Unnamed: 0,aggtotal,total_teachers,new_teachers
0,177679.0,,
1,171946.0,,
2,171582.0,,
3,170660.0,170660.0,59400.0
4,176830.0,,


### Graphs

#### Plotly

In [13]:
import plotly.plotly as py
import plotly.graph_objs as go
import plotly 

In [14]:
# Initiate using credentials taken from the environment, so that no secret is
# stored in the notebook. Set PLOTLY_API_KEY (and optionally PLOTLY_USERNAME)
# before starting the kernel.
# NOTE(review): an API key was previously committed on this line; treat it as
# compromised and regenerate it in the Plotly account settings.
plotly.tools.set_credentials_file(
    username=os.environ.get('PLOTLY_USERNAME', 'otteheng'),
    api_key=os.environ['PLOTLY_API_KEY'],  # KeyError here means the variable is not set
)

In [15]:
# Define an annotation-generating function
def make_annotation(xval, yval, txt):
    """Build an arrow-less text annotation for the figure's footnotes.

    xval, yval -- passed through as the annotation's x and y; both xref and
                  yref are set to 'paper'
    txt        -- annotation text (callers pass HTML-style markup)

    Returns a go.Annotation with a size-9 '#262626' font, left-aligned.
    """
    label_font = go.Font(
        color='#262626',
        size=9,
    )
    settings = dict(
        text=txt,
        x=xval,
        xref='paper',
        xanchor='auto',
        y=yval,
        yref='paper',
        yanchor='auto',
        align='left',
        showarrow=False,      # no arrow is drawn; ax / ay are still passed as before
        ax=-1,
        ay=174.608329773,
        font=label_font,
    )
    return go.Annotation(**settings)


In [17]:
# Variables that will be plotted

def _major_line(column, label, color, width=None, with_markers=False):
    """Build one hidden Scatter trace of df[column] against df.year.

    width        -- optional line width; left out of the line dict when None
    with_markers -- add the marker settings and 'lines+markers' mode that the
                    Master's traces use
    """
    line_style = dict(color=color)
    if width is not None:
        line_style['width'] = width
    options = dict(
        visible=False,
        x=df.year,
        y=df[column],
        name=label,
        line=line_style,
        opacity=0.8,
    )
    if with_markers:
        options['marker'] = {'color': color, 'symbol': 103, 'size': "7"}
        options['mode'] = "lines+markers"
    return go.Scatter(**options)


# Bachelor's degrees by major: plain lines
sped_ba = _major_line('sped_ba', "SPED BA", '#8dd3c7')
stem_ba = _major_line('stem_ba', "STEM BA", '#fdb462')
elem_ba = _major_line('elem_ba', "Elementary BA", '#bebada')
other_ba = _major_line('other_ba', "Other BA", '#fb8072', width=4)

# Master's degrees by major: same colours, drawn with markers
sped_ma = _major_line('sped_ma', "SPED MA", '#8dd3c7', with_markers=True)
stem_ma = _major_line('stem_ma', "STEM MA", '#fdb462', with_markers=True)
elem_ma = _major_line('elem_ma', "Elementary MA", '#bebada', with_markers=True)
other_ma = _major_line('other_ma', "Other MA", '#fb8072', width=4, with_markers=True)

######################
# Totals
#####################
def _award_total_line(column, label, color):
    """Hidden dashed Scatter trace of df[column] against df.year."""
    return go.Scatter(
        visible=False,
        x=df.year,
        y=df[column],
        name=label,
        line=dict(color=color, width=3, dash='dash'),
        opacity=0.8,
    )


ba_total = _award_total_line('ba_total', "Total Bachelor", '#6b6ecf')
ma_total = _award_total_line('ma_total', "Total Master's", '#80b1d3')
phd_total = _award_total_line('phd_total', "Total PhD", '#35B778')

# Grand total: solid line and the only trace without visible=False.
# (showlegend=False was tried here and is deliberately left off.)
aggtotal = go.Scatter(
    x=df.year,
    y=df['aggtotal'],
    name="Total Ed. Credentials",
    line=dict(color='#333333', width=3),
    opacity=0.8,
)

#######################################################################
# Create Averages (Not used but I'm loath to delete them just in case)
#######################################################################
def _flat_mean_line(column, label, color):
    """Hidden horizontal line at the mean of df[column], one point per row of df."""
    mean_level = df[column].mean()
    return go.Scatter(
        x=df.year,
        y=[mean_level] * len(df.year),
        name=label,
        visible=False,
        line=dict(color=color),
        opacity=0.5,
        showlegend=False,
    )


def _unlisted_total_line(column, label, color):
    """Hidden dashed line of df[column] that is kept out of the legend."""
    return go.Scatter(
        visible=False,
        x=df.year,
        y=df[column],
        name=label,
        line=dict(color=color, width=3, dash='dash'),
        opacity=0.8,
        showlegend=False,
    )


# Mean lines
aggtotal_avg = _flat_mean_line('aggtotal', 'Total Average', '#333333')
ba_avg = _flat_mean_line('ba_total', 'Bachelor Average', '#6b6ecf')
ma_avg = _flat_mean_line('ma_total', "Master's Average", '#80b1d3')
phd_avg = _flat_mean_line('phd_total', 'PhD Average', '#35B778')

# Legend-free copies of the total lines
aggtotal_none = _unlisted_total_line('aggtotal', "Total Education Graduates", '#333333')
ba_total_none = _unlisted_total_line('ba_total', "Total Bachelor", '#6b6ecf')
ma_total_none = _unlisted_total_line('ma_total', "Total Master's", '#80b1d3')
phd_total_none = _unlisted_total_line('phd_total', "Total PhD", '#35B778')

#############
# Bar graph
#############
# Bars for the 'new_teachers' column merged in from SASS; hidden by default.
# (showlegend=False was tried here and is deliberately left off.)
new_teachers = go.Bar(
    x=df.year,
    y=df['new_teachers'],
    name="New to Profession",
    text='',
    textposition='outside',
    marker=dict(color='#8dd3c7'),
    opacity=0.8,
    visible=False,
)

# Bars for total_teachers minus new_teachers; hidden and kept out of the legend.
# (hoverinfo='none' was tried here and is deliberately left off.)
teacher_gap = df['total_teachers'] - df['new_teachers']
total_teachers = go.Bar(
    x=df.year,
    y=teacher_gap,
    name='Difference',
    marker=dict(color='#333333'),
    opacity=0.6,
    showlegend=False,
    visible=False,
)

#####################################
# Totals for STEM, SPED, Elem, Other
#####################################
def _major_total_line(column, label, color):
    """Hidden dotted Scatter trace of df[column] against df.year."""
    return go.Scatter(
        visible=False,
        x=df.year,
        y=df[column],
        name=label,
        line=dict(color=color, width=3, dash='dot'),
        opacity=0.8,
    )


# Each major keeps the colour of its BA / MA traces (SPED '#8dd3c7',
# STEM '#fdb462', Elementary '#bebada', Other '#fb8072'). STEM and SPED used to
# be swapped here, so the 'All' view showed one colour for two different majors.
stem_total = _major_total_line('stem_total', "Total STEM", '#fdb462')
sped_total = _major_total_line('sped_total', "Total SPED", '#8dd3c7')
elem_total = _major_total_line('elem_total', "Total Elementary", '#bebada')
other_total = _major_total_line('other_total', "Total Other", '#fb8072')

# List of traces handed to the figure. The order matters: every 'visible'
# list in the dropdown buttons is positional, so entry k there refers to
# entry k here (26 traces in total).
data = [sped_ba, stem_ba, elem_ba, other_ba,             # 0-3   BA by major
        sped_ma, stem_ma, elem_ma, other_ma,             # 4-7   MA by major
        ba_total, ma_total, phd_total, aggtotal,         # 8-11  totals by award + grand total
       aggtotal_avg, ba_avg, ma_avg, phd_avg,            # 12-15 mean lines
       ba_total_none, ma_total_none, phd_total_none, aggtotal_none,  # 16-19 legend-free totals
       new_teachers, total_teachers,                     # 20-21 bars
       stem_total, sped_total, elem_total, other_total]  # 22-25 totals by major

# Annotation specs marking the maximum and the mean of each total series.
def _peak_annotation(column, label, explicit_refs=True):
    """Annotation dict placed at the maximum of df[column].

    The year is looked up from the row holding the maximum instead of being
    typed in by hand, so the label stays right when the data are refreshed.
    explicit_refs=False leaves xref / yref out, as the grand-total spec always has.
    """
    peak_value = int(df[column].max())
    peak_year = str(int(df.loc[df[column].idxmax(), 'year']))
    spec = dict(x=peak_year,
                y=peak_value,
                text=label + ' Max:<br>' + '<i>' + peak_year + '</i><br>' + "{:,}".format(peak_value),
                ax=0, ay=-40)
    if explicit_refs:
        spec['xref'] = 'x'
        spec['yref'] = 'y'
    return spec


def _mean_annotation(column, label, year, ay=-40):
    """Annotation dict placed at the mean of df[column], at a hand-picked year."""
    mean_value = int(df[column].mean())
    return dict(x=year,
                y=mean_value,
                xref='x', yref='y',
                text=label + ' Average:<br>' + str(mean_value),
                ax=0, ay=ay)


## Text: Max values
total_annotations_max = [_peak_annotation('aggtotal', 'Total', explicit_refs=False)]
ma_annotations_max = [_peak_annotation('ma_total', 'MA')]
ba_annotations_max = [_peak_annotation('ba_total', 'BA')]
phd_annotations_max = [_peak_annotation('phd_total', 'PhD')]

## Averages (the x positions are fixed years, carried over unchanged)
total_annotations_avg = [_mean_annotation('aggtotal', 'Total', '1993')]
ma_annotations_avg = [_mean_annotation('ma_total', 'MA', '2008')]
ba_annotations_avg = [_mean_annotation('ba_total', 'BA', '2000', ay=40)]
phd_annotations_avg = [_mean_annotation('phd_total', 'PhD', '1989')]

# Buttons for Bachelor, Master's, PhD
_NOTE_TEXT = '<b>Note: </b><i>Award Type "Other" includes, English, Social Studies, Teaching Assistants, Foreign Languages,<br>           Physical Ed., and Admin. degrees</i>'
_SOURCE_TEXT = 'Source: <a href="https://nces.ed.gov/ipeds/datacenter/DataFiles.aspx">Integrated Postsecondary Educational Data System (IPEDS)</a>'


def _shown(positions):
    """Positional visibility list over `data`: True only at the given indices."""
    wanted = tuple(positions)
    return [slot in wanted for slot in range(len(data))]


def _source_note():
    """Fresh 'Source: ...' footnote annotation."""
    return make_annotation(0.0, -0.16, _SOURCE_TEXT)


def _note_and_source():
    """Footnotes for views that include the 'Other' category: note plus source."""
    return go.Annotations([make_annotation(0.0, -0.23, _NOTE_TEXT)]
                          + [_source_note()])


def _menu_button(label, title, positions, annotations):
    """One dropdown entry: sets trace visibility, figure title and footnotes."""
    return dict(label=label,
                method='update',
                args=[{'visible': _shown(positions)},
                      {'title': title,
                       'annotations': annotations}])


# Trace positions used below (see the order of `data`):
#   0-3 BA by major, 4-7 MA by major, 8-10 totals by award, 11 grand total,
#   12-15 mean lines, 16-19 legend-free totals, 20-21 bars, 22-25 totals by major.
# Two further buttons are currently disabled: a totals view on positions 16-19
# carrying the *_annotations_max specs, and a 'New Teachers' view on positions
# 11 and 20 with an IPEDS + SASS source note.
updatemenus = list([
    dict(showactive=False,
         x=-.12,
         y=1.1,
         yanchor='top',
         active=6,   # index of 'Total Graduates' in the button list below
         buttons=list([
            _menu_button('Bachelor by Major',
                         'Bachelor by STEM, SPED, Elementary, and Other degrees',
                         range(0, 4),
                         _note_and_source()),
            _menu_button("Master's by Major",
                         "Master's by STEM, SPED, Elementary, and Other degrees",
                         range(4, 8),
                         _note_and_source()),
            _menu_button("BA + MA by Major",
                         'Aggregated Number of Graduates in Education',
                         range(0, 8),
                         _note_and_source()),
            _menu_button('Totals by Award',
                         "Grand Total and by Award Type",
                         range(8, 12),
                         [_source_note()]),   # plain list here, as before
            _menu_button("Total by Major",
                         'Grand Total and by Major',
                         [11, 22, 23, 24, 25],
                         _note_and_source()),
            _menu_button('All',
                         "Aggregated Number of Graduates in Education",
                         list(range(0, 12)) + list(range(22, 26)),
                         _note_and_source()),
            _menu_button('Total Graduates',
                         'Aggregated Number of Graduates in Education',
                         [11],
                         go.Annotations([_source_note()])),
         ]),
         )
])

# # Create minor and major tick marks variables
# ttxt = ["" if x>=0 else x for x in range(0, 33)]
# newtchtxt = ["" if x>=0 else x for x in range(0, 33)]
# lyr = list(df.year.astype(int))
# for n,i in enumerate(lyr):
#     if i in (1984, 2016):
#         ttxt[n]=lyr[n]

# # Create tick marks and text for SASS data. This looks like an academic year (20xx-xx).
# n = 0 
# while n < 34:
#     if n==3:
#         ttxt[n] = "1987-88"
#         newtchtxt[n] = "59K"
#     if n==6:
#         ttxt[n] = "1990-91"
#         newtchtxt[n] = "90K"
#     if n==9:
#         ttxt[n] = "1993-94"
#         newtchtxt[n] = "102K"
#     if n==15:
#         ttxt[n] = "1999-00"
#         newtchtxt[n] = "124K"
#     if n==19:
#         ttxt[n] = "2003-04"
#         newtchtxt[n] = "125K"
#     if n==23:
#         ttxt[n] = "2007-08"
#         newtchtxt[n] = "146K"
#     if n==27:
#         ttxt[n] = "2011-12"
#         newtchtxt[n] = "96K"
#     n = n + 1
    
# Assign text that will go over bar graphs to data
# data[-2]['text']= newtchtxt

# Dict containing the title, slider
layout = go.Layout(dict(
    title='Aggregated Number of Graduates in Education',
    margin=go.Margin(
            r=5,
            l=10,
            b=100),   # bottom margin leaves room for the footnote annotations
    updatemenus=updatemenus,
    hidesources=False,
    #barmode='stack',
    legend = dict(x=-.39, y=0.0),
    yaxis=dict(
#          range=[0, 310000],
        hoverformat=',f',
        showline=True,
        zeroline=False),
    xaxis=dict(
        range=["1984","2017"],
        zeroline=False,
#         tickangle=30,
        # nticks takes a single integer (the maximum number of ticks), not a
        # list of positions; one tick per five-year step from 1984 gives 7.
        nticks=len(range(1984, 2017, 5)),
        ticks="outside",
#         tickvals=[k for k in range(1984, 2017,5)],
#         ticktext=ttxt,
        showline=True,
        rangeselector=dict(
            buttons=list([
                # counts are one more than the label
                # NOTE(review): presumably so both end years are shown - confirm
                dict(count=6,
                     label='5 Years',
                     step='year',
                     stepmode='backward'),
                dict(count=11,
                     label='10 Years',
                     step='year',
                     stepmode='backward'),
                dict(count=21,
                     label='20 Years',
                     step='year',
                     stepmode='backward'),
                dict(step='all')
            ])
        ),
#           rangeslider=dict(),
        type='date'
    ),
      # Footnote shown before any dropdown button is used
      annotations= go.Annotations([make_annotation(0.0, -0.16, "Source: <a href=\"https://nces.ed.gov/ipeds/datacenter/DataFiles.aspx\">Integrated Postsecondary Educational Data System (IPEDS)</a>")]
                                    )
    ))

# Assemble the traces and the layout into one figure.
fig = go.Figure(dict(data=data, layout=layout))
# `py` is plotly.plotly, so this call goes through the Plotly account set up
# above; `filename` is the name the plot is stored under there.
# NOTE(review): presumably this overwrites an existing plot of the same name - confirm.
py.iplot(fig, filename = "Aggregated Number of Graduates in Education (With Notes)")

In [20]:
"{:,}".format(int(df.ba_total.max()))

'113,092'

In [21]:
s = 100000000
"{:,}".format(s)

'100,000,000'

## Graph version for Dan's NCTQ presentation

In [29]:
# Variables that will be plotted

sped_ba = go.Scatter(
    visible = False,
    x=df.year,
    y=df['sped_ba'],
    name = "SPED BA",
    line = dict(color = '#8dd3c7'),
    opacity = 0.8)

stem_ba = go.Scatter(
    visible = False,
    x=df.year,
    y=df['stem_ba'],
    name = "STEM BA",
    line = dict(color = '#fdb462'),
    opacity = 0.8)

elem_ba = go.Scatter(
    visible = False,
    x=df.year,
    y=df['elem_ba'],
    name = "Elementary BA",
    line = dict(color = '#bebada'),
    opacity = 0.8)

other_ba = go.Scatter(
    visible = False,
    x=df.year,
    y=df['other_ba'],
    name = "Other BA",
    line = dict(color = '#fb8072', 
               width = 4),
    opacity = 0.8)

sped_ma = go.Scatter(
    visible = False,
    x=df.year,
    y=df['sped_ma'],
    name = "SPED MA",
    line = dict(color = '#8dd3c7'),
    opacity = 0.8, 
    marker={'color': '#8dd3c7', 'symbol': 103, 'size': "7"}, 
    mode="lines+markers")

stem_ma = go.Scatter(
    visible = False,
    x=df.year,
    y=df['stem_ma'],
    name = "STEM MA",
    line = dict(color = '#fdb462'),
    opacity = 0.8, 
    marker={'color': '#fdb462', 'symbol': 103, 'size': "7"}, 
    mode="lines+markers")

elem_ma = go.Scatter(
    visible = False,
    x=df.year,
    y=df['elem_ma'],
    name = "Elementary MA",
    line = dict(color = '#bebada'),
    opacity = 0.8, 
    marker={'color': '#bebada', 'symbol': 103, 'size': "7"}, 
    mode="lines+markers")

other_ma = go.Scatter(
    visible = False,
    x=df.year,
    y=df['other_ma'],
    name = "Other MA",
    line = dict(color = '#fb8072', 
               width = 4),
    opacity = 0.8, 
    marker={'color': '#fb8072', 'symbol': 103, 'size': "7"}, 
    mode="lines+markers")

aggtotal = go.Scatter(
    x=df.year,
    y=df['aggtotal'],
    name = "Total Ed. Credentials",
    line = dict(
        color = ('#333333'),
        width = 3),
    opacity = 0.8,
     showlegend=False
    )

aggtotal_avg = go.Scatter(x=df.year,
                           y=[df.aggtotal.mean()]*len(df.year),
                           name='Total Average',
                           visible=False,
                           line=dict(color='#333333'),
                                    opacity=0.5, showlegend=False)

ba_total = go.Scatter(
    visible = False,
    x=df.year,
    y=df['ba_total'],
    name = "Total Bachelor",
    line = dict(
        color = ('#6b6ecf'),
        width = 3,
        dash = 'dash'),
        opacity = 0.8)

ba_avg = go.Scatter(x=df.year,
                           y=[df.ba_total.mean()]*len(df.year),
                           name='Bachelor Average',
                           visible=False,
                           line=dict(color='#6b6ecf'),
                                    opacity=0.5, showlegend=False)

ma_total = go.Scatter(
    visible = False,
    x=df.year,
    y=df['ma_total'],
    name = "Total Master's",
    line = dict(
        color = ('#80b1d3'),
        width = 3,
        dash = 'dash'),
        opacity = 0.8)

ma_avg = go.Scatter(x=df.year,
                           y=[df.ma_total.mean()]*len(df.year),
                           name="Master's Average",
                           visible=False,
                           line=dict(color='#80b1d3'),
                                    opacity=0.5, showlegend=False)

phd_total = go.Scatter(
    visible = False,
    x=df.year,
    y=df['phd_total'],
    name = "Total PhD",
    line = dict(
        color = ('#35B778'),
        width = 3,
        dash = 'dash'),
        opacity = 0.8)

phd_avg = go.Scatter(x=df.year,
                           y=[df.phd_total.mean()]*len(df.year),
                           name='PhD Average',
                           visible=False,
                           line=dict(color='#35B778'), 
                                    opacity = 0.5, showlegend=False)
def _hidden_total_trace(column, label, color):
    """Build a dashed total line that starts hidden and has no legend entry.

    column -- name of the df column plotted on the y axis
    label  -- trace name
    color  -- line colour (hex string)
    """
    return go.Scatter(
        x=df.year,
        y=df[column],
        name=label,
        visible=False,
        showlegend=False,
        opacity=0.8,
        line=dict(color=color, width=3, dash='dash'),
    )


# Legend-free copies of the total lines.
aggtotal_none = _hidden_total_trace('aggtotal', "Total Education Graduates", '#333333')
ba_total_none = _hidden_total_trace('ba_total', "Total Bachelor", '#6b6ecf')
ma_total_none = _hidden_total_trace('ma_total', "Total Master's", '#80b1d3')
phd_total_none = _hidden_total_trace('phd_total', "Total PhD", '#35B778')

# Bar traces (both start hidden).

# Teachers new to the profession. `text` starts empty and is drawn outside
# the bar; the legend entry is left enabled.
new_teachers = go.Bar(
    x=df['year'],
    y=df['new_teachers'],
    name="New to Profession",
    text='',
    textposition='outside',
    marker={'color': '#8dd3c7'},
    opacity=0.8,
    visible=False,
)

# Remainder of total_teachers once new_teachers is subtracted; no legend
# entry (hover info left at its default).
total_teachers = go.Bar(
    x=df['year'],
    y=df['total_teachers'] - df['new_teachers'],
    name='Difference',
    marker={'color': '#333333'},
    opacity=0.6,
    showlegend=False,
    visible=False,
)

# Flat dashed reference line at the 1984 grand total, drawn across every year
# (1984 through 2016).
# The boolean selection df['year'] == 1984 yields a one-row Series, not a
# number, so take its single value before repeating it; otherwise `y` becomes
# a list of Series instead of a list of numbers.
# NOTE(review): the trace is still named 'Total Average' although it marks the
# 1984 level rather than a mean -- confirm the intended hover label.
# NOTE(review): `data` lists aggtotal_avg twice, so both slots hold this trace.
aggtotal_avg = go.Scatter(x=df.year,
                          y=[float(df.loc[df['year'] == 1984, 'aggtotal'].iloc[0])] * len(df.year),
                          name='Total Average',
                          visible=True,
                          line=dict(color='#333333',
                                    dash='dash'),
                          opacity=0.8, showlegend=False)

# Ordered trace list handed to the figure. The dropdown buttons toggle traces
# through positional 'visible' lists, so the order (and the total of 23
# entries) must stay in step with those lists.
data = (
    [sped_ba, stem_ba, elem_ba, other_ba]                              # 0-3
    + [sped_ma, stem_ma, elem_ma, other_ma]                            # 4-7
    + [ba_total, ma_total, phd_total, aggtotal]                        # 8-11
    + [aggtotal_avg, ba_avg, ma_avg, phd_avg]                          # 12-15
    + [ba_total_none, ma_total_none, phd_total_none, aggtotal_none]    # 16-19
    + [new_teachers, total_teachers, aggtotal_avg]                     # 20-22
)

# Add text indicating the average, and max values
## Text: Max values
def _max_annotation(column, label):
    """Return a one-element annotation list pointing at the peak of `column`.

    The x position is the year of the row holding the maximum, looked up in
    df, so the arrow stays on the peak if the underlying data changes. Years
    are passed as strings such as '2011'.

    column -- df column whose maximum is annotated
    label  -- prefix shown before ' Max:' in the annotation text
    """
    peak_row = df.loc[df[column].idxmax()]
    peak_value = int(peak_row[column])
    return [dict(x=str(int(peak_row['year'])),
                 y=peak_value,
                 xref='x', yref='y',  # data coordinates, same for all four
                 text=label + ' Max:<br>' + str(peak_value),
                 ax=0, ay=-40)]


total_annotations_max = _max_annotation('aggtotal', 'Total')
ma_annotations_max = _max_annotation('ma_total', 'MA')
ba_annotations_max = _max_annotation('ba_total', 'BA')
phd_annotations_max = _max_annotation('phd_total', 'PhD')

## Averages
def _avg_annotation(column, label, year, ay=-40):
    """Return a one-element annotation list labelling the mean of `column`.

    column -- df column whose mean (truncated to int) is annotated
    label  -- prefix shown before ' Average:' in the annotation text
    year   -- x position of the annotation, as a string such as '1993'
    ay     -- vertical arrow offset passed through to the annotation
    """
    mean_value = int(df[column].mean())
    return [dict(x=year,
                 y=mean_value,
                 xref='x', yref='y',
                 text=label + ' Average:<br>' + str(mean_value),
                 ax=0, ay=ay)]


total_annotations_avg = _avg_annotation('aggtotal', 'Total', '1993')
ma_annotations_avg = _avg_annotation('ma_total', 'MA', '2008')
ba_annotations_avg = _avg_annotation('ba_total', 'BA', '2000', ay=40)
phd_annotations_avg = _avg_annotation('phd_total', 'PhD', '1989')

# Buttons for Bachelor, Master's, PhD
def _note_other():
    """Footnote explaining what the "Other" award type covers."""
    return make_annotation(0.0, -0.20, '<b>Note: </b><i>Award Type "Other" includes, English, Social Studies, Teaching Assistants, Foreign Languages, Physical Ed., and Administrative degrees</i>')


def _source_ipeds():
    """Footnote crediting IPEDS as the data source."""
    return make_annotation(0.0, -0.16, 'Source: <a href="https://nces.ed.gov/ipeds/datacenter/DataFiles.aspx">Integrated Postsecondary Educational Data System (IPEDS)</a>')


def _visible_mask(shown):
    """Return 23 booleans, True at the positions listed in `shown`."""
    shown = set(shown)
    return [position in shown for position in range(23)]


def _update_button(label, shown, title, annotations):
    """Build one dropdown button using plotly's 'update' method.

    label       -- button caption
    shown       -- positions of the traces made visible by the button
    title       -- figure title applied by the button
    annotations -- figure annotations applied by the button
    """
    return dict(label=label,
                method='update',
                args=[{'visible': _visible_mask(shown)},
                      {'title': title,
                       'annotations': annotations}])


updatemenus = [
    dict(showactive=False,
         x=-.10,
         active=6,
         buttons=[
             _update_button(
                 'Bachelor Only', range(0, 4),
                 'Bachelor by STEM, SPED, Elementary, and Other degrees',
                 go.Annotations([_note_other(), _source_ipeds()])),
             _update_button(
                 "Master's Only", range(4, 8),
                 "Master's by STEM, SPED, Elementary, and Other degrees",
                 go.Annotations([_note_other(), _source_ipeds()])),
             _update_button(
                 "Bachelor + Master's", range(0, 8),
                 'Aggregated Number of Graduates in Education',
                 go.Annotations([_note_other(), _source_ipeds()])),
             _update_button(
                 'Totals by Award', range(8, 12),
                 "Grand Total and by Award Type",
                 [_source_ipeds()]),
             _update_button(
                 'Total Max', range(16, 20),
                 "Grand Total and by Award Type",
                 total_annotations_max + ma_annotations_max
                 + ba_annotations_max + phd_annotations_max
                 + [_source_ipeds()]),
             # Grand-total line (position 11) plus the new-teacher bars (position 20).
             _update_button(
                 'New Teachers', (11, 20),
                 'Teachers New to Profession & Total Education Graduates',
                 go.Annotations([make_annotation(0.0, -0.22, 'Source: <a href="https://nces.ed.gov/ipeds/datacenter/DataFiles.aspx">Integrated Postsecondary Educational Data System (IPEDS)</a> and <a href="https://nces.ed.gov/surveys/sass/tables/sass0708_034_t1n.asp">School and Staffing Survey</a>')])),
             _update_button(
                 'All', range(0, 12),
                 "Aggregated Number of Graduates in Education",
                 go.Annotations([_note_other(), _source_ipeds()])),
             _update_button(
                 'Total Graduates', (11,),
                 'Aggregated Number of Graduates in Education',
                 go.Annotations([_source_ipeds()])),
         ])
]

# Create minor and major tick marks variables
# One label slot per year in df, all blank to start with.
lyr = list(df.year.astype(int))
ttxt = [""] * len(lyr)
newtchtxt = [""] * len(lyr)

# Tick text and bar text for SASS data, keyed by the year the label sits on:
# year -> (axis label written like an academic year, new-teacher count label).
# Keying by year instead of list position keeps each label on the right year.
sass_labels = {
    1987: ("1987-88", "59K"),
    1990: ("1990-91", "90K"),
    1993: ("1993-94", "102K"),
    1999: ("1999-00", "124K"),
    2003: ("2003-04", "125K"),
    2007: ("2007-08", "146K"),
    2011: ("2011-12", "96K"),
}

for n, i in enumerate(lyr):
    # The first and last years keep their plain year as the tick label.
    if i in (1984, 2016):
        ttxt[n] = i
    if i in sass_labels:
        ttxt[n], newtchtxt[n] = sass_labels[i]

# Assign the text that goes over the bars to the new-teacher trace itself.
# Indexing `data` from the end is fragile: data[-2] is total_teachers (the
# 'Difference' bars), not the new_teachers bars that set textposition='outside'.
new_teachers['text'] = newtchtxt

# Dict containing the title, slider
# Figure layout: title, dropdown buttons, axes, range selector/slider and the
# default source footnote.
layout = go.Layout(dict(
    title='Aggregated Number of Graduates in Education',
    updatemenus=updatemenus,
    hidesources=False,
    #barmode='stack',
    legend = dict(x=-.43, y=0.0),
    yaxis=dict(
        range=[0, 310000],
        hoverformat=',f',
        showline=True,
        zeroline=False),
    xaxis=dict(
#         range=[1984,2016],
        zeroline=False,
#         tickangle=30,
        # nticks takes a single non-negative integer (an upper bound on the
        # number of auto-generated ticks), not a list of tick positions.
        # 7 matches the number of 5-year steps in range(1984, 2017, 5).
        nticks=7,
        ticks="outside",
#         tickvals=[k for k in range(1984, 2017,5)],
#         ticktext=ttxt,
        showline=True,
        rangeselector=dict(
            buttons=list([
                dict(count=5,
                     label='5 Years',
                     step='year',
                     stepmode='backward'),
                dict(count=10,
                     label='10 Years',
                     step='year',
                     stepmode='backward'),
                dict(count=20,
                     label='20 Years',
                     step='year',
                     stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(),
        type='date'
    ),
    annotations= go.Annotations([make_annotation(0.0, -0.16, "Source: <a href=\"https://nces.ed.gov/ipeds/datacenter/DataFiles.aspx\">Integrated Postsecondary Educational Data System (IPEDS)</a>")]
                                )
    ))

# Assemble the figure and render it under the given filename.
fig = go.Figure(dict(data=data, layout=layout))
py.iplot(fig, filename = "Aggregated Number of Graduates in Education (Dan NCTQ)")

In [27]:
# Inspect the 1984 grand total. The boolean selection returns a one-row
# Series (index 0), not a bare number.
df.aggtotal.loc[df['year']==1984]

0    177679.0
Name: aggtotal, dtype: float64