# Graphs

## "Other" Disaggregated (Award Levels) 

Will cover years: 1984-2016. These graphs will include the counts by award level (Bachelor's, Master's, PhD). The goal is to create a plot in which the "Other" category from "*aggregate num of degrees by cipcode(4) awlevel.dta*" (used in "*Line - (Awlevel) Aggregated Number of Degrees in Education (1984-2016)*") is disaggregated, so that we can see which CIP codes are driving the increase in the "Other" category. 

In [1]:
import numpy as np
import pandas as pd
import os

In [19]:
# Directory holding the Stata extracts used for the graphs. The original
# network-drive location stays the default; set IPEDS_GRAPH_DATA_DIR to run
# the notebook on another machine without editing this cell.
DATA_DIR = os.environ.get(
    "IPEDS_GRAPH_DATA_DIR",
    r"H:\Teacher Labor MRKT Shortage\IPEDS\Completions_CIP codes\New CIP data\Data files used for graphs",
)
# The working directory is still changed so that relative paths used by any
# other cell keep resolving against the data directory.
os.chdir(DATA_DIR)
df = pd.read_stata("aggregate num of degrees by cipcode(47) awlevel_other disaggregated.dta")

In [3]:
# Peek at the first five rows (five is the default row count for head()).
df.head()

Unnamed: 0,year,admin__special_ed5,adult_continuing_ed_admin5,adult_continuing_ed_tch5,agricultural_tch_ed5,art_tch_ed5,business_tch_ed5,coll_student_counseling__perso5,community_coll_ed5,counselor_ed_school_counseling5,...,spanish_tch_ed9,sped9,speech_tch_ed9,stem9,tch_assistant_aide9,tch_ed_multiple_levels9,tch_ed_prof_dev_levelmethod9,tch_ed_prof_dev_subjectarea9,teaching_english_as_a_second_o9,aggtotal
0,1984.0,0.0,0.0,136.0,866.0,1448.0,2106.0,0.0,0.0,122.0,...,0.0,239.0,0.0,532.0,0.0,0.0,66.0,159.0,4.0,177679.0
1,1985.0,0.0,1.0,300.0,817.0,1292.0,2104.0,0.0,0.0,118.0,...,0.0,235.0,0.0,421.0,0.0,0.0,86.0,64.0,11.0,171946.0
2,1986.0,3.0,0.0,204.0,719.0,1086.0,2163.0,0.0,0.0,122.0,...,0.0,245.0,0.0,428.0,0.0,0.0,84.0,56.0,10.0,171582.0
3,1987.0,3.0,0.0,79.0,693.0,1231.0,1951.0,0.0,0.0,103.0,...,0.0,231.0,0.0,376.0,0.0,0.0,92.0,47.0,1.0,170660.0
4,1988.0,3.0,0.0,92.0,575.0,1049.0,1966.0,0.0,0.0,76.0,...,0.0,218.0,0.0,356.0,0.0,0.0,97.0,66.0,5.0,176830.0


### Clean

* Create new variables that are the sum of Bachelor's (5), Master's (7), and PhD (9) degrees

In [20]:
# Rename column names: the trailing award-level code becomes a readable suffix
# (5 -> _ba, 7 -> _ma, 9 -> _phd).
AWLEVEL_SUFFIXES = {"5": "_ba", "7": "_ma", "9": "_phd"}

# Only the final character of each label is inspected. The previous
# substring test ("5" in oldcol) combined with str.replace would also rewrite
# a 5/7/9 appearing in the middle of a label. Labels that do not end in a
# level code (e.g. 'year', 'aggtotal') are left untouched.
renamed_columns = {
    oldcol: oldcol[:-1] + AWLEVEL_SUFFIXES[oldcol[-1]]
    for oldcol in df.columns
    if oldcol[-1:] in AWLEVEL_SUFFIXES
}
# A single rename call instead of rebuilding the frame once per column.
df = df.rename(columns=renamed_columns)

In [5]:
# Display the column labels to check the result of the suffix rename.
df.columns

Index(['year', 'admin__special_ed_ba', 'adult_continuing_ed_admin_ba',
       'adult_continuing_ed_tch_ba', 'agricultural_tch_ed_ba', 'art_tch_ed_ba',
       'business_tch_ed_ba', 'coll_student_counseling__perso_ba',
       'community_coll_ed_ba', 'counselor_ed_school_counseling_ba',
       ...
       'spanish_tch_ed_phd', 'sped_phd', 'speech_tch_ed_phd', 'stem_phd',
       'tch_assistant_aide_phd', 'tch_ed_multiple_levels_phd',
       'tch_ed_prof_dev_levelmethod_phd', 'tch_ed_prof_dev_subjectarea_phd',
       'teaching_english_as_a_second_o_phd', 'aggtotal'],
      dtype='object', length=143)

In [35]:
# Create unique list of column names with the ba, ma, phd suffixes removed.
# `collist` keeps one entry per suffixed column (so duplicates are expected);
# `unique` keeps each base name once, in first-seen order.
collist = []
for label in df.columns:
    for suffix in ("_ba", "_ma", "_phd"):
        if label.endswith(suffix):
            collist.append(label[:-len(suffix)])

unique = []
already_listed = set()
for base_name in collist:
    if base_name in already_listed:
        continue
    already_listed.add(base_name)
    unique.append(base_name)

In [38]:
# Create variable for the sum of a category's BA, MA and PhD counts as a new column
def cip_total(dfname, major, frame=None):
    """Add a column holding the combined BA + MA + PhD count for one category.

    Missing values in the three source columns are treated as 0 in the sum;
    the source columns themselves are not modified.

    Parameters
    ----------
    dfname : str
        Name of the column to create (or overwrite).
    major : str
        Base category name. The columns ``major + '_ba'``, ``major + '_ma'``
        and ``major + '_phd'`` are read.
    frame : pandas.DataFrame, optional
        Frame to read from and write to. Defaults to the notebook-level ``df``
        so existing two-argument calls behave as before.

    Returns
    -------
    None
        The frame is modified in place.

    Raises
    ------
    KeyError
        If any of the three level columns is missing.
    """
    if frame is None:
        frame = df
    level_columns = [major + '_ba', major + '_ma', major + '_phd']
    # Fill missing values only in the three columns being summed; the earlier
    # version made a filled copy of the whole frame three times per call.
    levels = frame[level_columns].fillna(0)
    frame[dfname] = levels[major + '_ba'] + levels[major + '_ma'] + levels[major + '_phd']

In [39]:
# Add one "<category>_total" column for every distinct category name.
for major in unique:
    total_column = major + "_total"
    cip_total(total_column, major)

## Graph 

### Use Plotly

Will plot all of the totals onto the graph. 

In [42]:
import plotly.plotly as py
import plotly.graph_objs as go
import plotly 

In [45]:
import colorlover as cl

In [147]:
from IPython.display import HTML
# Render colorlover's HTML preview for the ['seq']['4'] entry of the flipped
# scale dictionary, so the available palettes can be compared by eye.
seq_scales_4 = cl.flipper()['seq']['4']
HTML(cl.to_html(seq_scales_4))

In [222]:
from random import Random, shuffle  # shuffle is kept imported in case other cells use it

# Number of line colours to prepare.
# NOTE(review): presumably one per category in the "cipcode(47)" data file - confirm.
N_COLORS = 47
# Fixed seed so that every run assigns the same colour to the same line.
COLOR_SEED = 0
# Sequential palettes to draw from, in a fixed order so that the colours kept
# under the N_COLORS cap do not depend on dictionary iteration order.
PALETTES = ('Blues', 'Greens', 'Oranges', 'Purples', 'Reds', 'GnBu', 'RdPu')

dictcolors = cl.scales['9']['seq']
colors = []
for key in PALETTES:
    if key in dictcolors:
        # Skip the first two entries of each 9-step scale
        # (presumably the palest shades - confirm against the preview).
        colors.extend(dictcolors[key][2:])
colors = colors[:N_COLORS]
print("Done:", len(colors))
# A private, seeded generator shuffles reproducibly without touching the
# global random state.
Random(COLOR_SEED).shuffle(colors)

Done: 47


In [43]:
# Initiate using credentials taken from the environment.
# SECURITY: an API key was previously hard-coded in this cell; that key should
# be treated as exposed and regenerated in the Plotly account settings.
plotly_username = os.environ.get("PLOTLY_USERNAME", "otteheng")
plotly_api_key = os.environ.get("PLOTLY_API_KEY")
if not plotly_api_key:
    raise RuntimeError(
        "Set the PLOTLY_API_KEY environment variable before running this cell."
    )
plotly.tools.set_credentials_file(username=plotly_username, api_key=plotly_api_key)

In [226]:
# Define a trace-generating function (returns a Scatter object)
def make_trace(y, name, color, dashes):
    """Build the default line trace for one column of the notebook-level ``df``.

    Parameters
    ----------
    y : str
        Column of ``df`` supplying the y-coordinates.
    name : str
        Label used for the legend/hover.
    color : str
        Line colour.
    dashes : str
        Dash style passed through to the line.

    Returns
    -------
    go.Scatter
        Trace with ``df.year`` on the x-axis, line width 3, opacity 0.2 and
        the legend entry switched off; callers adjust these afterwards.
    """
    line_style = dict(color=color, width=3, dash=dashes)
    trace_settings = dict(
        x=df.year,
        y=df[y],
        name=name,
        line=line_style,
        opacity=0.2,
        showlegend=False,
    )
    return go.Scatter(**trace_settings)

In [228]:
# Define an annotation-generating function
def make_annotation2(xval, yval, fsize, txt):
    """Build a left-aligned text annotation without an arrow.

    Parameters
    ----------
    xval, yval : float
        Position of the text. Both ``xref`` and ``yref`` are 'paper', so the
        values are paper coordinates rather than data coordinates.
    fsize : int
        Font size of the text.
    txt : str
        Annotation text.

    Returns
    -------
    go.Annotation
    """
    label_font = go.Font(color='#262626', size=fsize)
    settings = {
        'x': xval,
        'y': yval,
        'align': 'left',
        # ax/ay are passed through unchanged; no arrow is drawn (showarrow=False).
        'ax': -1,
        'ay': 174.608329773,
        'text': txt,
        'showarrow': False,
        'xref': 'paper',
        'xanchor': 'auto',
        'yref': 'paper',
        'yanchor': 'auto',
        'font': label_font,
    }
    return go.Annotation(**settings)

In [250]:
# Assign a color to each line
# Columns that never get a line of their own in this plot.
# NOTE(review): presumably the aggregate plus the SPED/STEM/Elementary totals,
# which are not part of "Other" - confirm.
EXCLUDED_COLUMNS = ('aggtotal', 'stem_total', 'sped_total', 'elem_total')


def _colored_columns(keep):
    """Return (names, {name: color}) for the df columns accepted by `keep`.

    Columns listed in EXCLUDED_COLUMNS are dropped. Colours are paired with
    names in column order; zip() stops at the shorter sequence, so any column
    beyond len(colors) is silently left without a colour.
    """
    names = [x for x in df.columns if keep(x) and x not in EXCLUDED_COLUMNS]
    return names, dict(zip(names, colors))


# Fixed: the second step previously iterated over an undefined name `test`
# instead of the `total` list built on the line before it.
total, totalcolor = _colored_columns(lambda x: "total" in x)

# Fixed: the suffix tests previously inspected a leftover loop variable `i`
# rather than the column being filtered, so each list was all-or-nothing.
# NOTE(review): EXCLUDED_COLUMNS cannot match a suffixed column, so e.g.
# 'stem_ba' is still included here - confirm whether that is intended.
ba, bacolor = _colored_columns(lambda x: x.endswith("_ba"))
ma, macolor = _colored_columns(lambda x: x.endswith("_ma"))
phd, phdcolor = _colored_columns(lambda x: x.endswith("_phd"))

In [251]:
# Build one trace per "<category>_total" column. Every line starts faint and
# without a legend entry (the make_trace defaults); a category is promoted to
# the legend and restyled according to its peak yearly count.
data = []
for key, value in totalcolor.items():
    label = key.replace("__", " ").title().replace("Total", "").replace("_", " ")
    # 'solid' is passed explicitly; the empty string used before is not one of
    # the documented dash styles.
    trace = make_trace(key, label, value, 'solid')
    peak = df[key].max()  # computed once per column instead of once per comparison

    # NOTE(review): the bands are open intervals, so a peak of exactly 5,000,
    # 10,000 or 15,000 keeps the faint default style - confirm that is intended.
    if 5000 < peak < 10000:
        trace['showlegend'] = True
        trace['line']['dash'] = 'dot'
        trace['opacity'] = 0.8
        trace['legendgroup'] = 'group1'
    elif 10000 < peak < 15000:
        trace['showlegend'] = True
        trace['line']['dash'] = 'dash'
        trace['opacity'] = 1.0
        trace['legendgroup'] = 'group2'
    elif peak > 15000:
        trace['showlegend'] = True
        trace['opacity'] = 1.0
        trace['legendgroup'] = 'group3'
        # Marker size is numeric (it was previously the string "6").
        trace['marker'] = {'color': value, 'symbol': 100, 'size': 6}
        trace['mode'] = 'lines+markers'
    data.append(trace)

# Dict containing the title, axes, legend position and explanatory note
layout = go.Layout(dict(
    title='Explore Categories within "Other"',
    hidesources=False,
    legend = dict(x=-.39, y=0.0),
    yaxis=dict(
        hoverformat=',f',
        showline=True,
        zeroline=False),
    xaxis=dict(
        range=["1984","2017"],
        zeroline=False,
        # One tick every five years starting at 1984. `nticks` takes a single
        # integer (a maximum tick count), so the list of years passed to it
        # before could not place ticks; on a date axis the spacing is given
        # with tick0 plus a dtick of "M60" (60 months).
        tick0="1984-01-01",
        dtick="M60",
        ticks="outside",
        showline=True,
        type='date'
    ),
    annotations = [make_annotation2(-.39, 0.9, 12, "Dashed Lines have a max, in the<br>time observed, over 10,000 but<br>less than 15,000. Dotted<br>have max greater than 5,000 but less than<br>10,000. Lines with a symbol have<br>a max greater than 15,000.")]
    )
)

fig = go.Figure(dict(data=data, layout=layout))
py.iplot(fig, filename = 'Explore Categories within "Other"')

In [249]:
# List every Master's-level column, one name per line.
for column in df.columns:
    if column.endswith("_ma"):
        print(column)

admin__special_ed_ma
adult_continuing_ed_admin_ma
adult_continuing_ed_tch_ma
agricultural_tch_ed_ma
art_tch_ed_ma
business_tch_ed_ma
coll_student_counseling__perso_ma
community_coll_ed_ma
counselor_ed_school_counseling_ma
curr__instruction_ma
drama__dance_tch_ed_ma
driver__safety_tch_ed_ma
ed_general_ma
ed_other_ma
edal_admin__supervision_other_ma
edal_instructional__curr_super_ma
edal_leadership__admin_general_ma
elem_ma
english_language_arts_tch_ed_ma
family__consumer_sciences_home_ma
foreign_language_tch_ed_ma
french_language_tch_ed_ma
german_language_tch_ed_ma
health_occupations_tch_ed_ma
health_tch_ed_ma
higher_ed_higher_ed_admin_ma
history_tch_ed_ma
international__comparative_ed_ma
junior_high_intermediate_middl_ma
music_tch_ed_ma
nurtitional_ed_ma
physical_ed_teaching__coaching_ma
reading_tch_ed_ma
sales__marketing_operations_ma_ma
secondary_ed__teaching_ma
social__philosophical_foundati_ma
social_science_tch_ed_ma
social_studies_tch_ed_ma
spanish_tch_ed_ma
sped_ma
speech_tch_ed