## Q1: What is the salary distribution of one department?
1. createFigHist with one department

## Q2: What is the salary distribution across several departments?
1. createFigHist with more than one department

## Q3: What is the size of each department across several departments and campuses?
1. createFigDiffDepAndCampus

## Q4: List the median salary of all departments / all jobs on a campus
1. createFigListAllDepSalary

In [1]:
import numpy as np
import pandas as pd
import plotly
import plotly.plotly as py
from plotly.graph_objs import *
import plotly.tools as tls

from ipywidgets import widgets 
from IPython.display import display, clear_output, Image
from plotly.widgets import GraphWidget

import os

# The Plotly API key is read from the environment so that it is not stored in
# the notebook. Set PLOTLY_API_KEY (and optionally PLOTLY_USERNAME) before
# starting the kernel. The key that was previously hard-coded on this line
# should be treated as leaked and regenerated.
# NOTE(review): when PLOTLY_API_KEY is unset the call is skipped; plotly is
# then presumably using whatever credentials were saved earlier -- confirm.
_plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if _plotly_api_key:
    plotly.tools.set_credentials_file(
        username=os.environ.get('PLOTLY_USERNAME', 'wangqiwen'),
        api_key=_plotly_api_key,
    )

<IPython.core.display.Javascript object>

In [2]:
# tls.get_embed('https://plot.ly/~chris/1638')
# Load the salary table once into a module-level DataFrame. getSalary and
# createFigListAllDepSalary read it as a global. The path is relative, so it
# is resolved against the notebook's current working directory.
SalaryOverYears = pd.read_csv("../csv/SalaryOverYears.csv")

In [3]:
def getSalary(department, campus, year_col):
    """Return the non-missing salaries of one department on one campus.

    Rows of the global ``SalaryOverYears`` table whose ``Department`` and
    ``Campus`` columns equal the given values are selected, and the
    ``year_col`` column of those rows is returned with missing values dropped.
    """
    in_department = SalaryOverYears['Department'] == department
    on_campus = SalaryOverYears['Campus'] == campus
    selected = SalaryOverYears.loc[in_department & on_campus, year_col]
    return selected.dropna()


def createFigHist(departments, campus="Urbana-Champaign", year = 2017, bin_size=20000):
    """Build an overlaid salary histogram with one trace per department.

    Args:
        departments: a department name, or a list of department names. Each
            one becomes its own histogram trace.
        campus: campus whose rows are used for every department.
        year: salary year, 2013 through 2017 inclusive; selects the
            ``"Salary<year>"`` column.
        bin_size: width of each histogram bin, in the units of the salary
            column. Defaults to 20000, the value previously hard-coded.

    Returns:
        A plotly ``Figure`` whose traces are normalised to percentages and
        drawn on top of each other (``barmode='overlay'``).

    Raises:
        AssertionError: if ``year`` is outside 2013-2017.
    """
    assert 2013 <= year <= 2017
    # A bare string would otherwise be iterated (and joined in the title)
    # character by character.
    if isinstance(departments, str):
        departments = [departments]
    year_col = "Salary" + str(year)

    data = [
        Histogram(
            x=getSalary(dep, campus, year_col),
            xbins=dict(size=bin_size),
            histnorm='percent',
            name=dep,
            # Semi-transparent so overlapping departments stay visible.
            opacity=0.75,
        )
        for dep in departments
    ]

    layout = Layout(
        title='Salary Range of Faculty in {}'.format(' & '.join(departments)),
        xaxis=dict(title='Salary'),
        yaxis=dict(title='Percentage of Faculty'),
        barmode='overlay',
    )
    return Figure(data=data, layout=layout)

def createFigDiffDepAndCampus(departments, campus, year=2017):
    """Compare headcount and median salary across (department, campus) pairs.

    Args:
        departments: list of department names, or a single name that is
            paired with every entry of ``campus``.
        campus: list of campus names, paired with ``departments`` position by
            position, or a single name that is paired with every department.
        year: salary year, 2013 through 2017 inclusive; selects the
            ``"Salary<year>"`` column.

    Returns:
        A plotly ``Figure`` with a bar trace (number of salary rows per pair,
        left axis) and a line trace (median salary per pair, right axis
        ``y2``). A pair with no salary rows gets a bar of height 0 and a
        ``None`` median instead of raising.

    Raises:
        AssertionError: if ``year`` is outside 2013-2017 or the two lists
            have different lengths.
    """
    assert 2013 <= year <= 2017
    # Broadcast a bare string so it is not iterated character by character.
    if isinstance(departments, str) and isinstance(campus, str):
        departments, campus = [departments], [campus]
    elif isinstance(departments, str):
        departments = [departments] * len(campus)
    elif isinstance(campus, str):
        campus = [campus] * len(departments)
    assert len(departments) == len(campus)
    year_col = "Salary" + str(year)

    labels = []
    populationSize = []
    medians = []
    for dep, camp in zip(departments, campus):
        salary = getSalary(dep, camp, year_col)
        populationSize.append(len(salary))
        median = salary.median()
        # The median of an empty selection is NaN, and int(NaN) raises
        # ValueError; record a missing point instead.
        medians.append(None if pd.isnull(median) else int(median))
        labels.append('{} - {}'.format(camp, dep))

    population_bar = Bar(
        x=labels,
        y=populationSize,
        name="population size",
        opacity=0.75,
    )
    salary_scatter = Scatter(
        x=labels,
        y=medians,
        name="median salary",
        mode='lines+markers',
        line=dict(width=4),
        # Plotted against the secondary axis declared in the layout below.
        yaxis='y2',
        opacity=0.75,
    )
    layout = Layout(
        title="Number of Faculty in the Dataset",
        xaxis=dict(title='Department'),
        yaxis=dict(title='Number of Faculty'),
        yaxis2=dict(
            title='Median Salary',
            overlaying='y',
            side='right',
        ),
    )
    return Figure(data=[population_bar, salary_scatter], layout=layout)
    
def createFigListAllDepSalary(campus = "Urbana-Champaign", year = 2017, showJob = False, top = None):
    """Build a horizontal bar chart of median salaries on one campus.

    Args:
        campus: campus whose rows of ``SalaryOverYears`` are used.
        year: salary year, 2013 through 2017 inclusive; selects the
            ``"Salary<year>"`` column.
        showJob: if true, one bar per (department, job title) pair labelled
            ``"<job> in <department>"``; otherwise one bar per department.
        top: keep only the ``top`` highest medians; ``None`` keeps all.

    Returns:
        A plotly ``Figure`` with the highest median drawn at the top.

    Raises:
        AssertionError: if ``year`` is outside 2013-2017.
    """
    assert 2013 <= year <= 2017
    year_col = "Salary" + str(year)

    # Both modes share one pipeline; only the grouping keys differ.
    group_cols = ['Department', 'JobTitle'] if showJob else ['Department']
    on_campus = SalaryOverYears.loc[
        SalaryOverYears['Campus'] == campus, group_cols + [year_col]
    ].dropna()
    sorted_median = (
        on_campus.groupby(group_cols).median()
        .sort_values(year_col, ascending=False)
    )

    x = sorted_median[year_col].tolist()
    if showJob:
        # Index entries are (department, job title) tuples.
        y = [job + " in " + dep for dep, job in sorted_median.index.tolist()]
        title = "Median Salary of Each Job Title by Department in {}".format(campus)
    else:
        y = sorted_median.index.tolist()
        title = "Median Salary of Each Department in {}".format(campus)

    # Compare against None so that top=0 yields an empty chart rather than
    # silently disabling the limit.
    if top is not None:
        x = x[:top]
        y = y[:top]

    # Reversed so the largest median ends up at the top of the chart
    # (the first category of a horizontal bar chart is drawn at the bottom).
    x = x[::-1]
    y = y[::-1]

    data = [Bar(
        y=y,
        x=x,
        name='Median Salary',
        orientation='h',
        marker=dict(
            color='rgba(50, 171, 96, 0.7)',
            line=dict(
                color='rgba(50, 171, 96, 1.0)',
            ),
        ),
        opacity=0.75,
    )]
    layout = Layout(
        title=title,
        # Wide left margin leaves room for the long category labels.
        margin=Margin(
            l=360,
        ),
        legend=dict(orientation="h"),
    )
    return Figure(data=data, layout=layout)
        

In [5]:
# Q2: overlay the salary histograms of two departments, using createFigHist's
# default campus and year, and render the figure inline via plotly.
fig = createFigHist(['Information Sciences', 'Computer Science'])
py.iplot(fig, filename='salaryHist3')

In [6]:
# Controls for the interactive salary-distribution view. They are created
# first because their default values also define the initial figure below, so
# the graph and the controls start out showing the same selection.
salary_dist_year_container = widgets.IntSlider(
    value=2017,
    min=2013,
    max=2017,
    step=1,
    description='Year:',
    # Only fire a change event when the slider is released.
    continuous_update = False
)

salary_dist_department_one_textbox = widgets.Text(
    description='Department:   ',
    # Matches the department of the initial figure (previously the textbox
    # showed 'Mathematics' while the graph showed Information Sciences).
    value='Information Sciences',
)

salary_dist_campus_dropdown = widgets.Dropdown(
    options=["Chicago", "Springfield", "Urbana-Champaign"],
    value='Urbana-Champaign',
    description='Campus:',
)

# Build the initial single-department histogram from the widget defaults and
# upload it; the returned object's `resource` attribute is used as the chart
# URL for the graph widget.
fig = createFigHist(
    [salary_dist_department_one_textbox.value],
    campus=salary_dist_campus_dropdown.value,
    year=salary_dist_year_container.value,
)
salaryHist = py.iplot(fig, filename='salaryHist')
salaryHist_url = salaryHist.resource

# Graph widget bound to the uploaded chart (one histogram trace).
g_salary_dist = GraphWidget(salaryHist_url)


def response_salary_dist(change):
    """Redraw the salary-distribution graph from the current control values.

    ``change`` is accepted but not used; the year, department and campus are
    read directly from the widgets. When the rebuilt histogram has no data
    the graph widget is left untouched.
    """
    new_fig = createFigHist(
        [salary_dist_department_one_textbox.value],
        campus=salary_dist_campus_dropdown.value,
        year=salary_dist_year_container.value,
    )
    trace = new_fig.data[0]
    if len(trace.x) == 0:
        return
    # Swap in the new data and trace name, then apply the new layout.
    g_salary_dist.restyle({'x': [list(trace.x)], "name": trace.name})
    g_salary_dist.relayout(new_fig.layout)

# Redraw the graph whenever the value of any control changes.
for _control in (
    salary_dist_year_container,
    salary_dist_department_one_textbox,
    salary_dist_campus_dropdown,
):
    _control.observe(response_salary_dist, names="value")

# Put the controls in one row and show them above the graph widget.
salary_dist_container = widgets.HBox([
    salary_dist_campus_dropdown,
    salary_dist_department_one_textbox,
    salary_dist_year_container,
])
display(salary_dist_container)
display(g_salary_dist)

PlotlyRequestError: Hi there, you've reached the threshold of 100 combined image exports and chart saves per day. If you need to raise your daily limit, consider upgrading to a Student or Personal Plan (see: https://plot.ly/products/cloud).

In [None]:
# Q3: What is the size of the History department across UIC, UIS, UIUC?
fig = createFigDiffDepAndCampus(
    departments=['History'] * 3,
    campus=["Chicago", "Springfield", "Urbana-Champaign"],
    year=2017,
)
py.iplot(fig, filename='salaryCampus')

In [None]:
# Q3: How large is each campus's information-science-like department
# (UIC, UIS, UIUC)? Each department name is paired with the campus at the
# same position.
fig = createFigDiffDepAndCampus(
    departments=[
        "Information/Decision Sciences",
        "Management Information Systems",
        "Information Sciences",
    ],
    campus=["Chicago", "Springfield", "Urbana-Champaign"],
    year=2017,
)
py.iplot(fig, filename='salaryCampus')

In [None]:
# Q4: the 50 departments with the highest median salary, using the function's
# default campus and year.
fig = createFigListAllDepSalary(top=50)
py.iplot(fig, filename='listSalaryBar')

In [None]:
# Q4: the 10 (job title, department) pairs with the highest median salary,
# using the function's default campus and year.
fig = createFigListAllDepSalary(showJob=True, top=10)
py.iplot(fig, filename='listSalaryBarJob')