## Q1: What is the salary distribution of one department?
1. createFigHist with one department

## Q2: What is the salary distribution across several departments?
1. createFigHist with more than one department

## Q3: What is the size of the department across several departments?
1. createFigDiffDepAndCampus

## Q4: List the median salary of all departments/all jobs in a campus
1. createFigListAllDepSalary

In [1]:
import numpy as np
import pandas as pd
import plotly
import plotly.plotly as py
from plotly.graph_objs import *
import plotly.tools as tls
# Register the plotly account presumably used by the py.iplot calls below.
# NOTE(review): the username and API key are hard-coded in this file, so anyone
# who can read the file can use the account. Consider rotating the key and
# loading the credentials from outside the source.
plotly.tools.set_credentials_file(username='wangqiwen', api_key='8r40jKGvIemGyOP1PEZ1')

In [2]:
# tls.get_embed('https://plot.ly/~chris/1638')
# Load the salary table from CSV. The functions below read it through this
# module-level name and use its 'Department', 'Campus', 'JobTitle' and
# 'Salary<year>' columns. The path is relative to the working directory.
SalaryOverYears = pd.read_csv("../csv/SalaryOverYears.csv")

In [3]:
def getSalary(department, campus, year_col):
    """Return the non-missing salaries of one department on one campus.

    Args:
        department: Value matched against the 'Department' column.
        campus: Value matched against the 'Campus' column.
        year_col: Name of the salary column to read, e.g. "Salary2017".

    Returns:
        The `year_col` values of the matching rows of the module-level
        `SalaryOverYears` table, with missing entries dropped.
    """
    in_department = SalaryOverYears['Department'] == department
    on_campus = SalaryOverYears['Campus'] == campus
    matching = SalaryOverYears.loc[in_department & on_campus, year_col]
    return matching.dropna()


def createFigHist(departments, campus="Urbana-Champaign", year = 2017):
    """Build a salary histogram figure with one trace per department.

    Args:
        departments: Department names; each one becomes a histogram trace
            and the names joined by ' VS ' form the figure title.
        campus: Campus on which every department is looked up.
        year: Salary year; must lie between 2013 and 2017 inclusive.

    Returns:
        A plotly `Figure` whose traces are percent-normalised histograms of
        the salaries returned by `getSalary`.

    Raises:
        AssertionError: If `year` is outside 2013..2017.
    """
    assert 2013 <= year <= 2017
    salary_column = "Salary" + str(year)

    # one semi-transparent histogram per requested department
    traces = [
        Histogram(
            x=getSalary(name, campus, salary_column),
            histnorm='percent',
            name=name,
            opacity=0.75,
        )
        for name in departments
    ]

    figure_layout = Layout(
        title=' VS '.join(departments),
        xaxis=dict(title='Salary'),
        yaxis=dict(title='Percentage'),
        bargroupgap=0.1,
    )
    return Figure(data=traces, layout=figure_layout)

def createFigDiffDepAndCampus(departments, campus, year=2017):
    """Build a pie chart of the median salary of department/campus pairs.

    `departments[i]` is looked up on `campus[i]`, so both sequences must
    have the same length.

    Args:
        departments: Department names, one per pie slice.
        campus: Campus names, paired position by position with `departments`.
        year: Salary year; must lie between 2013 and 2017 inclusive.

    Returns:
        A plotly `Figure` holding a single `Pie` trace whose slice values
        are the median salaries and whose labels read
        "<campus> - <department>".

    Raises:
        AssertionError: If `year` is outside 2013..2017 or the two
            sequences differ in length.
    """
    assert year >= 2013 and year <= 2017
    assert len(departments) == len(campus)
    year_col = "Salary" + str(year)

    # walk the two sequences in lockstep instead of indexing by position
    pairs = list(zip(departments, campus))
    salary = [getSalary(dep, cam, year_col).median() for dep, cam in pairs]
    labels = [cam + " - " + dep for dep, cam in pairs]

    data = Pie(labels=labels, values=salary,
               hoverinfo='label+percent', textinfo='value',
               textfont=dict(size=20)
               )
    layout = Layout(
        title="Median Salary Comparison Across Departments",
    )
    fig = Figure(data=[data], layout=layout)
    return fig
    
def createFigListAllDepSalary(campus = "Urbana-Champaign", year = 2017, showJob = False, top = None):
    """Build a horizontal bar chart of median salaries on one campus.

    Args:
        campus: Campus whose rows of `SalaryOverYears` are used.
        year: Salary year; must lie between 2013 and 2017 inclusive.
        showJob: When truthy, medians are computed per (department, job
            title) pair and labelled "<job> in <department>"; otherwise
            they are computed per department.
        top: When truthy, only the `top` highest medians are kept.

    Returns:
        A plotly `Figure` with a single horizontal `Bar` trace.

    Raises:
        AssertionError: If `year` is outside 2013..2017.
    """
    assert 2013 <= year <= 2017
    salary_column = "Salary" + str(year)

    # group per department, or per (department, job title) when requested
    group_keys = ["Department", "JobTitle"] if showJob else ["Department"]
    on_campus = SalaryOverYears['Campus'] == campus
    campus_rows = SalaryOverYears.loc[on_campus, group_keys + [salary_column]].dropna()
    ranked = campus_rows.groupby(group_keys).median() \
                        .sort_values(salary_column, ascending = False)

    medians = ranked[salary_column].tolist()
    if showJob:
        names = [job + " in " + dep for dep, job in ranked.index.tolist()]
    else:
        names = ranked.index.tolist()

    # keep only the highest medians when a limit is given
    if top:
        medians = medians[:top]
        names = names[:top]

    # flip to ascending order; presumably so the largest bar is drawn at the
    # top of the horizontal chart
    medians = medians[::-1]
    names = names[::-1]

    bar_style = dict(
        color='rgba(50, 171, 96, 0.7)',
        line=dict(color='rgba(50, 171, 96, 1.0)'),
    )
    traces = [Bar(
        x=medians,
        y=names,
        name='Median Salary',
        orientation = 'h',
        marker=bar_style,
        opacity=0.75
    )]
    figure_layout = Layout(
        title= "Median Salary of Each Department in {}".format(campus),
        # wide left margin leaves room for the long category labels
        margin=Margin(l=360),
    )
    return Figure(data=traces, layout=figure_layout)
        

In [4]:
# Q2: overlay the salary histograms of two departments
# (default campus and year of createFigHist).
fig = createFigHist(departments=['Information Sciences', 'History'])
py.iplot(fig, filename='salaryHist3')

In [5]:
# Q1: salary histogram of a single department for 2017
fig = createFigHist(departments=['Information Sciences'], year=2017)
py.iplot(fig, filename='salaryHistIS')

In [6]:
# Q3: compare the Computer Science department across UIC, UIS, UIUC
fig = createFigDiffDepAndCampus(
    ['Computer Science'] * 3,
    ["Chicago", "Springfield", "Urbana-Champaign"],
    year=2017,
)
py.iplot(fig, filename='salaryPieCS')

In [7]:
# Q3: compare the Information Science department across UIC, UIS, UIUC.
# A different department name is paired with each campus.
# The result must be bound to `fig`, the name py.iplot receives below;
# otherwise the figure left over from an earlier cell is uploaded instead.
fig = createFigDiffDepAndCampus(
    ["Information/Decision Sciences", "Management Information Systems", "Information Sciences"],
    ["Chicago", "Springfield", "Urbana-Champaign"],
    year=2017,
)
py.iplot(fig, filename='salaryPieIS')

In [8]:
# Q4: bar chart of the 25 departments with the highest median salary
# (default campus and year of createFigListAllDepSalary).
fig = createFigListAllDepSalary(top=25)
py.iplot(fig, filename='listSalaryBar')

In [9]:
# Q4 (per job): bar chart of the 50 department/job-title pairs with the
# highest median salary (default campus and year).
fig = createFigListAllDepSalary(showJob=True, top=50)
py.iplot(fig, filename='listSalaryBarJob')