In [None]:
import pandas as pd

hr = pd.read_csv('https://raw.githubusercontent.com/byui-cse/cse450-course/master/data/hr.csv')

In [None]:
import altair as alt

In [None]:
def calcZScore (row):
  if len(hr[hr['Position'] == row.Position]) > 1 and hr[hr['Position'] == row.Position].SalaryRate.std() != 0:
    return (row.SalaryRate - hr[hr['Position'] == row.Position].SalaryRate.mean())/ (hr[hr['Position'] == row.Position].SalaryRate.std())
  else:
    return 0

**Equitable Pay**

---

Race, Marital Status, Sex

In [None]:
hourly = ['Production Technician I', 'Production Technician II']

hr['SalaryRate'] = hr.apply(lambda x: x.PayRate * 40 * 52 if x.Position in hourly else x.PayRate, axis=1)
hr['SalaryZ'] = hr.apply(calcZScore, axis=1)

In [82]:
singularPositions = ['Principal Data Architect', 'IT Manager - Support', 'Data Analyst', 'Software Engineering Manager', 'CIO', 'IT Manager - Infra', 'Data Architect', 'BI Director', 'President & CEO', 'IT Director', 'Director of Operations', 'Director of Sales', 'Enterprise Architect']
singularData = hr[~hr['Position'].isin(singularPositions)]
singularData = singularData[singularData['Position'] != 'Data Analyst']
singularData['Position'].value_counts()

Production Technician I     136
Production Technician II     57
Area Sales Manager           27
Production Manager           14
Software Engineer             9
IT Support                    8
Network Engineer              5
Sr. Network Engineer          5
Database Administrator        5
BI Developer                  4
Sales Manager                 3
Senior BI Developer           3
Accountant I                  3
Administrative Assistant      3
Sr. DBA                       2
Sr. Accountant                2
Shared Services Manager       2
IT Manager - DB               2
Data Analyst                  1
Name: Position, dtype: int64

In [78]:
bars = alt.Chart(singularData).mark_bar().encode(
    x = alt.X('Sex'),
    y = alt.Y('average(SalaryRate):Q', axis=alt.Axis(format='$', title="Average Salary Rate")),
    color = 'count()'
    ).properties(
    width = 100
)
text = text = bars.mark_text(
    align='center',
    baseline='middle',
    dy=-5  # Nudges text to right so it doesn't appear on top of the bar
).encode(
    text='count():Q',
    color=alt.value('black')
)

alt.layer(bars, text, data=singularData).facet(
    column='Position'
)

In [75]:
bars = alt.Chart(singularData).mark_bar().encode(
    x = alt.X('RaceDesc:N', axis=alt.Axis(title='Race')),
    y = alt.Y('average(SalaryRate):Q', axis=alt.Axis(format='$', title="Average Salary Rate")),
    color = 'count():Q'
    )

text = text = bars.mark_text(
    align='center',
    baseline='middle',
    dy=-5  # Nudges text to right so it doesn't appear on top of the bar
).encode(
    text='count():Q',
    color=alt.value('black')
)

alt.layer(bars, text, data=singularData).facet(
    column='Position'
)

In [76]:
bars = alt.Chart(singularData).mark_bar().encode(
    x = alt.X('MaritalDesc', axis=alt.Axis(title='Marital Status')),
    y = alt.Y('average(SalaryRate)', axis=alt.Axis(title='Average Salary Rate')),
    color = 'count()',
)
text = text = bars.mark_text(
    align='center',
    baseline='middle',
    dy=-5  # Nudges text to right so it doesn't appear on top of the bar
).encode(
    text='count():Q',
    color=alt.value('black')
)

alt.layer(bars, text, data=singularData).facet(
    column='Position'
)

**Equitable Pay Analysis**

All of the positions that have only one employee where assumed to be receiving equitable pay as there is no comparable pay for these positions.

---
**Sex** : For the hourly positions the pay is flat across the board, for the salaried positions they vary by very small amounts however that could be due to raises accrued or other factors. Areas that need a more indepth analysis would be  IT Support to see if females are getting paid less than males and Software Engineer's to see if males are getting paid less than females.

**Race** : Senior BI Developers, Shared Services Manager, Sr. Accountant, Sr. DBA, Sr. Network Engineer, and Network Engineer are the only positions that will not require a more in depth analysis as all employees are paid the same amount. Areas that drastically need evaluation are IT Manager - DB where one employee is being paid three times more than his counterpart, IT Support where white employees appear to be paid more, Production Manager has no two races getting paid the same way, both groups of Production Technicians has no two races getting paid the same way. 

**Marital Status** : Area Sales Manager, Senior BI Developer, Shared Services Manager, and Sr Accountant don't appear to have equitable pay issues all other positions that contain more than one position need a case-by-case analysis to ensure equitable pay.

