# Processing Pasadena (Texas) employee payroll

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import datetime as dt
import matplotlib.pyplot as plt
import altair as alt

In [3]:
# Widen DataFrame rendering in the notebook: show up to 100 columns and
# 1000 rows, and never truncate long cell values.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

### Import wages

In [4]:
# Gross wages per person. The file is read without a header row, so the three
# column names are supplied here.
wages = pd.read_csv(
    "data/raw/payroll-gross-wages.csv",
    header=None,
    names=["first_wages", "last_wages", "wages"],
)

In [5]:
# "FIRST LAST" string used later as the join key; it is null when either part is.
wages["full_name_wages"] = wages["first_wages"].str.cat(wages["last_wages"], sep=" ")

In [6]:
# Spot check: wage rows whose full name contains "MCCLAIN" (null names never match).
wages.loc[wages["full_name_wages"].str.contains("MCCLAIN", na=False)]

Unnamed: 0,first_wages,last_wages,wages,full_name_wages
670,BRANDON M,MCCLAIN,102921.02,BRANDON M MCCLAIN


In [7]:
# Strip thousands separators and convert the wage amounts to float.
# Casting to str first keeps this cell safe to re-run, and safe if pandas already
# parsed the column as numeric: the .str accessor raises on a float column.
wages["wages"] = (
    wages["wages"].astype(str).str.replace(",", "", regex=False).astype(float)
)

In [8]:
# Number of wage records loaded.
wages.shape[0]

1243

### Import employee details

In [9]:
# Employee roster, read without a header row (column names are supplied here).
# ee_number and department are kept as text; hiredate is parsed to datetime.
employees = pd.read_csv(
    "data/raw/payroll-employee-list.csv",
    header=None,
    names=[
        "first_employee",
        "last_employee",
        "title",
        "departmentid",
        "department",
        "gender",
        "hiredate",
        "ee_number",
    ],
    dtype={"department": str, "ee_number": str},
    parse_dates=["hiredate"],
)

In [10]:
# Keep the first occurrence of each fully identical roster row (index is preserved).
employees = employees.loc[~employees.duplicated()].copy()

In [11]:
# "FIRST LAST" string used later as the join key against the wages table.
employees["full_name_employee"] = employees["first_employee"].str.cat(
    employees["last_employee"], sep=" "
)

In [12]:
# Preview the first five roster rows.
employees.iloc[:5]

Unnamed: 0,first_employee,last_employee,title,departmentid,department,gender,hiredate,ee_number,full_name_employee
0,CHRISTOPHER A,AARON,NPO5 POLICE OFFICER,13421,PATROL,M,2013-06-17,25,CHRISTOPHER A AARON
1,DANIEL J,AARON,NPO5 POLICE OFFICER,13421,PATROL,M,2007-11-26,20,DANIEL J AARON
2,JOE A,ABILEZ,N01#*PT POOLED STR & BRDG,14400,STREET/BRIDGE,M,2019-06-04,52,JOE A ABILEZ
3,SELENE Z,ACEVEDO,E02 FIN ASST HOUS ADMIN,62,HOUSING ASSIST-VOUCHERS,F,2016-08-22,514,SELENE Z ACEVEDO
4,ALEJANDRA,ACOSTA,N01#PT POOLED AQUATICS,15220,AQUATICS,F,2020-06-10,183,ALEJANDRA ACOSTA


In [13]:
# Number of roster rows left after de-duplication.
employees.shape[0]

1270

In [14]:
# Spot check: roster rows whose full name contains "ALONSO" (matches first or
# last name). na=False treats a missing name as "no match" instead of producing
# a null in the mask, which boolean indexing rejects.
employees[employees["full_name_employee"].str.contains("ALONSO", na=False)]

Unnamed: 0,first_employee,last_employee,title,departmentid,department,gender,hiredate,ee_number,full_name_employee
20,JORGE ANTONIO,ALONSO,N11 MAINT SUPERVISOR,20410,BLDG & EQUIPMENT,M,1991-10-23,115,JORGE ANTONIO ALONSO
21,JULISSA A,ALONSO,N05*PT REC ATTENDANT,15230,RECREATION CENTERS,F,2018-05-21,112,JULISSA A ALONSO
22,LILIANA,ALONSO,N06 LIBRARY ASSISTANT I,17110,LIBRARY-ADMIN SERVICES,F,2013-12-23,119,LILIANA ALONSO
367,ALONSO J,GARCIA,N07 WATER BILL ASSIST II,30700,WATER BILLING,M,2020-03-09,2438,ALONSO J GARCIA


### Merge employee list with wages

In [15]:
# Inner-join the roster to the wages on the constructed full-name strings; only
# names present in both files survive.
# pandas matches null join keys to each other, so rows without a full name are
# dropped from both sides first to avoid pairing unrelated unnamed records.
# NOTE(review): full names are not guaranteed unique; two people sharing a name
# would each be paired with both wage rows. Confirm against ee_number if the
# wages file ever carries it.
df = pd.merge(
    employees.dropna(subset=["full_name_employee"]),
    wages.dropna(subset=["full_name_wages"]),
    how="inner",
    left_on="full_name_employee",
    right_on="full_name_wages",
)

In [16]:
# Number of employee rows that found a wage record.
df.shape[0]

1182

In [17]:
# Split each title at its first space: the leading code (e.g. "NPO5") becomes
# title_id and the remainder becomes title_description.
# `n` is passed by keyword because the positional form is deprecated and raises
# TypeError on pandas 2.x.
df[["title_id", "title_description"]] = df["title"].str.split(" ", n=1, expand=True)

In [18]:
# Reference date for the tenure calculation, pinned to the date of the original
# run (2021-05-25, the date implied by the saved outputs). The day-count window
# used further down (1500-1800 days) only selects the intended hiring cohort
# relative to this date; reading the wall clock instead would silently change
# the results and the exported files on any later re-run.
# Update this date deliberately when the payroll data is refreshed.
today = pd.Timestamp("2021-05-25")

In [19]:
# Whole days between each hire date and the reference date `today`.
df = df.assign(time_since=(today - df["hiredate"]).dt.days)

---

### How many officers? 

In [20]:
# Count rows whose title description is exactly "POLICE OFFICER" (any department).
int(df["title_description"].eq("POLICE OFFICER").sum())

236

In [21]:
# Police officers assigned to the PATROL department.
patrol_officer = df.loc[
    df["title_description"].eq("POLICE OFFICER") & df["department"].eq("PATROL")
]

In [22]:
# Five highest-paid patrol officers.
patrol_officer.sort_values(by="wages", ascending=False).iloc[:5]

Unnamed: 0,first_employee,last_employee,title,departmentid,department,gender,hiredate,ee_number,full_name_employee,first_wages,last_wages,wages,full_name_wages,title_id,title_description,time_since
300,MARK ALAN,FERGUSON,NPO5 POLICE OFFICER,13421,PATROL,M,1996-03-04,2122,MARK ALAN FERGUSON,MARK ALAN,FERGUSON,137937.51,MARK ALAN FERGUSON,NPO5,POLICE OFFICER,9213
323,STEVEN J,FOWLER,NPO5 POLICE OFFICER,13421,PATROL,M,1991-06-03,2264,STEVEN J FOWLER,STEVEN J,FOWLER,132889.14,STEVEN J FOWLER,NPO5,POLICE OFFICER,10949
870,JERRY C,ROCK,NPO5 POLICE OFFICER,13421,PATROL,M,1995-07-10,5525,JERRY C ROCK,JERRY C,ROCK,132237.54,JERRY C ROCK,NPO5,POLICE OFFICER,9451
332,ALBERTO,GALVAN,NPO5 POLICE OFFICER,13421,PATROL,M,1997-04-22,2397,ALBERTO GALVAN,ALBERTO,GALVAN,132175.52,ALBERTO GALVAN,NPO5,POLICE OFFICER,8799
917,RIGOBERTO RUBEN,SALDIVAR,NPO5 POLICE OFFICER,13421,PATROL,M,1999-04-21,5750,RIGOBERTO RUBEN SALDIVAR,RIGOBERTO RUBEN,SALDIVAR,132120.61,RIGOBERTO RUBEN SALDIVAR,NPO5,POLICE OFFICER,8070


In [23]:
# Five lowest-paid patrol officers (end of the descending wage ranking).
patrol_officer.sort_values(by="wages", ascending=False).iloc[-5:]

Unnamed: 0,first_employee,last_employee,title,departmentid,department,gender,hiredate,ee_number,full_name_employee,first_wages,last_wages,wages,full_name_wages,title_id,title_description,time_since
279,SARAH L,ELLIOTT,NPO5 POLICE OFFICER,13421,PATROL,F,2020-05-04,1916,SARAH L ELLIOTT,SARAH L,ELLIOTT,45140.8,SARAH L ELLIOTT,NPO5,POLICE OFFICER,386
957,SKYLER L,SCHREURS,NPO5 POLICE OFFICER,13421,PATROL,M,2020-05-04,5877,SKYLER L SCHREURS,SKYLER L,SCHREURS,45140.8,SKYLER L SCHREURS,NPO5,POLICE OFFICER,386
1002,KERI L,SMITH,NPO5 POLICE OFFICER,13421,PATROL,F,2020-05-04,6067,KERI L SMITH,KERI L,SMITH,45140.8,KERI L SMITH,NPO5,POLICE OFFICER,386
263,IAN G,DOSKOCIL,NPO5 POLICE OFFICER,13421,PATROL,M,2020-05-04,1762,IAN G DOSKOCIL,IAN G,DOSKOCIL,45140.8,IAN G DOSKOCIL,NPO5,POLICE OFFICER,386
962,MARCELLOS,SEALE,NPO5 POLICE OFFICER,13421,PATROL,M,2017-07-17,5941,MARCELLOS SEALE,MARCELLOS,SEALE,26777.69,MARCELLOS SEALE,NPO5,POLICE OFFICER,1408


In [24]:
# Look up patrol officers whose full name contains "CARTER".
patrol_officer.loc[lambda frame: frame["full_name_wages"].str.contains("CARTER")]

Unnamed: 0,first_employee,last_employee,title,departmentid,department,gender,hiredate,ee_number,full_name_employee,first_wages,last_wages,wages,full_name_wages,title_id,title_description,time_since
160,JASON M,CARTER,NPO5 POLICE OFFICER,13421,PATROL,M,2017-01-03,1018,JASON M CARTER,JASON M,CARTER,96359.95,JASON M CARTER,NPO5,POLICE OFFICER,1603


In [25]:
# Patrol officers hired strictly between 1500 and 1800 days before the reference
# date used for time_since (roughly 4.1 to 4.9 years), highest earners first.
patrol_officer.query("1500 < time_since < 1800").sort_values(
    by="wages", ascending=False
)

Unnamed: 0,first_employee,last_employee,title,departmentid,department,gender,hiredate,ee_number,full_name_employee,first_wages,last_wages,wages,full_name_wages,title_id,title_description,time_since
372,CODY A,GASSETT,NPO5 POLICE OFFICER,13421,PATROL,M,2017-01-03,2665,CODY A GASSETT,CODY A,GASSETT,111427.72,CODY A GASSETT,NPO5,POLICE OFFICER,1603
77,PAUL M,BENNETT,NPO5 POLICE OFFICER,13421,PATROL,M,2017-01-03,461,PAUL M BENNETT,PAUL M,BENNETT,103488.07,PAUL M BENNETT,NPO5,POLICE OFFICER,1603
177,ANGEL,CERVANTES,NPO5 POLICE OFFICER,13421,PATROL,M,2017-01-03,1050,ANGEL CERVANTES,ANGEL,CERVANTES,98923.56,ANGEL CERVANTES,NPO5,POLICE OFFICER,1603
779,RAY A,PEREZ,NPO5 POLICE OFFICER,13421,PATROL,M,2017-01-03,5166,RAY A PEREZ,RAY A,PEREZ,96914.8,RAY A PEREZ,NPO5,POLICE OFFICER,1603
160,JASON M,CARTER,NPO5 POLICE OFFICER,13421,PATROL,M,2017-01-03,1018,JASON M CARTER,JASON M,CARTER,96359.95,JASON M CARTER,NPO5,POLICE OFFICER,1603
1156,BRANDON S,WILSON,NPO5 POLICE OFFICER,13421,PATROL,M,2017-01-03,6960,BRANDON S WILSON,BRANDON S,WILSON,93740.71,BRANDON S WILSON,NPO5,POLICE OFFICER,1603
759,JESUS O,PAZ,NPO5 POLICE OFFICER,13421,PATROL,M,2017-01-03,5159,JESUS O PAZ,JESUS O,PAZ,92759.28,JESUS O PAZ,NPO5,POLICE OFFICER,1603
750,MICHAEL P,PALITZ,NPO5 POLICE OFFICER,13421,PATROL,M,2017-01-03,5153,MICHAEL P PALITZ,MICHAEL P,PALITZ,85379.24,MICHAEL P PALITZ,NPO5,POLICE OFFICER,1603


### Export

In [26]:
# Write every merged employee/wage row, without the DataFrame index column.
df.to_csv(path_or_buf="data/processed/all_employees.csv", index=False)

In [27]:
# Write the patrol police officers subset, without the DataFrame index column.
patrol_officer.to_csv(
    path_or_buf="data/processed/all_patrol_officers.csv", index=False
)

In [28]:
# Export the patrol officers hired strictly between 1500 and 1800 days before the
# reference date used for time_since, ordered from highest to lowest wages.
(
    patrol_officer.query("1500 < time_since < 1800")
    .sort_values(by="wages", ascending=False)
    .to_csv("data/processed/carter_academy_peers_still_in_patrol.csv", index=False)
)