In [1]:
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
from pyspark.sql.types import DateType, StringType, FloatType, IntegerType
from pyspark.sql.window import Window
import pyspark.sql.functions as F

In [2]:
spark = SparkSession.builder \
    .master("local[*]") \
    .appName("PySpark City Payroll Analysis") \
    .config("spark.driver.memory", "2g") \
    .config("spark.executor.memory", "2g") \
    .getOrCreate()

In [3]:
nycdf = spark.read.option("header",True).csv("City_Employee_Payroll_New_York_City.csv")
ladf = spark.read.option("header",True).csv("City_Employee_Payroll_Los_Angeles.csv")

In [4]:
nycdf.printSchema()

root
 |-- Fiscal Year: string (nullable = true)
 |-- Payroll Number: string (nullable = true)
 |-- Agency Name: string (nullable = true)
 |-- Last Name: string (nullable = true)
 |-- First Name: string (nullable = true)
 |-- Mid Init: string (nullable = true)
 |-- Agency Start Date: string (nullable = true)
 |-- Work Location Borough: string (nullable = true)
 |-- Title Description: string (nullable = true)
 |-- Leave Status as of June 30: string (nullable = true)
 |-- Base Salary: string (nullable = true)
 |-- Pay Basis: string (nullable = true)
 |-- Regular Hours: string (nullable = true)
 |-- Regular Gross Paid: string (nullable = true)
 |-- OT Hours: string (nullable = true)
 |-- Total OT Paid: string (nullable = true)
 |-- Total Other Pay: string (nullable = true)



In [5]:
ladf.printSchema()

root
 |-- RECORD_NBR: string (nullable = true)
 |-- PAY_YEAR: string (nullable = true)
 |-- DEPARTMENT_NO: string (nullable = true)
 |-- DEPARTMENT_TITLE: string (nullable = true)
 |-- JOB_CLASS_PGRADE: string (nullable = true)
 |-- JOB_TITLE: string (nullable = true)
 |-- EMPLOYMENT_TYPE: string (nullable = true)
 |-- JOB_STATUS: string (nullable = true)
 |-- MOU: string (nullable = true)
 |-- MOU_TITLE: string (nullable = true)
 |-- REGULAR_PAY: string (nullable = true)
 |-- OVERTIME_PAY: string (nullable = true)
 |-- ALL_OTHER_PAY: string (nullable = true)
 |-- TOTAL_PAY: string (nullable = true)
 |-- CITY_RETIREMENT_CONTRIBUTIONS: string (nullable = true)
 |-- BENEFIT_PAY: string (nullable = true)
 |-- GENDER: string (nullable = true)
 |-- ETHNICITY: string (nullable = true)



In [6]:
nycdf.sample(False, 0.0002, 999).show(10)

+-----------+--------------+-----------------+-----------+----------+--------+-----------------+---------------------+--------------------+--------------------------+-----------+---------+-------------+------------------+--------+-------------+---------------+
|Fiscal Year|Payroll Number|      Agency Name|  Last Name|First Name|Mid Init|Agency Start Date|Work Location Borough|   Title Description|Leave Status as of June 30|Base Salary|Pay Basis|Regular Hours|Regular Gross Paid|OT Hours|Total OT Paid|Total Other Pay|
+-----------+--------------+-----------------+-----------+----------+--------+-----------------+---------------------+--------------------+--------------------------+-----------+---------+-------------+------------------+--------+-------------+---------------+
|       2020|            56|POLICE DEPARTMENT|     WALKER|     JAMAL|       H|       01/10/2005|             BROOKLYN|     P.O. DA DET GR3|                    ACTIVE|   96502.00|per Annum|         2080|          95892

In [7]:
nycduplicated = nycdf.groupby(nycdf.columns).count().where('count > 1').sort('count', ascending=False)
print(f'There are {nycduplicated.count()} unique records that are duplicated.')
nycduplicated.show()

There are 14 unique records that are duplicated.
+-----------+--------------+--------------------+---------+----------+--------+-----------------+---------------------+--------------------+--------------------------+-----------+---------+-------------+------------------+--------+-------------+---------------+-----+
|Fiscal Year|Payroll Number|         Agency Name|Last Name|First Name|Mid Init|Agency Start Date|Work Location Borough|   Title Description|Leave Status as of June 30|Base Salary|Pay Basis|Regular Hours|Regular Gross Paid|OT Hours|Total OT Paid|Total Other Pay|count|
+-----------+--------------+--------------------+---------+----------+--------+-----------------+---------------------+--------------------+--------------------------+-----------+---------+-------------+------------------+--------+-------------+---------------+-----+
|       2019|           903|DISTRICT ATTORNEY...|     null|      null|    null|       01/30/2019|             BROOKLYN|RACKETS INVESTIGA...|       

In [8]:
nycdf = nycdf.distinct()
nycdf = nycdf.withColumn('Agency Start Date', F.col('Agency Start Date').cast(DateType())) \
        .withColumn('Base Salary', F.col('Base Salary').cast(FloatType())) \
        .withColumn('Regular Hours', F.col('Regular Hours').cast(IntegerType())) \
        .withColumn('Regular Gross Paid', F.col('Regular Gross Paid').cast(FloatType())) \
        .withColumn('OT Hours', F.col('OT Hours').cast(FloatType())) \
        .withColumn('Total OT Paid', F.col('Total OT Paid').cast(FloatType())) \
        .withColumn('Total Other Pay', F.col('Total Other Pay').cast(FloatType())) \
        .withColumn("Row Index", F.monotonically_increasing_id())

nycdf.printSchema()

root
 |-- Fiscal Year: string (nullable = true)
 |-- Payroll Number: string (nullable = true)
 |-- Agency Name: string (nullable = true)
 |-- Last Name: string (nullable = true)
 |-- First Name: string (nullable = true)
 |-- Mid Init: string (nullable = true)
 |-- Agency Start Date: date (nullable = true)
 |-- Work Location Borough: string (nullable = true)
 |-- Title Description: string (nullable = true)
 |-- Leave Status as of June 30: string (nullable = true)
 |-- Base Salary: float (nullable = true)
 |-- Pay Basis: string (nullable = true)
 |-- Regular Hours: integer (nullable = true)
 |-- Regular Gross Paid: float (nullable = true)
 |-- OT Hours: float (nullable = true)
 |-- Total OT Paid: float (nullable = true)
 |-- Total Other Pay: float (nullable = true)
 |-- Row Index: long (nullable = false)



In [9]:
ladf.sample(False, 0.0002, 999).show(10)

+------------+--------+-------------+----------------+----------------+--------------------+---------------+----------+---+--------------------+-----------+------------+-------------+---------+-----------------------------+-----------+------+---------+
|  RECORD_NBR|PAY_YEAR|DEPARTMENT_NO|DEPARTMENT_TITLE|JOB_CLASS_PGRADE|           JOB_TITLE|EMPLOYMENT_TYPE|JOB_STATUS|MOU|           MOU_TITLE|REGULAR_PAY|OVERTIME_PAY|ALL_OTHER_PAY|TOTAL_PAY|CITY_RETIREMENT_CONTRIBUTIONS|BENEFIT_PAY|GENDER|ETHNICITY|
+------------+--------+-------------+----------------+----------------+--------------------+---------------+----------+---+--------------------+-----------+------------+-------------+---------+-----------------------------+-----------+------+---------+
|303532353731|    2016|           98| WATER AND POWER|          1539-5|            MGT ASST|      FULL_TIME|    ACTIVE|  4|ADMINISTRATIVE RE...|   79284.64|     3478.23|      1452.68| 84215.55|                      5233.00|   22763.98|FEMALE

In [10]:
laduplicated = ladf.groupby(ladf.columns).count().where('count > 1').sort('count', ascending=False)
print(f'There are {laduplicated.count()} unique records that are duplicated.')
laduplicated.show()

There are 0 unique records that are duplicated.
+----------+--------+-------------+----------------+----------------+---------+---------------+----------+---+---------+-----------+------------+-------------+---------+-----------------------------+-----------+------+---------+-----+
|RECORD_NBR|PAY_YEAR|DEPARTMENT_NO|DEPARTMENT_TITLE|JOB_CLASS_PGRADE|JOB_TITLE|EMPLOYMENT_TYPE|JOB_STATUS|MOU|MOU_TITLE|REGULAR_PAY|OVERTIME_PAY|ALL_OTHER_PAY|TOTAL_PAY|CITY_RETIREMENT_CONTRIBUTIONS|BENEFIT_PAY|GENDER|ETHNICITY|count|
+----------+--------+-------------+----------------+----------------+---------+---------------+----------+---+---------+-----------+------------+-------------+---------+-----------------------------+-----------+------+---------+-----+
+----------+--------+-------------+----------------+----------------+---------+---------------+----------+---+---------+-----------+------------+-------------+---------+-----------------------------+-----------+------+---------+-----+



In [11]:
ladf = ladf.withColumn('REGULAR_PAY', F.col('REGULAR_PAY').cast(FloatType())) \
        .withColumn('OVERTIME_PAY', F.col('OVERTIME_PAY').cast(FloatType())) \
        .withColumn('ALL_OTHER_PAY', F.col('ALL_OTHER_PAY').cast(FloatType())) \
        .withColumn('TOTAL_PAY', F.col('TOTAL_PAY').cast(FloatType())) \
        .withColumn('CITY_RETIREMENT_CONTRIBUTIONS', F.col('CITY_RETIREMENT_CONTRIBUTIONS').cast(FloatType())) \
        .withColumn('BENEFIT_PAY', F.col('BENEFIT_PAY').cast(FloatType()))

ladf.printSchema()

root
 |-- RECORD_NBR: string (nullable = true)
 |-- PAY_YEAR: string (nullable = true)
 |-- DEPARTMENT_NO: string (nullable = true)
 |-- DEPARTMENT_TITLE: string (nullable = true)
 |-- JOB_CLASS_PGRADE: string (nullable = true)
 |-- JOB_TITLE: string (nullable = true)
 |-- EMPLOYMENT_TYPE: string (nullable = true)
 |-- JOB_STATUS: string (nullable = true)
 |-- MOU: string (nullable = true)
 |-- MOU_TITLE: string (nullable = true)
 |-- REGULAR_PAY: float (nullable = true)
 |-- OVERTIME_PAY: float (nullable = true)
 |-- ALL_OTHER_PAY: float (nullable = true)
 |-- TOTAL_PAY: float (nullable = true)
 |-- CITY_RETIREMENT_CONTRIBUTIONS: float (nullable = true)
 |-- BENEFIT_PAY: float (nullable = true)
 |-- GENDER: string (nullable = true)
 |-- ETHNICITY: string (nullable = true)



In [12]:
nycdf.createOrReplaceTempView("nycpayroll")
tab1 = spark.sql("SELECT DISTINCT `Agency Name` AS Agency FROM nycpayroll ORDER BY Agency")
print(f"There are a total of {tab1.count()} different agencies in New York City Government.")
tab1.show(tab1.count(), truncate=False)

There are a total of 165 different agencies in New York City Government.
+------------------------------+
|Agency                        |
+------------------------------+
|ADMIN FOR CHILDREN'S SVCS     |
|ADMIN TRIALS AND HEARINGS     |
|BOARD OF CORRECTION           |
|BOARD OF CORRECTIONS          |
|BOARD OF ELECTION             |
|BOARD OF ELECTION POLL WORKERS|
|BOROUGH PRESIDENT-BRONX       |
|BOROUGH PRESIDENT-BROOKLYN    |
|BOROUGH PRESIDENT-QUEENS      |
|BOROUGH PRESIDENT-STATEN IS   |
|BRONX COMMUNITY BOARD #1      |
|BRONX COMMUNITY BOARD #10     |
|BRONX COMMUNITY BOARD #11     |
|BRONX COMMUNITY BOARD #12     |
|BRONX COMMUNITY BOARD #2      |
|BRONX COMMUNITY BOARD #3      |
|BRONX COMMUNITY BOARD #4      |
|BRONX COMMUNITY BOARD #5      |
|BRONX COMMUNITY BOARD #6      |
|BRONX COMMUNITY BOARD #7      |
|BRONX COMMUNITY BOARD #8      |
|BRONX COMMUNITY BOARD #9      |
|BRONX DISTRICT ATTORNEY       |
|BROOKLYN COMMUNITY BOARD #1   |
|BROOKLYN COMMUNITY BOARD #10  |
|BR

In [20]:
tab2 = spark.sql("""SELECT `Fiscal Year` AS Year,
                    SUM(`Regular Gross Paid` + `Total OT Paid` + `Total Other Pay`) AS TotalPay
                    FROM nycpayroll
                    GROUP BY Year
                    ORDER BY TotalPay DESC
        """)

tab2.show(truncate=False)

+----+---------------------+
|Year|TotalPay             |
+----+---------------------+
|2020|3.0418574408038723E10|
|2019|2.9516889519514572E10|
|2018|2.754931705163799E10 |
|2017|2.71458727757124E10  |
|2016|2.551809716312446E10 |
|2015|2.4334088182743454E10|
|2014|2.286248314362995E10 |
+----+---------------------+



In [13]:
tab3 = spark.sql("""SELECT *, ROUND(TotalPay/NumberOfEmployees, 2) AS AveragePayOfAgency
                    FROM (SELECT `Agency Name` AS Agency, 
                    SUM(`Regular Gross Paid` + `Total OT Paid` + `Total Other Pay`) AS TotalPay,
                    COUNT(`Row Index`) AS NumberOfEmployees FROM nycpayroll
                    WHERE `Fiscal Year` == 2020
                    GROUP BY Agency
                    ORDER BY TotalPay DESC, NumberOfEmployees DESC
                    LIMIT 25)
        """)

tab3.show(25, truncate=False)

+------------------------------+--------------------+-----------------+------------------+
|Agency                        |TotalPay            |NumberOfEmployees|AveragePayOfAgency|
+------------------------------+--------------------+-----------------+------------------+
|DEPT OF ED PEDAGOGICAL        |9.971256719987915E9 |114999           |86707.33          |
|POLICE DEPARTMENT             |5.356858772763836E9 |60316            |88813.23          |
|FIRE DEPARTMENT               |1.8644133478075857E9|19193            |97140.28          |
|DEPT OF ED PARA PROFESSIONALS |1.2480015119021907E9|41353            |30179.23          |
|DEPARTMENT OF CORRECTION      |1.0949519880879471E9|12723            |86060.83          |
|DEPARTMENT OF EDUCATION ADMIN |1.0548904804637426E9|17869            |59034.67          |
|DEPARTMENT OF SANITATION      |1.0275529499124413E9|11787            |87176.8           |
|NYC HOUSING AUTHORITY         |8.617265504048588E8 |13978            |61648.77          |

In [22]:
tab4 = spark.sql("""SELECT Agency, TotalHours, ROUND(TotalWorkPay/TotalHours, 2) AS AverageHourlyPay
                    FROM (SELECT `Agency Name` AS Agency,
                    SUM(`Regular Hours` + `OT Hours`) AS TotalHours,
                    SUM(`Regular Gross Paid` + `Total OT Paid`) AS TotalWorkPay
                    FROM nycpayroll
                    WHERE `FISCAL YEAR` == 2020
                    GROUP BY Agency
                    ORDER BY TotalHours DESC
                    LIMIT 30)
        """)

tab4.show(30, truncate=False)

+------------------------------+--------------------+----------------+
|Agency                        |TotalHours          |AverageHourlyPay|
+------------------------------+--------------------+----------------+
|POLICE DEPARTMENT             |1.268098118532486E8 |37.21           |
|FIRE DEPARTMENT               |4.362525443268359E7 |37.77           |
|DEPARTMENT OF CORRECTION      |2.724735552547455E7 |36.2            |
|DEPARTMENT OF EDUCATION ADMIN |2.47645985E7        |39.8            |
|NYC HOUSING AUTHORITY         |2.4324237E7         |33.74           |
|HRA/DEPT OF SOCIAL SERVICES   |2.4090660520507812E7|32.43           |
|DEPARTMENT OF SANITATION      |2.375066957244873E7 |38.16           |
|DEPT OF PARKS & RECREATION    |1.4635142969848633E7|28.15           |
|ADMIN FOR CHILDREN'S SVCS     |1.4023586790222168E7|37.96           |
|DEPT OF HEALTH/MENTAL HYGIENE |1.2891069119873047E7|40.06           |
|DEPT OF ENVIRONMENT PROTECTION|1.2672862378845215E7|43.99           |
|DEPAR

In [28]:
tab4 = spark.sql("""SELECT Agency, NumberOfEmployees, ROUND(TotalWorkPay/TotalHours, 2) AS AverageHourlyPay
                    FROM (SELECT `Agency Name` AS Agency, COUNT(`Row Index`) AS NumberOfEmployees,
                    SUM(`Regular Hours` + `OT Hours`) AS TotalHours,
                    SUM(`Regular Gross Paid` + `Total OT Paid`) AS TotalWorkPay
                    FROM nycpayroll
                    WHERE `FISCAL YEAR` = 2020 AND `Agency Name` LIKE '%COLLEGE%' 
                    OR `Agency Name` LIKE 'DEPT OF ED%'
                    GROUP BY Agency
                    ORDER BY TotalHours DESC)
        """)

tab4.show(30, truncate=False)

+------------------------------+-----------------+----------------+
|Agency                        |NumberOfEmployees|AverageHourlyPay|
+------------------------------+-----------------+----------------+
|COMMUNITY COLLEGE (MANHATTAN) |6539             |60.66           |
|COMMUNITY COLLEGE (LAGUARDIA) |4902             |55.83           |
|COMMUNITY COLLEGE (KINGSBORO) |4085             |54.98           |
|COMMUNITY COLLEGE (QUEENSBORO)|4222             |62.23           |
|COMMUNITY COLLEGE (BRONX)     |3484             |56.51           |
|COMMUNITY COLLEGE (HOSTOS)    |2353             |51.9            |
|GUTTMAN COMMUNITY COLLEGE     |795              |57.94           |
|HUNTER COLLEGE HIGH SCHOOL    |351              |52.97           |
|DEPT OF ED PER DIEM TEACHERS  |90784            |null            |
|DEPT OF ED PER SESSION TEACHER|608565           |null            |
|DEPT OF ED PEDAGOGICAL        |758360           |null            |
|DEPT OF ED PARA PROFESSIONALS |245259          

In [57]:
colleges = ['COMMUNITY COLLEGE (MANHATTAN)', 'COMMUNITY COLLEGE (LAGUARDIA)', 'COMMUNITY COLLEGE (KINGSBORO)', 'COMMUNITY COLLEGE (QUEENSBORO)', 'COMMUNITY COLLEGE (BRONX)', 'COMMUNITY COLLEGE (HOSTOS)', 'GUTTMAN COMMUNITY COLLEGE']
nycdf.where((F.col('Fiscal Year') == 2020) & (F.col('Agency Name').isin(colleges))) \
.groupby('Title Description').count().sort(F.desc('count')).show()

+--------------------+-----+
|   Title Description|count|
+--------------------+-----+
|   COLLEGE ASSISTANT| 6917|
|    ADJUNCT LECTURER| 4131|
|ADJUNCT ASSISTANT...| 1377|
|CONTINUING EDUCAT...| 1350|
|NON-TEACHING ADJU...| 1305|
| ASSISTANT PROFESSOR| 1110|
| ASSOCIATE PROFESSOR|  981|
|           PROFESSOR|  844|
|HIGHER EDUCATION ...|  761|
|            LECTURER|  672|
|CUNY CUSTODIAL AS...|  537|
|CUNY OFFICE ASSIS...|  512|
|NON-TEACHING ADJU...|  509|
|ADJUNCT COLLEGE L...|  476|
|        STUDENT AIDE|  431|
|    ASSISTANT TO HEO|  415|
|HIGHER EDUCATION ...|  380|
|NON-TEACHING ADJU...|  361|
|HIGHER EDUCATION ...|  299|
|NON-TEACHING ADJU...|  212|
+--------------------+-----+
only showing top 20 rows



In [70]:
tab5 = spark.sql("""SELECT Position, `Pay Basis`, Count,
            ROUND(TotalWorkPay/TotalHours, 2) AS AverageHourlyPay, 
            MaxHourlyPay, MinHourlyPay
            FROM (SELECT `Title Description` AS Position, `Pay Basis`, COUNT(`Row Index`) AS Count,
            SUM(`Regular Hours` + `OT Hours`) AS TotalHours,
            SUM(`Regular Gross Paid` + `Total OT Paid`) AS TotalWorkPay,
            ROUND(MAX((`Regular Gross Paid` + `Total OT Paid`)/(`Regular Hours` + `OT Hours`)), 2) AS MaxHourlyPay,
            ROUND(MIN((`Regular Gross Paid` + `Total OT Paid`)/(`Regular Hours` + `OT Hours`)), 2) AS MinHourlyPay
            FROM nycpayroll
            WHERE `Fiscal Year` = 2020 AND `Agency Name` LIKE '%COMMUNITY COLLEGE%'
            GROUP BY Position, `Pay Basis`
            ORDER BY TotalHours DESC) 
            ORDER BY AverageHourlyPay DESC
        """)

tab5.show(truncate=False)

+----------------------------+---------+-----+----------------+------------+------------+
|Position                    |Pay Basis|Count|AverageHourlyPay|MaxHourlyPay|MinHourlyPay|
+----------------------------+---------+-----+----------------+------------+------------+
|DISTINGUISHED PROFESSOR     |per Annum|1    |658.83          |658.83      |658.83      |
|UNIVERSITY PROFESSOR        |per Annum|2    |623.56          |630.89      |620.2       |
|PROFESSOR                   |per Annum|540  |444.06          |1107.71     |35.68       |
|CLINICAL PROFESSOR          |per Annum|1    |411.96          |411.96      |411.96      |
|ASSOCIATE PROFESSOR         |per Annum|639  |345.15          |516.51      |47.33       |
|NON-TEACHING ADJUNCT II     |per Day  |2    |339.01          |427.78      |294.63      |
|EOC LECTURER                |per Annum|10   |309.28          |331.72      |252.58      |
|ASSISTANT PROFESSOR         |per Annum|773  |296.06          |438.49      |-298.44     |
|LECTURER 

In [72]:
nycdf.filter((F.col('Fiscal Year') == 2020) & (F.col('Title Description') == 'ASSISTANT PROFESSOR') 
             & (F.col('Regular Gross Paid') + F.col('Total OT Paid') < 0)).show()

+-----------+--------------+--------------------+---------+----------+--------+-----------------+---------------------+-------------------+--------------------------+-----------+---------+-------------+------------------+--------+-------------+---------------+-------------+
|Fiscal Year|Payroll Number|         Agency Name|Last Name|First Name|Mid Init|Agency Start Date|Work Location Borough|  Title Description|Leave Status as of June 30|Base Salary|Pay Basis|Regular Hours|Regular Gross Paid|OT Hours|Total OT Paid|Total Other Pay|    Row Index|
+-----------+--------------+--------------------+---------+----------+--------+-----------------+---------------------+-------------------+--------------------------+-----------+---------+-------------+------------------+--------+-------------+---------------+-------------+
|       2020|           468|COMMUNITY COLLEGE...|   LAUCER|    HAMIDE|    null|             null|                BRONX|ASSISTANT PROFESSOR|                  SEASONAL|     151.

In [74]:
nycdf.filter((F.col('Fiscal Year') == 2020) & (F.col('Last Name').isin({'LAUCER', 'RONCA'})) 
             & (F.col('First Name').isin({'HAMIDE', 'KATHLEEN'}))).show()

+-----------+--------------+--------------------+---------+----------+--------+-----------------+---------------------+--------------------+--------------------------+-----------+---------+-------------+------------------+--------+-------------+---------------+-------------+
|Fiscal Year|Payroll Number|         Agency Name|Last Name|First Name|Mid Init|Agency Start Date|Work Location Borough|   Title Description|Leave Status as of June 30|Base Salary|Pay Basis|Regular Hours|Regular Gross Paid|OT Hours|Total OT Paid|Total Other Pay|    Row Index|
+-----------+--------------+--------------------+---------+----------+--------+-----------------+---------------------+--------------------+--------------------------+-----------+---------+-------------+------------------+--------+-------------+---------------+-------------+
|       2020|           468|COMMUNITY COLLEGE...|    RONCA|  KATHLEEN|       A|             null|                BRONX|NON-TEACHING ADJU...|                    CEASED|     

In [76]:
nycdf.filter((F.col('Fiscal Year') == 2020) & (F.col('Title Description').isin({'DISTINGUISHED PROFESSOR', 'UNIVERSITY PROFESSOR'}))).show()

+-----------+--------------+--------------------+----------+----------+--------+-----------------+---------------------+--------------------+--------------------------+-----------+---------+-------------+------------------+--------+-------------+---------------+-------------+
|Fiscal Year|Payroll Number|         Agency Name| Last Name|First Name|Mid Init|Agency Start Date|Work Location Borough|   Title Description|Leave Status as of June 30|Base Salary|Pay Basis|Regular Hours|Regular Gross Paid|OT Hours|Total OT Paid|Total Other Pay|    Row Index|
+-----------+--------------+--------------------+----------+----------+--------+-----------------+---------------------+--------------------+--------------------------+-----------+---------+-------------+------------------+--------+-------------+---------------+-------------+
|       2020|           466|COMMUNITY COLLEGE...|     PEREZ|   ANTONIO|    null|             null|            MANHATTAN|UNIVERSITY PROFESSOR|                    ACTIVE| 