This is an analysis of the *San Francisco Salaries* dataset acquired from the following link at [Kaggle](https://www.kaggle.com/datasets/kaggle/sf-salaries?resource=download).

THe data is for San Francisco city employees from 2011-2014. This allows for a comparisons in one broad category: how compensation is distributed and how it changed over the four-year period. This encompases a variety of aspects.

Compensation distribution can refer to the whole government budger and within specific groups or positions. For example, looking at what portion of a position's compensation is overtime, and what portion of the whole budget goes towards IT Workers in general.

In [1]:
import sqlite3 as sql
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
con = sql.connect("Salaries.sqlite")
cur = con.cursor()

In [3]:
query = "SELECT name FROM sqlite_master WHERE type='table';"
cur.execute(query)
tables = cur.fetchall()

In [4]:
columnDict = {}

for i,table in enumerate(tables):
    query = "SELECT * FROM %s;" % table
    cur.execute(query)
    cols = list(cur.description)
    valuelist = []
    for j, col in enumerate(cols):
        collist = list(col)
        valuelist.append(collist[0])
    columnDict[table] = valuelist

columnDict

{('Salaries',): ['Id',
  'EmployeeName',
  'JobTitle',
  'BasePay',
  'OvertimePay',
  'OtherPay',
  'Benefits',
  'TotalPay',
  'TotalPayBenefits',
  'Year',
  'Notes',
  'Agency',
  'Status']}

In [16]:
query = """ SELECT DISTINCT Year from Salaries"""
cur.execute(query)
cur.fetchall()

[(2011,), (2012,), (2013,), (2014,)]

In [19]:
query = """ SELECT Year, COUNT(Year) from Salaries GROUP By Year"""
cur.execute(query)
cur.fetchall()

[(2011, 36159), (2012, 36766), (2013, 37606), (2014, 38123)]

In [15]:
query = """ SELECT DISTINCT Notes from Salaries"""
cur.execute(query)
cur.fetchall()

[('',)]

In [5]:
query = """ SELECT DISTINCT Agency from Salaries"""
cur.execute(query)
cur.fetchall()

[('San Francisco',)]

In [14]:
query = """ SELECT DISTINCT Status from Salaries"""
cur.execute(query)
cur.fetchall()

[('',), ('PT',), ('FT',)]

In [20]:
query = """ SELECT Status, COUNT(Status) from Salaries GROUP By Status"""
cur.execute(query)
cur.fetchall()

[('', 110535), ('FT', 22334), ('PT', 15785)]

In [6]:
query = """SELECT DISTINCT JobTitle from Salaries"""
cur.execute(query)
cur.fetchall()

[('GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY',),
 ('CAPTAIN III (POLICE DEPARTMENT)',),
 ('WIRE ROPE CABLE MAINTENANCE MECHANIC',),
 ('DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)',),
 ('ASSISTANT DEPUTY CHIEF II',),
 ('BATTALION CHIEF, (FIRE DEPARTMENT)',),
 ('DEPUTY DIRECTOR OF INVESTMENTS',),
 ('CHIEF OF DEPARTMENT, (FIRE DEPARTMENT)',),
 ('ASSISTANT CHIEF OF DEPARTMENT, (FIRE DEPARTMENT)',),
 ('EXECUTIVE CONTRACT EMPLOYEE',),
 ('DEPARTMENT HEAD V',),
 ('COMMANDER III, (POLICE DEPARTMENT)',),
 ('CAPTAIN, EMERGENCYCY MEDICAL SERVICES',),
 ('ASSISTANT MEDICAL EXAMINER',),
 ('CAPTAIN, FIRE SUPPRESSION',),
 ('CHIEF OF POLICE',),
 ('DEPUTY CHIEF III (POLICE DEPARTMENT)',),
 ('INSPECTOR III, (POLICE DEPARTMENT)',),
 ('ELECTRONIC MAINTENANCE TECHNICIAN',),
 ('ADMINISTRATOR, SFGH MEDICAL CENTER',),
 ('LIEUTENANT III (POLICE DEPARTMENT)',),
 ('FIREFIGHTER',),
 ('NURSING SUPERVISOR PSYCHIATRIC',),
 ('MAYOR',),
 ('LIEUTENANT, FIRE DEPARTMENT',),
 ('INCIDENT SUPPORT SPECIALIST',),
 ('ANEST

In [7]:
query = """ SELECT DISTINCT Status from Salaries"""
cur.execute(query)
cur.fetchall()

[('',), ('PT',), ('FT',)]

In [8]:
query = """SELECT DISTINCT JobTitle, AVG(TotalPay), AVG(TotalPayBenefits) from Salaries GROUP BY JobTitle ORDER BY TotalPayBenefits ASC"""
cur.execute(query)
cur.fetchall()

[('Not provided', 0.0, 0.0),
 ('PUBLIC SAFETY COMMUNICATIONS TECHNICIAN', 149.51, 149.51),
 ('SPECIAL ASSISTANT XIV', 673.8, 673.8),
 ('BdComm Mbr, Grp2,M=$25/Mtg', 263.76612903225805, 475.0477419354839),
 ('BOARD/COMMISSION MEMBER, GROUP II', 296.51162790697674, 296.51162790697674),
 ('BOARD/COMMISSION MEMBER, GROUP III', 638.7878787878788, 638.7878787878788),
 ('BdComm Mbr, Grp3,M=$50/Mtg', 706.698275862069, 973.1060344827587),
 ('AIRPORT ASSISTANT DEPUTY DIRECTOR, BUSINESS ADMINI', 1927.5, 1927.5),
 ('Cashier 3', 2074.6, 2074.6),
 ('BOARD/COMMISSION MEMBER, GROUP V', 1195.9044642857145, 1195.9044642857145),
 ('ASSISTANT RECREATION SUPERVISOR', 2474.41, 2474.41),
 ('Public Service Aide-Technical', 2552.55, 2774.27),
 ('Barber', 2194.5342857142855, 2281.2685714285717),
 ('BdComm Mbr, Grp5,M$100/Mo', 1216.3906024096386, 1274.827469879518),
 ('Assistant Recreation Director', 2620.614545454546, 8446.774090909092),
 ('Conversion', 3361.44, 3361.44),
 ('WAREHOUSE WORKER', 3369.71, 3369.71)

In [23]:
query = """SELECT DISTINCT JobTitle,
AVG(BasePay) / AVG(TotalPayBenefits) * 100 as BasePayPercentage,
AVG(OvertimePay) / AVG(TotalPayBenefits) * 100 as OvertimePayPercentage,
AVG(OtherPay) / AVG(TotalPayBenefits) * 100 as OtherPayPercentage,
AVG(Benefits) / AVG(TotalPayBenefits) * 100 as BenefitsPercentage
from Salaries GROUP BY JobTitle ORDER BY TotalPayBenefits ASC"""
cur.execute(query)
cur.fetchmany(5)

[('Not provided', None, None, None, None),
 ('PUBLIC SAFETY COMMUNICATIONS TECHNICIAN', 0.0, 0.0, 100.0, 0.0),
 ('SPECIAL ASSISTANT XIV', 100.0, 0.0, 0.0, 0.0),
 ('BdComm Mbr, Grp2,M=$25/Mtg',
  55.52413068160212,
  0.0,
  0.0,
  44.47586931839788),
 ('BOARD/COMMISSION MEMBER, GROUP II', 100.0, 0.0, 0.0, 0.0)]

In [46]:
query ="""SELECT JobTitle, Status, COUNT(Status) from Salaries GROUP BY JobTitle Order By JobTitle"""
cur.execute(query)
cur.fetchall()

[('ACCOUNT CLERK', '', 83),
 ('ACCOUNTANT', '', 5),
 ('ACCOUNTANT INTERN', '', 48),
 ('ACPO,JuvP, Juv Prob (SFERS)', 'PT', 1),
 ('ACUPUNCTURIST', '', 1),
 ('ADMINISTRATIVE ANALYST', '', 93),
 ('ADMINISTRATIVE ANALYST II', '', 2),
 ('ADMINISTRATIVE ANALYST III', '', 2),
 ('ADMINISTRATIVE ENGINEER', '', 10),
 ('ADMINISTRATIVE SERVICES MANAGER', '', 3),
 ('ADMINISTRATOR, SFGH MEDICAL CENTER', '', 1),
 ('AFFIRMATIVE ACTION SPECIALIST', '', 6),
 ('AGRICULTURAL INSPECTOR', '', 2),
 ('AIRPORT ASSISTANT DEPUTY DIRECTOR, BUSINESS ADMINI', '', 1),
 ('AIRPORT ASSISTANT DEPUTY DIRECTOR, OPERATIONS', '', 1),
 ('AIRPORT COMMUNICATIONS OPERATOR', '', 27),
 ('AIRPORT COMMUNICATIONS SUPERVISOR', '', 2),
 ('AIRPORT ECONOMIC PLANNER', '', 13),
 ('AIRPORT ELECTRICIAN', '', 17),
 ('AIRPORT ELECTRICIAN SUPERVISOR', '', 2),
 ('AIRPORT EMERGENCY PLANNING COORDINATOR', '', 2),
 ('AIRPORT MECHANICAL MAINTENANCE SUPERVISOR', '', 1),
 ('AIRPORT NOISE ABATEMENT SPECIALIST', '', 2),
 ('AIRPORT OPERATIONS SUPERVISOR

In [32]:
query = """SELECT DISTINCT Status, JobTitle,
AVG(BasePay) / AVG(TotalPayBenefits) * 100 as BasePayPercentage,
AVG(OvertimePay) / AVG(TotalPayBenefits) * 100 as OvertimePayPercentage,
AVG(OtherPay) / AVG(TotalPayBenefits) * 100 as OtherPayPercentage,
AVG(Benefits) / AVG(TotalPayBenefits) * 100 as BenefitsPercentage
from Salaries GROUP BY JobTitle ORDER BY JobTitle ASC"""
cur.execute(query)
cur.fetchall()

[('',
  'ACCOUNT CLERK',
  98.33122119903611,
  0.8474967937680437,
  0.821282007195843,
  0.0),
 ('', 'ACCOUNTANT', 98.34259301661582, 0.0, 1.657406983384184, 0.0),
 ('',
  'ACCOUNTANT INTERN',
  98.96982086403902,
  0.08415142373685999,
  0.9460277122241327,
  0.0),
 ('PT',
  'ACPO,JuvP, Juv Prob (SFERS)',
  77.60507918820797,
  0.0,
  0.0,
  22.394920811792037),
 ('', 'ACUPUNCTURIST', 98.19511675523415, 0.0, 1.8048832447658387, 0.0),
 ('',
  'ADMINISTRATIVE ANALYST',
  99.18811664411167,
  0.0,
  0.8118833558883296,
  0.0),
 ('',
  'ADMINISTRATIVE ANALYST II',
  96.54727256362848,
  0.0,
  3.4527274363715255,
  0.0),
 ('', 'ADMINISTRATIVE ANALYST III', 100.0, 0.0, 0.0, 0.0),
 ('',
  'ADMINISTRATIVE ENGINEER',
  94.49188009230593,
  0.0,
  5.50811990769406,
  0.0),
 ('',
  'ADMINISTRATIVE SERVICES MANAGER',
  98.10356986642617,
  0.0,
  1.8964301335738392,
  0.0),
 ('',
  'ADMINISTRATOR, SFGH MEDICAL CENTER',
  95.33299907235579,
  0.0,
  4.667000927644218,
  0.0),
 ('',
  'AFFIRMATI