# Using Python, Pandas and Jupyter for data journalism

In [1]:
# Load the lab_black IPython extension for this kernel session
# (presumably auto-formats each executed cell with Black — confirm against the extension's docs).
%load_ext lab_black

In [2]:
import pandas as pd
import altair as alt
import altair_latimes as lat

In [3]:
# Register the theme shipped in altair_latimes under the name "latimes"
# and make it the active theme for every chart rendered below.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

# Let pandas show up to 50 columns and 1,000 rows before truncating a table.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

# Turn off Altair's max-rows check so larger frames can be charted.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Fetch some County of Los Angeles employee payroll data from an open-data API

In [4]:
# Data source (dataset landing page on the LA County open-data portal):
# https://data.lacounty.gov/Human-Resources/LA-County-Employee-Salaries/8rdv-6nb6

In [5]:
# CSV download endpoint for dataset 8rdv-6nb6 on data.lacounty.gov.
url = "https://data.lacounty.gov/api/views/8rdv-6nb6/rows.csv?accessType=DOWNLOAD"

### Import the data and store it as a variable

In [6]:
# Fetch the CSV straight from the API URL and parse it into a DataFrame.
# This hits the network, so re-running the cell re-downloads the file.
df = pd.read_csv(filepath_or_buffer=url)

### Clean up the headers: lowercase them, swap spaces for underscores and drop commas

In [7]:
# Normalise each header to snake_case: lowercase it, turn spaces into
# underscores, drop commas, then trim any underscore left at the front.
df.columns = [
    header.lower().replace(" ", "_").replace(",", "").lstrip("_")
    for header in df.columns
]

### Let's just get the most recent year

In [8]:
# Store the year as text so it can be compared against string literals
# and is treated as a label rather than a number when grouping.
df["year"] = df["year"].astype(str)

In [9]:
# Keep only the rows whose year is "2019" (year is a string column at this point).
current_df = df.loc[df["year"].eq("2019")]

### What do the data look like?

In [10]:
# Peek at the first five rows of the 2019 slice.
current_df.head(n=5)

Unnamed: 0,year,employee_last_name,employee_first_name,middle_initial,position_title,department,base_earnings,overtime_earnings,other_earnings,leave_time_payouts,total_earnings,medical_dental_vision,retirement_surcharge,pension_contributions,deferred_contributions,ltd_life_&_medical_tax,other_benefits,total_benefits,total_compensation,employee_name
0,2019,YEE,SALLY,,SHERIFF STATION CLERK II,SHERIFF,54263.0,1397.0,3248.0,5016.0,63924.0,8890.0,0.0,11084.0,2171.0,1527.0,45.0,23717.0,87641.0,SALLY YEE
1,2019,YEE,SO,G,"CASHIERING SERVICES REPRESENTATIVE I, SHERIFF",SHERIFF,23716.0,0.0,4099.0,1253.0,29068.0,15611.0,0.0,9593.0,1372.0,788.0,34.0,27399.0,56467.0,SO YEE
12,2019,YEE,TAI,C,"AUDIO, VIDEO & SECURITY SYST TECH",SHERIFF,88182.0,5073.0,8225.0,6695.0,108175.0,15635.0,0.0,18757.0,3527.0,1828.0,492.0,40240.0,148415.0,TAI YEE
15,2019,YEGAVIAN,BEDROS,R,DEPUTY SHERIFF,SHERIFF,97330.0,0.0,2298.0,0.0,99628.0,26001.0,2977.0,25473.0,3893.0,1692.0,6.0,60043.0,159671.0,BEDROS YEGAVIAN
17,2019,YEH,GREGORY,,"CRIME ANALYST,SHERIFF",SHERIFF,63582.0,724.0,1663.0,0.0,65968.0,11256.0,0.0,11940.0,2397.0,1169.0,216.0,26978.0,92947.0,GREGORY YEH


### How many employees were there in 2019? 

In [11]:
# One row per employee record, so the row count is the headcount for 2019.
current_df.shape[0]

110988

### How many employees does the Sheriff's Department have?

In [12]:
# Restrict the 2019 records to rows whose department is exactly "SHERIFF".
df_sheriff = current_df.loc[lambda frame: frame["department"] == "SHERIFF"]

In [13]:
# Number of Sheriff's Department rows in the 2019 data.
df_sheriff.shape[0]

18427

### Who are the highest paid employees at the Sheriff's Department? 

In [14]:
# Rank Sheriff's Department rows by total earnings, keep the ten largest,
# then trim the table to the columns needed to read the ranking.
df_sheriff.sort_values(by="total_earnings", ascending=False).head(10)[
    [
        "employee_name",
        "position_title",
        "department",
        "overtime_earnings",
        "total_earnings",
    ]
]

Unnamed: 0,employee_name,position_title,department,overtime_earnings,total_earnings
736745,MARIA GUTIERREZ,ASSISTANT SHERIFF/UC/,SHERIFF,0.0,613029.0
745225,JEFFREY SCROGGIN,COMMANDER,SHERIFF,0.0,587050.0
737105,KEVIN HEBERT,COMMANDER,SHERIFF,0.0,523597.0
746480,JAMES THORNTON JR,CAPTAIN,SHERIFF,0.0,522605.0
742452,ANTHONY PAEZ,DEPUTY SHERIFF,SHERIFF,0.0,485828.0
732598,TIMOTHY CARR,LIEUTENANT,SHERIFF,1.0,471757.0
741493,DAVID MOSER,SERGEANT,SHERIFF,5737.0,471207.0
747961,EDWARD WINSLOW,LIEUTENANT,SHERIFF,0.0,442888.0
735123,JOSEPH FENNELL JR,COMMANDER,SHERIFF,0.0,424623.0
746318,JON TEDDER,SERGEANT,SHERIFF,51633.0,419081.0


### Who earns the most in overtime, countywide? 

In [15]:
# Across all 2019 rows: pick the display columns, order by overtime pay
# from highest to lowest, and show the first ten.
current_df.loc[
    :,
    [
        "employee_name",
        "position_title",
        "department",
        "overtime_earnings",
        "total_earnings",
    ],
].sort_values(by="overtime_earnings", ascending=False).iloc[:10]

Unnamed: 0,employee_name,position_title,department,overtime_earnings,total_earnings
662755,ERIC PACHECO,"SENIOR PILOT, FIRE SERVICES",FIRE DEPARTMENT,294028.0,471745.0
663464,THOMAS SHORT,"SENIOR PILOT, FIRE SERVICES",FIRE DEPARTMENT,259046.0,452924.0
663032,THOMAS RAY,BATTALION CHIEF/56 HOURS/,FIRE DEPARTMENT,251074.0,471220.0
727722,SOHEIL NAJIBI,PHYS SPEC SURGERY-ORTHOPEDICS NON MEGAFLEX,RANCHO LOS AMIGOS NATIONAL REHABILITATION CENTER,235889.0,665021.0
663953,RYAN VAN DYCK,"PILOT, FIRE SERVICES",FIRE DEPARTMENT,229193.0,374176.0
663298,MICHAEL SAGELY,"SENIOR PILOT, FIRE SERVICES",FIRE DEPARTMENT,221455.0,412577.0
659709,RANDALL BISHOP,FIRE FIGHTER (56 HOUR),FIRE DEPARTMENT,215228.0,371325.0
661584,MEDARDO JUAREZ,FIRE FIGHTER SPECIALIST (56 HOUR),FIRE DEPARTMENT,211994.0,357565.0
659768,MATTHEW BOYAN,FIRE CAPTAIN (56 HOUR),FIRE DEPARTMENT,211411.0,365205.0
662745,ALEX OWENS,FIRE CAPTAIN (56 HOUR),FIRE DEPARTMENT,210996.0,370178.0


In [16]:
# Select every 2019 row whose position title contains the substring "PILOT".
# na=False: a missing position_title counts as "no match"; without it the mask
#   would contain NaN and the boolean row filter would raise a ValueError.
# regex=False: "PILOT" is a literal substring, not a pattern.
pilots = current_df[
    current_df["position_title"].str.contains("PILOT", na=False, regex=False)
]

In [17]:
# Ten pilot rows with the most overtime pay, highest first,
# shown with just the identifying and earnings columns.
pilots.sort_values(by="overtime_earnings", ascending=False).head(10)[
    [
        "employee_name",
        "position_title",
        "department",
        "overtime_earnings",
        "total_earnings",
    ]
]

Unnamed: 0,employee_name,position_title,department,overtime_earnings,total_earnings
662755,ERIC PACHECO,"SENIOR PILOT, FIRE SERVICES",FIRE DEPARTMENT,294028.0,471745.0
663464,THOMAS SHORT,"SENIOR PILOT, FIRE SERVICES",FIRE DEPARTMENT,259046.0,452924.0
663953,RYAN VAN DYCK,"PILOT, FIRE SERVICES",FIRE DEPARTMENT,229193.0,374176.0
663298,MICHAEL SAGELY,"SENIOR PILOT, FIRE SERVICES",FIRE DEPARTMENT,221455.0,412577.0
661512,ETHAN JENSEN,"PILOT, FIRE SERVICES",FIRE DEPARTMENT,208019.0,351712.0
661309,ADAM HENRY,"PILOT, FIRE SERVICES",FIRE DEPARTMENT,203077.0,358597.0
661058,PAUL GOTTWIG,"SENIOR PILOT, FIRE SERVICES",FIRE DEPARTMENT,201938.0,358571.0
663142,THOMAS ROBERTS,"PILOT, FIRE SERVICES",FIRE DEPARTMENT,79951.0,200629.0
663638,PATRICK STEFANSKI,"PILOT, FIRE SERVICES",FIRE DEPARTMENT,31224.0,214395.0
664174,KENYON WILLIAMS,"PILOT, FIRE SERVICES",FIRE DEPARTMENT,25030.0,53062.0


### What's the average overtime, by department? 

In [18]:
# Mean overtime pay per department for 2019, rounded to whole dollars.
# reset_index() turns the department group key back into an ordinary column.
ot = (
    current_df.groupby("department")
    .agg(overtime_earnings=("overtime_earnings", "mean"))
    .reset_index()
    .round()
)

### Chart the departments with the highest average overtime

In [19]:
# The ten departments with the largest mean overtime, in descending order.
ot_top = ot.sort_values(by="overtime_earnings", ascending=False).iloc[:10]

In [20]:
# Horizontal bar chart: one bar per department in ot_top, bar length is the
# mean overtime, and sort="-x" orders the bars from largest to smallest.
alt.Chart(ot_top).mark_bar().encode(
    x=alt.X("overtime_earnings"),
    y=alt.Y("department:N", sort="-x"),
).properties(width=320, height=500)

### How has overtime at the Fire Department changed over time?

In [27]:
# Select rows from every year (df, not current_df) whose department name
# contains the substring "FIRE DEPARTMENT".
# na=False: a missing department counts as "no match"; without it the mask
#   would contain NaN and the boolean row filter would raise a ValueError.
# regex=False: the search text is a literal substring, not a pattern.
fire = df[df["department"].str.contains("FIRE DEPARTMENT", na=False, regex=False)]

In [28]:
# Mean Fire Department overtime pay for each year, rounded to whole dollars,
# with the year group key restored as a regular column.
fire_ot = (
    fire.groupby("year")["overtime_earnings"]
    .mean()
    .reset_index()
    .round()
)

In [29]:
# Display the per-year mean overtime table for the Fire Department.
fire_ot

Unnamed: 0,year,overtime_earnings
0,2018,41190.0
1,2019,44314.0
