# CDPH penalties

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd

In [3]:
import altair as alt
import altair_latimes as lat

# Register the theme object exported by altair_latimes under the name
# "latimes", then make it the active Altair theme for this session.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [4]:
# Widen pandas' display limits so wide/long frames are shown in full,
# and lift Altair's row cap on chart data.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

In [5]:
# Load the case-level penalty records from the "DETAIL" sheet.
# The identifier columns PENALTY_NUMBER and FACID are read as `str`.
# (The original dtype mapping listed "PENALTY_NUMBER" twice and passed a
# no-op `skiprows=0`; both redundancies are removed here.)
penalties_src = pd.read_excel(
    "data/raw/CDPHpenalties.xlsx",
    sheet_name="DETAIL",
    dtype={"PENALTY_NUMBER": str, "FACID": str},
)

In [6]:
# Normalise the column labels to lower case.
penalties_src = penalties_src.set_axis(
    penalties_src.columns.str.lower(), axis="columns"
)

### How many facilities are in the data (some have multiple cases)?

In [7]:
# Number of distinct facility ids (a missing id, if any, counts once).
penalties_src["facid"].nunique(dropna=False)

2585

In [8]:
# Work on an independent copy so the loaded source frame stays untouched.
penalties_df = penalties_src.copy(deep=True)

In [9]:
# Duplicate of `facid`; the later per-facility groupby counts this column
# because `facid` itself is used as a grouping key there.
penalties_df = penalties_df.assign(facid_join=penalties_df["facid"])

### How many cases? 

In [10]:
# One row per case, so the row count is the number of cases.
penalties_df.shape[0]

17027

### How much in fines initially? 

In [11]:
# Sum of the initial penalty amounts across all cases.
penalties_df.total_amount_initial.sum()

131994613.75

### After appeal? 

In [12]:
# Sum of the final amounts due across all cases.
penalties_df.total_amount_due_final.sum()

117682829.6

### Change?

In [35]:
# Final total minus initial total, rounded to cents (negative = net reduction).
round(
    penalties_df.total_amount_due_final.sum()
    - penalties_df.total_amount_initial.sum(),
    2,
)

-14311784.15

### Count penalties by facility

In [14]:
# One row per (facid, facility_name) with:
#   penalties     - number of cases (non-null `facid_join` values)
#   penalties_sum - total of the final amounts due
# The aggregation is named by string ("sum") rather than by passing the
# Python builtin `sum`, which recent pandas versions deprecate in `agg`.
penalties_grouped = (
    penalties_df.groupby(["facid", "facility_name"])
    .agg(
        penalties=("facid_join", "count"),
        penalties_sum=("total_amount_due_final", "sum"),
    )
    .reset_index()
)

In [15]:
# Number of (facid, facility_name) groups.
penalties_grouped.shape[0]

2585

In [16]:
# Preview the first five facilities.
penalties_grouped.head(n=5)

Unnamed: 0,facid,facility_name,penalties,penalties_sum
0,10000001,VINEYARD POST ACUTE,21,151109.0
1,10000003,CREEKSIDE REHABILITATION & BEHAVIORAL HEALTH,35,116879.04
2,10000004,CRESCENT CITY SKILLED NURSING,15,58000.0
3,10000005,WINDSOR CARE CENTER OF PETALUMA,41,292800.0
4,10000020,FIRCREST CONVALESCENT HOSPITAL,24,81600.0


---

### Facilities and bed counts

In [17]:
# Facility bed counts; `facid` is read as `str`, like the penalties data.
facilities_df = pd.read_csv(
    "data/processed/facility_beds.csv",
    dtype={"facid": str},
)

In [18]:
# Total bed capacity per (facid, facname, fac_fdr), using only rows that
# report at least 20 beds.
# NOTE(review): the 20-bed cutoff is a magic number — confirm its rationale.
# "sum" is passed by name; handing the Python builtin `sum` to `agg` is
# deprecated in recent pandas versions.
facilities_grouped = (
    facilities_df[facilities_df["bed_capacity"] >= 20]
    .groupby(["facid", "facname", "fac_fdr"])
    .agg({"bed_capacity": "sum"})
    .reset_index()
)

In [19]:
# facilities_df.drop_duplicates(subset=["facid"], keep="first", inplace=True)

---

### Merge bed counts

In [20]:
# Attach bed capacity and facility type to each penalised facility.
# Facilities without a match get bed_capacity = 0 so the following cell can
# filter them out. Only `bed_capacity` is filled: a blanket `.fillna(0)`
# would also write the integer 0 into the text column `fac_fdr`.
# NOTE(review): `facilities_grouped` is keyed on (facid, facname, fac_fdr),
# so a facid with several names/types would yield duplicate rows — confirm.
merge = penalties_grouped.merge(
    facilities_grouped[["facid", "bed_capacity", "fac_fdr"]], on="facid", how="left"
).fillna({"bed_capacity": 0})

In [21]:
# Keep only facilities with a positive bed count.
merge_df = merge.loc[merge["bed_capacity"].gt(0)].copy()

In [22]:
# Preview the merged frame.
merge_df.head(n=5)

Unnamed: 0,facid,facility_name,penalties,penalties_sum,bed_capacity,fac_fdr
0,10000001,VINEYARD POST ACUTE,21,151109.0,99.0,SKILLED NURSING FACILITY
1,10000003,CREEKSIDE REHABILITATION & BEHAVIORAL HEALTH,35,116879.04,181.0,SKILLED NURSING FACILITY
2,10000004,CRESCENT CITY SKILLED NURSING,15,58000.0,99.0,SKILLED NURSING FACILITY
3,10000005,WINDSOR CARE CENTER OF PETALUMA,41,292800.0,79.0,SKILLED NURSING FACILITY
6,10000024,"GRANADA REHAB & WELLNESS CENTER, LP",29,142259.0,87.0,SKILLED NURSING FACILITY


### Rate for penalty count per 10 beds

In [23]:
# Penalty count divided by bed capacity, scaled to 10 beds, 2 decimals.
merge_df["penalties_per_10_beds"] = (
    merge_df["penalties"].div(merge_df["bed_capacity"]).mul(10).round(2)
)

### Rate for penalty cost per 10 beds

In [24]:
# Penalty dollars divided by bed capacity, scaled to 10 beds, 2 decimals.
merge_df["penalties_sum_per_10_beds"] = (
    merge_df["penalties_sum"].div(merge_df["bed_capacity"]).mul(10).round(2)
)

In [25]:
# Five facilities with the highest penalty dollars per 10 beds.
merge_df.sort_values(by="penalties_sum_per_10_beds", ascending=False).head(n=5)

Unnamed: 0,facid,facility_name,penalties,penalties_sum,bed_capacity,fac_fdr,penalties_per_10_beds,penalties_sum_per_10_beds
1710,250000507,SOUTHWEST HEALTHCARE SYSTEM,32,996162.5,113.0,GENERAL ACUTE CARE HOSPITAL,2.83,88155.97
2175,940000041,VILLA DEL RIO GARDENS,36,699600.0,84.0,SKILLED NURSING FACILITY,4.29,83285.71
1654,250000004,KINDRED HOSPITAL RIVERSIDE,16,208400.0,32.0,GENERAL ACUTE CARE HOSPITAL,5.0,65125.0
1319,220000019,PRISCILLA CHAN AND MARK ZUCKERBERG SAN FRANCISCO GENERAL HOSPITAL AND TRAUM,25,1544040.0,263.0,GENERAL ACUTE CARE HOSPITAL,0.95,58708.75
1931,910000049,HYDE PARK HEALTHCARE CENTER,21,397650.0,72.0,SKILLED NURSING FACILITY,2.92,55229.17


---

### How frequently are penalties reduced? 

In [36]:
def penalty_change(row):
    """Label how a case's penalty moved from the initial to the final amount.

    Parameters
    ----------
    row : object with ``total_amount_initial`` and ``total_amount_due_final``
        attributes (e.g. a DataFrame row passed by ``apply(axis=1)``).

    Returns
    -------
    str
        ``"decrease"`` when the final amount is below the initial one,
        ``"increase"`` when it is above, otherwise ``"same"``. Because
        comparisons with NaN are false, a missing amount also gives ``"same"``.
    """
    initial = row.total_amount_initial
    final = row.total_amount_due_final

    if initial > final:
        return "decrease"
    if initial < final:
        return "increase"
    return "same"

In [37]:
# Classify every case row-by-row as decrease / increase / same.
penalties_df["penalty_change_category"] = penalties_df.apply(
    penalty_change, axis="columns"
)

### What happens to penalty amounts? 

In [43]:
# Share of cases in each change category, rounded to 2 decimals.
penalties_df["penalty_change_category"].value_counts(normalize=True).round(2)

same        0.76
decrease    0.16
increase    0.08
Name: penalty_change_category, dtype: float64

### How much did each penalty change (final minus initial)?

In [63]:
def penalty_diff(row):
    """Return the signed change in a case's penalty (final minus initial).

    Parameters
    ----------
    row : object with ``total_amount_initial`` and ``total_amount_due_final``
        attributes (e.g. a DataFrame row passed by ``apply(axis=1)``).

    Returns
    -------
    number
        ``final - initial`` when the two amounts differ (negative for a
        reduction, positive for an increase); ``0`` when they are equal or
        when either one is NaN (comparisons with NaN are false).
    """
    initial = row.total_amount_initial
    final = row.total_amount_due_final

    amounts_differ = initial > final or initial < final
    return final - initial if amounts_differ else 0

In [64]:
# Signed per-case change (final minus initial), computed row-by-row.
penalties_df["penalty_change"] = penalties_df.apply(penalty_diff, axis="columns")

### Fine decreased? 

In [71]:
# Total change across the cases whose penalty went down.
penalties_df.loc[
    penalties_df["penalty_change_category"].eq("decrease"), "penalty_change"
].sum()

-19912260.65

### Increased? 

In [72]:
# Total change across the cases whose penalty went up.
penalties_df.loc[
    penalties_df["penalty_change_category"].eq("increase"), "penalty_change"
].sum()

5600476.5

### Net?

In [111]:
# Net change across all cases (decreases plus increases).
penalties_df.penalty_change.sum()

-14311784.15

---

### Toplines

In [122]:
# Run date formatted as MM/DD/YYYY, used to stamp the toplines export.
today = f"{pd.to_datetime('today'):%m/%d/%Y}"

In [123]:
# Single-row frame of headline figures, starting with the update date.
toplines = pd.DataFrame({"update_date": [today]})

In [124]:
# Headline: sum of initial penalty amounts.
toplines = toplines.assign(
    total_penalties_initial=penalties_df["total_amount_initial"].sum()
)

In [124]:
# Headline: sum of final amounts due.
toplines = toplines.assign(
    total_penalties_final=penalties_df["total_amount_due_final"].sum()
)

In [125]:
# Headline: net change across all cases.
toplines = toplines.assign(net_penalty_change=penalties_df["penalty_change"].sum())

In [125]:
# Headline: total change across cases whose penalty was reduced.
toplines = toplines.assign(
    net_penalty_decrease=penalties_df.loc[
        penalties_df["penalty_change_category"].eq("decrease"), "penalty_change"
    ].sum()
)

In [125]:
# Headline: total change across cases whose penalty was raised.
toplines = toplines.assign(
    net_penalty_increase=penalties_df.loc[
        penalties_df["penalty_change_category"].eq("increase"), "penalty_change"
    ].sum()
)

In [126]:
# Headline: number of facilities with at least one penalty case.
toplines = toplines.assign(facilities_w_penalty=len(penalties_grouped))

In [127]:
# Show the single-row frame vertically for readability.
toplines.transpose()

Unnamed: 0,0
update_date,11/08/2021
total_penalties_initial,131994613.75
total_penalties_final,117682829.6
net_penalty_change,-14311784.15
net_penalty_decrease,-19912260.65
net_penalty_increase,5600476.5
facilities_w_penalty,2585


---

### Exports

In [128]:
# Write the per-facility counts, sums and per-10-bed rates (no index column).
merge_df.to_csv(
    "data/processed/penalties_by_facility_rates_count_sum.csv",
    index=False,
)

In [129]:
# Write the headline figures as a JSON list of records, indented 4 spaces.
toplines.to_json(
    "data/processed/toplines.json",
    orient="records",
    indent=4,
)