# CDPH penalties

### Import Python tools and Jupyter configuration

In [1]:
# Load the lab_black extension so code cells are auto-formatted with Black.
%load_ext lab_black

In [2]:
import pandas as pd

In [3]:
import altair as alt
import altair_latimes as lat

# Register the theme object provided by altair_latimes under the name
# "latimes", then switch Altair to that theme.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [4]:
# Raise pandas' display limits: up to 100 columns, up to 1,000 rows, and no
# cap on the width of text shown inside a cell.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Turn off Altair's row limit for chart data. Kept as the final statement so
# its return value remains the cell's displayed output.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

In [5]:
# Load the "DETAIL" sheet of the raw penalties workbook.
# The identifier columns are read as strings so values such as "010000001"
# keep their leading zeros.
# NOTE(review): "PENALTY_NUMBER" used to appear twice in this dtype mapping;
# the redundant entry has been dropped. Confirm the second entry was not meant
# to name a different identifier column.
penalties_src = pd.read_excel(
    "data/raw/CDPHpenalties.xlsx",
    sheet_name="DETAIL",
    dtype={"PENALTY_NUMBER": str, "FACID": str},
)

In [6]:
# Lower-case every column header (e.g. FACID -> facid) so the rest of the
# notebook can refer to columns by their lower-case names.
penalties_src.columns = penalties_src.columns.str.lower()

### How many facilities are in the data (some have multiple cases)?

In [7]:
# Number of distinct facility IDs. dropna=False counts a missing ID as its own
# value, which matches taking len() of Series.unique().
penalties_src["facid"].nunique(dropna=False)

2585

In [8]:
# Work on an independent (deep) copy so the loaded source frame is left as read.
penalties_df = penalties_src.copy(deep=True)

In [9]:
# Duplicate the facility ID into a helper column; the per-facility aggregation
# later counts this column to get the number of penalties.
penalties_df = penalties_df.assign(facid_join=penalties_df["facid"])

### How many penalty cases are in the data?

In [10]:
# Row count of the working frame (the notebook treats each row as one case).
penalties_df.shape[0]

17027

### Count penalties by facility

In [43]:
# Collapse the case-level rows to one row per (facid, facility_name) pair:
#   penalties     - count of non-null facid_join values in the group
#   penalties_sum - sum of total_amount_due_final over the group
# Aggregations are given as strings ("count", "sum") instead of the Python
# builtin `sum`, which newer pandas releases deprecate when passed to .agg().
penalties_grouped = (
    penalties_df.groupby(["facid", "facility_name"])
    .agg(
        penalties=("facid_join", "count"),
        penalties_sum=("total_amount_due_final", "sum"),
    )
    .reset_index()
)

In [44]:
# Row count of the grouped frame (one row per facid / facility_name pair).
penalties_grouped.shape[0]

2585

In [45]:
# Display the per-facility penalty counts and totals.
penalties_grouped

Unnamed: 0,facid,facility_name,penalties,penalties_sum
0,010000001,VINEYARD POST ACUTE,21,151109.00
1,010000003,CREEKSIDE REHABILITATION & BEHAVIORAL HEALTH,35,116879.04
2,010000004,CRESCENT CITY SKILLED NURSING,15,58000.00
3,010000005,WINDSOR CARE CENTER OF PETALUMA,41,292800.00
4,010000020,FIRCREST CONVALESCENT HOSPITAL,24,81600.00
...,...,...,...,...
2580,970000194,BRIGHTON CARE CENTER,11,21200.00
2581,980000764,CAREMERIDIAN - BERMUDA HOUSE,4,4000.00
2582,980000858,CAREMERIDIAN - MAYALL HOUSE,2,1500.00
2583,980002279,NEURORESTORATIVE CALIFORNIA,1,1000.00


---

### Facilities and bed counts

In [46]:
# Read the processed bed-count file. facid is read as a string so it can be
# matched against the string facid values in the penalties data.
facilities_df = pd.read_csv(
    "data/processed/facility_beds.csv",
    dtype={"facid": str},
)

In [47]:
# Total bed capacity per (facid, facname, fac_fdr), using only records that
# report at least 20 beds. The aggregation is named by the string "sum" rather
# than the Python builtin `sum`, which newer pandas releases deprecate when
# passed to .agg().
# NOTE(review): because facname and fac_fdr are grouping keys too, a facid that
# appears with more than one name or fac_fdr value yields several rows, and
# the later merge on facid would then repeat that facility's penalty figures.
# Confirm facid is unique in the result.
facilities_grouped = (
    facilities_df[facilities_df["bed_capacity"] >= 20]
    .groupby(["facid", "facname", "fac_fdr"])
    .agg({"bed_capacity": "sum"})
    .reset_index()
)

In [48]:
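# NOTE(review): disabled in-place de-duplication on facid, currently unused —
# either restore it deliberately or delete this cell.
# facilities_df.drop_duplicates(subset=["facid"], keep="first", inplace=True)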

---

### Merge bed counts into the penalty totals

In [49]:
# Attach bed_capacity and fac_fdr to each facility's penalty totals.
# The left join keeps facilities that have no bed record. Only their missing
# bed_capacity is set to 0 (the value the following filter step tests for), so
# the text column fac_fdr is not overwritten with a numeric 0 and stays
# missing for unmatched facilities.
merge = penalties_grouped.merge(
    facilities_grouped[["facid", "bed_capacity", "fac_fdr"]],
    on="facid",
    how="left",
).fillna({"bed_capacity": 0})

In [50]:
# Keep only rows with a positive bed_capacity; copy so columns can be added
# to merge_df without affecting `merge`.
merge_df = merge.loc[merge["bed_capacity"].gt(0)].copy()

In [51]:
# Display the penalty totals for facilities that have a positive bed count.
merge_df

Unnamed: 0,facid,facility_name,penalties,penalties_sum,bed_capacity,fac_fdr
0,010000001,VINEYARD POST ACUTE,21,151109.00,99.0,SKILLED NURSING FACILITY
1,010000003,CREEKSIDE REHABILITATION & BEHAVIORAL HEALTH,35,116879.04,181.0,SKILLED NURSING FACILITY
2,010000004,CRESCENT CITY SKILLED NURSING,15,58000.00,99.0,SKILLED NURSING FACILITY
3,010000005,WINDSOR CARE CENTER OF PETALUMA,41,292800.00,79.0,SKILLED NURSING FACILITY
6,010000024,"GRANADA REHAB & WELLNESS CENTER, LP",29,142259.00,87.0,SKILLED NURSING FACILITY
...,...,...,...,...,...,...
2575,970000178,VILLA GARDENS HEALTH CARE UNIT,1,500.00,54.0,SKILLED NURSING FACILITY
2576,970000184,PASADENA PARK HEALTHCARE AND WELLNESS CENTER,4,23000.00,99.0,SKILLED NURSING FACILITY
2577,970000186,LEGACY HEALTHCARE CENTER,15,82450.00,54.0,SKILLED NURSING FACILITY
2578,970000188,"PASADENA CARE CENTER, LLC",8,57000.00,99.0,SKILLED NURSING FACILITY


In [52]:
# Penalty count per 10 beds: penalties / bed_capacity * 10, rounded to 2 places.
merge_df["penalties_per_10_beds"] = (
    merge_df["penalties"].div(merge_df["bed_capacity"]).mul(10).round(2)
)

In [55]:
# Penalty amount per 10 beds: penalties_sum / bed_capacity * 10, rounded to
# 2 places.
merge_df["penalties_sum_per_10_beds"] = (
    merge_df["penalties_sum"].div(merge_df["bed_capacity"]).mul(10).round(2)
)

In [57]:
# The 20 facilities with the highest penalty amount per 10 beds, largest first.
merge_df.sort_values(by="penalties_sum_per_10_beds", ascending=False).iloc[:20]

---

### Exports

In [58]:
# Write the per-facility rates to the processed-data folder, omitting the
# DataFrame index column.
merge_df.to_csv(
    path_or_buf="data/processed/penalties_by_facility_rates_count_sum.csv",
    index=False,
)