# CDPH penalties

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd

In [3]:
import altair as alt
import altair_latimes as lat

# Register the LA Times chart theme and make it the active Altair theme.
# The name passed to enable() must match the name registered just above.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [4]:
# Notebook display settings: show wide and long frames without truncation,
# and remove Altair's row limit on chart data.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

---

In [5]:
# Load the penalty-level "DETAIL" sheet. Identifier columns are read as
# strings so they are not coerced to numbers on import.
penalties_src = pd.read_excel(
    "data/raw/CDPHpenalties.xlsx",
    skiprows=0,
    sheet_name="DETAIL",
    dtype={"FACID": str, "PENALTY_NUMBER": str},
)

In [6]:
# Normalize the spreadsheet's header names to lowercase.
penalties_src = penalties_src.set_axis(
    penalties_src.columns.str.lower(), axis="columns"
)

### How many facilities are in the data (some have multiple cases)?

In [7]:
# Number of distinct facility IDs in the raw data (a missing ID counts once).
penalties_src["facid"].nunique(dropna=False)

2585

In [8]:
# Keep only penalties issued after Jan. 1, 2000. Rows with a missing issue
# date compare as False and are therefore dropped as well.
penalties_df = penalties_src[
    penalties_src["penalty_issue_date"] > pd.Timestamp("2000-01-01")
].copy()

In [9]:
# Duplicate the facility ID into a helper column used for counting rows.
penalties_df = penalties_df.assign(facid_join=penalties_df["facid"])

### Dates

In [10]:
# Month name of the date each penalty was issued.
penalties_df = penalties_df.assign(
    month=lambda df: df["penalty_issue_date"].dt.month_name()
)

In [11]:
# Issue year stored as text so it is treated as a category, not a number.
penalties_df = penalties_df.assign(
    year=lambda df: df["penalty_issue_date"].dt.year.astype(str)
)

----

### How many cases? 

In [12]:
# Row count of the filtered penalties table (one row per penalty case).
penalties_df.shape[0]

17007

### How much in fines initially? 

In [13]:
# Sum of the initially assessed amounts across all penalties.
penalties_df.loc[:, "total_amount_initial"].sum()

131875913.75

### After appeal? 

In [14]:
# Sum of the final amounts due across all penalties.
penalties_df.loc[:, "total_amount_due_final"].sum()

117609829.6

### Change?

In [15]:
# Final total minus initial total, to the cent. A negative result means the
# final amounts due are lower overall than the initial assessments.
(
    penalties_df["total_amount_due_final"].sum()
    - penalties_df["total_amount_initial"].sum()
).round(2)

-14266084.15

### Count and total penalties by facility

In [16]:
# One row per facility (ID + name): number of penalties and total final
# amount due. Aggregations are named by string ("count", "sum") rather than
# passed as Python builtins, which pandas has deprecated as aggregators.
penalties_grouped = (
    penalties_df.groupby(["facid", "facility_name"])
    .agg(
        penalties=("facid_join", "count"),
        penalties_sum=("total_amount_due_final", "sum"),
    )
    .reset_index()
)

In [17]:
# Number of facility rows in the grouped table.
penalties_grouped.shape[0]

2584

In [18]:
# Preview the first five grouped facilities.
penalties_grouped.head(5)

Unnamed: 0,facid,facility_name,penalties,penalties_sum
0,10000001,VINEYARD POST ACUTE,21,151109.0
1,10000003,CREEKSIDE REHABILITATION & BEHAVIORAL HEALTH,35,116879.04
2,10000004,CRESCENT CITY SKILLED NURSING,15,58000.0
3,10000005,WINDSOR CARE CENTER OF PETALUMA,41,292800.0
4,10000020,FIRCREST CONVALESCENT HOSPITAL,24,81600.0


---

### Penalties over time

In [95]:
# Per-year penalty count plus the total and largest final amount due.
# The aggregations are an ordered list of string names: a set of builtins
# has no guaranteed iteration order, and builtin aggregators are deprecated.
penalties_year_group = (
    penalties_df.groupby(["year"])
    .agg({"facid_join": "count", "total_amount_due_final": ["sum", "max"]})
    .reset_index()
)

In [96]:
# Flatten two-level column labels into single names such as
# "total_amount_due_final_sum". Labels that are already plain strings are
# left untouched, so re-running this cell does not mangle them.
penalties_year_group.columns = [
    "_".join(col).rstrip("_") if isinstance(col, tuple) else col
    for col in penalties_year_group.columns.values
]

In [97]:
# Give the flattened aggregate columns descriptive names. The result is
# reassigned rather than renamed with inplace=True.
penalties_year_group = penalties_year_group.rename(
    columns={
        "facid_join_count": "penalties_count",
        "total_amount_due_final_sum": "final_penalty_annual_total",
        "total_amount_due_final_max": "final_penalty_annual_max",
    }
)

In [99]:
# Bar chart: total final penalty dollars by issue year.
alt.Chart(penalties_year_group).mark_bar().encode(
    x=alt.X("year:N"),
    y=alt.Y("final_penalty_annual_total:Q"),
)

In [100]:
# Bar chart: largest single final penalty by issue year.
alt.Chart(penalties_year_group).mark_bar().encode(
    x=alt.X("year:N"),
    y=alt.Y("final_penalty_annual_max:Q"),
)

In [102]:
# Every penalty whose final amount due equals the largest value in the data.
penalties_df.loc[
    lambda df: df["total_amount_due_final"].eq(df["total_amount_due_final"].max())
]

Unnamed: 0,facid,facility_name,ltc,fac_type_code,fac_fdr,district_office,penalty_issue_date,penalty_number,disposition,penalty_type,penalty_detail,penalty_category,penalty_category_other,violation_from_date,violation_to_date,appealed,appeal_due_date,appeal_received_date,class_assessed_initial,class_assessed_final,total_amount_initial,total_amount_due_final,total_penalty_offset_amount,total_collected_amount,total_balance_due,eventid,death_related,intakeid_all,priority_all,sfy,facid_join,month,year,penalty_change_category,penalty_change
14906,40000101,COMMUNITY REGIONAL MEDICAL CENTER,,GACH,General Acute Care Hospital,Fresno,2013-11-15,40010268,Open,Administrative Penalty,AP - Breach (HSC 1280.15),Deliberate breach by person other than a h/c worker,,2012-10-04,2012-10-04,Yes,2015-11-09,2015-10-22,AP BR,,250000.0,250000.0,0.0,0.0,250000.0,DL8611,,CA00329115,D,SFY2013-14,40000101,November,2013,same,0.0
15144,60000027,LAC/HARBOR UCLA MEDICAL CENTER,,GACH,General Acute Care Hospital,Orange,2013-02-25,60009755,Closed,Administrative Penalty,AP - Breach (HSC 1280.15),Deliberate breach of PHI by health care worker,,2012-09-07,2012-09-07,No,2015-06-03,NaT,AP BR,,250000.0,250000.0,-62500.0,187500.0,0.0,4WSM11,,CA00326686,B,SFY2012-13,60000027,February,2013,same,0.0
15693,80000152,TRI-CITY MEDICAL CENTER,,GACH,General Acute Care Hospital,San Diego,2015-09-04,80011706,Closed,Administrative Penalty,AP - Breach (HSC 1280.15),Deliberate breach by person other than a h/c worker,,2015-08-10,2015-08-10,No,2015-09-28,NaT,AP BR,,250000.0,250000.0,-62500.0,187500.0,0.0,WX8T11,,CA00409247,C,SFY2015-16,80000152,September,2015,same,0.0
15694,80000023,RADY CHILDREN'S HOSPITAL - SAN DIEGO,,GACH,General Acute Care Hospital,San Diego,2015-11-02,80011817,Closed,Administrative Penalty,AP - Breach (HSC 1280.15),Breach to person/entity outside facility/hc system,,2012-07-01,2012-07-01,No,2016-08-01,NaT,AP BR,,250000.0,250000.0,-62500.0,187500.0,0.0,LZG911,,"CA00401886, CA00402445, CA00403150, CA00403287","B, B, E, D",SFY2015-16,80000023,November,2015,same,0.0
15831,110000011,KAISER FOUNDATION HOSPITAL & REHAB CENTER - VALLEJO,,GACH,General Acute Care Hospital,Santa Rosa,2011-04-26,110007971,Closed,Administrative Penalty,AP - Breach (HSC 1280.15),Breach to person/entity outside facility/hc system,,2010-09-03,2010-09-03,No,2012-10-04,NaT,AP BR,,250000.0,250000.0,-62500.0,187500.0,0.0,DOWG11,,CA00207226,C,SFY2010-11,110000011,April,2011,same,0.0
16024,120000342,KERN MEDICAL CENTER,,GACH,General Acute Care Hospital,Bakersfield,2010-09-23,120007268,Closed,Administrative Penalty,AP - Breach (HSC 1280.15),Breach of IT system theft/loss of edevice/med rec,,2009-10-31,2009-10-31,No,2010-10-14,NaT,AP BR,,250000.0,250000.0,-62500.0,187500.0,0.0,WOYN11,,CA00209743,B,SFY2010-11,120000342,September,2010,same,0.0
16152,220000019,PRISCILLA CHAN AND MARK ZUCKERBERG SAN FRANCISCO GENERAL HOSPITAL AND TRAUM,,GACH,General Acute Care Hospital,San Francisco,2011-03-04,220008001,Closed,Administrative Penalty,AP - Breach (HSC 1280.15),Breach to person/entity outside facility/hc system,,2011-01-03,2011-01-03,No,2014-02-10,NaT,AP BR,,250000.0,250000.0,-62500.0,187500.0,0.0,IV7511,,CA00255005,C,SFY2010-11,220000019,March,2011,same,0.0
16182,220000030,ST. MARY'S MEDICAL CENTER,,GACH,General Acute Care Hospital,San Francisco,2011-10-26,220008594,Closed,Administrative Penalty,AP - Breach (HSC 1280.15),Immediate Jeopardy,,2010-12-14,2010-12-14,Yes,2014-02-10,2014-01-24,AP BR,,250000.0,250000.0,-250000.0,0.0,0.0,I8K811,,CA00259080,B,SFY2011-12,220000030,October,2011,same,0.0
16195,220000019,PRISCILLA CHAN AND MARK ZUCKERBERG SAN FRANCISCO GENERAL HOSPITAL AND TRAUM,,GACH,General Acute Care Hospital,San Francisco,2012-03-27,220009190,Closed,Administrative Penalty,AP - Breach (HSC 1280.15),Deliberate breach of PHI by health care worker,,2011-05-03,2011-05-03,No,2015-02-24,NaT,AP BR,,250000.0,250000.0,-62500.0,187500.0,0.0,T1JO11,,CA00268593,B,SFY2011-12,220000019,March,2012,same,0.0
16199,220000019,PRISCILLA CHAN AND MARK ZUCKERBERG SAN FRANCISCO GENERAL HOSPITAL AND TRAUM,,GACH,General Acute Care Hospital,San Francisco,2012-04-24,220009255,Closed,Administrative Penalty,AP - Breach (HSC 1280.15),Breach of IT system theft/loss of edevice/med rec,,2010-03-12,2010-03-12,No,2013-07-22,NaT,AP BR,,250000.0,250000.0,-62500.0,187500.0,0.0,LDDD11,,CA00224352,C,SFY2011-12,220000019,April,2012,same,0.0


---

### Facilities and bed counts

In [21]:
# Facility bed counts; the ID is read as text so it matches the penalty data.
facilities_df = pd.read_csv(
    "data/processed/facility_beds.csv",
    dtype={"facid": str},
)

In [22]:
# Total bed capacity per facility (ID, name, type). The >= 20 filter is
# applied to individual rows before summing, not to the facility totals.
# "sum" is passed by name because builtin aggregators are deprecated.
facilities_grouped = (
    facilities_df[facilities_df["bed_capacity"] >= 20]
    .groupby(["facid", "facname", "fac_fdr"])
    .agg({"bed_capacity": "sum"})
    .reset_index()
)

In [23]:
# facilities_df.drop_duplicates(subset=["facid"], keep="first", inplace=True)

---

### Merge bed counts

In [24]:
# Attach bed capacity and facility type to each penalized facility.
# Facilities with no match keep their row; only the missing bed count is set
# to 0, so text columns such as fac_fdr are not overwritten with a numeric 0.
merge = penalties_grouped.merge(
    facilities_grouped[["facid", "bed_capacity", "fac_fdr"]], on="facid", how="left"
).fillna({"bed_capacity": 0})

In [25]:
# Keep only facilities with a positive bed count, as an independent copy.
merge_df = merge.loc[merge["bed_capacity"].gt(0)].copy()

In [26]:
# Preview the first five facilities that have bed counts.
merge_df.head(5)

Unnamed: 0,facid,facility_name,penalties,penalties_sum,bed_capacity,fac_fdr
0,10000001,VINEYARD POST ACUTE,21,151109.0,99.0,SKILLED NURSING FACILITY
1,10000003,CREEKSIDE REHABILITATION & BEHAVIORAL HEALTH,35,116879.04,181.0,SKILLED NURSING FACILITY
2,10000004,CRESCENT CITY SKILLED NURSING,15,58000.0,99.0,SKILLED NURSING FACILITY
3,10000005,WINDSOR CARE CENTER OF PETALUMA,41,292800.0,79.0,SKILLED NURSING FACILITY
6,10000024,"GRANADA REHAB & WELLNESS CENTER, LP",29,142259.0,87.0,SKILLED NURSING FACILITY


### Rate for penalty count per 10 beds

In [27]:
# Penalty count divided by bed capacity, scaled to 10 beds, two decimals.
merge_df["penalties_per_10_beds"] = (
    merge_df["penalties"].div(merge_df["bed_capacity"]).mul(10).round(2)
)

### Rate for penalty cost per 10 beds

In [28]:
# Final penalty dollars divided by bed capacity, scaled to 10 beds.
merge_df["penalties_sum_per_10_beds"] = (
    merge_df["penalties_sum"].div(merge_df["bed_capacity"]).mul(10).round(2)
)

In [29]:
# The five facilities with the highest penalty dollars per 10 beds.
merge_df.sort_values(by="penalties_sum_per_10_beds", ascending=False).head(5)

Unnamed: 0,facid,facility_name,penalties,penalties_sum,bed_capacity,fac_fdr,penalties_per_10_beds,penalties_sum_per_10_beds
1710,250000507,SOUTHWEST HEALTHCARE SYSTEM,32,996162.5,113.0,GENERAL ACUTE CARE HOSPITAL,2.83,88155.97
2174,940000041,VILLA DEL RIO GARDENS,36,699600.0,84.0,SKILLED NURSING FACILITY,4.29,83285.71
1654,250000004,KINDRED HOSPITAL RIVERSIDE,16,208400.0,32.0,GENERAL ACUTE CARE HOSPITAL,5.0,65125.0
1319,220000019,PRISCILLA CHAN AND MARK ZUCKERBERG SAN FRANCISCO GENERAL HOSPITAL AND TRAUM,25,1544040.0,263.0,GENERAL ACUTE CARE HOSPITAL,0.95,58708.75
1930,910000049,HYDE PARK HEALTHCARE CENTER,21,397650.0,72.0,SKILLED NURSING FACILITY,2.92,55229.17


---

### How frequently are penalties reduced? 

In [30]:
def penalty_change(row):
    """Classify how a penalty's final amount compares with its initial amount.

    Args:
        row: Object exposing ``total_amount_initial`` and
            ``total_amount_due_final`` attributes (e.g. a DataFrame row).

    Returns:
        "decrease" if the final amount is lower than the initial amount,
        "increase" if it is higher, and "same" otherwise (including when the
        two values cannot be ordered, such as a missing amount).
    """
    initial = row.total_amount_initial
    final = row.total_amount_due_final
    if initial > final:
        return "decrease"
    if initial < final:
        return "increase"
    return "same"

In [31]:
# Label every penalty row as a decrease, increase or no change.
penalties_df["penalty_change_category"] = penalties_df.apply(
    penalty_change, axis="columns"
)

### What happens to penalty amounts? 

In [32]:
# Share of penalties in each change category, rounded to two decimals.
penalties_df["penalty_change_category"].value_counts(normalize=True).round(2)

same        0.76
decrease    0.16
increase    0.08
Name: penalty_change_category, dtype: float64

### How much did penalty amounts change?

In [33]:
def penalty_diff(row):
    """Return the change in a penalty's amount: final minus initial.

    Args:
        row: Object exposing ``total_amount_initial`` and
            ``total_amount_due_final`` attributes (e.g. a DataFrame row).

    Returns:
        A negative number when the penalty was reduced, a positive number
        when it was increased, and 0 when the amounts are equal or cannot be
        ordered (such as a missing amount).
    """
    initial = row.total_amount_initial
    final = row.total_amount_due_final
    # Both directions share one formula; only the "no difference" case
    # (equal or unorderable values) needs the explicit zero.
    if initial > final or initial < final:
        return final - initial
    return 0

In [34]:
# Dollar change (final minus initial) for every penalty row.
penalties_df["penalty_change"] = penalties_df.apply(penalty_diff, axis="columns")

### Fine decreased? 

In [35]:
# Total dollars removed from penalties that were reduced.
penalties_df.loc[
    penalties_df["penalty_change_category"] == "decrease", "penalty_change"
].sum()

-19865360.65

### Increased? 

In [36]:
# Total dollars added to penalties that were increased.
penalties_df.loc[
    penalties_df["penalty_change_category"] == "increase", "penalty_change"
].sum()

5599276.5

### Net?

In [37]:
# Net dollar change across all penalties (decreases plus increases).
penalties_df.loc[:, "penalty_change"].sum()

-14266084.149999999

---

### Toplines

In [38]:
# Today's date as MM/DD/YYYY text, used to stamp the exported summary.
today = pd.Timestamp.today().strftime("%m/%d/%Y")

In [39]:
# Start the one-row summary table with the update date.
toplines = pd.DataFrame({"update_date": [today]})

In [40]:
# Total of all initially assessed penalty amounts.
toplines = toplines.assign(
    total_penalties_initial=penalties_df["total_amount_initial"].sum()
)

In [41]:
# Total of all final penalty amounts due.
toplines = toplines.assign(
    total_penalties_final=penalties_df["total_amount_due_final"].sum()
)

In [42]:
# Net dollar change between initial and final amounts.
toplines = toplines.assign(net_penalty_change=penalties_df["penalty_change"].sum())

In [43]:
# Total dollar change among penalties that were reduced.
toplines["net_penalty_decrease"] = penalties_df.loc[
    penalties_df["penalty_change_category"] == "decrease", "penalty_change"
].sum()

In [44]:
# Total dollar change among penalties that were increased.
toplines["net_penalty_increase"] = penalties_df.loc[
    penalties_df["penalty_change_category"] == "increase", "penalty_change"
].sum()

In [45]:
# Number of facility rows in the grouped penalties table.
toplines["facilities_w_penalty"] = penalties_grouped.shape[0]

In [46]:
# Display the one-row summary table.
toplines

Unnamed: 0,update_date,total_penalties_initial,total_penalties_final,net_penalty_change,net_penalty_decrease,net_penalty_increase,facilities_w_penalty
0,11/08/2021,131875900.0,117609829.6,-14266084.15,-19865360.65,5599276.5,2584


---

### Exports

In [47]:
# Write the per-facility counts, sums and per-10-bed rates without the index.
merge_df.to_csv(
    "data/processed/penalties_by_facility_rates_count_sum.csv",
    index=False,
)

In [48]:
# Write the summary as a JSON list of records, indented for readability.
toplines.to_json(
    "data/processed/toplines.json",
    orient="records",
    indent=4,
)