In [1]:
import pandas as pd
hrrp = pd.read_csv("data/readmission.csv")
print(f"Total rows: {len(hrrp)}")
hrrp.head()

Total rows: 18510


Unnamed: 0,Facility Name,Facility ID,State,Measure Name,Number of Discharges,Footnote,Excess Readmission Ratio,Predicted Readmission Rate,Expected Readmission Rate,Number of Readmissions,Start Date,End Date
0,SOUTHEAST HEALTH MEDICAL CENTER,10001,AL,READM-30-AMI-HRRP,296.0,,0.9483,13.0146,13.7235,36,07/01/2020,06/30/2023
1,SOUTHEAST HEALTH MEDICAL CENTER,10001,AL,READM-30-CABG-HRRP,151.0,,0.9509,9.6899,10.1898,13,07/01/2020,06/30/2023
2,SOUTHEAST HEALTH MEDICAL CENTER,10001,AL,READM-30-HF-HRRP,681.0,,1.0597,21.5645,20.3495,151,07/01/2020,06/30/2023
3,SOUTHEAST HEALTH MEDICAL CENTER,10001,AL,READM-30-HIP-KNEE-HRRP,,,0.9654,4.268,4.4211,Too Few to Report,07/01/2020,06/30/2023
4,SOUTHEAST HEALTH MEDICAL CENTER,10001,AL,READM-30-PN-HRRP,490.0,,0.9715,16.1137,16.5863,77,07/01/2020,06/30/2023


In [2]:
# STEP 2: Keep only Heart Failure and COPD measures
hrrp = hrrp[hrrp["Measure Name"].isin(["READM-30-HF-HRRP", "READM-30-COPD-HRRP"])]
print(f"Rows after filtering: {len(hrrp)}")

print(hrrp["Measure Name"].value_counts())

Rows after filtering: 6170
Measure Name
READM-30-HF-HRRP      3085
READM-30-COPD-HRRP    3085
Name: count, dtype: int64


In [3]:
# STEP 3: Create disease column
def get_disease_hrrp(measure_name):
    if "HF" in measure_name:
        return "Heart_Failure"
    elif "COPD" in measure_name:
        return "COPD"
    return None

hrrp["Disease"] = hrrp["Measure Name"].apply(get_disease_hrrp)

print(hrrp["Disease"].value_counts())

Disease
Heart_Failure    3085
COPD             3085
Name: count, dtype: int64


In [4]:
# STEP 4: Keep only required columns
hrrp = hrrp[[
    "State",
    "Disease",
    "Number of Discharges",
    "Predicted Readmission Rate",
    "Expected Readmission Rate",
    "Excess Readmission Ratio"
]].copy()

hrrp.rename(columns={
    "Number of Discharges": "Total_Discharges",
    "Excess Readmission Ratio": "Excess_Readmission_Ratio"
}, inplace=True)

hrrp.head()

Unnamed: 0,State,Disease,Total_Discharges,Predicted Readmission Rate,Expected Readmission Rate,Excess_Readmission_Ratio
2,AL,Heart_Failure,681.0,21.5645,20.3495,1.0597
5,AL,COPD,130.0,15.4544,16.5637,0.933
8,AL,Heart_Failure,176.0,20.1511,20.2835,0.9935
11,AL,COPD,144.0,15.5737,17.909,0.8696
12,AL,COPD,154.0,17.788,18.7982,0.9463


In [5]:
# STEP 5: Remove missing values
print("\nREMOVE MISSING VALUES")
print(f"Rows before: {len(hrrp)}")
hrrp = hrrp.dropna(subset=["Predicted Readmission Rate", "Expected Readmission Rate", "Excess_Readmission_Ratio"])
print(f"Rows after: {len(hrrp)}")

# STEP 6: Remove zero discharge rows
print("\nREMOVE ZERO DISCHARGE ROWS")
print(f"Rows before: {len(hrrp)}")
hrrp = hrrp[hrrp["Total_Discharges"] > 0]
print(f"Rows after: {len(hrrp)}")



REMOVE MISSING VALUES
Rows before: 6170
Rows after: 4962

REMOVE ZERO DISCHARGE ROWS
Rows before: 4962
Rows after: 3892


In [6]:
# STEP 7: Create weighted columns
hrrp["Weighted_Predicted"] = hrrp["Predicted Readmission Rate"] * hrrp["Total_Discharges"]
hrrp["Weighted_Expected"] = hrrp["Expected Readmission Rate"] * hrrp["Total_Discharges"]

# STEP 8: Aggregate by State and Disease
print(f"Rows before aggregation: {len(hrrp)}")

hrrp_state = hrrp.groupby(["State", "Disease"], as_index=False).agg({
    "Total_Discharges": "sum",
    "Weighted_Predicted": "sum",
    "Weighted_Expected": "sum"
})
print(f"Rows after aggregation: {len(hrrp_state)}")

# STEP 9: Calculate weighted averages
hrrp_state["Predicted_Readmission_Rate"] = hrrp_state["Weighted_Predicted"] / hrrp_state["Total_Discharges"]
hrrp_state["Expected_Readmission_Rate"] = hrrp_state["Weighted_Expected"] / hrrp_state["Total_Discharges"]


Rows before aggregation: 3892
Rows after aggregation: 102


In [7]:
# STEP 10: Keep only final columns 
hrrp_state = hrrp_state[[
    "State",
    "Disease",
    "Total_Discharges",
    "Predicted_Readmission_Rate",
    "Expected_Readmission_Rate",
    "Excess_Readmission_Ratio"
]]

KeyError: "['Excess_Readmission_Ratio'] not in index"