In [1]:
import pandas as pd
import warnings
from IPython.utils import io
import sys
import numpy as np

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

stars_dir = '~/GitHub/stars-data-builder/'

## Load SAS pack primary output data

In [2]:
# 2026 -- prognosticated
df_2026 = pd.read_csv(stars_dir + 'Reproduce_Stars_Input/2026/SAS_output/CMS_Stars_2026_predictions_from_Nov_2025_data.csv')
df_2026['PROVIDER_ID'] = df_2026['PROVIDER_ID'].astype(str)
df_2026 = df_2026.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2026.columns = df_2026.columns.str.strip()

## Replace the imputed 666666 suffixes of VHA hospitals with their original 'F' suffix
prvdrs = []
for p in df_2026['PROVIDER_ID'].tolist():
    p = str(p)
    if '666666' in p:
        p = p[:-6]
        p = p + 'F'
    while len(p) < 6:
        p = '0' + p
    prvdrs.append(p)
    
df_2026['PROVIDER_ID'] = prvdrs

for i in list(df_2026):
    if i == 'cnt_grp':
        df_2026[i] = df_2026[i].astype(str).str[0]
    if i == 'PROVIDER_ID':
        pass
    else:
        df_2026[i] = pd.to_numeric(df_2026[i], errors='coerce')

df_2026['Release year'] = ['2026']*df_2026.shape[0]
df_2026 = df_2026[~df_2026['star'].isin([np.nan, float("NaN")])]
prvdrs_2026 = df_2026['PROVIDER_ID'].unique()

print(len(prvdrs_2026), 'hospitals with stars predictions for 2026')
for star in [1,2,3,4,5]:
    tdf = df_2026[df_2026['star'] == star]
    print(tdf.shape[0], str(star)+'-star hospitals')

df_2026.head()


3210 hospitals with stars predictions for 2026
222 1-star hospitals
683 2-star hospitals
965 3-star hospitals
932 4-star hospitals
408 5-star hospitals


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Q_25,Release year
0,10001,-0.253894,0.613401,0.575849,0.069504,-0.153563,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.202642,8,7,11,15,10,5,2,1,3.0,4.0,-0.304441,2026
1,10005,-0.76892,0.938422,-0.177874,-0.060684,-0.771065,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.10772,6,7,9,15,10,5,2,1,3.0,3.0,,2026
2,10006,-1.711257,0.128104,0.605285,-1.099271,-0.350298,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.499006,8,8,9,15,9,5,2,1,3.0,2.0,,2026
3,10007,-2.14461,-1.215442,-0.538607,-1.447122,-1.888925,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-1.402743,4,2,5,5,7,4,1,1,2.0,1.0,,2026
5,10011,-0.296882,0.21036,-0.44243,-0.472254,-0.0238,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.223121,8,7,8,10,7,5,2,1,3.0,3.0,,2026


In [3]:
tdf = df_2026[df_2026['PROVIDER_ID'].isin(['140119', '140063', '140029'])]
tdf.head()


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Q_25,Release year
1062,140029,0.596369,0.196678,0.373559,-0.014642,0.696556,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.337019,8,8,11,15,10,5,2,1,3.0,4.0,,2026
1075,140063,1.607526,0.557798,-0.3956,-0.576602,-0.905579,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.153817,6,5,8,15,8,5,2,1,3.0,4.0,,2026
1099,140119,2.2151,0.471158,0.370794,0.204082,-1.167632,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.577334,8,8,11,15,9,5,2,1,3.0,5.0,,2026


In [4]:
# 2025 -- actual

df_2025 = pd.read_csv(stars_dir + '2025/2025-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2025.csv')
df_2025['PROVIDER_ID'] = df_2025['PROVIDER_ID'].astype(str)
df_2025 = df_2025.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2025.columns = df_2025.columns.str.strip()

prvdrs = df_2025['PROVIDER_ID'].astype(str).tolist()
for i, p in enumerate(prvdrs):
    if len(p) < 6:
        prvdrs[i] = '0' + p
df_2025['PROVIDER_ID'] = prvdrs

for i in list(df_2025):
    if i == 'cnt_grp':
        df_2025[i] = df_2025[i].astype(str).str[0]
    if i == 'PROVIDER_ID':
        pass
    else:
        df_2025[i] = pd.to_numeric(df_2025[i], errors='coerce')

df_2025['Release year'] = ['2025']*df_2025.shape[0]
df_2025 = df_2025[~df_2025['star'].isin([np.nan, float("NaN")])]
prvdrs_2025 = df_2025['PROVIDER_ID'].unique()

print(len(prvdrs_2025), 'hospitals with stars predictions for 2025')
df_2025.head()


2891 hospitals with stars predictions for 2025


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year
0,10001,0.29611,0.361406,0.344883,0.042072,-0.484678,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.171622,7,7,11,8,11,5,2,1,3.0,4.0,2025
1,10005,-0.843034,0.631203,-0.403414,0.125181,-0.352656,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.150133,6,7,9,8,12,5,2,1,3.0,3.0,2025
2,10006,-1.540677,-0.10489,0.685911,-1.301785,-0.458178,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.552498,7,8,9,8,10,5,2,1,3.0,2.0,2025
3,10007,-3.3307,-0.931587,-0.329281,1.298719,-3.269085,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-1.116717,3,3,7,8,7,5,2,1,3.0,1.0,2025
5,10011,-0.554085,-0.590401,0.302812,-0.033641,-1.054047,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.319055,7,7,9,8,8,5,2,1,3.0,2.0,2025


In [5]:
# 2024 -- actual

df_2024 = pd.read_csv(stars_dir + '2024/2024-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2024.csv')
df_2024['PROVIDER_ID'] = df_2024['PROVIDER_ID'].astype(str)
df_2024 = df_2024.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2024.columns = df_2024.columns.str.strip()

prvdrs = df_2024['PROVIDER_ID'].astype(str).tolist()
for i, p in enumerate(prvdrs):
    if len(p) < 6:
        prvdrs[i] = '0' + p
df_2024['PROVIDER_ID'] = prvdrs

for i in list(df_2024):
    if i == 'cnt_grp':
        df_2024[i] = df_2024[i].astype(str).str[0]
    if i == 'PROVIDER_ID':
        pass
    else:
        df_2024[i] = pd.to_numeric(df_2024[i], errors='coerce')

df_2024['Release year'] = ['2024']*df_2024.shape[0]
df_2024 = df_2024[~df_2024['star'].isin([np.nan, float("NaN")])]
prvdrs_2024 = df_2024['PROVIDER_ID'].unique()

print(len(prvdrs_2024), 'hospitals with stars predictions for 2024')
df_2024.head()


2847 hospitals with stars predictions for 2024


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year
0,10001,0.007071,0.230867,-0.146867,0.137779,-0.677624,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.030968,7,7,11,8,10,5,2,1,3.0,3.0,2024
1,10005,-1.440587,0.720263,-0.08772,-0.255125,-0.489001,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.292577,6,7,9,8,12,5,2,1,3.0,2.0,2024
2,10006,-1.462748,-0.269475,-0.173331,-1.097088,-0.754912,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.751171,7,7,9,8,11,5,2,1,3.0,1.0,2024
3,10007,-3.527615,-0.470903,0.393999,0.199767,-1.804879,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.965631,3,2,7,8,7,4,1,1,2.0,1.0,2024
5,10011,-0.47672,-0.402191,0.346623,-0.030498,-1.046286,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.249367,7,7,9,8,8,5,2,1,3.0,3.0,2024


In [6]:
# 2023 -- actual

df_2023 = pd.read_csv(stars_dir + '2023/2023-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2023.csv')
print(df_2023.shape)
df_2023['PROVIDER_ID'] = df_2023['PROVIDER_ID'].astype(str)
df_2023 = df_2023.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2023.columns = df_2023.columns.str.strip()
print(df_2023.shape)

prvdrs = df_2023['PROVIDER_ID'].astype(str).tolist()
for i, p in enumerate(prvdrs):
    if len(p) < 6:
        prvdrs[i] = '0' + p
df_2023['PROVIDER_ID'] = prvdrs

for i in list(df_2023):
    if i == 'cnt_grp':
        df_2023[i] = df_2023[i].astype(str).str[0]
    if i == 'PROVIDER_ID':
        pass
    else:
        df_2023[i] = pd.to_numeric(df_2023[i], errors='coerce')

df_2023['Release year'] = ['2023']*df_2023.shape[0]
df_2023 = df_2023[~df_2023['star'].isin([np.nan, float("NaN")])]
prvdrs_2023 = df_2023['PROVIDER_ID'].unique()

print(len(prvdrs_2023), 'hospitals with a star ratings in 2023')
df_2023.head()


(4654, 27)
(4654, 27)
3076 hospitals with a star ratings in 2023


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year
0,10001,-0.647058,0.284081,0.312074,-0.127836,-1.024044,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.162208,7,8,11,8,10,5,2,1,3.0,3.0,2023
1,10005,-1.564103,0.560369,-0.237844,-0.166838,-0.302742,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.346181,6,7,10,8,11,5,2,1,3.0,2.0,2023
2,10006,-1.694318,-0.554988,-0.089526,-1.241108,-0.17935,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.809109,7,7,9,8,11,5,2,1,3.0,1.0,2023
3,10007,-2.40715,-0.488553,0.022657,0.993806,-0.65976,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.492604,3,2,6,8,7,4,1,1,2.0,2.0,2023
5,10011,-0.517349,-0.624302,0.42877,0.134223,-2.385055,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.413511,7,7,9,8,8,5,2,1,3.0,2.0,2023


In [7]:
# 2022 - actual

df_2022 = pd.read_csv(stars_dir + '2022/2022-07 Stars Release/SAS_CSV_output/CMS_Stars_Jul_2022.csv')
df_2022['PROVIDER_ID'] = df_2022['PROVIDER_ID'].astype(int).astype(str)
df_2022 = df_2022.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2022.columns = df_2022.columns.str.strip()

prvdrs = df_2022['PROVIDER_ID'].astype(str).tolist()
for i, p in enumerate(prvdrs):
    if len(p) < 6:
        prvdrs[i] = '0' + p
df_2022['PROVIDER_ID'] = prvdrs

for i in list(df_2022):
    if i == 'cnt_grp':
        df_2022[i] = df_2022[i].astype(str).str[0]
    if i == 'PROVIDER_ID':
        pass
    else:
        df_2022[i] = pd.to_numeric(df_2022[i], errors='coerce')

df_2022['Release year'] = ['2022']*df_2022.shape[0]
df_2022 = df_2022[~df_2022['star'].isin([np.nan, float("NaN")])]
prvdrs_2022 = df_2022['PROVIDER_ID'].unique()

print(len(prvdrs_2022), 'hospitals with a star ratings in 2022')
df_2022.head()


3121 hospitals with a star ratings in 2022


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year
0,10001,-0.846744,-0.321758,0.591483,0.0721,-0.320038,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.149487,7,8,11,8,9,5,2,1,3.0,3.0,2022
1,10005,-1.354201,1.419195,-0.145974,0.056106,0.003965,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.004997,6,7,10,8,11,5,2,1,3.0,3.0,2022
2,10006,-1.645507,0.033444,-0.269957,-0.966379,0.02668,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.623446,7,7,9,8,9,5,2,1,3.0,2.0,2022
3,10007,-1.663574,-0.804531,1.163333,0.357169,0.841066,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.107545,3,2,6,8,5,4,1,1,2.0,3.0,2022
5,10011,-1.032452,-1.670115,0.164188,-0.31908,-1.17147,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.769217,7,7,9,8,5,5,2,1,3.0,2.0,2022


In [8]:
# 2021 - actual

df_2021 = pd.read_csv(stars_dir + '2021/2021-04 Stars Release/SAS_CSV_output/CMS_Stars_Apr_2021.csv')
df_2021['PROVIDER_ID'] = df_2021['PROVIDER_ID'].astype(int).astype(str)
df_2021 = df_2021.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2021.columns = df_2021.columns.str.strip()

prvdrs = df_2021['PROVIDER_ID'].astype(str).tolist()
for i, p in enumerate(prvdrs):
    if len(p) < 6:
        prvdrs[i] = '0' + p
df_2021['PROVIDER_ID'] = prvdrs

for i in list(df_2021):
    if i == 'cnt_grp':
        df_2021[i] = df_2021[i].astype(str).str[0]
    if i == 'PROVIDER_ID':
        pass
    else:
        df_2021[i] = pd.to_numeric(df_2021[i], errors='coerce')

df_2021['Release year'] = ['2021']*df_2021.shape[0]
df_2021 = df_2021[~df_2021['star'].isin([np.nan, float("NaN")])]
prvdrs_2021 = df_2021['PROVIDER_ID'].unique()

print(len(prvdrs_2021), 'hospitals with a star ratings in 2021')
df_2021.head()

3355 hospitals with a star ratings in 2021


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Release year
0,10001,-0.752932,-0.070355,0.556127,0.071425,-0.050316,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.0491,7,8,11,8,11,5,2,1,3.0,3.0,2021
1,10005,-2.799407,0.064559,-0.114854,0.055327,-0.071058,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.62329,6,7,10,8,14,5,2,1,3.0,2.0,2021
2,10006,-1.984738,-0.079649,0.302983,-0.966582,-0.462672,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.655678,7,7,9,8,11,5,2,1,3.0,2.0,2021
3,10007,-2.050643,-0.885254,1.200369,0.356272,-0.642649,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.380554,4,2,6,8,7,4,1,1,2.0,2.0,2021
4,10008,-1.612519,0.261221,-0.00278,,-0.867725,0.22,0.22,0.22,0.22,0.12,,0.282051,0.282051,0.282051,0.153846,-0.515416,3,1,4,0,8,3,1,1,1.0,2.0,2021


## Merge 2026 with 2025, 2024, 2023, 2022, and 2021

In [9]:
main_df = df_2026.merge(df_2025, how='outer')
main_df = main_df.merge(df_2024, how='outer')
main_df = main_df.merge(df_2023, how='outer')
main_df = main_df.merge(df_2022, how='outer')
main_df = main_df.merge(df_2021, how='outer')

print(df_2021.shape)
print(df_2022.shape)
print(df_2023.shape)
print(df_2024.shape)
print(df_2025.shape)
print(df_2026.shape)
print(main_df.shape)

main_df.sort_values(by=['summary_score', 'cnt_grp', 'star'], inplace=True, ascending=False)
print(list(main_df))

prvdrs = main_df['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in main_df')

main_df.head()

(3355, 28)
(3121, 28)
(3076, 28)
(2847, 28)
(2891, 28)
(3210, 29)
(18500, 29)
['PROVIDER_ID', 'Std_Outcomes_Mortality_score', 'Std_Outcomes_Readmission_score', 'Std_Outcomes_Safety_score', 'Std_PatientExp_score', 'Std_Process_score', 'std_weight_PatientExperience', 'std_weight_Readmission', 'std_weight_Mortality', 'std_weight_safety', 'std_weight_Process', 'weight_PatientExperience', 'weight_Outcomes_Readmission', 'weight_Outcomes_Mortality', 'weight_Outcomes_Safety', 'weight_Process', 'summary_score', 'Outcomes_Mortality_cnt', 'Outcomes_safety_cnt', 'Outcomes_Readmission_cnt', 'Patient_Experience_cnt', 'Process_cnt', 'Total_measure_group_cnt', 'MortSafe_Group_cnt', 'report_indicator', 'cnt_grp', 'star', 'Q_25', 'Release year']
3648 hospitals in main_df


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Q_25,Release year
6655,170183,,4.290477,2.032927,2.223623,0.873621,0.22,0.22,0.22,0.22,0.12,0.282051,0.282051,,0.282051,0.153846,2.545103,0,3,3,8,3,4,1,1,2.0,5.0,,2023
11625,330270,0.546448,7.145042,1.162883,1.589132,1.830755,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,2.517262,1,5,3,8,3,4,1,1,2.0,5.0,,2023
11629,330270,2.882665,5.122073,1.454899,1.353749,0.20903,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,2.404028,1,6,3,8,2,3,1,1,1.0,5.0,,2021
13000,360351,,3.795795,1.824891,1.475751,2.547755,0.22,0.22,0.22,0.22,0.12,0.282051,0.282051,,0.282051,0.153846,2.393521,0,3,3,8,2,3,1,1,1.0,5.0,,2021
6654,170183,,2.762484,2.093786,2.082586,1.056321,0.22,0.22,0.22,0.22,0.12,0.282051,0.282051,,0.282051,0.153846,2.119624,0,3,3,8,1,3,1,1,1.0,5.0,,2022


## Load SAS pack secondary (domain-specific) files
These data files pertain to standardized scores for each component of a particular measure domain.

In [10]:
# For 2026 -- predicted

fnames = ['outcome_readmission', 'outcome_safety', 'process', 'ptexp']
f_ext = ['readmission_', 'safety_', 'process_', 'patient_exp_']

with io.capture_output() as captured: 
    df_2026 = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2026/predicted/Nov2025/outcome_mortality.sas7bdat')
    #df_2026 = df_2026.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
    
    ls = []
    for p in df_2026['PROVIDER_ID'].tolist():
        p = int(p)
        p = str(p)
        if '666666' in p:
            p = p[:-6]
            p = p + 'F'
        while len(p) < 6:
            p = '0' + p
        ls.append(p)
    df_2026['PROVIDER_ID'] = ls
    

    
cols = list(df_2026)
new_cols = []
ind = cols.index('C1')
for i, c in enumerate(cols):
    if i >= ind:
        c = 'mortality_' + c
    new_cols.append(c)
print(new_cols)
df_2026.columns = new_cols

for j, f in enumerate(fnames):
    with io.capture_output() as captured: tdf = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2026/predicted/Nov2025/' + f + '.sas7bdat')
    #tdf = tdf.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
    
    ls = []
    for p in tdf['PROVIDER_ID'].tolist():
        p = int(p)
        p = str(p)
        if '666666' in p:
            p = p[:-6]
            p = p + 'F'
        while len(p) < 6:
            p = '0' + p
        ls.append(p)
    tdf['PROVIDER_ID'] = ls
    
    cols = list(tdf)
    new_cols = []
    ind = cols.index('C1')
    for i, c in enumerate(cols):
        if i >= ind:
            c = f_ext[j] + c
        new_cols.append(c)
    print(new_cols)
    tdf.columns = new_cols

    df_2026 = df_2026.merge(tdf, on='PROVIDER_ID', how='outer')
    
df_2026['Release year'] = ['2026']*df_2026.shape[0]
df_2026 = df_2026.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2026 = df_2026[df_2026['PROVIDER_ID'].isin(prvdrs_2026)]
prvdrs = df_2026['PROVIDER_ID'].unique()

print(len(prvdrs), 'hospitals in 2026')
df_2026.head()


['PROVIDER_ID', 'std_MORT_30_AMI', 'std_MORT_30_CABG', 'std_MORT_30_COPD', 'std_MORT_30_HF', 'std_MORT_30_PN', 'std_MORT_30_STK', 'std_PSI_4_SURG_COMP', 'std_Hybrid_HWM', 'mortality_C1', 'mortality_C2', 'mortality_C3', 'mortality_C4', 'mortality_C5', 'mortality_C6', 'mortality_C7', 'mortality_C8', 'mortality_total_cnt', 'mortality_measure_wt', 'mortality_score_before_std', 'mortality_Mean', 'mortality_StdDev', 'mortality_grp_score']
['PROVIDER_ID', 'std_EDAC_30_AMI', 'std_EDAC_30_HF', 'std_EDAC_30_PN', 'std_OP_32', 'std_READM_30_CABG', 'std_READM_30_COPD', 'std_READM_30_HIP_KNEE', 'std_OP_35_ADM', 'std_OP_35_ED', 'std_OP_36', 'std_Hybrid_HWR', 'readmission_C1', 'readmission_C2', 'readmission_C3', 'readmission_C4', 'readmission_C5', 'readmission_C6', 'readmission_C7', 'readmission_C8', 'readmission_C9', 'readmission_C10', 'readmission_C11', 'readmission_total_cnt', 'readmission_measure_wt', 'readmission_score_before_std', 'readmission_Mean', 'readmission_StdDev', 'readmission_grp_score'

Unnamed: 0,PROVIDER_ID,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,std_Hybrid_HWM,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_C8,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,std_Hybrid_HWR,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_SAFE_USE_OF_OPIOIDS,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_O_COMP_1_LINEAR_SCORE,std_O_COMP_2_LINEAR_SCORE,std_O_COMP_3_LINEAR_SCORE,std_O_PATIENT_RATE_LINEAR_SCORE,std_O_PATIENT_REC_LINEAR_SCORE,std_H_COMP_1_LINEAR_SCORE,std_H_COMP_2_LINEAR_SCORE,std_H_COMP_3_LINEAR_SCORE,std_H_COMP_5_LINEAR_SCORE,std_H_COMP_6_LINEAR_SCORE,std_H_COMP_7_LINEAR_SCORE,std_H_CLEAN_LINEAR_SCORE,std_H_QUIET_LINEAR_SCORE,std_H_RECMND_LINEAR_SCORE,std_H_HSP_RATING_LINEAR_SCORE,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_C9,patient_exp_C10,patient_exp_C11,patient_exp_C12,patient_exp_C13,patient_exp_C14,patient_exp_C15,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,Release year
0,10001,0.635861,-0.407503,-0.336903,0.63017,-0.741228,-0.155169,-1.255617,-0.502326,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.26659,-0.079177,0.738153,-0.253894,0.930664,0.193207,-0.369507,0.294215,0.524002,0.266962,0.079849,1.577732,-0.191118,0.662844,-0.167496,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.345578,-9.9e-05,0.563541,0.613401,0.570514,0.134271,0.794787,0.544171,,0.84381,0.055399,0.242553,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.455072,-0.025094,0.833842,0.575849,0.944322,0.125806,-0.216523,-1.245548,-1.161984,1.06792,-1.508442,0.793318,0.182926,0.408151,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,0.1,-0.061005,0.017718,0.512643,-0.153563,-0.128381,-1.276518,-0.390364,0.03758,0.490315,-0.453797,0.472017,-0.305413,0.563206,0.166795,0.347443,-0.38317,1.069861,0.837525,0.268063,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,15.0,0.066667,0.087678,0.029341,0.839329,0.069504,2026
1,10005,,,-0.014416,-1.15698,-1.755154,0.196999,-0.469844,-0.681152,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,0.166667,-0.646758,-0.079177,0.738153,-0.76892,,-0.217008,0.774312,-1.240167,,1.318246,0.928441,1.508638,-0.786882,0.096395,2.376687,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,0.111111,0.52874,-9.9e-05,0.563541,0.938422,0.844963,-0.145311,-0.842976,0.574237,,-1.496084,-0.288416,0.139697,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.173413,-0.025094,0.833842,-0.177874,-0.801819,-1.478114,0.080293,0.393039,-0.394444,-2.2975,0.620678,-0.680188,0.004698,0.777728,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,0.1,-0.377563,0.017718,0.512643,-0.771065,-0.128381,0.51402,0.533043,0.03758,0.146388,-0.078576,0.848537,-1.300616,-0.861492,0.426545,-0.283475,-0.38317,0.888369,-0.40775,-0.274923,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,15.0,0.066667,-0.021593,0.029341,0.839329,-0.060684,2026
2,10006,-2.02674,-3.278654,0.114579,-0.423791,-1.175767,0.490473,-2.684768,-1.754104,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-1.342347,-0.079177,0.738153,-1.711257,1.028891,0.304243,0.235461,1.6094,0.035082,-1.017941,-0.344447,,,0.096395,-1.298244,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,9.0,0.111111,0.072093,-9.9e-05,0.563541,0.128104,-1.487857,0.882647,0.948383,1.198461,1.120116,1.148146,0.761616,-0.734572,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.479618,-0.025094,0.833842,0.605285,-0.545034,-0.62269,-0.394612,0.142431,0.373096,-0.194113,-0.443882,,0.004698,0.223363,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,9.0,0.111111,-0.16186,0.017718,0.512643,-0.350298,-0.128381,-0.679672,-0.390364,-0.468851,-0.541468,-0.829018,-1.034062,-1.499656,-1.268548,-1.131957,-1.229852,-1.735334,0.343894,-1.445479,-1.360894,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,15.0,0.066667,-0.893309,0.029341,0.839329,-1.099271,2026
3,10007,,,-1.497857,-0.83621,-3.276042,,,-1.038803,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,4.0,0.25,-1.662228,-0.079177,0.738153,-2.14461,,-2.243412,-0.779431,0.623011,,-0.433894,,,,,-0.591526,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,5.0,0.2,-0.685051,-9.9e-05,0.563541,-1.215442,,,,,,,-0.625263,-0.323151,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,0.5,-0.474207,-0.025094,0.833842,-0.538607,-2.342532,-0.002507,,0.778588,-0.010674,,-2.040722,,0.004698,-3.041232,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,7.0,0.142857,-0.950626,0.017718,0.512643,-1.888925,-1.059965,-1.873364,-1.313771,-1.481713,-0.19754,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.2,-1.185271,0.029341,0.839329,-1.447122,2026
5,10011,-0.480714,0.549548,-0.143411,-1.294453,-1.356826,0.255694,-0.129385,0.212975,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.298321,-0.079177,0.738153,-0.296882,1.258087,-0.494598,-0.392648,0.294215,-1.23611,0.033343,,,,0.946068,0.539222,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,8.0,0.125,0.118447,-9.9e-05,0.563541,0.21036,-0.527284,-2.54435,0.335785,1.198461,,-1.234987,0.183169,-0.168868,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.394011,-0.025094,0.833842,-0.44243,0.225323,-0.708232,,0.065321,-0.010674,,-0.672002,,0.361155,0.777728,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,7.0,0.142857,0.005517,0.017718,0.512643,-0.0238,,,,,,-0.829018,-0.281022,0.092668,-0.657963,-0.612456,-0.598934,-0.833892,0.525386,-0.200204,-0.274923,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,0.1,-0.367036,0.029341,0.839329,-0.472254,2026


In [11]:
# For 2025 -- actual

fnames = ['outcome_readmission', 'outcome_safety', 'process', 'ptexp']
f_ext = ['readmission_', 'safety_', 'process_', 'patient_exp_']

with io.capture_output() as captured: 
    df_2025 = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2025/actual/outcome_mortality.sas7bdat')
    #df_2025 = df_2025.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
    
    ls = []
    for p in df_2025['PROVIDER_ID'].tolist():
        if len(p) < 6: 
            p = '0' + p
        ls.append(p)
    df_2025['PROVIDER_ID'] = ls
    

    
cols = list(df_2025)
new_cols = []
ind = cols.index('C1')
for i, c in enumerate(cols):
    if i >= ind:
        c = 'mortality_' + c
    new_cols.append(c)
print(new_cols)
df_2025.columns = new_cols

for j, f in enumerate(fnames):
    with io.capture_output() as captured: tdf = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2025/actual/' + f + '.sas7bdat')
    #tdf = tdf.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
    
    ls = []
    for p in df_2025['PROVIDER_ID'].tolist():
        if len(p) < 6: 
            p = '0' + p
        ls.append(p)
    df_2025['PROVIDER_ID'] = ls
    
    cols = list(tdf)
    new_cols = []
    ind = cols.index('C1')
    for i, c in enumerate(cols):
        if i >= ind:
            c = f_ext[j] + c
        new_cols.append(c)
    print(new_cols)
    tdf.columns = new_cols

    df_2025 = df_2025.merge(tdf, on='PROVIDER_ID', how='outer')
    
df_2025['Release year'] = ['2025']*df_2025.shape[0]
df_2025 = df_2025.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2025 = df_2025[df_2025['PROVIDER_ID'].isin(prvdrs_2025)]
prvdrs = df_2025['PROVIDER_ID'].unique()

print(len(prvdrs), 'hospitals in 2025')
df_2025.head()


['PROVIDER_ID', 'std_MORT_30_AMI', 'std_MORT_30_CABG', 'std_MORT_30_COPD', 'std_MORT_30_HF', 'std_MORT_30_PN', 'std_MORT_30_STK', 'std_PSI_4_SURG_COMP', 'mortality_C1', 'mortality_C2', 'mortality_C3', 'mortality_C4', 'mortality_C5', 'mortality_C6', 'mortality_C7', 'mortality_total_cnt', 'mortality_measure_wt', 'mortality_score_before_std', 'mortality_Mean', 'mortality_StdDev', 'mortality_grp_score']
['PROVIDER_ID', 'std_EDAC_30_AMI', 'std_EDAC_30_HF', 'std_EDAC_30_PN', 'std_OP_32', 'std_READM_30_CABG', 'std_READM_30_COPD', 'std_READM_30_HIP_KNEE', 'std_READM_30_HOSP_WIDE', 'std_OP_35_ADM', 'std_OP_35_ED', 'std_OP_36', 'readmission_C1', 'readmission_C2', 'readmission_C3', 'readmission_C4', 'readmission_C5', 'readmission_C6', 'readmission_C7', 'readmission_C8', 'readmission_C9', 'readmission_C10', 'readmission_C11', 'readmission_total_cnt', 'readmission_measure_wt', 'readmission_score_before_std', 'readmission_Mean', 'readmission_StdDev', 'readmission_grp_score']
['PROVIDER_ID', 'std_COM

Unnamed: 0,PROVIDER_ID,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_PC_01,std_SAFE_USE_OF_OPIOIDS,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_C12,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,Release year
0,10001,1.405923,-1.04537,1.011496,0.650904,-0.315375,0.023766,-0.732077,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,0.142753,-0.073484,0.730255,0.29611,1.024273,-0.368843,-0.448127,0.289973,0.607499,0.859382,0.192452,0.768359,-1.028886,0.62791,-0.25798,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.206001,0.013108,0.53373,0.361406,0.698729,0.30367,0.739078,-0.467783,,0.476446,-0.105703,0.103001,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.249634,-0.01903,0.779001,0.344883,-0.525017,1.044716,0.080291,0.781373,-1.062058,-1.138829,,-3.171398,0.588011,0.140984,0.563049,0.449772,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.204464,0.028411,0.480473,-0.484678,-1.154064,0.570761,-0.231671,-0.254181,-0.312251,0.933177,0.719371,0.020336,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.036435,4.8997890000000005e-17,0.866021,0.042072,2025
1,10005,-1.492798,,0.150309,-0.366857,-1.766289,-0.083025,-0.576021,1.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,0.166667,-0.689113,-0.073484,0.730255,-0.843034,,-0.330781,0.54836,-1.03515,,2.448388,1.038374,1.653526,1.891523,-0.071403,-2.992838,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,0.111111,0.35,0.013108,0.53373,0.631203,1.158692,-0.332255,-1.072825,1.149255,,-2.749027,-0.428086,-0.05878,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.333289,-0.01903,0.779001,-0.403414,-0.396114,-0.264294,-1.615666,0.373993,0.303229,-0.270542,-0.094782,0.334795,-1.162943,0.346333,-0.171693,0.925313,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,12.0,0.083333,-0.141031,0.028411,0.480473,-0.352656,-0.243473,0.570761,-1.17226,0.768655,0.678414,-0.038195,-0.226012,0.529388,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.10841,4.8997890000000005e-17,0.866021,0.125181,2025
2,10006,-3.138018,-2.730472,-0.512142,-0.135547,-0.626285,-0.243212,-1.004318,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-1.198571,-0.073484,0.730255,-1.540677,-0.541852,0.3686,-0.017926,1.105434,-0.951609,0.859382,0.192452,-0.116807,,,-1.283552,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.042875,0.013108,0.53373,-0.10489,-0.834482,0.977642,1.028777,1.149255,1.108414,0.595372,0.965067,-0.867684,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.515295,-0.01903,0.779001,0.685911,-0.525017,-0.591547,-1.06448,0.315796,-0.313955,0.597745,,-0.45231,,0.551682,-0.171693,-0.263539,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,10.0,0.1,-0.191732,0.028411,0.480473,-0.458178,-1.154064,-1.443055,-1.17226,-1.277016,-1.302916,-1.009567,-1.171395,-0.488716,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-1.127373,4.8997890000000005e-17,0.866021,-1.301785,2025
3,10007,,,-2.698232,-1.014523,-3.804479,,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,0.333333,-2.505745,-0.073484,0.730255,-3.3307,,-0.858886,-0.764193,-0.21969,,-0.431686,0.192452,-0.706918,,,-0.599837,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,7.0,0.142857,-0.484108,0.013108,0.53373,-0.931587,0.085445,,,,,,-1.284702,0.372636,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,0.333333,-0.27554,-0.01903,0.779001,-0.329281,-0.525017,-2.773231,-0.322499,,0.583768,-0.704686,,-4.888717,,,,-2.165701,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,7.0,0.142857,-1.542298,0.028411,0.480473,-3.269085,1.577707,1.577668,0.708919,1.791491,1.669078,0.933177,0.719371,0.020336,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,1.124718,4.8997890000000005e-17,0.866021,1.298719,2025
5,10011,-0.160953,1.000826,-0.777123,-0.968261,-1.559016,0.450931,-1.333151,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.478107,-0.073484,0.730255,-0.554085,0.943664,-1.291837,-0.096942,0.391906,-1.431334,-0.133747,0.361636,-1.88714,,,0.425734,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.302007,0.013108,0.53373,-0.590401,-0.52784,-0.033316,0.516233,1.149255,,0.270736,-0.121823,0.264782,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.216861,-0.01903,0.779001,0.302812,-0.485355,,-0.428496,,-0.557089,-1.138829,-1.454347,0.621014,,,-0.355379,-0.025768,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,8.0,0.125,-0.478031,0.028411,0.480473,-1.054047,-0.243473,0.570761,-0.231671,-0.254181,0.678414,-0.038195,-0.226012,-0.488716,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.029134,4.8997890000000005e-17,0.866021,-0.033641,2025


In [12]:
# For 2024 -- actual

fnames = ['outcome_readmission', 'outcome_safety', 'process', 'ptexp']
f_ext = ['readmission_', 'safety_', 'process_', 'patient_exp_']

with io.capture_output() as captured: 
    df_2024 = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2024/actual/outcome_mortality.sas7bdat')
    #df_2024 = df_2024.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
    
    ls = []
    for p in df_2024['PROVIDER_ID'].tolist():
        if len(p) < 6: 
            p = '0' + p
        ls.append(p)
    df_2024['PROVIDER_ID'] = ls
    

    
cols = list(df_2024)
new_cols = []
ind = cols.index('C1')
for i, c in enumerate(cols):
    if i >= ind:
        c = 'mortality_' + c
    new_cols.append(c)
print(new_cols)
df_2024.columns = new_cols

for j, f in enumerate(fnames):
    with io.capture_output() as captured: tdf = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2024/actual/' + f + '.sas7bdat')
    #tdf = tdf.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
    
    ls = []
    for p in df_2024['PROVIDER_ID'].tolist():
        if len(p) < 6: 
            p = '0' + p
        ls.append(p)
    df_2024['PROVIDER_ID'] = ls
    
    cols = list(tdf)
    new_cols = []
    ind = cols.index('C1')
    for i, c in enumerate(cols):
        if i >= ind:
            c = f_ext[j] + c
        new_cols.append(c)
    print(new_cols)
    tdf.columns = new_cols

    df_2024 = df_2024.merge(tdf, on='PROVIDER_ID', how='outer')
    
df_2024['Release year'] = ['2024']*df_2024.shape[0]
df_2024 = df_2024.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2024 = df_2024[df_2024['PROVIDER_ID'].isin(prvdrs_2024)]
prvdrs = df_2024['PROVIDER_ID'].unique()

print(len(prvdrs), 'hospitals in 2024')
df_2024.head()


['PROVIDER_ID', 'std_MORT_30_AMI', 'std_MORT_30_CABG', 'std_MORT_30_COPD', 'std_MORT_30_HF', 'std_MORT_30_PN', 'std_MORT_30_STK', 'std_PSI_4_SURG_COMP', 'mortality_C1', 'mortality_C2', 'mortality_C3', 'mortality_C4', 'mortality_C5', 'mortality_C6', 'mortality_C7', 'mortality_total_cnt', 'mortality_measure_wt', 'mortality_score_before_std', 'mortality_Mean', 'mortality_StdDev', 'mortality_grp_score']
['PROVIDER_ID', 'std_EDAC_30_AMI', 'std_EDAC_30_HF', 'std_EDAC_30_PN', 'std_OP_32', 'std_READM_30_CABG', 'std_READM_30_COPD', 'std_READM_30_HIP_KNEE', 'std_READM_30_HOSP_WIDE', 'std_OP_35_ADM', 'std_OP_35_ED', 'std_OP_36', 'readmission_C1', 'readmission_C2', 'readmission_C3', 'readmission_C4', 'readmission_C5', 'readmission_C6', 'readmission_C7', 'readmission_C8', 'readmission_C9', 'readmission_C10', 'readmission_C11', 'readmission_total_cnt', 'readmission_measure_wt', 'readmission_score_before_std', 'readmission_Mean', 'readmission_StdDev', 'readmission_grp_score']
['PROVIDER_ID', 'std_COM

Unnamed: 0,PROVIDER_ID,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_3B,std_OP_8,std_PC_01,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_C12,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,Release year
0,10001,0.470514,-1.535074,0.282996,1.510467,0.129818,-0.554262,-0.748332,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.063411,-0.068391,0.704371,0.007071,1.123231,-0.791585,-0.63392,0.29207,0.584835,0.293805,0.886353,0.423838,-1.03233,0.626949,-0.252865,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.138216,0.014038,0.537879,0.230867,0.879845,-0.232451,0.488068,-0.71166,,-0.210159,0.003034,-1.162386,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.135101,-0.021526,0.773315,-0.146867,-0.431525,0.940083,-0.029295,0.419351,-0.942734,-1.140182,,-3.129707,,0.000412,0.583665,0.371166,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,10.0,0.1,-0.335877,0.029039,0.538522,-0.677624,-0.373358,-0.400063,-0.243371,0.124487,0.479704,0.876397,0.133873,0.361496,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.119895,1.093644e-15,0.870204,0.137779,2024
1,10005,-0.914388,,-0.524963,-1.584524,-1.953144,-0.829076,-0.692504,1.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,0.166667,-1.0831,-0.068391,0.704371,-1.440587,,-0.736899,0.486741,-1.03689,,2.375902,1.641861,2.033114,1.890125,-0.073504,-2.967381,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,0.111111,0.401452,0.014038,0.537879,0.720263,1.609414,-2.431638,-0.539793,0.114598,,0.421669,0.024071,0.176149,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.089361,-0.021526,0.773315,-0.08772,-0.722505,0.123623,-1.210843,-0.383371,0.310092,-0.273613,-0.62181,0.33593,0.277803,-1.342585,0.090196,0.605497,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,12.0,0.083333,-0.234299,0.029039,0.538522,-0.489001,-0.373358,0.659449,-1.990779,0.124487,0.479704,-0.124536,-0.390422,-0.160629,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.222011,1.093644e-15,0.870204,-0.255125,2024
2,10006,-3.424522,-0.831956,-0.524963,-0.346527,-0.4597,-1.873369,-0.229927,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-1.098709,-0.068391,0.704371,-1.462748,-1.118873,0.39047,0.268626,1.109892,-1.462457,1.508362,-1.946801,1.343425,,,-1.270808,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.130907,0.014038,0.537879,-0.269475,-2.585604,0.836443,0.4962,1.213392,,-1.028257,0.918161,-0.939297,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.155566,-0.021526,0.773315,-0.173331,-0.832877,-0.583976,-0.830344,-0.555383,-0.107517,0.592956,0.242409,-0.44207,,-1.134905,-0.403273,-0.097497,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,11.0,0.090909,-0.377498,0.029039,0.538522,-0.754912,-1.363769,-0.400063,-1.117075,-0.950959,-0.558423,-1.125468,-1.439013,-0.682755,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.954691,1.093644e-15,0.870204,-1.097088,2024
3,10007,,,-3.316094,-0.346527,-3.996805,,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,0.333333,-2.553142,-0.068391,0.704371,-3.527615,,0.272686,0.607081,-0.219069,,-0.573736,0.130845,-1.300386,,,-0.592179,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,7.0,0.142857,-0.239251,0.014038,0.537879,-0.470903,,,,,,,0.278624,0.287694,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,0.5,0.283159,-0.021526,0.773315,0.393999,-2.789467,-1.346006,0.511414,,0.546132,-0.706898,,-4.827162,,,,2.011485,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,7.0,0.142857,-0.942929,0.029039,0.538522,-1.804879,-0.373358,1.718961,0.630332,0.124487,-0.558423,-0.124536,0.133873,-0.160629,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.173838,1.093644e-15,0.870204,0.199767,2024
5,10011,-0.568162,0.574279,-0.818766,-0.862359,-1.009916,0.819808,-0.964139,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.404179,-0.068391,0.704371,-0.47672,0.793358,-0.900956,-0.291705,0.394298,-1.677962,-0.313474,0.130845,-0.380799,,,0.425764,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.202292,0.014038,0.537879,-0.402191,-0.579291,-0.201752,0.219719,0.83415,,0.15493,-0.272556,1.570457,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.246522,-0.021526,0.773315,0.346623,0.100266,0.178053,0.05081,,-0.924578,-1.140182,-2.299412,0.61884,,,,-0.859074,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,8.0,0.125,-0.534409,0.029039,0.538522,-1.046286,-0.373358,-0.400063,0.630332,0.124487,0.479704,-0.124536,0.133873,-0.682755,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.02654,1.093644e-15,0.870204,-0.030498,2024


In [13]:
# For 2023

fnames = ['outcome_readmission', 'outcome_safety', 'process', 'ptexp']
f_ext = ['readmission_', 'safety_', 'process_', 'patient_exp_']

with io.capture_output() as captured: 
    df_2023 = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2023/actual/outcome_mortality.sas7bdat')
    #df_2023 = df_2023.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
    
    ls = []
    for p in df_2023['PROVIDER_ID'].tolist():
        if len(p) < 6: 
            p = '0' + p
        ls.append(p)
    df_2023['PROVIDER_ID'] = ls
    
    
cols = list(df_2023)
new_cols = []
ind = cols.index('C1')
for i, c in enumerate(cols):
    if i >= ind:
        c = 'mortality_' + c
    new_cols.append(c)
print(new_cols)
df_2023.columns = new_cols

for j, f in enumerate(fnames):
    with io.capture_output() as captured: tdf = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2023/actual/' + f + '.sas7bdat')
    #tdf = tdf.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
    
    ls = []
    for p in tdf['PROVIDER_ID'].tolist():
        if len(p) < 6: p = '0' + p
        ls.append(p)
    tdf['PROVIDER_ID'] = ls
    
    cols = list(tdf)
    new_cols = []
    ind = cols.index('C1')
    for i, c in enumerate(cols):
        if i >= ind:
            c = f_ext[j] + c
        new_cols.append(c)
    print(new_cols)
    tdf.columns = new_cols

    df_2023 = df_2023.merge(tdf, on='PROVIDER_ID', how='outer')
    
    
df_2023['Release year'] = ['2023']*df_2023.shape[0]
df_2023 = df_2023.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2023 = df_2023[df_2023['PROVIDER_ID'].isin(prvdrs_2023)]
prvdrs = df_2023['PROVIDER_ID'].unique()

print(len(prvdrs), 'hospitals in 2023')
df_2023.head()


['PROVIDER_ID', 'std_MORT_30_AMI', 'std_MORT_30_CABG', 'std_MORT_30_COPD', 'std_MORT_30_HF', 'std_MORT_30_PN', 'std_MORT_30_STK', 'std_PSI_4_SURG_COMP', 'mortality_C1', 'mortality_C2', 'mortality_C3', 'mortality_C4', 'mortality_C5', 'mortality_C6', 'mortality_C7', 'mortality_total_cnt', 'mortality_measure_wt', 'mortality_score_before_std', 'mortality_Mean', 'mortality_StdDev', 'mortality_grp_score']
['PROVIDER_ID', 'std_EDAC_30_AMI', 'std_EDAC_30_HF', 'std_EDAC_30_PN', 'std_OP_32', 'std_READM_30_CABG', 'std_READM_30_COPD', 'std_READM_30_HIP_KNEE', 'std_READM_30_HOSP_WIDE', 'std_OP_35_ADM', 'std_OP_35_ED', 'std_OP_36', 'readmission_C1', 'readmission_C2', 'readmission_C3', 'readmission_C4', 'readmission_C5', 'readmission_C6', 'readmission_C7', 'readmission_C8', 'readmission_C9', 'readmission_C10', 'readmission_C11', 'readmission_total_cnt', 'readmission_measure_wt', 'readmission_score_before_std', 'readmission_Mean', 'readmission_StdDev', 'readmission_grp_score']
['PROVIDER_ID', 'std_COM

Unnamed: 0,PROVIDER_ID,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_HCP_COVID_19,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_3B,std_OP_8,std_PC_01,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_C12,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,Release year
0,10001,-0.041613,-2.19781,-0.019543,1.708727,0.366398,-1.543654,-1.741473,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.495567,-0.05126,0.686658,-0.647058,0.207832,-0.702681,0.28604,0.105508,0.219318,-0.048609,-0.198058,0.924456,0.159253,0.872692,0.062022,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.171616,0.020822,0.530811,0.284081,0.024204,0.444297,0.745807,-0.407579,1.025843,0.314693,-0.315731,-0.212425,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.202389,-0.013939,0.693192,0.312074,-1.039363,0.992908,0.10185,-1.77947,-0.904124,-0.237088,,-0.634479,,0.488322,-1.763346,-0.609958,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,10.0,0.1,-0.538475,0.030917,0.556023,-1.024044,-1.07585,-0.175531,-1.051153,0.808598,0.68837,0.037272,-0.445032,0.339427,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.109237,2.611408e-16,0.85451,-0.127836,2023
1,10005,-0.22706,,0.327558,-3.058794,-2.178369,-1.652062,0.037145,1.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,0.166667,-1.125264,-0.05126,0.686658,-1.564103,0.084469,-0.194954,0.13891,-0.342026,,2.081928,0.212564,1.264919,-0.422406,-0.324483,0.6838,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,0.1,0.318272,0.020822,0.530811,0.560369,1.323856,-2.537332,-0.430824,-0.543966,,1.269319,-0.733262,0.400536,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.17881,-0.013939,0.693192,-0.237844,-0.329039,0.635389,-1.352864,-0.401411,0.265629,-0.237088,0.091268,0.562534,,-1.240294,0.350919,0.143403,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,11.0,0.090909,-0.137414,0.030917,0.556023,-0.302742,-0.12019,0.826626,-1.051153,-0.306002,-0.421465,0.037272,-0.445032,0.339427,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.142565,2.611408e-16,0.85451,-0.166838,2023
2,10006,-3.843277,-0.631715,0.587885,-0.453288,-0.453103,-2.898752,-0.810482,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-1.214676,-0.05126,0.686658,-1.694318,-0.849561,0.276217,-1.455655,1.537615,-2.896632,1.989296,-1.429926,0.924456,,,-0.559756,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.273772,0.020822,0.530811,-0.554988,-2.141884,0.349265,0.881622,0.601683,,-0.349052,0.89047,-0.764089,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.075998,-0.013939,0.693192,-0.089526,-1.7666,-0.692541,-0.85,1.263743,0.305281,0.493077,-0.717089,-0.168974,,0.675589,0.615202,0.085452,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,11.0,0.090909,-0.068805,0.030917,0.556023,-0.17935,-1.07585,-0.175531,-2.019165,-1.420603,-0.421465,-1.119948,-1.527722,-0.724034,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-1.06054,2.611408e-16,0.85451,-1.241108,2023
3,10007,,,-1.581501,-1.395705,-2.135237,,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,0.333333,-1.704148,-0.05126,0.686658,-2.40715,,-1.291644,0.727429,-0.968573,,0.136655,,-0.096931,,,0.062022,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,6.0,0.166667,-0.238507,0.020822,0.530811,-0.488553,,,,,,,0.093366,-0.089833,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,0.5,0.001767,-0.013939,0.693192,0.022657,-2.41773,-0.845764,0.065931,,0.800939,-0.237088,,-1.831492,,,,2.113732,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,7.0,0.142857,-0.335924,0.030917,0.556023,-0.65976,-0.12019,1.828782,0.884869,0.808598,0.68837,1.194493,0.637658,0.871157,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.849217,2.611408e-16,0.85451,0.993806,2023
5,10011,-1.339742,-0.631715,0.240783,-0.453288,-0.151182,-0.893207,0.382842,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.406501,-0.05126,0.686658,-0.517349,-0.281212,-0.54427,-0.079796,0.553041,-1.458502,-0.697033,-0.198058,0.470506,,,-0.559756,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.310564,0.020822,0.530811,-0.624302,0.024204,0.483894,-0.046884,1.170201,,-0.369268,-0.231381,0.952201,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.283281,-0.013939,0.693192,0.42877,-0.075352,-0.130725,0.173687,,-0.646382,-0.967252,-2.586416,-4.824023,,,,-1.305368,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,8.0,0.125,-1.295229,0.030917,0.556023,-2.385055,-0.12019,-0.175531,0.884869,0.808598,-0.421465,0.037272,0.096313,-0.192304,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.114695,2.611408e-16,0.85451,0.134223,2023


In [14]:
# For 2022

fnames = ['outcome_readmission', 'outcome_safety', 'process', 'ptexp']
f_ext = ['readmission_', 'safety_', 'process_', 'patient_exp_']

with io.capture_output() as captured: 
    df_2022 = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2022/actual/outcome_mortality.sas7bdat')
    
cols = list(df_2022)
new_cols = []
ind = cols.index('C1')
for i, c in enumerate(cols):
    if i >= ind:
        c = 'mortality_' + c
    new_cols.append(c)
print(new_cols)
df_2022.columns = new_cols

print(df_2022.shape)
for j, f in enumerate(fnames):
    with io.capture_output() as captured: tdf = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2022/actual/' + f + '.sas7bdat')
    
    cols = list(tdf)
    new_cols = []
    ind = cols.index('C1')
    for i, c in enumerate(cols):
        if i >= ind:
            c = f_ext[j] + c
        new_cols.append(c)
    print(new_cols)
    tdf.columns = new_cols
    
    print(tdf.shape)
    
    df_2022 = df_2022.merge(tdf, on='PROVIDER_ID', how='outer')
    
print(df_2022.shape)
df_2022['Release year'] = ['2022']*df_2022.shape[0]
print(df_2022.shape)
df_2022 = df_2022.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
print(df_2022.shape)
df_2022 = df_2022[df_2022['PROVIDER_ID'].isin(prvdrs_2022)]
print(df_2022.shape)
prvdrs = df_2022['PROVIDER_ID'].unique()
print(df_2022.shape)

print(len(prvdrs), 'hospitals in 2022')
df_2022.head()


['PROVIDER_ID', 'std_MORT_30_AMI', 'std_MORT_30_CABG', 'std_MORT_30_COPD', 'std_MORT_30_HF', 'std_MORT_30_PN', 'std_MORT_30_STK', 'std_PSI_4_SURG_COMP', 'mortality_C1', 'mortality_C2', 'mortality_C3', 'mortality_C4', 'mortality_C5', 'mortality_C6', 'mortality_C7', 'mortality_total_cnt', 'mortality_measure_wt', 'mortality_score_before_std', 'mortality_Mean', 'mortality_StdDev', 'mortality_grp_score']
(4489, 21)
['PROVIDER_ID', 'std_EDAC_30_AMI', 'std_EDAC_30_HF', 'std_EDAC_30_PN', 'std_OP_32', 'std_READM_30_CABG', 'std_READM_30_COPD', 'std_READM_30_HIP_KNEE', 'std_READM_30_HOSP_WIDE', 'std_OP_35_ADM', 'std_OP_35_ED', 'std_OP_36', 'readmission_C1', 'readmission_C2', 'readmission_C3', 'readmission_C4', 'readmission_C5', 'readmission_C6', 'readmission_C7', 'readmission_C8', 'readmission_C9', 'readmission_C10', 'readmission_C11', 'readmission_total_cnt', 'readmission_measure_wt', 'readmission_score_before_std', 'readmission_Mean', 'readmission_StdDev', 'readmission_grp_score']
(4489, 29)
['

Unnamed: 0,PROVIDER_ID,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_IMM_3,std_OP_22,std_OP_23,std_OP_29,std_OP_33,std_PC_01,std_SEP_1,std_OP_3B,std_OP_18B,std_OP_8,std_OP_10,std_OP_13,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_C12,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,Release year
0,10001,0.703271,-1.054647,-0.072635,1.355101,0.507204,-3.873239,-1.364075,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.542717,0.000879,0.641984,-0.846744,0.012064,-0.457136,0.497621,1.664542,-0.606328,-1.47008,-1.684933,-0.053004,0.60215,-0.703618,0.623486,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.143203,0.027686,0.531111,-0.321758,-0.427377,0.325583,0.722043,0.29595,1.045465,0.077072,-0.002603,1.040003,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.384517,0.002796,0.645363,0.591483,0.643289,-0.863287,,0.009932,,0.442175,-0.264434,,-0.906488,-0.897899,0.376713,-0.520081,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,9.0,0.111111,-0.220009,0.040971,0.815466,-0.320038,-0.536306,-0.084743,-0.269007,-0.072623,0.801375,-0.116805,0.284308,0.481635,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.060979,-5.363828e-17,0.84576,0.0721,2022
1,10005,-0.113897,,-0.359269,-3.038802,-1.230466,-1.23433,0.765784,1.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,0.166667,-0.868497,0.000879,0.641984,-1.354201,1.031113,-0.432991,-0.296054,1.172588,,1.650104,0.258268,1.220694,1.951595,0.635548,0.623486,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,0.1,0.781435,0.027686,0.531111,1.419195,0.866214,0.354127,-0.227217,0.485385,,-0.692282,-0.320719,-1.105376,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.09141,0.002796,0.645363,-0.145974,0.125136,-0.329809,-0.478581,0.434506,0.689565,-0.218358,0.264906,,0.478618,-0.249889,-1.577127,1.347285,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.044205,0.040971,0.815466,0.003965,-0.536306,0.876653,-0.269007,-0.072623,0.801375,-0.116805,-0.254812,-0.048857,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.047452,-5.363828e-17,0.84576,0.056106,2022
2,10006,-3.484718,-0.531852,0.405087,-0.616522,-0.386455,-0.92748,-1.84663,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-1.05551,0.000879,0.641984,-1.645507,0.284947,0.516707,-1.339736,2.015937,-0.816654,1.549453,-1.490613,-0.371429,,,0.060418,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,0.045448,0.027686,0.531111,0.033444,-1.720968,0.341894,0.587157,0.587502,,-0.326058,0.435881,-1.105376,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.171424,0.002796,0.645363,-0.269957,0.297854,0.203668,,0.131239,,0.442175,-1.558378,,-0.634008,1.753052,-1.738453,1.667405,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,9.0,0.111111,0.062728,0.040971,0.815466,0.02668,-0.536306,-0.084743,-2.195088,-1.06817,-1.291935,-1.058688,-0.254812,-0.048857,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.817325,-5.363828e-17,0.84576,-0.966379,2022
3,10007,,,-0.645902,-0.729187,-1.826238,,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,0.333333,-1.067109,0.000879,0.641984,-1.663574,,-0.131181,-0.335737,-1.357461,,-0.262267,,-0.371429,,,0.060418,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,6.0,0.166667,-0.399609,0.027686,0.531111,-0.804531,,,,,,,1.201078,0.306057,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,0.5,0.753568,0.002796,0.645363,1.163333,,-0.329809,,0.131239,,,2.382268,,0.751098,,0.699366,,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,5.0,0.2,0.726832,0.040971,0.815466,0.841066,-0.536306,-0.084743,-0.269007,0.922923,0.801375,0.825078,-0.254812,1.012127,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.302079,-5.363828e-17,0.84576,0.357169,2022
5,10011,-2.054673,-1.446744,-0.645902,0.172127,-0.287159,-0.25241,-0.118808,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.661939,0.000879,0.641984,-1.032452,-1.1349,-2.416895,-0.871468,-0.092437,-1.868286,0.240989,-0.324692,-1.326703,,,0.060418,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.85933,0.027686,0.531111,-1.670115,-0.211778,0.411215,0.217907,-0.272356,,0.328319,0.659421,-0.371431,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.108757,0.002796,0.645363,0.164188,,-0.863287,,,-1.087459,,-1.499562,,-0.906488,,-0.214816,,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,5.0,0.2,-0.914322,0.040971,0.815466,-1.17147,-0.536306,-0.084743,-0.269007,-0.072623,-0.24528,-0.116805,-0.254812,-0.579349,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.269866,-5.363828e-17,0.84576,-0.31908,2022


In [15]:
# For 2021

fnames = ['outcome_readmission', 'outcome_safety', 'process', 'ptexp']
f_ext = ['readmission_', 'safety_', 'process_', 'patient_exp_']

with io.capture_output() as captured: 
    df_2021 = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2021/actual/outcome_mortality.sas7bdat')
    
cols = list(df_2021)
new_cols = []
ind = cols.index('C1')
for i, c in enumerate(cols):
    if i >= ind:
        c = 'mortality_' + c
    new_cols.append(c)
print(new_cols)
df_2021.columns = new_cols

for j, f in enumerate(fnames):
    with io.capture_output() as captured: tdf = pd.read_sas(stars_dir + 'SAS_Downloaded_Databases/2021/actual/' + f + '.sas7bdat')
    
    cols = list(tdf)
    new_cols = []
    ind = cols.index('C1')
    for i, c in enumerate(cols):
        if i >= ind:
            c = f_ext[j] + c
        new_cols.append(c)
    print(new_cols)
    tdf.columns = new_cols

    df_2021 = df_2021.merge(tdf, on='PROVIDER_ID', how='outer')
    
    
df_2021['Release year'] = ['2021']*df_2021.shape[0]
df_2021 = df_2021.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2021 = df_2021[df_2021['PROVIDER_ID'].isin(prvdrs_2021)]
prvdrs = df_2021['PROVIDER_ID'].unique()

print(len(prvdrs), 'hospitals in 2021')
df_2021.head()

['PROVIDER_ID', 'std_MORT_30_AMI', 'std_MORT_30_CABG', 'std_MORT_30_COPD', 'std_MORT_30_HF', 'std_MORT_30_PN', 'std_MORT_30_STK', 'std_PSI_4_SURG_COMP', 'mortality_C1', 'mortality_C2', 'mortality_C3', 'mortality_C4', 'mortality_C5', 'mortality_C6', 'mortality_C7', 'mortality_total_cnt', 'mortality_measure_wt', 'mortality_score_before_std', 'mortality_Mean', 'mortality_StdDev', 'mortality_grp_score']
['PROVIDER_ID', 'std_EDAC_30_AMI', 'std_EDAC_30_HF', 'std_EDAC_30_PN', 'std_OP_32', 'std_READM_30_CABG', 'std_READM_30_COPD', 'std_READM_30_HIP_KNEE', 'std_READM_30_HOSP_WIDE', 'std_OP_35_ADM', 'std_OP_35_ED', 'std_OP_36', 'readmission_C1', 'readmission_C2', 'readmission_C3', 'readmission_C4', 'readmission_C5', 'readmission_C6', 'readmission_C7', 'readmission_C8', 'readmission_C9', 'readmission_C10', 'readmission_C11', 'readmission_total_cnt', 'readmission_measure_wt', 'readmission_score_before_std', 'readmission_Mean', 'readmission_StdDev', 'readmission_grp_score']
['PROVIDER_ID', 'std_COM

Unnamed: 0,PROVIDER_ID,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_READM_30_HOSP_WIDE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_IMM_3,std_OP_22,std_OP_23,std_OP_29,std_OP_30,std_OP_33,std_PC_01,std_SEP_1,std_OP_3B,std_OP_18B,std_ED_2B,std_OP_8,std_OP_10,std_OP_13,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_C11,process_C12,process_C13,process_C14,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,Release year
0,10001,0.730853,-2.055159,0.069106,0.103403,-0.022557,-1.941449,-0.327016,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-0.491831,-0.004941,0.64666,-0.752932,-0.027028,-0.705673,1.039984,2.185101,-1.542989,-1.037529,-1.164917,-0.070634,1.1826,-1.18578,1.207127,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.010885,0.027901,0.551294,-0.070355,0.303431,-0.078888,0.680925,0.244905,1.037183,0.403644,0.098335,0.31923,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.376096,0.009921,0.658437,0.556127,0.644574,-0.979501,,-0.459111,0.466001,,0.473469,0.031191,,-0.761776,-0.054072,0.140655,-0.102729,0.684379,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.007553,0.037755,0.600242,-0.050316,-0.537213,-0.085263,-0.269656,-0.073296,0.801144,-0.115323,0.284361,0.478432,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.060398,-1.104271e-16,0.845613,0.071425,2021
1,10005,-1.095515,,-1.273727,-3.077512,-1.846949,-2.26162,-1.335906,1.0,0.0,1.0,1.0,1.0,1.0,1.0,6.0,0.166667,-1.815205,-0.004941,0.64666,-2.799407,0.539823,-0.24435,-0.744184,0.119295,,0.896062,0.210441,-0.420048,0.989506,-0.195267,-0.516355,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,0.1,0.063492,0.027901,0.551294,0.064559,0.303431,1.102714,-0.609029,0.4594,,-1.712026,0.039355,-0.043766,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.065703,0.009921,0.658437,-0.114854,0.12667,-0.346365,-0.150232,-0.40149,0.144736,0.716782,-0.34011,0.504799,0.076084,0.572659,0.228145,-0.410303,-1.202673,0.412738,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,14.0,0.071429,-0.004897,0.037755,0.600242,-0.071058,-0.537213,0.874962,-0.269656,-0.073296,0.801144,-0.115323,-0.254265,-0.052073,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.046785,-1.104271e-16,0.845613,0.055327,2021
2,10006,-2.921882,-1.298828,-0.915638,-0.590615,-1.318836,0.683952,-2.65689,1.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,0.142857,-1.288391,-0.004941,0.64666,-1.984738,0.479089,0.888725,-0.932855,0.04806,-0.33382,0.992741,-1.164917,0.395251,,,-0.516355,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,9.0,0.111111,-0.016009,0.027901,0.551294,-0.079649,-0.649202,1.102714,0.856593,-0.011884,,0.242305,0.332149,-0.406762,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.209416,0.009921,0.658437,0.302983,0.299305,0.28677,,-0.459111,-0.417479,,0.473469,-1.567236,,-0.211057,-0.148144,-0.4393,-1.249813,0.793035,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.23996,0.037755,0.600242,-0.462672,-0.537213,-0.085263,-2.19386,-1.067899,-1.292453,-1.055803,-0.254265,-0.052073,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.817354,-1.104271e-16,0.845613,-0.966582,2021
3,10007,,,-0.468027,-1.515972,-2.231032,-1.109004,,0.0,0.0,1.0,1.0,1.0,1.0,0.0,4.0,0.25,-1.331009,-0.004941,0.64666,-2.050643,,-1.009175,-0.498092,-0.877991,,0.219305,,-0.652991,,,0.058139,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,6.0,0.166667,-0.460134,0.027901,0.551294,-0.885254,,,,,,,1.229489,0.371087,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,0.5,0.800288,0.009921,0.658437,1.200369,,-0.979501,,-4.377296,-0.17653,,,2.280829,,0.720929,0.496922,,-0.401285,,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,7.0,0.142857,-0.34799,0.037755,0.600242,-0.642649,-0.537213,-0.085263,-0.269656,0.921307,0.801144,0.825157,-0.254265,1.008938,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.301269,-1.104271e-16,0.845613,0.356272,2021
4,10008,,,-1.81086,-0.301441,-1.030774,,,0.0,0.0,1.0,1.0,1.0,0.0,0.0,3.0,0.333333,-1.047692,-0.004941,0.64666,-1.612519,,,0.342723,-0.735521,,0.219305,,0.861136,,,,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,4.0,0.25,0.171911,0.027901,0.551294,0.261221,,,,,,,,0.008091,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.008091,0.009921,0.658437,-0.00278,-3.584977,0.28677,,-1.726759,0.385685,,,-0.975226,,1.081015,0.174389,,0.494383,,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,8.0,0.125,-0.48309,0.037755,0.600242,-0.867725,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,-1.104271e-16,0.845613,,2021


In [16]:
print(df_2026.shape)
print(df_2025.shape)
print(df_2024.shape)
print(df_2023.shape)
print(df_2022.shape)
print(df_2021.shape)

(3210, 136)
(2891, 124)
(2847, 124)
(3076, 124)
(3121, 124)
(3355, 128)


In [17]:
ls = np.setdiff1d(list(df_2026), list(df_2025)) # in 2026, not in 2025
print(ls, 'in 2026, not in 2025')

ls = np.setdiff1d(list(df_2025), list(df_2024)) # in 2025, not in 2024
print(ls, 'in 2025, not in 2024')

ls = np.setdiff1d(list(df_2024), list(df_2023)) # in 2024, not in 2023
print(ls, 'in 2024, not in 2023')

ls = np.setdiff1d(list(df_2023), list(df_2022)) # in 2023, not in 2022
print(ls, 'in 2023, not in 2022')

ls = np.setdiff1d(list(df_2022), list(df_2021)) # in 2022, not in 2021
print(ls, 'in 2022, not in 2021')


tdf = df_2026.merge(df_2025, how='outer')
tdf = tdf.merge(df_2024, how='outer')
tdf = tdf.merge(df_2023, how='outer')
tdf = tdf.merge(df_2022, how='outer')
tdf = tdf.merge(df_2021, how='outer')
main_df = main_df.merge(tdf, how='outer', on=['PROVIDER_ID', 'Release year'])

print(main_df.shape)

print(len(main_df['PROVIDER_ID'].unique()))
main_df.sort_values(by=['PROVIDER_ID'], ascending=True, inplace=True)

ls = main_df['PROVIDER_ID'].unique().tolist()

print(ls[:10])

main_df.head()

['mortality_C8' 'patient_exp_C10' 'patient_exp_C11' 'patient_exp_C12'
 'patient_exp_C13' 'patient_exp_C14' 'patient_exp_C15' 'patient_exp_C9'
 'std_H_CLEAN_LINEAR_SCORE' 'std_H_COMP_1_LINEAR_SCORE'
 'std_H_COMP_2_LINEAR_SCORE' 'std_H_COMP_3_LINEAR_SCORE'
 'std_H_COMP_5_LINEAR_SCORE' 'std_H_COMP_6_LINEAR_SCORE'
 'std_H_COMP_7_LINEAR_SCORE' 'std_H_HSP_RATING_LINEAR_SCORE'
 'std_H_QUIET_LINEAR_SCORE' 'std_H_RECMND_LINEAR_SCORE' 'std_Hybrid_HWM'
 'std_Hybrid_HWR' 'std_O_COMP_1_LINEAR_SCORE' 'std_O_COMP_2_LINEAR_SCORE'
 'std_O_COMP_3_LINEAR_SCORE' 'std_O_PATIENT_RATE_LINEAR_SCORE'
 'std_O_PATIENT_REC_LINEAR_SCORE'] in 2026, not in 2025
['std_SAFE_USE_OF_OPIOIDS'] in 2025, not in 2024
[] in 2024, not in 2023
['std_HCP_COVID_19'] in 2023, not in 2022
[] in 2022, not in 2021
(18500, 182)
3648
['010001', '010005', '010006', '010007', '010008', '010011', '010012', '010016', '010019', '010021']


Unnamed: 0,PROVIDER_ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Q_25,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,std_Hybrid_HWM,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_C8,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,std_Hybrid_HWR,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_SAFE_USE_OF_OPIOIDS,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_O_COMP_1_LINEAR_SCORE,std_O_COMP_2_LINEAR_SCORE,std_O_COMP_3_LINEAR_SCORE,std_O_PATIENT_RATE_LINEAR_SCORE,std_O_PATIENT_REC_LINEAR_SCORE,std_H_COMP_1_LINEAR_SCORE,std_H_COMP_2_LINEAR_SCORE,std_H_COMP_3_LINEAR_SCORE,std_H_COMP_5_LINEAR_SCORE,std_H_COMP_6_LINEAR_SCORE,std_H_COMP_7_LINEAR_SCORE,std_H_CLEAN_LINEAR_SCORE,std_H_QUIET_LINEAR_SCORE,std_H_RECMND_LINEAR_SCORE,std_H_HSP_RATING_LINEAR_SCORE,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_C9,patient_exp_C10,patient_exp_C11,patient_exp_C12,patient_exp_C13,patient_exp_C14,patient_exp_C15,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,std_READM_30_HOSP_WIDE,std_HCP_COVID_19,std_PC_01,process_C11,process_C12,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,std_OP_3B,std_OP_33,std_OP_30,std_ED_2B,process_C13,process_C14
0,10001,-0.752932,-0.070355,0.556127,0.071425,-0.050316,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.0491,7,8,11,8,11,5,2,1,3.0,3.0,,2021,0.730853,-2.055159,0.069106,0.103403,-0.022557,-1.941449,-0.327016,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.491831,-0.004941,0.64666,-0.752932,-0.027028,-0.705673,1.039984,2.185101,-1.542989,-1.037529,-1.164917,1.1826,-1.18578,1.207127,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.010885,0.027901,0.551294,-0.070355,0.303431,-0.078888,0.680925,0.244905,1.037183,0.403644,0.098335,0.31923,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.376096,0.009921,0.658437,0.556127,0.644574,-0.102729,0.684379,-0.761776,-0.979501,,-0.459111,0.140655,,0.031191,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,11.0,0.090909,0.007553,0.037755,0.600242,-0.050316,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.060398,-1.104271e-16,0.845613,0.071425,-0.070634,,0.473469,1.0,1.0,-0.537213,-0.085263,-0.269656,-0.073296,0.801144,-0.115323,0.284361,0.478432,,,0.466001,-0.054072,1.0,1.0
1,10001,-0.846744,-0.321758,0.591483,0.0721,-0.320038,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.149487,7,8,11,8,9,5,2,1,3.0,3.0,,2022,0.703271,-1.054647,-0.072635,1.355101,0.507204,-3.873239,-1.364075,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.542717,0.000879,0.641984,-0.846744,0.012064,-0.457136,0.497621,1.664542,-0.606328,-1.47008,-1.684933,0.60215,-0.703618,0.623486,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.143203,0.027686,0.531111,-0.321758,-0.427377,0.325583,0.722043,0.29595,1.045465,0.077072,-0.002603,1.040003,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.384517,0.002796,0.645363,0.591483,0.643289,0.376713,-0.520081,-0.906488,-0.863287,,0.009932,-0.897899,,-0.264434,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,9.0,0.111111,-0.220009,0.040971,0.815466,-0.320038,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.060979,-5.363828e-17,0.84576,0.0721,-0.053004,,0.442175,1.0,1.0,-0.536306,-0.084743,-0.269007,-0.072623,0.801375,-0.116805,0.284308,0.481635,,,,,,
2,10001,-0.647058,0.284081,0.312074,-0.127836,-1.024044,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.162208,7,8,11,8,10,5,2,1,3.0,3.0,,2023,-0.041613,-2.19781,-0.019543,1.708727,0.366398,-1.543654,-1.741473,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.495567,-0.05126,0.686658,-0.647058,0.207832,-0.702681,0.28604,0.105508,0.219318,-0.048609,-0.198058,0.159253,0.872692,0.062022,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.171616,0.020822,0.530811,0.284081,0.024204,0.444297,0.745807,-0.407579,1.025843,0.314693,-0.315731,-0.212425,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.202389,-0.013939,0.693192,0.312074,0.992908,0.10185,-1.77947,-0.904124,-0.237088,,-0.634479,0.488322,,-0.609958,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,10.0,0.1,-0.538475,0.030917,0.556023,-1.024044,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,-0.109237,2.611408e-16,0.85451,-0.127836,0.924456,-1.039363,-1.763346,1.0,1.0,-1.07585,-0.175531,-1.051153,0.808598,0.68837,0.037272,-0.445032,0.339427,,,,,,
3,10001,0.007071,0.230867,-0.146867,0.137779,-0.677624,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.030968,7,7,11,8,10,5,2,1,3.0,3.0,,2024,0.470514,-1.535074,0.282996,1.510467,0.129818,-0.554262,-0.748332,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.063411,-0.068391,0.704371,0.007071,1.123231,-0.791585,-0.63392,0.29207,0.584835,0.293805,0.886353,-1.03233,0.626949,-0.252865,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.138216,0.014038,0.537879,0.230867,0.879845,-0.232451,0.488068,-0.71166,,-0.210159,0.003034,-1.162386,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.135101,-0.021526,0.773315,-0.146867,0.940083,-0.029295,0.419351,-0.942734,-1.140182,,-3.129707,0.000412,,0.371166,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,10.0,0.1,-0.335877,0.029039,0.538522,-0.677624,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.119895,1.093644e-15,0.870204,0.137779,0.423838,-0.431525,0.583665,1.0,1.0,-0.373358,-0.400063,-0.243371,0.124487,0.479704,0.876397,0.133873,0.361496,,,,,,
4,10001,0.29611,0.361406,0.344883,0.042072,-0.484678,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.171622,7,7,11,8,11,5,2,1,3.0,4.0,,2025,1.405923,-1.04537,1.011496,0.650904,-0.315375,0.023766,-0.732077,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,0.142753,-0.073484,0.730255,0.29611,1.024273,-0.368843,-0.448127,0.289973,0.607499,0.859382,0.192452,-1.028886,0.62791,-0.25798,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.206001,0.013108,0.53373,0.361406,0.698729,0.30367,0.739078,-0.467783,,0.476446,-0.105703,0.103001,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.249634,-0.01903,0.779001,0.344883,1.044716,0.080291,0.781373,-1.062058,-1.138829,,-3.171398,0.588011,0.563049,0.449772,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,11.0,0.090909,-0.204464,0.028411,0.480473,-0.484678,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.036435,4.8997890000000005e-17,0.866021,0.042072,0.768359,-0.525017,0.140984,1.0,1.0,-1.154064,0.570761,-0.231671,-0.254181,-0.312251,0.933177,0.719371,0.020336,,,,,,


## Load SAS pack input files
These data originate from Care Compare

In [18]:
df_2026 = pd.read_csv(stars_dir + "Reproduce_Stars_Input/2026/Input_File/data_for_2026_prognostications_from_Nov2025.csv")
df_2026['Release year'] = ['2026']*df_2026.shape[0]
df_2026 = df_2026.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)

ls = []
for p in df_2026['PROVIDER_ID'].tolist():
    p = int(p)
    p = str(p)
    if '666666' in p:
        p = p[:-6]
        p = p + 'F'
    while len(p) < 6:
        p = '0' + p
    ls.append(p)

df_2026['PROVIDER_ID'] = ls
df_2026 = df_2026[df_2026['PROVIDER_ID'].isin(prvdrs_2026)]

print(df_2026.shape)
prvdrs = df_2026['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in 2026 prognostications')

df_2026.head()

(3210, 177)
3210 hospitals in 2026 prognostications


Unnamed: 0,PROVIDER_ID,IMM_3,IMM_3_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,SEP_1,SEP_1_DEN,SAFE_USE_OF_OPIOIDS,SAFE_USE_OF_OPIOIDS_DEN,MORT_30_AMI,MORT_30_AMI_DEN,MORT_30_CABG,MORT_30_CABG_DEN,MORT_30_COPD,MORT_30_COPD_DEN,MORT_30_HF,MORT_30_HF_DEN,MORT_30_PN,MORT_30_PN_DEN,MORT_30_STK,MORT_30_STK_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,COMP_HIP_KNEE,COMP_HIP_KNEE_DEN,PSI_90_SAFETY,PSI_90_SAFETY_DEN,Hybrid_HWM,Hybrid_HWM_DEN,H_COMP_1_A_P,H_COMP_1_SN_P,H_COMP_1_U_P,H_COMP_1_LINEAR_SCORE,H_COMP_1_STAR_RATING,H_NURSE_RESPECT_A_P,H_NURSE_RESPECT_SN_P,H_NURSE_RESPECT_U_P,H_NURSE_LISTEN_A_P,H_NURSE_LISTEN_SN_P,H_NURSE_LISTEN_U_P,H_NURSE_EXPLAIN_A_P,H_NURSE_EXPLAIN_SN_P,H_NURSE_EXPLAIN_U_P,H_COMP_2_A_P,H_COMP_2_SN_P,H_COMP_2_U_P,H_COMP_2_LINEAR_SCORE,H_COMP_2_STAR_RATING,H_DOCTOR_RESPECT_A_P,H_DOCTOR_RESPECT_SN_P,H_DOCTOR_RESPECT_U_P,H_DOCTOR_LISTEN_A_P,H_DOCTOR_LISTEN_SN_P,H_DOCTOR_LISTEN_U_P,H_DOCTOR_EXPLAIN_A_P,H_DOCTOR_EXPLAIN_SN_P,H_DOCTOR_EXPLAIN_U_P,H_COMP_3_A_P,H_COMP_3_SN_P,H_COMP_3_U_P,H_COMP_3_LINEAR_SCORE,H_COMP_3_STAR_RATING,H_CALL_BUTTON_A_P,H_CALL_BUTTON_SN_P,H_CALL_BUTTON_U_P,H_BATH_HELP_A_P,H_BATH_HELP_SN_P,H_BATH_HELP_U_P,H_COMP_5_A_P,H_COMP_5_SN_P,H_COMP_5_U_P,H_COMP_5_LINEAR_SCORE,H_COMP_5_STAR_RATING,H_MED_FOR_A_P,H_MED_FOR_SN_P,H_MED_FOR_U_P,H_SIDE_EFFECTS_A_P,H_SIDE_EFFECTS_SN_P,H_SIDE_EFFECTS_U_P,H_COMP_6_N_P,H_COMP_6_Y_P,H_COMP_6_LINEAR_SCORE,H_COMP_6_STAR_RATING,H_DISCH_HELP_N_P,H_DISCH_HELP_Y_P,H_SYMPTOMS_N_P,H_SYMPTOMS_Y_P,H_COMP_7_A,H_COMP_7_D_SD,H_COMP_7_SA,H_COMP_7_LINEAR_SCORE,H_COMP_7_STAR_RATING,H_CT_PREFER_A,H_CT_PREFER_D_SD,H_CT_PREFER_SA,H_CT_UNDER_A,H_CT_UNDER_D_SD,H_CT_UNDER_SA,H_CT_MED_A,H_CT_MED_D_SD,H_CT_MED_SA,H_CLEAN_HSP_A_P,H_CLEAN_HSP_SN_P,H_CLEAN_HSP_U_P,H_CLEAN_LINEAR_SCORE,H_CLEAN_STAR_RATING,H_QUIET_HSP_A_P,H_QUIET_HSP_SN_P,H_QUIET_HSP_U_P,H_QUIET_LINEAR_SCORE,H_QUIET_STAR_RATING,H_HSP_RATING_0_6,H_HSP_RATING_7_8,H_HSP_RATING_9_10,H_HSP_RATING_LINEAR_SCORE,H_HSP_RATING_STAR_RATING,H_RECMND_DN,H_RECMND_DY,H_RECMND_PY,H_RECMND_LINEAR_SCORE,H_RECMND_STAR_RATING,H_STAR_RATING,O_COMP_1_LINEAR_SCORE,O_COMP_2_LINEAR_SCORE,O_COMP_3_LINEAR_SCORE,O_PATIENT_RATE_LINEAR_SCORE,O_PATIENT_REC_LINEAR_SCORE,EDAC_30_AMI,EDAC_30_AMI_DEN,EDAC_30_HF,EDAC_30_HF_DEN,EDAC_30_PN,EDAC_30_PN_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,READM_30_CABG,READM_30_CABG_DEN,READM_30_COPD,READM_30_COPD_DEN,READM_30_HIP_KNEE,READM_30_HIP_KNEE_DEN,Hybrid_HWR,Hybrid_HWR_DEN,HAI_1_DEN_PRED,HAI_1_DEN_VOL,HAI_1,HAI_2_DEN_PRED,HAI_2_DEN_VOL,HAI_2,HAI_3_DEN_PRED,HAI_3_DEN_VOL,HAI_3,HAI_4_DEN_PRED,HAI_4_DEN_VOL,HAI_4,HAI_5_DEN_PRED,HAI_5_DEN_VOL,HAI_5,HAI_6_DEN_PRED,HAI_6_DEN_VOL,HAI_6,OP_13,OP_8,OP_10,Release year
0,10001,0.93,4625.0,222.0,387.0,0.05,57084.0,0.91,11.0,0.72,29.0,0.69,150.0,0.14,4583.0,0.114,270.0,0.03,144.0,0.094,112.0,0.102,583.0,0.184,517.0,0.135,395.0,203.0,118.0,0.032,27.0,0.95,,0.045,1835.0,,,,90.0,,,,,,,,,,,,,,92.0,,,,,,,,,,,,,,82.0,,,,,,,,,,,79.0,,,,,,,,,,87.0,,,,,,,,,82.0,,,,,,,,,,,,,,85.0,,,,,88.0,,,,,89.0,,,,,91.0,,,98.0,93.0,97.0,94.0,94.0,-15.6,273.0,-1.1,652.0,17.4,507.0,12.8,218.0,8.5,261.0,5.7,261.0,0.8,647.0,0.101,137.0,0.18,122.0,0.048,25.0,0.151,2824.0,9.44,8935.0,0.53,23.35,16255.0,0.086,6.562,229.0,0.457,0.903,97.0,,10.937,109019.0,0.183,68.076,109019.0,0.382,0.038,0.308,0.053,2026
1,10005,0.59,2856.0,137.0,1120.0,0.03,58624.0,0.27,11.0,1.0,210.0,0.75,289.0,0.15,1859.0,,,,,0.089,126.0,0.141,158.0,0.212,285.0,0.129,89.0,184.79,27.0,0.03,104.0,0.97,,0.046,698.0,,,,91.0,,,,,,,,,,,,,,93.0,,,,,,,,,,,,,,77.0,,,,,,,,,,,72.0,,,,,,,,,,88.0,,,,,,,,,80.0,,,,,,,,,,,,,,85.0,,,,,87.0,,,,,87.0,,,,,85.0,,,98.0,96.0,98.0,94.0,93.0,,,12.2,164.0,-17.2,292.0,14.2,897.0,8.6,99.0,6.2,99.0,1.0,396.0,,,0.171,132.0,0.042,81.0,0.133,986.0,2.746,4514.0,0.728,2.991,6567.0,1.003,2.291,90.0,0.436,0.582,56.0,,1.258,37163.0,1.59,9.436,35488.0,0.53,0.033,0.422,0.128,2026
2,10006,0.64,2565.0,150.0,336.0,0.01,44924.0,0.67,15.0,0.86,85.0,0.66,137.0,0.15,4350.0,0.145,266.0,0.054,79.0,0.087,160.0,0.125,413.0,0.196,659.0,0.124,258.0,236.12,91.0,0.047,49.0,1.14,,0.052,1583.0,,,,89.0,,,,,,,,,,,,,,88.0,,,,,,,,,,,,,,76.0,,,,,,,,,,,70.0,,,,,,,,,,82.0,,,,,,,,,77.0,,,,,,,,,,,,,,79.0,,,,,84.0,,,,,83.0,,,,,80.0,,,98.0,94.0,97.0,93.0,91.0,-18.0,285.0,-4.7,461.0,-0.9,679.0,11.6,1618.0,,,,,1.0,502.0,0.106,71.0,0.191,174.0,0.051,56.0,0.159,2494.0,4.335,3915.0,0.0,9.758,7561.0,0.0,2.448,92.0,0.0,2.183,225.0,0.0,4.733,69033.0,0.0,38.671,66038.0,0.078,0.041,,0.088,2026
3,10007,0.29,358.0,117.0,1107.0,0.02,12667.0,,,0.65,52.0,0.13,15.0,0.15,212.0,,,,,0.112,34.0,0.134,34.0,0.254,98.0,,,,,,,1.06,,0.048,125.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,97.0,92.0,96.0,91.0,92.0,,,77.9,39.0,29.8,84.0,12.5,117.0,,,,,,,,,0.186,32.0,,,0.154,189.0,0.158,253.0,,0.766,1402.0,,0.113,4.0,,,,,0.086,4132.0,,1.481,4132.0,0.675,,,0.059,2026
5,10011,0.79,2317.0,154.0,85.0,0.02,42419.0,,,0.83,42.0,0.75,51.0,0.13,727.0,0.127,109.0,0.022,64.0,0.091,53.0,0.144,235.0,0.201,268.0,0.128,66.0,176.9,43.0,0.04,25.0,1.03,,0.041,582.0,,,,89.0,,,,,,,,,,,,,,90.0,,,,,,,,,,,,,,84.0,,,,,,,,,,,73.0,,,,,,,,,,84.0,,,,,,,,,79.0,,,,,,,,,,,,,,83.0,,,,,85.0,,,,,87.0,,,,,86.0,,,,,,,,-23.6,118.0,21.2,266.0,18.1,281.0,12.8,125.0,,,,,0.7,270.0,0.119,63.0,0.182,57.0,,,0.146,902.0,2.472,2304.0,2.427,2.917,2269.0,0.343,1.112,41.0,0.0,0.009,1.0,,1.396,18473.0,1.433,9.183,18473.0,0.327,,,0.092,2026


In [19]:
with io.capture_output() as captured: 
    df_2025 = pd.read_sas(stars_dir + '2025/2025-07 Stars Release/alldata_2025jul.sas7bdat')
df_2025['Release year'] = ['2025']*df_2025.shape[0]
df_2025 = df_2025.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2025 = df_2025[df_2025['PROVIDER_ID'].isin(prvdrs_2025)]

print(df_2025.shape)
prvdrs = df_2025['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in 2025')

df_2025.head()

(2891, 94)
2891 hospitals in 2025


Unnamed: 0,PROVIDER_ID,COMP_HIP_KNEE,IMM_3_DEN,EDAC_30_AMI,EDAC_30_AMI_DEN,READM_30_HOSP_WIDE_DEN,HAI_3,EDAC_30_HF_DEN,OP_18B_DEN,HAI_6,OP_29,OP_23_DEN,SAFE_USE_OF_OPIOIDS,MORT_30_COPD_DEN,HAI_3_DEN_VOL,HAI_5_DEN_PRED,OP_35_ED,OP_13_DEN,READM_30_CABG,HAI_5_DEN_VOL,OP_22,HAI_1_DEN_VOL,OP_36,MORT_30_STK,READM_30_COPD,READM_30_HIP_KNEE_DEN,H_NUMB_COMP,HAI_6_DEN_VOL,MORT_30_PN,OP_35_ADM,PC_01,MORT_30_HF_DEN,OP_32,PC_01_DEN,SAFE_USE_OF_OPIOIDS_DEN,H_COMP_7_STAR_RATING,READM_30_HIP_KNEE,HCP_COVID_19,READM_30_COPD_DEN,H_COMP_5_STAR_RATING,OP_10_DEN,COMP_HIP_KNEE_DEN,HAI_2_DEN_PRED,MORT_30_CABG,OP_29_DEN,OP_22_DEN,SEP_1,OP_23,MORT_30_AMI_DEN,H_COMP_6_STAR_RATING,OP_18B,MORT_30_AMI,MORT_30_PN_DEN,READM_30_HOSP_WIDE,EDAC_30_PN,OP_36_DEN,MORT_30_CABG_DEN,HAI_1_DEN_PRED,HAI_6_DEN_PRED,MORT_30_COPD,OP_13,PSI_90_SAFETY,MORT_30_HF,H_RESP_RATE_P,HCP_COVID_19_DEN,H_COMP_1_STAR_RATING,READM_30_CABG_DEN,PSI_90_SAFETY_DEN,IMM_3,HAI_3_DEN_PRED,H_COMP_3_STAR_RATING,OP_8_DEN,MORT_30_STK_DEN,OP_8,PSI_4_SURG_COMP_DEN,OP_10,EDAC_30_HF,HAI_2_DEN_VOL,OP_32_DEN,OP_35_ED_DEN,HAI_2,H_COMP_2_STAR_RATING,HAI_1,PSI_4_SURG_COMP,OP_35_ADM_DEN,SEP_1_DEN,EDAC_30_PN_DEN,HAI_5,HAI_4,HAI_4_DEN_VOL,HAI_4_DEN_PRED,H_INDI_STAR_RATING,H_GLOB_STAR_RATING,Release year
0,10001,0.03,4115.0,-13.8,296.0,2924.0,1.209,679.0,345.0,0.491,0.47,,0.12,122.0,240.0,11.232,4.9,193.0,0.102,103195.0,0.05,9538.0,1.1,0.137,0.177,34.0,643.0,103195.0,0.189,11.9,0.02,610.0,12.9,46.0,4303.0,4.0,0.044,0.0,130.0,3.0,1478.0,32.0,23.712,0.038,17.0,52960.0,0.68,,291.0,3.0,217.0,0.108,489.0,0.141,14.3,668.0,157.0,10.082,65.234,0.079,0.021,0.98,0.105,17.0,2712.0,2.0,151.0,,0.96,6.618,3.0,66.0,414.0,0.333,125.0,0.054,10.6,16332.0,170.0,202.0,0.169,4.0,0.496,194.78,202.0,131.0,490.0,0.445,,,,3.0,4.0,2025
1,10005,0.027,2407.0,,,1056.0,0.0,176.0,1154.0,0.631,0.96,13.0,0.16,133.0,102.0,1.631,5.5,177.0,,35424.0,0.03,3410.0,1.9,0.139,0.161,144.0,714.0,33872.0,0.231,7.9,0.01,162.0,14.2,193.0,2244.0,3.0,0.039,0.026,143.0,4.0,850.0,138.0,3.262,,180.0,56820.0,0.76,0.69,28.0,4.0,144.0,0.145,301.0,0.135,-8.4,406.0,,2.074,9.503,0.092,0.028,1.01,0.127,17.0,2431.0,3.0,,,0.72,2.867,2.0,109.0,97.0,0.459,43.0,0.134,9.8,7200.0,739.0,107.0,1.226,4.0,0.964,191.14,107.0,288.0,305.0,2.452,,,,3.5,3.0,2025
2,10006,0.04,2560.0,13.4,315.0,2560.0,0.0,508.0,349.0,0.026,0.85,,0.16,141.0,73.0,5.397,,243.0,0.115,68020.0,0.01,4826.0,1.4,0.142,0.177,90.0,1620.0,64728.0,0.198,,0.0,455.0,12.1,22.0,3521.0,2.0,0.044,0.0,154.0,2.0,973.0,84.0,11.164,0.052,82.0,42286.0,0.56,,292.0,2.0,177.0,0.166,616.0,0.147,4.5,484.0,102.0,5.279,37.771,0.102,0.029,1.16,0.122,18.0,2095.0,2.0,91.0,,0.66,1.935,2.0,,271.0,,102.0,0.108,-4.9,8700.0,1355.0,,0.0,2.0,0.0,201.13,,162.0,621.0,0.371,0.0,116.0,1.086,2.5,2.0,2025
3,10007,0.034,345.0,,,234.0,,35.0,594.0,1.003,0.23,,,42.0,,,,,,,0.04,,1.2,,0.19,33.0,175.0,5267.0,0.29,,,33.0,13.4,,,4.0,0.044,0.0,41.0,5.0,151.0,33.0,,,111.0,11202.0,0.24,,,5.0,129.0,,101.0,0.151,21.5,59.0,,,1.994,0.135,,0.93,0.141,24.0,245.0,5.0,,,0.26,,4.0,,,,,0.073,20.9,,109.0,,,5.0,,,,21.0,85.0,,,,,3.0,4.0,2025
5,10011,0.038,,-12.4,148.0,1636.0,0.0,294.0,333.0,0.498,1.0,23.0,0.17,55.0,117.0,8.719,,,0.119,80106.0,0.05,9905.0,0.9,0.129,0.187,32.0,1334.0,80106.0,0.225,,,258.0,12.8,,3360.0,3.0,0.043,0.008,58.0,3.0,383.0,28.0,13.381,0.021,24.0,41057.0,0.6,0.43,136.0,4.0,190.0,0.128,336.0,0.159,6.3,249.0,70.0,10.751,42.14,0.106,,0.95,0.14,26.0,1402.0,3.0,70.0,,,3.261,3.0,,86.0,,67.0,0.078,30.0,10399.0,68.0,,0.299,4.0,0.744,208.8,,205.0,344.0,0.573,,,,2.5,3.0,2025


In [20]:
with io.capture_output() as captured: 
    df_2024 = pd.read_sas(stars_dir + '2024/2024-07 Stars Release/alldata_2024jul.sas7bdat')
df_2024['Release year'] = ['2024']*df_2024.shape[0]
df_2024 = df_2024.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2024 = df_2024[df_2024['PROVIDER_ID'].isin(prvdrs_2024)]

print(df_2024.shape)
prvdrs = df_2024['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in 2024')

df_2024.head()

(2847, 96)
2847 hospitals in 2024


Unnamed: 0,PROVIDER_ID,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,HCP_COVID_19_DEN,HCP_COVID_19,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,Release year
0,10001,9149.0,17310.0,214.0,,104733.0,104733.0,9.597,24.766,5.994,,11.4,67.066,0.938,0.363,1.335,,0.965,0.507,0.142,0.038,23.4,0.19,-15.4,23.6,0.148,0.18,0.089,0.088,0.12,0.027,2912.0,49.0,614.0,117.0,274.0,403.0,398.0,400.0,549.0,107.0,278.0,49.0,,,,,0.38,79.0,0.061,1410.0,0.028,178.0,214.0,348.0,0.05,52960.0,,,0.47,17.0,184.68,134.0,1.21,3905.0,0.95,2496.0,0.836,0.0,32.0,0.65,127.0,15.0,3.0,3.0,3.0,3.0,4.0,4.0,3.5,3.5,544.0,2542.0385,0.041,132.0,0.105,126.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,2024
1,10005,3194.0,8277.0,96.0,,36794.0,34887.0,1.989,4.019,2.626,,1.847,10.066,2.514,0.995,0.762,,0.541,0.497,0.128,0.034,22.1,0.166,,-6.2,0.153,0.233,0.149,0.099,0.136,0.023,1052.0,172.0,129.0,136.0,,285.0,81.0,289.0,121.0,126.0,27.0,155.0,,,57.0,16.0,0.477,130.0,0.12,1057.0,0.042,189.0,145.0,1074.0,0.03,56820.0,0.58,12.0,0.96,180.0,183.49,43.0,0.97,2700.0,0.8,2552.0,0.807,0.02,200.0,0.69,252.0,18.0,3.0,4.0,1.0,3.0,4.0,3.0,3.0,3.0,824.0,978.028994,,,,,14.2,739.0,7.9,107.0,5.5,107.0,1.9,406.0,2024
2,10006,5343.0,8715.0,111.0,,63727.0,60304.0,5.801,11.166,2.95,,5.283,27.805,0.172,0.358,0.0,,1.514,0.072,0.134,0.053,-4.7,0.176,28.1,-0.4,0.172,0.195,0.125,0.099,0.165,0.046,2310.0,138.0,441.0,158.0,273.0,472.0,227.0,469.0,388.0,148.0,254.0,145.0,,,,,0.462,39.0,0.101,978.0,0.045,221.0,168.0,360.0,0.01,42286.0,0.75,16.0,0.85,82.0,173.63,96.0,1.17,2536.0,0.67,1882.0,0.796,0.04,28.0,0.57,126.0,19.0,2.0,3.0,2.0,2.0,3.0,2.0,2.0,2.5,1503.0,1753.979899,0.036,95.0,0.124,89.0,12.1,1355.0,,,,,1.4,484.0,2024
3,10007,,,,,,5511.0,,,,,,2.66,,,,,,0.376,0.157,0.042,-1.9,0.2,,-9.4,,0.285,0.125,0.137,,,258.0,26.0,31.0,34.0,,72.0,,88.0,26.0,34.0,,,,,,,,,0.034,146.0,,,132.0,1275.0,0.04,11202.0,,,0.23,111.0,,,0.95,350.0,0.53,252.0,0.601,,,0.93,43.0,24.0,3.0,5.0,4.0,3.0,3.0,3.0,3.5,3.0,189.0,228.286193,,,,,13.4,109.0,,,,,1.2,59.0,2024
5,10011,12136.0,11795.0,142.0,,83101.0,83101.0,13.096,15.144,3.798,,6.946,59.529,0.916,0.528,0.263,,0.72,0.638,0.149,0.042,26.0,0.197,-9.0,14.5,0.123,0.209,0.135,0.103,0.132,0.035,1366.0,27.0,251.0,47.0,127.0,309.0,86.0,304.0,225.0,43.0,121.0,28.0,,,,,,,0.057,332.0,,,213.0,341.0,0.05,41057.0,0.25,16.0,1.0,24.0,189.28,83.0,0.72,2377.0,0.81,1487.0,0.889,,,0.44,201.0,33.0,3.0,3.0,4.0,3.0,4.0,3.0,3.5,2.5,2023.0,1293.230115,0.026,65.0,0.126,64.0,12.8,68.0,,,,,0.9,249.0,2024


In [21]:
with io.capture_output() as captured: 
    df_2023 = pd.read_sas(stars_dir + '2023/2023-07 Stars Release/alldata_2023jul.sas7bdat')
df_2023['Release year'] = ['2023']*df_2023.shape[0]
df_2023 = df_2023.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2023 = df_2023[df_2023['PROVIDER_ID'].isin(prvdrs_2023)]

print(df_2023.shape)
prvdrs = df_2023['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in 2023')

df_2023.head()

(3076, 96)
3076 hospitals in 2023


Unnamed: 0,PROVIDER_ID,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,HCP_COVID_19_DEN,HCP_COVID_19,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,Release year
0,10001,10024.0,17731.0,154.0,200.0,101908.0,101451.0,10.597,26.63,4.548,1.845,9.412,72.686,0.661,0.3,1.099,0.0,0.85,0.66,0.142,0.042,21.8,0.199,1.9,-1.5,0.164,0.159,0.083,0.085,0.124,0.024,3058.0,98.0,755.0,202.0,319.0,436.0,489.0,407.0,630.0,182.0,317.0,102.0,,,,,0.425,146.0,0.057,1488.0,0.067,208.0,205.0,323.0,0.03,51079.0,,,0.81,16.0,173.39,120.0,1.01,3795.0,0.97,2323.0,0.737,0.09,34.0,0.46,146.0,15.0,2.0,3.0,2.0,4.0,4.0,3.0,3.0,3.5,434.0,2046.895485,0.047,172.0,0.117,165.0,14.1,254.0,10.2,214.0,4.7,214.0,1.0,688.0,2023
1,10005,3713.0,8670.0,88.0,,38413.0,35686.0,2.45,4.995,2.512,,1.999,10.484,3.673,1.201,1.194,,0.0,0.858,0.139,0.04,9.3,0.176,4.7,2.2,0.166,0.218,0.169,0.081,0.126,0.018,1258.0,178.0,157.0,234.0,38.0,361.0,100.0,369.0,153.0,195.0,52.0,134.0,,,,,0.545,191.0,0.138,1214.0,0.043,208.0,146.0,1003.0,0.03,54503.0,0.73,15.0,0.99,108.0,142.88,35.0,0.91,2593.0,0.9,2026.0,0.821,0.01,194.0,0.59,242.0,16.0,3.0,4.0,2.0,3.0,3.0,3.0,3.0,3.5,717.0,819.043002,,,,,14.6,850.0,11.1,108.0,5.7,108.0,0.9,362.0,2023
2,10006,7318.0,11755.0,91.0,,62709.0,54159.0,7.924,15.296,2.523,,4.164,22.618,0.757,0.196,0.396,,1.441,0.088,0.142,0.048,-2.3,0.177,25.9,42.3,0.189,0.178,0.122,0.078,0.165,0.034,2555.0,246.0,550.0,235.0,312.0,538.0,261.0,528.0,468.0,209.0,295.0,234.0,,,,,0.412,97.0,0.11,1168.0,0.014,217.0,144.0,363.0,0.01,41137.0,0.57,14.0,0.88,75.0,157.42,84.0,1.1,2292.0,0.64,2694.0,0.651,0.0,37.0,0.58,142.0,17.0,2.0,3.0,1.0,2.0,3.0,2.0,2.0,2.5,1358.0,1487.163359,0.035,117.0,0.156,109.0,12.5,1505.0,,,,,1.1,468.0,2023
3,10007,,,,,,5413.0,,,,,,2.148,,,,,,0.466,0.151,,36.3,0.197,,-12.6,,0.217,0.139,0.103,,,272.0,,51.0,72.0,,99.0,,106.0,45.0,63.0,,,,,,,,,0.059,169.0,,,119.0,1202.0,0.03,11120.0,,,0.63,68.0,,,0.99,318.0,0.61,277.0,0.574,,,0.93,55.0,23.0,3.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,173.0,142.073902,,,,,15.3,118.0,,,,,1.0,56.0,2023
5,10011,11886.0,11988.0,154.0,,81837.0,81837.0,12.884,15.434,4.069,,8.226,56.451,0.621,0.907,0.0,,1.459,0.62,0.146,0.042,17.9,0.206,13.0,7.7,0.152,0.171,0.122,0.082,0.138,0.024,1524.0,36.0,298.0,101.0,139.0,346.0,101.0,330.0,265.0,90.0,127.0,35.0,,,,,,,0.053,361.0,,,192.0,339.0,0.05,42062.0,0.2,15.0,0.18,83.0,136.95,76.0,0.82,1944.0,0.75,1476.0,0.851,,,0.34,242.0,29.0,3.0,3.0,4.0,4.0,3.0,3.0,3.5,3.0,1949.0,1034.277124,0.035,64.0,0.138,61.0,13.6,72.0,,,,,1.1,131.0,2023


In [22]:
with io.capture_output() as captured: 
    df_2022 = pd.read_sas(stars_dir + '2022/2022-07 Stars Release/all_data_2022jul.sas7bdat')
df_2022['Release year'] = ['2022']*df_2022.shape[0]
df_2022 = df_2022.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2022 = df_2022[df_2022['PROVIDER_ID'].isin(prvdrs_2022)]

prvdrs = df_2022['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in 2022')

df_2022.head()

3121 hospitals in 2022


Unnamed: 0,PROVIDER_ID,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_33,OP_33_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,Release year
0,10001,8391.0,13866.0,171.0,229.0,102470.0,102470.0,8.711,19.223,4.938,2.025,7.631,78.514,0.574,0.312,0.608,0.0,0.786,0.56,0.155,0.049,16.1,0.212,6.5,-7.1,0.198,0.145,0.089,0.083,0.116,0.026,1805.0,165.0,904.0,310.0,424.0,503.0,533.0,474.0,725.0,270.0,412.0,175.0,,,,,0.459,122.0,0.04,925.0,0.048,147.0,183.0,176.0,0.03,59762.0,,,0.9,52.0,184.28,115.0,0.81,4817.0,0.97,0.0,18.0,0.55,56.0,21.0,3.0,3.0,3.0,3.0,4.0,3.0,3.5,3.5,507.0,2828.228824,0.038,200.0,0.135,193.0,14.1,511.0,,,11.3,192.0,6.7,192.0,0.9,1003.0,2022
1,10005,2864.0,8199.0,79.0,,37761.0,35442.0,1.808,4.57,2.084,,1.506,11.292,0.553,0.875,0.48,,1.328,0.708,0.147,0.039,15.5,0.181,-17.4,12.9,0.155,0.18,0.167,0.086,0.124,0.02,764.0,168.0,223.0,378.0,36.0,468.0,122.0,477.0,213.0,314.0,57.0,166.0,,,,,0.415,94.0,0.149,739.0,0.013,154.0,122.0,588.0,0.02,68296.0,0.67,12.0,0.97,180.0,145.66,40.0,1.19,1915.0,0.91,0.03,105.0,0.64,145.0,30.0,3.0,4.0,3.0,3.0,4.0,3.0,3.0,3.0,568.0,1106.469176,,,,,14.8,1086.0,1.0,21.0,9.5,120.0,5.4,120.0,0.9,514.0,2022
2,10006,6011.0,10398.0,92.0,,63658.0,62016.0,5.334,10.217,2.432,,4.672,28.079,0.562,0.392,0.411,,1.07,0.356,0.157,0.048,-8.1,0.182,0.1,39.2,0.15,0.163,0.124,0.078,0.157,0.032,1563.0,306.0,615.0,381.0,363.0,663.0,286.0,634.0,528.0,331.0,344.0,306.0,,,,,0.279,61.0,0.158,728.0,0.007,152.0,171.0,183.0,0.01,47004.0,,,0.92,89.0,193.03,87.0,1.19,2456.0,0.93,0.0,15.0,0.33,54.0,23.0,3.0,3.0,1.0,2.0,2.0,2.0,3.0,3.0,1136.0,2131.116508,0.034,127.0,0.138,120.0,13.6,1908.0,,,,,,,1.0,502.0,2022
3,10007,,,,,,5230.0,,,,,,2.033,,,,,,0.0,0.157,,8.0,0.2,,13.9,,0.192,0.126,0.089,,,144.0,,70.0,103.0,,147.0,,152.0,67.0,90.0,,,,,,,,,0.022,90.0,,,110.0,586.0,0.02,12514.0,,,0.92,169.0,,,0.94,,,,,1.0,32.0,27.0,3.0,3.0,3.0,4.0,4.0,4.0,3.0,4.0,179.0,188.206619,,,,,18.4,184.0,,,,,,,1.0,31.0,2022
5,10011,10861.0,11435.0,151.0,,81783.0,81783.0,11.733,14.72,4.032,,8.211,95.085,0.511,0.611,0.992,,0.609,0.252,0.163,0.042,64.8,0.195,33.4,27.4,0.139,0.161,0.11,0.089,0.143,0.025,918.0,43.0,351.0,166.0,126.0,349.0,104.0,331.0,305.0,150.0,123.0,49.0,,,,,,,0.073,259.0,,,183.0,87.0,0.03,52482.0,,,,,161.7,82.0,1.06,,,,,0.34,47.0,31.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,2.5,2139.0,1327.131542,0.041,69.0,0.153,64.0,16.6,96.0,0.75,40.0,,,,,1.0,248.0,2022


In [23]:
with io.capture_output() as captured: 
    df_2021 = pd.read_sas(stars_dir + '2021/2021-04 Stars Release/all_data_2021apr.sas7bdat')
df_2021['Release year'] = ['2021']*df_2021.shape[0]
df_2021 = df_2021.applymap(lambda x: x.decode() if isinstance(x, bytes) else x)
df_2021 = df_2021[df_2021['PROVIDER_ID'].isin(prvdrs_2021)]

print(df_2021.shape)
prvdrs = df_2021['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in 2021')
df_2021.head()

(3355, 100)
3355 hospitals in 2021


Unnamed: 0,PROVIDER_ID,HAI_1_DEN_VOL,HAI_2_DEN_VOL,HAI_3_DEN_VOL,HAI_4_DEN_VOL,HAI_5_DEN_VOL,HAI_6_DEN_VOL,HAI_1_DEN_PRED,HAI_2_DEN_PRED,HAI_3_DEN_PRED,HAI_4_DEN_PRED,HAI_5_DEN_PRED,HAI_6_DEN_PRED,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,READM_30_HOSP_WIDE,READM_30_HIP_KNEE,EDAC_30_HF,READM_30_COPD,EDAC_30_AMI,EDAC_30_PN,MORT_30_STK,MORT_30_PN,MORT_30_HF,MORT_30_COPD,MORT_30_AMI,COMP_HIP_KNEE,READM_30_HOSP_WIDE_DEN,READM_30_HIP_KNEE_DEN,EDAC_30_HF_DEN,READM_30_COPD_DEN,EDAC_30_AMI_DEN,EDAC_30_PN_DEN,MORT_30_STK_DEN,MORT_30_PN_DEN,MORT_30_HF_DEN,MORT_30_COPD_DEN,MORT_30_AMI_DEN,COMP_HIP_KNEE_DEN,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_8,OP_8_DEN,OP_10,OP_10_DEN,OP_13,OP_13_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,OP_30,OP_30_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,PSI_90_SAFETY,IMM_3_DEN,IMM_3,PC_01,PC_01_DEN,SEP_1,SEP_1_DEN,ED_2B,ED_2B_DEN,H_RESP_RATE_P,H_COMP_1_STAR_RATING,H_COMP_2_STAR_RATING,H_COMP_3_STAR_RATING,H_COMP_5_STAR_RATING,H_COMP_6_STAR_RATING,H_COMP_7_STAR_RATING,H_GLOB_STAR_RATING,H_INDI_STAR_RATING,H_NUMB_COMP,PSI_90_SAFETY_DEN,MORT_30_CABG,MORT_30_CABG_DEN,READM_30_CABG,READM_30_CABG_DEN,OP_32,OP_32_DEN,OP_33,OP_33_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,Release year
0,10001,7846.0,13268.0,165.0,216.0,102765.0,102765.0,8.086,18.498,4.615,1.884,7.305,76.294,0.742,0.324,0.65,0.0,0.548,0.537,0.156,0.046,21.6,0.207,7.0,-20.6,0.166,0.157,0.113,0.084,0.119,0.023,4474.0,258.0,1106.0,443.0,620.0,594.0,644.0,554.0,858.0,374.0,586.0,250.0,,,,,0.389,211.0,0.072,2117.0,0.028,211.0,178.0,349.0,0.03,57844.0,,,0.81,64.0,0.98,162.0,170.87,165.0,0.93,4817.0,0.97,0.0,24.0,0.6,102.0,103.0,655.0,21.0,3.0,3.0,3.0,3.0,4.0,3.0,3.5,3.5,507.0,3896.300852,0.047,281.0,0.149,268.0,13.4,606.0,,,10.8,190.0,7.1,190.0,0.8,993.0,2021
1,10005,3088.0,7928.0,74.0,,40143.0,37697.0,1.988,4.66,1.97,,1.548,12.386,0.0,1.073,0.508,,1.938,0.565,0.159,0.039,10.2,0.187,-7.0,22.9,0.171,0.195,0.168,0.099,0.138,0.023,2018.0,221.0,304.0,591.0,52.0,625.0,169.0,637.0,291.0,502.0,80.0,213.0,,,62.0,18.0,0.427,246.0,0.142,1504.0,0.033,273.0,115.0,1408.0,0.02,71631.0,0.71,24.0,0.82,204.0,0.94,413.0,190.88,51.0,1.0,1915.0,0.91,0.03,193.0,0.68,330.0,82.0,1034.0,30.0,3.0,4.0,3.0,3.0,4.0,3.0,3.0,3.0,568.0,1538.334998,,,,,16.3,1150.0,1.0,12.0,11.1,121.0,6.2,121.0,1.1,483.0,2021
2,10006,5874.0,10270.0,90.0,,64819.0,62418.0,4.781,8.992,2.439,,4.588,28.161,0.0,0.222,0.82,,0.654,0.426,0.152,0.046,-17.8,0.186,-5.5,27.5,0.125,0.184,0.125,0.095,0.157,0.028,3620.0,392.0,771.0,565.0,443.0,763.0,362.0,738.0,659.0,489.0,422.0,363.0,,,,,0.429,119.0,0.145,1386.0,0.026,265.0,152.0,362.0,0.01,41321.0,,,0.81,94.0,0.87,125.0,217.08,109.0,1.07,2456.0,0.93,0.0,35.0,0.33,105.0,110.0,552.0,23.0,3.0,3.0,1.0,2.0,2.0,2.0,3.0,3.0,1136.0,2816.123681,0.041,139.0,0.132,130.0,16.4,1948.0,,,,,,,1.1,454.0,2021
3,10007,,,,,,4783.0,,,,,,1.84,,,,,,0.0,0.161,,29.1,0.194,,16.9,0.153,0.203,0.141,0.09,,,443.0,,83.0,136.0,,200.0,29.0,205.0,78.0,116.0,,,,,,,,,0.091,208.0,,,108.0,1295.0,0.03,1116.0,,,0.13,82.0,0.9,40.0,,,0.92,,,,,0.98,52.0,62.0,525.0,27.0,3.0,3.0,3.0,4.0,4.0,4.0,3.0,4.0,179.0,277.574249,,,,,17.7,213.0,,,,,,,1.0,76.0,2021
4,10008,,,,,,,,,,,,,,,,,,,0.148,,,0.194,,-3.6,,0.178,0.12,0.105,,,127.0,,,30.0,,39.0,,43.0,27.0,32.0,,,,,,,,,0.034,148.0,,,91.0,335.0,0.01,7012.0,,,0.59,34.0,0.97,30.0,,,0.99,189.0,0.48,,,0.43,14.0,86.0,396.0,,,,,,,,,,,58.903206,,,,,17.5,77.0,,,,,,,,,2021


## Merge SAS pack output data with the input data

In [24]:
input_df = df_2026.merge(df_2025, how='outer')
input_df = input_df.merge(df_2024, how='outer')
input_df = input_df.merge(df_2023, how='outer')
input_df = input_df.merge(df_2022, how='outer')
input_df = input_df.merge(df_2021, how='outer')

print(input_df.shape)
prvdrs = input_df['PROVIDER_ID'].unique()
print(len(prvdrs), 'hospitals in input df')
input_df.head()


(18500, 200)
3648 hospitals in input df


Unnamed: 0,PROVIDER_ID,IMM_3,IMM_3_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,SEP_1,SEP_1_DEN,SAFE_USE_OF_OPIOIDS,SAFE_USE_OF_OPIOIDS_DEN,MORT_30_AMI,MORT_30_AMI_DEN,MORT_30_CABG,MORT_30_CABG_DEN,MORT_30_COPD,MORT_30_COPD_DEN,MORT_30_HF,MORT_30_HF_DEN,MORT_30_PN,MORT_30_PN_DEN,MORT_30_STK,MORT_30_STK_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,COMP_HIP_KNEE,COMP_HIP_KNEE_DEN,PSI_90_SAFETY,PSI_90_SAFETY_DEN,Hybrid_HWM,Hybrid_HWM_DEN,H_COMP_1_A_P,H_COMP_1_SN_P,H_COMP_1_U_P,H_COMP_1_LINEAR_SCORE,H_COMP_1_STAR_RATING,H_NURSE_RESPECT_A_P,H_NURSE_RESPECT_SN_P,H_NURSE_RESPECT_U_P,H_NURSE_LISTEN_A_P,H_NURSE_LISTEN_SN_P,H_NURSE_LISTEN_U_P,H_NURSE_EXPLAIN_A_P,H_NURSE_EXPLAIN_SN_P,H_NURSE_EXPLAIN_U_P,H_COMP_2_A_P,H_COMP_2_SN_P,H_COMP_2_U_P,H_COMP_2_LINEAR_SCORE,H_COMP_2_STAR_RATING,H_DOCTOR_RESPECT_A_P,H_DOCTOR_RESPECT_SN_P,H_DOCTOR_RESPECT_U_P,H_DOCTOR_LISTEN_A_P,H_DOCTOR_LISTEN_SN_P,H_DOCTOR_LISTEN_U_P,H_DOCTOR_EXPLAIN_A_P,H_DOCTOR_EXPLAIN_SN_P,H_DOCTOR_EXPLAIN_U_P,H_COMP_3_A_P,H_COMP_3_SN_P,H_COMP_3_U_P,H_COMP_3_LINEAR_SCORE,H_COMP_3_STAR_RATING,H_CALL_BUTTON_A_P,H_CALL_BUTTON_SN_P,H_CALL_BUTTON_U_P,H_BATH_HELP_A_P,H_BATH_HELP_SN_P,H_BATH_HELP_U_P,H_COMP_5_A_P,H_COMP_5_SN_P,H_COMP_5_U_P,H_COMP_5_LINEAR_SCORE,H_COMP_5_STAR_RATING,H_MED_FOR_A_P,H_MED_FOR_SN_P,H_MED_FOR_U_P,H_SIDE_EFFECTS_A_P,H_SIDE_EFFECTS_SN_P,H_SIDE_EFFECTS_U_P,H_COMP_6_N_P,H_COMP_6_Y_P,H_COMP_6_LINEAR_SCORE,H_COMP_6_STAR_RATING,H_DISCH_HELP_N_P,H_DISCH_HELP_Y_P,H_SYMPTOMS_N_P,H_SYMPTOMS_Y_P,H_COMP_7_A,H_COMP_7_D_SD,H_COMP_7_SA,H_COMP_7_LINEAR_SCORE,H_COMP_7_STAR_RATING,H_CT_PREFER_A,H_CT_PREFER_D_SD,H_CT_PREFER_SA,H_CT_UNDER_A,H_CT_UNDER_D_SD,H_CT_UNDER_SA,H_CT_MED_A,H_CT_MED_D_SD,H_CT_MED_SA,H_CLEAN_HSP_A_P,H_CLEAN_HSP_SN_P,H_CLEAN_HSP_U_P,H_CLEAN_LINEAR_SCORE,H_CLEAN_STAR_RATING,H_QUIET_HSP_A_P,H_QUIET_HSP_SN_P,H_QUIET_HSP_U_P,H_QUIET_LINEAR_SCORE,H_QUIET_STAR_RATING,H_HSP_RATING_0_6,H_HSP_RATING_7_8,H_HSP_RATING_9_10,H_HSP_RATING_LINEAR_SCORE,H_HSP_RATING_STAR_RATING,H_RECMND_DN,H_RECMND_DY,H_RECMND_PY,H_RECMND_LINEAR_SCORE,H_RECMND_STAR_RATING,H_STAR_RATING,O_COMP_1_LINEAR_SCORE,O_COMP_2_LINEAR_SCORE,O_COMP_3_LINEAR_SCORE,O_PATIENT_RATE_LINEAR_SCORE,O_PATIENT_REC_LINEAR_SCORE,EDAC_30_AMI,EDAC_30_AMI_DEN,EDAC_30_HF,EDAC_30_HF_DEN,EDAC_30_PN,EDAC_30_PN_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,READM_30_CABG,READM_30_CABG_DEN,READM_30_COPD,READM_30_COPD_DEN,READM_30_HIP_KNEE,READM_30_HIP_KNEE_DEN,Hybrid_HWR,Hybrid_HWR_DEN,HAI_1_DEN_PRED,HAI_1_DEN_VOL,HAI_1,HAI_2_DEN_PRED,HAI_2_DEN_VOL,HAI_2,HAI_3_DEN_PRED,HAI_3_DEN_VOL,HAI_3,HAI_4_DEN_PRED,HAI_4_DEN_VOL,HAI_4,HAI_5_DEN_PRED,HAI_5_DEN_VOL,HAI_5,HAI_6_DEN_PRED,HAI_6_DEN_VOL,HAI_6,OP_13,OP_8,OP_10,Release year,READM_30_HOSP_WIDE_DEN,OP_13_DEN,H_NUMB_COMP,PC_01,PC_01_DEN,HCP_COVID_19,OP_10_DEN,READM_30_HOSP_WIDE,H_RESP_RATE_P,HCP_COVID_19_DEN,OP_8_DEN,H_INDI_STAR_RATING,H_GLOB_STAR_RATING,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_33,OP_33_DEN,OP_30,OP_30_DEN,ED_2B,ED_2B_DEN
0,10001,0.93,4625.0,222.0,387.0,0.05,57084.0,0.91,11.0,0.72,29.0,0.69,150.0,0.14,4583.0,0.114,270.0,0.03,144.0,0.094,112.0,0.102,583.0,0.184,517.0,0.135,395.0,203.0,118.0,0.032,27.0,0.95,,0.045,1835.0,,,,90.0,,,,,,,,,,,,,,92.0,,,,,,,,,,,,,,82.0,,,,,,,,,,,79.0,,,,,,,,,,87.0,,,,,,,,,82.0,,,,,,,,,,,,,,85.0,,,,,88.0,,,,,89.0,,,,,91.0,,,98.0,93.0,97.0,94.0,94.0,-15.6,273.0,-1.1,652.0,17.4,507.0,12.8,218.0,8.5,261.0,5.7,261.0,0.8,647.0,0.101,137.0,0.18,122.0,0.048,25.0,0.151,2824.0,9.44,8935.0,0.53,23.35,16255.0,0.086,6.562,229.0,0.457,0.903,97.0,,10.937,109019.0,0.183,68.076,109019.0,0.382,0.038,0.308,0.053,2026,,,,,,,,,,,,,,,,,,,,,,,
1,10001,0.95,3905.0,214.0,348.0,0.05,52960.0,,,0.47,17.0,0.65,127.0,,,0.12,278.0,0.041,132.0,0.088,107.0,0.089,549.0,0.18,400.0,0.148,398.0,184.68,134.0,0.027,49.0,1.21,2542.0385,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,4.0,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-15.4,274.0,23.4,614.0,23.6,403.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,0.105,126.0,0.19,117.0,0.038,49.0,,,9.597,9149.0,0.938,24.766,17310.0,0.363,5.994,214.0,1.335,,,,11.4,104733.0,0.965,67.066,104733.0,0.507,0.028,0.38,0.061,2024,2912.0,178.0,544.0,0.0,32.0,0.836,1410.0,0.142,15.0,2496.0,79.0,3.5,3.5,,,,,,,,,,
2,10001,0.96,4115.0,217.0,345.0,0.05,52960.0,,,0.47,17.0,0.68,131.0,0.12,4303.0,0.108,291.0,0.038,157.0,0.079,122.0,0.105,610.0,0.189,489.0,0.137,414.0,194.78,125.0,0.03,32.0,0.98,,,,,,,,2.0,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,3.0,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-13.8,296.0,10.6,679.0,14.3,490.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,0.102,151.0,0.177,130.0,0.044,34.0,,,10.082,9538.0,0.496,23.712,16332.0,0.169,6.618,240.0,1.209,,,,11.232,103195.0,0.445,65.234,103195.0,0.491,0.021,0.333,0.054,2025,2924.0,193.0,643.0,0.02,46.0,0.0,1478.0,0.141,17.0,2712.0,66.0,3.0,4.0,,,,,,,,,,
3,10001,0.97,3795.0,205.0,323.0,0.03,51079.0,,,0.81,16.0,0.46,146.0,,,0.124,317.0,0.047,172.0,0.085,182.0,0.083,630.0,0.159,407.0,0.164,489.0,173.39,120.0,0.024,102.0,1.01,2046.895485,,,,,,,2.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,2.0,,,,,,,,,,,4.0,,,,,,,,,,4.0,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.9,319.0,21.8,755.0,-1.5,436.0,14.1,254.0,10.2,214.0,4.7,214.0,1.0,688.0,0.117,165.0,0.199,202.0,0.042,98.0,,,10.597,10024.0,0.661,26.63,17731.0,0.3,4.548,154.0,1.099,1.845,200.0,0.0,9.412,101908.0,0.85,72.686,101451.0,0.66,0.067,0.425,0.057,2023,3058.0,208.0,434.0,0.09,34.0,0.737,1488.0,0.142,15.0,2323.0,146.0,3.5,3.0,,,,,,,,,,
4,10001,0.97,4817.0,178.0,349.0,0.03,57844.0,,,0.81,64.0,0.6,102.0,,,0.119,586.0,0.047,281.0,0.084,374.0,0.113,858.0,0.157,554.0,0.166,644.0,170.87,165.0,0.023,250.0,0.93,3896.300852,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,4.0,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.0,620.0,21.6,1106.0,-20.6,594.0,13.4,606.0,10.8,190.0,7.1,190.0,0.8,993.0,0.149,268.0,0.207,443.0,0.046,258.0,,,8.086,7846.0,0.742,18.498,13268.0,0.324,4.615,165.0,0.65,1.884,216.0,0.0,7.305,102765.0,0.548,76.294,102765.0,0.537,0.028,0.389,0.072,2021,4474.0,211.0,507.0,0.0,24.0,,2117.0,0.156,21.0,,211.0,3.5,3.5,,,,,,,0.98,162.0,103.0,655.0


In [25]:
main_df = main_df.merge(input_df, how='outer', on=['PROVIDER_ID', 'Release year'])
print(main_df.shape)

main_df.rename(columns={'PROVIDER_ID': 'Facility ID'}, inplace=True)

prvdrs = main_df['Facility ID'].unique()
print(len(prvdrs), 'hospitals in main_df')
print(main_df['Release year'].unique())

main_df.head()

(18500, 380)
3648 hospitals in main_df
['2021' '2022' '2023' '2024' '2025' '2026']


Unnamed: 0,Facility ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Q_25,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,std_Hybrid_HWM,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_C8,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,std_Hybrid_HWR,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_SAFE_USE_OF_OPIOIDS,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_O_COMP_1_LINEAR_SCORE,std_O_COMP_2_LINEAR_SCORE,std_O_COMP_3_LINEAR_SCORE,std_O_PATIENT_RATE_LINEAR_SCORE,std_O_PATIENT_REC_LINEAR_SCORE,std_H_COMP_1_LINEAR_SCORE,std_H_COMP_2_LINEAR_SCORE,std_H_COMP_3_LINEAR_SCORE,std_H_COMP_5_LINEAR_SCORE,std_H_COMP_6_LINEAR_SCORE,std_H_COMP_7_LINEAR_SCORE,std_H_CLEAN_LINEAR_SCORE,std_H_QUIET_LINEAR_SCORE,std_H_RECMND_LINEAR_SCORE,std_H_HSP_RATING_LINEAR_SCORE,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_C9,patient_exp_C10,patient_exp_C11,patient_exp_C12,patient_exp_C13,patient_exp_C14,patient_exp_C15,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,std_READM_30_HOSP_WIDE,std_HCP_COVID_19,std_PC_01,process_C11,process_C12,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,std_OP_3B,std_OP_33,std_OP_30,std_ED_2B,process_C13,process_C14,IMM_3,IMM_3_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,SEP_1,SEP_1_DEN,SAFE_USE_OF_OPIOIDS,SAFE_USE_OF_OPIOIDS_DEN,MORT_30_AMI,MORT_30_AMI_DEN,MORT_30_CABG,MORT_30_CABG_DEN,MORT_30_COPD,MORT_30_COPD_DEN,MORT_30_HF,MORT_30_HF_DEN,MORT_30_PN,MORT_30_PN_DEN,MORT_30_STK,MORT_30_STK_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,COMP_HIP_KNEE,COMP_HIP_KNEE_DEN,PSI_90_SAFETY,PSI_90_SAFETY_DEN,Hybrid_HWM,Hybrid_HWM_DEN,H_COMP_1_A_P,H_COMP_1_SN_P,H_COMP_1_U_P,H_COMP_1_LINEAR_SCORE,H_COMP_1_STAR_RATING,H_NURSE_RESPECT_A_P,H_NURSE_RESPECT_SN_P,H_NURSE_RESPECT_U_P,H_NURSE_LISTEN_A_P,H_NURSE_LISTEN_SN_P,H_NURSE_LISTEN_U_P,H_NURSE_EXPLAIN_A_P,H_NURSE_EXPLAIN_SN_P,H_NURSE_EXPLAIN_U_P,H_COMP_2_A_P,H_COMP_2_SN_P,H_COMP_2_U_P,H_COMP_2_LINEAR_SCORE,H_COMP_2_STAR_RATING,H_DOCTOR_RESPECT_A_P,H_DOCTOR_RESPECT_SN_P,H_DOCTOR_RESPECT_U_P,H_DOCTOR_LISTEN_A_P,H_DOCTOR_LISTEN_SN_P,H_DOCTOR_LISTEN_U_P,H_DOCTOR_EXPLAIN_A_P,H_DOCTOR_EXPLAIN_SN_P,H_DOCTOR_EXPLAIN_U_P,H_COMP_3_A_P,H_COMP_3_SN_P,H_COMP_3_U_P,H_COMP_3_LINEAR_SCORE,H_COMP_3_STAR_RATING,H_CALL_BUTTON_A_P,H_CALL_BUTTON_SN_P,H_CALL_BUTTON_U_P,H_BATH_HELP_A_P,H_BATH_HELP_SN_P,H_BATH_HELP_U_P,H_COMP_5_A_P,H_COMP_5_SN_P,H_COMP_5_U_P,H_COMP_5_LINEAR_SCORE,H_COMP_5_STAR_RATING,H_MED_FOR_A_P,H_MED_FOR_SN_P,H_MED_FOR_U_P,H_SIDE_EFFECTS_A_P,H_SIDE_EFFECTS_SN_P,H_SIDE_EFFECTS_U_P,H_COMP_6_N_P,H_COMP_6_Y_P,H_COMP_6_LINEAR_SCORE,H_COMP_6_STAR_RATING,H_DISCH_HELP_N_P,H_DISCH_HELP_Y_P,H_SYMPTOMS_N_P,H_SYMPTOMS_Y_P,H_COMP_7_A,H_COMP_7_D_SD,H_COMP_7_SA,H_COMP_7_LINEAR_SCORE,H_COMP_7_STAR_RATING,H_CT_PREFER_A,H_CT_PREFER_D_SD,H_CT_PREFER_SA,H_CT_UNDER_A,H_CT_UNDER_D_SD,H_CT_UNDER_SA,H_CT_MED_A,H_CT_MED_D_SD,H_CT_MED_SA,H_CLEAN_HSP_A_P,H_CLEAN_HSP_SN_P,H_CLEAN_HSP_U_P,H_CLEAN_LINEAR_SCORE,H_CLEAN_STAR_RATING,H_QUIET_HSP_A_P,H_QUIET_HSP_SN_P,H_QUIET_HSP_U_P,H_QUIET_LINEAR_SCORE,H_QUIET_STAR_RATING,H_HSP_RATING_0_6,H_HSP_RATING_7_8,H_HSP_RATING_9_10,H_HSP_RATING_LINEAR_SCORE,H_HSP_RATING_STAR_RATING,H_RECMND_DN,H_RECMND_DY,H_RECMND_PY,H_RECMND_LINEAR_SCORE,H_RECMND_STAR_RATING,H_STAR_RATING,O_COMP_1_LINEAR_SCORE,O_COMP_2_LINEAR_SCORE,O_COMP_3_LINEAR_SCORE,O_PATIENT_RATE_LINEAR_SCORE,O_PATIENT_REC_LINEAR_SCORE,EDAC_30_AMI,EDAC_30_AMI_DEN,EDAC_30_HF,EDAC_30_HF_DEN,EDAC_30_PN,EDAC_30_PN_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,READM_30_CABG,READM_30_CABG_DEN,READM_30_COPD,READM_30_COPD_DEN,READM_30_HIP_KNEE,READM_30_HIP_KNEE_DEN,Hybrid_HWR,Hybrid_HWR_DEN,HAI_1_DEN_PRED,HAI_1_DEN_VOL,HAI_1,HAI_2_DEN_PRED,HAI_2_DEN_VOL,HAI_2,HAI_3_DEN_PRED,HAI_3_DEN_VOL,HAI_3,HAI_4_DEN_PRED,HAI_4_DEN_VOL,HAI_4,HAI_5_DEN_PRED,HAI_5_DEN_VOL,HAI_5,HAI_6_DEN_PRED,HAI_6_DEN_VOL,HAI_6,OP_13,OP_8,OP_10,READM_30_HOSP_WIDE_DEN,OP_13_DEN,H_NUMB_COMP,PC_01,PC_01_DEN,HCP_COVID_19,OP_10_DEN,READM_30_HOSP_WIDE,H_RESP_RATE_P,HCP_COVID_19_DEN,OP_8_DEN,H_INDI_STAR_RATING,H_GLOB_STAR_RATING,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_33,OP_33_DEN,OP_30,OP_30_DEN,ED_2B,ED_2B_DEN
0,10001,-0.752932,-0.070355,0.556127,0.071425,-0.050316,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.0491,7,8,11,8,11,5,2,1,3.0,3.0,,2021,0.730853,-2.055159,0.069106,0.103403,-0.022557,-1.941449,-0.327016,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.491831,-0.004941,0.64666,-0.752932,-0.027028,-0.705673,1.039984,2.185101,-1.542989,-1.037529,-1.164917,1.1826,-1.18578,1.207127,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.010885,0.027901,0.551294,-0.070355,0.303431,-0.078888,0.680925,0.244905,1.037183,0.403644,0.098335,0.31923,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.376096,0.009921,0.658437,0.556127,0.644574,-0.102729,0.684379,-0.761776,-0.979501,,-0.459111,0.140655,,0.031191,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,11.0,0.090909,0.007553,0.037755,0.600242,-0.050316,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.060398,-1.104271e-16,0.845613,0.071425,-0.070634,,0.473469,1.0,1.0,-0.537213,-0.085263,-0.269656,-0.073296,0.801144,-0.115323,0.284361,0.478432,,,0.466001,-0.054072,1.0,1.0,0.97,4817.0,178.0,349.0,0.03,57844.0,,,0.81,64.0,0.6,102.0,,,0.119,586.0,0.047,281.0,0.084,374.0,0.113,858.0,0.157,554.0,0.166,644.0,170.87,165.0,0.023,250.0,0.93,3896.300852,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,4.0,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.0,620.0,21.6,1106.0,-20.6,594.0,13.4,606.0,10.8,190.0,7.1,190.0,0.8,993.0,0.149,268.0,0.207,443.0,0.046,258.0,,,8.086,7846.0,0.742,18.498,13268.0,0.324,4.615,165.0,0.65,1.884,216.0,0.0,7.305,102765.0,0.548,76.294,102765.0,0.537,0.028,0.389,0.072,4474.0,211.0,507.0,0.0,24.0,,2117.0,0.156,21.0,,211.0,3.5,3.5,,,,,,,0.98,162.0,103.0,655.0
1,10001,-0.846744,-0.321758,0.591483,0.0721,-0.320038,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.149487,7,8,11,8,9,5,2,1,3.0,3.0,,2022,0.703271,-1.054647,-0.072635,1.355101,0.507204,-3.873239,-1.364075,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.542717,0.000879,0.641984,-0.846744,0.012064,-0.457136,0.497621,1.664542,-0.606328,-1.47008,-1.684933,0.60215,-0.703618,0.623486,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.143203,0.027686,0.531111,-0.321758,-0.427377,0.325583,0.722043,0.29595,1.045465,0.077072,-0.002603,1.040003,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.384517,0.002796,0.645363,0.591483,0.643289,0.376713,-0.520081,-0.906488,-0.863287,,0.009932,-0.897899,,-0.264434,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,9.0,0.111111,-0.220009,0.040971,0.815466,-0.320038,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.060979,-5.363828e-17,0.84576,0.0721,-0.053004,,0.442175,1.0,1.0,-0.536306,-0.084743,-0.269007,-0.072623,0.801375,-0.116805,0.284308,0.481635,,,,,,,0.97,4817.0,183.0,176.0,0.03,59762.0,,,0.9,52.0,0.55,56.0,,,0.116,412.0,0.038,200.0,0.083,270.0,0.089,725.0,0.145,474.0,0.198,533.0,184.28,115.0,0.026,175.0,0.81,2828.228824,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,4.0,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.5,424.0,16.1,904.0,-7.1,503.0,14.1,511.0,11.3,192.0,6.7,192.0,0.9,1003.0,0.135,193.0,0.212,310.0,0.049,165.0,,,8.711,8391.0,0.574,19.223,13866.0,0.312,4.938,171.0,0.608,2.025,229.0,0.0,7.631,102470.0,0.786,78.514,102470.0,0.56,0.048,0.459,0.04,1805.0,147.0,507.0,0.0,18.0,,925.0,0.155,21.0,,122.0,3.5,3.5,,,,,,,,,,
2,10001,-0.647058,0.284081,0.312074,-0.127836,-1.024044,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.162208,7,8,11,8,10,5,2,1,3.0,3.0,,2023,-0.041613,-2.19781,-0.019543,1.708727,0.366398,-1.543654,-1.741473,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.495567,-0.05126,0.686658,-0.647058,0.207832,-0.702681,0.28604,0.105508,0.219318,-0.048609,-0.198058,0.159253,0.872692,0.062022,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.171616,0.020822,0.530811,0.284081,0.024204,0.444297,0.745807,-0.407579,1.025843,0.314693,-0.315731,-0.212425,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.202389,-0.013939,0.693192,0.312074,0.992908,0.10185,-1.77947,-0.904124,-0.237088,,-0.634479,0.488322,,-0.609958,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,10.0,0.1,-0.538475,0.030917,0.556023,-1.024044,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,-0.109237,2.611408e-16,0.85451,-0.127836,0.924456,-1.039363,-1.763346,1.0,1.0,-1.07585,-0.175531,-1.051153,0.808598,0.68837,0.037272,-0.445032,0.339427,,,,,,,0.97,3795.0,205.0,323.0,0.03,51079.0,,,0.81,16.0,0.46,146.0,,,0.124,317.0,0.047,172.0,0.085,182.0,0.083,630.0,0.159,407.0,0.164,489.0,173.39,120.0,0.024,102.0,1.01,2046.895485,,,,,,,2.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,2.0,,,,,,,,,,,4.0,,,,,,,,,,4.0,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.9,319.0,21.8,755.0,-1.5,436.0,14.1,254.0,10.2,214.0,4.7,214.0,1.0,688.0,0.117,165.0,0.199,202.0,0.042,98.0,,,10.597,10024.0,0.661,26.63,17731.0,0.3,4.548,154.0,1.099,1.845,200.0,0.0,9.412,101908.0,0.85,72.686,101451.0,0.66,0.067,0.425,0.057,3058.0,208.0,434.0,0.09,34.0,0.737,1488.0,0.142,15.0,2323.0,146.0,3.5,3.0,,,,,,,,,,
3,10001,0.007071,0.230867,-0.146867,0.137779,-0.677624,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.030968,7,7,11,8,10,5,2,1,3.0,3.0,,2024,0.470514,-1.535074,0.282996,1.510467,0.129818,-0.554262,-0.748332,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.063411,-0.068391,0.704371,0.007071,1.123231,-0.791585,-0.63392,0.29207,0.584835,0.293805,0.886353,-1.03233,0.626949,-0.252865,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.138216,0.014038,0.537879,0.230867,0.879845,-0.232451,0.488068,-0.71166,,-0.210159,0.003034,-1.162386,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.135101,-0.021526,0.773315,-0.146867,0.940083,-0.029295,0.419351,-0.942734,-1.140182,,-3.129707,0.000412,,0.371166,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,10.0,0.1,-0.335877,0.029039,0.538522,-0.677624,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.119895,1.093644e-15,0.870204,0.137779,0.423838,-0.431525,0.583665,1.0,1.0,-0.373358,-0.400063,-0.243371,0.124487,0.479704,0.876397,0.133873,0.361496,,,,,,,0.95,3905.0,214.0,348.0,0.05,52960.0,,,0.47,17.0,0.65,127.0,,,0.12,278.0,0.041,132.0,0.088,107.0,0.089,549.0,0.18,400.0,0.148,398.0,184.68,134.0,0.027,49.0,1.21,2542.0385,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,4.0,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-15.4,274.0,23.4,614.0,23.6,403.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,0.105,126.0,0.19,117.0,0.038,49.0,,,9.597,9149.0,0.938,24.766,17310.0,0.363,5.994,214.0,1.335,,,,11.4,104733.0,0.965,67.066,104733.0,0.507,0.028,0.38,0.061,2912.0,178.0,544.0,0.0,32.0,0.836,1410.0,0.142,15.0,2496.0,79.0,3.5,3.5,,,,,,,,,,
4,10001,0.29611,0.361406,0.344883,0.042072,-0.484678,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.171622,7,7,11,8,11,5,2,1,3.0,4.0,,2025,1.405923,-1.04537,1.011496,0.650904,-0.315375,0.023766,-0.732077,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,0.142753,-0.073484,0.730255,0.29611,1.024273,-0.368843,-0.448127,0.289973,0.607499,0.859382,0.192452,-1.028886,0.62791,-0.25798,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.206001,0.013108,0.53373,0.361406,0.698729,0.30367,0.739078,-0.467783,,0.476446,-0.105703,0.103001,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.249634,-0.01903,0.779001,0.344883,1.044716,0.080291,0.781373,-1.062058,-1.138829,,-3.171398,0.588011,0.563049,0.449772,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,11.0,0.090909,-0.204464,0.028411,0.480473,-0.484678,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.036435,4.8997890000000005e-17,0.866021,0.042072,0.768359,-0.525017,0.140984,1.0,1.0,-1.154064,0.570761,-0.231671,-0.254181,-0.312251,0.933177,0.719371,0.020336,,,,,,,0.96,4115.0,217.0,345.0,0.05,52960.0,,,0.47,17.0,0.68,131.0,0.12,4303.0,0.108,291.0,0.038,157.0,0.079,122.0,0.105,610.0,0.189,489.0,0.137,414.0,194.78,125.0,0.03,32.0,0.98,,,,,,,,2.0,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,3.0,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-13.8,296.0,10.6,679.0,14.3,490.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,0.102,151.0,0.177,130.0,0.044,34.0,,,10.082,9538.0,0.496,23.712,16332.0,0.169,6.618,240.0,1.209,,,,11.232,103195.0,0.445,65.234,103195.0,0.491,0.021,0.333,0.054,2924.0,193.0,643.0,0.02,46.0,0.0,1478.0,0.141,17.0,2712.0,66.0,3.0,4.0,,,,,,,,,,


In [26]:
tdf = main_df[main_df['Release year'] == '2026']
tdf = tdf[tdf['Facility ID'].isin(['140119', '140063', '140029'])]
print(tdf.shape)
tdf.head()

(3, 380)


Unnamed: 0,Facility ID,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Q_25,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,std_Hybrid_HWM,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_C8,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,std_Hybrid_HWR,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_SAFE_USE_OF_OPIOIDS,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_O_COMP_1_LINEAR_SCORE,std_O_COMP_2_LINEAR_SCORE,std_O_COMP_3_LINEAR_SCORE,std_O_PATIENT_RATE_LINEAR_SCORE,std_O_PATIENT_REC_LINEAR_SCORE,std_H_COMP_1_LINEAR_SCORE,std_H_COMP_2_LINEAR_SCORE,std_H_COMP_3_LINEAR_SCORE,std_H_COMP_5_LINEAR_SCORE,std_H_COMP_6_LINEAR_SCORE,std_H_COMP_7_LINEAR_SCORE,std_H_CLEAN_LINEAR_SCORE,std_H_QUIET_LINEAR_SCORE,std_H_RECMND_LINEAR_SCORE,std_H_HSP_RATING_LINEAR_SCORE,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_C9,patient_exp_C10,patient_exp_C11,patient_exp_C12,patient_exp_C13,patient_exp_C14,patient_exp_C15,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,std_READM_30_HOSP_WIDE,std_HCP_COVID_19,std_PC_01,process_C11,process_C12,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,std_OP_3B,std_OP_33,std_OP_30,std_ED_2B,process_C13,process_C14,IMM_3,IMM_3_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,SEP_1,SEP_1_DEN,SAFE_USE_OF_OPIOIDS,SAFE_USE_OF_OPIOIDS_DEN,MORT_30_AMI,MORT_30_AMI_DEN,MORT_30_CABG,MORT_30_CABG_DEN,MORT_30_COPD,MORT_30_COPD_DEN,MORT_30_HF,MORT_30_HF_DEN,MORT_30_PN,MORT_30_PN_DEN,MORT_30_STK,MORT_30_STK_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,COMP_HIP_KNEE,COMP_HIP_KNEE_DEN,PSI_90_SAFETY,PSI_90_SAFETY_DEN,Hybrid_HWM,Hybrid_HWM_DEN,H_COMP_1_A_P,H_COMP_1_SN_P,H_COMP_1_U_P,H_COMP_1_LINEAR_SCORE,H_COMP_1_STAR_RATING,H_NURSE_RESPECT_A_P,H_NURSE_RESPECT_SN_P,H_NURSE_RESPECT_U_P,H_NURSE_LISTEN_A_P,H_NURSE_LISTEN_SN_P,H_NURSE_LISTEN_U_P,H_NURSE_EXPLAIN_A_P,H_NURSE_EXPLAIN_SN_P,H_NURSE_EXPLAIN_U_P,H_COMP_2_A_P,H_COMP_2_SN_P,H_COMP_2_U_P,H_COMP_2_LINEAR_SCORE,H_COMP_2_STAR_RATING,H_DOCTOR_RESPECT_A_P,H_DOCTOR_RESPECT_SN_P,H_DOCTOR_RESPECT_U_P,H_DOCTOR_LISTEN_A_P,H_DOCTOR_LISTEN_SN_P,H_DOCTOR_LISTEN_U_P,H_DOCTOR_EXPLAIN_A_P,H_DOCTOR_EXPLAIN_SN_P,H_DOCTOR_EXPLAIN_U_P,H_COMP_3_A_P,H_COMP_3_SN_P,H_COMP_3_U_P,H_COMP_3_LINEAR_SCORE,H_COMP_3_STAR_RATING,H_CALL_BUTTON_A_P,H_CALL_BUTTON_SN_P,H_CALL_BUTTON_U_P,H_BATH_HELP_A_P,H_BATH_HELP_SN_P,H_BATH_HELP_U_P,H_COMP_5_A_P,H_COMP_5_SN_P,H_COMP_5_U_P,H_COMP_5_LINEAR_SCORE,H_COMP_5_STAR_RATING,H_MED_FOR_A_P,H_MED_FOR_SN_P,H_MED_FOR_U_P,H_SIDE_EFFECTS_A_P,H_SIDE_EFFECTS_SN_P,H_SIDE_EFFECTS_U_P,H_COMP_6_N_P,H_COMP_6_Y_P,H_COMP_6_LINEAR_SCORE,H_COMP_6_STAR_RATING,H_DISCH_HELP_N_P,H_DISCH_HELP_Y_P,H_SYMPTOMS_N_P,H_SYMPTOMS_Y_P,H_COMP_7_A,H_COMP_7_D_SD,H_COMP_7_SA,H_COMP_7_LINEAR_SCORE,H_COMP_7_STAR_RATING,H_CT_PREFER_A,H_CT_PREFER_D_SD,H_CT_PREFER_SA,H_CT_UNDER_A,H_CT_UNDER_D_SD,H_CT_UNDER_SA,H_CT_MED_A,H_CT_MED_D_SD,H_CT_MED_SA,H_CLEAN_HSP_A_P,H_CLEAN_HSP_SN_P,H_CLEAN_HSP_U_P,H_CLEAN_LINEAR_SCORE,H_CLEAN_STAR_RATING,H_QUIET_HSP_A_P,H_QUIET_HSP_SN_P,H_QUIET_HSP_U_P,H_QUIET_LINEAR_SCORE,H_QUIET_STAR_RATING,H_HSP_RATING_0_6,H_HSP_RATING_7_8,H_HSP_RATING_9_10,H_HSP_RATING_LINEAR_SCORE,H_HSP_RATING_STAR_RATING,H_RECMND_DN,H_RECMND_DY,H_RECMND_PY,H_RECMND_LINEAR_SCORE,H_RECMND_STAR_RATING,H_STAR_RATING,O_COMP_1_LINEAR_SCORE,O_COMP_2_LINEAR_SCORE,O_COMP_3_LINEAR_SCORE,O_PATIENT_RATE_LINEAR_SCORE,O_PATIENT_REC_LINEAR_SCORE,EDAC_30_AMI,EDAC_30_AMI_DEN,EDAC_30_HF,EDAC_30_HF_DEN,EDAC_30_PN,EDAC_30_PN_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,READM_30_CABG,READM_30_CABG_DEN,READM_30_COPD,READM_30_COPD_DEN,READM_30_HIP_KNEE,READM_30_HIP_KNEE_DEN,Hybrid_HWR,Hybrid_HWR_DEN,HAI_1_DEN_PRED,HAI_1_DEN_VOL,HAI_1,HAI_2_DEN_PRED,HAI_2_DEN_VOL,HAI_2,HAI_3_DEN_PRED,HAI_3_DEN_VOL,HAI_3,HAI_4_DEN_PRED,HAI_4_DEN_VOL,HAI_4,HAI_5_DEN_PRED,HAI_5_DEN_VOL,HAI_5,HAI_6_DEN_PRED,HAI_6_DEN_VOL,HAI_6,OP_13,OP_8,OP_10,READM_30_HOSP_WIDE_DEN,OP_13_DEN,H_NUMB_COMP,PC_01,PC_01_DEN,HCP_COVID_19,OP_10_DEN,READM_30_HOSP_WIDE,H_RESP_RATE_P,HCP_COVID_19_DEN,OP_8_DEN,H_INDI_STAR_RATING,H_GLOB_STAR_RATING,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_33,OP_33_DEN,OP_30,OP_30_DEN,ED_2B,ED_2B_DEN
4907,140029,0.596369,0.196678,0.373559,-0.014642,0.696556,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.337019,8,8,11,15,10,5,2,1,3.0,4.0,,2026,0.54997,-0.287871,-0.207909,0.813467,0.345121,0.490473,-0.100905,1.285927,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.361034,-0.079177,0.738153,0.596369,-0.436328,0.001979,-0.187686,0.842209,-0.160486,1.318246,0.362713,-1.047856,0.881257,-1.036502,0.680565,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.110737,-9.9e-05,0.563541,0.196678,0.570514,0.228877,0.635833,-0.598331,1.120116,1.148146,-0.439417,-0.374579,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.286395,-0.025094,0.833842,0.373559,0.995679,0.44659,1.030102,-0.532281,-1.161984,0.331734,0.544638,0.030714,0.361155,1.70167,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,0.1,0.374802,0.017718,0.512643,0.696556,0.803203,-0.082826,-0.390364,0.544011,0.834243,-0.078576,-0.281022,-0.902535,-0.454435,0.426545,0.347443,-0.157809,-0.563565,0.214888,-0.00343,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,15.0,0.066667,0.017051,0.029341,0.839329,-0.014642,,,,,,,,,,,,,,,,,,,,0.94,3769.0,185.0,567.0,0.05,56773.0,0.77,22.0,0.99,142.0,0.9,145.0,0.13,3329.0,0.115,164.0,0.029,54.0,0.092,136.0,0.098,402.0,0.154,503.0,0.124,182.0,176.24,43.0,0.032,244.0,1.07,,0.035,1514.0,,,,91.0,,,,,,,,,,,,,,90.0,,,,,,,,,,,,,,79.0,,,,,,,,,,,74.0,,,,,,,,,,88.0,,,,,,,,,82.0,,,,,,,,,,,,,,86.0,,,,,79.0,,,,,88.0,,,,,88.0,,,99.0,95.0,97.0,95.0,95.0,17.8,157.0,5.1,455.0,11.9,529.0,12.3,807.0,12.3,127.0,4.8,127.0,1.4,333.0,0.108,52.0,0.171,148.0,0.046,200.0,0.145,2598.0,6.474,6109.0,0.463,5.718,4710.0,0.175,3.186,116.0,1.255,1.067,123.0,0.0,3.837,58841.0,0.0,36.948,50392.0,0.595,0.017,0.367,0.038,,,,,,,,,,,,,,,,,,,,,,,
4994,140063,1.607526,0.557798,-0.3956,-0.576602,-0.905579,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.153817,6,5,8,15,8,5,2,1,3.0,4.0,,2026,-0.137152,,1.08204,1.867428,1.938433,0.607862,,1.285927,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,6.0,0.166667,1.107423,-0.079177,0.738153,1.607526,,-0.917151,-0.600916,0.842209,,0.851009,,0.679505,0.881257,0.662844,0.115191,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,8.0,0.125,0.314243,-9.9e-05,0.563541,0.557798,,-1.303176,0.948383,,,-1.496084,0.399215,-0.323151,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,5.0,0.2,-0.354962,-0.025094,0.833842,-0.3956,1.098394,-0.173592,,-1.476878,-2.313293,0.752411,0.468598,,0.004698,-1.932502,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,8.0,0.125,-0.446521,0.017718,0.512643,-0.905579,0.803203,-0.082826,0.533043,-0.468851,-0.19754,-0.453797,-0.657542,-0.504454,-0.454435,-1.391708,-0.914393,-1.059252,-0.745056,-0.40775,-0.817909,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,15.0,0.066667,-0.454618,0.029341,0.839329,-0.576602,,,,,,,,,,,,,,,,,,,,0.96,1091.0,234.0,366.0,0.08,47586.0,0.85,13.0,0.98,92.0,0.31,112.0,0.15,954.0,0.123,51.0,,,0.072,43.0,0.075,225.0,0.11,216.0,0.122,109.0,,,,,1.06,,0.035,667.0,,,,90.0,,,,,,,,,,,,,,89.0,,,,,,,,,,,,,,81.0,,,,,,,,,,,74.0,,,,,,,,,,81.0,,,,,,,,,78.0,,,,,,,,,,,,,,82.0,,,,,78.0,,,,,85.0,,,,,85.0,,,99.0,95.0,98.0,93.0,92.0,,,34.9,265.0,24.4,224.0,12.3,833.0,9.8,88.0,4.8,88.0,0.8,419.0,,,0.175,50.0,,,0.149,1069.0,1.292,1597.0,1.548,1.461,1458.0,0.0,0.54,21.0,,0.236,26.0,,1.258,25202.0,1.59,21.388,25202.0,0.234,,,0.067,,,,,,,,,,,,,,,,,,,,,,,
5130,140119,2.2151,0.471158,0.370794,0.204082,-1.167632,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.577334,8,8,11,15,9,5,2,1,3.0,5.0,,2026,1.580655,0.669179,1.662517,2.508969,1.902221,1.723061,1.472369,0.928276,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,1.555906,-0.079177,0.738153,2.2151,-0.067977,-0.358887,0.430505,1.828597,0.426218,1.902293,0.504145,-0.702384,1.715327,-0.753278,-2.004961,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.265418,-9.9e-05,0.563541,0.471158,-0.938958,0.114503,0.405439,0.382388,0.667102,0.532822,-0.367401,1.476815,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.284089,-0.025094,0.833842,0.370794,1.201108,-0.665461,-0.453975,-3.674512,-1.161984,,0.392558,0.767467,0.361155,-1.994098,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,9.0,0.111111,-0.58086,0.017718,0.512643,-1.167632,-0.128381,-0.082826,-0.390364,0.544011,0.834243,0.296645,0.095498,-0.504454,0.15615,0.166795,0.347443,-0.38317,0.888369,0.62998,0.539556,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,15.0,0.066667,0.200633,0.029341,0.839329,0.204082,,,,,,,,,,,,,,,,,,,,0.98,14131.0,348.0,372.0,0.05,65669.0,,,0.97,72.0,0.3,82.0,0.13,7390.0,0.103,104.0,0.021,97.0,0.063,85.0,0.061,508.0,0.111,300.0,0.103,298.0,139.78,270.0,0.043,268.0,0.71,,0.037,2393.0,,,,92.0,,,,,,,,,,,,,,91.0,,,,,,,,,,,,,,81.0,,,,,,,,,,,77.0,,,,,,,,,,87.0,,,,,,,,,82.0,,,,,,,,,,,,,,85.0,,,,,87.0,,,,,90.0,,,,,90.0,,,98.0,95.0,97.0,95.0,95.0,8.8,141.0,16.8,657.0,-6.8,326.0,11.4,2117.0,11.8,814.0,4.1,814.0,1.3,1130.0,0.102,95.0,0.166,96.0,0.045,269.0,0.164,4347.0,34.928,30903.0,0.544,23.012,14163.0,0.304,8.773,307.0,0.57,2.252,249.0,0.444,8.115,158839.0,0.37,111.737,138502.0,0.564,0.042,0.31,0.09,,,,,,,,,,,,,,,,,,,,,,,


## Load general hospital information from hospitals-data-archive project

In [27]:
# CMS used July 2021 measure level results for July 2022 star rating reporting
# CMS used October 2020 measure level results for April 2021 star rating reporting

n_dir = stars_dir + 'CareCompare'
ls = ['Facility ID', 'Facility Name', 
      'Address', 'City', 'State', 'ZIP Code', 'County Name',
      'Hospital Type', 'Hospital Ownership', 'Emergency Services', 
      'Meets criteria for promoting interoperability of EHRs', 
      'Hospital overall rating', 
      'Hospital overall rating footnote',
     ]


## 2026
tdf = main_df[main_df['Release year'] == '2026']
prvdrs = tdf['Facility ID'].unique()
df_2026 = pd.read_csv(n_dir + '/hospitals_11_2025/Hospital_General_Information.csv')#, compression='gzip')
df_2026['Hospital overall rating footnote'] = df_2026['Hospital overall rating footnote'].astype(str)
df_2026['Facility ID'] = df_2026['Facility ID'].astype(str)
df_2026 = df_2026[df_2026['Facility ID'].isin(prvdrs_2026)]

cols1 = ['Provider ID', "Measure Start Date", "Measure End Date", 'Hospital Name', 'Address 1', 'City/Town', 
         'County/Parish', 'Telephone Number']
    
cols2 = ['Facility ID', "Start Date", "End Date", 'Facility Name', 'Address', 'City', 'County Name',
         'Phone Number']
    
for i, col in enumerate(cols1):
    if col in list(df_2026):
        df_2026.rename(columns={col: cols2[i]}, inplace=True)

df_2026 = df_2026.filter(items=ls, axis=1)
df_2026['file_month'] = '08'
df_2026['file_year'] = '2025'
df_2026 = df_2026.merge(tdf, how='outer')
print('Release years:', df_2026['Release year'].unique())


## 2025
tdf = main_df[main_df['Release year'] == '2025']
prvdrs = tdf['Facility ID'].unique()
df_2025 = pd.read_csv(n_dir + '/hospitals_10_2024/Hospital_General_Information.csv')#, compression='gzip')
df_2025['Hospital overall rating footnote'] = df_2025['Hospital overall rating footnote'].astype(str)
df_2025['Facility ID'] = df_2025['Facility ID'].astype(str)
df_2025 = df_2025[df_2025['Facility ID'].isin(prvdrs_2025)]

cols1 = ['Provider ID', "Measure Start Date", "Measure End Date", 'Hospital Name', 'Address 1', 'City/Town', 
         'County/Parish', 'Telephone Number']
    
cols2 = ['Facility ID', "Start Date", "End Date", 'Facility Name', 'Address', 'City', 'County Name',
         'Phone Number']
    
for i, col in enumerate(cols1):
    if col in list(df_2025):
        df_2025.rename(columns={col: cols2[i]}, inplace=True)

df_2025 = df_2025.filter(items=ls, axis=1)
df_2025['file_month'] = '10'
df_2025['file_year'] = '2024'
df_2025 = df_2025.merge(tdf, how='outer')
print('Release years:', df_2025['Release year'].unique())


## 2024
tdf = main_df[main_df['Release year'] == '2024']
prvdrs = tdf['Facility ID'].unique()
df_2024 = pd.read_csv(n_dir + '/hospitals_01_2024/Hospital_General_Information.csv')#, compression='gzip')
df_2024['Hospital overall rating footnote'] = df_2024['Hospital overall rating footnote'].astype(str)
df_2024['Facility ID'] = df_2024['Facility ID'].astype(str)
df_2024 = df_2024[df_2024['Facility ID'].isin(prvdrs_2024)]

cols1 = ['Provider ID', "Measure Start Date", "Measure End Date", 'Hospital Name', 'Address 1', 'City/Town', 
         'County/Parish', 'Telephone Number']
    
cols2 = ['Facility ID', "Start Date", "End Date", 'Facility Name', 'Address', 'City', 'County Name',
         'Phone Number']
    
for i, col in enumerate(cols1):
    if col in list(df_2024):
        df_2024.rename(columns={col: cols2[i]}, inplace=True)

df_2024 = df_2024.filter(items=ls, axis=1)
df_2024['file_month'] = '01'
df_2024['file_year'] = '2024'
df_2024 = df_2024.merge(tdf, how='outer')
print('Release years:', df_2024['Release year'].unique())


## 2023
tdf = main_df[main_df['Release year'] == '2023']
prvdrs = tdf['Facility ID'].unique()
df_2023 = pd.read_csv(n_dir + '/hospitals_01_2023/Hospital_General_Information.csv')#, compression='gzip')
df_2023['Hospital overall rating footnote'] = df_2023['Hospital overall rating footnote'].astype(str)
df_2023['Facility ID'] = df_2023['Facility ID'].astype(str)
df_2023 = df_2023[df_2023['Facility ID'].isin(prvdrs_2023)]
df_2023 = df_2023.filter(items=ls, axis=1)
df_2023['file_month'] = '01'
df_2023['file_year'] = '2023'
df_2023 = df_2023.merge(tdf, how='outer')
print('Release years:', df_2023['Release year'].unique())


## 2022
tdf = main_df[main_df['Release year'] == '2022']
prvdrs = tdf['Facility ID'].unique()
df_2022 = pd.read_csv(n_dir + '/hospitals_07_2021/Hospital_General_Information.csv')#, compression='gzip')
df_2022['Hospital overall rating footnote'] = df_2022['Hospital overall rating footnote'].astype(str)
df_2022['Facility ID'] = df_2022['Facility ID'].astype(str)
df_2022 = df_2022[df_2022['Facility ID'].isin(prvdrs_2022)]
df_2022 = df_2022.filter(items=ls, axis=1)
df_2022['file_month'] = '07'
df_2022['file_year'] = '2021'
df_2022 = df_2022.merge(tdf, how='outer')
print('Release years:', df_2022['Release year'].unique())


## 2021
tdf = main_df[main_df['Release year'] == '2021']
prvdrs = tdf['Facility ID'].unique()
df_2021 = pd.read_csv(n_dir + '/hospitals_10_2020/Hospital_General_Information.csv')#, compression='gzip')
df_2021['Hospital overall rating footnote'] = df_2021['Hospital overall rating footnote'].astype(str)
df_2021['Facility ID'] = df_2021['Facility ID'].astype(str)
df_2021 = df_2021[df_2021['Facility ID'].isin(prvdrs_2021)]
df_2021 = df_2021.filter(items=ls, axis=1)
df_2021['file_month'] = '10'
df_2021['file_year'] = '2020'
df_2021 = df_2021.merge(tdf, how='outer')
print('Release years:', df_2021['Release year'].unique())

Release years: ['2026']
Release years: ['2025']
Release years: ['2024']
Release years: ['2023']
Release years: ['2022']
Release years: ['2021']


In [28]:
print(df_2026.shape)
print(df_2025.shape)
print(df_2024.shape)
print(df_2023.shape)
print(df_2022.shape)
print(df_2021.shape)


(3210, 393)
(2891, 394)
(2847, 394)
(3076, 394)
(3121, 394)
(3355, 394)


In [29]:
mdf = df_2026.merge(df_2025, how='outer')
mdf = mdf.merge(df_2024, how='outer')
mdf = mdf.merge(df_2023, how='outer')
mdf = mdf.merge(df_2022, how='outer')
mdf = mdf.merge(df_2021, how='outer')

mdf = mdf[~mdf['Release year'].isin([np.nan, float('NaN')])]
print(mdf.shape)
print('Release years:', sorted(mdf['Release year'].unique()))
mdf.head()

(18500, 394)
Release years: ['2021', '2022', '2023', '2024', '2025', '2026']


Unnamed: 0,Facility ID,Facility Name,Address,City,State,ZIP Code,County Name,Hospital Type,Hospital Ownership,Emergency Services,Hospital overall rating,Hospital overall rating footnote,file_month,file_year,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Q_25,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,std_Hybrid_HWM,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_C8,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,std_Hybrid_HWR,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_SAFE_USE_OF_OPIOIDS,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_O_COMP_1_LINEAR_SCORE,std_O_COMP_2_LINEAR_SCORE,std_O_COMP_3_LINEAR_SCORE,std_O_PATIENT_RATE_LINEAR_SCORE,std_O_PATIENT_REC_LINEAR_SCORE,std_H_COMP_1_LINEAR_SCORE,std_H_COMP_2_LINEAR_SCORE,std_H_COMP_3_LINEAR_SCORE,std_H_COMP_5_LINEAR_SCORE,std_H_COMP_6_LINEAR_SCORE,std_H_COMP_7_LINEAR_SCORE,std_H_CLEAN_LINEAR_SCORE,std_H_QUIET_LINEAR_SCORE,std_H_RECMND_LINEAR_SCORE,std_H_HSP_RATING_LINEAR_SCORE,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_C9,patient_exp_C10,patient_exp_C11,patient_exp_C12,patient_exp_C13,patient_exp_C14,patient_exp_C15,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,std_READM_30_HOSP_WIDE,std_HCP_COVID_19,std_PC_01,process_C11,process_C12,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,std_OP_3B,std_OP_33,std_OP_30,std_ED_2B,process_C13,process_C14,IMM_3,IMM_3_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,SEP_1,SEP_1_DEN,SAFE_USE_OF_OPIOIDS,SAFE_USE_OF_OPIOIDS_DEN,MORT_30_AMI,MORT_30_AMI_DEN,MORT_30_CABG,MORT_30_CABG_DEN,MORT_30_COPD,MORT_30_COPD_DEN,MORT_30_HF,MORT_30_HF_DEN,MORT_30_PN,MORT_30_PN_DEN,MORT_30_STK,MORT_30_STK_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,COMP_HIP_KNEE,COMP_HIP_KNEE_DEN,PSI_90_SAFETY,PSI_90_SAFETY_DEN,Hybrid_HWM,Hybrid_HWM_DEN,H_COMP_1_A_P,H_COMP_1_SN_P,H_COMP_1_U_P,H_COMP_1_LINEAR_SCORE,H_COMP_1_STAR_RATING,H_NURSE_RESPECT_A_P,H_NURSE_RESPECT_SN_P,H_NURSE_RESPECT_U_P,H_NURSE_LISTEN_A_P,H_NURSE_LISTEN_SN_P,H_NURSE_LISTEN_U_P,H_NURSE_EXPLAIN_A_P,H_NURSE_EXPLAIN_SN_P,H_NURSE_EXPLAIN_U_P,H_COMP_2_A_P,H_COMP_2_SN_P,H_COMP_2_U_P,H_COMP_2_LINEAR_SCORE,H_COMP_2_STAR_RATING,H_DOCTOR_RESPECT_A_P,H_DOCTOR_RESPECT_SN_P,H_DOCTOR_RESPECT_U_P,H_DOCTOR_LISTEN_A_P,H_DOCTOR_LISTEN_SN_P,H_DOCTOR_LISTEN_U_P,H_DOCTOR_EXPLAIN_A_P,H_DOCTOR_EXPLAIN_SN_P,H_DOCTOR_EXPLAIN_U_P,H_COMP_3_A_P,H_COMP_3_SN_P,H_COMP_3_U_P,H_COMP_3_LINEAR_SCORE,H_COMP_3_STAR_RATING,H_CALL_BUTTON_A_P,H_CALL_BUTTON_SN_P,H_CALL_BUTTON_U_P,H_BATH_HELP_A_P,H_BATH_HELP_SN_P,H_BATH_HELP_U_P,H_COMP_5_A_P,H_COMP_5_SN_P,H_COMP_5_U_P,H_COMP_5_LINEAR_SCORE,H_COMP_5_STAR_RATING,H_MED_FOR_A_P,H_MED_FOR_SN_P,H_MED_FOR_U_P,H_SIDE_EFFECTS_A_P,H_SIDE_EFFECTS_SN_P,H_SIDE_EFFECTS_U_P,H_COMP_6_N_P,H_COMP_6_Y_P,H_COMP_6_LINEAR_SCORE,H_COMP_6_STAR_RATING,H_DISCH_HELP_N_P,H_DISCH_HELP_Y_P,H_SYMPTOMS_N_P,H_SYMPTOMS_Y_P,H_COMP_7_A,H_COMP_7_D_SD,H_COMP_7_SA,H_COMP_7_LINEAR_SCORE,H_COMP_7_STAR_RATING,H_CT_PREFER_A,H_CT_PREFER_D_SD,H_CT_PREFER_SA,H_CT_UNDER_A,H_CT_UNDER_D_SD,H_CT_UNDER_SA,H_CT_MED_A,H_CT_MED_D_SD,H_CT_MED_SA,H_CLEAN_HSP_A_P,H_CLEAN_HSP_SN_P,H_CLEAN_HSP_U_P,H_CLEAN_LINEAR_SCORE,H_CLEAN_STAR_RATING,H_QUIET_HSP_A_P,H_QUIET_HSP_SN_P,H_QUIET_HSP_U_P,H_QUIET_LINEAR_SCORE,H_QUIET_STAR_RATING,H_HSP_RATING_0_6,H_HSP_RATING_7_8,H_HSP_RATING_9_10,H_HSP_RATING_LINEAR_SCORE,H_HSP_RATING_STAR_RATING,H_RECMND_DN,H_RECMND_DY,H_RECMND_PY,H_RECMND_LINEAR_SCORE,H_RECMND_STAR_RATING,H_STAR_RATING,O_COMP_1_LINEAR_SCORE,O_COMP_2_LINEAR_SCORE,O_COMP_3_LINEAR_SCORE,O_PATIENT_RATE_LINEAR_SCORE,O_PATIENT_REC_LINEAR_SCORE,EDAC_30_AMI,EDAC_30_AMI_DEN,EDAC_30_HF,EDAC_30_HF_DEN,EDAC_30_PN,EDAC_30_PN_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,READM_30_CABG,READM_30_CABG_DEN,READM_30_COPD,READM_30_COPD_DEN,READM_30_HIP_KNEE,READM_30_HIP_KNEE_DEN,Hybrid_HWR,Hybrid_HWR_DEN,HAI_1_DEN_PRED,HAI_1_DEN_VOL,HAI_1,HAI_2_DEN_PRED,HAI_2_DEN_VOL,HAI_2,HAI_3_DEN_PRED,HAI_3_DEN_VOL,HAI_3,HAI_4_DEN_PRED,HAI_4_DEN_VOL,HAI_4,HAI_5_DEN_PRED,HAI_5_DEN_VOL,HAI_5,HAI_6_DEN_PRED,HAI_6_DEN_VOL,HAI_6,OP_13,OP_8,OP_10,READM_30_HOSP_WIDE_DEN,OP_13_DEN,H_NUMB_COMP,PC_01,PC_01_DEN,HCP_COVID_19,OP_10_DEN,READM_30_HOSP_WIDE,H_RESP_RATE_P,HCP_COVID_19_DEN,OP_8_DEN,H_INDI_STAR_RATING,H_GLOB_STAR_RATING,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_33,OP_33_DEN,OP_30,OP_30_DEN,ED_2B,ED_2B_DEN,Meets criteria for promoting interoperability of EHRs
0,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301.0,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,2,,10,2020,-0.752932,-0.070355,0.556127,0.071425,-0.050316,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.0491,7,8,11,8,11,5,2,1,3.0,3.0,,2021,0.730853,-2.055159,0.069106,0.103403,-0.022557,-1.941449,-0.327016,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.491831,-0.004941,0.64666,-0.752932,-0.027028,-0.705673,1.039984,2.185101,-1.542989,-1.037529,-1.164917,1.1826,-1.18578,1.207127,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.010885,0.027901,0.551294,-0.070355,0.303431,-0.078888,0.680925,0.244905,1.037183,0.403644,0.098335,0.31923,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.376096,0.009921,0.658437,0.556127,0.644574,-0.102729,0.684379,-0.761776,-0.979501,,-0.459111,0.140655,,0.031191,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,11.0,0.090909,0.007553,0.037755,0.600242,-0.050316,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.060398,-1.104271e-16,0.845613,0.071425,-0.070634,,0.473469,1.0,1.0,-0.537213,-0.085263,-0.269656,-0.073296,0.801144,-0.115323,0.284361,0.478432,,,0.466001,-0.054072,1.0,1.0,0.97,4817.0,178.0,349.0,0.03,57844.0,,,0.81,64.0,0.6,102.0,,,0.119,586.0,0.047,281.0,0.084,374.0,0.113,858.0,0.157,554.0,0.166,644.0,170.87,165.0,0.023,250.0,0.93,3896.300852,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,4.0,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.0,620.0,21.6,1106.0,-20.6,594.0,13.4,606.0,10.8,190.0,7.1,190.0,0.8,993.0,0.149,268.0,0.207,443.0,0.046,258.0,,,8.086,7846.0,0.742,18.498,13268.0,0.324,4.615,165.0,0.65,1.884,216.0,0.0,7.305,102765.0,0.548,76.294,102765.0,0.537,0.028,0.389,0.072,4474.0,211.0,507.0,0.0,24.0,,2117.0,0.156,21.0,,211.0,3.5,3.5,,,,,,,0.98,162.0,103.0,655.0,Y
1,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301.0,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,3,,1,2023,-0.647058,0.284081,0.312074,-0.127836,-1.024044,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.162208,7,8,11,8,10,5,2,1,3.0,3.0,,2023,-0.041613,-2.19781,-0.019543,1.708727,0.366398,-1.543654,-1.741473,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.495567,-0.05126,0.686658,-0.647058,0.207832,-0.702681,0.28604,0.105508,0.219318,-0.048609,-0.198058,0.159253,0.872692,0.062022,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.171616,0.020822,0.530811,0.284081,0.024204,0.444297,0.745807,-0.407579,1.025843,0.314693,-0.315731,-0.212425,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.202389,-0.013939,0.693192,0.312074,0.992908,0.10185,-1.77947,-0.904124,-0.237088,,-0.634479,0.488322,,-0.609958,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,10.0,0.1,-0.538475,0.030917,0.556023,-1.024044,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,-0.109237,2.611408e-16,0.85451,-0.127836,0.924456,-1.039363,-1.763346,1.0,1.0,-1.07585,-0.175531,-1.051153,0.808598,0.68837,0.037272,-0.445032,0.339427,,,,,,,0.97,3795.0,205.0,323.0,0.03,51079.0,,,0.81,16.0,0.46,146.0,,,0.124,317.0,0.047,172.0,0.085,182.0,0.083,630.0,0.159,407.0,0.164,489.0,173.39,120.0,0.024,102.0,1.01,2046.895485,,,,,,,2.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,2.0,,,,,,,,,,,4.0,,,,,,,,,,4.0,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.9,319.0,21.8,755.0,-1.5,436.0,14.1,254.0,10.2,214.0,4.7,214.0,1.0,688.0,0.117,165.0,0.199,202.0,0.042,98.0,,,10.597,10024.0,0.661,26.63,17731.0,0.3,4.548,154.0,1.099,1.845,200.0,0.0,9.412,101908.0,0.85,72.686,101451.0,0.66,0.067,0.425,0.057,3058.0,208.0,434.0,0.09,34.0,0.737,1488.0,0.142,15.0,2323.0,146.0,3.5,3.0,,,,,,,,,,,Y
2,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301.0,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,3,,1,2024,0.007071,0.230867,-0.146867,0.137779,-0.677624,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.030968,7,7,11,8,10,5,2,1,3.0,3.0,,2024,0.470514,-1.535074,0.282996,1.510467,0.129818,-0.554262,-0.748332,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.063411,-0.068391,0.704371,0.007071,1.123231,-0.791585,-0.63392,0.29207,0.584835,0.293805,0.886353,-1.03233,0.626949,-0.252865,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.138216,0.014038,0.537879,0.230867,0.879845,-0.232451,0.488068,-0.71166,,-0.210159,0.003034,-1.162386,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.135101,-0.021526,0.773315,-0.146867,0.940083,-0.029295,0.419351,-0.942734,-1.140182,,-3.129707,0.000412,,0.371166,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,10.0,0.1,-0.335877,0.029039,0.538522,-0.677624,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.119895,1.093644e-15,0.870204,0.137779,0.423838,-0.431525,0.583665,1.0,1.0,-0.373358,-0.400063,-0.243371,0.124487,0.479704,0.876397,0.133873,0.361496,,,,,,,0.95,3905.0,214.0,348.0,0.05,52960.0,,,0.47,17.0,0.65,127.0,,,0.12,278.0,0.041,132.0,0.088,107.0,0.089,549.0,0.18,400.0,0.148,398.0,184.68,134.0,0.027,49.0,1.21,2542.0385,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,4.0,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-15.4,274.0,23.4,614.0,23.6,403.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,0.105,126.0,0.19,117.0,0.038,49.0,,,9.597,9149.0,0.938,24.766,17310.0,0.363,5.994,214.0,1.335,,,,11.4,104733.0,0.965,67.066,104733.0,0.507,0.028,0.38,0.061,2912.0,178.0,544.0,0.0,32.0,0.836,1410.0,0.142,15.0,2496.0,79.0,3.5,3.5,,,,,,,,,,,Y
3,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301.0,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,3,,10,2024,0.29611,0.361406,0.344883,0.042072,-0.484678,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.171622,7,7,11,8,11,5,2,1,3.0,4.0,,2025,1.405923,-1.04537,1.011496,0.650904,-0.315375,0.023766,-0.732077,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,0.142753,-0.073484,0.730255,0.29611,1.024273,-0.368843,-0.448127,0.289973,0.607499,0.859382,0.192452,-1.028886,0.62791,-0.25798,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.206001,0.013108,0.53373,0.361406,0.698729,0.30367,0.739078,-0.467783,,0.476446,-0.105703,0.103001,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.249634,-0.01903,0.779001,0.344883,1.044716,0.080291,0.781373,-1.062058,-1.138829,,-3.171398,0.588011,0.563049,0.449772,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,11.0,0.090909,-0.204464,0.028411,0.480473,-0.484678,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.036435,4.8997890000000005e-17,0.866021,0.042072,0.768359,-0.525017,0.140984,1.0,1.0,-1.154064,0.570761,-0.231671,-0.254181,-0.312251,0.933177,0.719371,0.020336,,,,,,,0.96,4115.0,217.0,345.0,0.05,52960.0,,,0.47,17.0,0.68,131.0,0.12,4303.0,0.108,291.0,0.038,157.0,0.079,122.0,0.105,610.0,0.189,489.0,0.137,414.0,194.78,125.0,0.03,32.0,0.98,,,,,,,,2.0,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,3.0,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-13.8,296.0,10.6,679.0,14.3,490.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,0.102,151.0,0.177,130.0,0.044,34.0,,,10.082,9538.0,0.496,23.712,16332.0,0.169,6.618,240.0,1.209,,,,11.232,103195.0,0.445,65.234,103195.0,0.491,0.021,0.333,0.054,2924.0,193.0,643.0,0.02,46.0,0.0,1478.0,0.141,17.0,2712.0,66.0,3.0,4.0,,,,,,,,,,,Y
4,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301.0,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,4,,8,2025,-0.253894,0.613401,0.575849,0.069504,-0.153563,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.202642,8,7,11,15,10,5,2,1,3.0,4.0,-0.304441,2026,0.635861,-0.407503,-0.336903,0.63017,-0.741228,-0.155169,-1.255617,-0.502326,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.26659,-0.079177,0.738153,-0.253894,0.930664,0.193207,-0.369507,0.294215,0.524002,0.266962,0.079849,1.577732,-0.191118,0.662844,-0.167496,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.345578,-9.9e-05,0.563541,0.613401,0.570514,0.134271,0.794787,0.544171,,0.84381,0.055399,0.242553,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.455072,-0.025094,0.833842,0.575849,0.944322,0.125806,-0.216523,-1.245548,-1.161984,1.06792,-1.508442,0.793318,0.182926,0.408151,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,0.1,-0.061005,0.017718,0.512643,-0.153563,-0.128381,-1.276518,-0.390364,0.03758,0.490315,-0.453797,0.472017,-0.305413,0.563206,0.166795,0.347443,-0.38317,1.069861,0.837525,0.268063,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,15.0,0.066667,0.087678,0.02934081,0.839329,0.069504,,,,,,,,,,,,,,,,,,,,0.93,4625.0,222.0,387.0,0.05,57084.0,0.91,11.0,0.72,29.0,0.69,150.0,0.14,4583.0,0.114,270.0,0.03,144.0,0.094,112.0,0.102,583.0,0.184,517.0,0.135,395.0,203.0,118.0,0.032,27.0,0.95,,0.045,1835.0,,,,90.0,,,,,,,,,,,,,,92.0,,,,,,,,,,,,,,82.0,,,,,,,,,,,79.0,,,,,,,,,,87.0,,,,,,,,,82.0,,,,,,,,,,,,,,85.0,,,,,88.0,,,,,89.0,,,,,91.0,,,98.0,93.0,97.0,94.0,94.0,-15.6,273.0,-1.1,652.0,17.4,507.0,12.8,218.0,8.5,261.0,5.7,261.0,0.8,647.0,0.101,137.0,0.18,122.0,0.048,25.0,0.151,2824.0,9.44,8935.0,0.53,23.35,16255.0,0.086,6.562,229.0,0.457,0.903,97.0,,10.937,109019.0,0.183,68.076,109019.0,0.382,0.038,0.308,0.053,,,,,,,,,,,,,,,,,,,,,,,,


In [30]:
df = pd.read_pickle('~/GitHub/HCRIS-databuilder/GenDat4App_p4.pkl')

df2 = df.set_index(('Curated Name and Num', 'Curated Name and Num', 'Curated Name and Num', 'Curated Name and Num'))
df2.columns = df2.columns.droplevel([0,1,2])
df2 = df2.reset_index()

df2.head()

Unnamed: 0,"(Curated Name and Num, Curated Name and Num, Curated Name and Num, Curated Name and Num)",Hospital Provider Number (PRVDR_NUM),Beginning FFY,Report_Period_Begin_Yr,Lat,Lon,Type of Control of Hospital (See Table I) (S2_1_C1_21),Hospital type (modified),Beds Total Facility (S3_1_C2_27),Hospital State (S2_1_C2_2),Urban (1) or Rural (2) (S2_1_C1_27),Is this a teaching hospital or\naffiliated with a teaching hospital? (Y/N) (S2_1_C1_56)
0,SOUTHEAST ALABAMA MEDICAL CENTER (010001),10001,2011,2010,31.214058,-85.361725,Governmental-County,General Short Term (Acute Care Hospitals),420.0,AL,1.0,N
1,SOUTHEAST ALABAMA MEDICAL CENTER (010001),10001,2012,2011,31.214058,-85.361725,Governmental-County,General Short Term (Acute Care Hospitals),420.0,AL,1.0,N
2,SOUTHEAST ALABAMA MEDICAL CENTER (010001),10001,2013,2012,31.214058,-85.361725,Governmental-County,General Short Term (Acute Care Hospitals),420.0,AL,1.0,N
3,SOUTHEAST ALABAMA MEDICAL CENTER (010001),10001,2014,2013,31.214058,-85.361725,Governmental-County,General Short Term (Acute Care Hospitals),410.0,AL,1.0,N
4,SOUTHEAST ALABAMA MEDICAL CENTER (010001),10001,2015,2014,31.214058,-85.361725,Governmental-County,General Short Term (Acute Care Hospitals),400.0,AL,2.0,N


In [31]:
df2.rename(columns={'Hospital Provider Number (PRVDR_NUM)': 'Facility ID',
                    'Beds Total Facility (S3_1_C2_27)': 'Beds',
                    #'Urban (1) or Rural (2) (S2_1_C1_27)': 'Urban (1) or Rural (2)',
                    #'Is this a teaching hospital or\naffiliated with a teaching hospital? (Y/N) (S2_1_C1_56)': 'Teaching hospital or affiliated with a teaching hospital?',
                   }, inplace=True)
df2 = df2.filter(items=['Facility ID', 'Beds', 'Lat', 'Lon', #'Urban (1) or Rural (2)',
                        #'Teaching hospital or affiliated with a teaching hospital?',
                       ], axis=1)

df2['Beds'] = df2.groupby(['Facility ID'])['Beds'].transform('mean')
df2['Beds'] = np.round(df2['Beds'], 0)

prvdrs = main_df['Facility ID'].unique()
df2 = df2[df2['Facility ID'].isin(prvdrs)]

print(len(prvdrs))
print(len(df2['Facility ID'].unique()))

df2.drop_duplicates(inplace=True)
print(df2.shape)
df2.head()


3648
3523
(3523, 4)


Unnamed: 0,Facility ID,Beds,Lat,Lon
0,10001,371.642857,31.214058,-85.361725
14,10005,189.142857,,
28,10006,295.666667,34.802756,-87.652191
43,10007,62.714286,31.291972,-86.255415
57,10008,49.0,31.692595,-86.266156


In [32]:
hoarc = sorted(main_df['Facility ID'].unique())
hcris = sorted(df2['Facility ID'].unique())

if hoarc == hcris: 
    print('same')
else:
    ls1 = np.setdiff1d(hoarc, hcris)
    print(len(ls1), 'hospitals in hospitals data archive that are not in hcris:')
    for i in ls1:
        print(i, "and other 'F' hospitals")
        break

    ls2 = np.setdiff1d(hcris, hoarc)
    print(len(ls2), 'hospitals in hcris that are not in hospitals data archive')


125 hospitals in hospitals data archive that are not in hcris:
01014F and other 'F' hospitals
0 hospitals in hcris that are not in hospitals data archive


In [33]:
print('df2.shape:', df2.shape)
print('mdf.shape:', mdf.shape)

main_df = mdf.merge(df2, how='outer', on=['Facility ID'])
main_df = main_df[~main_df['Facility ID'].isin([np.nan, float('NaN')])]

print('main_df.shape:', main_df.shape)
main_df.drop_duplicates(inplace=True)
print('main_df.shape:', main_df.shape)


df2.shape: (3523, 4)
mdf.shape: (18500, 394)
main_df.shape: (18500, 397)
main_df.shape: (18500, 397)


In [34]:
zips1 = main_df['ZIP Code'].tolist()
zips2 = []
for z in zips1:
    try:
        z = int(z)
        z = str(z)
        zips2.append(z)
    except:
        zips2.append(np.nan)
        
main_df['ZIP Code'] = zips2
print(main_df.shape)
main_df.head()


(18500, 397)


Unnamed: 0,Facility ID,Facility Name,Address,City,State,ZIP Code,County Name,Hospital Type,Hospital Ownership,Emergency Services,Hospital overall rating,Hospital overall rating footnote,file_month,file_year,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Q_25,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,std_Hybrid_HWM,mortality_C1,mortality_C2,mortality_C3,mortality_C4,mortality_C5,mortality_C6,mortality_C7,mortality_C8,mortality_total_cnt,mortality_measure_wt,mortality_score_before_std,mortality_Mean,mortality_StdDev,mortality_grp_score,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,std_Hybrid_HWR,readmission_C1,readmission_C2,readmission_C3,readmission_C4,readmission_C5,readmission_C6,readmission_C7,readmission_C8,readmission_C9,readmission_C10,readmission_C11,readmission_total_cnt,readmission_measure_wt,readmission_score_before_std,readmission_Mean,readmission_StdDev,readmission_grp_score,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_C1,safety_C2,safety_C3,safety_C4,safety_C5,safety_C6,safety_C7,safety_C8,safety_total_cnt,safety_measure_wt,safety_score_before_std,safety_Mean,safety_StdDev,safety_grp_score,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_SAFE_USE_OF_OPIOIDS,std_SEP_1,process_C1,process_C2,process_C3,process_C4,process_C5,process_C6,process_C7,process_C8,process_C9,process_C10,process_total_cnt,process_measure_wt,process_score_before_std,process_Mean,process_StdDev,process_grp_score,std_O_COMP_1_LINEAR_SCORE,std_O_COMP_2_LINEAR_SCORE,std_O_COMP_3_LINEAR_SCORE,std_O_PATIENT_RATE_LINEAR_SCORE,std_O_PATIENT_REC_LINEAR_SCORE,std_H_COMP_1_LINEAR_SCORE,std_H_COMP_2_LINEAR_SCORE,std_H_COMP_3_LINEAR_SCORE,std_H_COMP_5_LINEAR_SCORE,std_H_COMP_6_LINEAR_SCORE,std_H_COMP_7_LINEAR_SCORE,std_H_CLEAN_LINEAR_SCORE,std_H_QUIET_LINEAR_SCORE,std_H_RECMND_LINEAR_SCORE,std_H_HSP_RATING_LINEAR_SCORE,patient_exp_C1,patient_exp_C2,patient_exp_C3,patient_exp_C4,patient_exp_C5,patient_exp_C6,patient_exp_C7,patient_exp_C8,patient_exp_C9,patient_exp_C10,patient_exp_C11,patient_exp_C12,patient_exp_C13,patient_exp_C14,patient_exp_C15,patient_exp_total_cnt,patient_exp_measure_wt,patient_exp_score_before_std,patient_exp_Mean,patient_exp_StdDev,patient_exp_grp_score,std_READM_30_HOSP_WIDE,std_HCP_COVID_19,std_PC_01,process_C11,process_C12,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,std_OP_3B,std_OP_33,std_OP_30,std_ED_2B,process_C13,process_C14,IMM_3,IMM_3_DEN,OP_18B,OP_18B_DEN,OP_22,OP_22_DEN,OP_23,OP_23_DEN,OP_29,OP_29_DEN,SEP_1,SEP_1_DEN,SAFE_USE_OF_OPIOIDS,SAFE_USE_OF_OPIOIDS_DEN,MORT_30_AMI,MORT_30_AMI_DEN,MORT_30_CABG,MORT_30_CABG_DEN,MORT_30_COPD,MORT_30_COPD_DEN,MORT_30_HF,MORT_30_HF_DEN,MORT_30_PN,MORT_30_PN_DEN,MORT_30_STK,MORT_30_STK_DEN,PSI_4_SURG_COMP,PSI_4_SURG_COMP_DEN,COMP_HIP_KNEE,COMP_HIP_KNEE_DEN,PSI_90_SAFETY,PSI_90_SAFETY_DEN,Hybrid_HWM,Hybrid_HWM_DEN,H_COMP_1_A_P,H_COMP_1_SN_P,H_COMP_1_U_P,H_COMP_1_LINEAR_SCORE,H_COMP_1_STAR_RATING,H_NURSE_RESPECT_A_P,H_NURSE_RESPECT_SN_P,H_NURSE_RESPECT_U_P,H_NURSE_LISTEN_A_P,H_NURSE_LISTEN_SN_P,H_NURSE_LISTEN_U_P,H_NURSE_EXPLAIN_A_P,H_NURSE_EXPLAIN_SN_P,H_NURSE_EXPLAIN_U_P,H_COMP_2_A_P,H_COMP_2_SN_P,H_COMP_2_U_P,H_COMP_2_LINEAR_SCORE,H_COMP_2_STAR_RATING,H_DOCTOR_RESPECT_A_P,H_DOCTOR_RESPECT_SN_P,H_DOCTOR_RESPECT_U_P,H_DOCTOR_LISTEN_A_P,H_DOCTOR_LISTEN_SN_P,H_DOCTOR_LISTEN_U_P,H_DOCTOR_EXPLAIN_A_P,H_DOCTOR_EXPLAIN_SN_P,H_DOCTOR_EXPLAIN_U_P,H_COMP_3_A_P,H_COMP_3_SN_P,H_COMP_3_U_P,H_COMP_3_LINEAR_SCORE,H_COMP_3_STAR_RATING,H_CALL_BUTTON_A_P,H_CALL_BUTTON_SN_P,H_CALL_BUTTON_U_P,H_BATH_HELP_A_P,H_BATH_HELP_SN_P,H_BATH_HELP_U_P,H_COMP_5_A_P,H_COMP_5_SN_P,H_COMP_5_U_P,H_COMP_5_LINEAR_SCORE,H_COMP_5_STAR_RATING,H_MED_FOR_A_P,H_MED_FOR_SN_P,H_MED_FOR_U_P,H_SIDE_EFFECTS_A_P,H_SIDE_EFFECTS_SN_P,H_SIDE_EFFECTS_U_P,H_COMP_6_N_P,H_COMP_6_Y_P,H_COMP_6_LINEAR_SCORE,H_COMP_6_STAR_RATING,H_DISCH_HELP_N_P,H_DISCH_HELP_Y_P,H_SYMPTOMS_N_P,H_SYMPTOMS_Y_P,H_COMP_7_A,H_COMP_7_D_SD,H_COMP_7_SA,H_COMP_7_LINEAR_SCORE,H_COMP_7_STAR_RATING,H_CT_PREFER_A,H_CT_PREFER_D_SD,H_CT_PREFER_SA,H_CT_UNDER_A,H_CT_UNDER_D_SD,H_CT_UNDER_SA,H_CT_MED_A,H_CT_MED_D_SD,H_CT_MED_SA,H_CLEAN_HSP_A_P,H_CLEAN_HSP_SN_P,H_CLEAN_HSP_U_P,H_CLEAN_LINEAR_SCORE,H_CLEAN_STAR_RATING,H_QUIET_HSP_A_P,H_QUIET_HSP_SN_P,H_QUIET_HSP_U_P,H_QUIET_LINEAR_SCORE,H_QUIET_STAR_RATING,H_HSP_RATING_0_6,H_HSP_RATING_7_8,H_HSP_RATING_9_10,H_HSP_RATING_LINEAR_SCORE,H_HSP_RATING_STAR_RATING,H_RECMND_DN,H_RECMND_DY,H_RECMND_PY,H_RECMND_LINEAR_SCORE,H_RECMND_STAR_RATING,H_STAR_RATING,O_COMP_1_LINEAR_SCORE,O_COMP_2_LINEAR_SCORE,O_COMP_3_LINEAR_SCORE,O_PATIENT_RATE_LINEAR_SCORE,O_PATIENT_REC_LINEAR_SCORE,EDAC_30_AMI,EDAC_30_AMI_DEN,EDAC_30_HF,EDAC_30_HF_DEN,EDAC_30_PN,EDAC_30_PN_DEN,OP_32,OP_32_DEN,OP_35_ADM,OP_35_ADM_DEN,OP_35_ED,OP_35_ED_DEN,OP_36,OP_36_DEN,READM_30_CABG,READM_30_CABG_DEN,READM_30_COPD,READM_30_COPD_DEN,READM_30_HIP_KNEE,READM_30_HIP_KNEE_DEN,Hybrid_HWR,Hybrid_HWR_DEN,HAI_1_DEN_PRED,HAI_1_DEN_VOL,HAI_1,HAI_2_DEN_PRED,HAI_2_DEN_VOL,HAI_2,HAI_3_DEN_PRED,HAI_3_DEN_VOL,HAI_3,HAI_4_DEN_PRED,HAI_4_DEN_VOL,HAI_4,HAI_5_DEN_PRED,HAI_5_DEN_VOL,HAI_5,HAI_6_DEN_PRED,HAI_6_DEN_VOL,HAI_6,OP_13,OP_8,OP_10,READM_30_HOSP_WIDE_DEN,OP_13_DEN,H_NUMB_COMP,PC_01,PC_01_DEN,HCP_COVID_19,OP_10_DEN,READM_30_HOSP_WIDE,H_RESP_RATE_P,HCP_COVID_19_DEN,OP_8_DEN,H_INDI_STAR_RATING,H_GLOB_STAR_RATING,OP_2,OP_2_DEN,OP_3B,OP_3B_DEN,OP_33,OP_33_DEN,OP_30,OP_30_DEN,ED_2B,ED_2B_DEN,Meets criteria for promoting interoperability of EHRs,Beds,Lat,Lon
0,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,2,,10,2020,-0.752932,-0.070355,0.556127,0.071425,-0.050316,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.0491,7,8,11,8,11,5,2,1,3.0,3.0,,2021,0.730853,-2.055159,0.069106,0.103403,-0.022557,-1.941449,-0.327016,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.491831,-0.004941,0.64666,-0.752932,-0.027028,-0.705673,1.039984,2.185101,-1.542989,-1.037529,-1.164917,1.1826,-1.18578,1.207127,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,-0.010885,0.027901,0.551294,-0.070355,0.303431,-0.078888,0.680925,0.244905,1.037183,0.403644,0.098335,0.31923,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.376096,0.009921,0.658437,0.556127,0.644574,-0.102729,0.684379,-0.761776,-0.979501,,-0.459111,0.140655,,0.031191,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,11.0,0.090909,0.007553,0.037755,0.600242,-0.050316,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.060398,-1.104271e-16,0.845613,0.071425,-0.070634,,0.473469,1.0,1.0,-0.537213,-0.085263,-0.269656,-0.073296,0.801144,-0.115323,0.284361,0.478432,,,0.466001,-0.054072,1.0,1.0,0.97,4817.0,178.0,349.0,0.03,57844.0,,,0.81,64.0,0.6,102.0,,,0.119,586.0,0.047,281.0,0.084,374.0,0.113,858.0,0.157,554.0,0.166,644.0,170.87,165.0,0.023,250.0,0.93,3896.300852,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,4.0,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.0,620.0,21.6,1106.0,-20.6,594.0,13.4,606.0,10.8,190.0,7.1,190.0,0.8,993.0,0.149,268.0,0.207,443.0,0.046,258.0,,,8.086,7846.0,0.742,18.498,13268.0,0.324,4.615,165.0,0.65,1.884,216.0,0.0,7.305,102765.0,0.548,76.294,102765.0,0.537,0.028,0.389,0.072,4474.0,211.0,507.0,0.0,24.0,,2117.0,0.156,21.0,,211.0,3.5,3.5,,,,,,,0.98,162.0,103.0,655.0,Y,371.642857,31.214058,-85.361725
1,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,3,,1,2023,-0.647058,0.284081,0.312074,-0.127836,-1.024044,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.162208,7,8,11,8,10,5,2,1,3.0,3.0,,2023,-0.041613,-2.19781,-0.019543,1.708727,0.366398,-1.543654,-1.741473,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.495567,-0.05126,0.686658,-0.647058,0.207832,-0.702681,0.28604,0.105508,0.219318,-0.048609,-0.198058,0.159253,0.872692,0.062022,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.171616,0.020822,0.530811,0.284081,0.024204,0.444297,0.745807,-0.407579,1.025843,0.314693,-0.315731,-0.212425,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,0.202389,-0.013939,0.693192,0.312074,0.992908,0.10185,-1.77947,-0.904124,-0.237088,,-0.634479,0.488322,,-0.609958,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,10.0,0.1,-0.538475,0.030917,0.556023,-1.024044,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,-0.109237,2.611408e-16,0.85451,-0.127836,0.924456,-1.039363,-1.763346,1.0,1.0,-1.07585,-0.175531,-1.051153,0.808598,0.68837,0.037272,-0.445032,0.339427,,,,,,,0.97,3795.0,205.0,323.0,0.03,51079.0,,,0.81,16.0,0.46,146.0,,,0.124,317.0,0.047,172.0,0.085,182.0,0.083,630.0,0.159,407.0,0.164,489.0,173.39,120.0,0.024,102.0,1.01,2046.895485,,,,,,,2.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,2.0,,,,,,,,,,,4.0,,,,,,,,,,4.0,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.9,319.0,21.8,755.0,-1.5,436.0,14.1,254.0,10.2,214.0,4.7,214.0,1.0,688.0,0.117,165.0,0.199,202.0,0.042,98.0,,,10.597,10024.0,0.661,26.63,17731.0,0.3,4.548,154.0,1.099,1.845,200.0,0.0,9.412,101908.0,0.85,72.686,101451.0,0.66,0.067,0.425,0.057,3058.0,208.0,434.0,0.09,34.0,0.737,1488.0,0.142,15.0,2323.0,146.0,3.5,3.0,,,,,,,,,,,Y,371.642857,31.214058,-85.361725
2,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,3,,1,2024,0.007071,0.230867,-0.146867,0.137779,-0.677624,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.030968,7,7,11,8,10,5,2,1,3.0,3.0,,2024,0.470514,-1.535074,0.282996,1.510467,0.129818,-0.554262,-0.748332,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,-0.063411,-0.068391,0.704371,0.007071,1.123231,-0.791585,-0.63392,0.29207,0.584835,0.293805,0.886353,-1.03233,0.626949,-0.252865,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.138216,0.014038,0.537879,0.230867,0.879845,-0.232451,0.488068,-0.71166,,-0.210159,0.003034,-1.162386,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,-0.135101,-0.021526,0.773315,-0.146867,0.940083,-0.029295,0.419351,-0.942734,-1.140182,,-3.129707,0.000412,,0.371166,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,10.0,0.1,-0.335877,0.029039,0.538522,-0.677624,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.119895,1.093644e-15,0.870204,0.137779,0.423838,-0.431525,0.583665,1.0,1.0,-0.373358,-0.400063,-0.243371,0.124487,0.479704,0.876397,0.133873,0.361496,,,,,,,0.95,3905.0,214.0,348.0,0.05,52960.0,,,0.47,17.0,0.65,127.0,,,0.12,278.0,0.041,132.0,0.088,107.0,0.089,549.0,0.18,400.0,0.148,398.0,184.68,134.0,0.027,49.0,1.21,2542.0385,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,4.0,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-15.4,274.0,23.4,614.0,23.6,403.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,0.105,126.0,0.19,117.0,0.038,49.0,,,9.597,9149.0,0.938,24.766,17310.0,0.363,5.994,214.0,1.335,,,,11.4,104733.0,0.965,67.066,104733.0,0.507,0.028,0.38,0.061,2912.0,178.0,544.0,0.0,32.0,0.836,1410.0,0.142,15.0,2496.0,79.0,3.5,3.5,,,,,,,,,,,Y,371.642857,31.214058,-85.361725
3,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,3,,10,2024,0.29611,0.361406,0.344883,0.042072,-0.484678,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.171622,7,7,11,8,11,5,2,1,3.0,4.0,,2025,1.405923,-1.04537,1.011496,0.650904,-0.315375,0.023766,-0.732077,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,7.0,0.142857,0.142753,-0.073484,0.730255,0.29611,1.024273,-0.368843,-0.448127,0.289973,0.607499,0.859382,0.192452,-1.028886,0.62791,-0.25798,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.206001,0.013108,0.53373,0.361406,0.698729,0.30367,0.739078,-0.467783,,0.476446,-0.105703,0.103001,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.249634,-0.01903,0.779001,0.344883,1.044716,0.080291,0.781373,-1.062058,-1.138829,,-3.171398,0.588011,0.563049,0.449772,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,11.0,0.090909,-0.204464,0.028411,0.480473,-0.484678,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,8.0,0.125,0.036435,4.8997890000000005e-17,0.866021,0.042072,0.768359,-0.525017,0.140984,1.0,1.0,-1.154064,0.570761,-0.231671,-0.254181,-0.312251,0.933177,0.719371,0.020336,,,,,,,0.96,4115.0,217.0,345.0,0.05,52960.0,,,0.47,17.0,0.68,131.0,0.12,4303.0,0.108,291.0,0.038,157.0,0.079,122.0,0.105,610.0,0.189,489.0,0.137,414.0,194.78,125.0,0.03,32.0,0.98,,,,,,,,2.0,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,3.0,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-13.8,296.0,10.6,679.0,14.3,490.0,12.9,170.0,11.9,202.0,4.9,202.0,1.1,668.0,0.102,151.0,0.177,130.0,0.044,34.0,,,10.082,9538.0,0.496,23.712,16332.0,0.169,6.618,240.0,1.209,,,,11.232,103195.0,0.445,65.234,103195.0,0.491,0.021,0.333,0.054,2924.0,193.0,643.0,0.02,46.0,0.0,1478.0,0.141,17.0,2712.0,66.0,3.0,4.0,,,,,,,,,,,Y,371.642857,31.214058,-85.361725
4,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,Yes,4,,8,2025,-0.253894,0.613401,0.575849,0.069504,-0.153563,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.202642,8,7,11,15,10,5,2,1,3.0,4.0,-0.304441,2026,0.635861,-0.407503,-0.336903,0.63017,-0.741228,-0.155169,-1.255617,-0.502326,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.0,0.125,-0.26659,-0.079177,0.738153,-0.253894,0.930664,0.193207,-0.369507,0.294215,0.524002,0.266962,0.079849,1.577732,-0.191118,0.662844,-0.167496,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,11.0,0.090909,0.345578,-9.9e-05,0.563541,0.613401,0.570514,0.134271,0.794787,0.544171,,0.84381,0.055399,0.242553,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,7.0,0.142857,0.455072,-0.025094,0.833842,0.575849,0.944322,0.125806,-0.216523,-1.245548,-1.161984,1.06792,-1.508442,0.793318,0.182926,0.408151,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,10.0,0.1,-0.061005,0.017718,0.512643,-0.153563,-0.128381,-1.276518,-0.390364,0.03758,0.490315,-0.453797,0.472017,-0.305413,0.563206,0.166795,0.347443,-0.38317,1.069861,0.837525,0.268063,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,15.0,0.066667,0.087678,0.02934081,0.839329,0.069504,,,,,,,,,,,,,,,,,,,,0.93,4625.0,222.0,387.0,0.05,57084.0,0.91,11.0,0.72,29.0,0.69,150.0,0.14,4583.0,0.114,270.0,0.03,144.0,0.094,112.0,0.102,583.0,0.184,517.0,0.135,395.0,203.0,118.0,0.032,27.0,0.95,,0.045,1835.0,,,,90.0,,,,,,,,,,,,,,92.0,,,,,,,,,,,,,,82.0,,,,,,,,,,,79.0,,,,,,,,,,87.0,,,,,,,,,82.0,,,,,,,,,,,,,,85.0,,,,,88.0,,,,,89.0,,,,,91.0,,,98.0,93.0,97.0,94.0,94.0,-15.6,273.0,-1.1,652.0,17.4,507.0,12.8,218.0,8.5,261.0,5.7,261.0,0.8,647.0,0.101,137.0,0.18,122.0,0.048,25.0,0.151,2824.0,9.44,8935.0,0.53,23.35,16255.0,0.086,6.562,229.0,0.457,0.903,97.0,,10.937,109019.0,0.183,68.076,109019.0,0.382,0.038,0.308,0.053,,,,,,,,,,,,,,,,,,,,,,,,,371.642857,31.214058,-85.361725


In [35]:
print('main_df.shape:', main_df.shape)
main_df.drop_duplicates(inplace=True)
print('main_df.shape:', main_df.shape)


main_df.shape: (18500, 397)
main_df.shape: (18500, 397)


In [36]:

main_df.replace({'Facility Name': {'COPLEY MEMORIAL HOSPITAL': 'RUSH COPLEY'}}, inplace = True)
main_df['Name and Num'] = main_df['Facility Name'] + ' (' + main_df['Facility ID'] + ')'
main_df = main_df[~main_df['Name and Num'].isin([np.nan, float('NaN')])]
main_df['State'] = main_df['State'].replace(np.nan, 'Not given')
main_df['Hospital Type'] = main_df['Hospital Type'].replace(np.nan, 'Not given')
main_df['Hospital Ownership'] = main_df['Hospital Ownership'].replace(np.nan, 'Not given')

print('main_df.shape:', main_df.shape)
main_df.drop_duplicates(inplace=True)
print('main_df.shape:', main_df.shape)


drop_ls = [
    'Emergency Services',
    'Meets criteria for promoting interoperability of EHRs',
    'Hospital overall rating',
    'Hospital overall rating footnote',
    'mortality_C1',
    'mortality_C2',
    'mortality_C3',
    'mortality_C4',
    'mortality_C5',
    'mortality_C6',
    'mortality_C7',
    'mortality_total_cnt',
    'mortality_score_before_std',
    'mortality_Mean',
    'mortality_StdDev',
    'mortality_grp_score',
    'readmission_C1',
    'readmission_C2',
    'readmission_C3',
    'readmission_C4',
    'readmission_C5',
    'readmission_C6',
    'readmission_C7',
    'readmission_C8',
    'readmission_C9',
    'readmission_C10',
    'readmission_C11',
    'readmission_total_cnt',
    'readmission_score_before_std',
    'readmission_Mean',
    'readmission_StdDev',
    'readmission_grp_score',
    'safety_C1',
    'safety_C2',
    'safety_C3',
    'safety_C4',
    'safety_C5',
    'safety_C6',
    'safety_C7',
    'safety_C8',
    'safety_total_cnt',
    'safety_score_before_std',
    'safety_Mean',
    'safety_StdDev',
    'safety_grp_score',
    'process_C1',
    'process_C2',
    'process_C3',
    'process_C4',
    'process_C5',
    'process_C6',
    'process_C7',
    'process_C8',
    'process_C9',
    'process_C10',
    'process_C11',
    'process_C12', 
    'process_C13', 
    'process_C14',
    'process_total_cnt',
    'process_score_before_std',
    'process_Mean',
    'process_StdDev',
    'process_grp_score',
    'patient_exp_C1',
    'patient_exp_C2',
    'patient_exp_C3',
    'patient_exp_C4',
    'patient_exp_C5',
    'patient_exp_C6',
    'patient_exp_C7',
    'patient_exp_C8',
    'patient_exp_total_cnt',
    'patient_exp_score_before_std',
    'patient_exp_Mean',
    'patient_exp_StdDev',
    'patient_exp_grp_score',
    #'Urban (1) or Rural (2)',
    #'Teaching hospital or affiliated with a teaching hospital?',
]

for lab in list(main_df):
    if '_DEN' in lab:
        drop_ls.append(lab)
        
print(main_df.shape)
main_df.drop(labels=drop_ls, axis=1, inplace=True)
print(main_df.shape)
main_df.head()

main_df.shape: (18386, 398)
main_df.shape: (18386, 398)
(18386, 398)
(18386, 270)


Unnamed: 0,Facility ID,Facility Name,Address,City,State,ZIP Code,County Name,Hospital Type,Hospital Ownership,file_month,file_year,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Q_25,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,std_Hybrid_HWM,mortality_C8,mortality_measure_wt,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,std_Hybrid_HWR,readmission_measure_wt,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_measure_wt,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_SAFE_USE_OF_OPIOIDS,std_SEP_1,process_measure_wt,std_O_COMP_1_LINEAR_SCORE,std_O_COMP_2_LINEAR_SCORE,std_O_COMP_3_LINEAR_SCORE,std_O_PATIENT_RATE_LINEAR_SCORE,std_O_PATIENT_REC_LINEAR_SCORE,std_H_COMP_1_LINEAR_SCORE,std_H_COMP_2_LINEAR_SCORE,std_H_COMP_3_LINEAR_SCORE,std_H_COMP_5_LINEAR_SCORE,std_H_COMP_6_LINEAR_SCORE,std_H_COMP_7_LINEAR_SCORE,std_H_CLEAN_LINEAR_SCORE,std_H_QUIET_LINEAR_SCORE,std_H_RECMND_LINEAR_SCORE,std_H_HSP_RATING_LINEAR_SCORE,patient_exp_C9,patient_exp_C10,patient_exp_C11,patient_exp_C12,patient_exp_C13,patient_exp_C14,patient_exp_C15,patient_exp_measure_wt,std_READM_30_HOSP_WIDE,std_HCP_COVID_19,std_PC_01,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,std_OP_3B,std_OP_33,std_OP_30,std_ED_2B,IMM_3,OP_18B,OP_22,OP_23,OP_29,SEP_1,SAFE_USE_OF_OPIOIDS,MORT_30_AMI,MORT_30_CABG,MORT_30_COPD,MORT_30_HF,MORT_30_PN,MORT_30_STK,PSI_4_SURG_COMP,COMP_HIP_KNEE,PSI_90_SAFETY,Hybrid_HWM,H_COMP_1_A_P,H_COMP_1_SN_P,H_COMP_1_U_P,H_COMP_1_LINEAR_SCORE,H_COMP_1_STAR_RATING,H_NURSE_RESPECT_A_P,H_NURSE_RESPECT_SN_P,H_NURSE_RESPECT_U_P,H_NURSE_LISTEN_A_P,H_NURSE_LISTEN_SN_P,H_NURSE_LISTEN_U_P,H_NURSE_EXPLAIN_A_P,H_NURSE_EXPLAIN_SN_P,H_NURSE_EXPLAIN_U_P,H_COMP_2_A_P,H_COMP_2_SN_P,H_COMP_2_U_P,H_COMP_2_LINEAR_SCORE,H_COMP_2_STAR_RATING,H_DOCTOR_RESPECT_A_P,H_DOCTOR_RESPECT_SN_P,H_DOCTOR_RESPECT_U_P,H_DOCTOR_LISTEN_A_P,H_DOCTOR_LISTEN_SN_P,H_DOCTOR_LISTEN_U_P,H_DOCTOR_EXPLAIN_A_P,H_DOCTOR_EXPLAIN_SN_P,H_DOCTOR_EXPLAIN_U_P,H_COMP_3_A_P,H_COMP_3_SN_P,H_COMP_3_U_P,H_COMP_3_LINEAR_SCORE,H_COMP_3_STAR_RATING,H_CALL_BUTTON_A_P,H_CALL_BUTTON_SN_P,H_CALL_BUTTON_U_P,H_BATH_HELP_A_P,H_BATH_HELP_SN_P,H_BATH_HELP_U_P,H_COMP_5_A_P,H_COMP_5_SN_P,H_COMP_5_U_P,H_COMP_5_LINEAR_SCORE,H_COMP_5_STAR_RATING,H_MED_FOR_A_P,H_MED_FOR_SN_P,H_MED_FOR_U_P,H_SIDE_EFFECTS_A_P,H_SIDE_EFFECTS_SN_P,H_SIDE_EFFECTS_U_P,H_COMP_6_N_P,H_COMP_6_Y_P,H_COMP_6_LINEAR_SCORE,H_COMP_6_STAR_RATING,H_DISCH_HELP_N_P,H_DISCH_HELP_Y_P,H_SYMPTOMS_N_P,H_SYMPTOMS_Y_P,H_COMP_7_A,H_COMP_7_D_SD,H_COMP_7_SA,H_COMP_7_LINEAR_SCORE,H_COMP_7_STAR_RATING,H_CT_PREFER_A,H_CT_PREFER_D_SD,H_CT_PREFER_SA,H_CT_UNDER_A,H_CT_UNDER_D_SD,H_CT_UNDER_SA,H_CT_MED_A,H_CT_MED_D_SD,H_CT_MED_SA,H_CLEAN_HSP_A_P,H_CLEAN_HSP_SN_P,H_CLEAN_HSP_U_P,H_CLEAN_LINEAR_SCORE,H_CLEAN_STAR_RATING,H_QUIET_HSP_A_P,H_QUIET_HSP_SN_P,H_QUIET_HSP_U_P,H_QUIET_LINEAR_SCORE,H_QUIET_STAR_RATING,H_HSP_RATING_0_6,H_HSP_RATING_7_8,H_HSP_RATING_9_10,H_HSP_RATING_LINEAR_SCORE,H_HSP_RATING_STAR_RATING,H_RECMND_DN,H_RECMND_DY,H_RECMND_PY,H_RECMND_LINEAR_SCORE,H_RECMND_STAR_RATING,H_STAR_RATING,O_COMP_1_LINEAR_SCORE,O_COMP_2_LINEAR_SCORE,O_COMP_3_LINEAR_SCORE,O_PATIENT_RATE_LINEAR_SCORE,O_PATIENT_REC_LINEAR_SCORE,EDAC_30_AMI,EDAC_30_HF,EDAC_30_PN,OP_32,OP_35_ADM,OP_35_ED,OP_36,READM_30_CABG,READM_30_COPD,READM_30_HIP_KNEE,Hybrid_HWR,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,OP_13,OP_8,OP_10,H_NUMB_COMP,PC_01,HCP_COVID_19,READM_30_HOSP_WIDE,H_RESP_RATE_P,H_INDI_STAR_RATING,H_GLOB_STAR_RATING,OP_2,OP_3B,OP_33,OP_30,ED_2B,Beds,Lat,Lon,Name and Num
0,10001,SOUTHEAST ALABAMA MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,10,2020,-0.752932,-0.070355,0.556127,0.071425,-0.050316,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.0491,7,8,11,8,11,5,2,1,3.0,3.0,,2021,0.730853,-2.055159,0.069106,0.103403,-0.022557,-1.941449,-0.327016,,,0.142857,-0.027028,-0.705673,1.039984,2.185101,-1.542989,-1.037529,-1.164917,1.1826,-1.18578,1.207127,,0.090909,0.303431,-0.078888,0.680925,0.244905,1.037183,0.403644,0.098335,0.31923,0.125,0.644574,-0.102729,0.684379,-0.761776,-0.979501,,-0.459111,0.140655,,0.031191,0.090909,,,,,,,,,,,,,,,,,,,,,,,0.125,-0.070634,,0.473469,-0.537213,-0.085263,-0.269656,-0.073296,0.801144,-0.115323,0.284361,0.478432,,,0.466001,-0.054072,0.97,178.0,0.03,,0.81,0.6,,0.119,0.047,0.084,0.113,0.157,0.166,170.87,0.023,0.93,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,4.0,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.0,21.6,-20.6,13.4,10.8,7.1,0.8,0.149,0.207,0.046,,0.742,0.324,0.65,0.0,0.548,0.537,0.028,0.389,0.072,507.0,0.0,,0.156,21.0,3.5,3.5,,,,0.98,103.0,371.642857,31.214058,-85.361725,SOUTHEAST ALABAMA MEDICAL CENTER (010001)
1,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,1,2023,-0.647058,0.284081,0.312074,-0.127836,-1.024044,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.162208,7,8,11,8,10,5,2,1,3.0,3.0,,2023,-0.041613,-2.19781,-0.019543,1.708727,0.366398,-1.543654,-1.741473,,,0.142857,0.207832,-0.702681,0.28604,0.105508,0.219318,-0.048609,-0.198058,0.159253,0.872692,0.062022,,0.090909,0.024204,0.444297,0.745807,-0.407579,1.025843,0.314693,-0.315731,-0.212425,0.125,0.992908,0.10185,-1.77947,-0.904124,-0.237088,,-0.634479,0.488322,,-0.609958,0.1,,,,,,,,,,,,,,,,,,,,,,,0.125,0.924456,-1.039363,-1.763346,-1.07585,-0.175531,-1.051153,0.808598,0.68837,0.037272,-0.445032,0.339427,,,,,0.97,205.0,0.03,,0.81,0.46,,0.124,0.047,0.085,0.083,0.159,0.164,173.39,0.024,1.01,,,,,,2.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,2.0,,,,,,,,,,,4.0,,,,,,,,,,4.0,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.9,21.8,-1.5,14.1,10.2,4.7,1.0,0.117,0.199,0.042,,0.661,0.3,1.099,0.0,0.85,0.66,0.067,0.425,0.057,434.0,0.09,0.737,0.142,15.0,3.5,3.0,,,,,,371.642857,31.214058,-85.361725,SOUTHEAST HEALTH MEDICAL CENTER (010001)
2,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,1,2024,0.007071,0.230867,-0.146867,0.137779,-0.677624,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,-0.030968,7,7,11,8,10,5,2,1,3.0,3.0,,2024,0.470514,-1.535074,0.282996,1.510467,0.129818,-0.554262,-0.748332,,,0.142857,1.123231,-0.791585,-0.63392,0.29207,0.584835,0.293805,0.886353,-1.03233,0.626949,-0.252865,,0.090909,0.879845,-0.232451,0.488068,-0.71166,,-0.210159,0.003034,-1.162386,0.142857,0.940083,-0.029295,0.419351,-0.942734,-1.140182,,-3.129707,0.000412,,0.371166,0.1,,,,,,,,,,,,,,,,,,,,,,,0.125,0.423838,-0.431525,0.583665,-0.373358,-0.400063,-0.243371,0.124487,0.479704,0.876397,0.133873,0.361496,,,,,0.95,214.0,0.05,,0.47,0.65,,0.12,0.041,0.088,0.089,0.18,0.148,184.68,0.027,1.21,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,4.0,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-15.4,23.4,23.6,12.9,11.9,4.9,1.1,0.105,0.19,0.038,,0.938,0.363,1.335,,0.965,0.507,0.028,0.38,0.061,544.0,0.0,0.836,0.142,15.0,3.5,3.5,,,,,,371.642857,31.214058,-85.361725,SOUTHEAST HEALTH MEDICAL CENTER (010001)
3,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,10,2024,0.29611,0.361406,0.344883,0.042072,-0.484678,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.171622,7,7,11,8,11,5,2,1,3.0,4.0,,2025,1.405923,-1.04537,1.011496,0.650904,-0.315375,0.023766,-0.732077,,,0.142857,1.024273,-0.368843,-0.448127,0.289973,0.607499,0.859382,0.192452,-1.028886,0.62791,-0.25798,,0.090909,0.698729,0.30367,0.739078,-0.467783,,0.476446,-0.105703,0.103001,0.142857,1.044716,0.080291,0.781373,-1.062058,-1.138829,,-3.171398,0.588011,0.563049,0.449772,0.090909,,,,,,,,,,,,,,,,,,,,,,,0.125,0.768359,-0.525017,0.140984,-1.154064,0.570761,-0.231671,-0.254181,-0.312251,0.933177,0.719371,0.020336,,,,,0.96,217.0,0.05,,0.47,0.68,0.12,0.108,0.038,0.079,0.105,0.189,0.137,194.78,0.03,0.98,,,,,,2.0,,,,,,,,,,,,,,4.0,,,,,,,,,,,,,,3.0,,,,,,,,,,,3.0,,,,,,,,,,3.0,,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-13.8,10.6,14.3,12.9,11.9,4.9,1.1,0.102,0.177,0.044,,0.496,0.169,1.209,,0.445,0.491,0.021,0.333,0.054,643.0,0.02,0.0,0.141,17.0,3.0,4.0,,,,,,371.642857,31.214058,-85.361725,SOUTHEAST HEALTH MEDICAL CENTER (010001)
4,10001,SOUTHEAST HEALTH MEDICAL CENTER,1108 ROSS CLARK CIRCLE,DOTHAN,AL,36301,HOUSTON,Acute Care Hospitals,Government - Hospital District or Authority,8,2025,-0.253894,0.613401,0.575849,0.069504,-0.153563,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.202642,8,7,11,15,10,5,2,1,3.0,4.0,-0.304441,2026,0.635861,-0.407503,-0.336903,0.63017,-0.741228,-0.155169,-1.255617,-0.502326,1.0,0.125,0.930664,0.193207,-0.369507,0.294215,0.524002,0.266962,0.079849,1.577732,-0.191118,0.662844,-0.167496,0.090909,0.570514,0.134271,0.794787,0.544171,,0.84381,0.055399,0.242553,0.142857,0.944322,0.125806,-0.216523,-1.245548,-1.161984,1.06792,-1.508442,0.793318,0.182926,0.408151,0.1,-0.128381,-1.276518,-0.390364,0.03758,0.490315,-0.453797,0.472017,-0.305413,0.563206,0.166795,0.347443,-0.38317,1.069861,0.837525,0.268063,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.066667,,,,,,,,,,,,,,,,0.93,222.0,0.05,0.91,0.72,0.69,0.14,0.114,0.03,0.094,0.102,0.184,0.135,203.0,0.032,0.95,0.045,,,,90.0,,,,,,,,,,,,,,92.0,,,,,,,,,,,,,,82.0,,,,,,,,,,,79.0,,,,,,,,,,87.0,,,,,,,,,82.0,,,,,,,,,,,,,,85.0,,,,,88.0,,,,,89.0,,,,,91.0,,,98.0,93.0,97.0,94.0,94.0,-15.6,-1.1,17.4,12.8,8.5,5.7,0.8,0.101,0.18,0.048,0.151,0.53,0.086,0.457,,0.183,0.382,0.038,0.308,0.053,,,,,,,,,,,,,371.642857,31.214058,-85.361725,SOUTHEAST HEALTH MEDICAL CENTER (010001)


In [37]:
tdf = main_df[main_df['Release year'] == '2026']
tdf = tdf[tdf['Facility ID'] == '140063']
print(tdf.shape)
tdf.head()

(1, 270)


Unnamed: 0,Facility ID,Facility Name,Address,City,State,ZIP Code,County Name,Hospital Type,Hospital Ownership,file_month,file_year,Std_Outcomes_Mortality_score,Std_Outcomes_Readmission_score,Std_Outcomes_Safety_score,Std_PatientExp_score,Std_Process_score,std_weight_PatientExperience,std_weight_Readmission,std_weight_Mortality,std_weight_safety,std_weight_Process,weight_PatientExperience,weight_Outcomes_Readmission,weight_Outcomes_Mortality,weight_Outcomes_Safety,weight_Process,summary_score,Outcomes_Mortality_cnt,Outcomes_safety_cnt,Outcomes_Readmission_cnt,Patient_Experience_cnt,Process_cnt,Total_measure_group_cnt,MortSafe_Group_cnt,report_indicator,cnt_grp,star,Q_25,Release year,std_MORT_30_AMI,std_MORT_30_CABG,std_MORT_30_COPD,std_MORT_30_HF,std_MORT_30_PN,std_MORT_30_STK,std_PSI_4_SURG_COMP,std_Hybrid_HWM,mortality_C8,mortality_measure_wt,std_EDAC_30_AMI,std_EDAC_30_HF,std_EDAC_30_PN,std_OP_32,std_READM_30_CABG,std_READM_30_COPD,std_READM_30_HIP_KNEE,std_OP_35_ADM,std_OP_35_ED,std_OP_36,std_Hybrid_HWR,readmission_measure_wt,std_COMP_HIP_KNEE,std_HAI_1,std_HAI_2,std_HAI_3,std_HAI_4,std_HAI_5,std_HAI_6,std_PSI_90_SAFETY,safety_measure_wt,std_IMM_3,std_OP_10,std_OP_13,std_OP_18B,std_OP_22,std_OP_23,std_OP_29,std_OP_8,std_SAFE_USE_OF_OPIOIDS,std_SEP_1,process_measure_wt,std_O_COMP_1_LINEAR_SCORE,std_O_COMP_2_LINEAR_SCORE,std_O_COMP_3_LINEAR_SCORE,std_O_PATIENT_RATE_LINEAR_SCORE,std_O_PATIENT_REC_LINEAR_SCORE,std_H_COMP_1_LINEAR_SCORE,std_H_COMP_2_LINEAR_SCORE,std_H_COMP_3_LINEAR_SCORE,std_H_COMP_5_LINEAR_SCORE,std_H_COMP_6_LINEAR_SCORE,std_H_COMP_7_LINEAR_SCORE,std_H_CLEAN_LINEAR_SCORE,std_H_QUIET_LINEAR_SCORE,std_H_RECMND_LINEAR_SCORE,std_H_HSP_RATING_LINEAR_SCORE,patient_exp_C9,patient_exp_C10,patient_exp_C11,patient_exp_C12,patient_exp_C13,patient_exp_C14,patient_exp_C15,patient_exp_measure_wt,std_READM_30_HOSP_WIDE,std_HCP_COVID_19,std_PC_01,std_H_COMP_1_STAR_RATING,std_H_COMP_2_STAR_RATING,std_H_COMP_3_STAR_RATING,std_H_COMP_5_STAR_RATING,std_H_COMP_6_STAR_RATING,std_H_COMP_7_STAR_RATING,std_H_GLOB_STAR_RATING,std_H_INDI_STAR_RATING,std_OP_3B,std_OP_33,std_OP_30,std_ED_2B,IMM_3,OP_18B,OP_22,OP_23,OP_29,SEP_1,SAFE_USE_OF_OPIOIDS,MORT_30_AMI,MORT_30_CABG,MORT_30_COPD,MORT_30_HF,MORT_30_PN,MORT_30_STK,PSI_4_SURG_COMP,COMP_HIP_KNEE,PSI_90_SAFETY,Hybrid_HWM,H_COMP_1_A_P,H_COMP_1_SN_P,H_COMP_1_U_P,H_COMP_1_LINEAR_SCORE,H_COMP_1_STAR_RATING,H_NURSE_RESPECT_A_P,H_NURSE_RESPECT_SN_P,H_NURSE_RESPECT_U_P,H_NURSE_LISTEN_A_P,H_NURSE_LISTEN_SN_P,H_NURSE_LISTEN_U_P,H_NURSE_EXPLAIN_A_P,H_NURSE_EXPLAIN_SN_P,H_NURSE_EXPLAIN_U_P,H_COMP_2_A_P,H_COMP_2_SN_P,H_COMP_2_U_P,H_COMP_2_LINEAR_SCORE,H_COMP_2_STAR_RATING,H_DOCTOR_RESPECT_A_P,H_DOCTOR_RESPECT_SN_P,H_DOCTOR_RESPECT_U_P,H_DOCTOR_LISTEN_A_P,H_DOCTOR_LISTEN_SN_P,H_DOCTOR_LISTEN_U_P,H_DOCTOR_EXPLAIN_A_P,H_DOCTOR_EXPLAIN_SN_P,H_DOCTOR_EXPLAIN_U_P,H_COMP_3_A_P,H_COMP_3_SN_P,H_COMP_3_U_P,H_COMP_3_LINEAR_SCORE,H_COMP_3_STAR_RATING,H_CALL_BUTTON_A_P,H_CALL_BUTTON_SN_P,H_CALL_BUTTON_U_P,H_BATH_HELP_A_P,H_BATH_HELP_SN_P,H_BATH_HELP_U_P,H_COMP_5_A_P,H_COMP_5_SN_P,H_COMP_5_U_P,H_COMP_5_LINEAR_SCORE,H_COMP_5_STAR_RATING,H_MED_FOR_A_P,H_MED_FOR_SN_P,H_MED_FOR_U_P,H_SIDE_EFFECTS_A_P,H_SIDE_EFFECTS_SN_P,H_SIDE_EFFECTS_U_P,H_COMP_6_N_P,H_COMP_6_Y_P,H_COMP_6_LINEAR_SCORE,H_COMP_6_STAR_RATING,H_DISCH_HELP_N_P,H_DISCH_HELP_Y_P,H_SYMPTOMS_N_P,H_SYMPTOMS_Y_P,H_COMP_7_A,H_COMP_7_D_SD,H_COMP_7_SA,H_COMP_7_LINEAR_SCORE,H_COMP_7_STAR_RATING,H_CT_PREFER_A,H_CT_PREFER_D_SD,H_CT_PREFER_SA,H_CT_UNDER_A,H_CT_UNDER_D_SD,H_CT_UNDER_SA,H_CT_MED_A,H_CT_MED_D_SD,H_CT_MED_SA,H_CLEAN_HSP_A_P,H_CLEAN_HSP_SN_P,H_CLEAN_HSP_U_P,H_CLEAN_LINEAR_SCORE,H_CLEAN_STAR_RATING,H_QUIET_HSP_A_P,H_QUIET_HSP_SN_P,H_QUIET_HSP_U_P,H_QUIET_LINEAR_SCORE,H_QUIET_STAR_RATING,H_HSP_RATING_0_6,H_HSP_RATING_7_8,H_HSP_RATING_9_10,H_HSP_RATING_LINEAR_SCORE,H_HSP_RATING_STAR_RATING,H_RECMND_DN,H_RECMND_DY,H_RECMND_PY,H_RECMND_LINEAR_SCORE,H_RECMND_STAR_RATING,H_STAR_RATING,O_COMP_1_LINEAR_SCORE,O_COMP_2_LINEAR_SCORE,O_COMP_3_LINEAR_SCORE,O_PATIENT_RATE_LINEAR_SCORE,O_PATIENT_REC_LINEAR_SCORE,EDAC_30_AMI,EDAC_30_HF,EDAC_30_PN,OP_32,OP_35_ADM,OP_35_ED,OP_36,READM_30_CABG,READM_30_COPD,READM_30_HIP_KNEE,Hybrid_HWR,HAI_1,HAI_2,HAI_3,HAI_4,HAI_5,HAI_6,OP_13,OP_8,OP_10,H_NUMB_COMP,PC_01,HCP_COVID_19,READM_30_HOSP_WIDE,H_RESP_RATE_P,H_INDI_STAR_RATING,H_GLOB_STAR_RATING,OP_2,OP_3B,OP_33,OP_30,ED_2B,Beds,Lat,Lon,Name and Num
4989,140063,RUSH OAK PARK HOSPITAL,520 S MAPLE AVE,OAK PARK,IL,60304,COOK,Acute Care Hospitals,Voluntary non-profit - Church,8,2025,1.607526,0.557798,-0.3956,-0.576602,-0.905579,0.22,0.22,0.22,0.22,0.12,0.22,0.22,0.22,0.22,0.12,0.153817,6,5,8,15,8,5,2,1,3.0,4.0,,2026,-0.137152,,1.08204,1.867428,1.938433,0.607862,,1.285927,1.0,0.166667,,-0.917151,-0.600916,0.842209,,0.851009,,0.679505,0.881257,0.662844,0.115191,0.125,,-1.303176,0.948383,,,-1.496084,0.399215,-0.323151,0.2,1.098394,-0.173592,,-1.476878,-2.313293,0.752411,0.468598,,0.004698,-1.932502,0.125,0.803203,-0.082826,0.533043,-0.468851,-0.19754,-0.453797,-0.657542,-0.504454,-0.454435,-1.391708,-0.914393,-1.059252,-0.745056,-0.40775,-0.817909,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.066667,,,,,,,,,,,,,,,,0.96,234.0,0.08,0.85,0.98,0.31,0.15,0.123,,0.072,0.075,0.11,0.122,,,1.06,0.035,,,,90.0,,,,,,,,,,,,,,89.0,,,,,,,,,,,,,,81.0,,,,,,,,,,,74.0,,,,,,,,,,81.0,,,,,,,,,78.0,,,,,,,,,,,,,,82.0,,,,,78.0,,,,,85.0,,,,,85.0,,,99.0,95.0,98.0,93.0,92.0,,34.9,24.4,12.3,9.8,4.8,0.8,,0.175,,0.149,1.548,0.0,,,1.59,0.234,,,0.067,,,,,,,,,,,,,214.866667,41.879026,-87.803818,RUSH OAK PARK HOSPITAL (140063)


In [38]:
print('main_df.shape:', main_df.shape)
main_df.drop_duplicates(inplace=True)
print('main_df.shape:', main_df.shape)


main_df.shape: (18386, 270)
main_df.shape: (18386, 270)


In [39]:
## save main_df to file
main_df.to_pickle(stars_dir + 'FilesForApp/hosp_stars_dat.pkl')