# Generate HVBP safety dataframe

In [1]:
import pandas as pd
import numpy as np
import warnings
from IPython.utils import io
import sys
# Silence every Python warning for the rest of the session (this affects all later cells).
warnings.filterwarnings('ignore')
# Remove pandas' display truncation so frames are rendered with all columns and rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Base directory that later cells prepend to every archive path they read.
# NOTE(review): absolute, user-specific path -- it must be edited before the notebook
# can run on another machine.
main_dir = '/Users/kenlocey/Desktop/Rush/CMS_HospitalArchives/'

## Define Custom Functions

In [2]:

def check_lists(lists):
    """Confirm that all lists in ``lists`` hold the same items.

    Every list is compared against every list (itself included). At the first
    item of one list that is absent from another, the item and the list that
    lacks it are printed and the run is aborted through ``sys.exit()``.
    Returns ``None`` when no mismatch is found.
    """
    for source in lists:
        for target in lists:
            # Items of `source` that `target` lacks, in `source` order.
            absent = [item for item in source if item not in target]
            if absent:
                print('\n')
                print(absent[0] + ': NOT FOUND IN')
                print(target)
                sys.exit()
                        
                        
def curate(df):
    """Clean the identifier and text columns of a hospital dataframe.

    Each step is applied only when the relevant column is present:

    1. Rows with a missing 'Facility ID' are dropped.
    2. Rows with a missing 'Facility Name' are dropped.
    3. 'Facility ID' is stored as text, left-padded with zeros to six
       characters.
    4. Tab characters are removed from every column that supports pandas
       string operations; other columns are left untouched.

    Column labels are expected to be unique.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A cleaned copy of ``df``.
    """
    has_id = 'Facility ID' in df.columns
    cleaned = df

    # NaN never compares equal to anything, so `column != np.nan` is True for
    # every row and filters nothing; `notna()` is the test that actually
    # removes missing values.
    if has_id:
        cleaned = cleaned[cleaned['Facility ID'].notna()]
    if 'Facility Name' in cleaned.columns:
        cleaned = cleaned[cleaned['Facility Name'].notna()]

    # Detach from the caller's frame before assigning columns.
    cleaned = cleaned.copy()

    if has_id:
        ids = cleaned['Facility ID']
        # A numeric ID column that contained missing values is read as float;
        # convert whole-number floats back to integers so 10001.0 is written
        # as '10001' rather than '10001.0'.
        if pd.api.types.is_float_dtype(ids) and (ids % 1 == 0).all():
            ids = ids.astype('int64')
        # Restore leading zeros lost when the IDs were parsed as numbers.
        cleaned['Facility ID'] = ids.astype(str).str.zfill(6)

    for c in list(cleaned):
        try:
            cleaned[c] = cleaned[c].str.replace("\t", "", regex=False)
        except AttributeError:
            # Column has no `.str` accessor (non-text data): nothing to strip.
            continue

    return cleaned


def rename_and_fill(df):
    """Map legacy column headers to current ones and add missing measure columns.

    The frame is changed in place and also returned:

    * legacy headers that are present are renamed (both 'Provider ID' and
      'Provider Number' become 'Facility ID');
    * every expected measure column that is absent is appended, filled with NaN;
    * if any column label then occurs more than once, the repeated labels are
      printed and the run is aborted through ``sys.exit()``.
    """
    legacy_to_current = {
        'Provider ID': 'Facility ID',
        'Measure Start Date': 'Start Date',
        'Measure End Date': 'End Date',
        'Hospital Name': 'Facility Name',
        'Provider Number': 'Facility ID',
        ' ZIP Code': 'ZIP Code',
    }
    present = {old: new for old, new in legacy_to_current.items() if old in df.columns}
    df.rename(columns=present, inplace=True)

    # Every measure reports the same seven fields, in this order.
    suffixes = ('Achievement Threshold', 'Benchmark', 'Baseline Rate',
                'Performance Rate', 'Achievement Points',
                'Improvement Points', 'Measure Score')

    def measure_columns(*measures):
        # '<measure> <suffix>' for each measure, measures outermost.
        return [measure + ' ' + suffix for measure in measures for suffix in suffixes]

    expected = (
        ['Fiscal Year']
        + measure_columns('HAI-1', 'HAI-2')
        + ['Combined SSI Measure Score']
        + measure_columns('HAI-3', 'HAI-4', 'HAI-5', 'HAI-6',
                          'MORT-30-AMI', 'MORT-30-HF', 'MORT-30-PN',
                          'COMP-HIP-KNEE', 'PC-01', 'PSI-90')
    )

    for name in expected:
        if name not in df.columns:
            df[name] = float('NaN')

    labels = list(df)
    repeated = list({label for label in labels if labels.count(label) > 1})
    if repeated:
        print('duplicates:', repeated)
        sys.exit()

    return df


def process2(df, lists, yr, mo):
    """Standardise one archive file and tag it with its snapshot date.

    The frame is passed through ``rename_and_fill`` and then ``curate``. Its
    column labels at that point are appended to ``lists`` (before the
    'file_month' / 'file_year' columns are attached). The columns are finally
    arranged in sorted label order.

    Returns the processed frame and the same ``lists`` object.
    """
    cleaned = curate(rename_and_fill(df))

    # Record the column set before the two provenance columns are added.
    lists.append(list(cleaned))

    n_rows = len(cleaned)
    cleaned['file_month'] = [mo] * n_rows
    cleaned['file_year'] = [yr] * n_rows

    ordered = cleaned.reindex(columns=sorted(cleaned.columns))
    return ordered, lists


## Load Files

In [3]:
df_list = []
lists = []

# `yrs`, `mos` and `subdirs` are matched by position: entry k of each list
# describes the same archive snapshot.
yrs = ['2023', '2023',
       '2022', '2022', '2022',
       '2021', '2021', '2021', '2021', '2021',
       '2020', '2020', '2020', '2020',
       '2019', '2019', '2019', '2019',
       '2018', '2018', '2018', '2018',
       '2017', '2017', '2017',
       '2016',
       ]

mos = ['01', '04',
       '01', '04', '07',
       '01', '03', '04', '07', '10',
       '10', '07', '04', '01',
       '10', '07', '04', '03',
       '10', '07', '05', '01',
       '10', '07', '04',
       '12',
       ]

subdirs = ['2023/hospitals_01_2023/hvbp_safety.csv',
           '2023/hospitals_04_2023/hvbp_safety.csv',

           '2022/hospitals_01_2022/hvbp_safety.csv',
           '2022/hospitals_04_2022/hvbp_safety.csv',
           '2022/hospitals_07_2022/hvbp_safety.csv',

           '2021/hospitals_01_2021/hvbp_safety.csv',
           '2021/hospitals_03_2021/hvbp_safety.csv',
           '2021/hospitals_04_2021/hvbp_safety.csv',
           '2021/hospitals_07_2021/hvbp_safety.csv',
           '2021/hospitals_10_2021/hvbp_safety.csv',

           # NOTE(review): this is a *clinical outcomes* file, not a safety file;
           # every other entry is hvbp_safety*. Confirm whether the safety file
           # from this archive was intended here.
           '2020/hospitals_archive_10_2020/hvbp_clinical_outcomes_12_09_2019.csv',
           '2020/hospitals_archive_07_2020/hvbp_safety.csv',
           '2020/HOSArchive_Revised_Flatfiles_20200422/hvbp_safety_12_09_2019.csv',
           '2020/HOSArchive_Revised_Flatfiles_20200129/hvbp_safety_12_09_2019.csv',

           '2019/HOSArchive_Revised_Flatfiles_20191030/hvbp_safety_11_09_2018.csv',
           '2019/HOSArchive_Revised_Flatfiles_20190702/hvbp_safety_11_09_2018.csv',
           '2019/HOSArchive_Revised_FlatFiles_20190424/hvbp_safety_11_09_2018.csv',
           '2019/HOSArchive_Revised_Flatfiles_20190321/hvbp_safety_11_09_2018.csv',

           '2018/HOSArchive_Revised_FlatFiles_20181031/hvbp_safety_11_07_2017.csv',
           '2018/HOSArchive_Revised_FlatFiles_20180725/hvbp_safety_11_07_2017.csv',
           '2018/HOSArchive_Revised_FlatFiles_20180523/hvbp_safety_11_07_2017.csv',
           '2018/HOSArchive_Revised_FlatFiles_20180126/hvbp_safety_11_07_2017.csv',

           '2017/HOSArchive_Revised_FlatFiles_20171024/hvbp_safety_11_10_2016.csv',
           '2017/HOSArchive_Revised_FlatFiles_20170726/hvbp_safety_11_10_2016.csv',
           '2017/HOSArchive_Revised_Flatfiles_20170428/hvbp_safety_11_10_2016.csv',

           '2016/HOSArchive_Revised_Flatfiles_20161219/hvbp_safety_11_10_2016.csv',

           ]

# A missing or extra entry in any one list would silently attach the wrong
# year/month to every later file, so refuse to continue if they disagree.
if not (len(yrs) == len(mos) == len(subdirs)):
    raise ValueError('yrs, mos and subdirs must have the same length: '
                     + str((len(yrs), len(mos), len(subdirs))))

for yr, mo, subdir in zip(yrs, mos, subdirs):
    # Anything pandas writes to stdout/stderr while parsing is captured, not shown.
    with io.capture_output() as captured:
        df = pd.read_csv(main_dir + subdir, encoding="ISO-8859-1")
    print(subdir + ' :  (rows, columns) =', df.shape)
    df, lists = process2(df, lists, yr, mo)
    df_list.append(df)

# Abort unless every processed file ended up with the same set of columns.
check_lists(lists)
# NOTE(review): `subdir` is reassigned here but not used again in this cell, and
# the combined frame is not written to disk here although the next cell reads
# 'HVBP/CombinedFiles_HVBP/hvbp_safety.pkl' -- a save step appears to be missing.
subdir = 'HVBP/CombinedFiles_HVBP'
hvb_df = pd.concat(df_list)
print('hvb_df.shape:', hvb_df.shape)

2023/hospitals_01_2023/hvbp_safety.csv :  (rows, columns) = (2517, 51)
2023/hospitals_04_2023/hvbp_safety.csv :  (rows, columns) = (2517, 51)
2022/hospitals_01_2022/hvbp_safety.csv :  (rows, columns) = (2676, 51)
2022/hospitals_04_2022/hvbp_safety.csv :  (rows, columns) = (2236, 51)
2022/hospitals_07_2022/hvbp_safety.csv :  (rows, columns) = (2236, 51)
2021/hospitals_01_2021/hvbp_safety.csv :  (rows, columns) = (2676, 51)
2021/hospitals_03_2021/hvbp_safety.csv :  (rows, columns) = (2676, 51)
2021/hospitals_04_2021/hvbp_safety.csv :  (rows, columns) = (2676, 51)
2021/hospitals_07_2021/hvbp_safety.csv :  (rows, columns) = (2676, 51)
2021/hospitals_10_2021/hvbp_safety.csv :  (rows, columns) = (2676, 51)
2020/hospitals_archive_10_2020/hvbp_clinical_outcomes_12_09_2019.csv :  (rows, columns) = (2731, 35)
2020/hospitals_archive_07_2020/hvbp_safety.csv :  (rows, columns) = (2731, 57)
2020/HOSArchive_Revised_Flatfiles_20200422/hvbp_safety_12_09_2019.csv :  (rows, columns) = (2731, 57)
2020/HOS

In [4]:
# Load the combined HVBP safety frame from its pickle.
# NOTE(review): none of the cells shown writes this pickle -- confirm where it is
# produced, otherwise a fresh Restart & Run All depends on a pre-existing file.
combined_pickle = main_dir + 'HVBP/CombinedFiles_HVBP/' + 'hvbp_safety.pkl'
hvb_df = pd.read_pickle(combined_pickle)
print('HVBP df:', hvb_df.shape)

# Hospital descriptor columns and the fiscal year are not carried forward.
descriptor_cols = ['Address', 'City', 'County Name',
                   'Facility Name', 'State', 'ZIP Code', 'Fiscal Year']
hvb_df = hvb_df.drop(columns=descriptor_cols)

print(hvb_df.shape)
hvb_df.head()

HVBP df: (70682, 95)
(70682, 88)


Unnamed: 0,COMP-HIP-KNEE Achievement Points,COMP-HIP-KNEE Achievement Threshold,COMP-HIP-KNEE Baseline Rate,COMP-HIP-KNEE Benchmark,COMP-HIP-KNEE Improvement Points,COMP-HIP-KNEE Measure Score,COMP-HIP-KNEE Performance Rate,Combined SSI Measure Score,Facility ID,HAI-1 Achievement Points,HAI-1 Achievement Threshold,HAI-1 Baseline Rate,HAI-1 Benchmark,HAI-1 Improvement Points,HAI-1 Measure Score,HAI-1 Performance Rate,HAI-2 Achievement Points,HAI-2 Achievement Threshold,HAI-2 Baseline Rate,HAI-2 Benchmark,HAI-2 Improvement Points,HAI-2 Measure Score,HAI-2 Performance Rate,HAI-3 Achievement Points,HAI-3 Achievement Threshold,HAI-3 Baseline Rate,HAI-3 Benchmark,HAI-3 Improvement Points,HAI-3 Measure Score,HAI-3 Performance Rate,HAI-4 Achievement Points,HAI-4 Achievement Threshold,HAI-4 Baseline Rate,HAI-4 Benchmark,HAI-4 Improvement Points,HAI-4 Measure Score,HAI-4 Performance Rate,HAI-5 Achievement Points,HAI-5 Achievement Threshold,HAI-5 Baseline Rate,HAI-5 Benchmark,HAI-5 Improvement Points,HAI-5 Measure Score,HAI-5 Performance Rate,HAI-6 Achievement Points,HAI-6 Achievement Threshold,HAI-6 Baseline Rate,HAI-6 Benchmark,HAI-6 Improvement Points,HAI-6 Measure Score,HAI-6 Performance Rate,MORT-30-AMI Achievement Points,MORT-30-AMI Achievement Threshold,MORT-30-AMI Baseline Rate,MORT-30-AMI Benchmark,MORT-30-AMI Improvement Points,MORT-30-AMI Measure Score,MORT-30-AMI Performance Rate,MORT-30-HF Achievement Points,MORT-30-HF Achievement Threshold,MORT-30-HF Baseline Rate,MORT-30-HF Benchmark,MORT-30-HF Improvement Points,MORT-30-HF Measure Score,MORT-30-HF Performance Rate,MORT-30-PN Achievement Points,MORT-30-PN Achievement Threshold,MORT-30-PN Baseline Rate,MORT-30-PN Benchmark,MORT-30-PN Improvement Points,MORT-30-PN Measure Score,MORT-30-PN Performance Rate,PC-01 Achievement Points,PC-01 Achievement Threshold,PC-01 Baseline Rate,PC-01 Benchmark,PC-01 Improvement Points,PC-01 Measure Score,PC-01 Performance Rate,PSI-90 Achievement Points,PSI-90 Achievement 
Threshold,PSI-90 Baseline Rate,PSI-90 Benchmark,PSI-90 Improvement Points,PSI-90 Measure Score,PSI-90 Performance Rate,file_month,file_year
0,,,,,,,,Not Available,10001,Not Available,0.589,0.742,0.0,Not Available,Not Available,0.476,Not Available,0.65,0.324,0.0,Not Available,Not Available,0.306,Not Available,0.717,0.650,0.0,Not Available,Not Available,1.228,Not Available,0.738,0.000,0.0,Not Available,Not Available,0.000,Not Available,0.726,0.548,0.0,Not Available,Not Available,0.912,Not Available,0.52,0.537,0.014,Not Available,Not Available,0.644,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2023
1,,,,,,,,Not Available,10005,Not Available,0.589,0.000,0.0,Not Available,Not Available,3.311,Not Available,0.65,1.073,0.0,Not Available,Not Available,2.350,Not Available,0.717,0.508,0.0,Not Available,Not Available,0.400,Not Available,0.738,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.726,1.938,0.0,Not Available,Not Available,0.000,Not Available,0.52,0.565,0.014,Not Available,Not Available,0.86,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2023
2,,,,,,,,Not Available,10006,Not Available,0.589,0.000,0.0,Not Available,Not Available,0.507,Not Available,0.65,0.222,0.0,Not Available,Not Available,0.602,Not Available,0.717,0.820,0.0,Not Available,Not Available,0.871,Not Available,0.738,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.726,0.654,0.0,Not Available,Not Available,1.640,Not Available,0.52,0.426,0.014,Not Available,Not Available,0.08,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2023
3,,,,,,,,Not Available,10007,Not Available,0.589,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.65,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.717,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.738,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.726,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.52,0.0,0.014,Not Available,Not Available,0.41,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2023
4,,,,,,,,Not Available,10011,Not Available,0.589,0.421,0.0,Not Available,Not Available,0.728,Not Available,0.65,0.598,0.0,Not Available,Not Available,0.927,Not Available,0.717,0.930,0.0,Not Available,Not Available,0.268,Not Available,0.738,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.726,0.964,0.0,Not Available,Not Available,1.346,Not Available,0.52,0.343,0.014,Not Available,Not Available,0.607,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2023


In [5]:
# Identifier / provenance columns keep their names; every other column is
# labelled with the dataset it came from.
key_columns = ['Facility ID', 'file_month', 'file_year']
prefix = 'HVBP Safety: '

# Skipping labels that already carry the prefix makes the cell safe to re-run:
# without the guard a second execution would prefix every column twice.
prefixed = {
    n: prefix + n
    for n in hvb_df.columns
    if n not in key_columns and not n.startswith(prefix)
}
hvb_df = hvb_df.rename(columns=prefixed)

print(hvb_df.shape)
hvb_df.tail()


(70682, 88)


Unnamed: 0,HVBP Safety: COMP-HIP-KNEE Achievement Points,HVBP Safety: COMP-HIP-KNEE Achievement Threshold,HVBP Safety: COMP-HIP-KNEE Baseline Rate,HVBP Safety: COMP-HIP-KNEE Benchmark,HVBP Safety: COMP-HIP-KNEE Improvement Points,HVBP Safety: COMP-HIP-KNEE Measure Score,HVBP Safety: COMP-HIP-KNEE Performance Rate,HVBP Safety: Combined SSI Measure Score,Facility ID,HVBP Safety: HAI-1 Achievement Points,HVBP Safety: HAI-1 Achievement Threshold,HVBP Safety: HAI-1 Baseline Rate,HVBP Safety: HAI-1 Benchmark,HVBP Safety: HAI-1 Improvement Points,HVBP Safety: HAI-1 Measure Score,HVBP Safety: HAI-1 Performance Rate,HVBP Safety: HAI-2 Achievement Points,HVBP Safety: HAI-2 Achievement Threshold,HVBP Safety: HAI-2 Baseline Rate,HVBP Safety: HAI-2 Benchmark,HVBP Safety: HAI-2 Improvement Points,HVBP Safety: HAI-2 Measure Score,HVBP Safety: HAI-2 Performance Rate,HVBP Safety: HAI-3 Achievement Points,HVBP Safety: HAI-3 Achievement Threshold,HVBP Safety: HAI-3 Baseline Rate,HVBP Safety: HAI-3 Benchmark,HVBP Safety: HAI-3 Improvement Points,HVBP Safety: HAI-3 Measure Score,HVBP Safety: HAI-3 Performance Rate,HVBP Safety: HAI-4 Achievement Points,HVBP Safety: HAI-4 Achievement Threshold,HVBP Safety: HAI-4 Baseline Rate,HVBP Safety: HAI-4 Benchmark,HVBP Safety: HAI-4 Improvement Points,HVBP Safety: HAI-4 Measure Score,HVBP Safety: HAI-4 Performance Rate,HVBP Safety: HAI-5 Achievement Points,HVBP Safety: HAI-5 Achievement Threshold,HVBP Safety: HAI-5 Baseline Rate,HVBP Safety: HAI-5 Benchmark,HVBP Safety: HAI-5 Improvement Points,HVBP Safety: HAI-5 Measure Score,HVBP Safety: HAI-5 Performance Rate,HVBP Safety: HAI-6 Achievement Points,HVBP Safety: HAI-6 Achievement Threshold,HVBP Safety: HAI-6 Baseline Rate,HVBP Safety: HAI-6 Benchmark,HVBP Safety: HAI-6 Improvement Points,HVBP Safety: HAI-6 Measure Score,HVBP Safety: HAI-6 Performance Rate,HVBP Safety: MORT-30-AMI Achievement Points,HVBP Safety: MORT-30-AMI Achievement Threshold,HVBP Safety: MORT-30-AMI Baseline Rate,HVBP Safety: 
MORT-30-AMI Benchmark,HVBP Safety: MORT-30-AMI Improvement Points,HVBP Safety: MORT-30-AMI Measure Score,HVBP Safety: MORT-30-AMI Performance Rate,HVBP Safety: MORT-30-HF Achievement Points,HVBP Safety: MORT-30-HF Achievement Threshold,HVBP Safety: MORT-30-HF Baseline Rate,HVBP Safety: MORT-30-HF Benchmark,HVBP Safety: MORT-30-HF Improvement Points,HVBP Safety: MORT-30-HF Measure Score,HVBP Safety: MORT-30-HF Performance Rate,HVBP Safety: MORT-30-PN Achievement Points,HVBP Safety: MORT-30-PN Achievement Threshold,HVBP Safety: MORT-30-PN Baseline Rate,HVBP Safety: MORT-30-PN Benchmark,HVBP Safety: MORT-30-PN Improvement Points,HVBP Safety: MORT-30-PN Measure Score,HVBP Safety: MORT-30-PN Performance Rate,HVBP Safety: PC-01 Achievement Points,HVBP Safety: PC-01 Achievement Threshold,HVBP Safety: PC-01 Baseline Rate,HVBP Safety: PC-01 Benchmark,HVBP Safety: PC-01 Improvement Points,HVBP Safety: PC-01 Measure Score,HVBP Safety: PC-01 Performance Rate,HVBP Safety: PSI-90 Achievement Points,HVBP Safety: PSI-90 Achievement Threshold,HVBP Safety: PSI-90 Baseline Rate,HVBP Safety: PSI-90 Benchmark,HVBP Safety: PSI-90 Improvement Points,HVBP Safety: PSI-90 Measure Score,HVBP Safety: PSI-90 Performance Rate,file_month,file_year
2950,,,,,,,,10 out of 10,670082,0 out of 10,0.457,1.247,0.0,4 out of 9,4 out of 10,0.679,0 out of 10,0.845,2.538,0.0,2 out of 9,2 out of 10,1.946,10 out of 10,0.751,0.588,0.0,9 out of 9,10 out of 10,0.000,Not Available,0.698,Not Available,0.0,Not Available,Not Available,Not Available,10 out of 10,0.799,Not Available,0.0,Not Available,10 out of 10,0.000,0 out of 10,0.75,1.115,0.0,0 out of 9,0 out of 10,1.353,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10 out of 10,0.777936,Not Available,0.547889,Not Available,10 out of 10,0.393041,12,2016
2951,,,,,,,,0 out of 10,670085,10 out of 10,0.457,Not Available,0.0,Not Available,10 out of 10,0.000,10 out of 10,0.845,Not Available,0.0,Not Available,10 out of 10,0.000,0 out of 10,0.751,Not Available,0.0,Not Available,0 out of 10,2.973,Not Available,0.698,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.799,Not Available,0.0,Not Available,Not Available,Not Available,0 out of 10,0.75,0.323,0.0,0 out of 9,0 out of 10,1.557,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10 out of 10,0.777936,Not Available,0.547889,Not Available,10 out of 10,0.446768,12,2016
2952,,,,,,,,0 out of 10,670088,0 out of 10,0.457,Not Available,0.0,Not Available,0 out of 10,0.975,3 out of 10,0.845,Not Available,0.0,Not Available,3 out of 10,0.586,0 out of 10,0.751,Not Available,0.0,Not Available,0 out of 10,0.969,Not Available,0.698,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.799,Not Available,0.0,Not Available,Not Available,Not Available,3 out of 10,0.75,Not Available,0.0,Not Available,3 out of 10,0.506,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10 out of 10,0.777936,Not Available,0.547889,Not Available,10 out of 10,0.513223,12,2016
2953,,,,,,,,Not Available,670103,Not Available,0.457,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.845,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.751,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.698,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.799,Not Available,0.0,Not Available,Not Available,Not Available,0 out of 10,0.75,Not Available,0.0,Not Available,0 out of 10,0.929,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9 out of 10,0.777936,Not Available,0.547889,Not Available,9 out of 10,0.563756,12,2016
2954,,,,,,,,Not Available,670106,Not Available,0.457,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.845,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.751,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.698,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.799,Not Available,0.0,Not Available,Not Available,Not Available,Not Available,0.75,Not Available,0.0,Not Available,Not Available,Not Available,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9 out of 10,0.777936,Not Available,0.547889,Not Available,9 out of 10,0.561992,12,2016


In [6]:
# Columns whose text values look like 'k out of 10' / 'k out of 9'.
ls_10 = []
ls_9 = []

for l in list(hvb_df):
    for value in hvb_df[l].tolist():
        # Only text can hold an 'out of' score; skip NaN and numbers explicitly
        # rather than relying on a catch-all `except` to absorb the TypeError.
        if not isinstance(value, str):
            continue
        # The first matching value decides which list the column joins.
        if 'out of 10' in value:
            ls_10.append(l)
            break
        elif 'out of 9' in value:
            ls_9.append(l)
            break


In [7]:
# Turn 'k out of N' text into the number k. The 'out of 9' columns are handled
# first, then the 'out of 10' columns; any value left as text afterwards
# (e.g. 'Not Available') becomes NaN through errors='coerce'.
for score_columns, max_points in ((ls_9, 9), (ls_10, 10)):
    points = list(range(max_points + 1))
    labels = ['{} out of {}'.format(k, max_points) for k in points]
    for l in score_columns:
        as_points = hvb_df[l].replace(labels, points)
        hvb_df[l] = pd.to_numeric(as_points, errors='coerce')

hvb_df.head()

Unnamed: 0,HVBP Safety: COMP-HIP-KNEE Achievement Points,HVBP Safety: COMP-HIP-KNEE Achievement Threshold,HVBP Safety: COMP-HIP-KNEE Baseline Rate,HVBP Safety: COMP-HIP-KNEE Benchmark,HVBP Safety: COMP-HIP-KNEE Improvement Points,HVBP Safety: COMP-HIP-KNEE Measure Score,HVBP Safety: COMP-HIP-KNEE Performance Rate,HVBP Safety: Combined SSI Measure Score,Facility ID,HVBP Safety: HAI-1 Achievement Points,HVBP Safety: HAI-1 Achievement Threshold,HVBP Safety: HAI-1 Baseline Rate,HVBP Safety: HAI-1 Benchmark,HVBP Safety: HAI-1 Improvement Points,HVBP Safety: HAI-1 Measure Score,HVBP Safety: HAI-1 Performance Rate,HVBP Safety: HAI-2 Achievement Points,HVBP Safety: HAI-2 Achievement Threshold,HVBP Safety: HAI-2 Baseline Rate,HVBP Safety: HAI-2 Benchmark,HVBP Safety: HAI-2 Improvement Points,HVBP Safety: HAI-2 Measure Score,HVBP Safety: HAI-2 Performance Rate,HVBP Safety: HAI-3 Achievement Points,HVBP Safety: HAI-3 Achievement Threshold,HVBP Safety: HAI-3 Baseline Rate,HVBP Safety: HAI-3 Benchmark,HVBP Safety: HAI-3 Improvement Points,HVBP Safety: HAI-3 Measure Score,HVBP Safety: HAI-3 Performance Rate,HVBP Safety: HAI-4 Achievement Points,HVBP Safety: HAI-4 Achievement Threshold,HVBP Safety: HAI-4 Baseline Rate,HVBP Safety: HAI-4 Benchmark,HVBP Safety: HAI-4 Improvement Points,HVBP Safety: HAI-4 Measure Score,HVBP Safety: HAI-4 Performance Rate,HVBP Safety: HAI-5 Achievement Points,HVBP Safety: HAI-5 Achievement Threshold,HVBP Safety: HAI-5 Baseline Rate,HVBP Safety: HAI-5 Benchmark,HVBP Safety: HAI-5 Improvement Points,HVBP Safety: HAI-5 Measure Score,HVBP Safety: HAI-5 Performance Rate,HVBP Safety: HAI-6 Achievement Points,HVBP Safety: HAI-6 Achievement Threshold,HVBP Safety: HAI-6 Baseline Rate,HVBP Safety: HAI-6 Benchmark,HVBP Safety: HAI-6 Improvement Points,HVBP Safety: HAI-6 Measure Score,HVBP Safety: HAI-6 Performance Rate,HVBP Safety: MORT-30-AMI Achievement Points,HVBP Safety: MORT-30-AMI Achievement Threshold,HVBP Safety: MORT-30-AMI Baseline Rate,HVBP Safety: 
MORT-30-AMI Benchmark,HVBP Safety: MORT-30-AMI Improvement Points,HVBP Safety: MORT-30-AMI Measure Score,HVBP Safety: MORT-30-AMI Performance Rate,HVBP Safety: MORT-30-HF Achievement Points,HVBP Safety: MORT-30-HF Achievement Threshold,HVBP Safety: MORT-30-HF Baseline Rate,HVBP Safety: MORT-30-HF Benchmark,HVBP Safety: MORT-30-HF Improvement Points,HVBP Safety: MORT-30-HF Measure Score,HVBP Safety: MORT-30-HF Performance Rate,HVBP Safety: MORT-30-PN Achievement Points,HVBP Safety: MORT-30-PN Achievement Threshold,HVBP Safety: MORT-30-PN Baseline Rate,HVBP Safety: MORT-30-PN Benchmark,HVBP Safety: MORT-30-PN Improvement Points,HVBP Safety: MORT-30-PN Measure Score,HVBP Safety: MORT-30-PN Performance Rate,HVBP Safety: PC-01 Achievement Points,HVBP Safety: PC-01 Achievement Threshold,HVBP Safety: PC-01 Baseline Rate,HVBP Safety: PC-01 Benchmark,HVBP Safety: PC-01 Improvement Points,HVBP Safety: PC-01 Measure Score,HVBP Safety: PC-01 Performance Rate,HVBP Safety: PSI-90 Achievement Points,HVBP Safety: PSI-90 Achievement Threshold,HVBP Safety: PSI-90 Baseline Rate,HVBP Safety: PSI-90 Benchmark,HVBP Safety: PSI-90 Improvement Points,HVBP Safety: PSI-90 Measure Score,HVBP Safety: PSI-90 Performance Rate,file_month,file_year
0,,,,,,,,,10001,,0.589,0.742,0.0,,,0.476,,0.65,0.324,0.0,,,0.306,,0.717,0.650,0.0,,,1.228,,0.738,0.000,0.0,,,0.000,,0.726,0.548,0.0,,,0.912,,0.52,0.537,0.014,,,0.644,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2023
1,,,,,,,,,10005,,0.589,0.000,0.0,,,3.311,,0.65,1.073,0.0,,,2.350,,0.717,0.508,0.0,,,0.400,,0.738,Not Available,0.0,,,Not Available,,0.726,1.938,0.0,,,0.000,,0.52,0.565,0.014,,,0.86,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2023
2,,,,,,,,,10006,,0.589,0.000,0.0,,,0.507,,0.65,0.222,0.0,,,0.602,,0.717,0.820,0.0,,,0.871,,0.738,Not Available,0.0,,,Not Available,,0.726,0.654,0.0,,,1.640,,0.52,0.426,0.014,,,0.08,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2023
3,,,,,,,,,10007,,0.589,Not Available,0.0,,,Not Available,,0.65,Not Available,0.0,,,Not Available,,0.717,Not Available,0.0,,,Not Available,,0.738,Not Available,0.0,,,Not Available,,0.726,Not Available,0.0,,,Not Available,,0.52,0.0,0.014,,,0.41,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2023
4,,,,,,,,,10011,,0.589,0.421,0.0,,,0.728,,0.65,0.598,0.0,,,0.927,,0.717,0.930,0.0,,,0.268,,0.738,Not Available,0.0,,,Not Available,,0.726,0.964,0.0,,,1.346,,0.52,0.343,0.014,,,0.607,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2023


In [8]:
# Write the final frame as a gzip-compressed pickle (pickle protocol 5).
# The path is relative to the notebook's working directory.
output_path = 'dataframes/hvbp_safety_df.pkl.gz'
hvb_df.to_pickle(output_path, compression='gzip', protocol=5)