# Generate HCAHPS dataframe

In [1]:
import pandas as pd
import numpy as np
import warnings
from IPython.utils import io
import sys
import time

warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

main_dir = '~/Desktop/Rush/CMS_HospitalArchives/'

## Define Custom Functions

In [2]:
                       
def curate(df):
    """Drop rows without a facility, normalise facility IDs, strip tab characters.

    Each step is skipped when the column it needs is absent:

    1. Rows whose 'Facility ID' or 'Facility Name' is missing are removed.
    2. 'Facility ID' is stored as a string left-padded with zeros to six
       characters (e.g. 10001 -> '010001').
    3. Tab characters are removed from the string values of every column that
       supports the ``.str`` accessor; non-string values are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A cleaned copy of ``df``.
    """
    has_id = 'Facility ID' in df.columns
    has_name = 'Facility Name' in df.columns

    # `values != np.nan` is True for every element (NaN never compares equal
    # to anything), so missing entries must be detected with notna().
    keep = np.ones(len(df), dtype=bool)
    if has_id:
        keep &= df['Facility ID'].notna().to_numpy()
    if has_name:
        keep &= df['Facility Name'].notna().to_numpy()

    # Work on a copy so the caller's frame is never mutated.
    df = df[keep].copy()

    if has_id:
        ids = df['Facility ID']
        if pd.api.types.is_float_dtype(ids) and (ids % 1 == 0).all():
            # A numeric ID column containing NaN is loaded as float; convert
            # back to int so the text form is '10001' rather than '10001.0'.
            ids = ids.astype('int64')
        # zfill pads to the full six characters, however many leading zeros
        # were lost when the column was parsed as a number.
        df['Facility ID'] = ids.astype(str).str.zfill(6)

    for col in df.columns:
        try:
            stripped = df[col].str.replace("\t", "", regex=False)
        except AttributeError:
            # Column has no .str accessor (non-text dtype): nothing to strip.
            continue
        # .str methods return NaN for non-string elements; keep the original
        # value there so mixed-type columns do not lose their numbers.
        df[col] = stripped.where(stripped.notna(), df[col])

    return df


def rename_and_fill(df):
    """Rename alternate column headers, in place, to one common set of names.

    Headers that are not present in ``df`` are ignored. After renaming, if any
    column name occurs more than once, the repeated names are printed and
    execution is stopped with ``sys.exit()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are renamed. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    # alternate header -> common header
    header_map = {
        'Provider ID': 'Facility ID',
        'Measure Start Date': 'Start Date',
        'Measure End Date': 'End Date',
        'Hospital Name': 'Facility Name',
        'County Name': 'County',
        'Provider Number': 'Facility ID',
        'Number of completed Surveys': 'Number of Completed Surveys',
        'Address 1': 'Address',
        'City/Town': 'City',
        'County/Parish': 'County Name',
        'Telephone Number': 'Phone Number',
    }
    # rename() skips keys that are not existing column labels.
    df.rename(columns=header_map, inplace=True)

    # Two source headers can map to the same target (e.g. 'Provider ID' and
    # 'Provider Number'), so check for name collisions before returning.
    header = df.columns
    repeated = list(set(header[header.duplicated(keep=False)]))
    if repeated:
        print('duplicates:', repeated)
        sys.exit()

    return df


def process2(df, lists, yr, mo):
    """Standardise one loaded file and tag it with its file year and month.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame as loaded from one CSV. Its columns are renamed in place by
        ``rename_and_fill``.
    lists : list
        Running collection of per-file column-name lists; the column names of
        this file (after renaming and curation) are appended to it.
    yr, mo :
        Values written to every row of the new 'file_year' and 'file_month'
        columns.

    Returns
    -------
    tuple
        ``(frame, lists)`` where ``frame`` has its columns sorted by name.
    """
    frame = curate(rename_and_fill(df))

    # Record the column names before the two tagging columns are added.
    lists.append(frame.columns.tolist())

    n_rows = len(frame)
    frame['file_month'] = [mo] * n_rows
    frame['file_year'] = [yr] * n_rows

    ordered = sorted(frame.columns)
    return frame.reindex(columns=ordered), lists

## Load Data Files

In [3]:
# One record per archived HCAHPS file: (file year, file month, path relative to main_dir).
archive_files = [
    ('2023', '01', '2023/hospitals_01_2023/HCAHPS-Hospital.csv'),
    ('2023', '04', '2023/hospitals_04_2023/HCAHPS-Hospital.csv'),
    ('2023', '07', '2023/hospitals_07_2023/HCAHPS-Hospital.csv'),
    ('2023', '10', '2023/hospitals_10_2023/HCAHPS-Hospital.csv'),

    ('2022', '01', '2022/hospitals_01_2022/HCAHPS-Hospital.csv'),
    ('2022', '04', '2022/hospitals_04_2022/HCAHPS-Hospital.csv'),
    ('2022', '07', '2022/hospitals_07_2022/HCAHPS-Hospital.csv'),
    ('2022', '10', '2022/hospitals_10_2022/HCAHPS-Hospital.csv'),

    ('2021', '01', '2021/hospitals_01_2021/HCAHPS-Hospital.csv'),
    ('2021', '03', '2021/hospitals_03_2021/HCAHPS-Hospital.csv'),
    ('2021', '04', '2021/hospitals_04_2021/HCAHPS-Hospital.csv'),
    ('2021', '07', '2021/hospitals_07_2021/HCAHPS-Hospital.csv'),
    ('2021', '10', '2021/hospitals_10_2021/HCAHPS-Hospital.csv'),

    ('2020', '10', '2020/hospitals_archive_10_2020/HCAHPS_Hospital.csv'),
    ('2020', '07', '2020/hospitals_archive_07_2020/HCAHPS_Hospital.csv'),
    ('2020', '04', '2020/HOSArchive_Revised_Flatfiles_20200422/HCAHPS - Hospital.csv'),
    ('2020', '01', '2020/HOSArchive_Revised_Flatfiles_20200129/HCAHPS - Hospital.csv'),

    ('2019', '10', '2019/HOSArchive_Revised_Flatfiles_20191030/HCAHPS - Hospital.csv'),
    ('2019', '07', '2019/HOSArchive_Revised_Flatfiles_20190702/HCAHPS - Hospital.csv'),
    ('2019', '04', '2019/HOSArchive_Revised_FlatFiles_20190424/HCAHPS - Hospital.csv'),
    ('2019', '03', '2019/HOSArchive_Revised_Flatfiles_20190321/HCAHPS - Hospital.csv'),

    ('2018', '10', '2018/HOSArchive_Revised_FlatFiles_20181031/HCAHPS - Hospital.csv'),
    ('2018', '07', '2018/HOSArchive_Revised_FlatFiles_20180725/HCAHPS - Hospital.csv'),
    ('2018', '05', '2018/HOSArchive_Revised_FlatFiles_20180523/HCAHPS - Hospital.csv'),
    ('2018', '01', '2018/HOSArchive_Revised_FlatFiles_20180126/HCAHPS - Hospital.csv'),

    ('2017', '10', '2017/HOSArchive_Revised_FlatFiles_20171024/HCAHPS - Hospital.csv'),
    ('2017', '07', '2017/HOSArchive_Revised_FlatFiles_20170726/HCAHPS - Hospital.csv'),
    ('2017', '04', '2017/HOSArchive_Revised_Flatfiles_20170428/HCAHPS - Hospital.csv'),

    ('2016', '12', '2016/HOSArchive_Revised_Flatfiles_20161219/HCAHPS - Hospital.csv'),
    ('2016', '11', '2016/Hospital_Revised_FlatFiles_20161110/HCAHPS - Hospital.csv'),
    ('2016', '08', '2016/HOSArchive_Revised_FlatFiles_20160810/HCAHPS - Hospital.csv'),
    ('2016', '05', '2016/HOSArchive_Revised_FlatFiles_20160504/HCAHPS - Hospital.csv'),

    ('2015', '12', '2015/HOSArchive_Revised_FlatFiles_20151210/HCAHPS - Hospital.csv'),
    ('2015', '10', '2015/HOSArchive_Revised_FlatFiles_20151008/HCAHPS - Hospital.csv'),
    ('2015', '07', '2015/HOSArchive_Revised_FlatFiles_20150716/HCAHPS - Hospital.csv'),
    ('2015', '05', '2015/HOSArchive_Revised_Flatfiles_20150506/HCAHPS - Hospital.csv'),
    ('2015', '04', '2015/HOSArchive_Revised_Flatfiles_20150416/HCAHPS - Hospital.csv'),
    ('2015', '01', '2015/HOSArchive_Revised_Flatfiles_20150122/HCAHPS - Hospital.csv'),

    ('2014', '12', '2014/HOSArchive_Revised_Flatfiles_20141218/HCAHPS - Hospital.csv'),
    ('2014', '10', '2014/HOSArchive_Revised_Flatfiles_20141023/HCAHPS - Hospital.csv'),
    ('2014', '07', '2014/HOSArchive_Revised_Flatfiles_20140717/HCAHPS - Hospital.csv'),
]

# Split the records into three index-aligned lists: position i of each list
# describes the same file.
yrs, mos, subdirs = (list(field) for field in zip(*archive_files))

# Accumulators filled while the files are read: the processed frame of each
# file, and the column names of each file.
df_list = []
lists = []

# Read every archive file, standardise it, and collect the results.
cols = []
for i, subdir in enumerate(subdirs):
    # Anything written to stdout/stderr while parsing is captured, not shown.
    with io.capture_output() as captured:
        df = pd.read_csv(main_dir + subdir, encoding="ISO-8859-1")

    # Running set of every raw column name seen so far, across all files.
    cols = list(set(cols + list(df)))
    print(subdir + ':  (rows, columns) =', df.shape)

    # yrs / mos are index-aligned with subdirs.
    df, lists = process2(df, lists, yrs[i], mos[i])
    df_list.append(df)

# Stack all files into one long frame (original row labels are kept).
df = pd.concat(df_list)

print('df.shape:', df.shape)
# Keep only rows that report both a survey count and a response rate.
missing_markers = [np.nan, float("NaN"), 'Not Available']
for required_col in ('Number of Completed Surveys', 'Survey Response Rate Percent'):
    df = df[~df[required_col].isin(missing_markers)]
print('df.shape:', df.shape)

del df_list
df.head()


2023/hospitals_01_2023/HCAHPS-Hospital.csv:  (rows, columns) = (450864, 22)
2023/hospitals_04_2023/HCAHPS-Hospital.csv:  (rows, columns) = (450585, 22)
2023/hospitals_07_2023/HCAHPS-Hospital.csv:  (rows, columns) = (449934, 22)
2023/hospitals_10_2023/HCAHPS-Hospital.csv:  (rows, columns) = (449934, 22)
2022/hospitals_01_2022/HCAHPS-Hospital.csv:  (rows, columns) = (450864, 22)
2022/hospitals_04_2022/HCAHPS-Hospital.csv:  (rows, columns) = (450864, 22)
2022/hospitals_07_2022/HCAHPS-Hospital.csv:  (rows, columns) = (450399, 22)
2022/hospitals_10_2022/HCAHPS-Hospital.csv:  (rows, columns) = (450585, 22)
2021/hospitals_01_2021/HCAHPS-Hospital.csv:  (rows, columns) = (454026, 22)
2021/hospitals_03_2021/HCAHPS-Hospital.csv:  (rows, columns) = (454026, 22)
2021/hospitals_04_2021/HCAHPS-Hospital.csv:  (rows, columns) = (456816, 22)
2021/hospitals_07_2021/HCAHPS-Hospital.csv:  (rows, columns) = (452538, 22)
2021/hospitals_10_2021/HCAHPS-Hospital.csv:  (rows, columns) = (451515, 22)
2020/hospita

Unnamed: 0,Address,City,County,End Date,Facility ID,Facility Name,HCAHPS Answer Description,HCAHPS Answer Percent,HCAHPS Answer Percent Footnote,HCAHPS Linear Mean Value,HCAHPS Measure ID,HCAHPS Question,Number of Completed Surveys,Number of Completed Surveys Footnote,Patient Survey Star Rating,Patient Survey Star Rating Footnote,Phone Number,Start Date,State,Survey Response Rate Percent,Survey Response Rate Percent Footnote,ZIP Code,file_month,file_year,County Name,Footnote
0,1108 ROSS CLARK CIRCLE,DOTHAN,HOUSTON,03/31/2022,10001,SOUTHEAST HEALTH MEDICAL CENTER,"Nurses ""always"" communicated well",75,,Not Applicable,H_COMP_1_A_P,"Patients who reported that their nurses ""Alway...",434,,Not Applicable,,(334) 793-8701,04/01/2021,AL,15,,36301,1,2023,,
1,1108 ROSS CLARK CIRCLE,DOTHAN,HOUSTON,03/31/2022,10001,SOUTHEAST HEALTH MEDICAL CENTER,"Nurses ""sometimes"" or ""never"" communicated well",8,,Not Applicable,H_COMP_1_SN_P,"Patients who reported that their nurses ""Somet...",434,,Not Applicable,,(334) 793-8701,04/01/2021,AL,15,,36301,1,2023,,
2,1108 ROSS CLARK CIRCLE,DOTHAN,HOUSTON,03/31/2022,10001,SOUTHEAST HEALTH MEDICAL CENTER,"Nurses ""usually"" communicated well",17,,Not Applicable,H_COMP_1_U_P,"Patients who reported that their nurses ""Usual...",434,,Not Applicable,,(334) 793-8701,04/01/2021,AL,15,,36301,1,2023,,
3,1108 ROSS CLARK CIRCLE,DOTHAN,HOUSTON,03/31/2022,10001,SOUTHEAST HEALTH MEDICAL CENTER,Nurse communication - linear mean score,Not Applicable,,89,H_COMP_1_LINEAR_SCORE,Nurse communication - linear mean score,434,,Not Applicable,,(334) 793-8701,04/01/2021,AL,15,,36301,1,2023,,
4,1108 ROSS CLARK CIRCLE,DOTHAN,HOUSTON,03/31/2022,10001,SOUTHEAST HEALTH MEDICAL CENTER,Nurse communication - star rating,Not Applicable,,Not Applicable,H_COMP_1_STAR_RATING,Nurse communication - star rating,434,,2,,(334) 793-8701,04/01/2021,AL,15,,36301,1,2023,,


In [4]:
# Columns carried forward from the concatenated archive frame.
ls = ['Facility ID', 'Facility Name', 'file_month', 'file_year',
      'HCAHPS Answer Description', 'HCAHPS Answer Percent',
      'HCAHPS Linear Mean Value', 'HCAHPS Measure ID', 'HCAHPS Question', 'Number of Completed Surveys', 
      'Patient Survey Star Rating',
      'Survey Response Rate Percent',
      'Start Date', 'End Date',
     ]

df = df.filter(items=ls, axis=1)

# Assign the filled column back rather than calling fillna(inplace=True) on
# df[col]: the in-place form acts on a temporary Series and leaves df unchanged
# when pandas Copy-on-Write is active, and the notebook's blanket warning
# filter would hide the resulting warning.
for text_col in ('HCAHPS Question', 'HCAHPS Measure ID'):
    df[text_col] = df[text_col].fillna('Not Available')

# Columns whose text values get normalised below.
labs = ['HCAHPS Answer Description', 'HCAHPS Answer Percent', 
        'HCAHPS Linear Mean Value', 'HCAHPS Measure ID', 'HCAHPS Question', 'Number of Completed Surveys', 
        'Patient Survey Star Rating',
        'Survey Response Rate Percent']


def _tidy_text(value):
    """Return a cleaned copy of a string value; any non-string is returned as is.

    Cleaning trims surrounding whitespace, deletes the characters \\x93 and
    \\x94 plus straight double and single quotes, and then replaces double
    spaces with single spaces in two passes.
    """
    if not isinstance(value, str):
        return value

    value = value.strip()
    for unwanted in ("\x93", "\x94", '"', "'"):
        value = value.replace(unwanted, '')
    # Two passes, so a run of up to four spaces collapses to a single one.
    for _ in range(2):
        value = value.replace("  ", ' ')
    return value


for l in labs:
    print(l)
    # Go through a plain Python list and assign the list back, so pandas
    # re-infers the column dtype from the cleaned values.
    labs2 = [_tidy_text(v) for v in df[l].tolist()]
    df[l] = labs2


# Replacement table: values on the left are rewritten to the descriptive
# label on the right wherever they appear in 'HCAHPS Question'.
# NOTE(review): the keys are written like measure identifiers; presumably some
# archive files carry the identifier in the question column — confirm.
d = {'H_CLEAN_STAR_RATING': 'Cleanliness - star rating',
     'H_COMP_1_STAR_RATING': 'Nurse communication - star rating',
     'H_COMP_2_STAR_RATING': 'Doctor communication - star rating',
     'H_COMP_3_STAR_RATING': 'Staff responsiveness - star rating',
     'H_COMP_4_STAR_RATING': 'Pain management - star rating',
     'H_COMP_5_STAR_RATING': 'Communication about medicines - star rating',
     'H_COMP_6_STAR_RATING': 'Discharge information - star rating',
     'H_COMP_7_STAR_RATING': 'Care transition - star rating',
     'H_HSP_RATING_STAR_RATING': 'Overall hospital rating - star rating',
     'H_QUIET_STAR_RATING': 'Quietness - star rating',
     'H_RECMND_STAR_RATING': 'Recommend hospital - star rating'}

# Assign the result back: replace(..., inplace=True) on df[col] acts on a
# temporary Series and leaves df unchanged under pandas Copy-on-Write.
df['HCAHPS Question'] = df['HCAHPS Question'].replace(to_replace=d)

# Reshape the long frame (one row per facility/file/measure) into a wide one
# (one row per facility/file, one column per measure and value type).
df['Measure Name'] = df['HCAHPS Question'].tolist()
df = df.filter(items=['Facility ID', 'Facility Name', 'file_month', 'file_year', 'Measure Name', 
                      'HCAHPS Answer Percent', 'HCAHPS Linear Mean Value', 'Number of Completed Surveys', 
                      'Patient Survey Star Rating', 'Survey Response Rate Percent',
                      'Start Date', 'End Date',
                     ], axis=1)

# The three value columns; each is widened separately and then merged.
var_labs = ['HCAHPS Linear Mean Value', 'HCAHPS Answer Percent', 'Patient Survey Star Rating']
main_df = pd.DataFrame(columns=['Facility ID', 'Facility Name', 'file_month', 'file_year', 
                                'Number of Completed Surveys', 'Survey Response Rate Percent', 'Measure Name',
                                'Start Date', 'End Date',])

for var_lab in var_labs:
    tdf = df.filter(items=['Facility ID', 'Facility Name', 'file_month', 'file_year', 'Number of Completed Surveys', 
                           'Survey Response Rate Percent', 'Measure Name', 'Start Date', 'End Date', var_lab], axis=1)

    # Rows flagged 'Not Applicable' carry no value of this type.
    tdf = tdf[tdf[var_lab] != 'Not Applicable']
    # Discard rows with a missing measure name or the placeholder 'None'.
    # Filtering directly replaces fillna('None', inplace=True) on
    # tdf['Measure Name'], which acts on a temporary Series and is a no-op
    # under pandas Copy-on-Write.
    tdf = tdf[tdf['Measure Name'].notna()]
    tdf = tdf[tdf['Measure Name'] != 'None']
    tdf = tdf.drop_duplicates()

    labels = tdf['Measure Name'].unique()
    tdf3 = pd.DataFrame(columns=['Facility ID', 'Facility Name', 'file_month', 'file_year', 
                                 'Number of Completed Surveys', 'Survey Response Rate Percent',
                                 'Start Date', 'End Date',])

    for label in labels:
        # Explicit copy: the column assignment below must act on an
        # independent frame, not on a slice of tdf.
        tdf2 = tdf[tdf['Measure Name'] == label].copy()

        # 'Nurse communication - star rating' -> 'Nurse communication'
        short_name = label.split(" - ")[0] if '-' in label else label

        tdf2[var_lab] = pd.to_numeric(tdf2[var_lab], errors='coerce')
        tdf2 = tdf2.rename(columns={var_lab: short_name + ': ' + var_lab})
        tdf2 = tdf2.drop(labels=['Measure Name'], axis=1)
        # Drops every column that is entirely NaN, e.g. a value column in
        # which nothing could be parsed as a number.
        tdf2 = tdf2.dropna(how='all', axis=1)

        # No `on=` given: the merge joins on all columns the frames share.
        tdf3 = tdf3.merge(tdf2, how='outer')

    main_df = main_df.merge(tdf3, how='outer')
    print(main_df.shape)

main_df = main_df.drop(labels=['Measure Name'], axis=1)

print(main_df.shape)
main_df = main_df.drop_duplicates()
print(main_df.shape)
# Keeps the first row for each facility/file combination.
# NOTE(review): any further rows for the same key are dropped without being
# reconciled — confirm that is intended.
main_df = main_df.drop_duplicates(subset=['Facility ID', 'Facility Name', 'file_month', 'file_year'])
print(main_df.shape)
main_df.head()

HCAHPS Answer Description
HCAHPS Answer Percent
HCAHPS Linear Mean Value
HCAHPS Measure ID
HCAHPS Question
Number of Completed Surveys
Patient Survey Star Rating
Survey Response Rate Percent
(196825, 20)
(196825, 95)
(196825, 107)
(196825, 106)
(196825, 106)
(196825, 106)


Unnamed: 0,Facility ID,Facility Name,file_month,file_year,Number of Completed Surveys,Survey Response Rate Percent,Start Date,End Date,Nurse communication: HCAHPS Linear Mean Value,Doctor communication: HCAHPS Linear Mean Value,Staff responsiveness: HCAHPS Linear Mean Value,Communication about medicines: HCAHPS Linear Mean Value,Discharge information: HCAHPS Linear Mean Value,Care transition: HCAHPS Linear Mean Value,Cleanliness: HCAHPS Linear Mean Value,Quietness: HCAHPS Linear Mean Value,Overall hospital rating: HCAHPS Linear Mean Value,Recommend hospital: HCAHPS Linear Mean Value,Pain management: HCAHPS Linear Mean Value,Patients who reported that their nurses Always communicated well: HCAHPS Answer Percent,Patients who reported that their nurses Sometimes or Never communicated well: HCAHPS Answer Percent,Patients who reported that their nurses Usually communicated well: HCAHPS Answer Percent,Patients who reported that their nurses Always treated them with courtesy and respect: HCAHPS Answer Percent,Patients who reported that their nurses Sometimes or Never treated them with courtesy and respect: HCAHPS Answer Percent,Patients who reported that their nurses Usually treated them with courtesy and respect: HCAHPS Answer Percent,Patients who reported that their nurses Always listened carefully to them: HCAHPS Answer Percent,Patients who reported that their nurses Sometimes or Never listened carefully to them: HCAHPS Answer Percent,Patients who reported that their nurses Usually listened carefully to them: HCAHPS Answer Percent,Patients who reported that their nurses Always explained things in a way they could understand: HCAHPS Answer Percent,Patients who reported that their nurses Sometimes or Never explained things in a way they could understand: HCAHPS Answer Percent,Patients who reported that their nurses Usually explained things in a way they could understand: HCAHPS Answer Percent,Patients who reported that their doctors Always communicated well: HCAHPS 
Answer Percent,Patients who reported that their doctors Sometimes or Never communicated well: HCAHPS Answer Percent,Patients who reported that their doctors Usually communicated well: HCAHPS Answer Percent,Patients who reported that their doctors Always treated them with courtesy and respect: HCAHPS Answer Percent,Patients who reported that their doctors Sometimes or Never treated them with courtesy and respect: HCAHPS Answer Percent,Patients who reported that their doctors Usually treated them with courtesy and respect: HCAHPS Answer Percent,Patients who reported that their doctors Always listened carefully to them: HCAHPS Answer Percent,Patients who reported that their doctors Sometimes or Never listened carefully to them: HCAHPS Answer Percent,Patients who reported that their doctors Usually listened carefully to them: HCAHPS Answer Percent,Patients who reported that their doctors Always explained things in a way they could understand: HCAHPS Answer Percent,Patients who reported that their doctors Sometimes or Never explained things in a way they could understand: HCAHPS Answer Percent,Patients who reported that their doctors Usually explained things in a way they could understand: HCAHPS Answer Percent,Patients who reported that they Always received help as soon as they wanted: HCAHPS Answer Percent,Patients who reported that they Sometimes or Never received help as soon as they wanted: HCAHPS Answer Percent,Patients who reported that they Usually received help as soon as they wanted: HCAHPS Answer Percent,Patients who reported that they Always received help after using the call button as soon as they wanted: HCAHPS Answer Percent,Patients who reported that they Sometimes or Never received help after using the call button as soon as they wanted: HCAHPS Answer Percent,Patients who reported that they Usually received help after using the call button as soon as they wanted: HCAHPS Answer Percent,Patients who reported that they Always received bathroom help as soon 
as they wanted: HCAHPS Answer Percent,Patients who reported that they Sometimes or Never received bathroom help as soon as they wanted: HCAHPS Answer Percent,Patients who reported that they Usually received bathroom help as soon as they wanted: HCAHPS Answer Percent,Patients who reported that staff Always explained about medicines before giving it to them: HCAHPS Answer Percent,Patients who reported that staff Sometimes or Never explained about medicines before giving it to them: HCAHPS Answer Percent,Patients who reported that staff Usually explained about medicines before giving it to them: HCAHPS Answer Percent,Patients who reported that when receiving new medication the staff Always communicated what the medication was for: HCAHPS Answer Percent,Patients who reported that when receiving new medication the staff Sometimes or Never communicated what the medication was for: HCAHPS Answer Percent,Patients who reported that when receiving new medication the staff Usually communicated what the medication was for.: HCAHPS Answer Percent,Patients who reported that when receiving new medication the staff Always discussed possible side effects: HCAHPS Answer Percent,Patients who reported that when receiving new medication the staff Sometimes or Never discussed possible side effects: HCAHPS Answer Percent,Patients who reported that when receiving new medication the staff Usually discussed possible side effects: HCAHPS Answer Percent,"Patients who reported that NO, they were not given information about what to do during their recovery at home: HCAHPS Answer Percent","Patients who reported that YES, they were given information about what to do during their recovery at home: HCAHPS Answer Percent","Patients who reported that NO, they did not discuss whether they would need help after discharge: HCAHPS Answer Percent","Patients who reported that YES, they did discuss whether they would need help after discharge: HCAHPS Answer Percent","Patients who reported that NO, they did 
not receive written information about possible symptoms to look out for after discharge: HCAHPS Answer Percent","Patients who reported that YES, they did receive written information about possible symptoms to look out for after discharge: HCAHPS Answer Percent",Patients who Agree they understood their care when they left the hospital: HCAHPS Answer Percent,Patients who Disagree or Strongly Disagree they understood their care when they left the hospital: HCAHPS Answer Percent,Patients who Strongly Agree they understood their care when they left the hospital: HCAHPS Answer Percent,Patients who Agree that the staff took my preferences into account when determining my health care needs: HCAHPS Answer Percent,Patients who Disagree or Strongly Disagree that the staff took my preferences into account when determining my health care needs: HCAHPS Answer Percent,Patients who Strongly Agree that the staff took my preferences into account when determining my health care needs: HCAHPS Answer Percent,Patients who Agree that they understood their responsiblities in managing their health: HCAHPS Answer Percent,Patients who Disagree or Strongly Disagree that they understood their responsiblities in managing their health: HCAHPS Answer Percent,Patients who Strongly Agree that they understood their responsiblities in managing their health: HCAHPS Answer Percent,Patients who Agree that they understood the purposes of their medications when leaving the hospital: HCAHPS Answer Percent,Patients who Disagree or Strongly Disagree that they understood the purposes of their medications when leaving the hospital: HCAHPS Answer Percent,Patients who Strongly Agree that they understood the purposes of their medications when leaving the hospital: HCAHPS Answer Percent,Patients who reported that their room and bathroom were Always clean: HCAHPS Answer Percent,Patients who reported that their room and bathroom were Sometimes or Never clean: HCAHPS Answer Percent,Patients who reported that their 
room and bathroom were Usually clean: HCAHPS Answer Percent,Patients who reported that the area around their room was Always quiet at night: HCAHPS Answer Percent,Patients who reported that the area around their room was Sometimes or Never quiet at night: HCAHPS Answer Percent,Patients who reported that the area around their room was Usually quiet at night: HCAHPS Answer Percent,Patients who gave their hospital a rating of 6 or lower on a scale from 0 (lowest) to 10 (highest): HCAHPS Answer Percent,Patients who gave their hospital a rating of 7 or 8 on a scale from 0 (lowest) to 10 (highest): HCAHPS Answer Percent,Patients who gave their hospital a rating of 9 or 10 on a scale from 0 (lowest) to 10 (highest): HCAHPS Answer Percent,"Patients who reported NO, they would probably not or definitely not recommend the hospital: HCAHPS Answer Percent","Patients who reported YES, they would definitely recommend the hospital: HCAHPS Answer Percent","Patients who reported YES, they would probably recommend the hospital: HCAHPS Answer Percent",Patients who reported that their pain was Always well controlled: HCAHPS Answer Percent,Patients who reported that their pain was Sometimes or Never well controlled: HCAHPS Answer Percent,Patients who reported that their pain was Usually well controlled: HCAHPS Answer Percent,Nurse communication: Patient Survey Star Rating,Doctor communication: Patient Survey Star Rating,Staff responsiveness: Patient Survey Star Rating,Communication about medicines: Patient Survey Star Rating,Discharge information: Patient Survey Star Rating,Care transition: Patient Survey Star Rating,Cleanliness: Patient Survey Star Rating,Quietness: Patient Survey Star Rating,Overall hospital rating: Patient Survey Star Rating,Recommend hospital: Patient Survey Star Rating,Summary star rating: Patient Survey Star Rating,Pain management: Patient Survey Star Rating
0,10001,SOUTHEAST HEALTH MEDICAL CENTER,1,2023,434,15,04/01/2021,03/31/2022,89.0,90.0,78.0,78.0,89.0,82.0,84.0,85.0,87.0,87.0,,75.0,8.0,17.0,80.0,7.0,13.0,72.0,8.0,20.0,72.0,9.0,19.0,78.0,6.0,16.0,85.0,4.0,11.0,76.0,7.0,17.0,74.0,6.0,20.0,57.0,20.0,23.0,53.0,23.0,24.0,60.0,16.0,24.0,62.0,20.0,18.0,74.0,11.0,15.0,50.0,29.0,21.0,11.0,89.0,15.0,85.0,8.0,92.0,42.0,5.0,53.0,46.0,8.0,46.0,41.0,4.0,55.0,39.0,4.0,57.0,68.0,12.0,20.0,66.0,8.0,26.0,10.0,21.0,69.0,6.0,69.0,25.0,,,,2.0,3.0,2.0,4.0,4.0,3.0,3.0,4.0,3.0,3.0,3.0,
1,10005,MARSHALL MEDICAL CENTERS,1,2023,717,16,04/01/2021,03/31/2022,90.0,92.0,76.0,74.0,85.0,81.0,82.0,85.0,86.0,84.0,,77.0,7.0,16.0,84.0,5.0,11.0,75.0,8.0,17.0,72.0,8.0,20.0,81.0,4.0,15.0,87.0,3.0,10.0,79.0,4.0,17.0,78.0,6.0,16.0,51.0,18.0,31.0,49.0,18.0,33.0,52.0,17.0,31.0,59.0,23.0,18.0,75.0,11.0,14.0,44.0,35.0,21.0,15.0,85.0,17.0,83.0,13.0,87.0,45.0,6.0,49.0,50.0,8.0,42.0,47.0,5.0,48.0,38.0,4.0,58.0,66.0,14.0,20.0,64.0,8.0,28.0,10.0,22.0,68.0,7.0,63.0,30.0,,,,3.0,4.0,2.0,3.0,3.0,3.0,3.0,4.0,3.0,3.0,3.0,
2,10006,NORTH ALABAMA MEDICAL CENTER,1,2023,1358,17,04/01/2021,03/31/2022,87.0,89.0,74.0,69.0,84.0,77.0,74.0,84.0,82.0,80.0,,72.0,9.0,19.0,79.0,6.0,15.0,69.0,10.0,21.0,68.0,10.0,22.0,76.0,7.0,17.0,84.0,4.0,12.0,73.0,8.0,19.0,70.0,9.0,21.0,49.0,22.0,29.0,45.0,23.0,32.0,53.0,21.0,26.0,53.0,28.0,19.0,66.0,15.0,19.0,39.0,41.0,20.0,16.0,84.0,17.0,83.0,14.0,86.0,47.0,9.0,44.0,52.0,11.0,37.0,49.0,8.0,43.0,42.0,7.0,51.0,54.0,23.0,23.0,62.0,9.0,29.0,17.0,23.0,60.0,13.0,58.0,29.0,,,,2.0,3.0,1.0,2.0,3.0,2.0,1.0,4.0,2.0,2.0,2.0,
3,10007,MIZELL MEMORIAL HOSPITAL,1,2023,173,23,04/01/2021,03/31/2022,90.0,94.0,87.0,79.0,88.0,85.0,84.0,88.0,88.0,88.0,,79.0,6.0,15.0,81.0,7.0,12.0,74.0,7.0,19.0,81.0,5.0,14.0,89.0,4.0,7.0,90.0,5.0,5.0,87.0,5.0,8.0,89.0,2.0,9.0,73.0,12.0,15.0,75.0,12.0,13.0,68.0,13.0,19.0,62.0,14.0,24.0,78.0,1.0,21.0,45.0,29.0,26.0,12.0,88.0,12.0,88.0,12.0,88.0,37.0,4.0,59.0,44.0,5.0,51.0,37.0,4.0,59.0,32.0,2.0,66.0,70.0,12.0,18.0,70.0,6.0,24.0,8.0,16.0,76.0,7.0,74.0,19.0,,,,3.0,5.0,4.0,4.0,4.0,4.0,3.0,5.0,4.0,4.0,4.0,
4,10008,CRENSHAW COMMUNITY HOSPITAL,1,2023,47,22,04/01/2021,03/31/2022,,,,,,,,,,,,86.0,4.0,10.0,,,,,,,,,,96.0,0.0,4.0,,,,,,,,,,72.0,14.0,14.0,,,,,,,66.0,17.0,17.0,,,,,,,17.0,83.0,,,,,44.0,5.0,51.0,,,,,,,,,,70.0,13.0,17.0,66.0,12.0,22.0,2.0,23.0,75.0,0.0,65.0,35.0,,,,,,,,,,,,,,,


## Save the dataframe

In [5]:
# Release the intermediate frames; only main_df is needed from here on.
del df, tdf, tdf2, tdf3
print(main_df.shape)

# The saved frame omits the measurement-period columns.
tdf = main_df.drop(columns=['Start Date', 'End Date']).drop_duplicates()
print(tdf.shape)

tdf.to_pickle(
    '~/GitHub/hospitals-data-archive/dataframes/partial_dataframes/hcahps_df.pkl.gz',
    protocol=5,
    compression='gzip',
)


(196825, 106)
(196825, 104)


## Save measurement dates

In [6]:
# Columns to keep as is
id_cols = ['Facility ID', 'Facility Name', 'file_month', 'file_year', 'Start Date', 'End Date']

# Melt the specific columns and create the 'Measure' and 'Score' columns
measures_df = main_df.melt(id_vars=id_cols, var_name='Measure Name', value_name='Score')
measures_df.drop(labels=['Score', 'Facility ID', 'Facility Name'], axis=1, inplace=True)

print(measures_df.shape)
measures_df.drop_duplicates(inplace=True)
measures_df.reset_index(drop=True, inplace=True)
print(measures_df.shape)

measures_df['Start Date'] = pd.to_datetime(measures_df['Start Date'])
measures_df['End Date'] = pd.to_datetime(measures_df['End Date'])
measures_df.to_csv('~/GitHub/hospitals-data-archive/measure_dates/hcahps_df.csv')

measures_df.head()


(19682500, 5)
(4100, 5)


Unnamed: 0,file_month,file_year,Start Date,End Date,Measure Name
0,1,2023,2021-04-01,2022-03-31,Number of Completed Surveys
1,4,2023,2021-07-01,2022-06-30,Number of Completed Surveys
2,7,2023,2021-10-01,2022-09-30,Number of Completed Surveys
3,10,2023,2021-10-01,2022-09-30,Number of Completed Surveys
4,1,2022,2020-07-01,2021-03-31,Number of Completed Surveys
