# Generate Outpatient Imaging Efficiency dataframe

In [1]:
import os
import sys
import warnings

import numpy as np
import pandas as pd
from IPython.utils import io
# Notebook-wide configuration.
# NOTE: this silences every warning, including pandas deprecation warnings.
warnings.filterwarnings('ignore')

# Show every column and row when a frame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Root directory of the archive files. Set the CMS_HOSPITAL_ARCHIVES_DIR
# environment variable to point elsewhere; the original location is used
# when it is unset. os.path.join(..., '') guarantees a trailing separator,
# which later cells rely on because they build paths by string concatenation.
main_dir = os.path.join(
    os.environ.get('CMS_HOSPITAL_ARCHIVES_DIR',
                   '/Users/kenlocey/Desktop/Rush/CMS_HospitalArchives/'),
    '',
)

In [2]:
# Load the combined facility-level table and keep only the columns needed
# to build one score column per measure.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
source_pkl = (main_dir
              + 'Outpatient_Imaging_Efficiency/CombinedFiles_Outpatient_Imaging_Efficiency/'
              + 'Facility.pkl')
df = pd.read_pickle(source_pkl)
print('Outpatient imaging efficiency df:', df.shape)

# filter(items=...) keeps the listed columns that exist and silently skips
# any that are absent.
wanted_cols = ['Facility ID', 'file_month', 'file_year',
               'Measure ID', 'Measure Name', 'Score']
df = df.filter(items=wanted_cols, axis=1)

# Discard rows whose measure ID is the placeholder 'Not given'.
has_measure_id = ~df['Measure ID'].isin(['Not given'])
df = df[has_measure_id]
print(df.shape)

# Inspect the distinct measure IDs and names present in the data.
print(df['Measure ID'].unique())
print('\n')
print(df['Measure Name'].unique())

df.head()

Outpatient imaging efficiency df: (885163, 16)
(885163, 6)
['OP-10' 'OP-13' 'OP-39' 'OP-8' 'OP-11' 'OP-14' 'OP-9' 'OP_10' 'OP_11'
 'OP_13' 'OP_14' 'OP_8' 'OP_9']


['Abdomen CT Use of Contrast Material'
 'Outpatients who got cardiac imaging stress tests before low-risk outpatient surgery'
 'Breast Cancer Screening Recall Rates'
 'MRI Lumbar Spine for Low Back Pain' 'Thorax CT Use of Contrast Material'
 'Outpatients with brain CT scans who got a sinus CT scan at the same time'
 'Mammography Follow-up Rates']


Unnamed: 0,Facility ID,file_month,file_year,Measure ID,Measure Name,Score
0,10001,1,2023,OP-10,Abdomen CT Use of Contrast Material,5.7
1,10001,1,2023,OP-13,Outpatients who got cardiac imaging stress tes...,6.8
2,10001,1,2023,OP-39,Breast Cancer Screening Recall Rates,5.5
3,10001,1,2023,OP-8,MRI Lumbar Spine for Low Back Pain,42.5
4,10005,1,2023,OP-10,Abdomen CT Use of Contrast Material,13.8


In [3]:
# The data contains both underscore and hyphen spellings of the same
# measure ID (e.g. 'OP_10' and 'OP-10'); map the underscore forms onto the
# hyphenated ones so each measure ends up with a single ID.
measure_id_fixes = {
    'OP_10': 'OP-10',
    'OP_11': 'OP-11',
    'OP_13': 'OP-13',
    'OP_14': 'OP-14',
    'OP_8': 'OP-8',
    'OP_9': 'OP-9',
}

# Assign the result back instead of calling replace(..., inplace=True) on
# the column selection: under pandas Copy-on-Write the in-place form acts
# on a temporary object and leaves `df` unchanged.
df['Measure ID'] = df['Measure ID'].replace(measure_id_fixes)

# Combine ID and name into one descriptive label per measure.
# NOTE: re-running this cell appends the measure name again; re-run the
# loading cell first.
df['Measure ID'] = df['Measure ID'] + ' — ' + df['Measure Name']
measures = sorted(df['Measure ID'].unique())

for m in measures:
    print(m)
    


OP-10 — Abdomen CT Use of Contrast Material
OP-11 — Thorax CT Use of Contrast Material
OP-13 — Outpatients who got cardiac imaging stress tests before low-risk outpatient surgery
OP-14 — Outpatients with brain CT scans who got a sinus CT scan at the same time
OP-39 — Breast Cancer Screening Recall Rates
OP-8 — MRI Lumbar Spine for Low Back Pain
OP-9 — Mammography Follow-up Rates


In [4]:
# Reshape the long table (one row per facility / file period / measure) into
# a wide table with one numeric column per measure.
key_cols = ['Facility ID', 'file_month', 'file_year']
label_cols = ['Measure ID', 'Measure Name']

measure_ids = sorted(df['Measure ID'].unique())

# Seed with an empty frame that holds only the key columns; each measure's
# table is outer-merged onto it in turn.
main_df = pd.DataFrame(columns=key_cols)

for m_id in measure_ids:
    # Take an explicit copy: the rows are modified below, and mutating a
    # filtered slice of `df` is what triggers SettingWithCopyWarning.
    measure_df = df[df['Measure ID'] == m_id].copy()

    # Every column that is neither a key nor a label holds measure values.
    value_cols = [c for c in measure_df.columns
                  if c not in key_cols and c not in label_cols]

    # Non-numeric entries become NaN.
    for col in value_cols:
        measure_df[col] = pd.to_numeric(measure_df[col], errors='coerce')

    # Tag each value column with its measure label, e.g. '<measure> (Score)',
    # then drop the label columns, which are constant within this subset.
    measure_df = (
        measure_df
        .rename(columns={c: m_id + ' (' + c + ')' for c in value_cols})
        .drop(columns=label_cols)
    )

    # Outer merge keeps facility/period rows that lack this measure.
    main_df = main_df.merge(measure_df, on=key_cols, how='outer')


df = main_df.copy(deep=True)
del main_df
print(df.shape)
# Remove measure columns that contain no values at all.
df = df.dropna(how='all', axis=1)
print(df.shape)
df.head()


(173354, 10)
(173354, 10)


Unnamed: 0,Facility ID,file_month,file_year,OP-10 — Abdomen CT Use of Contrast Material (Score),OP-11 — Thorax CT Use of Contrast Material (Score),OP-13 — Outpatients who got cardiac imaging stress tests before low-risk outpatient surgery (Score),OP-14 — Outpatients with brain CT scans who got a sinus CT scan at the same time (Score),OP-39 — Breast Cancer Screening Recall Rates (Score),OP-8 — MRI Lumbar Spine for Low Back Pain (Score),OP-9 — Mammography Follow-up Rates (Score)
0,10001,1,2023,5.7,,6.8,,5.5,42.5,
1,10005,1,2023,13.8,,3.8,,6.7,54.5,
2,10006,1,2023,11.0,,1.7,,9.4,41.2,
3,10007,1,2023,5.9,,,,25.8,,
4,10008,1,2023,2.1,,,,,,


In [5]:
# Prefix every measure column with the dataset name; the key columns keep
# their original names.
unprefixed_cols = ('Facility ID', 'file_month', 'file_year')
prefixed_names = {
    col: 'Outpatient Imaging Efficiency: ' + col
    for col in list(df)
    if col not in unprefixed_cols
}
df.rename(columns=prefixed_names, inplace=True)

print(df.shape)
df.head()

(173354, 10)


Unnamed: 0,Facility ID,file_month,file_year,Outpatient Imaging Efficiency: OP-10 — Abdomen CT Use of Contrast Material (Score),Outpatient Imaging Efficiency: OP-11 — Thorax CT Use of Contrast Material (Score),Outpatient Imaging Efficiency: OP-13 — Outpatients who got cardiac imaging stress tests before low-risk outpatient surgery (Score),Outpatient Imaging Efficiency: OP-14 — Outpatients with brain CT scans who got a sinus CT scan at the same time (Score),Outpatient Imaging Efficiency: OP-39 — Breast Cancer Screening Recall Rates (Score),Outpatient Imaging Efficiency: OP-8 — MRI Lumbar Spine for Low Back Pain (Score),Outpatient Imaging Efficiency: OP-9 — Mammography Follow-up Rates (Score)
0,10001,1,2023,5.7,,6.8,,5.5,42.5,
1,10005,1,2023,13.8,,3.8,,6.7,54.5,
2,10006,1,2023,11.0,,1.7,,9.4,41.2,
3,10007,1,2023,5.9,,,,25.8,,
4,10008,1,2023,2.1,,,,,,


In [6]:
# Write the wide measure table to disk (pickle protocol 5). The path is
# relative to the notebook's working directory.
output_pkl = 'dataframe_data/Outpatient_Imaging_Efficiency_df.pkl'
df.to_pickle(output_pkl, protocol=5)