In [1]:
import pandas as pd

In [2]:
from enum import Enum

class FileFormat(Enum):
    XML = "XML"
    CSV = "CSV"
    JSON = "JSON"
    XLSX = "XLSX"

class FileSubtype(Enum):
    AURORA = "AURORA"
    
class TypeRecognizer(object):
    def _looks_like_aurora_xml(self, file_path):
        try:
            df = pd.read_xml(file_path)
        except:
            return False
        
        columns = df.columns.to_list()
        
        if len(columns) > 10:
            check_colnames = ['Facility', 'Type', 'Chargecode_DRG_CPT', 'Description', 
                              'Rev', 'CPT', 'NDC', 'Self_Pay', 'Min', 'Max']
            for col_name in check_colnames:
                if not col_name in columns:
                    return False
            
            remaining_colnames = list(set(columns) - set(check_colnames))
            for col_name in remaining_colnames:
                if " " in col_name:
                    return False
                
                if not col_name.startswith("_"):
                    return False
                
                components = col_name.split("_")
                
                if len(components) < 3:
                    return False
                
                if len(components[1]) != 4 and components[-1] != 'Fee':
                    return False
                
                return True
        
        return False
    
    def recognize_format_and_subtype(self, file_path):
        file_format = None
        subtype = None
        
        if file_path.endswith(".xml") or file_path.endswith(".XML"):
            file_format = FileFormat.XML
            
            if self._looks_like_aurora_xml(file_path):
                subtype = FileSubtype.AURORA
        
        return file_format, subtype

In [3]:
TARGET_COLUMNS = ['filename', 'file_last_updated', 'hospital_ccn', 'hospital_ein', 'code_meta', 
                  'description', 'procedure_code', 'code_type', 'code', 'rev_code',
                  'modifier', 'ndc', 'apc', 'billing_class', 'patient_class', 'billed_quantity',
                  'rev_desc', 'quantity_desc', 'payer_desc', 'payer_category', 'payer_name',
                  'plan_name', 'plan_id', 'plan_type', 'is_medicare_adv', 'rate', 'rate_method',
                  'rate_desc', 'url', 'permalink']

class AbstractStandardChargesConverter(object):
    def convert(self, url, file_path, ccn):
        pass

In [4]:
def cleanup_dollar_value(value):
    if type(value) == str:
        return value.replace(",", "").replace("$", "")

    return value

def cleanup_values(values):
    return list(map(lambda value: cleanup_dollar_value(value), values))

def pad_rev_code_if_needed(rev_code):
    if type(rev_code) == str and rev_code != 'na':
        if len(rev_code) == 3:
            return '0' + rev_code
        elif len(rev_code) == 2:
            return '00' + rev_code
        elif len(rev_code) == 1:
            return '000' + rev_code
    
    return rev_code

pad_rev_code_if_needed('111')

'0111'

In [5]:
class AuroraXMLConverter(AbstractStandardChargesConverter):
    def __init__(self):
        super().__init__()
    
    def convert(self, url, file_path, ccn):
        df_out = pd.DataFrame(columns=TARGET_COLUMNS)
        
        df_in = pd.read_xml(file_path)
        # HACK: https://stackoverflow.com/a/50132405
        df_in['NDC'] = df_in['NDC'].fillna('na')
        df_in['NDC'] = df_in['NDC'].astype(str)
        df_in['Rev'] = df_in['Rev'].fillna('na')
        df_in['Rev'] = df_in['Rev'].astype(str)
        df_in['Chargecode_DRG_CPT'] = df_in['Chargecode_DRG_CPT'].astype(str)
    
        columns = df_in.columns.to_list()
        money_columns = list(filter(lambda c: c.startswith('_'), columns)) + ['Self_Pay', 'Min', 'Max']

        df_in[money_columns] = df_in[money_columns].apply(lambda values: cleanup_values(values))

        remaining_cols = list(set(columns) - set(money_columns))

        df_intermediate = pd.DataFrame(df_in)
        df_intermediate = pd.melt(df_intermediate, id_vars=remaining_cols)
        df_intermediate = df_intermediate.rename(columns={
            'variable': 'payer_desc',
            'value': 'rate',
            'Description': 'procedure_desc',
            'Rev': 'rev_code',
            'NDC': 'ndc',
            'Chargecode_DRG_CPT': 'code'
        })
        del df_intermediate['Facility']
        del df_intermediate['CPT']

        df_intermediate['ndc'] = df_intermediate['ndc'].apply(lambda ndc: ndc.replace('-', ''))
        df_intermediate['rev_code'] = df_intermediate['rev_code'].apply(lambda rev_code: rev_code.split('.')[0])
        df_intermediate['rev_code'] = df_intermediate['rev_code'].apply(pad_rev_code_if_needed)
        df_intermediate['patient_class'] = df_intermediate['Type'].replace(
            'CHARGE', 'na').replace(
            'IP DRG*', 'inpatient').replace(
            'OP PROC*', 'outpatient')
        df_intermediate['code_type'] = df_intermediate['Type'].replace(
            'IP DRG*', 'ms-drg').replace(
            'OP PROC*', 'hcpcs_cpt').replace(
            'CHARGE', 'cdm')
        df_intermediate['code_meta'] = df_intermediate['Type'].replace(
            'IP DRG*', 'drg').replace(
            'OP PROC*', 'cpt').replace(
            'CHARGE', 'cdm')
        
        del df_intermediate['Type']

        def get_payer_category_from_payer_desc(payer_desc):
            if payer_desc == "Min":
                return "min"
            elif payer_desc == "Max":
                return "max"
            elif payer_desc == "Self_Pay":
                return "cash"
            elif payer_desc.endswith("_Fee"):
                return "gross"
            
            return "payer"

        df_intermediate['payer_category'] = df_intermediate['payer_desc'].apply(get_payer_category_from_payer_desc)
        
        def get_payer_name_from_payer_desc(payer_desc):
            if 'Common_Ground' in payer_desc:
                return 'Common Ground'
            elif 'Health_EOS' in payer_desc:
                return 'Health EOS'
            elif 'Aetna' in payer_desc:
                return 'Aetna'
            elif 'Anthem' in payer_desc:
                return 'Anthem'
            elif 'Aurora' in payer_desc:
                return 'Aurora'
            elif 'Centivo' in payer_desc:
                return 'Centivo'
            elif 'Cigna' in payer_desc:
                return 'Cigna'
            elif 'Common_Ground' in payer_desc:
                return 'Common Ground'
            elif 'Everpointe' in payer_desc:
                return 'Everpointe'
            elif 'HealthPartners' in payer_desc:
                return 'HealthPartners'
            elif 'HPS' in payer_desc:
                return 'HPS'
            elif 'HST' in payer_desc:
                return 'HST'
            elif 'Humana' in payer_desc:
                return 'Humana'
            elif 'Molina' in payer_desc:
                return 'Molina'
            elif 'Quartz_One' in payer_desc:
                return 'Quartz One'
            elif 'Trilogy' in payer_desc:
                return 'Trilogy'
            elif 'UHC' in payer_desc:
                return 'UHC'
            elif 'WPS' in payer_desc:
                return 'WPS'
        
            return ''

        df_intermediate['payer_name'] = df_intermediate['payer_desc'].apply(get_payer_name_from_payer_desc)

        filename = file_path.split("/")[0]
        hospital_ein = filename.split("_")[0]

        df_intermediate['filename'] = filename
        df_intermediate['hospital_ein'] = hospital_ein
        df_intermediate['hospital_ccn'] = ccn
        df_intermediate['url'] = url
        df_intermediate['file_last_updated'] = '2023-01-01' # FIXME: refrain from hardcoding this; determine this field from _Fee column name
        df_intermediate['unique_procedure_id'] = 'na'
        df_intermediate['internal_code'] = 'na'
        df_intermediate['billing_class'] = 'na'
        df_intermediate['procedure_code'] = 'na'
        df_intermediate['modifier'] = 'na'
        df_intermediate['apc'] = 'na'
        df_intermediate['billed_quantity'] = -1

        def get_plan_type_from_payer_desc(payer_desc):
            components = payer_desc.split('_')
            last_component = components[-1]
            if last_component in ["HMO", "PPO", "HPN", "EPO"]:
                return last_component
            
            return ''
            
        df_intermediate['plan_type'] = df_intermediate['payer_desc'].apply(get_plan_type_from_payer_desc)
        
        df_out = pd.DataFrame(columns=TARGET_COLUMNS)

        df_out = df_out.append(df_intermediate)
        
        return df_out


In [6]:
import subprocess

from urllib.parse import urlparse

# TODO: write some code to retrieve this mapping based on some criteria (e.g. hospital name substring) in the other DB.
ccn_to_url = {
    '520034' : "https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/391211629_aurora-medical-center-manitowoc-county_standardcharges.xml",
    '520035' : "https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390930748_aurora-sheboygan-memorial-medical-center_standardcharges.xml",
    '520198' : "https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/391027676_aurora-medical-center-oshkosh_standardcharges.xml",
    '520102' : 'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390806347_aurora-lakeland-medical-center_standardcharges.xml',
    '520189' : 'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390806347_aurora-medical-center-kenosha_standardcharges.xml',
    '520038' : 'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/391150165_aurora-medical-center-washington-county_standardcharges.xml',
    '520059' : 'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390806347_aurora-medical-center-burlington_standardcharges.xml',
    '520113' : 'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/391528430_aurora-medical-center-bay-area_standardcharges.xml',
    '52T206' : 'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390806347_aurora-medical-center-summit_standardcharges.xml',
    '520206' : 'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390806347_aurora-medical-center-summit_standardcharges.xml',
    '520207' : 'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/272953799_aurora-medical-center-grafton_standardcharges.xml',
    '520193' : 'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/391947472_aurora-baycare-medical-center_standardcharges.xml',
    '524000' : 'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390872192_aurora-psychiatric-hospital_standardcharges.xml'
}

tasks = []

for ccn in ccn_to_url.keys():
    url = ccn_to_url[ccn]
    filename = urlparse(url).path.split('/')[-1]
    tasks.append((ccn, url, filename))
    
tasks


[('520034',
  'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/391211629_aurora-medical-center-manitowoc-county_standardcharges.xml',
  '391211629_aurora-medical-center-manitowoc-county_standardcharges.xml'),
 ('520035',
  'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390930748_aurora-sheboygan-memorial-medical-center_standardcharges.xml',
  '390930748_aurora-sheboygan-memorial-medical-center_standardcharges.xml'),
 ('520198',
  'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/391027676_aurora-medical-center-oshkosh_standardcharges.xml',
  '391027676_aurora-medical-center-oshkosh_standardcharges.xml'),
 ('520102',
  'https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390806347_aurora-lakeland-medical-center_standardcharges.xml',
  '390806347_aurora-lakeland-medical-center_standardcharges.xml'),
 ('520189',
  'https://www.aur

In [7]:
recognizer = TypeRecognizer()
converter = AuroraXMLConverter()

for task in tasks:
    ccn, url, filename = task
    
    subprocess.run(['wget', '--no-clobber', url, '-O', filename])
    
    file_format, subtype = recognizer.recognize_format_and_subtype(filename)
    if file_format == FileFormat.XML:
        if subtype == FileSubtype.AURORA:
            df_out = converter.convert(url, filename, ccn)
            print(df_out)
            df_out.to_csv(ccn + '.csv', index=False)

File ‘391211629_aurora-medical-center-manitowoc-county_standardcharges.xml’ already there; not retrieving.
  df_out = df_out.append(df_intermediate)


                                                 filename file_last_updated  \
0       391211629_aurora-medical-center-manitowoc-coun...        2023-01-01   
1       391211629_aurora-medical-center-manitowoc-coun...        2023-01-01   
2       391211629_aurora-medical-center-manitowoc-coun...        2023-01-01   
3       391211629_aurora-medical-center-manitowoc-coun...        2023-01-01   
4       391211629_aurora-medical-center-manitowoc-coun...        2023-01-01   
...                                                   ...               ...   
111809  391211629_aurora-medical-center-manitowoc-coun...        2023-01-01   
111810  391211629_aurora-medical-center-manitowoc-coun...        2023-01-01   
111811  391211629_aurora-medical-center-manitowoc-coun...        2023-01-01   
111812  391211629_aurora-medical-center-manitowoc-coun...        2023-01-01   
111813  391211629_aurora-medical-center-manitowoc-coun...        2023-01-01   

       hospital_ccn hospital_ein code_meta descript

--2023-04-08 08:51:52--  https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390930748_aurora-sheboygan-memorial-medical-center_standardcharges.xml
Resolving www.aurorahealthcare.org (www.aurorahealthcare.org)... 18.155.181.117, 18.155.181.94, 18.155.181.113, ...
Connecting to www.aurorahealthcare.org (www.aurorahealthcare.org)|18.155.181.117|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 8324254 (7.9M) [text/xml]
Saving to: ‘390930748_aurora-sheboygan-memorial-medical-center_standardcharges.xml’

     0K .......... .......... .......... .......... ..........  0%  854K 9s
    50K .......... .......... .......... .......... ..........  1%  847K 9s
   100K .......... .......... .......... .......... ..........  1% 60.4M 6s
   150K .......... .......... .......... .......... ..........  2% 1.20M 6s
   200K .......... .......... .......... .......... ..........  3% 2.72M 6s
   250K .......... .......... .......... .......... .

                                                 filename file_last_updated  \
0       390930748_aurora-sheboygan-memorial-medical-ce...        2023-01-01   
1       390930748_aurora-sheboygan-memorial-medical-ce...        2023-01-01   
2       390930748_aurora-sheboygan-memorial-medical-ce...        2023-01-01   
3       390930748_aurora-sheboygan-memorial-medical-ce...        2023-01-01   
4       390930748_aurora-sheboygan-memorial-medical-ce...        2023-01-01   
...                                                   ...               ...   
144998  390930748_aurora-sheboygan-memorial-medical-ce...        2023-01-01   
144999  390930748_aurora-sheboygan-memorial-medical-ce...        2023-01-01   
145000  390930748_aurora-sheboygan-memorial-medical-ce...        2023-01-01   
145001  390930748_aurora-sheboygan-memorial-medical-ce...        2023-01-01   
145002  390930748_aurora-sheboygan-memorial-medical-ce...        2023-01-01   

       hospital_ccn hospital_ein code_meta descript

--2023-04-08 08:51:59--  https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/391027676_aurora-medical-center-oshkosh_standardcharges.xml
Resolving www.aurorahealthcare.org (www.aurorahealthcare.org)... 18.155.181.94, 18.155.181.113, 18.155.181.117, ...
Connecting to www.aurorahealthcare.org (www.aurorahealthcare.org)|18.155.181.94|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 8723816 (8.3M) [text/xml]
Saving to: ‘391027676_aurora-medical-center-oshkosh_standardcharges.xml’

     0K .......... .......... .......... .......... ..........  0%  845K 10s
    50K .......... .......... .......... .......... ..........  1%  842K 10s
   100K .......... .......... .......... .......... ..........  1%  133M 7s
   150K .......... .......... .......... .......... ..........  2% 1.10M 7s
   200K .......... .......... .......... .......... ..........  2% 3.23M 6s
   250K .......... .......... .......... .......... ..........  3%  152M 5

                                                 filename file_last_updated  \
0       391027676_aurora-medical-center-oshkosh_standa...        2023-01-01   
1       391027676_aurora-medical-center-oshkosh_standa...        2023-01-01   
2       391027676_aurora-medical-center-oshkosh_standa...        2023-01-01   
3       391027676_aurora-medical-center-oshkosh_standa...        2023-01-01   
4       391027676_aurora-medical-center-oshkosh_standa...        2023-01-01   
...                                                   ...               ...   
152028  391027676_aurora-medical-center-oshkosh_standa...        2023-01-01   
152029  391027676_aurora-medical-center-oshkosh_standa...        2023-01-01   
152030  391027676_aurora-medical-center-oshkosh_standa...        2023-01-01   
152031  391027676_aurora-medical-center-oshkosh_standa...        2023-01-01   
152032  391027676_aurora-medical-center-oshkosh_standa...        2023-01-01   

       hospital_ccn hospital_ein code_meta descript

--2023-04-08 08:52:07--  https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390806347_aurora-lakeland-medical-center_standardcharges.xml
Resolving www.aurorahealthcare.org (www.aurorahealthcare.org)... 18.155.181.117, 18.155.181.94, 18.155.181.113, ...
Connecting to www.aurorahealthcare.org (www.aurorahealthcare.org)|18.155.181.117|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6389410 (6.1M) [text/xml]
Saving to: ‘390806347_aurora-lakeland-medical-center_standardcharges.xml’

     0K .......... .......... .......... .......... ..........  0%  851K 7s
    50K .......... .......... .......... .......... ..........  1%  855K 7s
   100K .......... .......... .......... .......... ..........  2% 21.1M 5s
   150K .......... .......... .......... .......... ..........  3% 1.23M 5s
   200K .......... .......... .......... .......... ..........  4% 2.26M 4s
   250K .......... .......... .......... .......... ..........  4% 28.6M 

                                                 filename file_last_updated  \
0       390806347_aurora-lakeland-medical-center_stand...        2023-01-01   
1       390806347_aurora-lakeland-medical-center_stand...        2023-01-01   
2       390806347_aurora-lakeland-medical-center_stand...        2023-01-01   
3       390806347_aurora-lakeland-medical-center_stand...        2023-01-01   
4       390806347_aurora-lakeland-medical-center_stand...        2023-01-01   
...                                                   ...               ...   
111846  390806347_aurora-lakeland-medical-center_stand...        2023-01-01   
111847  390806347_aurora-lakeland-medical-center_stand...        2023-01-01   
111848  390806347_aurora-lakeland-medical-center_stand...        2023-01-01   
111849  390806347_aurora-lakeland-medical-center_stand...        2023-01-01   
111850  390806347_aurora-lakeland-medical-center_stand...        2023-01-01   

       hospital_ccn hospital_ein code_meta descript

--2023-04-08 08:52:12--  https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390806347_aurora-medical-center-kenosha_standardcharges.xml
Resolving www.aurorahealthcare.org (www.aurorahealthcare.org)... 18.155.181.117, 18.155.181.94, 18.155.181.113, ...
Connecting to www.aurorahealthcare.org (www.aurorahealthcare.org)|18.155.181.117|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 8258696 (7.9M) [text/xml]
Saving to: ‘390806347_aurora-medical-center-kenosha_standardcharges.xml’

     0K .......... .......... .......... .......... ..........  0%  845K 9s
    50K .......... .......... .......... .......... ..........  1%  862K 9s
   100K .......... .......... .......... .......... ..........  1% 27.2M 6s
   150K .......... .......... .......... .......... ..........  2% 23.1M 5s
   200K .......... .......... .......... .......... ..........  3%  797K 6s
   250K .......... .......... .......... .......... ..........  3% 21.6M 5s

                                                 filename file_last_updated  \
0       390806347_aurora-medical-center-kenosha_standa...        2023-01-01   
1       390806347_aurora-medical-center-kenosha_standa...        2023-01-01   
2       390806347_aurora-medical-center-kenosha_standa...        2023-01-01   
3       390806347_aurora-medical-center-kenosha_standa...        2023-01-01   
4       390806347_aurora-medical-center-kenosha_standa...        2023-01-01   
...                                                   ...               ...   
143814  390806347_aurora-medical-center-kenosha_standa...        2023-01-01   
143815  390806347_aurora-medical-center-kenosha_standa...        2023-01-01   
143816  390806347_aurora-medical-center-kenosha_standa...        2023-01-01   
143817  390806347_aurora-medical-center-kenosha_standa...        2023-01-01   
143818  390806347_aurora-medical-center-kenosha_standa...        2023-01-01   

       hospital_ccn hospital_ein code_meta descript

--2023-04-08 08:52:19--  https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/391150165_aurora-medical-center-washington-county_standardcharges.xml
Resolving www.aurorahealthcare.org (www.aurorahealthcare.org)... 18.155.181.117, 18.155.181.94, 18.155.181.113, ...
Connecting to www.aurorahealthcare.org (www.aurorahealthcare.org)|18.155.181.117|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7127523 (6.8M) [text/xml]
Saving to: ‘391150165_aurora-medical-center-washington-county_standardcharges.xml’

     0K .......... .......... .......... .......... ..........  0%  849K 8s
    50K .......... .......... .......... .......... ..........  1%  850K 8s
   100K .......... .......... .......... .......... ..........  2% 40.1M 5s
   150K .......... .......... .......... .......... ..........  2% 67.5M 4s
   200K .......... .......... .......... .......... ..........  3%  870K 5s
   250K .......... .......... .......... .......... ...

                                                 filename file_last_updated  \
0       391150165_aurora-medical-center-washington-cou...        2023-01-01   
1       391150165_aurora-medical-center-washington-cou...        2023-01-01   
2       391150165_aurora-medical-center-washington-cou...        2023-01-01   
3       391150165_aurora-medical-center-washington-cou...        2023-01-01   
4       391150165_aurora-medical-center-washington-cou...        2023-01-01   
...                                                   ...               ...   
124056  391150165_aurora-medical-center-washington-cou...        2023-01-01   
124057  391150165_aurora-medical-center-washington-cou...        2023-01-01   
124058  391150165_aurora-medical-center-washington-cou...        2023-01-01   
124059  391150165_aurora-medical-center-washington-cou...        2023-01-01   
124060  391150165_aurora-medical-center-washington-cou...        2023-01-01   

       hospital_ccn hospital_ein code_meta descript

--2023-04-08 08:52:25--  https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390806347_aurora-medical-center-burlington_standardcharges.xml
Resolving www.aurorahealthcare.org (www.aurorahealthcare.org)... 18.155.181.94, 18.155.181.113, 18.155.181.117, ...
Connecting to www.aurorahealthcare.org (www.aurorahealthcare.org)|18.155.181.94|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7250105 (6.9M) [text/xml]
Saving to: ‘390806347_aurora-medical-center-burlington_standardcharges.xml’

     0K .......... .......... .......... .......... ..........  0%  861K 8s
    50K .......... .......... .......... .......... ..........  1%  832K 8s
   100K .......... .......... .......... .......... ..........  2% 30.1M 6s
   150K .......... .......... .......... .......... ..........  2% 1.16M 6s
   200K .......... .......... .......... .......... ..........  3% 3.22M 5s
   250K .......... .......... .......... .......... ..........  4% 34.

                                                 filename file_last_updated  \
0       390806347_aurora-medical-center-burlington_sta...        2023-01-01   
1       390806347_aurora-medical-center-burlington_sta...        2023-01-01   
2       390806347_aurora-medical-center-burlington_sta...        2023-01-01   
3       390806347_aurora-medical-center-burlington_sta...        2023-01-01   
4       390806347_aurora-medical-center-burlington_sta...        2023-01-01   
...                                                   ...               ...   
126424  390806347_aurora-medical-center-burlington_sta...        2023-01-01   
126425  390806347_aurora-medical-center-burlington_sta...        2023-01-01   
126426  390806347_aurora-medical-center-burlington_sta...        2023-01-01   
126427  390806347_aurora-medical-center-burlington_sta...        2023-01-01   
126428  390806347_aurora-medical-center-burlington_sta...        2023-01-01   

       hospital_ccn hospital_ein code_meta descript

--2023-04-08 08:52:31--  https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/391528430_aurora-medical-center-bay-area_standardcharges.xml
Resolving www.aurorahealthcare.org (www.aurorahealthcare.org)... 18.155.181.117, 18.155.181.94, 18.155.181.113, ...
Connecting to www.aurorahealthcare.org (www.aurorahealthcare.org)|18.155.181.117|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7896525 (7.5M) [text/xml]
Saving to: ‘391528430_aurora-medical-center-bay-area_standardcharges.xml’

     0K .......... .......... .......... .......... ..........  0%  894K 9s
    50K .......... .......... .......... .......... ..........  1%  847K 9s
   100K .......... .......... .......... .......... ..........  1% 50.4M 6s
   150K .......... .......... .......... .......... ..........  2% 1.17M 6s
   200K .......... .......... .......... .......... ..........  3% 2.84M 5s
   250K .......... .......... .......... .......... ..........  3% 12.7M 

                                                 filename file_last_updated  \
0       391528430_aurora-medical-center-bay-area_stand...        2023-01-01   
1       391528430_aurora-medical-center-bay-area_stand...        2023-01-01   
2       391528430_aurora-medical-center-bay-area_stand...        2023-01-01   
3       391528430_aurora-medical-center-bay-area_stand...        2023-01-01   
4       391528430_aurora-medical-center-bay-area_stand...        2023-01-01   
...                                                   ...               ...   
138116  391528430_aurora-medical-center-bay-area_stand...        2023-01-01   
138117  391528430_aurora-medical-center-bay-area_stand...        2023-01-01   
138118  391528430_aurora-medical-center-bay-area_stand...        2023-01-01   
138119  391528430_aurora-medical-center-bay-area_stand...        2023-01-01   
138120  391528430_aurora-medical-center-bay-area_stand...        2023-01-01   

       hospital_ccn hospital_ein code_meta descript

--2023-04-08 08:52:38--  https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390806347_aurora-medical-center-summit_standardcharges.xml
Resolving www.aurorahealthcare.org (www.aurorahealthcare.org)... 18.155.181.94, 18.155.181.113, 18.155.181.117, ...
Connecting to www.aurorahealthcare.org (www.aurorahealthcare.org)|18.155.181.94|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 8724452 (8.3M) [text/xml]
Saving to: ‘390806347_aurora-medical-center-summit_standardcharges.xml’

     0K .......... .......... .......... .......... ..........  0%  829K 10s
    50K .......... .......... .......... .......... ..........  1%  851K 10s
   100K .......... .......... .......... .......... ..........  1% 46.8M 7s
   150K .......... .......... .......... .......... ..........  2% 1.10M 7s
   200K .......... .......... .......... .......... ..........  2% 3.35M 6s
   250K .......... .......... .......... .......... ..........  3% 49.9M 5s


                                                 filename file_last_updated  \
0       390806347_aurora-medical-center-summit_standar...        2023-01-01   
1       390806347_aurora-medical-center-summit_standar...        2023-01-01   
2       390806347_aurora-medical-center-summit_standar...        2023-01-01   
3       390806347_aurora-medical-center-summit_standar...        2023-01-01   
4       390806347_aurora-medical-center-summit_standar...        2023-01-01   
...                                                   ...               ...   
151917  390806347_aurora-medical-center-summit_standar...        2023-01-01   
151918  390806347_aurora-medical-center-summit_standar...        2023-01-01   
151919  390806347_aurora-medical-center-summit_standar...        2023-01-01   
151920  390806347_aurora-medical-center-summit_standar...        2023-01-01   
151921  390806347_aurora-medical-center-summit_standar...        2023-01-01   

       hospital_ccn hospital_ein code_meta descript

File ‘390806347_aurora-medical-center-summit_standardcharges.xml’ already there; not retrieving.
  df_out = df_out.append(df_intermediate)


                                                 filename file_last_updated  \
0       390806347_aurora-medical-center-summit_standar...        2023-01-01   
1       390806347_aurora-medical-center-summit_standar...        2023-01-01   
2       390806347_aurora-medical-center-summit_standar...        2023-01-01   
3       390806347_aurora-medical-center-summit_standar...        2023-01-01   
4       390806347_aurora-medical-center-summit_standar...        2023-01-01   
...                                                   ...               ...   
151917  390806347_aurora-medical-center-summit_standar...        2023-01-01   
151918  390806347_aurora-medical-center-summit_standar...        2023-01-01   
151919  390806347_aurora-medical-center-summit_standar...        2023-01-01   
151920  390806347_aurora-medical-center-summit_standar...        2023-01-01   
151921  390806347_aurora-medical-center-summit_standar...        2023-01-01   

       hospital_ccn hospital_ein code_meta descript

--2023-04-08 08:52:51--  https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/272953799_aurora-medical-center-grafton_standardcharges.xml
Resolving www.aurorahealthcare.org (www.aurorahealthcare.org)... 18.155.181.94, 18.155.181.113, 18.155.181.117, ...
Connecting to www.aurorahealthcare.org (www.aurorahealthcare.org)|18.155.181.94|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9176708 (8.8M) [text/xml]
Saving to: ‘272953799_aurora-medical-center-grafton_standardcharges.xml’

     0K .......... .......... .......... .......... ..........  0%  843K 11s
    50K .......... .......... .......... .......... ..........  1%  845K 11s
   100K .......... .......... .......... .......... ..........  1% 47.3M 7s
   150K .......... .......... .......... .......... ..........  2% 37.1M 5s
   200K .......... .......... .......... .......... ..........  2%  858K 6s
   250K .......... .......... .......... .......... ..........  3% 98.1M 5

                                                 filename file_last_updated  \
0       272953799_aurora-medical-center-grafton_standa...        2023-01-01   
1       272953799_aurora-medical-center-grafton_standa...        2023-01-01   
2       272953799_aurora-medical-center-grafton_standa...        2023-01-01   
3       272953799_aurora-medical-center-grafton_standa...        2023-01-01   
4       272953799_aurora-medical-center-grafton_standa...        2023-01-01   
...                                                   ...               ...   
159687  272953799_aurora-medical-center-grafton_standa...        2023-01-01   
159688  272953799_aurora-medical-center-grafton_standa...        2023-01-01   
159689  272953799_aurora-medical-center-grafton_standa...        2023-01-01   
159690  272953799_aurora-medical-center-grafton_standa...        2023-01-01   
159691  272953799_aurora-medical-center-grafton_standa...        2023-01-01   

       hospital_ccn hospital_ein code_meta descript

--2023-04-08 08:52:59--  https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/391947472_aurora-baycare-medical-center_standardcharges.xml
Resolving www.aurorahealthcare.org (www.aurorahealthcare.org)... 18.155.181.101, 18.155.181.94, 18.155.181.113, ...
Connecting to www.aurorahealthcare.org (www.aurorahealthcare.org)|18.155.181.101|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9701203 (9.3M) [text/xml]
Saving to: ‘391947472_aurora-baycare-medical-center_standardcharges.xml’

     0K .......... .......... .......... .......... ..........  0%  831K 11s
    50K .......... .......... .......... .......... ..........  1%  862K 11s
   100K .......... .......... .......... .......... ..........  1% 35.2M 7s
   150K .......... .......... .......... .......... ..........  2% 1.06M 8s
   200K .......... .......... .......... .......... ..........  2% 2.06M 7s
   250K .......... .......... .......... .......... ..........  3% 32.0M 

                                                 filename file_last_updated  \
0       391947472_aurora-baycare-medical-center_standa...        2023-01-01   
1       391947472_aurora-baycare-medical-center_standa...        2023-01-01   
2       391947472_aurora-baycare-medical-center_standa...        2023-01-01   
3       391947472_aurora-baycare-medical-center_standa...        2023-01-01   
4       391947472_aurora-baycare-medical-center_standa...        2023-01-01   
...                                                   ...               ...   
168752  391947472_aurora-baycare-medical-center_standa...        2023-01-01   
168753  391947472_aurora-baycare-medical-center_standa...        2023-01-01   
168754  391947472_aurora-baycare-medical-center_standa...        2023-01-01   
168755  391947472_aurora-baycare-medical-center_standa...        2023-01-01   
168756  391947472_aurora-baycare-medical-center_standa...        2023-01-01   

       hospital_ccn hospital_ein code_meta descript

--2023-04-08 08:53:07--  https://www.aurorahealthcare.org/assets/documents/billing-insurance/pricing-transparency/390872192_aurora-psychiatric-hospital_standardcharges.xml
Resolving www.aurorahealthcare.org (www.aurorahealthcare.org)... 18.155.181.101, 18.155.181.94, 18.155.181.113, ...
Connecting to www.aurorahealthcare.org (www.aurorahealthcare.org)|18.155.181.101|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1408614 (1.3M) [text/xml]
Saving to: ‘390872192_aurora-psychiatric-hospital_standardcharges.xml’

     0K .......... .......... .......... .......... ..........  3%  860K 2s
    50K .......... .......... .......... .......... ..........  7%  846K 1s
   100K .......... .......... .......... .......... .......... 10% 50.4M 1s
   150K .......... .......... .......... .......... .......... 14% 48.9M 1s
   200K .......... .......... .......... .......... .......... 18%  859K 1s
   250K .......... .......... .......... .......... .......... 21% 25.3M 1s
   

                                                filename file_last_updated  \
0      390872192_aurora-psychiatric-hospital_standard...        2023-01-01   
1      390872192_aurora-psychiatric-hospital_standard...        2023-01-01   
2      390872192_aurora-psychiatric-hospital_standard...        2023-01-01   
3      390872192_aurora-psychiatric-hospital_standard...        2023-01-01   
4      390872192_aurora-psychiatric-hospital_standard...        2023-01-01   
...                                                  ...               ...   
24933  390872192_aurora-psychiatric-hospital_standard...        2023-01-01   
24934  390872192_aurora-psychiatric-hospital_standard...        2023-01-01   
24935  390872192_aurora-psychiatric-hospital_standard...        2023-01-01   
24936  390872192_aurora-psychiatric-hospital_standard...        2023-01-01   
24937  390872192_aurora-psychiatric-hospital_standard...        2023-01-01   

      hospital_ccn hospital_ein code_meta description procedure

In [8]:
len(tasks)

13