In [23]:
import base64
from io import StringIO
from dateutil.parser import parse as parse_datetime

import pandas as pd
import requests
from lxml import html

from helpers import *

In [2]:
# Columns of the exported rate file, in output order. The final frame is
# produced by selecting exactly these columns from the working frame.
TARGET_COLUMNS = [
    # --- identification ---
    "hospital_id",
    # "row_id" is currently left out of the output.
    "line_type",
    "description",
    # --- billing / procedure codes ---
    "rev_code",
    "local_code",
    "code",
    "ms_drg",
    "apr_drg",
    "eapg",
    "hcpcs_cpt",
    "modifiers",
    "alt_hcpcs_cpt",
    "thru",
    "apc",
    "icd",
    "ndc",
    # --- drug details ---
    "drug_hcpcs_multiplier",
    "drug_quantity",
    "drug_unit_of_measurement",
    "drug_type_of_measurement",
    # --- context of the charge ---
    "billing_class",
    "setting",
    "payer_category",
    "payer_name",
    "plan_name",
    # --- the charge itself ---
    "standard_charge",
    "standard_charge_percent",
    "contracting_method",
    # --- free-text notes ---
    "additional_generic_notes",
    "additional_payer_specific_notes",
]

In [3]:
# Hospital price transparency page (recorded for provenance; not referenced
# again in the visible cells).
transparency_page = "https://www.bannerhealth.com/patients/billing/pricing-resources/hospital-price-transparency"
# Link to the machine-readable file; its second-to-last path segment is used
# to build the metadata API URL.
mrf_url = "https://www.cdmpricing.com/f3e6b4138e9802fa49983b8b277b62f9/standard-charges"
# Written into the 'hospital_id' output column and used in the output file
# name. NOTE(review): presumably the CMS Certification Number — confirm.
ccn = "030002"

In [4]:
# The identifier expected by the metadata endpoint is the path segment that
# sits just before the trailing "standard-charges" part of the MRF URL.
hospital_id = mrf_url.rsplit("/", 2)[-2]
api_url = (
    "https://apim.services.craneware.com/api-pricing-transparency/api/public/{}/metadata/cdmFile"
    .format(hospital_id)
)
api_url

'https://apim.services.craneware.com/api-pricing-transparency/api/public/f3e6b4138e9802fa49983b8b277b62f9/metadata/cdmFile'

In [5]:
# Fetch the file metadata from the API. The JSON response also carries the
# base64-encoded file body that later cells decode.
# requests applies no timeout by default, so set one to keep a stalled server
# from hanging the notebook, and fail right here on an HTTP error instead of
# at a confusing JSON/base64 error in a later cell.
resp = requests.get(api_url, timeout=60)
resp.raise_for_status()

In [6]:
# Parse the response body once: it contains the whole base64-encoded file, so
# calling resp.json() for every field would repeat the parsing work.
payload = resp.json()
base64_str = payload.get("contentBytes")      # base64-encoded file content
content_type = payload.get("contentType")     # e.g. 'text/csv'
filename = payload.get("fileDownloadName")    # file name suggested by the API
content_type, filename

('text/csv',
 '270036499_BANNER-UNIVERSITY-MEDICAL-CENTER-PHOENIX_standardcharges.csv')

In [18]:
# Derive the EIN from the downloaded file's name; the recorded output is
# '27-0036499' for a file name starting with '270036499_'.
# derive_ein_from_filename is presumably provided by `from helpers import *`.
ein = derive_ein_from_filename(filename)
ein

'27-0036499'

In [20]:
# Decode the payload; 'utf-8-sig' drops a leading byte-order mark if present.
csv_str = base64.b64decode(base64_str).decode('utf-8-sig')

# Keep a local copy of the file as published.
# - `with` closes the handle even if the write fails.
# - An explicit encoding avoids depending on the platform default, which may
#   not be able to represent every character.
# - newline='' writes the text unchanged instead of translating '\n' to the
#   platform line separator.
# NOTE(review): `filename` comes from the API response; confirm it is a plain
# file name before trusting it as a local path.
with open(filename, 'w', encoding='utf-8', newline='') as out_f:
    out_f.write(csv_str)

In [21]:
# The file starts with a banner row (see the output below) ahead of the real
# CSV header. `starts_at` is the offset of the first character after the first
# newline, i.e. where the actual CSV begins; str.index raises ValueError if
# the text contains no newline at all.
starts_at = csv_str.index('\n') + 1
# Banner row with surrounding whitespace (including the newline) removed.
first_line = csv_str[:starts_at].strip()
first_line

'"Updated on May 25, 2023",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,'

In [25]:
# The date text is whatever sits between the first pair of double quotes on
# the banner row, with the 'Updated on ' prefix removed.
date_str = first_line.split('"', 2)[1].replace('Updated on ', '')
# Keep only the calendar date, formatted as YYYY-MM-DD.
last_updated = parse_datetime(date_str).date().isoformat()
last_updated

'2023-05-25'

In [7]:
# Parse everything after the banner row; the next line is the real header.
s_f = StringIO(csv_str[starts_at:])
# Read 'Code' as text so identifiers are never type-inferred as numbers,
# which would drop leading zeros.
df_in = pd.read_csv(s_f, dtype={'Code': str})
df_in

Unnamed: 0,Code,Description,Type,Gross Charge,De-identified min contracted rate,De-identified max contracted rate,Discounted Cash Price,United HealthCare-Commercial HMO/POS_Avg,Aetna-Commercial HMO/POS_Avg,Aetna-Medicare Advantage HMO_Avg,...,Gila River Health-Commercial other_Avg,Imperial Ins Co-Commercial HMO/POS_Avg,Medica Insurance Co-Commercial HMO/POS_Avg,Mercy Care Plans-Medicaid HMO_Avg,Molina Health-Medicaid HMO_Avg,Oscar Health Plan-Commercial HMO/POS_Avg,Triwest HA-Commercial HMO/POS_Avg,UMR-Commercial HMO/POS_Avg,United HealthCare-Medicaid HMO_Avg,United HealthCare-Commercial PPO/Open Access_Avg
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,2529.71,462.20,4240.00,2529.71,2840.00,4240.00,626.24,...,,,,611.01,508.42,,626.24,,,
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,1813.86,232.49,1285.25,1813.86,,833.00,,...,,,,,,,,,,
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,Outpatient,2003.00,,,2003.00,,,,...,,,,,,,,,,
3,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,Outpatient,2647.69,462.20,845.42,2647.69,,,603.39,...,,,,,,,,,,
4,10035,PLACEMENT OF SOFT TISSUE LOCATING DEVICE USING...,Outpatient,1422.00,,,1422.00,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15965,95958,TEST ACTIVATION WADA W EEG MONITOR,HCPCS/CPT,1603.00,,,1603.00,,,,...,,,,,,,,,,
15966,95961,MAPPING CORTICAL&SUBCORTICAL 1ST HR,HCPCS/CPT,712.00,,,712.00,,,,...,,,,,,,,,,
15967,95962,MAPPING CORTICAL&SUBCORTICAL ADD HR,HCPCS/CPT,603.00,,,603.00,,,,...,,,,,,,,,,
15968,95970,ANLYS NEUROSTIM WO REPROGRAMMING,HCPCS/CPT,522.00,,,522.00,,,,...,,,,,,,,,,


In [8]:
# Start the working frame from the raw input, mapping the three descriptive
# source columns onto their target-schema names. rename() returns a new
# frame, so df_in itself keeps its original column labels.
df_mid = df_in.rename(
    columns=dict(Code='code', Description='description', Type='line_type')
)
df_mid

Unnamed: 0,code,description,line_type,Gross Charge,De-identified min contracted rate,De-identified max contracted rate,Discounted Cash Price,United HealthCare-Commercial HMO/POS_Avg,Aetna-Commercial HMO/POS_Avg,Aetna-Medicare Advantage HMO_Avg,...,Gila River Health-Commercial other_Avg,Imperial Ins Co-Commercial HMO/POS_Avg,Medica Insurance Co-Commercial HMO/POS_Avg,Mercy Care Plans-Medicaid HMO_Avg,Molina Health-Medicaid HMO_Avg,Oscar Health Plan-Commercial HMO/POS_Avg,Triwest HA-Commercial HMO/POS_Avg,UMR-Commercial HMO/POS_Avg,United HealthCare-Medicaid HMO_Avg,United HealthCare-Commercial PPO/Open Access_Avg
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,2529.71,462.20,4240.00,2529.71,2840.00,4240.00,626.24,...,,,,611.01,508.42,,626.24,,,
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,1813.86,232.49,1285.25,1813.86,,833.00,,...,,,,,,,,,,
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,Outpatient,2003.00,,,2003.00,,,,...,,,,,,,,,,
3,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,Outpatient,2647.69,462.20,845.42,2647.69,,,603.39,...,,,,,,,,,,
4,10035,PLACEMENT OF SOFT TISSUE LOCATING DEVICE USING...,Outpatient,1422.00,,,1422.00,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15965,95958,TEST ACTIVATION WADA W EEG MONITOR,HCPCS/CPT,1603.00,,,1603.00,,,,...,,,,,,,,,,
15966,95961,MAPPING CORTICAL&SUBCORTICAL 1ST HR,HCPCS/CPT,712.00,,,712.00,,,,...,,,,,,,,,,
15967,95962,MAPPING CORTICAL&SUBCORTICAL ADD HR,HCPCS/CPT,603.00,,,603.00,,,,...,,,,,,,,,,
15968,95970,ANLYS NEUROSTIM WO REPROGRAMMING,HCPCS/CPT,522.00,,,522.00,,,,...,,,,,,,,,,


In [9]:
# Split the columns by position: the first three (code, description,
# line_type) identify a row, every column after them holds an amount.
remaining_columns = df_mid.columns[:3]
money_columns = df_mid.columns[3:]

# Wide -> long: one output row per (input row, amount column). The original
# column label lands in 'payer_name', its value in 'standard_charge'.
df_mid = df_mid.melt(
    id_vars=remaining_columns,
    value_vars=money_columns,
    var_name='payer_name',
    value_name='standard_charge',
)
df_mid

Unnamed: 0,code,description,line_type,payer_name,standard_charge
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,Gross Charge,2529.71
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,Gross Charge,1813.86
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,Outpatient,Gross Charge,2003.00
3,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,Outpatient,Gross Charge,2647.69
4,10035,PLACEMENT OF SOFT TISSUE LOCATING DEVICE USING...,Outpatient,Gross Charge,1422.00
...,...,...,...,...,...
654765,95958,TEST ACTIVATION WADA W EEG MONITOR,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,
654766,95961,MAPPING CORTICAL&SUBCORTICAL 1ST HR,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,
654767,95962,MAPPING CORTICAL&SUBCORTICAL ADD HR,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,
654768,95970,ANLYS NEUROSTIM WO REPROGRAMMING,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,


In [10]:
# Show the distinct line_type values present in the data.
set(df_mid['line_type'].to_list())

{'Charge Code', 'HCPCS/CPT', 'Inpatient', 'Outpatient'}

In [11]:
# Derive the care setting from line_type. Only 'Outpatient' and 'Inpatient'
# rows carry a setting; every other line_type ('Charge Code', 'HCPCS/CPT')
# is left empty (NaN).
# Fix: 'Inpatient' rows were previously labelled 'outpatient'.
df_mid['setting'] = df_mid['line_type'].map({
    'Outpatient': 'outpatient',
    'Inpatient': 'inpatient',
})
df_mid

Unnamed: 0,code,description,line_type,payer_name,standard_charge,setting
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,Gross Charge,2529.71,outpatient
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,Gross Charge,1813.86,outpatient
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,Outpatient,Gross Charge,2003.00,outpatient
3,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,Outpatient,Gross Charge,2647.69,outpatient
4,10035,PLACEMENT OF SOFT TISSUE LOCATING DEVICE USING...,Outpatient,Gross Charge,1422.00,outpatient
...,...,...,...,...,...,...
654765,95958,TEST ACTIVATION WADA W EEG MONITOR,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,,
654766,95961,MAPPING CORTICAL&SUBCORTICAL 1ST HR,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,,
654767,95962,MAPPING CORTICAL&SUBCORTICAL ADD HR,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,,
654768,95970,ANLYS NEUROSTIM WO REPROGRAMMING,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,,


In [12]:
# Pre-create the two code columns so they exist on every row, including rows
# that recognise_codes leaves untouched. ('local_code' is not pre-created
# here; recognise_codes adds it for 'Charge Code' rows.)
df_mid['hcpcs_cpt'] = None
df_mid['ms_drg'] = None

def recognise_codes(row):
    """Copy a row's generic ``code`` into the matching typed code field.

    The row is modified in place and also returned, so the function can be
    used with ``DataFrame.apply(..., axis=1)``.

    * ``line_type == 'Charge Code'`` -> ``local_code``
    * ``line_type == 'HCPCS/CPT'``   -> ``hcpcs_cpt``
    * any other ``line_type``: the code itself is inspected with the
      ``code_is_*`` helpers; an MS-DRG match fills ``ms_drg``, otherwise a
      CPT or HCPCS match fills ``hcpcs_cpt``. If nothing matches, the row is
      returned unchanged.
    """
    kind, value = row['line_type'], row['code']

    # The line type alone decides these two cases.
    if kind == 'Charge Code':
        row['local_code'] = value
        return row
    if kind == 'HCPCS/CPT':
        row['hcpcs_cpt'] = value
        return row

    # Remaining line types: classify by the shape of the code. MS-DRG is
    # checked first and wins over CPT/HCPCS.
    if code_is_ms_drg(value):
        row['ms_drg'] = value
    elif code_is_cpt(value) or code_is_hcpcs(value):
        row['hcpcs_cpt'] = value
    return row

# Run recognise_codes on every row (axis=1). The function returns the row, so
# the result is a new frame with the typed code columns filled in.
df_mid = df_mid.apply(recognise_codes, axis=1)
df_mid

Unnamed: 0,code,description,hcpcs_cpt,line_type,local_code,ms_drg,payer_name,setting,standard_charge
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10005,Outpatient,,,Gross Charge,outpatient,2529.71
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10006,Outpatient,,,Gross Charge,outpatient,1813.86
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,10009,Outpatient,,,Gross Charge,outpatient,2003.00
3,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,10030,Outpatient,,,Gross Charge,outpatient,2647.69
4,10035,PLACEMENT OF SOFT TISSUE LOCATING DEVICE USING...,10035,Outpatient,,,Gross Charge,outpatient,1422.00
...,...,...,...,...,...,...,...,...,...
654765,95958,TEST ACTIVATION WADA W EEG MONITOR,95958,HCPCS/CPT,,,United HealthCare-Commercial PPO/Open Access_Avg,,
654766,95961,MAPPING CORTICAL&SUBCORTICAL 1ST HR,95961,HCPCS/CPT,,,United HealthCare-Commercial PPO/Open Access_Avg,,
654767,95962,MAPPING CORTICAL&SUBCORTICAL ADD HR,95962,HCPCS/CPT,,,United HealthCare-Commercial PPO/Open Access_Avg,,
654768,95970,ANLYS NEUROSTIM WO REPROGRAMMING,95970,HCPCS/CPT,,,United HealthCare-Commercial PPO/Open Access_Avg,,


In [13]:
# Normalise the raw amounts with the cleanup_dollar_value helper.
df_mid['standard_charge'] = df_mid['standard_charge'].apply(cleanup_dollar_value)

# Keep only rows that actually carry a charge: drop the literal "N/A" marker
# as well as missing values.
# Take an explicit copy of the filtered result. Later cells add columns to
# df_mid, and assigning into a filtered slice raises SettingWithCopyWarning.
df_mid = df_mid.loc[
    (df_mid['standard_charge'] != "N/A") & df_mid['standard_charge'].notnull()
].copy()
df_mid

Unnamed: 0,code,description,hcpcs_cpt,line_type,local_code,ms_drg,payer_name,setting,standard_charge
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10005,Outpatient,,,Gross Charge,outpatient,2529.71
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10006,Outpatient,,,Gross Charge,outpatient,1813.86
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,10009,Outpatient,,,Gross Charge,outpatient,2003.00
3,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,10030,Outpatient,,,Gross Charge,outpatient,2647.69
4,10035,PLACEMENT OF SOFT TISSUE LOCATING DEVICE USING...,10035,Outpatient,,,Gross Charge,outpatient,1422.00
...,...,...,...,...,...,...,...,...,...
641539,96365,"INFUSION INTO A VEIN FOR THERAPY, PREVENTION, ...",96365,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,380.00
641602,99283,EMERGENCY DEPARTMENT VISIT FOR PROBLEM OF MODE...,99283,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,1509.00
641805,J0485,"INJECTION, BELATACEPT, 1 MG",J0485,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,4.40
641876,J1561,"INJECTION, IMMUNE GLOBULIN, (GAMUNEX-C/GAMMAKE...",J1561,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,68.31


In [14]:
def payer_category_from_payer_name(payer_name):
    """Map a melted column label to its payer category.

    Four labels are special, non-payer amounts and map to ``'gross'``,
    ``'cash'``, ``'min'`` and ``'max'``. Every other value is treated as a
    payer-specific rate and yields ``'payer'``. Matching is exact.
    """
    special_columns = (
        ("Gross Charge", 'gross'),
        ("Discounted Cash Price", 'cash'),
        ("De-identified min contracted rate", 'min'),
        ("De-identified max contracted rate", 'max'),
    )
    for column_label, category in special_columns:
        if payer_name == column_label:
            return category
    return 'payer'

# Classify every row by its payer_name label (gross / cash / min / max / payer).
df_mid['payer_category'] = df_mid['payer_name'].apply(payer_category_from_payer_name)
df_mid

Unnamed: 0,code,description,hcpcs_cpt,line_type,local_code,ms_drg,payer_name,setting,standard_charge,payer_category
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10005,Outpatient,,,Gross Charge,outpatient,2529.71,gross
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10006,Outpatient,,,Gross Charge,outpatient,1813.86,gross
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,10009,Outpatient,,,Gross Charge,outpatient,2003.00,gross
3,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,10030,Outpatient,,,Gross Charge,outpatient,2647.69,gross
4,10035,PLACEMENT OF SOFT TISSUE LOCATING DEVICE USING...,10035,Outpatient,,,Gross Charge,outpatient,1422.00,gross
...,...,...,...,...,...,...,...,...,...,...
641539,96365,"INFUSION INTO A VEIN FOR THERAPY, PREVENTION, ...",96365,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,380.00,payer
641602,99283,EMERGENCY DEPARTMENT VISIT FOR PROBLEM OF MODE...,99283,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,1509.00,payer
641805,J0485,"INJECTION, BELATACEPT, 1 MG",J0485,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,4.40,payer
641876,J1561,"INJECTION, IMMUNE GLOBULIN, (GAMUNEX-C/GAMMAKE...",J1561,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,68.31,payer


In [15]:
# Every row belongs to the same hospital.
df_mid['hospital_id'] = ccn

# Target-schema columns that this notebook does not populate from the source
# file; they are created empty so the column selection for the output works.
for empty_column in (
    'rev_code',
    'apr_drg',
    'eapg',
    'modifiers',
    'alt_hcpcs_cpt',
    'thru',
    'apc',
    'icd',
    'ndc',
    'drug_hcpcs_multiplier',
    'drug_quantity',
    'drug_unit_of_measurement',
    'drug_type_of_measurement',
    'billing_class',
    'plan_name',
    'standard_charge_percent',
    'contracting_method',
    'additional_generic_notes',
    'additional_payer_specific_notes',
):
    df_mid[empty_column] = None

df_mid

Unnamed: 0,code,description,hcpcs_cpt,line_type,local_code,ms_drg,payer_name,setting,standard_charge,payer_category,...,drug_hcpcs_multiplier,drug_quantity,drug_unit_of_measurement,drug_type_of_measurement,billing_class,plan_name,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10005,Outpatient,,,Gross Charge,outpatient,2529.71,gross,...,,,,,,,,,,
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10006,Outpatient,,,Gross Charge,outpatient,1813.86,gross,...,,,,,,,,,,
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,10009,Outpatient,,,Gross Charge,outpatient,2003.00,gross,...,,,,,,,,,,
3,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,10030,Outpatient,,,Gross Charge,outpatient,2647.69,gross,...,,,,,,,,,,
4,10035,PLACEMENT OF SOFT TISSUE LOCATING DEVICE USING...,10035,Outpatient,,,Gross Charge,outpatient,1422.00,gross,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
641539,96365,"INFUSION INTO A VEIN FOR THERAPY, PREVENTION, ...",96365,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,380.00,payer,...,,,,,,,,,,
641602,99283,EMERGENCY DEPARTMENT VISIT FOR PROBLEM OF MODE...,99283,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,1509.00,payer,...,,,,,,,,,,
641805,J0485,"INJECTION, BELATACEPT, 1 MG",J0485,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,4.40,payer,...,,,,,,,,,,
641876,J1561,"INJECTION, IMMUNE GLOBULIN, (GAMUNEX-C/GAMMAKE...",J1561,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,68.31,payer,...,,,,,,,,,,


In [16]:
# Final frame: only the target columns, in target order. Selecting with the
# list raises KeyError if any target column is missing from df_mid.
df_out = df_mid[TARGET_COLUMNS].copy()
df_out

Unnamed: 0,hospital_id,line_type,description,rev_code,local_code,code,ms_drg,apr_drg,eapg,hcpcs_cpt,...,billing_class,setting,payer_category,payer_name,plan_name,standard_charge,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
0,030002,Outpatient,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,,,10005,,,,10005,...,,outpatient,gross,Gross Charge,,2529.71,,,,
1,030002,Outpatient,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,,,10006,,,,10006,...,,outpatient,gross,Gross Charge,,1813.86,,,,
2,030002,Outpatient,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,,,10009,,,,10009,...,,outpatient,gross,Gross Charge,,2003.00,,,,
3,030002,Outpatient,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,,,10030,,,,10030,...,,outpatient,gross,Gross Charge,,2647.69,,,,
4,030002,Outpatient,PLACEMENT OF SOFT TISSUE LOCATING DEVICE USING...,,,10035,,,,10035,...,,outpatient,gross,Gross Charge,,1422.00,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
641539,030002,Outpatient,"INFUSION INTO A VEIN FOR THERAPY, PREVENTION, ...",,,96365,,,,96365,...,,outpatient,payer,United HealthCare-Commercial PPO/Open Access_Avg,,380.00,,,,
641602,030002,Outpatient,EMERGENCY DEPARTMENT VISIT FOR PROBLEM OF MODE...,,,99283,,,,99283,...,,outpatient,payer,United HealthCare-Commercial PPO/Open Access_Avg,,1509.00,,,,
641805,030002,Outpatient,"INJECTION, BELATACEPT, 1 MG",,,J0485,,,,J0485,...,,outpatient,payer,United HealthCare-Commercial PPO/Open Access_Avg,,4.40,,,,
641876,030002,Outpatient,"INJECTION, IMMUNE GLOBULIN, (GAMUNEX-C/GAMMAKE...",,,J1561,,,,J1561,...,,outpatient,payer,United HealthCare-Commercial PPO/Open Access_Avg,,68.31,,,,


In [17]:
# Write the result to rate_<ccn>.csv in the working directory, without the
# DataFrame index column.
df_out.to_csv('rate_' + ccn + '.csv', index=False)