In [1]:
import base64
from io import StringIO
from dateutil.parser import parse as parse_datetime

import pandas as pd
import requests
from lxml import html

from helpers import *

In [2]:
TARGET_COLUMNS = [
    'hospital_id',
    #'row_id',
    'line_type',
    'description',
    'rev_code',
    'local_code',
    'code',
    'ms_drg',
    'apr_drg',
    'eapg',
    'hcpcs_cpt',
    'modifiers',
    'alt_hcpcs_cpt',
    'thru',
    'apc',
    'icd',
    'ndc',
    'drug_hcpcs_multiplier',
    'drug_quantity',
    'drug_unit_of_measurement',
    'drug_type_of_measurement',
    'billing_class',
    'setting',
    'payer_category',
    'payer_name',
    'plan_name',
    'standard_charge',
    'standard_charge_percent',
    'contracting_method',
    'additional_generic_notes',
    'additional_payer_specific_notes'
]

In [3]:
transparency_page = "https://www.bannerhealth.com/patients/billing/pricing-resources/hospital-price-transparency"
mrf_url = "https://www.cdmpricing.com/be222bd54fd847aed65ee681547f6476/standard-charges"
ccn = "030064"

In [4]:
hospital_id = mrf_url.split("/")[-2]
api_url = "https://apim.services.craneware.com/api-pricing-transparency/api/public/{}/metadata/cdmFile".format(hospital_id)
api_url

'https://apim.services.craneware.com/api-pricing-transparency/api/public/be222bd54fd847aed65ee681547f6476/metadata/cdmFile'

In [5]:
resp = requests.get(api_url)

In [6]:
base64_str = resp.json().get("contentBytes")
content_type = resp.json().get("contentType")
filename = resp.json().get("fileDownloadName")
content_type, filename

('text/csv',
 '901074557_BANNER-UNIVERSITY-MEDICAL-CENTER-TUCSON_standardcharges.csv')

In [7]:
ein = derive_ein_from_filename(filename)
ein

'90-1074557'

In [8]:
csv_str = base64.b64decode(base64_str).decode('utf-8-sig')
out_f = open(filename, 'w')
out_f.write(csv_str)
out_f.close()

In [9]:
starts_at = csv_str.index('\n') + 1
first_line = csv_str[:starts_at].strip()
first_line

'"Updated on: June 12, 2023",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,'

In [11]:
date_str = first_line.split('"')[1].replace('Updated on ', '').replace('Updated on: ', '')
last_updated = parse_datetime(date_str).isoformat().split('T')[0]
last_updated

'2023-06-12'

In [12]:
s_f = StringIO(csv_str[starts_at:])
df_in = pd.read_csv(s_f)
df_in

Unnamed: 0,Code,Description,Type,Gross Charge,De-identified min contracted rate,De-identified max contracted rate,Discounted Cash Price,United HealthCare-Commercial HMO/POS_Avg,Aetna-Commercial HMO/POS_Avg,Aetna-Medicare Advantage HMO_Avg,...,Humana-Medicare Advantage HMO_Avg,Kaiser Permanaente-Commercial HMO/POS_Avg,Medica Insurance Co-Commercial HMO/POS_Avg,Mercy Care Plans-Medicaid HMO_Avg,OptumHealth-Commercial HMO/POS_Avg,P3 Health Partners-Commercial HMO/POS_Avg,Oscar Health Plan-Commercial HMO/POS_Avg,UMR-Commercial HMO/POS_Avg,United HealthCare-Medicaid HMO_Avg,United HealthCare-Commercial PPO/Open Access_Avg
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,1888.18,450.93,651.73,1888.18,,,,...,614.71,,,,,,,,,
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,1830.33,,,1830.33,,,,...,,,,,,,,,,
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,Outpatient,2873.00,,,2873.00,,,,...,,,,,,,,,,
3,10021,"FINE NEEDLE ASPIRATION BIOPSY, FIRST GROWTH",Outpatient,656.00,,,656.00,,,,...,,,,,,,,,,
4,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,Outpatient,2969.25,617.96,1101.66,2969.25,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16168,95810,PSG W 4+ PARAM 6+YRS PEDS,HCPCS/CPT,4894.00,,,4894.00,,,,...,,,,,,,,,,
16169,95811,PSG W 4+ PARAM 6+YR W CPAP PED<6HR,HCPCS/CPT,5286.00,,,5286.00,,,,...,,,,,,,,,,
16170,95811,PSG W 4+ PARAM 6+YR W CPAP RDC<6HR,HCPCS/CPT,4228.00,,,4228.00,,,,...,,,,,,,,,,
16171,95811,PSG W 4+ PARAM 6+YRS W CPAP,HCPCS/CPT,4228.00,,,4228.00,,,,...,,,,,,,,,,


In [13]:
df_mid = pd.DataFrame(df_in)
df_mid = df_mid.rename(columns={
    'Code': 'code',
    'Description': 'description',
    'Type': 'line_type',
})
df_mid

Unnamed: 0,code,description,line_type,Gross Charge,De-identified min contracted rate,De-identified max contracted rate,Discounted Cash Price,United HealthCare-Commercial HMO/POS_Avg,Aetna-Commercial HMO/POS_Avg,Aetna-Medicare Advantage HMO_Avg,...,Humana-Medicare Advantage HMO_Avg,Kaiser Permanaente-Commercial HMO/POS_Avg,Medica Insurance Co-Commercial HMO/POS_Avg,Mercy Care Plans-Medicaid HMO_Avg,OptumHealth-Commercial HMO/POS_Avg,P3 Health Partners-Commercial HMO/POS_Avg,Oscar Health Plan-Commercial HMO/POS_Avg,UMR-Commercial HMO/POS_Avg,United HealthCare-Medicaid HMO_Avg,United HealthCare-Commercial PPO/Open Access_Avg
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,1888.18,450.93,651.73,1888.18,,,,...,614.71,,,,,,,,,
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,1830.33,,,1830.33,,,,...,,,,,,,,,,
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,Outpatient,2873.00,,,2873.00,,,,...,,,,,,,,,,
3,10021,"FINE NEEDLE ASPIRATION BIOPSY, FIRST GROWTH",Outpatient,656.00,,,656.00,,,,...,,,,,,,,,,
4,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,Outpatient,2969.25,617.96,1101.66,2969.25,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16168,95810,PSG W 4+ PARAM 6+YRS PEDS,HCPCS/CPT,4894.00,,,4894.00,,,,...,,,,,,,,,,
16169,95811,PSG W 4+ PARAM 6+YR W CPAP PED<6HR,HCPCS/CPT,5286.00,,,5286.00,,,,...,,,,,,,,,,
16170,95811,PSG W 4+ PARAM 6+YR W CPAP RDC<6HR,HCPCS/CPT,4228.00,,,4228.00,,,,...,,,,,,,,,,
16171,95811,PSG W 4+ PARAM 6+YRS W CPAP,HCPCS/CPT,4228.00,,,4228.00,,,,...,,,,,,,,,,


In [14]:
money_columns = df_mid.columns[3:]
remaining_columns = df_mid.columns[:3]
df_mid = pd.melt(df_mid, id_vars=remaining_columns, var_name='payer_name', value_name='standard_charge')
df_mid

Unnamed: 0,code,description,line_type,payer_name,standard_charge
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,Gross Charge,1888.18
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,Gross Charge,1830.33
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,Outpatient,Gross Charge,2873.00
3,10021,"FINE NEEDLE ASPIRATION BIOPSY, FIRST GROWTH",Outpatient,Gross Charge,656.00
4,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,Outpatient,Gross Charge,2969.25
...,...,...,...,...,...
549877,95810,PSG W 4+ PARAM 6+YRS PEDS,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,
549878,95811,PSG W 4+ PARAM 6+YR W CPAP PED<6HR,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,
549879,95811,PSG W 4+ PARAM 6+YR W CPAP RDC<6HR,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,
549880,95811,PSG W 4+ PARAM 6+YRS W CPAP,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,


In [15]:
set(df_mid['line_type'].to_list())

{'Charge Code', 'HCPCS/CPT', 'Inpatient', 'Outpatient'}

In [16]:
df_mid.loc[df_mid['line_type'] == 'Outpatient', 'setting'] = 'outpatient'
df_mid.loc[df_mid['line_type'] == 'Inpatient', 'setting'] = 'outpatient'
df_mid

Unnamed: 0,code,description,line_type,payer_name,standard_charge,setting
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,Gross Charge,1888.18,outpatient
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,Outpatient,Gross Charge,1830.33,outpatient
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,Outpatient,Gross Charge,2873.00,outpatient
3,10021,"FINE NEEDLE ASPIRATION BIOPSY, FIRST GROWTH",Outpatient,Gross Charge,656.00,outpatient
4,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,Outpatient,Gross Charge,2969.25,outpatient
...,...,...,...,...,...,...
549877,95810,PSG W 4+ PARAM 6+YRS PEDS,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,,
549878,95811,PSG W 4+ PARAM 6+YR W CPAP PED<6HR,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,,
549879,95811,PSG W 4+ PARAM 6+YR W CPAP RDC<6HR,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,,
549880,95811,PSG W 4+ PARAM 6+YRS W CPAP,HCPCS/CPT,United HealthCare-Commercial PPO/Open Access_Avg,,


In [17]:
df_mid['hcpcs_cpt'] = None
df_mid['ms_drg'] = None

def recognise_codes(row):
    line_type = row['line_type']
    code = row['code']
    
    if line_type == 'Charge Code':
        row['local_code'] = code
    elif line_type == 'HCPCS/CPT':
        row['hcpcs_cpt'] = code
    else:
        if code_is_ms_drg(code):
            row['ms_drg'] = code
        elif code_is_cpt(code) or code_is_hcpcs(code):
            row['hcpcs_cpt'] = code

    return row

df_mid = df_mid.apply(recognise_codes, axis=1)
df_mid

Unnamed: 0,code,description,hcpcs_cpt,line_type,local_code,ms_drg,payer_name,setting,standard_charge
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10005,Outpatient,,,Gross Charge,outpatient,1888.18
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10006,Outpatient,,,Gross Charge,outpatient,1830.33
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,10009,Outpatient,,,Gross Charge,outpatient,2873.00
3,10021,"FINE NEEDLE ASPIRATION BIOPSY, FIRST GROWTH",10021,Outpatient,,,Gross Charge,outpatient,656.00
4,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,10030,Outpatient,,,Gross Charge,outpatient,2969.25
...,...,...,...,...,...,...,...,...,...
549877,95810,PSG W 4+ PARAM 6+YRS PEDS,95810,HCPCS/CPT,,,United HealthCare-Commercial PPO/Open Access_Avg,,
549878,95811,PSG W 4+ PARAM 6+YR W CPAP PED<6HR,95811,HCPCS/CPT,,,United HealthCare-Commercial PPO/Open Access_Avg,,
549879,95811,PSG W 4+ PARAM 6+YR W CPAP RDC<6HR,95811,HCPCS/CPT,,,United HealthCare-Commercial PPO/Open Access_Avg,,
549880,95811,PSG W 4+ PARAM 6+YRS W CPAP,95811,HCPCS/CPT,,,United HealthCare-Commercial PPO/Open Access_Avg,,


In [18]:
df_mid['standard_charge'] = df_mid['standard_charge'].apply(cleanup_dollar_value)
df_mid = df_mid[df_mid['standard_charge'] != "N/A"]
df_mid = df_mid[df_mid['standard_charge'].notnull()]
df_mid

Unnamed: 0,code,description,hcpcs_cpt,line_type,local_code,ms_drg,payer_name,setting,standard_charge
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10005,Outpatient,,,Gross Charge,outpatient,1888.18
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10006,Outpatient,,,Gross Charge,outpatient,1830.33
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,10009,Outpatient,,,Gross Charge,outpatient,2873.00
3,10021,"FINE NEEDLE ASPIRATION BIOPSY, FIRST GROWTH",10021,Outpatient,,,Gross Charge,outpatient,656.00
4,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,10030,Outpatient,,,Gross Charge,outpatient,2969.25
...,...,...,...,...,...,...,...,...,...
535034,51702,SIMPLE INSERTION OF TEMPORARY BLADDER TUBE,51702,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,1231.00
535139,55040,REMOVAL OF FLUID COLLECTION IN TESTICLE AND SP...,55040,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,4774.00
536721,93931,ULTRASOUND OF ONE ARM ARTERIES OR ARTERY GRAFTS,93931,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,345.00
536793,96374,INJECTION OF DRUG OR SUBSTANCE INTO VEIN,96374,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,1092.00


In [19]:
def payer_category_from_payer_name(payer_name):
    if payer_name == "Gross Charge":
        return 'gross'
    elif payer_name == "Discounted Cash Price":
        return 'cash'
    elif payer_name == "De-identified min contracted rate":
        return 'min'
    elif payer_name == "De-identified max contracted rate":
        return 'max'

    return 'payer'

df_mid['payer_category'] = df_mid['payer_name'].apply(payer_category_from_payer_name)
df_mid

Unnamed: 0,code,description,hcpcs_cpt,line_type,local_code,ms_drg,payer_name,setting,standard_charge,payer_category
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10005,Outpatient,,,Gross Charge,outpatient,1888.18,payer
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10006,Outpatient,,,Gross Charge,outpatient,1830.33,payer
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,10009,Outpatient,,,Gross Charge,outpatient,2873.00,payer
3,10021,"FINE NEEDLE ASPIRATION BIOPSY, FIRST GROWTH",10021,Outpatient,,,Gross Charge,outpatient,656.00,payer
4,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,10030,Outpatient,,,Gross Charge,outpatient,2969.25,payer
...,...,...,...,...,...,...,...,...,...,...
535034,51702,SIMPLE INSERTION OF TEMPORARY BLADDER TUBE,51702,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,1231.00,payer
535139,55040,REMOVAL OF FLUID COLLECTION IN TESTICLE AND SP...,55040,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,4774.00,payer
536721,93931,ULTRASOUND OF ONE ARM ARTERIES OR ARTERY GRAFTS,93931,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,345.00,payer
536793,96374,INJECTION OF DRUG OR SUBSTANCE INTO VEIN,96374,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,1092.00,payer


In [20]:
df_mid['hospital_id'] = ccn
df_mid['rev_code'] = None
df_mid['apr_drg'] = None
df_mid['eapg'] = None
df_mid['modifiers'] = None
df_mid['alt_hcpcs_cpt'] = None
df_mid['thru'] = None
df_mid['apc'] = None
df_mid['icd'] = None
df_mid['ndc'] = None
df_mid['drug_hcpcs_multiplier'] = None
df_mid['drug_quantity'] = None
df_mid['drug_unit_of_measurement'] = None
df_mid['drug_type_of_measurement'] = None
df_mid['billing_class'] = None
df_mid['plan_name'] = None
df_mid['standard_charge_percent'] = None
df_mid['contracting_method'] = None
df_mid['additional_generic_notes'] = None
df_mid['additional_payer_specific_notes'] = None

df_mid

Unnamed: 0,code,description,hcpcs_cpt,line_type,local_code,ms_drg,payer_name,setting,standard_charge,payer_category,...,drug_hcpcs_multiplier,drug_quantity,drug_unit_of_measurement,drug_type_of_measurement,billing_class,plan_name,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
0,10005,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10005,Outpatient,,,Gross Charge,outpatient,1888.18,payer,...,,,,,,,,,,
1,10006,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,10006,Outpatient,,,Gross Charge,outpatient,1830.33,payer,...,,,,,,,,,,
2,10009,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,10009,Outpatient,,,Gross Charge,outpatient,2873.00,payer,...,,,,,,,,,,
3,10021,"FINE NEEDLE ASPIRATION BIOPSY, FIRST GROWTH",10021,Outpatient,,,Gross Charge,outpatient,656.00,payer,...,,,,,,,,,,
4,10030,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,10030,Outpatient,,,Gross Charge,outpatient,2969.25,payer,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535034,51702,SIMPLE INSERTION OF TEMPORARY BLADDER TUBE,51702,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,1231.00,payer,...,,,,,,,,,,
535139,55040,REMOVAL OF FLUID COLLECTION IN TESTICLE AND SP...,55040,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,4774.00,payer,...,,,,,,,,,,
536721,93931,ULTRASOUND OF ONE ARM ARTERIES OR ARTERY GRAFTS,93931,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,345.00,payer,...,,,,,,,,,,
536793,96374,INJECTION OF DRUG OR SUBSTANCE INTO VEIN,96374,Outpatient,,,United HealthCare-Commercial PPO/Open Access_Avg,outpatient,1092.00,payer,...,,,,,,,,,,


In [21]:
df_out = pd.DataFrame(df_mid[TARGET_COLUMNS])
df_out

Unnamed: 0,hospital_id,line_type,description,rev_code,local_code,code,ms_drg,apr_drg,eapg,hcpcs_cpt,...,billing_class,setting,payer_category,payer_name,plan_name,standard_charge,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
0,030064,Outpatient,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,,,10005,,,,10005,...,,outpatient,payer,Gross Charge,,1888.18,,,,
1,030064,Outpatient,FINE NEEDLE ASPIRATION BIOPSY USING ULTRASOUND...,,,10006,,,,10006,...,,outpatient,payer,Gross Charge,,1830.33,,,,
2,030064,Outpatient,FINE NEEDLE ASPIRATION BIOPSY OF GROWTH USING ...,,,10009,,,,10009,...,,outpatient,payer,Gross Charge,,2873.00,,,,
3,030064,Outpatient,"FINE NEEDLE ASPIRATION BIOPSY, FIRST GROWTH",,,10021,,,,10021,...,,outpatient,payer,Gross Charge,,656.00,,,,
4,030064,Outpatient,DRAINAGE OF FLUID COLLECTION IN SOFT TISSUE US...,,,10030,,,,10030,...,,outpatient,payer,Gross Charge,,2969.25,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535034,030064,Outpatient,SIMPLE INSERTION OF TEMPORARY BLADDER TUBE,,,51702,,,,51702,...,,outpatient,payer,United HealthCare-Commercial PPO/Open Access_Avg,,1231.00,,,,
535139,030064,Outpatient,REMOVAL OF FLUID COLLECTION IN TESTICLE AND SP...,,,55040,,,,55040,...,,outpatient,payer,United HealthCare-Commercial PPO/Open Access_Avg,,4774.00,,,,
536721,030064,Outpatient,ULTRASOUND OF ONE ARM ARTERIES OR ARTERY GRAFTS,,,93931,,,,93931,...,,outpatient,payer,United HealthCare-Commercial PPO/Open Access_Avg,,345.00,,,,
536793,030064,Outpatient,INJECTION OF DRUG OR SUBSTANCE INTO VEIN,,,96374,,,,96374,...,,outpatient,payer,United HealthCare-Commercial PPO/Open Access_Avg,,1092.00,,,,


In [22]:
df_out.to_csv('rate_' + ccn + '.csv', index=False)