In [1]:
import base64
from io import BytesIO
from dateutil.parser import parse as parse_datetime

import pandas as pd
import requests
from lxml import html

from helpers import *

In [2]:
TARGET_COLUMNS = [
    'hospital_id',
    #'row_id',
    'line_type',
    'description',
    'rev_code',
    'local_code',
    'code',
    'ms_drg',
    'apr_drg',
    'eapg',
    'hcpcs_cpt',
    'modifiers',
    'alt_hcpcs_cpt',
    'thru',
    'apc',
    'icd',
    'ndc',
    'drug_hcpcs_multiplier',
    'drug_quantity',
    'drug_unit_of_measurement',
    'drug_type_of_measurement',
    'billing_class',
    'setting',
    'payer_category',
    'payer_name',
    'plan_name',
    'standard_charge',
    'standard_charge_percent',
    'contracting_method',
    'additional_generic_notes',
    'additional_payer_specific_notes'
]

In [3]:
transparency_page = "https://www.bannerhealth.com/patients/billing/pricing-resources/hospital-price-transparency"
mrf_url = "https://www.cdmpricing.com/37cdb181c73f9a24e83a4b98eaac3589/standard-charges"
ccn = "060126"

In [4]:
hospital_id = mrf_url.split("/")[-2]
api_url = "https://apim.services.craneware.com/api-pricing-transparency/api/public/{}/metadata/cdmFile".format(hospital_id)
api_url

'https://apim.services.craneware.com/api-pricing-transparency/api/public/37cdb181c73f9a24e83a4b98eaac3589/metadata/cdmFile'

In [5]:
resp = requests.get(api_url)

In [6]:
base64_str = resp.json().get("contentBytes")
content_type = resp.json().get("contentType")
filename = resp.json().get("fileDownloadName")
content_type, filename

('text/csv', '450233470BANNER-FORT-COLLINS-MEDICAL-CENTERstandardcharges.xlsx')

In [7]:
ein = derive_ein_from_filename(filename)
ein

'45-0233470BANNER-FORT-COLLINS-MEDICAL-CENTERstandardcharges.xlsx'

In [8]:
content = base64.b64decode(base64_str)
out_f = open(filename, 'wb')
out_f.write(content)
out_f.close()

In [9]:
b_f = BytesIO(content)
df_in = pd.read_excel(b_f)

if df_in.columns[0] != 'Code':
    first_line = df_in.columns[0]
    date_str = first_line.replace('Updated on ', '').replace('Updated on: ', '')
    last_updated = parse_datetime(date_str).isoformat().split('T')[0]
    df_in = pd.read_excel(b_f, header=1)
else:
    last_updated = '2021-01-01'

In [10]:
df_in

Unnamed: 0,Code,Description,Type,Gross Charge,De-identified min contracted rate,De-identified max contracted rate,Discounted Cash Price,United HealthCare-Commercial HMO,Aetna-Commercial HMO,Aetna-Medicare Advantage HMO,Cigna-Commercial HMO,BCBS-Commercial HMO,United HealthCare-Medicare Advantage HMO,Cigna-Commercial PPO/Open Access,Humana-Commercial HMO,Humana-Medicare Advantage HMO,Kaiser Permanaente-Commercial HMO,Rocky Mountain HMO-Commercial HMO
0,10005,Fine needle aspiration of first lesion using u...,Outpatient,1764.18,592.46,2021.85,1764.18,1784.00,2021.85,,,,592.46,1939.23,,621.29,867.75,
1,10006,Fine needle aspiration of additional lesion us...,Outpatient,1213.00,691.41,4174.65,1213.00,,4174.65,,,,,,,,691.41,
2,10060,Drainage of abscess,Outpatient,431.15,179.96,1670.61,431.15,1670.61,357.38,,,,,,,,179.96,201.34
3,10120,"Removal of foreign body from tissue, accessed ...",Outpatient,314.25,346.63,3389.03,314.25,,,,,,,,3389.03,346.63,,
4,10140,Drainage of blood or fluid accumulation,Outpatient,3131.10,,3131.10,3131.10,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13107,WIRE PACING TEMP LVL 02,,521,,,,,,,,,,,,,,,
13108,WIRE PACING TEMP LVL 03,,741,,,,,,,,,,,,,,,
13109,WIRE PACING TEMP LVL 04,,987,,,,,,,,,,,,,,,
13110,WIRE PACING TEMP LVL 05,,1232,,,,,,,,,,,,,,,


In [11]:
df_mid = pd.DataFrame(df_in)
df_mid = df_mid.rename(columns={
    'Code': 'code',
    'Description': 'description',
    'Type': 'line_type',
})
df_mid

Unnamed: 0,code,description,line_type,Gross Charge,De-identified min contracted rate,De-identified max contracted rate,Discounted Cash Price,United HealthCare-Commercial HMO,Aetna-Commercial HMO,Aetna-Medicare Advantage HMO,Cigna-Commercial HMO,BCBS-Commercial HMO,United HealthCare-Medicare Advantage HMO,Cigna-Commercial PPO/Open Access,Humana-Commercial HMO,Humana-Medicare Advantage HMO,Kaiser Permanaente-Commercial HMO,Rocky Mountain HMO-Commercial HMO
0,10005,Fine needle aspiration of first lesion using u...,Outpatient,1764.18,592.46,2021.85,1764.18,1784.00,2021.85,,,,592.46,1939.23,,621.29,867.75,
1,10006,Fine needle aspiration of additional lesion us...,Outpatient,1213.00,691.41,4174.65,1213.00,,4174.65,,,,,,,,691.41,
2,10060,Drainage of abscess,Outpatient,431.15,179.96,1670.61,431.15,1670.61,357.38,,,,,,,,179.96,201.34
3,10120,"Removal of foreign body from tissue, accessed ...",Outpatient,314.25,346.63,3389.03,314.25,,,,,,,,3389.03,346.63,,
4,10140,Drainage of blood or fluid accumulation,Outpatient,3131.10,,3131.10,3131.10,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13107,WIRE PACING TEMP LVL 02,,521,,,,,,,,,,,,,,,
13108,WIRE PACING TEMP LVL 03,,741,,,,,,,,,,,,,,,
13109,WIRE PACING TEMP LVL 04,,987,,,,,,,,,,,,,,,
13110,WIRE PACING TEMP LVL 05,,1232,,,,,,,,,,,,,,,


In [12]:
money_columns = df_mid.columns[3:]
remaining_columns = df_mid.columns[:3]
df_mid = pd.melt(df_mid, id_vars=remaining_columns, var_name='payer_name', value_name='standard_charge')
df_mid

Unnamed: 0,code,description,line_type,payer_name,standard_charge
0,10005,Fine needle aspiration of first lesion using u...,Outpatient,Gross Charge,1764.18
1,10006,Fine needle aspiration of additional lesion us...,Outpatient,Gross Charge,1213.00
2,10060,Drainage of abscess,Outpatient,Gross Charge,431.15
3,10120,"Removal of foreign body from tissue, accessed ...",Outpatient,Gross Charge,314.25
4,10140,Drainage of blood or fluid accumulation,Outpatient,Gross Charge,3131.10
...,...,...,...,...,...
196675,WIRE PACING TEMP LVL 02,,521,Rocky Mountain HMO-Commercial HMO,
196676,WIRE PACING TEMP LVL 03,,741,Rocky Mountain HMO-Commercial HMO,
196677,WIRE PACING TEMP LVL 04,,987,Rocky Mountain HMO-Commercial HMO,
196678,WIRE PACING TEMP LVL 05,,1232,Rocky Mountain HMO-Commercial HMO,


In [13]:
df_mid.loc[df_mid['line_type'] == 'Outpatient', 'setting'] = 'outpatient'
df_mid.loc[df_mid['line_type'] == 'Inpatient', 'setting'] = 'outpatient'
df_mid.loc[df_mid['line_type'] == 'Inpatient', 'line_type'] = None
df_mid.loc[df_mid['line_type'] == 'Outpatient', 'line_type'] = None

df_mid

Unnamed: 0,code,description,line_type,payer_name,standard_charge,setting
0,10005,Fine needle aspiration of first lesion using u...,,Gross Charge,1764.18,outpatient
1,10006,Fine needle aspiration of additional lesion us...,,Gross Charge,1213.00,outpatient
2,10060,Drainage of abscess,,Gross Charge,431.15,outpatient
3,10120,"Removal of foreign body from tissue, accessed ...",,Gross Charge,314.25,outpatient
4,10140,Drainage of blood or fluid accumulation,,Gross Charge,3131.10,outpatient
...,...,...,...,...,...,...
196675,WIRE PACING TEMP LVL 02,,521,Rocky Mountain HMO-Commercial HMO,,
196676,WIRE PACING TEMP LVL 03,,741,Rocky Mountain HMO-Commercial HMO,,
196677,WIRE PACING TEMP LVL 04,,987,Rocky Mountain HMO-Commercial HMO,,
196678,WIRE PACING TEMP LVL 05,,1232,Rocky Mountain HMO-Commercial HMO,,


In [14]:
df_mid['hcpcs_cpt'] = None
df_mid['ms_drg'] = None

def recognise_codes(row):
    line_type = row['line_type']
    code = row['code']
    
    if line_type == 'Charge Code':
        row['local_code'] = code
    elif line_type == 'HCPCS/CPT' or code_is_cpt(code) or code_is_hcpcs(code):
        row['hcpcs_cpt'] = code

    return row

df_mid = df_mid.apply(recognise_codes, axis=1)
df_mid

Unnamed: 0,code,description,line_type,payer_name,standard_charge,setting,hcpcs_cpt,ms_drg
0,10005,Fine needle aspiration of first lesion using u...,,Gross Charge,1764.18,outpatient,,
1,10006,Fine needle aspiration of additional lesion us...,,Gross Charge,1213.00,outpatient,,
2,10060,Drainage of abscess,,Gross Charge,431.15,outpatient,,
3,10120,"Removal of foreign body from tissue, accessed ...",,Gross Charge,314.25,outpatient,,
4,10140,Drainage of blood or fluid accumulation,,Gross Charge,3131.10,outpatient,,
...,...,...,...,...,...,...,...,...
196675,WIRE PACING TEMP LVL 02,,521,Rocky Mountain HMO-Commercial HMO,,,,
196676,WIRE PACING TEMP LVL 03,,741,Rocky Mountain HMO-Commercial HMO,,,,
196677,WIRE PACING TEMP LVL 04,,987,Rocky Mountain HMO-Commercial HMO,,,,
196678,WIRE PACING TEMP LVL 05,,1232,Rocky Mountain HMO-Commercial HMO,,,,


In [15]:
df_mid['standard_charge'] = df_mid['standard_charge'].apply(lambda rate: str(rate).replace(',', '.').strip())
df_mid = df_mid[df_mid['standard_charge'] != "N/A"]
df_mid = df_mid[df_mid['standard_charge'] != "nan"]
df_mid = df_mid[df_mid['standard_charge'].notnull()]
df_mid

Unnamed: 0,code,description,line_type,payer_name,standard_charge,setting,hcpcs_cpt,ms_drg
0,10005,Fine needle aspiration of first lesion using u...,,Gross Charge,1764.18,outpatient,,
1,10006,Fine needle aspiration of additional lesion us...,,Gross Charge,1213.0,outpatient,,
2,10060,Drainage of abscess,,Gross Charge,431.15,outpatient,,
3,10120,"Removal of foreign body from tissue, accessed ...",,Gross Charge,314.25,outpatient,,
4,10140,Drainage of blood or fluid accumulation,,Gross Charge,3131.1,outpatient,,
...,...,...,...,...,...,...,...,...
184407,99283,"Emergency department visit, moderately severe ...",,Rocky Mountain HMO-Commercial HMO,293.33,outpatient,,
184408,99284,"Emergency department visit, problem of high se...",,Rocky Mountain HMO-Commercial HMO,162.22,outpatient,,
184409,99285,"Emergency department visit, problem with signi...",,Rocky Mountain HMO-Commercial HMO,222.28,outpatient,,
184498,G0480,"Drug test(s), definitive, utilizing (1) drug i...",,Rocky Mountain HMO-Commercial HMO,122.95,outpatient,G0480,


In [16]:
def payer_category_from_payer_name(payer_name):
    payer_name = payer_name.strip()
    if payer_name == "Gross Charge":
        return 'gross'
    elif payer_name == "Discounted Cash Price":
        return 'cash'
    elif payer_name == "De-identified min contracted rate":
        return 'min'
    elif payer_name == "De-identified max contracted rate":
        return 'max'

    return 'payer'

df_mid['payer_category'] = df_mid['payer_name'].apply(payer_category_from_payer_name)
df_mid

Unnamed: 0,code,description,line_type,payer_name,standard_charge,setting,hcpcs_cpt,ms_drg,payer_category
0,10005,Fine needle aspiration of first lesion using u...,,Gross Charge,1764.18,outpatient,,,gross
1,10006,Fine needle aspiration of additional lesion us...,,Gross Charge,1213.0,outpatient,,,gross
2,10060,Drainage of abscess,,Gross Charge,431.15,outpatient,,,gross
3,10120,"Removal of foreign body from tissue, accessed ...",,Gross Charge,314.25,outpatient,,,gross
4,10140,Drainage of blood or fluid accumulation,,Gross Charge,3131.1,outpatient,,,gross
...,...,...,...,...,...,...,...,...,...
184407,99283,"Emergency department visit, moderately severe ...",,Rocky Mountain HMO-Commercial HMO,293.33,outpatient,,,payer
184408,99284,"Emergency department visit, problem of high se...",,Rocky Mountain HMO-Commercial HMO,162.22,outpatient,,,payer
184409,99285,"Emergency department visit, problem with signi...",,Rocky Mountain HMO-Commercial HMO,222.28,outpatient,,,payer
184498,G0480,"Drug test(s), definitive, utilizing (1) drug i...",,Rocky Mountain HMO-Commercial HMO,122.95,outpatient,G0480,,payer


In [17]:
if not 'local_code' in df_mid.columns:
    df_mid['local_code'] = None

df_mid['hospital_id'] = ccn
df_mid['rev_code'] = None
df_mid['apr_drg'] = None
df_mid['eapg'] = None
df_mid['modifiers'] = None
df_mid['alt_hcpcs_cpt'] = None
df_mid['thru'] = None
df_mid['apc'] = None
df_mid['icd'] = None
df_mid['ndc'] = None
df_mid['drug_hcpcs_multiplier'] = None
df_mid['drug_quantity'] = None
df_mid['drug_unit_of_measurement'] = None
df_mid['drug_type_of_measurement'] = None
df_mid['billing_class'] = None
df_mid['plan_name'] = None
df_mid['standard_charge_percent'] = None
df_mid['contracting_method'] = None
df_mid['additional_generic_notes'] = None
df_mid['additional_payer_specific_notes'] = None

df_mid

Unnamed: 0,code,description,line_type,payer_name,standard_charge,setting,hcpcs_cpt,ms_drg,payer_category,local_code,...,drug_hcpcs_multiplier,drug_quantity,drug_unit_of_measurement,drug_type_of_measurement,billing_class,plan_name,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
0,10005,Fine needle aspiration of first lesion using u...,,Gross Charge,1764.18,outpatient,,,gross,,...,,,,,,,,,,
1,10006,Fine needle aspiration of additional lesion us...,,Gross Charge,1213.0,outpatient,,,gross,,...,,,,,,,,,,
2,10060,Drainage of abscess,,Gross Charge,431.15,outpatient,,,gross,,...,,,,,,,,,,
3,10120,"Removal of foreign body from tissue, accessed ...",,Gross Charge,314.25,outpatient,,,gross,,...,,,,,,,,,,
4,10140,Drainage of blood or fluid accumulation,,Gross Charge,3131.1,outpatient,,,gross,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
184407,99283,"Emergency department visit, moderately severe ...",,Rocky Mountain HMO-Commercial HMO,293.33,outpatient,,,payer,,...,,,,,,,,,,
184408,99284,"Emergency department visit, problem of high se...",,Rocky Mountain HMO-Commercial HMO,162.22,outpatient,,,payer,,...,,,,,,,,,,
184409,99285,"Emergency department visit, problem with signi...",,Rocky Mountain HMO-Commercial HMO,222.28,outpatient,,,payer,,...,,,,,,,,,,
184498,G0480,"Drug test(s), definitive, utilizing (1) drug i...",,Rocky Mountain HMO-Commercial HMO,122.95,outpatient,G0480,,payer,,...,,,,,,,,,,


In [18]:
df_out = pd.DataFrame(df_mid[TARGET_COLUMNS])
df_out

Unnamed: 0,hospital_id,line_type,description,rev_code,local_code,code,ms_drg,apr_drg,eapg,hcpcs_cpt,...,billing_class,setting,payer_category,payer_name,plan_name,standard_charge,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
0,060126,,Fine needle aspiration of first lesion using u...,,,10005,,,,,...,,outpatient,gross,Gross Charge,,1764.18,,,,
1,060126,,Fine needle aspiration of additional lesion us...,,,10006,,,,,...,,outpatient,gross,Gross Charge,,1213.0,,,,
2,060126,,Drainage of abscess,,,10060,,,,,...,,outpatient,gross,Gross Charge,,431.15,,,,
3,060126,,"Removal of foreign body from tissue, accessed ...",,,10120,,,,,...,,outpatient,gross,Gross Charge,,314.25,,,,
4,060126,,Drainage of blood or fluid accumulation,,,10140,,,,,...,,outpatient,gross,Gross Charge,,3131.1,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
184407,060126,,"Emergency department visit, moderately severe ...",,,99283,,,,,...,,outpatient,payer,Rocky Mountain HMO-Commercial HMO,,293.33,,,,
184408,060126,,"Emergency department visit, problem of high se...",,,99284,,,,,...,,outpatient,payer,Rocky Mountain HMO-Commercial HMO,,162.22,,,,
184409,060126,,"Emergency department visit, problem with signi...",,,99285,,,,,...,,outpatient,payer,Rocky Mountain HMO-Commercial HMO,,222.28,,,,
184498,060126,,"Drug test(s), definitive, utilizing (1) drug i...",,,G0480,,,,G0480,...,,outpatient,payer,Rocky Mountain HMO-Commercial HMO,,122.95,,,,


In [19]:
df_out.to_csv('rate_' + ccn + '.csv', index=False)

In [20]:
df_out[df_out['hcpcs_cpt'].notnull()]

Unnamed: 0,hospital_id,line_type,description,rev_code,local_code,code,ms_drg,apr_drg,eapg,hcpcs_cpt,...,billing_class,setting,payer_category,payer_name,plan_name,standard_charge,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
844,060126,,Harvest and injections of platelet rich plasma...,,,0232T,,,,0232T,...,,outpatient,gross,Gross Charge,,7226.61,,,,
845,060126,,Analysis of data from CT study of heart blood ...,,,0502T,,,,0502T,...,,outpatient,gross,Gross Charge,,3208.0,,,,
846,060126,,Analysis of data from CT study of heart blood ...,,,0503T,,,,0503T,...,,outpatient,gross,Gross Charge,,3208.0,,,,
847,060126,,Noncontact real-time fluorescence wound imagin...,,,0598T,,,,0598T,...,,outpatient,gross,Gross Charge,,1459.0,,,,
848,060126,,Noncontact real-time fluorescence wound imagin...,,,0599T,,,,0599T,...,,outpatient,gross,Gross Charge,,813.18,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171544,060126,,"High osmolar contrast material, 350-399 mg/ml ...",,,Q9963,,,,Q9963,...,,outpatient,payer,Kaiser Permanaente-Commercial HMO,,0.74,,,,
171549,060126,,Infectious agent detection by nucleic acid (DN...,,,U0003,,,,U0003,...,,outpatient,payer,Kaiser Permanaente-Commercial HMO,,135.5,,,,
171550,060126,,Infectious agent detection by nucleic acid (dn...,,,U0005,,,,U0005,...,,outpatient,payer,Kaiser Permanaente-Commercial HMO,,42.75,,,,
184498,060126,,"Drug test(s), definitive, utilizing (1) drug i...",,,G0480,,,,G0480,...,,outpatient,payer,Rocky Mountain HMO-Commercial HMO,,122.95,,,,
