In [1]:
from urllib.parse import urlparse
import subprocess
import json

import pandas as pd
import requests

from helpers import derive_filename_from_url

In [2]:
TARGET_COLUMNS = [ 
    'hospital_id',
    #'row_id',
    'line_type',
    'description',
    'rev_code',
    'local_code',
    'code',
    'ms_drg',
    'apr_drg',
    'eapg',
    'hcpcs_cpt',
    'modifiers',
    'alt_hcpcs_cpt',
    'thru',
    'apc',
    'icd',
    'ndc',
    'drug_hcpcs_multiplier',
    'drug_quantity',
    'drug_unit_of_measurement',
    'drug_type_of_measurement',
    'billing_class',
    'setting',
    'payer_category',
    'payer_name',
    'plan_name',
    'standard_charge',
    'standard_charge_percent',
    'contracting_method',
    'additional_generic_notes',
    'additional_payer_specific_notes'
]

In [3]:
ccn = "241328"
hpi_url = "https://search.hospitalpriceindex.com/hpi2/hospital/SanfordBagleyMedicalCenter/8387or"

In [4]:
def_id = urlparse(hpi_url).path.split('/')[-1].replace('or', '')
def_id

'8387'

In [5]:
json_data = {
    'defId': 8387,
    'priceStatus': 'published',
    'listName': 'machineRead',
}

resp = requests.post('https://api.hospitalpriceindex.com/itemList/detail', json=json_data)
print(resp)

<Response [200]>


In [6]:
json_dict = resp.json()
result = json_dict.get("result")[0] # TODO: check if results list is not empty
ein = result.get("eiNumber")
ein = ein[:2] + "-" + ein[2:]
ein

'41-1266009'

In [7]:
mrf_url = result.get("extractFile").replace('\\', '/')
mrf_url

'https://goldteamstorage.blob.core.windows.net/hpifilestorage/8387/411266009_HPI-CDM_standardcharges.json'

In [8]:
filename = derive_filename_from_url(mrf_url)
filename

'411266009_HPI-CDM_standardcharges.json'

In [9]:
subprocess.run(["wget", "--no-clobber", mrf_url, "-O", filename])

File ‘411266009_HPI-CDM_standardcharges.json’ already there; not retrieving.


CompletedProcess(args=['wget', '--no-clobber', 'https://goldteamstorage.blob.core.windows.net/hpifilestorage/8387/411266009_HPI-CDM_standardcharges.json', '-O', '411266009_HPI-CDM_standardcharges.json'], returncode=1)

In [10]:
in_f = open(filename, 'r')
json_content = json.load(in_f)
in_f.close()

last_updated = json_content[0].get('lastUpdated')
last_updated

'2023-04-25'

In [11]:
df_in = pd.DataFrame(json_content[0].get("item"))
df_in

Unnamed: 0,description,Associated_Codes,Avg_Gross_Charge,Payer_Allowed_Amount,Cash_Discount_Price,Deidentified_Min_Allowed,DeIdentified_Max_Allowed,payer,iobSelection
0,AMBULANCE,0540,,,,,,,
1,"AMBULANCE - Advanced Life Support, Emergency T...","A0427,0540",,,,,,,
2,"AMBULANCE - Advanced Life Support, Level Ii","A0433,0540",,,,,,,
3,"AMBULANCE - Advanced Life Support, Non-Emergen...","A0426,0540",,,,,,,
4,"AMBULANCE - Basic Life Support, Emergency Tran...","A0429,0540",,,,,,,
...,...,...,...,...,...,...,...,...,...
27478,X-Ray of Upper Arm,73060,666.0000,,599.4000,356.7000,466.1900,United Healthcare Medicare Advantage,Outpatient
27479,"X-Ray of Upper Leg, 2 or More Views",73552,644.3200,,579.8900,122.0200,532.2800,United Healthcare Medicare Advantage,Outpatient
27480,"X-Ray of Wrist, 2 Views",73100,367.2200,196.0300,330.5000,196.0300,328.6700,United Healthcare Medicare Advantage,Outpatient
27481,"X-Ray of Wrist, 3 or More Views",73110,539.9800,281.0400,485.9800,57.4800,475.7100,United Healthcare Medicare Advantage,Outpatient


In [12]:
df_mid = pd.DataFrame(df_in)
set(df_mid['iobSelection'].to_list())

{'Inpatient', 'Outpatient', nan}

In [13]:
df_mid = df_mid.rename(columns={
    'iobSelection': 'setting',
    'Associated_Codes': 'code',
    'payer': 'payer_name'
})

In [14]:
money_columns = df_mid.columns[2:-2].to_list()
remaining_columns = set(df_mid.columns.to_list()) - set(money_columns)
remaining_columns = list(remaining_columns)
remaining_columns

['payer_name', 'setting', 'code', 'description']

In [15]:
df_mid = pd.melt(df_mid, id_vars=remaining_columns, var_name='payer_name2', value_name='standard_charge')
df_mid

Unnamed: 0,payer_name,setting,code,description,payer_name2,standard_charge
0,,,0540,AMBULANCE,Avg_Gross_Charge,
1,,,"A0427,0540","AMBULANCE - Advanced Life Support, Emergency T...",Avg_Gross_Charge,
2,,,"A0433,0540","AMBULANCE - Advanced Life Support, Level Ii",Avg_Gross_Charge,
3,,,"A0426,0540","AMBULANCE - Advanced Life Support, Non-Emergen...",Avg_Gross_Charge,
4,,,"A0429,0540","AMBULANCE - Basic Life Support, Emergency Tran...",Avg_Gross_Charge,
...,...,...,...,...,...,...
137410,United Healthcare Medicare Advantage,Outpatient,73060,X-Ray of Upper Arm,DeIdentified_Max_Allowed,466.1900
137411,United Healthcare Medicare Advantage,Outpatient,73552,"X-Ray of Upper Leg, 2 or More Views",DeIdentified_Max_Allowed,532.2800
137412,United Healthcare Medicare Advantage,Outpatient,73100,"X-Ray of Wrist, 2 Views",DeIdentified_Max_Allowed,328.6700
137413,United Healthcare Medicare Advantage,Outpatient,73110,"X-Ray of Wrist, 3 or More Views",DeIdentified_Max_Allowed,475.7100


In [16]:
df_mid.loc[df_mid['payer_name2'] != 'Payer_Allowed_Amount', 'payer_name'] = df_mid[df_mid['payer_name2'] != 'Payer_Allowed_Amount']['payer_name2']
del df_mid['payer_name2']

df_mid

Unnamed: 0,payer_name,setting,code,description,standard_charge
0,Avg_Gross_Charge,,0540,AMBULANCE,
1,Avg_Gross_Charge,,"A0427,0540","AMBULANCE - Advanced Life Support, Emergency T...",
2,Avg_Gross_Charge,,"A0433,0540","AMBULANCE - Advanced Life Support, Level Ii",
3,Avg_Gross_Charge,,"A0426,0540","AMBULANCE - Advanced Life Support, Non-Emergen...",
4,Avg_Gross_Charge,,"A0429,0540","AMBULANCE - Basic Life Support, Emergency Tran...",
...,...,...,...,...,...
137410,DeIdentified_Max_Allowed,Outpatient,73060,X-Ray of Upper Arm,466.1900
137411,DeIdentified_Max_Allowed,Outpatient,73552,"X-Ray of Upper Leg, 2 or More Views",532.2800
137412,DeIdentified_Max_Allowed,Outpatient,73100,"X-Ray of Wrist, 2 Views",328.6700
137413,DeIdentified_Max_Allowed,Outpatient,73110,"X-Ray of Wrist, 3 or More Views",475.7100


In [17]:
df_mid = df_mid[df_mid['standard_charge'] != 'N/A']
df_mid = df_mid[df_mid['standard_charge'].notnull()]
df_mid

Unnamed: 0,payer_name,setting,code,description,standard_charge
209,Avg_Gross_Charge,Outpatient,95912,11-12 Nerve Conduction Studies,1137.0000
210,Avg_Gross_Charge,Outpatient,G0404,12-Lead EKG During Annual Wellness Exam Withou...,48.0000
212,Avg_Gross_Charge,Outpatient,93005,12-Lead EKG Without Physician Interpretation,258.8200
213,Avg_Gross_Charge,Outpatient,95908,3-4 Nerve Conduction Studies,505.0000
214,Avg_Gross_Charge,Outpatient,95909,5-6 Nerve Conduction Studies,652.0000
...,...,...,...,...,...
137410,DeIdentified_Max_Allowed,Outpatient,73060,X-Ray of Upper Arm,466.1900
137411,DeIdentified_Max_Allowed,Outpatient,73552,"X-Ray of Upper Leg, 2 or More Views",532.2800
137412,DeIdentified_Max_Allowed,Outpatient,73100,"X-Ray of Wrist, 2 Views",328.6700
137413,DeIdentified_Max_Allowed,Outpatient,73110,"X-Ray of Wrist, 3 or More Views",475.7100


In [18]:
df_mid['setting'] = df_mid['setting'].str.lower()

In [19]:
from helpers import *

def code_is_rev_code(code):
    if type(code) != str:
        return False

    return len(code) == 4 and code.isnumeric()

df_mid['ms_drg'] = None
df_mid['hcpcs_cpt'] = None
df_mid['alt_hcpcs_cpt'] = None
df_mid['rev_code'] = None

def split_codes(row):
    codes = row.get('code')
    codes = codes.split(',')
    
    for code in codes:
        code = code.strip()

        if code_is_rev_code(code):
            row['rev_code'] = code
        elif code_is_cpt(code) or code_is_hcpcs(code):
            if row.get('hcpcs_cpt') is None:
                row['hcpcs_cpt'] = code
            else:
                row['alt_hcpcs_cpt'] = code
        elif code_is_ms_drg(code):
            row['ms_drg'] = code

    return row

df_mid = df_mid.apply(split_codes, axis=1)
df_mid

Unnamed: 0,payer_name,setting,code,description,standard_charge,ms_drg,hcpcs_cpt,alt_hcpcs_cpt,rev_code
209,Avg_Gross_Charge,outpatient,95912,11-12 Nerve Conduction Studies,1137.0000,,95912,,
210,Avg_Gross_Charge,outpatient,G0404,12-Lead EKG During Annual Wellness Exam Withou...,48.0000,,G0404,,
212,Avg_Gross_Charge,outpatient,93005,12-Lead EKG Without Physician Interpretation,258.8200,,93005,,
213,Avg_Gross_Charge,outpatient,95908,3-4 Nerve Conduction Studies,505.0000,,95908,,
214,Avg_Gross_Charge,outpatient,95909,5-6 Nerve Conduction Studies,652.0000,,95909,,
...,...,...,...,...,...,...,...,...,...
137410,DeIdentified_Max_Allowed,outpatient,73060,X-Ray of Upper Arm,466.1900,,73060,,
137411,DeIdentified_Max_Allowed,outpatient,73552,"X-Ray of Upper Leg, 2 or More Views",532.2800,,73552,,
137412,DeIdentified_Max_Allowed,outpatient,73100,"X-Ray of Wrist, 2 Views",328.6700,,73100,,
137413,DeIdentified_Max_Allowed,outpatient,73110,"X-Ray of Wrist, 3 or More Views",475.7100,,73110,,


In [20]:
df_mid['payer_category'] = 'payer'
df_mid.loc[df_mid['payer_name'] == 'Avg_Gross_Charge', 'payer_category'] = 'gross'
df_mid.loc[df_mid['payer_name'] == 'Cash_Discount_Price', 'payer_category'] = 'cash'
df_mid.loc[df_mid['payer_name'] == 'Deidentified_Min_Allowed', 'payer_category'] = 'min'
df_mid.loc[df_mid['payer_name'] == 'DeIdentified_Max_Allowed', 'payer_category'] = 'max'
df_mid

Unnamed: 0,payer_name,setting,code,description,standard_charge,ms_drg,hcpcs_cpt,alt_hcpcs_cpt,rev_code,payer_category
209,Avg_Gross_Charge,outpatient,95912,11-12 Nerve Conduction Studies,1137.0000,,95912,,,gross
210,Avg_Gross_Charge,outpatient,G0404,12-Lead EKG During Annual Wellness Exam Withou...,48.0000,,G0404,,,gross
212,Avg_Gross_Charge,outpatient,93005,12-Lead EKG Without Physician Interpretation,258.8200,,93005,,,gross
213,Avg_Gross_Charge,outpatient,95908,3-4 Nerve Conduction Studies,505.0000,,95908,,,gross
214,Avg_Gross_Charge,outpatient,95909,5-6 Nerve Conduction Studies,652.0000,,95909,,,gross
...,...,...,...,...,...,...,...,...,...,...
137410,DeIdentified_Max_Allowed,outpatient,73060,X-Ray of Upper Arm,466.1900,,73060,,,max
137411,DeIdentified_Max_Allowed,outpatient,73552,"X-Ray of Upper Leg, 2 or More Views",532.2800,,73552,,,max
137412,DeIdentified_Max_Allowed,outpatient,73100,"X-Ray of Wrist, 2 Views",328.6700,,73100,,,max
137413,DeIdentified_Max_Allowed,outpatient,73110,"X-Ray of Wrist, 3 or More Views",475.7100,,73110,,,max


In [21]:
df_mid['hospital_id'] = ccn
df_mid['line_type'] = None
df_mid['local_code'] = None
df_mid['apr_drg'] = None
df_mid['eapg'] = None
df_mid['modifiers'] = None
df_mid['thru'] = None
df_mid['apc'] = None
df_mid['icd'] = None
df_mid['ndc'] = None
df_mid['drug_hcpcs_multiplier'] = None
df_mid['drug_quantity'] = None
df_mid['drug_unit_of_measurement'] = None
df_mid['drug_type_of_measurement'] = None
df_mid['billing_class'] = None
df_mid['plan_name'] = None
df_mid['standard_charge_percent'] = None
df_mid['contracting_method'] = None
df_mid['additional_generic_notes'] = None
df_mid['additional_payer_specific_notes'] = None

df_mid

Unnamed: 0,payer_name,setting,code,description,standard_charge,ms_drg,hcpcs_cpt,alt_hcpcs_cpt,rev_code,payer_category,...,drug_hcpcs_multiplier,drug_quantity,drug_unit_of_measurement,drug_type_of_measurement,billing_class,plan_name,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
209,Avg_Gross_Charge,outpatient,95912,11-12 Nerve Conduction Studies,1137.0000,,95912,,,gross,...,,,,,,,,,,
210,Avg_Gross_Charge,outpatient,G0404,12-Lead EKG During Annual Wellness Exam Withou...,48.0000,,G0404,,,gross,...,,,,,,,,,,
212,Avg_Gross_Charge,outpatient,93005,12-Lead EKG Without Physician Interpretation,258.8200,,93005,,,gross,...,,,,,,,,,,
213,Avg_Gross_Charge,outpatient,95908,3-4 Nerve Conduction Studies,505.0000,,95908,,,gross,...,,,,,,,,,,
214,Avg_Gross_Charge,outpatient,95909,5-6 Nerve Conduction Studies,652.0000,,95909,,,gross,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137410,DeIdentified_Max_Allowed,outpatient,73060,X-Ray of Upper Arm,466.1900,,73060,,,max,...,,,,,,,,,,
137411,DeIdentified_Max_Allowed,outpatient,73552,"X-Ray of Upper Leg, 2 or More Views",532.2800,,73552,,,max,...,,,,,,,,,,
137412,DeIdentified_Max_Allowed,outpatient,73100,"X-Ray of Wrist, 2 Views",328.6700,,73100,,,max,...,,,,,,,,,,
137413,DeIdentified_Max_Allowed,outpatient,73110,"X-Ray of Wrist, 3 or More Views",475.7100,,73110,,,max,...,,,,,,,,,,


In [22]:
df_out = pd.DataFrame(df_mid[TARGET_COLUMNS])
df_out

Unnamed: 0,hospital_id,line_type,description,rev_code,local_code,code,ms_drg,apr_drg,eapg,hcpcs_cpt,...,billing_class,setting,payer_category,payer_name,plan_name,standard_charge,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
209,241328,,11-12 Nerve Conduction Studies,,,95912,,,,95912,...,,outpatient,gross,Avg_Gross_Charge,,1137.0000,,,,
210,241328,,12-Lead EKG During Annual Wellness Exam Withou...,,,G0404,,,,G0404,...,,outpatient,gross,Avg_Gross_Charge,,48.0000,,,,
212,241328,,12-Lead EKG Without Physician Interpretation,,,93005,,,,93005,...,,outpatient,gross,Avg_Gross_Charge,,258.8200,,,,
213,241328,,3-4 Nerve Conduction Studies,,,95908,,,,95908,...,,outpatient,gross,Avg_Gross_Charge,,505.0000,,,,
214,241328,,5-6 Nerve Conduction Studies,,,95909,,,,95909,...,,outpatient,gross,Avg_Gross_Charge,,652.0000,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137410,241328,,X-Ray of Upper Arm,,,73060,,,,73060,...,,outpatient,max,DeIdentified_Max_Allowed,,466.1900,,,,
137411,241328,,"X-Ray of Upper Leg, 2 or More Views",,,73552,,,,73552,...,,outpatient,max,DeIdentified_Max_Allowed,,532.2800,,,,
137412,241328,,"X-Ray of Wrist, 2 Views",,,73100,,,,73100,...,,outpatient,max,DeIdentified_Max_Allowed,,328.6700,,,,
137413,241328,,"X-Ray of Wrist, 3 or More Views",,,73110,,,,73110,...,,outpatient,max,DeIdentified_Max_Allowed,,475.7100,,,,


In [23]:
df_out.to_csv('rate_' + ccn + '.csv', index=False)