In [1]:
import subprocess
from urllib.parse import urljoin
from io import StringIO

from dateutil.parser import parse as parse_datetime
import pandas as pd
import requests
from lxml import html
import js2xml

from helpers import derive_ein_from_filename

In [2]:
# Output schema: the columns, in order, that are selected from the working
# frame to build every exported rate table (see the `df_mid[TARGET_COLUMNS]`
# selections further down).
TARGET_COLUMNS = [
    'hospital_id',
    # 'row_id' is intentionally left out of the export.
    #'row_id',
    'line_type',
    'description',
    'rev_code',
    'local_code',
    'code',
    'ms_drg',
    'apr_drg',
    'eapg',
    'hcpcs_cpt',
    'modifiers',
    'alt_hcpcs_cpt',
    'thru',
    'apc',
    'icd',
    'ndc',
    'drug_hcpcs_multiplier',
    'drug_quantity',
    'drug_unit_of_measurement',
    'drug_type_of_measurement',
    'billing_class',
    'setting',
    'payer_category',
    'payer_name',
    'plan_name',
    'standard_charge',
    'standard_charge_percent',
    'contracting_method',
    'additional_generic_notes',
    'additional_payer_specific_notes'
]

In [3]:
# Run configuration for one hospital. `ccn` becomes the `hospital_id` value of
# every exported row and `app_url` is the first page requested below.
transparency_page = "https://www.adventisthealth.org/patient-resources/financial-services/healthcare-costs-and-charges/cost-estimator-tool/"
# Alternative hospital, kept commented out for reference:
#ccn = "050335"
#app_url = "https://apps.para-hcfs.com/PTT/FinalLinks/Adventist_Sonora_V3.aspx"
ccn = "050608"
app_url = "https://apps.para-hcfs.com/PTT/FinalLinks/Adventist_Delano_V3.aspx"

In [4]:
# Fetch the hospital-specific application page; the bare `resp` displays the
# HTTP status so a failed request is visible before parsing.
resp = requests.get(app_url)
resp

<Response [200]>

In [5]:
# Locate the page's extnet init script, download it, and read the value of the
# `App.hdnDB_Container` object from the parsed JavaScript. That value is later
# sent as the `dbName` query parameter of the report request.
tree = html.fromstring(resp.text)
# `[0]` raises IndexError if the page contains no matching <script> tag.
js_link = tree.xpath('//script[contains(@src, "/PTT/extnet/extnet-init-js")]/@src')[0]
# The script src may be relative, so resolve it against the final page URL.
js_url = urljoin(resp.url, js_link)
resp1 = requests.get(js_url)
parsed = js2xml.parse(resp1.text)
db_name = parsed.xpath('//object[./property/string[text()="App.hdnDB_Container"]]/property[@name="value"]/string/text()')[0]
db_name

'dbDRMCDELANOCA'

In [6]:
# Query-string parameters of the report request: the database name discovered
# above plus the report type.
params = {
    'dbName': db_name,
    'type': 'CDMWithoutLabel'
}

# Hand-written multipart form body carrying the three form fields
# (__EVENTTARGET, __EVENTARGUMENT, __ExtNetDirectEventMarker).
# NOTE(review): presumably captured from a browser session (WebKit boundary) — confirm.
data = '------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__EVENTTARGET"\r\n\r\nResourceManager\r\n------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__EVENTARGUMENT"\r\n\r\n-|public|DownloadReport\r\n------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__ExtNetDirectEventMarker"\r\n\r\ndelta=true\r\n------WebKitFormBoundarysVqstz3xq11k5yBT--\r\n'

# Request the report; the response body is the CSV text split into sections below.
resp2 = requests.post('https://apps.para-hcfs.com/PTT/FinalLinks/Reports.aspx', params=params, data=data)
print(resp2)

<Response [200]>


In [7]:
# The download's file name is the first double-quoted token of the
# Content-Disposition header (IndexError if the header has no quotes).
filename = resp2.headers['content-disposition'].split('"')[1]
filename

'611823825_delano-regional-medical-center_standardcharges.csv'

In [8]:
# The response separates its sections with a blank line (CRLF CRLF); split on
# that and show how many sections were received.
chunks = resp2.text.split("\r\n\r\n")
len(chunks)

4

In [9]:
# The first section is a short header with the run date and CDM date stamp, not CSV data.
chunks[0]

'Run Date: 4/1/2023 9:41:12 AM\r\nCDM Date Stamp: 3/18/2022'

In [10]:
# The last space-separated token of the header section is the CDM date stamp
# (see the header text displayed by the previous cell).
date_str = chunks[0].split(" ")[-1]
date_str

# Normalise to an ISO date (YYYY-MM-DD) by dropping the time part.
last_updated = parse_datetime(date_str).isoformat().split("T")[0]
last_updated

'2022-03-18'

In [11]:
# Peek at the header line of the second section (procedure-code columns).
chunks[1][:100]

'Procedure Code,Procedure Description,Price Tier,Revenue Code,CPT HCPCS Code,Modifier1,Modifier2,NDC '

In [12]:
# Peek at the header line of the third section (diagnosis-related-group columns).
chunks[2][:100]

'Diagnosis Related Group Code,Diagnosis Related Group Description,Price Tier,Revenue Code,CPT HCPCS D'

In [13]:
# Keep an unmodified copy of the downloaded report under its original file
# name. The context manager closes the handle even if the write fails.
with open(filename, "w") as csv_f:
    csv_f.write(resp2.text)

In [14]:
# Parse the third section as CSV from an in-memory buffer.
csv_buf = StringIO(chunks[2])

# dtype=str keeps every value as text, so codes such as '004' keep their
# leading zeros and prices keep their original formatting.
df_in = pd.read_csv(csv_buf, dtype=str)
df_in

Unnamed: 0,Diagnosis Related Group Code,Diagnosis Related Group Description,Price Tier,Revenue Code,CPT HCPCS DRG Code,Modifier1,Modifier2,NDC Code,Rx Unit Multiplier,Gross Charge,...,BLUE_SHIELD_EPN_-_ALL_OTHER_PLANS,BLUE_SHIELD_NON-EPN,CIGNA-_ALL_PLANS,COUNTY_OF_KERN_NETWORK-_ALL_PLANS,COVENTRY-_ALL_PLANS,GEM_CARE-_ALL_PLANS,HEALTHNET-_ALL_PLANS,KAISER_COMM-_ALL_PLANS,UHC_HMO_ALL_PAYER-_ALL_OTHER_PLANS,UHC_NON_HMO_ALL_PAYER
0,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,Inpatient,,004,,,,,589970.8650,...,176823.10,197301.38,471976.69,207465.30,353982.52,197301.89,217327.30,163385.95,250541.27,251609.27
1,057,DEGENERATIVE NERVOUS SYSTEM DISORDERS WITHOUT MCC,Inpatient,,057,,,,,3391.5600,...,3391.56,2398.00,2713.25,2388.00,2034.94,2200.00,2370.00,3391.56,2624.00,3061.00
2,061,"ISCHEMIC STROKE, PRECEREBRAL OCCLUSION OR TRAN...",Inpatient,,061,,,,,70429.0200,...,37763.09,27953.12,56343.22,28702.07,42257.41,28090.33,27299.07,34893.39,31488.93,35187.93
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,Inpatient,,064,,,,,39998.5188,...,25393.44,15440.00,31998.82,15656.00,23999.11,14800.00,15640.00,23463.74,17974.00,19389.00
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,Inpatient,,065,,,,,31117.1217,...,13088.18,14446.00,24893.70,14064.00,18670.27,13400.00,13110.00,12093.58,15500.00,18077.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,981,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,981,,,,,95768.9287,...,58981.81,12428.00,76615.14,12828.82,57461.36,12183.64,13356.41,54499.66,14755.00,15635.00
260,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,982,,,,,84205.9300,...,32298.09,42543.00,67364.74,32575.50,50523.56,40847.00,30703.50,29843.69,44702.10,48982.10
261,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,987,,,,,54314.5000,...,42731.04,27664.00,43451.60,27066.00,32588.70,25600.00,25590.00,39483.82,29810.00,34768.00
262,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,989,,,,,15480.7300,...,14195.60,2398.00,12384.58,2388.00,9288.44,2200.00,2370.00,13116.85,2624.00,3061.00


In [15]:
# Map the source column names of every section layout (procedure, DRG and
# shoppable-services headers) onto target schema names. Names that are absent
# from the current section are simply ignored by rename().
df_mid = pd.DataFrame(df_in)
df_mid = df_mid.rename(columns={
    'Procedure Code': 'local_code',
    'Procedure Description': 'description',
    'Price Tier': 'setting',
    'Revenue Code': 'rev_code',
    'CPT HCPCS Code': 'hcpcs_cpt',
    'NDC Code': 'ndc',
    'Rx Unit Multiplier': 'drug_hcpcs_multiplier',
    'Modifier1': 'modifiers',
    'Diagnosis Related Group Code': 'ms_drg',
    'Diagnosis Related Group Description': 'description',
    'CPT HCPCS DRG Code': 'code',
    'Shoppable Services Code': 'local_code',
    'Shoppable Services Description': 'description'
})

df_mid

Unnamed: 0,ms_drg,description,setting,rev_code,code,modifiers,Modifier2,ndc,drug_hcpcs_multiplier,Gross Charge,...,BLUE_SHIELD_EPN_-_ALL_OTHER_PLANS,BLUE_SHIELD_NON-EPN,CIGNA-_ALL_PLANS,COUNTY_OF_KERN_NETWORK-_ALL_PLANS,COVENTRY-_ALL_PLANS,GEM_CARE-_ALL_PLANS,HEALTHNET-_ALL_PLANS,KAISER_COMM-_ALL_PLANS,UHC_HMO_ALL_PAYER-_ALL_OTHER_PLANS,UHC_NON_HMO_ALL_PAYER
0,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,Inpatient,,004,,,,,589970.8650,...,176823.10,197301.38,471976.69,207465.30,353982.52,197301.89,217327.30,163385.95,250541.27,251609.27
1,057,DEGENERATIVE NERVOUS SYSTEM DISORDERS WITHOUT MCC,Inpatient,,057,,,,,3391.5600,...,3391.56,2398.00,2713.25,2388.00,2034.94,2200.00,2370.00,3391.56,2624.00,3061.00
2,061,"ISCHEMIC STROKE, PRECEREBRAL OCCLUSION OR TRAN...",Inpatient,,061,,,,,70429.0200,...,37763.09,27953.12,56343.22,28702.07,42257.41,28090.33,27299.07,34893.39,31488.93,35187.93
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,Inpatient,,064,,,,,39998.5188,...,25393.44,15440.00,31998.82,15656.00,23999.11,14800.00,15640.00,23463.74,17974.00,19389.00
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,Inpatient,,065,,,,,31117.1217,...,13088.18,14446.00,24893.70,14064.00,18670.27,13400.00,13110.00,12093.58,15500.00,18077.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,981,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,981,,,,,95768.9287,...,58981.81,12428.00,76615.14,12828.82,57461.36,12183.64,13356.41,54499.66,14755.00,15635.00
260,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,982,,,,,84205.9300,...,32298.09,42543.00,67364.74,32575.50,50523.56,40847.00,30703.50,29843.69,44702.10,48982.10
261,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,987,,,,,54314.5000,...,42731.04,27664.00,43451.60,27066.00,32588.70,25600.00,25590.00,39483.82,29810.00,34768.00
262,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,989,,,,,15480.7300,...,14195.60,2398.00,12384.58,2388.00,9288.44,2200.00,2370.00,13116.85,2624.00,3061.00


In [16]:
def unify_modifiers(m1, m2):
    """Merge two modifier values into a single pipe-separated string.

    A value counts as missing when it is ``None`` or NaN (``pd.isna``), so the
    function works whether pandas stores empty cells as ``None`` or as NaN.

    Args:
        m1: First modifier string, or a missing value.
        m2: Second modifier string, or a missing value.

    Returns:
        ``None`` when both values are missing, the single present value when
        only one is present (a lone second modifier is kept rather than
        dropped), otherwise ``"m1|m2"``.
    """
    present = [modifier for modifier in (m1, m2) if not pd.isna(modifier)]
    if not present:
        return None

    return "|".join(present)

assert unify_modifiers(None, None) is None
assert unify_modifiers("TC", None) == "TC"
assert unify_modifiers("TC", "TC") == "TC|TC"
# A second modifier without a first one must not be lost.
assert unify_modifiers(None, "26") == "26"
# NaN placeholders are treated exactly like None.
assert unify_modifiers(float("nan"), float("nan")) is None
assert unify_modifiers("TC", float("nan")) == "TC"

In [17]:
# Replace missing modifier cells with None so unify_modifiers' `is None`
# checks recognise them.
df_mid.loc[df_mid['modifiers'].isnull(), 'modifiers'] = None
df_mid.loc[df_mid['Modifier2'].isnull(), 'Modifier2'] = None

# Collapse the two modifier columns into 'modifiers', row by row, then drop
# the now-redundant second column.
df_mid['modifiers'] = df_mid[['modifiers', 'Modifier2']].apply(lambda row: unify_modifiers(row['modifiers'], row['Modifier2']), axis=1)
del df_mid['Modifier2']
df_mid

Unnamed: 0,ms_drg,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,Gross Charge,Discounted Cash Price,...,BLUE_SHIELD_EPN_-_ALL_OTHER_PLANS,BLUE_SHIELD_NON-EPN,CIGNA-_ALL_PLANS,COUNTY_OF_KERN_NETWORK-_ALL_PLANS,COVENTRY-_ALL_PLANS,GEM_CARE-_ALL_PLANS,HEALTHNET-_ALL_PLANS,KAISER_COMM-_ALL_PLANS,UHC_HMO_ALL_PAYER-_ALL_OTHER_PLANS,UHC_NON_HMO_ALL_PAYER
0,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,Inpatient,,004,,,,589970.8650,200590.09,...,176823.10,197301.38,471976.69,207465.30,353982.52,197301.89,217327.30,163385.95,250541.27,251609.27
1,057,DEGENERATIVE NERVOUS SYSTEM DISORDERS WITHOUT MCC,Inpatient,,057,,,,3391.5600,1153.13,...,3391.56,2398.00,2713.25,2388.00,2034.94,2200.00,2370.00,3391.56,2624.00,3061.00
2,061,"ISCHEMIC STROKE, PRECEREBRAL OCCLUSION OR TRAN...",Inpatient,,061,,,,70429.0200,23945.87,...,37763.09,27953.12,56343.22,28702.07,42257.41,28090.33,27299.07,34893.39,31488.93,35187.93
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,Inpatient,,064,,,,39998.5188,13599.50,...,25393.44,15440.00,31998.82,15656.00,23999.11,14800.00,15640.00,23463.74,17974.00,19389.00
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,Inpatient,,065,,,,31117.1217,10579.82,...,13088.18,14446.00,24893.70,14064.00,18670.27,13400.00,13110.00,12093.58,15500.00,18077.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,981,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,981,,,,95768.9287,32561.44,...,58981.81,12428.00,76615.14,12828.82,57461.36,12183.64,13356.41,54499.66,14755.00,15635.00
260,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,982,,,,84205.9300,28630.02,...,32298.09,42543.00,67364.74,32575.50,50523.56,40847.00,30703.50,29843.69,44702.10,48982.10
261,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,987,,,,54314.5000,18466.93,...,42731.04,27664.00,43451.60,27066.00,32588.70,25600.00,25590.00,39483.82,29810.00,34768.00
262,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,989,,,,15480.7300,5263.45,...,14195.60,2398.00,12384.58,2388.00,9288.44,2200.00,2370.00,13116.85,2624.00,3061.00


In [18]:
# Everything from the 9th column onward is a price column (gross, cash,
# min/max and one column per payer), as the displayed list confirms.
money_columns = df_mid.columns.to_list()[8:]
money_columns

['Gross Charge',
 'Discounted Cash Price',
 'De-identified minimum negotiated charge',
 'De-identified maximum negotiated charge',
 'AETNA-_ALL_PLANS',
 'ANTHEM_-_ALL_PLANS',
 'BLUE_SHIELD_EPN_-_ALL_OTHER_PLANS',
 'BLUE_SHIELD_NON-EPN',
 'CIGNA-_ALL_PLANS',
 'COUNTY_OF_KERN_NETWORK-_ALL_PLANS',
 'COVENTRY-_ALL_PLANS',
 'GEM_CARE-_ALL_PLANS',
 'HEALTHNET-_ALL_PLANS',
 'KAISER_COMM-_ALL_PLANS',
 'UHC_HMO_ALL_PAYER-_ALL_OTHER_PLANS',
 'UHC_NON_HMO_ALL_PAYER']

In [19]:
# The first 8 columns identify the item; melt turns each price column into its
# own row, with the former column name in 'payer_name' and the price in
# 'standard_charge'.
remaining_columns = df_mid.columns.to_list()[:8]
df_mid = pd.melt(df_mid, id_vars=remaining_columns, var_name='payer_name', value_name='standard_charge')
df_mid

Unnamed: 0,ms_drg,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,payer_name,standard_charge
0,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,Inpatient,,004,,,,Gross Charge,589970.8650
1,057,DEGENERATIVE NERVOUS SYSTEM DISORDERS WITHOUT MCC,Inpatient,,057,,,,Gross Charge,3391.5600
2,061,"ISCHEMIC STROKE, PRECEREBRAL OCCLUSION OR TRAN...",Inpatient,,061,,,,Gross Charge,70429.0200
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,Inpatient,,064,,,,Gross Charge,39998.5188
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,Inpatient,,065,,,,Gross Charge,31117.1217
...,...,...,...,...,...,...,...,...,...,...
4219,981,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,981,,,,UHC_NON_HMO_ALL_PAYER,15635.00
4220,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,982,,,,UHC_NON_HMO_ALL_PAYER,48982.10
4221,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,987,,,,UHC_NON_HMO_ALL_PAYER,34768.00
4222,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,989,,,,UHC_NON_HMO_ALL_PAYER,3061.00


In [20]:
# Inspect which distinct 'setting' (price tier) values occur in this section.
set(df_mid['setting'].to_list())

{'Inpatient'}

In [21]:
# Preserve the original price-tier text in the notes column before 'setting'
# is overwritten with normalised values.
df_mid['additional_generic_notes'] = df_mid['setting']
# 'ProFee' rows are flagged as professional billing; all others stay empty.
df_mid['billing_class'] = None
df_mid.loc[df_mid['setting'] == 'ProFee', 'billing_class'] = 'professional'
# Rebuild 'setting' from the saved text: only 'Inpatient' and 'Outpatient'
# map to a value, anything else is left as None.
df_mid['setting'] = None
df_mid.loc[df_mid['additional_generic_notes'] == 'Inpatient', 'setting'] = 'inpatient'
df_mid.loc[df_mid['additional_generic_notes'] == 'Outpatient', 'setting'] = 'outpatient'
df_mid

Unnamed: 0,ms_drg,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,payer_name,standard_charge,additional_generic_notes,billing_class
0,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,inpatient,,004,,,,Gross Charge,589970.8650,Inpatient,
1,057,DEGENERATIVE NERVOUS SYSTEM DISORDERS WITHOUT MCC,inpatient,,057,,,,Gross Charge,3391.5600,Inpatient,
2,061,"ISCHEMIC STROKE, PRECEREBRAL OCCLUSION OR TRAN...",inpatient,,061,,,,Gross Charge,70429.0200,Inpatient,
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,inpatient,,064,,,,Gross Charge,39998.5188,Inpatient,
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,inpatient,,065,,,,Gross Charge,31117.1217,Inpatient,
...,...,...,...,...,...,...,...,...,...,...,...,...
4219,981,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,inpatient,,981,,,,UHC_NON_HMO_ALL_PAYER,15635.00,Inpatient,
4220,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,inpatient,,982,,,,UHC_NON_HMO_ALL_PAYER,48982.10,Inpatient,
4221,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,inpatient,,987,,,,UHC_NON_HMO_ALL_PAYER,34768.00,Inpatient,
4222,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,inpatient,,989,,,,UHC_NON_HMO_ALL_PAYER,3061.00,Inpatient,


In [22]:
def payer_name_to_payer_category(payer_name):
    """Classify a price-column label into a payer category.

    The four fixed labels map to 'cash', 'gross', 'max' and 'min'; every other
    label is treated as a named payer and yields 'payer'.
    """
    fixed_labels = (
        ('Discounted Cash Price', 'cash'),
        ('Gross Charge', 'gross'),
        ('De-identified maximum negotiated charge', 'max'),
        ('De-identified minimum negotiated charge', 'min'),
    )

    for label, category in fixed_labels:
        if payer_name == label:
            return category

    return 'payer'

# Derive the payer category of every melted row from its price-column label.
df_mid['payer_category'] = df_mid['payer_name'].apply(payer_name_to_payer_category)
df_mid

Unnamed: 0,ms_drg,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,payer_name,standard_charge,additional_generic_notes,billing_class,payer_category
0,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,inpatient,,004,,,,Gross Charge,589970.8650,Inpatient,,gross
1,057,DEGENERATIVE NERVOUS SYSTEM DISORDERS WITHOUT MCC,inpatient,,057,,,,Gross Charge,3391.5600,Inpatient,,gross
2,061,"ISCHEMIC STROKE, PRECEREBRAL OCCLUSION OR TRAN...",inpatient,,061,,,,Gross Charge,70429.0200,Inpatient,,gross
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,inpatient,,064,,,,Gross Charge,39998.5188,Inpatient,,gross
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,inpatient,,065,,,,Gross Charge,31117.1217,Inpatient,,gross
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4219,981,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,inpatient,,981,,,,UHC_NON_HMO_ALL_PAYER,15635.00,Inpatient,,payer
4220,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,inpatient,,982,,,,UHC_NON_HMO_ALL_PAYER,48982.10,Inpatient,,payer
4221,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,inpatient,,987,,,,UHC_NON_HMO_ALL_PAYER,34768.00,Inpatient,,payer
4222,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,inpatient,,989,,,,UHC_NON_HMO_ALL_PAYER,3061.00,Inpatient,,payer


In [23]:
# Drop item/payer combinations that have no price, then show the remaining size.
df_mid = df_mid[df_mid['standard_charge'].notnull()]
df_mid.shape

(4213, 13)

In [24]:
# Re-wrap the filtered frame before assigning into it.
# NOTE(review): presumably a workaround for SettingWithCopyWarning — confirm.
df_mid = pd.DataFrame(df_mid) # XXX

# Sections without a CPT/HCPCS column still need the target column.
if not 'hcpcs_cpt' in df_mid.columns:
    df_mid['hcpcs_cpt'] = None

# Use '' for missing values so the string tests below work on every row.
df_mid.loc[df_mid['hcpcs_cpt'].isnull(), 'hcpcs_cpt'] = ''
# The two special values 'WC003' and 'CS003' are moved into 'code' and
# cleared from 'hcpcs_cpt'.
df_mid.loc[df_mid['hcpcs_cpt'] == 'WC003', 'code'] = 'WC003'
df_mid.loc[df_mid['hcpcs_cpt'] == 'WC003', 'hcpcs_cpt'] = ''
df_mid.loc[df_mid['hcpcs_cpt'] == 'CS003', 'code'] = 'CS003'
df_mid.loc[df_mid['hcpcs_cpt'] == 'CS003', 'hcpcs_cpt'] = ''
# Purely alphabetic values are copied into 'code' ...
df_mid.loc[df_mid['hcpcs_cpt'].str.isalpha(), 'code'] = df_mid[df_mid['hcpcs_cpt'].str.isalpha()]['hcpcs_cpt']
# ... values that are not exactly 5 characters long are blanked ...
df_mid['hcpcs_cpt'] = df_mid['hcpcs_cpt'].apply(lambda cpt: '' if len(cpt) != 5 else cpt)
# ... and the remaining alphabetic or blank values become None.
df_mid.loc[df_mid['hcpcs_cpt'].str.isalpha(), 'hcpcs_cpt'] = None
df_mid.loc[df_mid['hcpcs_cpt'] == '', 'hcpcs_cpt'] = None
# Show the distinct values that survived the cleanup.
set(df_mid['hcpcs_cpt'].to_list())

{None}

In [25]:
# Add the remaining columns required by TARGET_COLUMNS. Columns the source may
# already provide (local_code, code, ms_drg, icd) are only created when
# absent; the others are always set to None.
df_mid['hospital_id'] = ccn
df_mid['line_type'] = None
if not 'local_code' in df_mid.columns:
    df_mid['local_code'] = None
if not 'code' in df_mid.columns:
    df_mid['code'] = None
if not 'ms_drg' in df_mid.columns:
    df_mid['ms_drg'] = None
df_mid['apr_drg'] = None
df_mid['eapg'] = None
df_mid['alt_hcpcs_cpt'] = None
df_mid['thru'] = None
df_mid['apc'] = None
if not 'icd' in df_mid.columns:
    df_mid['icd'] = None
df_mid['drug_quantity'] = None
df_mid['drug_unit_of_measurement'] = None
df_mid['drug_type_of_measurement'] = None
df_mid['plan_name'] = None
df_mid['standard_charge_percent'] = None
df_mid['contracting_method'] = None
df_mid['additional_payer_specific_notes'] = None

df_mid

Unnamed: 0,ms_drg,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,payer_name,standard_charge,...,thru,apc,icd,drug_quantity,drug_unit_of_measurement,drug_type_of_measurement,plan_name,standard_charge_percent,contracting_method,additional_payer_specific_notes
0,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,inpatient,,004,,,,Gross Charge,589970.8650,...,,,,,,,,,,
1,057,DEGENERATIVE NERVOUS SYSTEM DISORDERS WITHOUT MCC,inpatient,,057,,,,Gross Charge,3391.5600,...,,,,,,,,,,
2,061,"ISCHEMIC STROKE, PRECEREBRAL OCCLUSION OR TRAN...",inpatient,,061,,,,Gross Charge,70429.0200,...,,,,,,,,,,
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,inpatient,,064,,,,Gross Charge,39998.5188,...,,,,,,,,,,
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,inpatient,,065,,,,Gross Charge,31117.1217,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4219,981,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,inpatient,,981,,,,UHC_NON_HMO_ALL_PAYER,15635.00,...,,,,,,,,,,
4220,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,inpatient,,982,,,,UHC_NON_HMO_ALL_PAYER,48982.10,...,,,,,,,,,,
4221,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,inpatient,,987,,,,UHC_NON_HMO_ALL_PAYER,34768.00,...,,,,,,,,,,
4222,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,inpatient,,989,,,,UHC_NON_HMO_ALL_PAYER,3061.00,...,,,,,,,,,,


In [26]:
# Select exactly the target columns, in schema order; a column missing from
# df_mid raises KeyError here.
df_out = pd.DataFrame(df_mid[TARGET_COLUMNS])
df_out

Unnamed: 0,hospital_id,line_type,description,rev_code,local_code,code,ms_drg,apr_drg,eapg,hcpcs_cpt,...,billing_class,setting,payer_category,payer_name,plan_name,standard_charge,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
0,050608,,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,,,004,004,,,,...,,inpatient,gross,Gross Charge,,589970.8650,,,Inpatient,
1,050608,,DEGENERATIVE NERVOUS SYSTEM DISORDERS WITHOUT MCC,,,057,057,,,,...,,inpatient,gross,Gross Charge,,3391.5600,,,Inpatient,
2,050608,,"ISCHEMIC STROKE, PRECEREBRAL OCCLUSION OR TRAN...",,,061,061,,,,...,,inpatient,gross,Gross Charge,,70429.0200,,,Inpatient,
3,050608,,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,064,064,,,,...,,inpatient,gross,Gross Charge,,39998.5188,,,Inpatient,
4,050608,,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,065,065,,,,...,,inpatient,gross,Gross Charge,,31117.1217,,,Inpatient,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4219,050608,,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,,,981,981,,,,...,,inpatient,payer,UHC_NON_HMO_ALL_PAYER,,15635.00,,,Inpatient,
4220,050608,,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,,,982,982,,,,...,,inpatient,payer,UHC_NON_HMO_ALL_PAYER,,48982.10,,,Inpatient,
4221,050608,,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,,,987,987,,,,...,,inpatient,payer,UHC_NON_HMO_ALL_PAYER,,34768.00,,,Inpatient,
4222,050608,,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,,,989,989,,,,...,,inpatient,payer,UHC_NON_HMO_ALL_PAYER,,3061.00,,,Inpatient,


In [27]:
# Write the converted section to a scratch file without the index column.
df_out.to_csv("tmp.csv", index=False)

In [28]:
def convert_chunk(chunk, ccn):
    """Convert one CSV section of the downloaded report into the target schema.

    The section is parsed with every value kept as text, its source columns
    are renamed to target names, the wide price columns are melted into one
    row per (item, price column), and the columns required by
    ``TARGET_COLUMNS`` are filled in.

    Args:
        chunk: Text of a single CSV section (header line plus data rows).
        ccn: Value written to the ``hospital_id`` column of every row.

    Returns:
        A new DataFrame holding exactly the ``TARGET_COLUMNS`` columns, with
        only the rows whose ``standard_charge`` is not null.

    Raises:
        KeyError: If the section lacks a column this function relies on,
            e.g. ``Modifier1``/``Modifier2`` or ``Price Tier``.
    """
    # dtype=str keeps codes such as '004' intact (no numeric coercion).
    df_mid = pd.read_csv(StringIO(chunk), dtype=str).rename(columns={
        'Procedure Code': 'local_code',
        'Procedure Description': 'description',
        'Price Tier': 'setting',
        'Revenue Code': 'rev_code',
        'CPT HCPCS Code': 'hcpcs_cpt',
        'NDC Code': 'ndc',
        'Rx Unit Multiplier': 'drug_hcpcs_multiplier',
        'Modifier1': 'modifiers',
        'Diagnosis Related Group Code': 'ms_drg',
        'Diagnosis Related Group Description': 'description',
        'CPT HCPCS DRG Code': 'code',
        'Shoppable Services Code': 'local_code',
        'Shoppable Services Description': 'description'
    })

    # Merge the two modifier columns. Missing cells are normalised to None
    # here, so the merge does not depend on whether pandas stores them as
    # None or NaN, and an empty section is handled without a row-wise apply.
    df_mid['modifiers'] = [
        unify_modifiers(None if pd.isna(m1) else m1, None if pd.isna(m2) else m2)
        for m1, m2 in zip(df_mid['modifiers'], df_mid['Modifier2'])
    ]
    del df_mid['Modifier2']

    # The first 8 columns describe the item; every later column is a price
    # column and becomes its own row.
    # NOTE(review): assumes all sections share this 8-column prefix — confirm.
    id_column_count = 8
    remaining_columns = df_mid.columns.to_list()[:id_column_count]
    df_mid = pd.melt(df_mid, id_vars=remaining_columns, var_name='payer_name', value_name='standard_charge')

    # Keep the original price-tier text as a note, then derive billing_class
    # and the normalised setting from it.
    df_mid['additional_generic_notes'] = df_mid['setting']
    df_mid['billing_class'] = None
    df_mid.loc[df_mid['setting'] == 'ProFee', 'billing_class'] = 'professional'
    df_mid['setting'] = None
    df_mid.loc[df_mid['additional_generic_notes'] == 'Inpatient', 'setting'] = 'inpatient'
    df_mid.loc[df_mid['additional_generic_notes'] == 'Outpatient', 'setting'] = 'outpatient'

    df_mid['payer_category'] = df_mid['payer_name'].apply(payer_name_to_payer_category)

    # Drop combinations without a price. The explicit copy makes the frame
    # independent of the unfiltered one before it is modified below.
    df_mid = df_mid[df_mid['standard_charge'].notnull()].copy()

    if 'hcpcs_cpt' not in df_mid.columns:
        df_mid['hcpcs_cpt'] = None

    # Clean up hcpcs_cpt: '' stands in for missing values so the string
    # tests apply to every row.
    df_mid.loc[df_mid['hcpcs_cpt'].isnull(), 'hcpcs_cpt'] = ''
    # The special values 'WC003' and 'CS003' belong in 'code'.
    for special_code in ('WC003', 'CS003'):
        df_mid.loc[df_mid['hcpcs_cpt'] == special_code, 'code'] = special_code
        df_mid.loc[df_mid['hcpcs_cpt'] == special_code, 'hcpcs_cpt'] = ''
    # Purely alphabetic values are copied to 'code' ...
    alpha_mask = df_mid['hcpcs_cpt'].str.isalpha()
    df_mid.loc[alpha_mask, 'code'] = df_mid.loc[alpha_mask, 'hcpcs_cpt']
    # ... values that are not exactly 5 characters long are blanked ...
    df_mid['hcpcs_cpt'] = df_mid['hcpcs_cpt'].apply(lambda cpt: '' if len(cpt) != 5 else cpt)
    # ... and remaining alphabetic or blank values become None.
    df_mid.loc[df_mid['hcpcs_cpt'].str.isalpha(), 'hcpcs_cpt'] = None
    df_mid.loc[df_mid['hcpcs_cpt'] == '', 'hcpcs_cpt'] = None

    df_mid['hospital_id'] = ccn

    # Columns the source may already provide are only created when absent.
    for optional_column in ('local_code', 'code', 'ms_drg', 'icd'):
        if optional_column not in df_mid.columns:
            df_mid[optional_column] = None

    # Columns this source never provides are always empty.
    for empty_column in (
        'line_type',
        'apr_drg',
        'eapg',
        'alt_hcpcs_cpt',
        'thru',
        'apc',
        'drug_quantity',
        'drug_unit_of_measurement',
        'drug_type_of_measurement',
        'plan_name',
        'standard_charge_percent',
        'contracting_method',
        'additional_payer_specific_notes',
    ):
        df_mid[empty_column] = None

    # Select the schema columns in order; the copy hands the caller an
    # independent frame.
    return df_mid[TARGET_COLUMNS].copy()

In [29]:
def _escape_sql_string(value):
    """Return ``value`` as text that is safe inside a double-quoted SQL string."""
    # Doubling the delimiter stops a server-supplied value (e.g. the file
    # name) from terminating the literal early.
    # NOTE(review): backslashes are left untouched — confirm how the target
    # database treats them.
    return str(value).replace('"', '""')


def perform_task(h_f, ccn, app_url, transparency_page):
    """Download one hospital's report, export its rates and log a SQL update.

    Steps: fetch the application page, read the database name from its extnet
    init script, request the report, convert every CSV section with
    ``convert_chunk``, write the combined rows to ``rate_<ccn>.csv`` and
    append an ``UPDATE hospital ...`` statement to ``h_f``.

    Args:
        h_f: Writable text file object that receives the SQL statement.
        ccn: Hospital identifier; used for the output file name, the
            ``hospital_id`` column and the SQL ``WHERE`` clause.
        app_url: URL of the hospital-specific application page.
        transparency_page: URL stored in the ``transparency_page`` field.

    Raises:
        requests.HTTPError: If any of the three HTTP requests returns an
            error status.
        IndexError: If the init script link, the database name or a quoted
            file name cannot be found in the responses.
        ValueError: If the report contains no non-empty data section
            (raised by ``pd.concat``).
    """
    resp = requests.get(app_url)
    print(resp.url)
    # Fail on HTTP errors instead of parsing an error page.
    resp.raise_for_status()

    tree = html.fromstring(resp.text)
    js_link = tree.xpath('//script[contains(@src, "/PTT/extnet/extnet-init-js")]/@src')[0]
    # The script src may be relative, so resolve it against the final page URL.
    js_url = urljoin(resp.url, js_link)
    resp1 = requests.get(js_url)
    resp1.raise_for_status()
    parsed = js2xml.parse(resp1.text)
    db_name = parsed.xpath('//object[./property/string[text()="App.hdnDB_Container"]]/property[@name="value"]/string/text()')[0]

    params = { 'dbName': db_name, 'type': 'CDMWithoutLabel' }

    # Hand-written multipart form body with the three form fields.
    data = '------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__EVENTTARGET"\r\n\r\nResourceManager\r\n------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__EVENTARGUMENT"\r\n\r\n-|public|DownloadReport\r\n------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__ExtNetDirectEventMarker"\r\n\r\ndelta=true\r\n------WebKitFormBoundarysVqstz3xq11k5yBT--\r\n'

    resp2 = requests.post('https://apps.para-hcfs.com/PTT/FinalLinks/Reports.aspx', params=params, data=data)
    print(resp2.url)
    resp2.raise_for_status()

    # Sections are separated by a blank line. The first one is the date
    # header; blank sections (e.g. after a trailing separator) are skipped
    # because read_csv cannot parse empty input.
    chunks = resp2.text.split("\r\n\r\n")
    dfs = [convert_chunk(chunk, ccn) for chunk in chunks[1:] if chunk.strip()]

    df_out = pd.concat(dfs)

    df_out.to_csv('rate_' + ccn + '.csv', index=False)

    # The file name is the first double-quoted token of the header.
    filename = resp2.headers['content-disposition'].split('"')[1]
    ein = derive_ein_from_filename(filename)

    # The last space-separated token of the header section is the date stamp;
    # keep only its date part in ISO format.
    date_str = chunks[0].split(" ")[-1]
    last_updated_at = parse_datetime(date_str).isoformat().split("T")[0]

    query = 'UPDATE hospital SET ein = "{}", last_updated = "{}", file_name = "{}", mrf_url = "{}", transparency_page = "{}" WHERE id = "{}";'.format(
        *(_escape_sql_string(value) for value in (
            ein, last_updated_at, filename, app_url, transparency_page, ccn)))

    h_f.write(query)
    h_f.write("\n")

In [30]:
# Run the full pipeline for the configured hospital. The context manager
# closes hospitals.sql even if perform_task raises part-way through.
with open("hospitals.sql", "w") as h_f:
    perform_task(h_f, ccn, app_url, transparency_page)

https://apps.para-hcfs.com/PTT/FinalLinks/Adventist_Delano_V3.aspx
https://apps.para-hcfs.com/PTT/FinalLinks/Reports.aspx?dbName=dbDRMCDELANOCA&type=CDMWithoutLabel
