In [1]:
import subprocess
from urllib.parse import urljoin
from io import StringIO

from dateutil.parser import parse as parse_datetime
import pandas as pd
import requests
from lxml import html
import js2xml

from helpers import derive_ein_from_filename

In [2]:
# Output schema: every converted frame is projected onto exactly these
# columns, in this order, before it is written out.
TARGET_COLUMNS = [
    'hospital_id',
    #'row_id',
    'line_type',
    'description',
    'rev_code',
    'local_code',
    'code',
    'ms_drg',
    'apr_drg',
    'eapg',
    'hcpcs_cpt',
    'modifiers',
    'alt_hcpcs_cpt',
    'thru',
    'apc',
    'icd',
    'ndc',
    'drug_hcpcs_multiplier',
    'drug_quantity',
    'drug_unit_of_measurement',
    'drug_type_of_measurement',
    'billing_class',
    'setting',
    'payer_category',
    'payer_name',
    'plan_name',
    'standard_charge',
    'standard_charge_percent',
    'contracting_method',
    'additional_generic_notes',
    'additional_payer_specific_notes'
]

In [3]:
# Inputs for one hospital: its public price-transparency page, the identifier
# stored as `hospital_id` in the output (and used as the SQL row id), and the
# pricing-app page the scrape starts from.
transparency_page = "https://www.adventisthealth.org/patient-resources/financial-services/healthcare-costs-and-charges/cost-estimator-tool/"
ccn = "050335"
app_url = "https://apps.para-hcfs.com/PTT/FinalLinks/Adventist_Sonora_V3.aspx"
# Alternate identifier / app page pair (Clear Lake); uncomment to target it instead.
#ccn = "051317"
#app_url = "https://apps.para-hcfs.com/PTT/FinalLinks/Adventist_Clear_Lake_V3.aspx"

In [4]:
resp = requests.get(app_url)
resp

<Response [200]>

In [5]:
# Find the <script> whose src contains "/PTT/extnet/extnet-init-js", download
# it, and parse the JavaScript. The database name is the `value` property of
# the object that mentions App.hdnDB_Container; it is sent later as `dbName`.
tree = html.fromstring(resp.text)
js_link = tree.xpath('//script[contains(@src, "/PTT/extnet/extnet-init-js")]/@src')[0]
# The src may be relative, so resolve it against the final URL of the app page.
js_url = urljoin(resp.url, js_link)
resp1 = requests.get(js_url)
parsed = js2xml.parse(resp1.text)
db_name = parsed.xpath('//object[./property/string[text()="App.hdnDB_Container"]]/property[@name="value"]/string/text()')[0]
db_name

'dbAHSSONORACA'

In [6]:
# Query-string parameters selecting which database and report type to download.
params = {
    'dbName': db_name,
    'type': 'CDMWithoutLabel'
}

# Hand-written multipart form body carrying the three form fields
# (__EVENTTARGET, __EVENTARGUMENT, __ExtNetDirectEventMarker). It is posted as a
# raw string.
# NOTE(review): no Content-Type header with the boundary is passed explicitly;
# the recorded run below still returned 200.
data = '------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__EVENTTARGET"\r\n\r\nResourceManager\r\n------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__EVENTARGUMENT"\r\n\r\n-|public|DownloadReport\r\n------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__ExtNetDirectEventMarker"\r\n\r\ndelta=true\r\n------WebKitFormBoundarysVqstz3xq11k5yBT--\r\n'

resp2 = requests.post('https://apps.para-hcfs.com/PTT/FinalLinks/Reports.aspx', params=params, data=data)
print(resp2)

<Response [200]>


In [7]:
filename = resp2.headers['content-disposition'].split('"')[1]
filename

'941415069_adventist-health---sonora_standardcharges.csv'

In [8]:
chunks = resp2.text.split("\r\n\r\n")
len(chunks)

4

In [9]:
chunks[0]

'Run Date: 5/6/2023 1:07:03 PM\r\nCDM Date Stamp: 4/20/2023'

In [10]:
# The last space-separated token of the header chunk is the CDM date stamp.
date_str = chunks[0].split(" ")[-1]
date_str

# Parse it and keep only the date part of the ISO-8601 representation.
last_updated = parse_datetime(date_str).isoformat().split("T")[0]
last_updated

'2023-04-20'

In [11]:
chunks[1][:100]

'Procedure Code,Procedure Description,Price Tier,Revenue Code,CPT HCPCS Code,Modifier1,Modifier2,NDC '

In [12]:
chunks[2][:100]

'Diagnosis Related Group Code,Diagnosis Related Group Description,Price Tier,Revenue Code,CPT HCPCS D'

In [13]:
# Keep a local copy of the downloaded report text under the server-supplied name.
# - The context manager closes the file even if the write raises.
# - newline="" stops Python from translating the "\r\n" line endings already
#   present in the payload (which would become "\r\r\n" on Windows).
# - An explicit encoding keeps the result independent of the machine's locale.
with open(filename, "w", newline="", encoding="utf-8") as csv_f:
    csv_f.write(resp2.text)

In [14]:
csv_buf = StringIO(chunks[2])

df_in = pd.read_csv(csv_buf, dtype=str)
df_in

Unnamed: 0,Diagnosis Related Group Code,Diagnosis Related Group Description,Price Tier,Revenue Code,CPT HCPCS DRG Code,Modifier1,Modifier2,NDC Code,Rx Unit Multiplier,Gross Charge,...,MULTIPLAN_-_ALL_PLANS,PACIFICARE_-_ALL_PLANS,PHCS_PPO_-_ALL_PLANS,STANILAUS_FOUNDATION_-_ALL_PLANS,TRPN_PPO_-_ALL_PLANS,UHC_-_ALL_OTHER_PLANS,UHC_JLL_BP_,UHC_PPO_,UHC_SELECT_,WGAT_-_ALL_PLANS
0,003,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,Inpatient,,003,,,,,513394.7300,...,462055.26,410715.78,410715.78,436385.52,410715.78,280313.52,266297.85,366461.16,246686.17,183608.39
1,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,Inpatient,,004,,,,,443679.7938,...,399311.81,354943.84,354943.84,377127.82,354943.84,242249.17,230136.71,316698.64,213188.14,126899.81
2,011,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",Inpatient,,011,,,,,222749.5300,...,200474.58,178199.62,178199.62,189337.10,178199.62,121621.24,115540.18,158998.61,107031.15,71634.14
3,012,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",Inpatient,,012,,,,,42377.3870,...,38139.65,33901.91,33901.91,36020.78,33901.91,23138.05,21981.15,30248.98,20362.33,17796.00
4,026,CRANIOTOMY AND ENDOVASCULAR INTRACRANIAL PROCE...,Inpatient,,026,,,,,167118.2900,...,150406.46,133694.63,133694.63,142050.55,133694.63,91246.59,86684.26,119289.04,80300.34,98840.65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,982,,,,,202231.8140,...,182008.63,161785.45,161785.45,171897.04,161785.45,110418.57,104897.64,144353.07,97172.39,108478.87
399,983,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,983,,,,,145272.5567,...,130745.30,116218.05,116218.05,123481.67,116218.05,79318.82,75352.87,103695.55,69803.46,53154.84
400,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,987,,,,,70138.6314,...,63124.77,56110.91,56110.91,59617.84,56110.91,38295.69,36380.91,50064.96,33701.61,26694.00
401,988,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,988,,,,,58456.9700,...,52611.27,46765.58,46765.58,49688.42,46765.58,31917.51,30321.63,41726.59,28088.57,17796.00


In [15]:
# Translate the report's column headers into target-schema names. The mapping
# lists the header variants for procedure, DRG and shoppable-service sections;
# DataFrame.rename ignores keys that are absent from the current frame.
# 'Modifier2' is intentionally not renamed here: it is merged into 'modifiers'
# in a later step.
df_mid = pd.DataFrame(df_in)
df_mid = df_mid.rename(columns={
    'Procedure Code': 'local_code',
    'Procedure Description': 'description',
    'Price Tier': 'setting',
    'Revenue Code': 'rev_code',
    'CPT HCPCS Code': 'hcpcs_cpt',
    'NDC Code': 'ndc',
    'Rx Unit Multiplier': 'drug_hcpcs_multiplier',
    'Modifier1': 'modifiers',
    'Diagnosis Related Group Code': 'ms_drg',
    'Diagnosis Related Group Description': 'description',
    'CPT HCPCS DRG Code': 'code',
    'Shoppable Services Code': 'local_code',
    'Shoppable Services Description': 'description'
})

df_mid

Unnamed: 0,ms_drg,description,setting,rev_code,code,modifiers,Modifier2,ndc,drug_hcpcs_multiplier,Gross Charge,...,MULTIPLAN_-_ALL_PLANS,PACIFICARE_-_ALL_PLANS,PHCS_PPO_-_ALL_PLANS,STANILAUS_FOUNDATION_-_ALL_PLANS,TRPN_PPO_-_ALL_PLANS,UHC_-_ALL_OTHER_PLANS,UHC_JLL_BP_,UHC_PPO_,UHC_SELECT_,WGAT_-_ALL_PLANS
0,003,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,Inpatient,,003,,,,,513394.7300,...,462055.26,410715.78,410715.78,436385.52,410715.78,280313.52,266297.85,366461.16,246686.17,183608.39
1,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,Inpatient,,004,,,,,443679.7938,...,399311.81,354943.84,354943.84,377127.82,354943.84,242249.17,230136.71,316698.64,213188.14,126899.81
2,011,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",Inpatient,,011,,,,,222749.5300,...,200474.58,178199.62,178199.62,189337.10,178199.62,121621.24,115540.18,158998.61,107031.15,71634.14
3,012,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",Inpatient,,012,,,,,42377.3870,...,38139.65,33901.91,33901.91,36020.78,33901.91,23138.05,21981.15,30248.98,20362.33,17796.00
4,026,CRANIOTOMY AND ENDOVASCULAR INTRACRANIAL PROCE...,Inpatient,,026,,,,,167118.2900,...,150406.46,133694.63,133694.63,142050.55,133694.63,91246.59,86684.26,119289.04,80300.34,98840.65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,982,,,,,202231.8140,...,182008.63,161785.45,161785.45,171897.04,161785.45,110418.57,104897.64,144353.07,97172.39,108478.87
399,983,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,983,,,,,145272.5567,...,130745.30,116218.05,116218.05,123481.67,116218.05,79318.82,75352.87,103695.55,69803.46,53154.84
400,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,987,,,,,70138.6314,...,63124.77,56110.91,56110.91,59617.84,56110.91,38295.69,36380.91,50064.96,33701.61,26694.00
401,988,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,988,,,,,58456.9700,...,52611.27,46765.58,46765.58,49688.42,46765.58,31917.51,30321.63,41726.59,28088.57,17796.00


In [16]:
def unify_modifiers(m1, m2):
    """Combine up to two modifier codes into one pipe-delimited string.

    Missing values are skipped rather than concatenated. A value counts as
    missing when ``pd.isna`` reports it so (``None``, ``NaN``, ``pd.NA``), so
    the function can be applied to raw pandas cells as well as to values that
    were normalised to ``None`` beforehand.

    Args:
        m1: First modifier, or a missing value.
        m2: Second modifier, or a missing value.

    Returns:
        ``"m1|m2"`` when both are present, the single present modifier when
        only one is, and ``None`` when neither is. A lone second modifier is
        kept instead of being discarded.
    """
    present = [modifier for modifier in (m1, m2) if not pd.isna(modifier)]
    if not present:
        return None

    return "|".join(present)

assert unify_modifiers(None, None) is None
assert unify_modifiers("TC", None) == "TC"
assert unify_modifiers("TC", "TC") == "TC|TC"
# A second modifier without a first one must not be lost.
assert unify_modifiers(None, "26") == "26"
# NaN is what pandas hands over for empty CSV cells.
assert unify_modifiers(float("nan"), float("nan")) is None

In [17]:
# Represent missing modifiers as None before combining them row by row.
df_mid.loc[df_mid['modifiers'].isnull(), 'modifiers'] = None
df_mid.loc[df_mid['Modifier2'].isnull(), 'Modifier2'] = None

# Fold Modifier2 into the single 'modifiers' column, then drop the source column.
df_mid['modifiers'] = df_mid[['modifiers', 'Modifier2']].apply(lambda row: unify_modifiers(row['modifiers'], row['Modifier2']), axis=1)
del df_mid['Modifier2']
df_mid

Unnamed: 0,ms_drg,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,Gross Charge,Discounted Cash Price,...,MULTIPLAN_-_ALL_PLANS,PACIFICARE_-_ALL_PLANS,PHCS_PPO_-_ALL_PLANS,STANILAUS_FOUNDATION_-_ALL_PLANS,TRPN_PPO_-_ALL_PLANS,UHC_-_ALL_OTHER_PLANS,UHC_JLL_BP_,UHC_PPO_,UHC_SELECT_,WGAT_-_ALL_PLANS
0,003,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,Inpatient,,003,,,,513394.7300,87277.10,...,462055.26,410715.78,410715.78,436385.52,410715.78,280313.52,266297.85,366461.16,246686.17,183608.39
1,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,Inpatient,,004,,,,443679.7938,75425.56,...,399311.81,354943.84,354943.84,377127.82,354943.84,242249.17,230136.71,316698.64,213188.14,126899.81
2,011,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",Inpatient,,011,,,,222749.5300,37867.42,...,200474.58,178199.62,178199.62,189337.10,178199.62,121621.24,115540.18,158998.61,107031.15,71634.14
3,012,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",Inpatient,,012,,,,42377.3870,7204.16,...,38139.65,33901.91,33901.91,36020.78,33901.91,23138.05,21981.15,30248.98,20362.33,17796.00
4,026,CRANIOTOMY AND ENDOVASCULAR INTRACRANIAL PROCE...,Inpatient,,026,,,,167118.2900,28410.11,...,150406.46,133694.63,133694.63,142050.55,133694.63,91246.59,86684.26,119289.04,80300.34,98840.65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,982,,,,202231.8140,34379.41,...,182008.63,161785.45,161785.45,171897.04,161785.45,110418.57,104897.64,144353.07,97172.39,108478.87
399,983,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,983,,,,145272.5567,24696.33,...,130745.30,116218.05,116218.05,123481.67,116218.05,79318.82,75352.87,103695.55,69803.46,53154.84
400,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,987,,,,70138.6314,11923.57,...,63124.77,56110.91,56110.91,59617.84,56110.91,38295.69,36380.91,50064.96,33701.61,26694.00
401,988,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,988,,,,58456.9700,9937.68,...,52611.27,46765.58,46765.58,49688.42,46765.58,31917.51,30321.63,41726.59,28088.57,17796.00


In [18]:
money_columns = df_mid.columns.to_list()[8:]
money_columns

['Gross Charge',
 'Discounted Cash Price',
 'De-identified minimum negotiated charge',
 'De-identified maximum negotiated charge',
 'AETNA_-_ALL_PLANS',
 'AFFORDABLE_HEALTH_-_ALL_PLANS',
 'BC_MCS_',
 'BC_NON_MCS_-_ALL_OTHER_PLANS',
 'BEECH_STREET-_ALL_PLANS_',
 'BLUE_SHIELD_-_ALL_OTHER_PLANS',
 'BLUE_SHIELD_EPN',
 'CIGNA_PPO_-_ALL_PLANS',
 'GREATWEST_OA_POS_HMO',
 'GREATWEST_PPO_-_ALL_OTHER_PLANS',
 'INTERPLAN_-_ALL_PLANS',
 'KAISER_-_ALL_PLANS',
 'MULTIPLAN_-_ALL_PLANS',
 'PACIFICARE_-_ALL_PLANS',
 'PHCS_PPO_-_ALL_PLANS',
 'STANILAUS_FOUNDATION_-_ALL_PLANS',
 'TRPN_PPO_-_ALL_PLANS',
 'UHC_-_ALL_OTHER_PLANS',
 'UHC_JLL_BP_',
 'UHC_PPO_',
 'UHC_SELECT_',
 'WGAT_-_ALL_PLANS']

In [19]:
# The first 8 columns are the descriptive fields; every later column is a price
# column. Unpivot so that each (item, price column) pair becomes one row, with
# the former column header in 'payer_name' and its cell in 'standard_charge'.
remaining_columns = df_mid.columns.to_list()[:8]
df_mid = pd.melt(df_mid, id_vars=remaining_columns, var_name='payer_name', value_name='standard_charge')
df_mid

Unnamed: 0,ms_drg,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,payer_name,standard_charge
0,003,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,Inpatient,,003,,,,Gross Charge,513394.7300
1,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,Inpatient,,004,,,,Gross Charge,443679.7938
2,011,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",Inpatient,,011,,,,Gross Charge,222749.5300
3,012,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",Inpatient,,012,,,,Gross Charge,42377.3870
4,026,CRANIOTOMY AND ENDOVASCULAR INTRACRANIAL PROCE...,Inpatient,,026,,,,Gross Charge,167118.2900
...,...,...,...,...,...,...,...,...,...,...
10473,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,982,,,,WGAT_-_ALL_PLANS,108478.87
10474,983,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Inpatient,,983,,,,WGAT_-_ALL_PLANS,53154.84
10475,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,987,,,,WGAT_-_ALL_PLANS,26694.00
10476,988,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Inpatient,,988,,,,WGAT_-_ALL_PLANS,17796.00


In [20]:
set(df_mid['setting'].to_list())

{'Inpatient'}

In [21]:
# Preserve the raw tier text as a generic note before 'setting' is overwritten.
df_mid['additional_generic_notes'] = df_mid['setting']
# Only the 'ProFee' tier yields a billing class; all other rows stay None.
df_mid['billing_class'] = None
df_mid.loc[df_mid['setting'] == 'ProFee', 'billing_class'] = 'professional'
# Rebuild 'setting' from the preserved text: 'Inpatient' / 'Outpatient' become
# lowercase values, anything else is left as None.
df_mid['setting'] = None
df_mid.loc[df_mid['additional_generic_notes'] == 'Inpatient', 'setting'] = 'inpatient'
df_mid.loc[df_mid['additional_generic_notes'] == 'Outpatient', 'setting'] = 'outpatient'
df_mid

Unnamed: 0,ms_drg,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,payer_name,standard_charge,additional_generic_notes,billing_class
0,003,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,inpatient,,003,,,,Gross Charge,513394.7300,Inpatient,
1,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,inpatient,,004,,,,Gross Charge,443679.7938,Inpatient,
2,011,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",inpatient,,011,,,,Gross Charge,222749.5300,Inpatient,
3,012,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",inpatient,,012,,,,Gross Charge,42377.3870,Inpatient,
4,026,CRANIOTOMY AND ENDOVASCULAR INTRACRANIAL PROCE...,inpatient,,026,,,,Gross Charge,167118.2900,Inpatient,
...,...,...,...,...,...,...,...,...,...,...,...,...
10473,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,inpatient,,982,,,,WGAT_-_ALL_PLANS,108478.87,Inpatient,
10474,983,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,inpatient,,983,,,,WGAT_-_ALL_PLANS,53154.84,Inpatient,
10475,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,inpatient,,987,,,,WGAT_-_ALL_PLANS,26694.00,Inpatient,
10476,988,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,inpatient,,988,,,,WGAT_-_ALL_PLANS,17796.00,Inpatient,


In [22]:
def payer_name_to_payer_category(payer_name):
    """Classify a melted price-column header into a payer category.

    The four fixed headers of the report map to 'cash', 'gross', 'max' and
    'min'; any other header is treated as a named payer and yields 'payer'.
    """
    fixed_headers = (
        ('Discounted Cash Price', 'cash'),
        ('Gross Charge', 'gross'),
        ('De-identified maximum negotiated charge', 'max'),
        ('De-identified minimum negotiated charge', 'min'),
    )

    for header, category in fixed_headers:
        if payer_name == header:
            return category

    return 'payer'

df_mid['payer_category'] = df_mid['payer_name'].apply(payer_name_to_payer_category)
df_mid

Unnamed: 0,ms_drg,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,payer_name,standard_charge,additional_generic_notes,billing_class,payer_category
0,003,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,inpatient,,003,,,,Gross Charge,513394.7300,Inpatient,,gross
1,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,inpatient,,004,,,,Gross Charge,443679.7938,Inpatient,,gross
2,011,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",inpatient,,011,,,,Gross Charge,222749.5300,Inpatient,,gross
3,012,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",inpatient,,012,,,,Gross Charge,42377.3870,Inpatient,,gross
4,026,CRANIOTOMY AND ENDOVASCULAR INTRACRANIAL PROCE...,inpatient,,026,,,,Gross Charge,167118.2900,Inpatient,,gross
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10473,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,inpatient,,982,,,,WGAT_-_ALL_PLANS,108478.87,Inpatient,,payer
10474,983,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,inpatient,,983,,,,WGAT_-_ALL_PLANS,53154.84,Inpatient,,payer
10475,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,inpatient,,987,,,,WGAT_-_ALL_PLANS,26694.00,Inpatient,,payer
10476,988,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,inpatient,,988,,,,WGAT_-_ALL_PLANS,17796.00,Inpatient,,payer


In [23]:
df_mid = df_mid[df_mid['standard_charge'].notnull()]
df_mid.shape

(10459, 13)

In [24]:
# NOTE(review): presumably re-wraps the filtered frame so the in-place .loc
# assignments below do not trigger SettingWithCopyWarning; confirm.
df_mid = pd.DataFrame(df_mid) # XXX

# Sections without a CPT/HCPCS column still need one for the steps below.
if not 'hcpcs_cpt' in df_mid.columns:
    df_mid['hcpcs_cpt'] = None

# The statements below are order-dependent:
# 1. Missing values become '' so the .str / len() calls are safe.
df_mid.loc[df_mid['hcpcs_cpt'].isnull(), 'hcpcs_cpt'] = ''
# 2. The special value 'WC003' is moved into 'code' and blanked here.
df_mid.loc[df_mid['hcpcs_cpt'] == 'WC003', 'code'] = 'WC003'
df_mid.loc[df_mid['hcpcs_cpt'] == 'WC003', 'hcpcs_cpt'] = ''
# 3. Purely alphabetic values are copied into 'code'.
df_mid.loc[df_mid['hcpcs_cpt'].str.isalpha(), 'code'] = df_mid[df_mid['hcpcs_cpt'].str.isalpha()]['hcpcs_cpt']
# 4. Anything that is not exactly five characters long is blanked.
df_mid['hcpcs_cpt'] = df_mid['hcpcs_cpt'].apply(lambda cpt: '' if len(cpt) != 5 else cpt)
# 5. Remaining purely alphabetic values and blanks become None.
df_mid.loc[df_mid['hcpcs_cpt'].str.isalpha(), 'hcpcs_cpt'] = None
df_mid.loc[df_mid['hcpcs_cpt'] == '', 'hcpcs_cpt'] = None
# Show the distinct surviving values as a sanity check.
set(df_mid['hcpcs_cpt'].to_list())

{None}

In [25]:
# Add every remaining TARGET_COLUMNS field. Columns that a section may already
# provide (local_code, code, ms_drg, icd) are only created when absent; the
# others are unconditionally set to None.
df_mid['hospital_id'] = ccn
df_mid['line_type'] = None
if not 'local_code' in df_mid.columns:
    df_mid['local_code'] = None
if not 'code' in df_mid.columns:
    df_mid['code'] = None
if not 'ms_drg' in df_mid.columns:
    df_mid['ms_drg'] = None
df_mid['apr_drg'] = None
df_mid['eapg'] = None
df_mid['alt_hcpcs_cpt'] = None
df_mid['thru'] = None
df_mid['apc'] = None
if not 'icd' in df_mid.columns:
    df_mid['icd'] = None
df_mid['drug_quantity'] = None
df_mid['drug_unit_of_measurement'] = None
df_mid['drug_type_of_measurement'] = None
df_mid['plan_name'] = None
df_mid['standard_charge_percent'] = None
df_mid['contracting_method'] = None
df_mid['additional_payer_specific_notes'] = None

df_mid

Unnamed: 0,ms_drg,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,payer_name,standard_charge,...,thru,apc,icd,drug_quantity,drug_unit_of_measurement,drug_type_of_measurement,plan_name,standard_charge_percent,contracting_method,additional_payer_specific_notes
0,003,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,inpatient,,003,,,,Gross Charge,513394.7300,...,,,,,,,,,,
1,004,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,inpatient,,004,,,,Gross Charge,443679.7938,...,,,,,,,,,,
2,011,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",inpatient,,011,,,,Gross Charge,222749.5300,...,,,,,,,,,,
3,012,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",inpatient,,012,,,,Gross Charge,42377.3870,...,,,,,,,,,,
4,026,CRANIOTOMY AND ENDOVASCULAR INTRACRANIAL PROCE...,inpatient,,026,,,,Gross Charge,167118.2900,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10473,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,inpatient,,982,,,,WGAT_-_ALL_PLANS,108478.87,...,,,,,,,,,,
10474,983,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,inpatient,,983,,,,WGAT_-_ALL_PLANS,53154.84,...,,,,,,,,,,
10475,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,inpatient,,987,,,,WGAT_-_ALL_PLANS,26694.00,...,,,,,,,,,,
10476,988,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,inpatient,,988,,,,WGAT_-_ALL_PLANS,17796.00,...,,,,,,,,,,


In [26]:
df_out = pd.DataFrame(df_mid[TARGET_COLUMNS])
df_out

Unnamed: 0,hospital_id,line_type,description,rev_code,local_code,code,ms_drg,apr_drg,eapg,hcpcs_cpt,...,billing_class,setting,payer_category,payer_name,plan_name,standard_charge,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
0,050335,,ECMO OR TRACHEOSTOMY WITH MV >96 HOURS OR PRIN...,,,003,003,,,,...,,inpatient,gross,Gross Charge,,513394.7300,,,Inpatient,
1,050335,,TRACHEOSTOMY WITH MV >96 HOURS OR PRINCIPAL DI...,,,004,004,,,,...,,inpatient,gross,Gross Charge,,443679.7938,,,Inpatient,
2,050335,,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",,,011,011,,,,...,,inpatient,gross,Gross Charge,,222749.5300,,,Inpatient,
3,050335,,"TRACHEOSTOMY FOR FACE, MOUTH AND NECK DIAGNOSE...",,,012,012,,,,...,,inpatient,gross,Gross Charge,,42377.3870,,,Inpatient,
4,050335,,CRANIOTOMY AND ENDOVASCULAR INTRACRANIAL PROCE...,,,026,026,,,,...,,inpatient,gross,Gross Charge,,167118.2900,,,Inpatient,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10473,050335,,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,,,982,982,,,,...,,inpatient,payer,WGAT_-_ALL_PLANS,,108478.87,,,Inpatient,
10474,050335,,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,,,983,983,,,,...,,inpatient,payer,WGAT_-_ALL_PLANS,,53154.84,,,Inpatient,
10475,050335,,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,,,987,987,,,,...,,inpatient,payer,WGAT_-_ALL_PLANS,,26694.00,,,Inpatient,
10476,050335,,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,,,988,988,,,,...,,inpatient,payer,WGAT_-_ALL_PLANS,,17796.00,,,Inpatient,


In [27]:
df_out.to_csv("tmp.csv", index=False)

In [28]:
def convert_chunk(chunk, ccn, id_column_count=8):
    """Convert one CSV section of the downloaded report into the target layout.

    The section is read with every cell as text, its headers are renamed to
    target-schema names, the two modifier columns are merged, and the price
    columns are unpivoted to one row per (item, price column). Rows without a
    price are dropped and the result is projected onto ``TARGET_COLUMNS``.

    Args:
        chunk: CSV text of a single report section, header row included.
        ccn: Identifier written to the ``hospital_id`` column of every row.
        id_column_count: Number of leading columns (counted after 'Modifier2'
            has been merged away) that describe the item; all later columns
            are treated as price columns. Defaults to 8.

    Returns:
        A new DataFrame with exactly the columns in ``TARGET_COLUMNS``.

    Raises:
        KeyError: If the section lacks a column the steps below index directly
            (for example 'Modifier1', 'Modifier2' or 'Price Tier').
    """
    # Header variants for the different section types. rename() ignores keys
    # that are absent from the frame.
    column_names = {
        'Procedure Code': 'local_code',
        'Procedure Description': 'description',
        'Price Tier': 'setting',
        'Revenue Code': 'rev_code',
        'CPT HCPCS Code': 'hcpcs_cpt',
        'NDC Code': 'ndc',
        'Rx Unit Multiplier': 'drug_hcpcs_multiplier',
        'Modifier1': 'modifiers',
        'Diagnosis Related Group Code': 'ms_drg',
        'Diagnosis Related Group Description': 'description',
        'CPT HCPCS DRG Code': 'code',
        'Shoppable Services Code': 'local_code',
        'Shoppable Services Description': 'description'
    }
    df_mid = pd.read_csv(StringIO(chunk), dtype=str).rename(columns=column_names)

    # Represent missing modifiers as None, then fold Modifier2 into 'modifiers'.
    df_mid.loc[df_mid['modifiers'].isnull(), 'modifiers'] = None
    df_mid.loc[df_mid['Modifier2'].isnull(), 'Modifier2'] = None
    df_mid['modifiers'] = df_mid[['modifiers', 'Modifier2']].apply(
        lambda row: unify_modifiers(row['modifiers'], row['Modifier2']), axis=1)
    del df_mid['Modifier2']

    # Unpivot: descriptive columns stay, each price column becomes its own row.
    id_columns = df_mid.columns.to_list()[:id_column_count]
    df_mid = pd.melt(df_mid, id_vars=id_columns, var_name='payer_name', value_name='standard_charge')

    # Keep the raw tier text as a note, then derive billing_class and setting.
    df_mid['additional_generic_notes'] = df_mid['setting']
    df_mid['billing_class'] = None
    df_mid.loc[df_mid['setting'] == 'ProFee', 'billing_class'] = 'professional'
    df_mid['setting'] = None
    df_mid.loc[df_mid['additional_generic_notes'] == 'Inpatient', 'setting'] = 'inpatient'
    df_mid.loc[df_mid['additional_generic_notes'] == 'Outpatient', 'setting'] = 'outpatient'

    df_mid['payer_category'] = df_mid['payer_name'].apply(payer_name_to_payer_category)

    # Drop rows without a price. The explicit copy makes the in-place
    # assignments below operate on an independent frame.
    df_mid = df_mid[df_mid['standard_charge'].notnull()].copy()

    if 'hcpcs_cpt' not in df_mid.columns:
        df_mid['hcpcs_cpt'] = None

    # hcpcs_cpt clean-up; the statements are order-dependent.
    # 1. Missing values become '' so the .str / len() calls are safe.
    df_mid.loc[df_mid['hcpcs_cpt'].isnull(), 'hcpcs_cpt'] = ''
    # 2. The special value 'WC003' moves to 'code'.
    df_mid.loc[df_mid['hcpcs_cpt'] == 'WC003', 'code'] = 'WC003'
    df_mid.loc[df_mid['hcpcs_cpt'] == 'WC003', 'hcpcs_cpt'] = ''
    # 3. Purely alphabetic values are copied into 'code'.
    alpha_only = df_mid['hcpcs_cpt'].str.isalpha()
    df_mid.loc[alpha_only, 'code'] = df_mid.loc[alpha_only, 'hcpcs_cpt']
    # 4. Anything that is not exactly five characters long is blanked.
    df_mid['hcpcs_cpt'] = df_mid['hcpcs_cpt'].apply(lambda cpt: '' if len(cpt) != 5 else cpt)
    # 5. Remaining purely alphabetic values and blanks become None.
    df_mid.loc[df_mid['hcpcs_cpt'].str.isalpha(), 'hcpcs_cpt'] = None
    df_mid.loc[df_mid['hcpcs_cpt'] == '', 'hcpcs_cpt'] = None

    df_mid['hospital_id'] = ccn

    # Columns a section may already provide are only created when absent.
    for column in ('local_code', 'code', 'ms_drg', 'icd'):
        if column not in df_mid.columns:
            df_mid[column] = None

    # Target columns this report never populates.
    for column in (
        'line_type',
        'apr_drg',
        'eapg',
        'alt_hcpcs_cpt',
        'thru',
        'apc',
        'drug_quantity',
        'drug_unit_of_measurement',
        'drug_type_of_measurement',
        'plan_name',
        'standard_charge_percent',
        'contracting_method',
        'additional_payer_specific_notes',
    ):
        df_mid[column] = None

    return df_mid[TARGET_COLUMNS].copy()

In [29]:
def perform_task(h_f, ccn, app_url, transparency_page, timeout=60):
    """Download one hospital's report, convert it, and emit its metadata SQL.

    Steps:
      1. Fetch the app page and the init script it references, and read the
         report database name from that script.
      2. POST to the report endpoint to download the multi-section CSV.
      3. Convert every section after the header with ``convert_chunk`` and
         write the concatenated result to ``rate_<ccn>.csv``.
      4. Write one ``UPDATE hospital ...`` statement to ``h_f``.

    Args:
        h_f: Writable text file-like object receiving the SQL statement.
        ccn: Hospital identifier; used in the output file name, the
            ``hospital_id`` column and the SQL ``WHERE`` clause.
        app_url: URL of the hospital's pricing app page.
        transparency_page: URL recorded in the SQL statement.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.HTTPError: If any of the three HTTP requests returns an
            error status.
        requests.Timeout: If a request exceeds ``timeout``.
    """
    report_url = 'https://apps.para-hcfs.com/PTT/FinalLinks/Reports.aspx'

    resp = requests.get(app_url, timeout=timeout)
    # Fail here with a clear HTTP error instead of parsing an error page below.
    resp.raise_for_status()
    print(resp.url)

    # The database name is the `value` property of the object that mentions
    # App.hdnDB_Container inside the referenced init script.
    tree = html.fromstring(resp.text)
    js_link = tree.xpath('//script[contains(@src, "/PTT/extnet/extnet-init-js")]/@src')[0]
    js_url = urljoin(resp.url, js_link)
    resp1 = requests.get(js_url, timeout=timeout)
    resp1.raise_for_status()
    parsed = js2xml.parse(resp1.text)
    db_name = parsed.xpath('//object[./property/string[text()="App.hdnDB_Container"]]/property[@name="value"]/string/text()')[0]

    params = { 'dbName': db_name, 'type': 'CDMWithoutLabel' }

    # Hand-written multipart form body, posted as a raw string.
    data = '------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__EVENTTARGET"\r\n\r\nResourceManager\r\n------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__EVENTARGUMENT"\r\n\r\n-|public|DownloadReport\r\n------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__ExtNetDirectEventMarker"\r\n\r\ndelta=true\r\n------WebKitFormBoundarysVqstz3xq11k5yBT--\r\n'

    resp2 = requests.post(report_url, params=params, data=data, timeout=timeout)
    resp2.raise_for_status()
    print(resp2.url)

    # Sections are separated by a blank line; chunks[0] is the run/date header.
    chunks = resp2.text.split("\r\n\r\n")

    # A whitespace-only chunk (e.g. from a trailing separator) holds no CSV and
    # would make read_csv fail, so it is skipped.
    dfs = [convert_chunk(chunk, ccn) for chunk in chunks[1:] if chunk.strip()]

    df_out = pd.concat(dfs)

    df_out.to_csv('rate_' + ccn + '.csv', index=False)

    # The server-supplied file name is the first double-quoted token of the
    # Content-Disposition header.
    filename = resp2.headers['content-disposition'].split('"')[1]
    ein = derive_ein_from_filename(filename)

    # The last space-separated token of the header chunk is the CDM date stamp.
    date_str = chunks[0].split(" ")[-1]
    last_updated_at = parse_datetime(date_str).date().isoformat()

    # NOTE(review): values are interpolated into the statement unescaped; a
    # double quote inside any of them would produce broken SQL.
    query = 'UPDATE hospital SET ein = "{}", last_updated = "{}", file_name = "{}", mrf_url = "{}", transparency_page = "{}" WHERE id = "{}";'.format(
        ein, last_updated_at, filename, app_url, transparency_page, ccn)

    h_f.write(query)
    h_f.write("\n")

In [30]:
# The context manager closes hospitals.sql even when perform_task raises
# (network failure, unexpected page layout, ...), so partial output is flushed
# and the handle is not leaked.
with open("hospitals.sql", "w") as h_f:
    perform_task(h_f, ccn, app_url, transparency_page)

https://apps.para-hcfs.com/PTT/FinalLinks/Adventist_Sonora_V3.aspx
https://apps.para-hcfs.com/PTT/FinalLinks/Reports.aspx?dbName=dbAHSSONORACA&type=CDMWithoutLabel
