In [1]:
import os
import subprocess
from io import StringIO
from urllib.parse import urljoin

import js2xml
import pandas as pd
import requests
from lxml import html

In [2]:
# Names and order of the columns kept in the final output frame; this list is
# used as the column selector when df_out is built from df_mid.
TARGET_COLUMNS = [
    'hospital_id',
    #'row_id',
    'line_type',
    'description',
    'rev_code',
    'local_code',
    'code',
    'ms_drg',
    'apr_drg',
    'eapg',
    'hcpcs_cpt',
    'modifiers',
    'alt_hcpcs_cpt',
    'thru',
    'apc',
    'icd',
    'ndc',
    'drug_hcpcs_multiplier',
    'drug_quantity',
    'drug_unit_of_measurement',
    'drug_type_of_measurement',
    'billing_class',
    'setting',
    'payer_category',
    'payer_name',
    'plan_name',
    'standard_charge',
    'standard_charge_percent',
    'contracting_method',
    'additional_generic_notes',
    'additional_payer_specific_notes'
]

In [3]:
# Public page that links to the cost estimator tool:
# https://www.adventisthealth.org/patient-resources/financial-services/healthcare-costs-and-charges/cost-estimator-tool/
# Identifier written into the hospital_id column of every output row.
ccn = "050455"
# Estimator page fetched first; the database name and the Referer header used
# for the report download are both derived from its response.
app_url = "https://apps.para-hcfs.com/PTT/FinalLinks/Adventist_Bakersfield_V3.aspx"

In [4]:
# Fetch the estimator page. The timeout keeps a stalled server from hanging the
# kernel, and raise_for_status() stops the run here on an HTTP error instead of
# letting later cells parse an error page.
resp = requests.get(app_url, timeout=30)
resp.raise_for_status()
resp

<Response [200]>

In [5]:
# Find the extnet init script referenced by the page and resolve its URL
# against the final (post-redirect) page URL.
tree = html.fromstring(resp.text)
js_link = tree.xpath('//script[contains(@src, "/PTT/extnet/extnet-init-js")]/@src')[0]
js_url = urljoin(resp.url, js_link)

# Download the script with a timeout, and fail fast on an HTTP error so an
# error body is never handed to the JavaScript parser.
resp1 = requests.get(js_url, timeout=30)
resp1.raise_for_status()

# Convert the JavaScript to XML and read the "value" property of the object
# that mentions App.hdnDB_Container; that string is the database name the
# report endpoint expects.
parsed = js2xml.parse(resp1.text)
db_name = parsed.xpath('//object[./property/string[text()="App.hdnDB_Container"]]/property[@name="value"]/string/text()')[0]
db_name

'dbAHBBAKERSFIELDCA'

In [6]:
# Query-string parameters selecting which database and report to download.
params = {
    'dbName': db_name,
    'type': 'CDMWithoutLabel'
}

# Browser-like request headers. The content-type boundary must match the
# boundary used inside `data` below.
headers = {
    'authority': 'apps.para-hcfs.com',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'en-GB,en-US;q=0.9,en;q=0.8',
    'cache-control': 'no-cache',
    'content-type': 'multipart/form-data; boundary=----WebKitFormBoundarysVqstz3xq11k5yBT',
    'origin': 'https://apps.para-hcfs.com',
    'pragma': 'no-cache',
    'referer': resp.url,
    'sec-ch-ua': '"Google Chrome";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'sec-fetch-dest': 'iframe',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
}

# Pre-encoded multipart form body that triggers the DownloadReport event.
data = '------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__EVENTTARGET"\r\n\r\nResourceManager\r\n------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__EVENTARGUMENT"\r\n\r\n-|public|DownloadReport\r\n------WebKitFormBoundarysVqstz3xq11k5yBT\r\nContent-Disposition: form-data; name="__ExtNetDirectEventMarker"\r\n\r\ndelta=true\r\n------WebKitFormBoundarysVqstz3xq11k5yBT--\r\n'

# The timeout bounds a stalled download; raise_for_status() aborts on an HTTP
# error so an error page is never split and parsed as CSV by later cells.
resp2 = requests.post('https://apps.para-hcfs.com/PTT/FinalLinks/Reports.aspx', params=params, headers=headers, data=data, timeout=60)
resp2.raise_for_status()
print(resp2)

<Response [200]>


In [7]:
# Take the quoted file name from the Content-Disposition header. The server
# chooses this value and it is later passed to open(), so keep only the final
# path component; a crafted header then cannot direct the write outside the
# working directory.
filename = os.path.basename(resp2.headers['content-disposition'].split('"')[1])
filename

'952294234_adventist-health---bakersfield_standardcharges.csv'

In [8]:
# The download is several sections separated by a blank line (CRLF CRLF);
# split it so each section can be inspected and parsed on its own.
chunks = resp2.text.split("\r\n\r\n")
len(chunks)

4

In [9]:
# First section: the report preamble (run date and CDM date stamp in the output below).
chunks[0]

'Run Date: 5/30/2023 8:19:19 PM\r\nCDM Date Stamp: 4/20/2023'

In [10]:
# Peek at the start of the second section; its header begins with the "Procedure ..." columns.
chunks[1][:100]

'Procedure Code,Procedure Description,Price Tier,Revenue Code,CPT HCPCS Code,Modifier1,Modifier2,NDC '

In [11]:
# Peek at the start of the third section; its header begins with the "Diagnosis Related Group ..." columns.
chunks[2][:100]

'Diagnosis Related Group Code,Diagnosis Related Group Description,Price Tier,Revenue Code,CPT HCPCS D'

In [12]:
# Keep a verbatim copy of the raw download under the server-provided name.
# The context manager closes the file even if the write raises, and newline=""
# writes the payload's own CRLF line endings unchanged on every platform.
with open(filename, "w", newline="") as csv_f:
    csv_f.write(resp2.text)

In [29]:
# Parse the fourth section of the download (index 3); in the output below its
# header begins with the "Shoppable Services ..." columns.
csv_buf = StringIO(chunks[3])

# dtype=str keeps every cell as text, so codes such as "064" keep their leading zeros.
df_in = pd.read_csv(csv_buf, dtype=str)
df_in

Unnamed: 0,Shoppable Services Code,Shoppable Services Description,Price Tier,Revenue Code,CPT HCPCS DRG Code,Modifier1,Modifier2,NDC Code,Rx Unit Multiplier,Gross Charge,...,KAISER_PPO_POS_-_ALL_OTHER_PLANS,KERN_LEGACY_HP_EPO_-_ALL_OTHER_PLANS,KERN_LEGACY_SHARE_SELECT,OSCAR_-_ALL_PLANS,PHCS-_ALL_PLANS,THREE_RIVERS_PROVIDER_NETWORK-_ALL_PLANS,UHC_ALL_PAYER_-_ALL_OTHER_PLANS,UHC_HMO,UHC_JLL_CSP,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS
0,009U3ZX,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,009U3ZX,,,,,225082.01,...,137288.15,41482.88,37882.88,61257.45,23200.00,179115.34,,,,86733.59
1,0202U,Test for detection of respiratory disease-caus...,,,0202U,,,,,1325.00,...,808.25,662.50,662.50,666.85,861.25,1060.00,500.14,500.14,500.14,609.50
2,02HV33Z,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,02HV33Z,,,,,182090.07,...,111063.69,32778.66,30018.66,49512.08,17000.00,144772.17,,,,61258.59
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,064,,,,,126416.58,...,77114.11,22882.00,20869.00,26706.46,19350.00,101133.27,35174.56,26231.54,33415.54,57864.65
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,065,,,,,59884.19,...,36529.36,22882.00,20869.00,13764.93,9300.00,47907.36,18129.53,13520.15,17222.90,26116.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,N390,KIDNEY AND URINARY TRACT INFECTIONS WITH MAJOR...,,,N390,,,,,62048.71,...,37849.71,13100.00,11920.00,10774.68,9300.00,49638.97,14191.12,10583.07,13481.44,22530.00
356,U0003,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0003,,,,,146.00,...,178.12,146.00,146.00,240.00,189.80,233.60,240.00,240.00,240.00,134.32
357,U0005,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0005,,,,,80.00,...,48.80,40.00,40.00,40.00,52.00,64.00,30.00,30.00,30.00,36.80
358,U071,RESPIRATORY INFECTIONS AND INFLAMMATIONS WITH ...,,,U071,,,,,103326.06,...,63028.90,18350.00,16580.00,10636.54,12400.00,82660.85,18460.00,13843.00,17536.00,42561.26


In [30]:
# Wrap the parsed section and rename its source headers to the target schema in
# one step. The mapping lists header names from every section of the download;
# names that are absent from the current frame are simply ignored by rename().
df_mid = pd.DataFrame(df_in).rename(
    columns={
        # Headers seen in the "Procedure ..." section.
        'Procedure Code': 'local_code',
        'Procedure Description': 'description',
        'CPT HCPCS Code': 'hcpcs_cpt',
        # Headers seen in the "Diagnosis Related Group ..." section.
        'Diagnosis Related Group Code': 'ms_drg',
        'Diagnosis Related Group Description': 'description',
        # Headers seen in the "Shoppable Services ..." section.
        'Shoppable Services Code': 'local_code',
        'Shoppable Services Description': 'description',
        'CPT HCPCS DRG Code': 'code',
        # Remaining descriptive headers.
        'Price Tier': 'setting',
        'Revenue Code': 'rev_code',
        'Modifier1': 'modifiers',
        'NDC Code': 'ndc',
        'Rx Unit Multiplier': 'drug_hcpcs_multiplier',
    }
)

df_mid

Unnamed: 0,local_code,description,setting,rev_code,code,modifiers,Modifier2,ndc,drug_hcpcs_multiplier,Gross Charge,...,KAISER_PPO_POS_-_ALL_OTHER_PLANS,KERN_LEGACY_HP_EPO_-_ALL_OTHER_PLANS,KERN_LEGACY_SHARE_SELECT,OSCAR_-_ALL_PLANS,PHCS-_ALL_PLANS,THREE_RIVERS_PROVIDER_NETWORK-_ALL_PLANS,UHC_ALL_PAYER_-_ALL_OTHER_PLANS,UHC_HMO,UHC_JLL_CSP,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS
0,009U3ZX,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,009U3ZX,,,,,225082.01,...,137288.15,41482.88,37882.88,61257.45,23200.00,179115.34,,,,86733.59
1,0202U,Test for detection of respiratory disease-caus...,,,0202U,,,,,1325.00,...,808.25,662.50,662.50,666.85,861.25,1060.00,500.14,500.14,500.14,609.50
2,02HV33Z,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,02HV33Z,,,,,182090.07,...,111063.69,32778.66,30018.66,49512.08,17000.00,144772.17,,,,61258.59
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,064,,,,,126416.58,...,77114.11,22882.00,20869.00,26706.46,19350.00,101133.27,35174.56,26231.54,33415.54,57864.65
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,065,,,,,59884.19,...,36529.36,22882.00,20869.00,13764.93,9300.00,47907.36,18129.53,13520.15,17222.90,26116.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,N390,KIDNEY AND URINARY TRACT INFECTIONS WITH MAJOR...,,,N390,,,,,62048.71,...,37849.71,13100.00,11920.00,10774.68,9300.00,49638.97,14191.12,10583.07,13481.44,22530.00
356,U0003,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0003,,,,,146.00,...,178.12,146.00,146.00,240.00,189.80,233.60,240.00,240.00,240.00,134.32
357,U0005,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0005,,,,,80.00,...,48.80,40.00,40.00,40.00,52.00,64.00,30.00,30.00,30.00,36.80
358,U071,RESPIRATORY INFECTIONS AND INFLAMMATIONS WITH ...,,,U071,,,,,103326.06,...,63028.90,18350.00,16580.00,10636.54,12400.00,82660.85,18460.00,13843.00,17536.00,42561.26


In [31]:
def unify_modifiers(m1, m2):
    """Combine up to two modifier codes into one pipe-delimited string.

    Args:
        m1: First modifier string, or None when absent.
        m2: Second modifier string, or None when absent.

    Returns:
        "m1|m2" when both are present, the single present value when only one
        is present, and None when both are absent.
    """
    # Keep whichever values are present, in their original order. A lone second
    # modifier is returned as-is rather than being discarded.
    present = [m for m in (m1, m2) if m is not None]
    return "|".join(present) if present else None

assert unify_modifiers(None, None) is None
assert unify_modifiers("TC", None) == "TC"
assert unify_modifiers("TC", "TC") == "TC|TC"
assert unify_modifiers(None, "TC") == "TC"

In [32]:
# Replace missing values in both modifier columns with None, because
# unify_modifiers tests its arguments with `is None`.
df_mid.loc[df_mid['modifiers'].isnull(), 'modifiers'] = None
df_mid.loc[df_mid['Modifier2'].isnull(), 'Modifier2'] = None

# Merge the two modifier columns row by row into 'modifiers', then drop the
# second column, which is no longer needed.
df_mid['modifiers'] = df_mid[['modifiers', 'Modifier2']].apply(lambda row: unify_modifiers(row['modifiers'], row['Modifier2']), axis=1)
del df_mid['Modifier2']
df_mid

Unnamed: 0,local_code,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,Gross Charge,Discounted Cash Price,...,KAISER_PPO_POS_-_ALL_OTHER_PLANS,KERN_LEGACY_HP_EPO_-_ALL_OTHER_PLANS,KERN_LEGACY_SHARE_SELECT,OSCAR_-_ALL_PLANS,PHCS-_ALL_PLANS,THREE_RIVERS_PROVIDER_NETWORK-_ALL_PLANS,UHC_ALL_PAYER_-_ALL_OTHER_PLANS,UHC_HMO,UHC_JLL_CSP,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS
0,009U3ZX,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,009U3ZX,,,,225082.01,42765.58,...,137288.15,41482.88,37882.88,61257.45,23200.00,179115.34,,,,86733.59
1,0202U,Test for detection of respiratory disease-caus...,,,0202U,,,,1325.00,251.75,...,808.25,662.50,662.50,666.85,861.25,1060.00,500.14,500.14,500.14,609.50
2,02HV33Z,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,02HV33Z,,,,182090.07,34597.11,...,111063.69,32778.66,30018.66,49512.08,17000.00,144772.17,,,,61258.59
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,064,,,,126416.58,24019.15,...,77114.11,22882.00,20869.00,26706.46,19350.00,101133.27,35174.56,26231.54,33415.54,57864.65
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,065,,,,59884.19,11378.00,...,36529.36,22882.00,20869.00,13764.93,9300.00,47907.36,18129.53,13520.15,17222.90,26116.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,N390,KIDNEY AND URINARY TRACT INFECTIONS WITH MAJOR...,,,N390,,,,62048.71,11789.26,...,37849.71,13100.00,11920.00,10774.68,9300.00,49638.97,14191.12,10583.07,13481.44,22530.00
356,U0003,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0003,,,,146.00,27.74,...,178.12,146.00,146.00,240.00,189.80,233.60,240.00,240.00,240.00,134.32
357,U0005,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0005,,,,80.00,15.20,...,48.80,40.00,40.00,40.00,52.00,64.00,30.00,30.00,30.00,36.80
358,U071,RESPIRATORY INFECTIONS AND INFLAMMATIONS WITH ...,,,U071,,,,103326.06,19631.95,...,63028.90,18350.00,16580.00,10636.54,12400.00,82660.85,18460.00,13843.00,17536.00,42561.26


In [33]:
# The first 8 columns are the descriptive fields (local_code ... drug_hcpcs_multiplier);
# every column from position 8 onward holds a charge amount.
money_columns = df_mid.columns[8:].to_list()
money_columns

['Gross Charge',
 'Discounted Cash Price',
 'De-identified minimum negotiated charge',
 'De-identified maximum negotiated charge',
 'AETNA-_ALL_PLANS',
 'AFFILIATED_HEALTH_FUNDS-ALL_PLANS',
 'BEECH_STREET-_ALL_PLANS',
 'BLUE_CROSS_MCS_-_ALL_OTHER_PLANS',
 'BLUE_CROSS_NON-MCS',
 'BLUE_SHIELD_EPN',
 'BLUE_SHIELD_EPO_PPO',
 'BLUE_SHIELD_HMO_POS_-_ALL_OTHER_PLANS',
 'CIGNA-_ALL_OTHER_PLANS',
 'CIGNA_HMO_OPEN_ACCESS',
 'FIRST_HEALTH_COVENTRY-_ALL_PLANS',
 'GALAXY_HEALTH_NETWORK-_ALL_PLANS',
 'GEM_CARE-_ALL_PLANS',
 'HEALTH_NET_HMO_PPO_-_ALL_PLANS',
 'INTEGRATED_HEALTH_PLAN-_ALL_PLANS',
 'INTERPLAN-_ALL_PLANS',
 'KAISER_COMM_HMO_',
 'KAISER_NON-KP',
 'KAISER_PPO_POS_-_ALL_OTHER_PLANS',
 'KERN_LEGACY_HP_EPO_-_ALL_OTHER_PLANS',
 'KERN_LEGACY_SHARE_SELECT',
 'OSCAR_-_ALL_PLANS',
 'PHCS-_ALL_PLANS',
 'THREE_RIVERS_PROVIDER_NETWORK-_ALL_PLANS',
 'UHC_ALL_PAYER_-_ALL_OTHER_PLANS',
 'UHC_HMO',
 'UHC_JLL_CSP',
 'WESTERN_GROWERS_ASSURANCE-_ALL_PLANS']

In [34]:
# The first 8 columns identify a line item and stay fixed; every other column
# is unpivoted into one (payer_name, standard_charge) row per item.
remaining_columns = df_mid.columns[:8].to_list()
df_mid = df_mid.melt(
    id_vars=remaining_columns,
    var_name='payer_name',
    value_name='standard_charge',
)
df_mid

Unnamed: 0,local_code,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,payer_name,standard_charge
0,009U3ZX,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,009U3ZX,,,,Gross Charge,225082.01
1,0202U,Test for detection of respiratory disease-caus...,,,0202U,,,,Gross Charge,1325.00
2,02HV33Z,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,02HV33Z,,,,Gross Charge,182090.07
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,064,,,,Gross Charge,126416.58
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,065,,,,Gross Charge,59884.19
...,...,...,...,...,...,...,...,...,...,...
11515,N390,KIDNEY AND URINARY TRACT INFECTIONS WITH MAJOR...,,,N390,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,22530.00
11516,U0003,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0003,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,134.32
11517,U0005,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0005,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,36.80
11518,U071,RESPIRATORY INFECTIONS AND INFLAMMATIONS WITH ...,,,U071,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,42561.26


In [35]:
# List the distinct values present in 'setting' (the renamed Price Tier column)
# before deriving other columns from it.
set(df_mid['setting'].to_list())

{nan}

In [37]:
# Preserve the raw tier text (currently held in 'setting') as a generic note.
df_mid['additional_generic_notes'] = df_mid['setting']
# Rows whose tier is 'ProFee' are marked as professional billing; all others stay None.
df_mid['billing_class'] = None
df_mid.loc[df_mid['setting'] == 'ProFee', 'billing_class'] = 'professional'
# Rebuild 'setting' from the saved tier text: only 'Inpatient' and 'Outpatient'
# map to a value, everything else is left as None.
df_mid['setting'] = None
df_mid.loc[df_mid['additional_generic_notes'] == 'Inpatient', 'setting'] = 'inpatient'
df_mid.loc[df_mid['additional_generic_notes'] == 'Outpatient', 'setting'] = 'outpatient'
df_mid

Unnamed: 0,local_code,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,payer_name,standard_charge,additional_generic_notes,billing_class
0,009U3ZX,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,009U3ZX,,,,Gross Charge,225082.01,,
1,0202U,Test for detection of respiratory disease-caus...,,,0202U,,,,Gross Charge,1325.00,,
2,02HV33Z,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,02HV33Z,,,,Gross Charge,182090.07,,
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,064,,,,Gross Charge,126416.58,,
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,065,,,,Gross Charge,59884.19,,
...,...,...,...,...,...,...,...,...,...,...,...,...
11515,N390,KIDNEY AND URINARY TRACT INFECTIONS WITH MAJOR...,,,N390,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,22530.00,,
11516,U0003,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0003,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,134.32,,
11517,U0005,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0005,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,36.80,,
11518,U071,RESPIRATORY INFECTIONS AND INFLAMMATIONS WITH ...,,,U071,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,42561.26,,


In [38]:
def payer_name_to_payer_category(payer_name):
    """Map a charge-column name to its payer category.

    The four special column names map to 'cash', 'gross', 'max' and 'min';
    any other value is treated as a named payer and maps to 'payer'.
    """
    # (column name, category) pairs, checked in order with plain equality.
    special_columns = (
        ('Discounted Cash Price', 'cash'),
        ('Gross Charge', 'gross'),
        ('De-identified maximum negotiated charge', 'max'),
        ('De-identified minimum negotiated charge', 'min'),
    )
    for column_name, category in special_columns:
        if payer_name == column_name:
            return category

    return 'payer'

# Classify every row by the charge column it was unpivoted from.
df_mid['payer_category'] = df_mid['payer_name'].map(payer_name_to_payer_category)
df_mid

Unnamed: 0,local_code,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,payer_name,standard_charge,additional_generic_notes,billing_class,payer_category
0,009U3ZX,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,009U3ZX,,,,Gross Charge,225082.01,,,gross
1,0202U,Test for detection of respiratory disease-caus...,,,0202U,,,,Gross Charge,1325.00,,,gross
2,02HV33Z,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,02HV33Z,,,,Gross Charge,182090.07,,,gross
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,064,,,,Gross Charge,126416.58,,,gross
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,065,,,,Gross Charge,59884.19,,,gross
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11515,N390,KIDNEY AND URINARY TRACT INFECTIONS WITH MAJOR...,,,N390,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,22530.00,,,payer
11516,U0003,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0003,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,134.32,,,payer
11517,U0005,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0005,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,36.80,,,payer
11518,U071,RESPIRATORY INFECTIONS AND INFLAMMATIONS WITH ...,,,U071,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,42561.26,,,payer


In [39]:
# Drop item/payer combinations that have no published amount. The explicit
# copy makes df_mid an independent frame, so the column assignments that follow
# do not raise SettingWithCopyWarning or risk writing to a temporary slice.
df_mid = df_mid[df_mid['standard_charge'].notnull()].copy()
df_mid.shape

(10782, 13)

In [41]:
# Not every section yields an 'hcpcs_cpt' column after the rename; create an
# empty one so the steps below always have a column to work on.
if not 'hcpcs_cpt' in df_mid.columns:
    df_mid['hcpcs_cpt'] = None

# NOTE(review): re-wrapping presumably sheds the filtered-slice state left by
# the previous cell before the .loc assignments below; confirm.
df_mid = pd.DataFrame(df_mid)
# Use '' for missing values so the .str and len() calls below work on every row.
df_mid.loc[df_mid['hcpcs_cpt'].isnull(), 'hcpcs_cpt'] = ''
# Purely alphabetic values are copied into 'code' before being cleared from 'hcpcs_cpt'.
df_mid.loc[df_mid['hcpcs_cpt'].str.isalpha(), 'code'] = df_mid[df_mid['hcpcs_cpt'].str.isalpha()]['hcpcs_cpt']
# Keep only values that are exactly 5 characters long; anything else becomes ''.
df_mid['hcpcs_cpt'] = df_mid['hcpcs_cpt'].apply(lambda cpt: '' if len(cpt) != 5 else cpt)
# Clear any remaining purely alphabetic 5-character values, then turn every '' back into None.
df_mid.loc[df_mid['hcpcs_cpt'].str.isalpha(), 'hcpcs_cpt'] = None
df_mid.loc[df_mid['hcpcs_cpt'] == '', 'hcpcs_cpt'] = None
# Show the distinct values left in the column as a sanity check.
set(df_mid['hcpcs_cpt'].to_list())

{None}

In [42]:
# Every row belongs to the same hospital.
df_mid['hospital_id'] = ccn

# Add the remaining target-schema columns as None, in this order.
# A True flag means "leave the column alone if an earlier step already
# produced it"; a False flag means the column is always (re)set to None.
for _column, _keep_existing in (
    ('line_type', False),
    ('local_code', True),
    ('code', True),
    ('ms_drg', True),
    ('apr_drg', False),
    ('eapg', False),
    ('alt_hcpcs_cpt', False),
    ('thru', False),
    ('apc', False),
    ('icd', True),
    ('drug_quantity', False),
    ('drug_unit_of_measurement', False),
    ('drug_type_of_measurement', False),
    ('plan_name', False),
    ('standard_charge_percent', False),
    ('contracting_method', False),
    ('additional_payer_specific_notes', False),
):
    if _keep_existing and _column in df_mid.columns:
        continue
    df_mid[_column] = None

df_mid

Unnamed: 0,local_code,description,setting,rev_code,code,modifiers,ndc,drug_hcpcs_multiplier,payer_name,standard_charge,...,thru,apc,icd,drug_quantity,drug_unit_of_measurement,drug_type_of_measurement,plan_name,standard_charge_percent,contracting_method,additional_payer_specific_notes
0,009U3ZX,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,009U3ZX,,,,Gross Charge,225082.01,...,,,,,,,,,,
1,0202U,Test for detection of respiratory disease-caus...,,,0202U,,,,Gross Charge,1325.00,...,,,,,,,,,,
2,02HV33Z,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,,02HV33Z,,,,Gross Charge,182090.07,...,,,,,,,,,,
3,064,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,064,,,,Gross Charge,126416.58,...,,,,,,,,,,
4,065,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,,065,,,,Gross Charge,59884.19,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11515,N390,KIDNEY AND URINARY TRACT INFECTIONS WITH MAJOR...,,,N390,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,22530.00,...,,,,,,,,,,
11516,U0003,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0003,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,134.32,...,,,,,,,,,,
11517,U0005,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,,U0005,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,36.80,...,,,,,,,,,,
11518,U071,RESPIRATORY INFECTIONS AND INFLAMMATIONS WITH ...,,,U071,,,,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,42561.26,...,,,,,,,,,,


In [43]:
# Select the target-schema columns, in TARGET_COLUMNS order, as the output frame.
df_out = pd.DataFrame(df_mid[TARGET_COLUMNS])
df_out

Unnamed: 0,hospital_id,line_type,description,rev_code,local_code,code,ms_drg,apr_drg,eapg,hcpcs_cpt,...,billing_class,setting,payer_category,payer_name,plan_name,standard_charge,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
0,050455,,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,009U3ZX,009U3ZX,,,,,...,,,gross,Gross Charge,,225082.01,,,,
1,050455,,Test for detection of respiratory disease-caus...,,0202U,0202U,,,,,...,,,gross,Gross Charge,,1325.00,,,,
2,050455,,SEPTICEMIA OR SEVERE SEPSIS WITHOUT MV >96 HOU...,,02HV33Z,02HV33Z,,,,,...,,,gross,Gross Charge,,182090.07,,,,
3,050455,,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,064,064,,,,,...,,,gross,Gross Charge,,126416.58,,,,
4,050455,,INTRACRANIAL HEMORRHAGE OR CEREBRAL INFARCTION...,,065,065,,,,,...,,,gross,Gross Charge,,59884.19,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11515,050455,,KIDNEY AND URINARY TRACT INFECTIONS WITH MAJOR...,,N390,N390,,,,,...,,,payer,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,,22530.00,,,,
11516,050455,,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,U0003,U0003,,,,,...,,,payer,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,,134.32,,,,
11517,050455,,INFECTIOUS AGENT DETECTION BY NUCLEIC ACID (DN...,,U0005,U0005,,,,,...,,,payer,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,,36.80,,,,
11518,050455,,RESPIRATORY INFECTIONS AND INFLAMMATIONS WITH ...,,U071,U071,,,,,...,,,payer,WESTERN_GROWERS_ASSURANCE-_ALL_PLANS,,42561.26,,,,


In [44]:
# Write the result to tmp.csv in the working directory, omitting the DataFrame index.
df_out.to_csv("tmp.csv", index=False)