In [1]:
import csv
import subprocess
import json
from io import StringIO

import pandas as pd
import numpy as np
import requests
from lxml import html

from helpers import derive_ein_from_filename

In [2]:
TARGET_COLUMNS = [
    'hospital_id',
    #'row_id',
    'line_type',
    'description',
    'rev_code',
    'local_code',
    'code',
    'ms_drg',
    'apr_drg',
    'eapg',
    'hcpcs_cpt',
    'modifiers',
    'alt_hcpcs_cpt',
    'thru',
    'apc',
    'icd',
    'ndc',
    'drug_hcpcs_multiplier',
    'drug_quantity',
    'drug_unit_of_measurement',
    'drug_type_of_measurement',
    'billing_class',
    'setting',
    'payer_category',
    'payer_name',
    'plan_name',
    'standard_charge',
    'standard_charge_percent',
    'contracting_method',
    'additional_generic_notes',
    'additional_payer_specific_notes'
]

In [3]:
transparency_page = "https://www.hcafloridahealthcare.com/patient-resources/patient-financial-resources/pricing-transparency-cms-required-file-of-standard-charges"

In [4]:
resp = requests.get(transparency_page)
resp

<Response [200]>

In [5]:
tree = html.fromstring(resp.text)
json_str = tree.xpath('//script[@id="__JSS_STATE__"]/text()')[0]
json_dict = json.loads(json_str)
facilities = json_dict.get(
    'sitecore').get(
    'route').get(
    'placeholders').get(
    'body')[0].get(
    'placeholders').get(
    'col-1')[-1].get(
    'placeholders').get(
    'section')[-1].get(
    'fields').get(
    'facilities')

facilities

[{'locationName': 'Aventura Hospital',
  'disclaimerId': '5313b797-5456-4bbe-be94-fbf466b137b7',
  'pricingTransparencyLink': 'https://hcah-p-001-delivery.stylelabs.cloud/api/public/content/d5f6d5de8cc74f49b8df31456742e141?v=dda11690'},
 {'locationName': 'Bayonet Point Hospital',
  'disclaimerId': '5313b797-5456-4bbe-be94-fbf466b137b7',
  'pricingTransparencyLink': 'https://hcah-p-001-delivery.stylelabs.cloud/api/public/content/015a4ce09f39400aa1b1edca503cc3b1?v=0e6eac12'},
 {'locationName': 'Blake Hospital',
  'disclaimerId': '7265234b-35ea-422f-a7ce-278db1df942f',
  'pricingTransparencyLink': 'https://hcah-p-001-delivery.stylelabs.cloud/api/public/content/2cb8acb1103d4ca8a4ea9ea047dfa255?v=5eeddfb1'},
 {'locationName': 'Brandon Hospital',
  'disclaimerId': '5313b797-5456-4bbe-be94-fbf466b137b7',
  'pricingTransparencyLink': 'https://hcah-p-001-delivery.stylelabs.cloud/api/public/content/104f769cd7eb4356920a1e3240826337?v=487cb8b4'},
 {'locationName': 'Capital Hospital',
  'disclaimer

In [6]:
ccn = "100131"
url = "https://hcah-p-001-delivery.stylelabs.cloud/api/public/content/d5f6d5de8cc74f49b8df31456742e141?v=dda11690"

In [7]:
resp0 = requests.head(url)
content_disposition = resp0.headers.get('Content-Disposition')
filename = content_disposition.split("''")[-1]
filename

'752379007_hca-florida-aventura-hospital_standardcharges.csv'

In [8]:
ein = derive_ein_from_filename(filename)
ein

'75-2379007'

In [9]:
subprocess.run(["wget", "--no-clobber", url, "-O", filename])

File ‘752379007_hca-florida-aventura-hospital_standardcharges.csv’ already there; not retrieving.


CompletedProcess(args=['wget', '--no-clobber', 'https://hcah-p-001-delivery.stylelabs.cloud/api/public/content/d5f6d5de8cc74f49b8df31456742e141?v=dda11690', '-O', '752379007_hca-florida-aventura-hospital_standardcharges.csv'], returncode=1)

In [10]:
in_f = open(filename, "r", encoding='utf-8')
csv_str = in_f.read()
in_f.close()

In [11]:
csv_str = csv_str.replace('\r\n', '\n')

chunks = csv_str.split('\n\n')[:-1]
first_chunk = chunks[0]

starts_at = first_chunk.index('Procedure ID')
aetna_at = first_chunk.index('Aetna')
aetna_chunk = first_chunk[aetna_at:]
first_chunk = first_chunk[starts_at:aetna_at]
first_chunk[:100]

'Procedure ID,HCPCS/CPT Code,Description,Gross Charge,Discounted Cash Price (Gross Charges)\n298,0G037'

In [12]:
aetna_chunk[:100]

'Aetna\nService Description,Coding,Rate\nAngioplasty,"CPT/HCPC 92920-92944, C9600-C9608","$16,518.00"\nB'

In [13]:
df1 = pd.read_csv(StringIO(first_chunk))
df1

Unnamed: 0,Procedure ID,HCPCS/CPT Code,Description,Gross Charge,Discounted Cash Price (Gross Charges)
0,298,0G0378,OBS PER HOUR,191.0,191.0
1,327,0G0379,DIRECT REFERRAL TO OBS,174.0,174.0
2,419,,RM & BD PRIVATE,1541.0,1541.0
3,474,,RM & BD SEMI PRIVATE,1540.0,1540.0
4,477,,SEMI-PVT RM W/ TELE,3117.0,3117.0
...,...,...,...,...,...
31525,936322,,SPACER SPN PEEK 50X18X8M,6000.0,6000.0
31526,936323,0C1713,ROD SPNL STRT 5.5X150MM,360.0,360.0
31527,936369,0A2006,NOVOSORB SYNPATH 100SQCM,7350.0,7350.0
31528,936379,,NUT RETAINER STERILE,1803.0,1803.0


In [14]:
df_mid = pd.DataFrame(df1)
df_mid = df_mid.rename(columns={
    'Procedure ID': 'local_code',
    'HCPCS/CPT Code': 'hcpcs_cpt',
    'Description': 'description'
})

df_mid

Unnamed: 0,local_code,hcpcs_cpt,description,Gross Charge,Discounted Cash Price (Gross Charges)
0,298,0G0378,OBS PER HOUR,191.0,191.0
1,327,0G0379,DIRECT REFERRAL TO OBS,174.0,174.0
2,419,,RM & BD PRIVATE,1541.0,1541.0
3,474,,RM & BD SEMI PRIVATE,1540.0,1540.0
4,477,,SEMI-PVT RM W/ TELE,3117.0,3117.0
...,...,...,...,...,...
31525,936322,,SPACER SPN PEEK 50X18X8M,6000.0,6000.0
31526,936323,0C1713,ROD SPNL STRT 5.5X150MM,360.0,360.0
31527,936369,0A2006,NOVOSORB SYNPATH 100SQCM,7350.0,7350.0
31528,936379,,NUT RETAINER STERILE,1803.0,1803.0


In [15]:
money_columns = df_mid.columns.to_list()[-2:]
remaining_columns = df_mid.columns.to_list()[:-2]
df_mid = pd.melt(df_mid, id_vars=remaining_columns, var_name='payer_name', value_name='standard_charge')
df_mid

Unnamed: 0,local_code,hcpcs_cpt,description,payer_name,standard_charge
0,298,0G0378,OBS PER HOUR,Gross Charge,191.0
1,327,0G0379,DIRECT REFERRAL TO OBS,Gross Charge,174.0
2,419,,RM & BD PRIVATE,Gross Charge,1541.0
3,474,,RM & BD SEMI PRIVATE,Gross Charge,1540.0
4,477,,SEMI-PVT RM W/ TELE,Gross Charge,3117.0
...,...,...,...,...,...
63055,936322,,SPACER SPN PEEK 50X18X8M,Discounted Cash Price (Gross Charges),6000.0
63056,936323,0C1713,ROD SPNL STRT 5.5X150MM,Discounted Cash Price (Gross Charges),360.0
63057,936369,0A2006,NOVOSORB SYNPATH 100SQCM,Discounted Cash Price (Gross Charges),7350.0
63058,936379,,NUT RETAINER STERILE,Discounted Cash Price (Gross Charges),1803.0


In [16]:
df_mid['hcpcs_cpt'] = df_mid['hcpcs_cpt'].apply(lambda cpt: cpt[1:].strip())
set(df_mid['hcpcs_cpt'].to_list())

{'',
 '72131',
 '87278',
 '82784',
 '83018',
 '90376',
 '82565',
 '82626',
 'Q4130',
 '85378',
 '7308050',
 '93990',
 '86777',
 '83060',
 'J0744',
 'J2805',
 '75956',
 '97150GPCQ',
 '82950',
 '29580LT',
 'J1645',
 '75958',
 'J9206',
 '84432',
 '97161GP',
 '70546',
 '82164',
 '99291',
 '77080',
 '95863',
 'J2765',
 '82139',
 'C1062',
 '78730',
 'C2617',
 '85260',
 '82308',
 '99203',
 '83550',
 '88334',
 'J9098',
 'P9045',
 '83586',
 '93618',
 '73660RT',
 '86235',
 '76390',
 '97018GO',
 '72190',
 '78761',
 '29581LT',
 '97763GPCQ',
 '84260',
 'A9539',
 '93925',
 '82728',
 'J0135',
 'S0136',
 '88108',
 'J0834',
 '73706LT',
 '73721RT',
 '73525LT',
 '82248',
 'A9548',
 '77321',
 'G0109',
 '87799',
 'J0285',
 '78018',
 '72133',
 '93325',
 '84153',
 '75805LT',
 'J7504',
 '82271',
 '86617',
 'Q4114',
 'L3980',
 '82131',
 'C1751',
 '7372150',
 '86316',
 '87529',
 'A9516',
 'S0104',
 '76506',
 '77014',
 'C1766',
 '87591',
 'A4385',
 '97597',
 '77053',
 '86695',
 'J1450',
 '7370150',
 '71111',
 '9

In [17]:
def split_off_modifiers(hcpcs_cpt):
    if len(hcpcs_cpt) == 7:
        return hcpcs_cpt[-2:]

    if len(hcpcs_cpt) == 9:
        modifiers = hcpcs_cpt[-4:]
        modifiers = modifiers[:2] + "|" + modifiers[-2:]
        return modifiers

    return None

df_mid['modifiers'] = df_mid['hcpcs_cpt'].apply(split_off_modifiers)
df_mid['hcpcs_cpt'] = df_mid['hcpcs_cpt'].apply(lambda cpt: cpt[:5])
df_mid[df_mid['modifiers'].notnull()]

Unnamed: 0,local_code,hcpcs_cpt,description,payer_name,standard_charge,modifiers
1475,115423,77065,MAMMO DIAG CAD UNI LT,Gross Charge,1215.00,LT
1476,115424,77065,MAMMO DIAG CAD UNI RT,Gross Charge,1215.00,RT
1509,115468,73200,CT UP EXTREM W/O CONT LT,Gross Charge,6080.00,LT
1510,115469,73200,CT UP EXTREM W/O CONT RT,Gross Charge,6080.00,RT
1511,115470,73200,CT UP EXTREM W/O CONT BI,Gross Charge,9128.00,50
...,...,...,...,...,...,...
36291,795737,97763,ORTHPRS MGT SBSQ ENCPTCQ,Discounted Cash Price (Gross Charges),409.00,GP|CQ
36362,796312,29580,UNNA BOOT BI,Discounted Cash Price (Gross Charges),419.88,50
36363,796313,29580,UNNA BOOT LT,Discounted Cash Price (Gross Charges),419.88,LT
36364,796314,29580,UNNA BOOT RT,Discounted Cash Price (Gross Charges),419.88,RT


In [18]:
df_mid.loc[df_mid['payer_name'] == 'Gross Charge', 'payer_category'] = 'gross'
df_mid.loc[df_mid['payer_name'] == 'Discounted Cash Price (Gross Charges)', 'payer_category'] = 'cash'
df_mid

Unnamed: 0,local_code,hcpcs_cpt,description,payer_name,standard_charge,modifiers,payer_category
0,298,G0378,OBS PER HOUR,Gross Charge,191.0,,gross
1,327,G0379,DIRECT REFERRAL TO OBS,Gross Charge,174.0,,gross
2,419,,RM & BD PRIVATE,Gross Charge,1541.0,,gross
3,474,,RM & BD SEMI PRIVATE,Gross Charge,1540.0,,gross
4,477,,SEMI-PVT RM W/ TELE,Gross Charge,3117.0,,gross
...,...,...,...,...,...,...,...
63055,936322,,SPACER SPN PEEK 50X18X8M,Discounted Cash Price (Gross Charges),6000.0,,cash
63056,936323,C1713,ROD SPNL STRT 5.5X150MM,Discounted Cash Price (Gross Charges),360.0,,cash
63057,936369,A2006,NOVOSORB SYNPATH 100SQCM,Discounted Cash Price (Gross Charges),7350.0,,cash
63058,936379,,NUT RETAINER STERILE,Discounted Cash Price (Gross Charges),1803.0,,cash


In [19]:
def fill_null_fields(df_mid):
    for col_name in TARGET_COLUMNS:
        if not col_name in df_mid.columns:
            df_mid[col_name] = None

fill_null_fields(df_mid)
df_mid

Unnamed: 0,local_code,hcpcs_cpt,description,payer_name,standard_charge,modifiers,payer_category,hospital_id,line_type,rev_code,...,drug_quantity,drug_unit_of_measurement,drug_type_of_measurement,billing_class,setting,plan_name,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
0,298,G0378,OBS PER HOUR,Gross Charge,191.0,,gross,,,,...,,,,,,,,,,
1,327,G0379,DIRECT REFERRAL TO OBS,Gross Charge,174.0,,gross,,,,...,,,,,,,,,,
2,419,,RM & BD PRIVATE,Gross Charge,1541.0,,gross,,,,...,,,,,,,,,,
3,474,,RM & BD SEMI PRIVATE,Gross Charge,1540.0,,gross,,,,...,,,,,,,,,,
4,477,,SEMI-PVT RM W/ TELE,Gross Charge,3117.0,,gross,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63055,936322,,SPACER SPN PEEK 50X18X8M,Discounted Cash Price (Gross Charges),6000.0,,cash,,,,...,,,,,,,,,,
63056,936323,C1713,ROD SPNL STRT 5.5X150MM,Discounted Cash Price (Gross Charges),360.0,,cash,,,,...,,,,,,,,,,
63057,936369,A2006,NOVOSORB SYNPATH 100SQCM,Discounted Cash Price (Gross Charges),7350.0,,cash,,,,...,,,,,,,,,,
63058,936379,,NUT RETAINER STERILE,Discounted Cash Price (Gross Charges),1803.0,,cash,,,,...,,,,,,,,,,


In [20]:
output_dfs = []
output_dfs.append(pd.DataFrame(df_mid[TARGET_COLUMNS]))

In [21]:
payer_chunk = aetna_chunk
starts_at = payer_chunk.index('\n') + 1
payer_name = payer_chunk[:starts_at].strip()
payer_name

'Aetna'

In [22]:
payer_chunk = payer_chunk[starts_at:]
df2 = pd.read_csv(StringIO(payer_chunk))

In [23]:
df2.loc[df2['Coding'].isnull(), 'Coding'] = None
df2.to_dict('records')[:2]

[{'Service Description': 'Angioplasty',
  'Coding': 'CPT/HCPC 92920-92944, C9600-C9608',
  'Rate': '$16,518.00'},
 {'Service Description': 'Behavioral Health',
  'Coding': 'CPT/HCPC H0008-H0011; ICD 9/10 F10.10, F10.11, F10.120, F10.129, F10.130, F10.131, F10.132, F10.139, F10.20, F10.21, F10.220, F10.229, F10.230, F10.232, F10.239, F10.930, F10.931, F10.932, F10.939, F11.10, F11.11, F11.120, F11.129, F11.13, F11.20, F11.21, F11.220, F11.221, F11.222, F11.229, F11.23, F11.24, F11.250, F11.251, F11.259, F11.281, F11.282, F11.288, F11.29, F11.90, F12.10, F12.11, F12.13, F12.20, F12.21, F12.220, F12.221, F12.222, F12.229, F12.250, F12.251, F12.259, F12.280, F12.288, F12.29, F12.90, F13.10, F13.11, F13.120, F13.130, F13.131, F13.132, F13.139, F13.20, F13.21, F13.220, F13.221, F13.229, F13.230, F13.231, F13.232, F13.239, F13.24, F13.250, F13.251, F13.259, F13.26, F13.27, F13.280, F13.281, F13.282, F13.288, F13.29, F13.90, F14.10, F14.11, F14.120, F14.13, F14.20, F14.21, F14.220, F14.221, F1

In [24]:
def split_codes(code_str):
    codes = []

    if code_str is None:
        return codes

    line_type = None
    
    for component in code_str.split(';'):
        component = component.strip()
        
        if component.startswith('CPT/HCPC '):
            line_type = 'hcpcs_cpt'
            component = component.replace("CPT/HCPC ", "")
        elif component.startswith('MS-DRG '):
            line_type = 'ms_drg'
            component = component.replace("MS-DRG ", "")
        elif component.startswith('ICD 9/10 '):
            line_type = 'icd'
            component = component.replace("ICD 9/10 ", "")
        elif line_type is None:
            print("Unknown line_type for code string:")
            print(code_str)
            continue

        component = component.replace(" (hips)", "")
        component = component.replace(" (knees)", "")
        component = component.replace(" and ", ", ")

        for subcomponent in component.split(", "):
            if len(subcomponent) == 0:
                continue
                
            code = subcomponent
            thru = None
            if "-" in code:
                thru = code.split("-")[-1]
                code = code.split("-")[0]

            codes.append({
                'code': code,
                'thru': thru,
                'line_type': line_type
            })

    return codes

split_codes("CPT/HCPC 27125, 27130, 27132, 27134, 27137 and 27138 (hips) and 27445, 27446, 27447, 27485 and 27487 (knees)")

[{'code': '27125', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27130', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27132', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27134', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27137', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27138', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27445', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27446', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27447', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27485', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27487', 'thru': None, 'line_type': 'hcpcs_cpt'}]

In [25]:
split_codes("CPT/HCPC 23470, 23472-23474, 27130, 27132, 27134, 27137-27138, 27446, 27447, 27486, 27487")

[{'code': '23470', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '23472', 'thru': '23474', 'line_type': 'hcpcs_cpt'},
 {'code': '27130', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27132', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27134', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27137', 'thru': '27138', 'line_type': 'hcpcs_cpt'},
 {'code': '27446', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27447', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27486', 'thru': None, 'line_type': 'hcpcs_cpt'},
 {'code': '27487', 'thru': None, 'line_type': 'hcpcs_cpt'}]

In [26]:
split_codes("MS-DRG , 001-013, 034-042, 113-117, 129-139, 163-168, 239-241, 252-257, 263, 264, 326-358, 405-425, 463-465, 471-479, 485-489, 492-514, 573-585, 614-618, 622-630, 652, 675, 707-718, 734-750, 769, 770, 799-804, 820-830, 853-858, 876, 901-909, 927-929, 939-941, 955-959, 969, 970, 981-989")

[{'code': '001', 'thru': '013', 'line_type': 'ms_drg'},
 {'code': '034', 'thru': '042', 'line_type': 'ms_drg'},
 {'code': '113', 'thru': '117', 'line_type': 'ms_drg'},
 {'code': '129', 'thru': '139', 'line_type': 'ms_drg'},
 {'code': '163', 'thru': '168', 'line_type': 'ms_drg'},
 {'code': '239', 'thru': '241', 'line_type': 'ms_drg'},
 {'code': '252', 'thru': '257', 'line_type': 'ms_drg'},
 {'code': '263', 'thru': None, 'line_type': 'ms_drg'},
 {'code': '264', 'thru': None, 'line_type': 'ms_drg'},
 {'code': '326', 'thru': '358', 'line_type': 'ms_drg'},
 {'code': '405', 'thru': '425', 'line_type': 'ms_drg'},
 {'code': '463', 'thru': '465', 'line_type': 'ms_drg'},
 {'code': '471', 'thru': '479', 'line_type': 'ms_drg'},
 {'code': '485', 'thru': '489', 'line_type': 'ms_drg'},
 {'code': '492', 'thru': '514', 'line_type': 'ms_drg'},
 {'code': '573', 'thru': '585', 'line_type': 'ms_drg'},
 {'code': '614', 'thru': '618', 'line_type': 'ms_drg'},
 {'code': '622', 'thru': '630', 'line_type': 'ms_d

In [27]:
split_codes("CPT/HCPC H0008-H0011; ICD 9/10 F10.10, F10.11, F10.120, F10.129, F10.130, F10.131, F10.132, F10.139, F10.20, F10.21, F10.220, F10.229, F10.230, F10.232, F10.239, F10.930, F10.931, F10.932, F10.939, F11.10, F11.11, F11.120, F11.129, F11.13, F11.20, F11.21, F11.220, F11.221, F11.222, F11.229, F11.23, F11.24, F11.250, F11.251, F11.259, F11.281, F11.282, F11.288, F11.29, F11.90, F12.10, F12.11, F12.13, F12.20, F12.21, F12.220, F12.221, F12.222, F12.229, F12.250, F12.251, F12.259, F12.280, F12.288, F12.29, F12.90, F13.10, F13.11, F13.120, F13.130, F13.131, F13.132, F13.139, F13.20, F13.21, F13.220, F13.221, F13.229, F13.230, F13.231, F13.232, F13.239, F13.24, F13.250, F13.251, F13.259, F13.26, F13.27, F13.280, F13.281, F13.282, F13.288, F13.29, F13.90, F14.10, F14.11, F14.120, F14.13, F14.20, F14.21, F14.220, F14.221, F14.222, F14.229, F14.23, F14.24, F14.250, F14.251, F14.259, F14.280, F14.281, F14.282, F14.288, F14.29, F14.90, F14.93, F15.10, F15.11, F15.120, F15.13, F15.20, F15.21, F15.220, F15.221, F15.222, F15.229, F15.23, F15.24, F15.250, F15.251, F15.259, F15.280, F15.281, F15.282, F15.288, F15.29, F15.90, F16.10, F16.11, F16.120, F16.20, F16.21, F16.220, F16.221, F16.229, F16.24, F16.250, F16.251, F16.259, F16.280, F16.283, F16.288, F16.29, F16.90, F18.10, F18.11, F18.120, F18.20, F18.21, F18.220, F18.221, F18.229, F18.24, F18.250, F18.251, F18.259, F18.27, F18.280, F18.288, F18.29, F18.90, F19.10, F19.11, F19.120, F19.130, F19.131, F19.132, F19.139, F19.20, F19.21, F19.220, F19.221, F19.222, F19.229, F19.230, F19.231, F19.232, F19.239, F19.24, F19.250, F19.251, F19.259, F19.26, F19.27, F19.280, F19.281, F19.282, F19.288, F19.29, F19.90, F55.0, F55.1, F55.2, F55.3, F55.4, F55.8, Z72.0, HZ2ZZZZ, HZ3BZZZ, HZ30ZZZ, HZ31ZZZ, HZ32ZZZ, HZ33ZZZ, HZ34ZZZ, HZ35ZZZ, HZ36ZZZ, HZ37ZZZ, HZ38ZZZ, HZ39ZZZ, HZ5BZZZ, HZ5CZZZ, HZ5DZZZ, HZ50ZZZ, HZ51ZZZ, HZ52ZZZ, HZ53ZZZ, HZ54ZZZ, HZ55ZZZ, HZ56ZZZ, HZ57ZZZ, HZ58ZZZ, HZ59ZZZ, HZ63ZZZ, HZ80ZZZ, HZ81ZZZ, HZ82ZZZ, HZ83ZZZ, HZ84ZZZ, HZ85ZZZ, HZ86ZZZ, HZ87ZZZ, HZ88ZZZ, HZ89ZZZ, HZ90ZZZ, HZ91ZZZ, HZ92ZZZ, HZ93ZZZ, HZ94ZZZ, HZ95ZZZ, HZ96ZZZ, HZ97ZZZ, HZ98ZZZ, HZ99ZZZ")

[{'code': 'H0008', 'thru': 'H0011', 'line_type': 'hcpcs_cpt'},
 {'code': 'F10.10', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.11', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.120', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.129', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.130', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.131', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.132', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.139', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.20', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.21', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.220', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.229', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.230', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.232', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.239', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.930', 'thru': None, 'line_type': 'icd'},
 {'code': 'F10.931', 'thru': None, 'line_type

In [28]:
from helpers import cleanup_dollar_value

def transform_payer_row(payer_row, payer_name):
    description = payer_row.get('Service Description')

    standard_charge = None
    standard_charge_percentage = None
    rate = payer_row.get('Rate')
    if type(rate) == float and np.isnan(rate):
        return []
        
    if '%' in rate:
        standard_charge_percentage = rate.split("% ")[0]
    else:
        standard_charge = cleanup_dollar_value(rate)

    codes = payer_row.get('Coding')
    if codes is None or (type(codes) == float and np.isnan(codes)):
        return []
    
    codes = codes.strip()

    if len(codes) == '':
        return [{
            'description': description,
            'standard_charge': standard_charge,
            'standard_charge_percentage': standard_charge_percentage,
            'payer_name': payer_name,
            'code': None,
            'thru': None,
            'line_type': None
        }]

    out_rows = []
    
    for subcode_dict in split_codes(codes):
        line_type = subcode_dict.get('line_type')
        code = subcode_dict.get('code')
        thru = subcode_dict.get('thru')

        out_rows.append({
            'description': description,
            'standard_charge': standard_charge,
            'standard_charge_percentage': standard_charge_percentage,
            'payer_name': payer_name,
            'code': code,
            'thru': thru,
            'line_type': line_type
        })

    return out_rows

transform_payer_row({
    'Service Description': 'Behavioral Health',
    'Coding': "CPT/HCPC H0008-H0011; ICD 9/10 F10.10, F10.11, F10.120, F10.129, F10.130, F10.131, F10.132, F10.139, F10.20, F10.21, F10.220, F10.229, F10.230, F10.232, F10.239, F10.930, F10.931, F10.932, F10.939, F11.10, F11.11, F11.120, F11.129, F11.13, F11.20, F11.21, F11.220, F11.221, F11.222, F11.229, F11.23, F11.24, F11.250, F11.251, F11.259, F11.281, F11.282, F11.288, F11.29, F11.90, F12.10, F12.11, F12.13, F12.20, F12.21, F12.220, F12.221, F12.222, F12.229, F12.250, F12.251, F12.259, F12.280, F12.288, F12.29, F12.90, F13.10, F13.11, F13.120, F13.130, F13.131, F13.132, F13.139, F13.20, F13.21, F13.220, F13.221, F13.229, F13.230, F13.231, F13.232, F13.239, F13.24, F13.250, F13.251, F13.259, F13.26, F13.27, F13.280, F13.281, F13.282, F13.288, F13.29, F13.90, F14.10, F14.11, F14.120, F14.13, F14.20, F14.21, F14.220, F14.221, F14.222, F14.229, F14.23, F14.24, F14.250, F14.251, F14.259, F14.280, F14.281, F14.282, F14.288, F14.29, F14.90, F14.93, F15.10, F15.11, F15.120, F15.13, F15.20, F15.21, F15.220, F15.221, F15.222, F15.229, F15.23, F15.24, F15.250, F15.251, F15.259, F15.280, F15.281, F15.282, F15.288, F15.29, F15.90, F16.10, F16.11, F16.120, F16.20, F16.21, F16.220, F16.221, F16.229, F16.24, F16.250, F16.251, F16.259, F16.280, F16.283, F16.288, F16.29, F16.90, F18.10, F18.11, F18.120, F18.20, F18.21, F18.220, F18.221, F18.229, F18.24, F18.250, F18.251, F18.259, F18.27, F18.280, F18.288, F18.29, F18.90, F19.10, F19.11, F19.120, F19.130, F19.131, F19.132, F19.139, F19.20, F19.21, F19.220, F19.221, F19.222, F19.229, F19.230, F19.231, F19.232, F19.239, F19.24, F19.250, F19.251, F19.259, F19.26, F19.27, F19.280, F19.281, F19.282, F19.288, F19.29, F19.90, F55.0, F55.1, F55.2, F55.3, F55.4, F55.8, Z72.0, HZ2ZZZZ, HZ3BZZZ, HZ30ZZZ, HZ31ZZZ, HZ32ZZZ, HZ33ZZZ, HZ34ZZZ, HZ35ZZZ, HZ36ZZZ, HZ37ZZZ, HZ38ZZZ, HZ39ZZZ, HZ5BZZZ, HZ5CZZZ, HZ5DZZZ, HZ50ZZZ, HZ51ZZZ, HZ52ZZZ, HZ53ZZZ, HZ54ZZZ, HZ55ZZZ, HZ56ZZZ, HZ57ZZZ, HZ58ZZZ, HZ59ZZZ, HZ63ZZZ, HZ80ZZZ, HZ81ZZZ, HZ82ZZZ, HZ83ZZZ, HZ84ZZZ, HZ85ZZZ, HZ86ZZZ, HZ87ZZZ, HZ88ZZZ, HZ89ZZZ, HZ90ZZZ, HZ91ZZZ, HZ92ZZZ, HZ93ZZZ, HZ94ZZZ, HZ95ZZZ, HZ96ZZZ, HZ97ZZZ, HZ98ZZZ, HZ99ZZZ",
    'Rate': "$2,319.67"
}, "Aetna")

[{'description': 'Behavioral Health',
  'standard_charge': '2319.67',
  'standard_charge_percentage': None,
  'payer_name': 'Aetna',
  'code': 'H0008',
  'thru': 'H0011',
  'line_type': 'hcpcs_cpt'},
 {'description': 'Behavioral Health',
  'standard_charge': '2319.67',
  'standard_charge_percentage': None,
  'payer_name': 'Aetna',
  'code': 'F10.10',
  'thru': None,
  'line_type': 'icd'},
 {'description': 'Behavioral Health',
  'standard_charge': '2319.67',
  'standard_charge_percentage': None,
  'payer_name': 'Aetna',
  'code': 'F10.11',
  'thru': None,
  'line_type': 'icd'},
 {'description': 'Behavioral Health',
  'standard_charge': '2319.67',
  'standard_charge_percentage': None,
  'payer_name': 'Aetna',
  'code': 'F10.120',
  'thru': None,
  'line_type': 'icd'},
 {'description': 'Behavioral Health',
  'standard_charge': '2319.67',
  'standard_charge_percentage': None,
  'payer_name': 'Aetna',
  'code': 'F10.129',
  'thru': None,
  'line_type': 'icd'},
 {'description': 'Behavioral H

In [29]:
out_rows = []

for in_row in df2.to_dict('records'):
    for out_row in transform_payer_row(in_row, payer_name):
        out_rows.append(out_row)

df_tmp = pd.DataFrame(out_rows)
fill_null_fields(df_tmp)

df_tmp.loc[df_tmp['line_type'] == 'hcpcs_cpt', 'hcpcs_cpt'] = df_tmp[df_tmp['line_type'] == 'hcpcs_cpt']['code']
df_tmp.loc[df_tmp['line_type'] == 'ms_drg', 'ms_drg'] = df_tmp[df_tmp['line_type'] == 'ms_drg']['code']
df_tmp.loc[df_tmp['line_type'] == 'icd', 'icd'] = df_tmp[df_tmp['line_type'] == 'icd']['code']

df_tmp

Unnamed: 0,description,standard_charge,standard_charge_percentage,payer_name,code,thru,line_type,hospital_id,rev_code,local_code,...,drug_unit_of_measurement,drug_type_of_measurement,billing_class,setting,payer_category,plan_name,standard_charge_percent,contracting_method,additional_generic_notes,additional_payer_specific_notes
0,Angioplasty,16518.00,,Aetna,92920,92944,hcpcs_cpt,,,,...,,,,,,,,,,
1,Angioplasty,16518.00,,Aetna,C9600,C9608,hcpcs_cpt,,,,...,,,,,,,,,,
2,Behavioral Health,2319.67,,Aetna,H0008,H0011,hcpcs_cpt,,,,...,,,,,,,,,,
3,Behavioral Health,2319.67,,Aetna,F10.10,,icd,,,,...,,,,,,,,,,
4,Behavioral Health,2319.67,,Aetna,F10.11,,icd,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,Sleep Studies,,17.97,Aetna,G0400,,hcpcs_cpt,,,,...,,,,,,,,,,
763,Urology,24308.33,,Aetna,707,,ms_drg,,,,...,,,,,,,,,,
764,Urology,24308.33,,Aetna,708,,ms_drg,,,,...,,,,,,,,,,
765,Vascular Surgery,30636.67,,Aetna,252,254,ms_drg,,,,...,,,,,,,,,,


In [30]:
output_dfs.append(pd.DataFrame(df_tmp[TARGET_COLUMNS]))

In [31]:
for payer_chunk in chunks[1:-1]:
    starts_at = payer_chunk.index('\n') + 1
    payer_name = payer_chunk[:starts_at].strip()
    payer_chunk = payer_chunk[starts_at:]
    df2 = pd.read_csv(StringIO(payer_chunk))
    df2.loc[df2['Coding'].isnull(), 'Coding'] = None
    out_rows = []

    for in_row in df2.to_dict('records'):
        try:
            for out_row in transform_payer_row(in_row, payer_name):
                out_rows.append(out_row)
        except Exception as e:
            print(e)
            print(in_row)

    df_tmp = pd.DataFrame(out_rows)
    fill_null_fields(df_tmp)

    df_tmp.loc[df_tmp['line_type'] == 'hcpcs_cpt', 'hcpcs_cpt'] = df_tmp[df_tmp['line_type'] == 'hcpcs_cpt']['code']
    df_tmp.loc[df_tmp['line_type'] == 'ms_drg', 'ms_drg'] = df_tmp[df_tmp['line_type'] == 'ms_drg']['code']
    df_tmp.loc[df_tmp['line_type'] == 'icd', 'icd'] = df_tmp[df_tmp['line_type'] == 'icd']['code']

    output_dfs.append(pd.DataFrame(df_tmp[TARGET_COLUMNS]))

len(output_dfs)

86

In [33]:
output_dfs

[      hospital_id line_type               description rev_code  local_code  \
 0            None      None  OBS PER HOUR                 None         298   
 1            None      None  DIRECT REFERRAL TO OBS       None         327   
 2            None      None  RM & BD PRIVATE              None         419   
 3            None      None  RM & BD SEMI PRIVATE         None         474   
 4            None      None  SEMI-PVT RM W/ TELE          None         477   
 ...           ...       ...                       ...      ...         ...   
 63055        None      None  SPACER SPN PEEK 50X18X8M     None      936322   
 63056        None      None  ROD SPNL STRT 5.5X150MM      None      936323   
 63057        None      None  NOVOSORB SYNPATH 100SQCM     None      936369   
 63058        None      None  NUT RETAINER STERILE         None      936379   
 63059        None      None  STEM FEM 130MM 18MM REV      None      936380   
 
        code ms_drg apr_drg  eapg hcpcs_cpt  ... b