In [1]:
# Load duckdb, which lets us efficiently load large files
import duckdb

# Load pandas, which lets us manipulate dataframes
import pandas as pd

# Import jupysql Jupyter extension to create SQL cells
%load_ext sql

# Set configrations on jupysql to directly output data to Pandas and to simplify the output that is printed to the notebook.
%config SqlMagic.autopandas = True

%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

# Connect jupysql to DuckDB using a SQLAlchemy-style connection string. Either connect to an in memory DuckDB, or a file backed db.
%sql duckdb:///:memory:

In [2]:
!wget https://www.slhn.org/-/media/slhn/Billpay/File/PDF/Standard-Charges/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx -P /tmp

--2023-08-24 17:46:44--  https://www.slhn.org/-/media/slhn/Billpay/File/PDF/Standard-Charges/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx
Resolving www.slhn.org (www.slhn.org)... 2620:1ec:46::40, 2620:1ec:bdf::40, 13.107.213.40, ...
Connecting to www.slhn.org (www.slhn.org)|2620:1ec:46::40|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 12880838 (12M) [text/csv]
Saving to: ‘/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx’


2023-08-24 17:46:47 (4.42 MB/s) - ‘/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx’ saved [12880838/12880838]



In [3]:
!ls -lh /tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx

-rw-r--r--  1 me  wheel    12M Mar  2  2022 /tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx


In [4]:
!head /tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx

Record ID,Description,CPT/DRG,Gross Charge,Aetna (All Plans) Payment,Aetna MC (All Plans) Payment,Amerihealth Caritas (All Plans) Payment,Amerihealth NJ (All Plans) Payment,Amerihealth Northeast (All Plans) Payment,CBC (Except EPO) Payment,CBC EPO Payment,Cigna (All Plans) Payment,Freedom Blue (All Plans) Payment,Gateway (All Plans) Payment,Geisinger (All Plans) Payment,Geisinger GHP Family (All Plans) Payment,Geisinger Gold (All Plans) Payment,Highmark (All Plans) Payment,Horizon (All Plans) Payment,Horizon Blue MC (All Plans) Payment,Horizon NJ Health (All Plans) Payment,IBC (All Plans) Payment,Keystone 65 (All Plans) Payment,Keystone First (All Plans) Payment,United (All Plans) Payment,Self Pay,Min Payment,Max Payment
71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,"$10,925.00 ","$3,159.11 ","$1,157.34 ",$821.81 ,"$1,135.28 ",$785.52 ,"$2,762.58 ","$2,584.11 ","$2,947.14 ","$1,229.23 ","$1,108.17 ","$2,784.11 ",$889.71 ,"$1,382.10 ","$2,778.85 ","$2,351.27 ","$1,110.26 ",$619.55 ,"$1,

In [30]:
!head /tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx

record_id,description,cpt_drg,gross_charge,aetna,aetna_mc,amerihealth_caritas,amerihealth_nj,amerihealth_northeast,cbc,cbc_epo,cigna,freedom_blue,gateway,geisinger,geisinger_ghp_family,geisinger_gold,highmark,horizon,horizon_blue_mc,horizon_nj_health,ibc,keystone_65,keystone_first,united,self_pay,min,max
71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,"$10,925.00 ","$3,159.11 ","$1,157.34 ",$821.81 ,"$1,135.28 ",$785.52 ,"$2,762.58 ","$2,584.11 ","$2,947.14 ","$1,229.23 ","$1,108.17 ","$2,784.11 ",$889.71 ,"$1,382.10 ","$2,778.85 ","$2,351.27 ","$1,110.26 ",$619.55 ,"$1,649.93 ","$1,114.43 ",$570.04 ,"$3,011.88 ","$1,966.50 ",$570.04 ,"$3,159.11 "
72223,"""PLATE ACHORANGE  LARGE, LEFT PLP30201""",C1713,"$18,477.50 ","$5,343.02 ","$1,957.41 ","$1,389.93 ","$1,920.11 ","$1,328.56 ","$4,672.36 ","$4,370.52 ","$4,984.51 ","$2,079.01 ","$1,874.26 ","$4,708.78 ","$1,504.77 ","$2,337.55 ","$4,699.88 ","$3,976.71 ","$1,877.80 ","$1,047.84 ","$2,790.53 ","$1,884.84 ",$964.11 ,"$5,094.01 ","$3,325

## Create Column names

In [27]:
import re

string = "Record ID,Description,CPT/DRG,Gross Charge,Aetna (All Plans) Payment,Aetna MC (All Plans) Payment,Amerihealth Caritas (All Plans) Payment,Amerihealth NJ (All Plans) Payment,Amerihealth Northeast (All Plans) Payment,CBC (Except EPO) Payment,CBC EPO Payment,Cigna (All Plans) Payment,Freedom Blue (All Plans) Payment,Gateway (All Plans) Payment,Geisinger (All Plans) Payment,Geisinger GHP Family (All Plans) Payment,Geisinger Gold (All Plans) Payment,Highmark (All Plans) Payment,Horizon (All Plans) Payment,Horizon Blue MC (All Plans) Payment,Horizon NJ Health (All Plans) Payment,IBC (All Plans) Payment,Keystone 65 (All Plans) Payment,Keystone First (All Plans) Payment,United (All Plans) Payment,Self Pay,Min Payment,Max Payment"

column_names = string.split(",")

# convert to lowercase 
column_names = [x.lower() for x in column_names]

# remove the word 'payment'
column_names = [x.replace("payment", "") for x in column_names]

# remove parentheses and their contents
column_names = [re.sub(r'\([^)]*\)', '', x) for x in column_names]

# replace spaces with underscores
column_names = [x.replace(" ", "_") for x in column_names]

# remove double spaces
column_names = [x.replace("__", "_") for x in column_names]

# convert forward slashes to underscores
column_names = [x.replace("/", "_") for x in column_names]

# remove trailing underscore
column_names = [x.rstrip('_') for x in column_names]

In [28]:
column_names

['record_id',
 'description',
 'cpt_drg',
 'gross_charge',
 'aetna',
 'aetna_mc',
 'amerihealth_caritas',
 'amerihealth_nj',
 'amerihealth_northeast',
 'cbc',
 'cbc_epo',
 'cigna',
 'freedom_blue',
 'gateway',
 'geisinger',
 'geisinger_ghp_family',
 'geisinger_gold',
 'highmark',
 'horizon',
 'horizon_blue_mc',
 'horizon_nj_health',
 'ibc',
 'keystone_65',
 'keystone_first',
 'united',
 'self_pay',
 'min',
 'max']

## Remove dollar signs and commas

In [17]:
!tr '$' ' ' < /tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx > /tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.csv

# Prompt for Claude 

```` 
``` 
%%sql
SELECT *
FROM read_csv('https://data.cityofnewyork.us/api/views/erm2-nwe9/rows.csv?accessType=DOWNLOAD',
    header=True,
    delim=',',
    quote='"',
    columns={'Unique Key': 'BIGINT',
    'Created Date': 'VARCHAR',
    'Closed Date': 'VARCHAR',
    'Agency': 'VARCHAR',
    'Agency Name': 'VARCHAR',
    'Complaint Type': 'VARCHAR',
    'Descriptor': 'VARCHAR',
    'Location Type': 'VARCHAR',
    'Incident Zip': 'VARCHAR',
    'Incident Address': 'VARCHAR',
    'Street Name': 'VARCHAR',
    'Cross Street 1': 'VARCHAR',
    'Cross Street 2': 'VARCHAR',
    'Intersection Street 1': 'VARCHAR',
    'Intersection Street 2': 'VARCHAR',
    'Address Type': 'VARCHAR',
    'City': 'VARCHAR',
    'Landmark': 'VARCHAR',
    'Facility Type': 'VARCHAR',
    'Status': 'VARCHAR',
    'Due Date': 'VARCHAR',
    'Resolution Description': 'VARCHAR',
    'Resolution Action Updated Date': 'VARCHAR',
    'Community Board': 'VARCHAR',
    'BBL': 'VARCHAR',
    'Borough': 'VARCHAR',
    'X Coordinate (State Plane)': 'VARCHAR',
    'Y Coordinate (State Plane)': 'VARCHAR',
    'Open Data Channel Type': 'VARCHAR',
    'Park Facility Name': 'VARCHAR',
    'Park Borough': 'VARCHAR',
    'Vehicle Type': 'VARCHAR',
    'Taxi Company Borough': 'VARCHAR',
    'Taxi Pick Up Location': 'VARCHAR',
    'Bridge Highway Name': 'VARCHAR',
    'Bridge Highway Direction': 'VARCHAR',
    'Road Ramp': 'VARCHAR',
    'Bridge Highway Segment': 'VARCHAR',
    'Latitude': 'DOUBLE',
    'Longitude': 'DOUBLE',
    'Location': 'VARCHAR'}) 
LIMIT 10;
```

Please take the above duckdb SQL query and rewrite it for the file `/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx` with the following header: 

```
Record ID,Description,CPT/DRG,Gross Charge,Aetna (All Plans) Payment,Aetna MC (All Plans) Payment,Amerihealth Caritas (All Plans) Payment,Amerihealth NJ (All Plans) Payment,Amerihealth Northeast (All Plans) Payment,CBC (Except EPO) Payment,CBC EPO Payment,Cigna (All Plans) Payment,Freedom Blue (All Plans) Payment,Gateway (All Plans) Payment,Geisinger (All Plans) Payment,Geisinger GHP Family (All Plans) Payment,Geisinger Gold (All Plans) Payment,Highmark (All Plans) Payment,Horizon (All Plans) Payment,Horizon Blue MC (All Plans) Payment,Horizon NJ Health (All Plans) Payment,IBC (All Plans) Payment,Keystone 65 (All Plans) Payment,Keystone First (All Plans) Payment,United (All Plans) Payment,Self Pay,Min Payment,Max Payment
71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,"$10,925.00 ","$3,159.11 ","$1,157.34 ",$821.81 ,"$1,135.28 ",$785.52 ,"$2,762.58 ","$2,584.11 ","$2,947.14 ","$1,229.23 ","$1,108.17 ","$2,784.11 ",$889.71 ,"$1,382.10 ","$2,778.85 ","$2,351.27 ","$1,110.26 ",$619.55 ,"$1,649.93 ","$1,114.43 ",$570.04 ,"$3,011.88 ","$1,966.50 ",$570.04 ,"$3,159.11 "
72223,"""PLATE ACHORANGE  LARGE, LEFT PLP30201""",C1713,"$18,477.50 ","$5,343.02 ","$1,957.41 ","$1,389.93 ","$1,920.11 ","$1,328.56 ","$4,672.36 ","$4,370.52 ","$4,984.51 ","$2,079.01 ","$1,874.26 ","$4,708.78 ","$1,504.77 ","$2,337.55 ","$4,699.88 ","$3,976.71 ","$1,877.80 ","$1,047.84 ","$2,790.53 ","$1,884.84 ",$964.11 ,"$5,094.01 ","$3,325.95 ",$964.11 ,"$5,343.02 "
72502,"""PLATE HIP FEMORAL R,L 317MM 02.03264.115""",C1713,"$14,794.92 ","$4,278.15 ","$1,567.30 ","$1,112.91 ","$1,537.43 ","$1,063.77 ","$3,741.15 ","$3,499.47 ","$3,991.09 ","$1,664.66 ","$1,500.72 ","$3,770.32 ","$1,204.87 ","$1,871.68 ","$3,763.19 ","$3,184.15 ","$1,503.55 ",$839.01 ,"$2,234.37 ","$1,509.19 ",$771.96 ,"$4,078.77 ","$2,663.09 ",$771.96 ,"$4,278.15 "
72520,"""SCREW  CANN MAGNA-FX 7.0 MM X 75MM,32MM 1146-75-32""",C1713,"$2,713.20 ",$784.56 ,$287.42 ,$204.09 ,$281.94 ,$195.08 ,$686.08 ,$641.76 ,$731.92 ,$305.28 ,$275.21 ,$691.43 ,$220.96 ,$343.24 ,$690.12 ,$583.93 ,$275.73 ,$153.86 ,$409.76 ,$276.77 ,$141.57 ,$747.99 ,$488.38 ,$141.57 ,$784.56 
72521,"""SCREW  CANN MAGNA-FX 7.0MM X 45MM,16MM 1146-45""",C1713,"$2,713.20 ",$784.56 ,$287.42 ,$204.09 ,$281.94 ,$195.08 ,$686.08 ,$641.76 ,$731.92 ,$305.28 ,$275.21 ,$691.43 ,$220.96 ,$343.24 ,$690.12 ,$583.93 ,$275.73 ,$153.86 ,$409.76 ,$276.77 ,$141.57 ,$747.99 ,$488.38 ,$141.57 ,$784.56 
72522,"""SCREW  CANN MAGNA-FX 7.0MM X 50MM,16MM 1146-50""",C1713,"$2,713.20 ",$784.56 ,$287.42 ,$204.09 ,$281.94 ,$195.08 ,$686.08 ,$641.76 ,$731.92 ,$305.28 ,$275.21 ,$691.43 ,$220.96 ,$343.24 ,$690.12 ,$583.93 ,$275.73 ,$153.86 ,$409.76 ,$276.77 ,$141.57 ,$747.99 ,$488.38 ,$141.57 ,$784.56 
72523,"""SCREW  CANN MAGNA-FX 7.0MM X 95MM,16MM 1146-95""",C1713,"$2,713.20 ",$784.56 ,$287.42 ,$204.09 ,$281.94 ,$195.08 ,$686.08 ,$641.76 ,$731.92 ,$305.28 ,$275.21 ,$691.43 ,$220.96 ,$343.24 ,$690.12 ,$583.93 ,$275.73 ,$153.86 ,$409.76 ,$276.77 ,$141.57 ,$747.99 ,$488.38 ,$141.57 ,$784.56 
72528,"""SCREW CANN MAGNA -FX 7.0MM X 70MM,16MM 1146-70""",C1713,"$2,713.20 ",$784.56 ,$287.42 ,$204.09 ,$281.94 ,$195.08 ,$686.08 ,$641.76 ,$731.92 ,$305.28 ,$275.21 ,$691.43 ,$220.96 ,$343.24 ,$690.12 ,$583.93 ,$275.73 ,$153.86 ,$409.76 ,$276.77 ,$141.57 ,$747.99 ,$488.38 ,$141.57 ,$784.56 
72529,"""SCREW CANN MAGNA-FX  70MM X 75MM,16MM 1146-75""",C1713,"$2,713.20 ",$784.56 ,$287.42 ,$204.09 ,$281.94 ,$195.08 ,$686.08 ,$641.76 ,$731.92 ,$305.28 ,$275.21 ,$691.43 ,$220.96 ,$343.24 ,$690.12 ,$583.93 ,$275.73 ,$153.86 ,$409.76 ,$276.77 ,$141.57 ,$747.99 ,$488.38 ,$141.57 ,$784.56 
``` 
````



## DuckDB SQL Query

In [13]:
%%sql 
SELECT * 
FROM read_csv('/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx', 
    header=True,
    delim=',',
    normalize_names=True,
    columns={
        'Record ID': 'VARCHAR',
        'Description': 'VARCHAR',
        'CPT/DRG': 'VARCHAR',
        'Gross Charge': 'VARCHAR',
        'Aetna (All Plans) Payment': 'VARCHAR',
        'Aetna MC (All Plans) Payment': 'VARCHAR',
        'Amerihealth Caritas (All Plans) Payment': 'VARCHAR',
        'Amerihealth NJ (All Plans) Payment': 'VARCHAR',
        'Amerihealth Northeast (All Plans) Payment': 'VARCHAR',
        'CBC (Except EPO) Payment': 'VARCHAR',
        'CBC EPO Payment': 'VARCHAR',
        'Cigna (All Plans) Payment': 'VARCHAR',
        'Freedom Blue (All Plans) Payment': 'VARCHAR',
        'Gateway (All Plans) Payment': 'VARCHAR',
        'Geisinger (All Plans) Payment': 'VARCHAR',
        'Geisinger GHP Family (All Plans) Payment': 'VARCHAR',
        'Geisinger Gold (All Plans) Payment': 'VARCHAR',
        'Highmark (All Plans) Payment': 'VARCHAR',
        'Horizon (All Plans) Payment': 'VARCHAR',
        'Horizon Blue MC (All Plans) Payment': 'VARCHAR',
        'Horizon NJ Health (All Plans) Payment': 'VARCHAR',
        'IBC (All Plans) Payment': 'VARCHAR',
        'Keystone 65 (All Plans) Payment': 'VARCHAR',
        'Keystone First (All Plans) Payment': 'VARCHAR',
        'United (All Plans) Payment': 'VARCHAR',
        'Self Pay': 'VARCHAR',
        'Min Payment': 'VARCHAR',
        'Max Payment': 'VARCHAR'
    })
LIMIT 10;

RuntimeError: (duckdb.InvalidInputException) Invalid Input Error: Attempting to execute an unsuccessful or closed pending query result
Error: Invalid Error: Current transaction is aborted (please ROLLBACK)
[SQL: SELECT * 
FROM read_csv('/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx', 
    header=True,
    delim=',',
    normalize_names=True,
    columns={
        'Record ID': 'VARCHAR',
        'Description': 'VARCHAR',
        'CPT/DRG': 'VARCHAR',
        'Gross Charge': 'VARCHAR',
        'Aetna (All Plans) Payment': 'VARCHAR',
        'Aetna MC (All Plans) Payment': 'VARCHAR',
        'Amerihealth Caritas (All Plans) Payment': 'VARCHAR',
        'Amerihealth NJ (All Plans) Payment': 'VARCHAR',
        'Amerihealth Northeast (All Plans) Payment': 'VARCHAR',
        'CBC (Except EPO) Payment': 'VARCHAR',
        'CBC EPO Payment': 'VARCHAR',
        'Cigna (All Plans) Payment': 'VARCHAR',
        'Freedom Blue (All Plans) Payment': 'VARCHAR',
        'Gateway (Al

In [8]:
%%sql
COPY (SELECT * 
FROM read_csv('/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.csv', 
    header=True,
    delim=',',
    normalize_names=True,
    columns={
        'Record ID': 'VARCHAR',
        'Description': 'VARCHAR',
        'CPT/DRG': 'VARCHAR',
        'Gross Charge': 'VARCHAR',
        'Aetna (All Plans) Payment': 'VARCHAR',
        'Aetna MC (All Plans) Payment': 'VARCHAR',
        'Amerihealth Caritas (All Plans) Payment': 'VARCHAR',
        'Amerihealth NJ (All Plans) Payment': 'VARCHAR',
        'Amerihealth Northeast (All Plans) Payment': 'VARCHAR',
        'CBC (Except EPO) Payment': 'VARCHAR',
        'CBC EPO Payment': 'VARCHAR',
        'Cigna (All Plans) Payment': 'VARCHAR',
        'Freedom Blue (All Plans) Payment': 'VARCHAR',
        'Gateway (All Plans) Payment': 'VARCHAR',
        'Geisinger (All Plans) Payment': 'VARCHAR',
        'Geisinger GHP Family (All Plans) Payment': 'VARCHAR',
        'Geisinger Gold (All Plans) Payment': 'VARCHAR',
        'Highmark (All Plans) Payment': 'VARCHAR',
        'Horizon (All Plans) Payment': 'VARCHAR',
        'Horizon Blue MC (All Plans) Payment': 'VARCHAR',
        'Horizon NJ Health (All Plans) Payment': 'VARCHAR',
        'IBC (All Plans) Payment': 'VARCHAR',
        'Keystone 65 (All Plans) Payment': 'VARCHAR',
        'Keystone First (All Plans) Payment': 'VARCHAR',
        'United (All Plans) Payment': 'VARCHAR',
        'Self Pay': 'VARCHAR',
        'Min Payment': 'VARCHAR',
        'Max Payment': 'VARCHAR'
    })
) TO '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.parquet' (COMPRESSION ZSTD);

Unnamed: 0,Success


## Replacing commas

````
please write a duckdb SQL query using the regexp_replace function with docstring below to replace the commas in every column of this file: '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.parquet'

regexp_replace(string, pattern, replacement); 	if string contains the regexp pattern, replaces the matching part with replacement 	select regexp_replace('hello', '[lo]', '-') 	he-lo
```` 

In [10]:
%%sql 
SELECT * FROM '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.parquet'

Unnamed: 0,Record ID,Description,CPT/DRG,Gross Charge,Aetna (All Plans) Payment,Aetna MC (All Plans) Payment,Amerihealth Caritas (All Plans) Payment,Amerihealth NJ (All Plans) Payment,Amerihealth Northeast (All Plans) Payment,CBC (Except EPO) Payment,...,Horizon (All Plans) Payment,Horizon Blue MC (All Plans) Payment,Horizon NJ Health (All Plans) Payment,IBC (All Plans) Payment,Keystone 65 (All Plans) Payment,Keystone First (All Plans) Payment,United (All Plans) Payment,Self Pay,Min Payment,Max Payment
0,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,10925.00,3159.11,1157.34,821.81,1135.28,785.52,2762.58,...,2351.27,1110.26,619.55,1649.93,1114.43,570.04,3011.88,1966.50,570.04,3159.11
1,72223,"""PLATE ACHORANGE LARGE, LEFT PLP30201""",C1713,18477.50,5343.02,1957.41,1389.93,1920.11,1328.56,4672.36,...,3976.71,1877.80,1047.84,2790.53,1884.84,964.11,5094.01,3325.95,964.11,5343.02
2,72502,"""PLATE HIP FEMORAL R,L 317MM 02.03264.115""",C1713,14794.92,4278.15,1567.30,1112.91,1537.43,1063.77,3741.15,...,3184.15,1503.55,839.01,2234.37,1509.19,771.96,4078.77,2663.09,771.96,4278.15
3,72520,"""SCREW CANN MAGNA-FX 7.0 MM X 75MM,32MM 1146-...",C1713,2713.20,784.56,287.42,204.09,281.94,195.08,686.08,...,583.93,275.73,153.86,409.76,276.77,141.57,747.99,488.38,141.57,784.56
4,72521,"""SCREW CANN MAGNA-FX 7.0MM X 45MM,16MM 1146-45""",C1713,2713.20,784.56,287.42,204.09,281.94,195.08,686.08,...,583.93,275.73,153.86,409.76,276.77,141.57,747.99,488.38,141.57,784.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42546,8880,ZINC SULFATE 220 (50 ZN) MG PO CAPS,A9270,1.25,0.30,0.14,0.12,0.23,0.12,0.28,...,0.24,0.16,0.08,0.20,0.14,0.11,0.31,0.23,0.08,0.33
42547,81434,ZOLEDRONIC ACID 5 MG/100ML IV SOLN,J3489,125.66,12.77,0.00,12.45,1.97,1.29,11.70,...,0.00,1.48,1.12,3.00,1.99,0.72,2.49,22.62,0.00,12.77
42548,11700,ZOLPIDEM TARTRATE 10 MG PO TABS,A9270,1.25,0.30,0.14,0.12,0.23,0.12,0.28,...,0.24,0.16,0.08,0.20,0.14,0.11,0.31,0.23,0.08,0.33
42549,11701,ZOLPIDEM TARTRATE 5 MG PO TABS,A9270,1.25,0.30,0.14,0.12,0.23,0.12,0.28,...,0.24,0.16,0.08,0.20,0.14,0.11,0.31,0.23,0.08,0.33


In [2]:
%%sql 
SELECT * 
FROM read_csv('/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx', 
  header=True,
  delim=',',
  quote='"',
  columns={'record_id': 'VARCHAR',
           'description': 'VARCHAR',
           'cpt_drg': 'VARCHAR',
           'gross_charge': 'VARCHAR',
           'aetna': 'VARCHAR',
           'aetna_mc': 'VARCHAR',
           'amerihealth_caritas': 'VARCHAR',
           'amerihealth_nj': 'VARCHAR',
           'amerihealth_northeast': 'VARCHAR',
           'cbc': 'VARCHAR',
           'cbc_epo': 'VARCHAR',
           'cigna': 'VARCHAR',
           'freedom_blue': 'VARCHAR',
           'gateway': 'VARCHAR',
           'geisinger': 'VARCHAR',  
           'geisinger_ghp_family': 'VARCHAR',
           'geisinger_gold': 'VARCHAR',
           'highmark': 'VARCHAR',
           'horizon': 'VARCHAR',
           'horizon_blue_mc': 'VARCHAR',
           'horizon_nj_health': 'VARCHAR',
           'ibc': 'VARCHAR',
           'keystone_65': 'VARCHAR',
           'keystone_first': 'VARCHAR',
           'united': 'VARCHAR',
           'self_pay': 'VARCHAR',
           'min': 'VARCHAR',
           'max': 'VARCHAR'})
LIMIT 10;

Unnamed: 0,record_id,description,cpt_drg,gross_charge,aetna,aetna_mc,amerihealth_caritas,amerihealth_nj,amerihealth_northeast,cbc,...,horizon,horizon_blue_mc,horizon_nj_health,ibc,keystone_65,keystone_first,united,self_pay,min,max
0,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,"$10,925.00","$3,159.11","$1,157.34",$821.81,"$1,135.28",$785.52,"$2,762.58",...,"$2,351.27","$1,110.26",$619.55,"$1,649.93","$1,114.43",$570.04,"$3,011.88","$1,966.50",$570.04,"$3,159.11"
1,72223,"""PLATE ACHORANGE LARGE, LEFT PLP30201""",C1713,"$18,477.50","$5,343.02","$1,957.41","$1,389.93","$1,920.11","$1,328.56","$4,672.36",...,"$3,976.71","$1,877.80","$1,047.84","$2,790.53","$1,884.84",$964.11,"$5,094.01","$3,325.95",$964.11,"$5,343.02"
2,72502,"""PLATE HIP FEMORAL R,L 317MM 02.03264.115""",C1713,"$14,794.92","$4,278.15","$1,567.30","$1,112.91","$1,537.43","$1,063.77","$3,741.15",...,"$3,184.15","$1,503.55",$839.01,"$2,234.37","$1,509.19",$771.96,"$4,078.77","$2,663.09",$771.96,"$4,278.15"
3,72520,"""SCREW CANN MAGNA-FX 7.0 MM X 75MM,32MM 1146-...",C1713,"$2,713.20",$784.56,$287.42,$204.09,$281.94,$195.08,$686.08,...,$583.93,$275.73,$153.86,$409.76,$276.77,$141.57,$747.99,$488.38,$141.57,$784.56
4,72521,"""SCREW CANN MAGNA-FX 7.0MM X 45MM,16MM 1146-45""",C1713,"$2,713.20",$784.56,$287.42,$204.09,$281.94,$195.08,$686.08,...,$583.93,$275.73,$153.86,$409.76,$276.77,$141.57,$747.99,$488.38,$141.57,$784.56
5,72522,"""SCREW CANN MAGNA-FX 7.0MM X 50MM,16MM 1146-50""",C1713,"$2,713.20",$784.56,$287.42,$204.09,$281.94,$195.08,$686.08,...,$583.93,$275.73,$153.86,$409.76,$276.77,$141.57,$747.99,$488.38,$141.57,$784.56
6,72523,"""SCREW CANN MAGNA-FX 7.0MM X 95MM,16MM 1146-95""",C1713,"$2,713.20",$784.56,$287.42,$204.09,$281.94,$195.08,$686.08,...,$583.93,$275.73,$153.86,$409.76,$276.77,$141.57,$747.99,$488.38,$141.57,$784.56
7,72528,"""SCREW CANN MAGNA -FX 7.0MM X 70MM,16MM 1146-70""",C1713,"$2,713.20",$784.56,$287.42,$204.09,$281.94,$195.08,$686.08,...,$583.93,$275.73,$153.86,$409.76,$276.77,$141.57,$747.99,$488.38,$141.57,$784.56
8,72529,"""SCREW CANN MAGNA-FX 70MM X 75MM,16MM 1146-75""",C1713,"$2,713.20",$784.56,$287.42,$204.09,$281.94,$195.08,$686.08,...,$583.93,$275.73,$153.86,$409.76,$276.77,$141.57,$747.99,$488.38,$141.57,$784.56
9,72530,"""SCREW CANN MAGNA-FX 7.0 MM X 55MM,32 1146-55-32""",C1713,"$2,713.20",$784.56,$287.42,$204.09,$281.94,$195.08,$686.08,...,$583.93,$275.73,$153.86,$409.76,$276.77,$141.57,$747.99,$488.38,$141.57,$784.56


In [4]:
%%sql
COPY (SELECT * 
FROM read_csv('/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx', 
    header=True,
    delim=',',
    normalize_names=True,
    columns={'record_id': 'VARCHAR',
           'description': 'VARCHAR',
           'cpt_drg': 'VARCHAR',
           'gross_charge': 'VARCHAR',
           'aetna': 'VARCHAR',
           'aetna_mc': 'VARCHAR',
           'amerihealth_caritas': 'VARCHAR',
           'amerihealth_nj': 'VARCHAR',
           'amerihealth_northeast': 'VARCHAR',
           'cbc': 'VARCHAR',
           'cbc_epo': 'VARCHAR',
           'cigna': 'VARCHAR',
           'freedom_blue': 'VARCHAR',
           'gateway': 'VARCHAR',
           'geisinger': 'VARCHAR',  
           'geisinger_ghp_family': 'VARCHAR',
           'geisinger_gold': 'VARCHAR',
           'highmark': 'VARCHAR',
           'horizon': 'VARCHAR',
           'horizon_blue_mc': 'VARCHAR',
           'horizon_nj_health': 'VARCHAR',
           'ibc': 'VARCHAR',
           'keystone_65': 'VARCHAR',
           'keystone_first': 'VARCHAR',
           'united': 'VARCHAR',
           'self_pay': 'VARCHAR',
           'min': 'VARCHAR',
           'max': 'VARCHAR'})
) TO '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-clean.parquet' (COMPRESSION ZSTD);

Unnamed: 0,Success


In [2]:
%%sql 
SELECT
  record_id,
  description,
  cpt_drg,
  regexp_replace(gross_charge, '[$,#]', '', 'g') AS gross_charge, 
  regexp_replace(aetna, '[$,#]', '', 'g') AS aetna,
  regexp_replace(aetna_mc, '[$,#]', '', 'g') AS aetna_mc,
  regexp_replace(amerihealth_caritas, '[$,#]', '', 'g') AS amerihealth_caritas,
  regexp_replace(amerihealth_nj, '[$,#]', '', 'g') AS amerihealth_nj,
  regexp_replace(amerihealth_northeast, '[$,#]', '', 'g') AS amerihealth_northeast,
  regexp_replace(cbc, '[$,#]', '', 'g') AS cbc,
  regexp_replace(cbc_epo, '[$,#]', '', 'g') AS cbc_epo,
  regexp_replace(cigna, '[$,#]', '', 'g') AS cigna,
  regexp_replace(freedom_blue, '[$,#]', '', 'g') AS freedom_blue,
  regexp_replace(gateway, '[$,#]', '', 'g') AS gateway,
  regexp_replace(geisinger, '[$,#]', '', 'g') AS geisinger,
  regexp_replace(geisinger_ghp_family, '[$,#]', '', 'g') AS geisinger_ghp_family,
  regexp_replace(geisinger_gold, '[$,#]', '', 'g') AS geisinger_gold,
  regexp_replace(highmark, '[$,#]', '', 'g') AS highmark,
  regexp_replace(horizon, '[$,#]', '', 'g') AS horizon,
  regexp_replace(horizon_blue_mc, '[$,#]', '', 'g') AS horizon_blue_mc,
  regexp_replace(horizon_nj_health, '[$,#]', '', 'g') AS horizon_nj_health,
  regexp_replace(ibc, '[$,#]', '', 'g') AS ibc,
  regexp_replace(keystone_65, '[$,#]', '', 'g') AS keystone_65,
  regexp_replace(keystone_first, '[$,#]', '', 'g') AS keystone_first,
  regexp_replace(united, '[$,#]', '', 'g') AS united,
  regexp_replace(self_pay, '[$,#]', '', 'g') AS self_pay,
  regexp_replace(min, '[$,#]', '', 'g') AS min,
  regexp_replace(max, '[$,#]', '', 'g') AS max
FROM read_csv('/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx', 
  header=True, 
  delim=',',
  quote='"',
  nullstr='#N/A',
  columns={'record_id': 'VARCHAR',
           'description': 'VARCHAR',
           'cpt_drg': 'VARCHAR',
           'gross_charge': 'VARCHAR',
           'aetna': 'VARCHAR',
           'aetna_mc': 'VARCHAR',
           'amerihealth_caritas': 'VARCHAR',
           'amerihealth_nj': 'VARCHAR',
           'amerihealth_northeast': 'VARCHAR',
           'cbc': 'VARCHAR',
           'cbc_epo': 'VARCHAR',
           'cigna': 'VARCHAR',
           'freedom_blue': 'VARCHAR',
           'gateway': 'VARCHAR',
           'geisinger': 'VARCHAR',
           'geisinger_ghp_family': 'VARCHAR',
           'geisinger_gold': 'VARCHAR',
           'highmark': 'VARCHAR',
           'horizon': 'VARCHAR',
           'horizon_blue_mc': 'VARCHAR',
           'horizon_nj_health': 'VARCHAR',
           'ibc': 'VARCHAR',
           'keystone_65': 'VARCHAR',
           'keystone_first': 'VARCHAR',
           'united': 'VARCHAR',
           'self_pay': 'VARCHAR',
           'min': 'VARCHAR',
           'max': 'VARCHAR'})
LIMIT 10;

Unnamed: 0,record_id,description,cpt_drg,gross_charge,aetna,aetna_mc,amerihealth_caritas,amerihealth_nj,amerihealth_northeast,cbc,...,horizon,horizon_blue_mc,horizon_nj_health,ibc,keystone_65,keystone_first,united,self_pay,min,max
0,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,10925.0,3159.11,1157.34,821.81,1135.28,785.52,2762.58,...,2351.27,1110.26,619.55,1649.93,1114.43,570.04,3011.88,1966.5,570.04,3159.11
1,72223,"""PLATE ACHORANGE LARGE, LEFT PLP30201""",C1713,18477.5,5343.02,1957.41,1389.93,1920.11,1328.56,4672.36,...,3976.71,1877.8,1047.84,2790.53,1884.84,964.11,5094.01,3325.95,964.11,5343.02
2,72502,"""PLATE HIP FEMORAL R,L 317MM 02.03264.115""",C1713,14794.92,4278.15,1567.3,1112.91,1537.43,1063.77,3741.15,...,3184.15,1503.55,839.01,2234.37,1509.19,771.96,4078.77,2663.09,771.96,4278.15
3,72520,"""SCREW CANN MAGNA-FX 7.0 MM X 75MM,32MM 1146-...",C1713,2713.2,784.56,287.42,204.09,281.94,195.08,686.08,...,583.93,275.73,153.86,409.76,276.77,141.57,747.99,488.38,141.57,784.56
4,72521,"""SCREW CANN MAGNA-FX 7.0MM X 45MM,16MM 1146-45""",C1713,2713.2,784.56,287.42,204.09,281.94,195.08,686.08,...,583.93,275.73,153.86,409.76,276.77,141.57,747.99,488.38,141.57,784.56
5,72522,"""SCREW CANN MAGNA-FX 7.0MM X 50MM,16MM 1146-50""",C1713,2713.2,784.56,287.42,204.09,281.94,195.08,686.08,...,583.93,275.73,153.86,409.76,276.77,141.57,747.99,488.38,141.57,784.56
6,72523,"""SCREW CANN MAGNA-FX 7.0MM X 95MM,16MM 1146-95""",C1713,2713.2,784.56,287.42,204.09,281.94,195.08,686.08,...,583.93,275.73,153.86,409.76,276.77,141.57,747.99,488.38,141.57,784.56
7,72528,"""SCREW CANN MAGNA -FX 7.0MM X 70MM,16MM 1146-70""",C1713,2713.2,784.56,287.42,204.09,281.94,195.08,686.08,...,583.93,275.73,153.86,409.76,276.77,141.57,747.99,488.38,141.57,784.56
8,72529,"""SCREW CANN MAGNA-FX 70MM X 75MM,16MM 1146-75""",C1713,2713.2,784.56,287.42,204.09,281.94,195.08,686.08,...,583.93,275.73,153.86,409.76,276.77,141.57,747.99,488.38,141.57,784.56
9,72530,"""SCREW CANN MAGNA-FX 7.0 MM X 55MM,32 1146-55-32""",C1713,2713.2,784.56,287.42,204.09,281.94,195.08,686.08,...,583.93,275.73,153.86,409.76,276.77,141.57,747.99,488.38,141.57,784.56


In [5]:
%%sql
COPY (SELECT
  record_id,
  description,
  cpt_drg,
  regexp_replace(gross_charge, '[$,]', '', 'g') AS gross_charge, 
  regexp_replace(aetna, '[$,]', '', 'g') AS aetna,
  regexp_replace(aetna_mc, '[$,]', '', 'g') AS aetna_mc,
  regexp_replace(amerihealth_caritas, '[$,]', '', 'g') AS amerihealth_caritas,
  regexp_replace(amerihealth_nj, '[$,]', '', 'g') AS amerihealth_nj,
  regexp_replace(amerihealth_northeast, '[$,]', '', 'g') AS amerihealth_northeast,
  regexp_replace(cbc, '[$,]', '', 'g') AS cbc,
  regexp_replace(cbc_epo, '[$,]', '', 'g') AS cbc_epo,
  regexp_replace(cigna, '[$,]', '', 'g') AS cigna,
  regexp_replace(freedom_blue, '[$,]', '', 'g') AS freedom_blue,
  regexp_replace(gateway, '[$,]', '', 'g') AS gateway,
  regexp_replace(geisinger, '[$,]', '', 'g') AS geisinger,
  regexp_replace(geisinger_ghp_family, '[$,]', '', 'g') AS geisinger_ghp_family,
  regexp_replace(geisinger_gold, '[$,]', '', 'g') AS geisinger_gold,
  regexp_replace(highmark, '[$,]', '', 'g') AS highmark,
  regexp_replace(horizon, '[$,]', '', 'g') AS horizon,
  regexp_replace(horizon_blue_mc, '[$,]', '', 'g') AS horizon_blue_mc,
  regexp_replace(horizon_nj_health, '[$,]', '', 'g') AS horizon_nj_health,
  regexp_replace(ibc, '[$,]', '', 'g') AS ibc,
  regexp_replace(keystone_65, '[$,]', '', 'g') AS keystone_65,
  regexp_replace(keystone_first, '[$,]', '', 'g') AS keystone_first,
  regexp_replace(united, '[$,]', '', 'g') AS united,
  regexp_replace(self_pay, '[$,]', '', 'g') AS self_pay,
  regexp_replace(min, '[$,]', '', 'g') AS min,
  regexp_replace(max, '[$,]', '', 'g') AS max
FROM read_csv('/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.ashx', 
  header=True, 
  delim=',',
  nullstr='#N/A',
  quote='"',
  columns={'record_id': 'VARCHAR',
           'description': 'VARCHAR',
           'cpt_drg': 'VARCHAR',
           'gross_charge': 'VARCHAR',
           'aetna': 'VARCHAR',
           'aetna_mc': 'VARCHAR',
           'amerihealth_caritas': 'VARCHAR',
           'amerihealth_nj': 'VARCHAR',
           'amerihealth_northeast': 'VARCHAR',
           'cbc': 'VARCHAR',
           'cbc_epo': 'VARCHAR',
           'cigna': 'VARCHAR',
           'freedom_blue': 'VARCHAR',
           'gateway': 'VARCHAR',
           'geisinger': 'VARCHAR',
           'geisinger_ghp_family': 'VARCHAR',
           'geisinger_gold': 'VARCHAR',
           'highmark': 'VARCHAR',
           'horizon': 'VARCHAR',
           'horizon_blue_mc': 'VARCHAR',
           'horizon_nj_health': 'VARCHAR',
           'ibc': 'VARCHAR',
           'keystone_65': 'VARCHAR',
           'keystone_first': 'VARCHAR',
           'united': 'VARCHAR',
           'self_pay': 'VARCHAR',
           'min': 'VARCHAR',
           'max': 'VARCHAR'})
) TO '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.parquet' (COMPRESSION ZSTD);

RuntimeError: (duckdb.InvalidInputException) Invalid Input Error: Attempting to execute an unsuccessful or closed pending query result
Error: Invalid Error: Current transaction is aborted (please ROLLBACK)
[SQL: COPY (SELECT
  record_id,
  description,
  cpt_drg,
  regexp_replace(gross_charge, '[$,]', '', 'g') AS gross_charge, 
  regexp_replace(aetna, '[$,]', '', 'g') AS aetna,
  regexp_replace(aetna_mc, '[$,]', '', 'g') AS aetna_mc,
  regexp_replace(amerihealth_caritas, '[$,]', '', 'g') AS amerihealth_caritas,
  regexp_replace(amerihealth_nj, '[$,]', '', 'g') AS amerihealth_nj,
  regexp_replace(amerihealth_northeast, '[$,]', '', 'g') AS amerihealth_northeast,
  regexp_replace(cbc, '[$,]', '', 'g') AS cbc,
  regexp_replace(cbc_epo, '[$,]', '', 'g') AS cbc_epo,
  regexp_replace(cigna, '[$,]', '', 'g') AS cigna,
  regexp_replace(freedom_blue, '[$,]', '', 'g') AS freedom_blue,
  regexp_replace(gateway, '[$,]', '', 'g') AS gateway,
  regexp_replace(geisinger, '[$,]', '', 'g') AS geisinger,

In [2]:
%%sql 
SELECT * FROM '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.parquet'

Unnamed: 0,record_id,description,cpt_drg,gross_charge,aetna,aetna_mc,amerihealth_caritas,amerihealth_nj,amerihealth_northeast,cbc,...,horizon,horizon_blue_mc,horizon_nj_health,ibc,keystone_65,keystone_first,united,self_pay,min,max
0,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,10925.00,3159.11,1157.34,821.81,1135.28,785.52,2762.58,...,2351.27,1110.26,619.55,1649.93,1114.43,570.04,3011.88,1966.50,570.04,3159.11
1,72223,"""PLATE ACHORANGE LARGE, LEFT PLP30201""",C1713,18477.50,5343.02,1957.41,1389.93,1920.11,1328.56,4672.36,...,3976.71,1877.80,1047.84,2790.53,1884.84,964.11,5094.01,3325.95,964.11,5343.02
2,72502,"""PLATE HIP FEMORAL R,L 317MM 02.03264.115""",C1713,14794.92,4278.15,1567.30,1112.91,1537.43,1063.77,3741.15,...,3184.15,1503.55,839.01,2234.37,1509.19,771.96,4078.77,2663.09,771.96,4278.15
3,72520,"""SCREW CANN MAGNA-FX 7.0 MM X 75MM,32MM 1146-...",C1713,2713.20,784.56,287.42,204.09,281.94,195.08,686.08,...,583.93,275.73,153.86,409.76,276.77,141.57,747.99,488.38,141.57,784.56
4,72521,"""SCREW CANN MAGNA-FX 7.0MM X 45MM,16MM 1146-45""",C1713,2713.20,784.56,287.42,204.09,281.94,195.08,686.08,...,583.93,275.73,153.86,409.76,276.77,141.57,747.99,488.38,141.57,784.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42546,8880,ZINC SULFATE 220 (50 ZN) MG PO CAPS,A9270,1.25,0.30,0.14,0.12,0.23,0.12,0.28,...,0.24,0.16,0.08,0.20,0.14,0.11,0.31,0.23,0.08,0.33
42547,81434,ZOLEDRONIC ACID 5 MG/100ML IV SOLN,J3489,125.66,12.77,0.00,12.45,1.97,1.29,11.70,...,0.00,1.48,1.12,3.00,1.99,0.72,2.49,22.62,0.00,12.77
42548,11700,ZOLPIDEM TARTRATE 10 MG PO TABS,A9270,1.25,0.30,0.14,0.12,0.23,0.12,0.28,...,0.24,0.16,0.08,0.20,0.14,0.11,0.31,0.23,0.08,0.33
42549,11701,ZOLPIDEM TARTRATE 5 MG PO TABS,A9270,1.25,0.30,0.14,0.12,0.23,0.12,0.28,...,0.24,0.16,0.08,0.20,0.14,0.11,0.31,0.23,0.08,0.33


In [3]:
%%sql 
SELECT 
  CAST(NULLIF(gross_charge, '') AS FLOAT) AS gross_charge,
  CAST(NULLIF(aetna, '') AS FLOAT) AS aetna,
  CAST(NULLIF(aetna_mc, '') AS FLOAT) AS aetna_mc,
  CAST(NULLIF(amerihealth_caritas, '') AS FLOAT) AS amerihealth_caritas,
  CAST(NULLIF(amerihealth_nj, '') AS FLOAT) AS amerihealth_nj,
  CAST(NULLIF(amerihealth_northeast, '') AS FLOAT) AS amerihealth_northeast,
  CAST(NULLIF(cbc, '') AS FLOAT) AS cbc,
  CAST(NULLIF(cbc_epo, '') AS FLOAT) AS cbc_epo,
  CAST(NULLIF(cigna, '') AS FLOAT) AS cigna,
  CAST(NULLIF(freedom_blue, '') AS FLOAT) AS freedom_blue,
  CAST(NULLIF(gateway, '') AS FLOAT) AS gateway,
  CAST(NULLIF(geisinger, '') AS FLOAT) AS geisinger,
  CAST(NULLIF(geisinger_ghp_family, '') AS FLOAT) AS geisinger_ghp_family,
  CAST(NULLIF(geisinger_gold, '') AS FLOAT) AS geisinger_gold,
  CAST(NULLIF(highmark, '') AS FLOAT) AS highmark,
  CAST(NULLIF(horizon, '') AS FLOAT) AS horizon,
  CAST(NULLIF(horizon_blue_mc, '') AS FLOAT) AS horizon_blue_mc,
  CAST(NULLIF(horizon_nj_health, '') AS FLOAT) AS horizon_nj_health,
  CAST(NULLIF(ibc, '') AS FLOAT) AS ibc,
  CAST(NULLIF(keystone_65, '') AS FLOAT) AS keystone_65,
  CAST(NULLIF(keystone_first, '') AS FLOAT) AS keystone_first,
  CAST(NULLIF(united, '') AS FLOAT) AS united,
  CAST(NULLIF(self_pay, '') AS FLOAT) AS self_pay,
  CAST(NULLIF(min, '') AS FLOAT) AS min,
  CAST(NULLIF(max, '') AS FLOAT) AS max
FROM '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.parquet'

Unnamed: 0,gross_charge,aetna,aetna_mc,amerihealth_caritas,amerihealth_nj,amerihealth_northeast,cbc,cbc_epo,cigna,freedom_blue,...,horizon,horizon_blue_mc,horizon_nj_health,ibc,keystone_65,keystone_first,united,self_pay,min,max
0,10925.000000,3159.110107,1157.339966,821.809998,1135.280029,785.520020,2762.580078,2584.110107,2947.139893,1229.229980,...,2351.270020,1110.260010,619.549988,1649.930054,1114.430054,570.039978,3011.879883,1966.500000,570.039978,3159.110107
1,18477.500000,5343.020020,1957.410034,1389.930054,1920.109985,1328.560059,4672.359863,4370.520020,4984.509766,2079.010010,...,3976.709961,1877.800049,1047.839966,2790.530029,1884.839966,964.109985,5094.009766,3325.949951,964.109985,5343.020020
2,14794.919922,4278.149902,1567.300049,1112.910034,1537.430054,1063.770020,3741.149902,3499.469971,3991.090088,1664.660034,...,3184.149902,1503.550049,839.010010,2234.370117,1509.189941,771.960022,4078.770020,2663.090088,771.960022,4278.149902
3,2713.199951,784.559998,287.420013,204.089996,281.940002,195.080002,686.080017,641.760010,731.919983,305.279999,...,583.929993,275.730011,153.860001,409.760010,276.769989,141.570007,747.989990,488.380005,141.570007,784.559998
4,2713.199951,784.559998,287.420013,204.089996,281.940002,195.080002,686.080017,641.760010,731.919983,305.279999,...,583.929993,275.730011,153.860001,409.760010,276.769989,141.570007,747.989990,488.380005,141.570007,784.559998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42546,1.250000,0.300000,0.140000,0.120000,0.230000,0.120000,0.280000,0.270000,0.310000,0.150000,...,0.240000,0.160000,0.080000,0.200000,0.140000,0.110000,0.310000,0.230000,0.080000,0.330000
42547,125.660004,12.770000,0.000000,12.450000,1.970000,1.290000,11.700000,10.940000,9.060000,0.000000,...,0.000000,1.480000,1.120000,3.000000,1.990000,0.720000,2.490000,22.620001,0.000000,12.770000
42548,1.250000,0.300000,0.140000,0.120000,0.230000,0.120000,0.280000,0.270000,0.310000,0.150000,...,0.240000,0.160000,0.080000,0.200000,0.140000,0.110000,0.310000,0.230000,0.080000,0.330000
42549,1.250000,0.300000,0.140000,0.120000,0.230000,0.120000,0.280000,0.270000,0.310000,0.150000,...,0.240000,0.160000,0.080000,0.200000,0.140000,0.110000,0.310000,0.230000,0.080000,0.330000


In [6]:
%%sql 
COPY (
SELECT 
  record_id,
  description,
  cpt_drg,
  CAST(NULLIF(gross_charge, '') AS FLOAT) AS gross_charge,
  CAST(NULLIF(aetna, '') AS FLOAT) AS aetna,
  CAST(NULLIF(aetna_mc, '') AS FLOAT) AS aetna_mc,
  CAST(NULLIF(amerihealth_caritas, '') AS FLOAT) AS amerihealth_caritas,
  CAST(NULLIF(amerihealth_nj, '') AS FLOAT) AS amerihealth_nj,
  CAST(NULLIF(amerihealth_northeast, '') AS FLOAT) AS amerihealth_northeast,
  CAST(NULLIF(cbc, '') AS FLOAT) AS cbc,
  CAST(NULLIF(cbc_epo, '') AS FLOAT) AS cbc_epo,
  CAST(NULLIF(cigna, '') AS FLOAT) AS cigna,
  CAST(NULLIF(freedom_blue, '') AS FLOAT) AS freedom_blue,
  CAST(NULLIF(gateway, '') AS FLOAT) AS gateway,
  CAST(NULLIF(geisinger, '') AS FLOAT) AS geisinger,
  CAST(NULLIF(geisinger_ghp_family, '') AS FLOAT) AS geisinger_ghp_family,
  CAST(NULLIF(geisinger_gold, '') AS FLOAT) AS geisinger_gold,
  CAST(NULLIF(highmark, '') AS FLOAT) AS highmark,
  CAST(NULLIF(horizon, '') AS FLOAT) AS horizon,
  CAST(NULLIF(horizon_blue_mc, '') AS FLOAT) AS horizon_blue_mc,
  CAST(NULLIF(horizon_nj_health, '') AS FLOAT) AS horizon_nj_health,
  CAST(NULLIF(ibc, '') AS FLOAT) AS ibc,
  CAST(NULLIF(keystone_65, '') AS FLOAT) AS keystone_65,
  CAST(NULLIF(keystone_first, '') AS FLOAT) AS keystone_first,
  CAST(NULLIF(united, '') AS FLOAT) AS united,
  CAST(NULLIF(self_pay, '') AS FLOAT) AS self_pay,
  CAST(NULLIF(min, '') AS FLOAT) AS min,
  CAST(NULLIF(max, '') AS FLOAT) AS max
FROM '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges.parquet'
) TO '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-clean.parquet' (COMPRESSION ZSTD);

Unnamed: 0,Success


In [7]:
%%sql 
SELECT * FROM '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-clean.parquet'

Unnamed: 0,record_id,description,cpt_drg,gross_charge,aetna,aetna_mc,amerihealth_caritas,amerihealth_nj,amerihealth_northeast,cbc,...,horizon,horizon_blue_mc,horizon_nj_health,ibc,keystone_65,keystone_first,united,self_pay,min,max
0,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,10925.000000,3159.110107,1157.339966,821.809998,1135.280029,785.520020,2762.580078,...,2351.270020,1110.260010,619.549988,1649.930054,1114.430054,570.039978,3011.879883,1966.500000,570.039978,3159.110107
1,72223,"""PLATE ACHORANGE LARGE, LEFT PLP30201""",C1713,18477.500000,5343.020020,1957.410034,1389.930054,1920.109985,1328.560059,4672.359863,...,3976.709961,1877.800049,1047.839966,2790.530029,1884.839966,964.109985,5094.009766,3325.949951,964.109985,5343.020020
2,72502,"""PLATE HIP FEMORAL R,L 317MM 02.03264.115""",C1713,14794.919922,4278.149902,1567.300049,1112.910034,1537.430054,1063.770020,3741.149902,...,3184.149902,1503.550049,839.010010,2234.370117,1509.189941,771.960022,4078.770020,2663.090088,771.960022,4278.149902
3,72520,"""SCREW CANN MAGNA-FX 7.0 MM X 75MM,32MM 1146-...",C1713,2713.199951,784.559998,287.420013,204.089996,281.940002,195.080002,686.080017,...,583.929993,275.730011,153.860001,409.760010,276.769989,141.570007,747.989990,488.380005,141.570007,784.559998
4,72521,"""SCREW CANN MAGNA-FX 7.0MM X 45MM,16MM 1146-45""",C1713,2713.199951,784.559998,287.420013,204.089996,281.940002,195.080002,686.080017,...,583.929993,275.730011,153.860001,409.760010,276.769989,141.570007,747.989990,488.380005,141.570007,784.559998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42546,8880,ZINC SULFATE 220 (50 ZN) MG PO CAPS,A9270,1.250000,0.300000,0.140000,0.120000,0.230000,0.120000,0.280000,...,0.240000,0.160000,0.080000,0.200000,0.140000,0.110000,0.310000,0.230000,0.080000,0.330000
42547,81434,ZOLEDRONIC ACID 5 MG/100ML IV SOLN,J3489,125.660004,12.770000,0.000000,12.450000,1.970000,1.290000,11.700000,...,0.000000,1.480000,1.120000,3.000000,1.990000,0.720000,2.490000,22.620001,0.000000,12.770000
42548,11700,ZOLPIDEM TARTRATE 10 MG PO TABS,A9270,1.250000,0.300000,0.140000,0.120000,0.230000,0.120000,0.280000,...,0.240000,0.160000,0.080000,0.200000,0.140000,0.110000,0.310000,0.230000,0.080000,0.330000
42549,11701,ZOLPIDEM TARTRATE 5 MG PO TABS,A9270,1.250000,0.300000,0.140000,0.120000,0.230000,0.120000,0.280000,...,0.240000,0.160000,0.080000,0.200000,0.140000,0.110000,0.310000,0.230000,0.080000,0.330000


In [8]:
import pandas as pd
df = pd.read_parquet('/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-clean.parquet')

In [10]:
df.head(5)

Unnamed: 0,record_id,description,cpt_drg,gross_charge,aetna,aetna_mc,amerihealth_caritas,amerihealth_nj,amerihealth_northeast,cbc,...,horizon,horizon_blue_mc,horizon_nj_health,ibc,keystone_65,keystone_first,united,self_pay,min,max
0,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,10925.0,3159.110107,1157.339966,821.809998,1135.280029,785.52002,2762.580078,...,2351.27002,1110.26001,619.549988,1649.930054,1114.430054,570.039978,3011.879883,1966.5,570.039978,3159.110107
1,72223,"""PLATE ACHORANGE LARGE, LEFT PLP30201""",C1713,18477.5,5343.02002,1957.410034,1389.930054,1920.109985,1328.560059,4672.359863,...,3976.709961,1877.800049,1047.839966,2790.530029,1884.839966,964.109985,5094.009766,3325.949951,964.109985,5343.02002
2,72502,"""PLATE HIP FEMORAL R,L 317MM 02.03264.115""",C1713,14794.919922,4278.149902,1567.300049,1112.910034,1537.430054,1063.77002,3741.149902,...,3184.149902,1503.550049,839.01001,2234.370117,1509.189941,771.960022,4078.77002,2663.090088,771.960022,4278.149902
3,72520,"""SCREW CANN MAGNA-FX 7.0 MM X 75MM,32MM 1146-...",C1713,2713.199951,784.559998,287.420013,204.089996,281.940002,195.080002,686.080017,...,583.929993,275.730011,153.860001,409.76001,276.769989,141.570007,747.98999,488.380005,141.570007,784.559998
4,72521,"""SCREW CANN MAGNA-FX 7.0MM X 45MM,16MM 1146-45""",C1713,2713.199951,784.559998,287.420013,204.089996,281.940002,195.080002,686.080017,...,583.929993,275.730011,153.860001,409.76001,276.769989,141.570007,747.98999,488.380005,141.570007,784.559998


In [72]:
df_melted = pd.melt(df, id_vars=['record_id', 'description', 'cpt_drg', 'gross_charge', 'min', 'max'], var_name='payer', value_name='charge')

In [73]:
df_melted

Unnamed: 0,record_id,description,cpt_drg,gross_charge,min,max,payer,charge
0,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,10925.000000,570.039978,3159.110107,aetna,3159.110107
1,72223,"""PLATE ACHORANGE LARGE, LEFT PLP30201""",C1713,18477.500000,964.109985,5343.020020,aetna,5343.020020
2,72502,"""PLATE HIP FEMORAL R,L 317MM 02.03264.115""",C1713,14794.919922,771.960022,4278.149902,aetna,4278.149902
3,72520,"""SCREW CANN MAGNA-FX 7.0 MM X 75MM,32MM 1146-...",C1713,2713.199951,141.570007,784.559998,aetna,784.559998
4,72521,"""SCREW CANN MAGNA-FX 7.0MM X 45MM,16MM 1146-45""",C1713,2713.199951,141.570007,784.559998,aetna,784.559998
...,...,...,...,...,...,...,...,...
936117,8880,ZINC SULFATE 220 (50 ZN) MG PO CAPS,A9270,1.250000,0.080000,0.330000,self_pay,0.230000
936118,81434,ZOLEDRONIC ACID 5 MG/100ML IV SOLN,J3489,125.660004,0.000000,12.770000,self_pay,22.620001
936119,11700,ZOLPIDEM TARTRATE 10 MG PO TABS,A9270,1.250000,0.080000,0.330000,self_pay,0.230000
936120,11701,ZOLPIDEM TARTRATE 5 MG PO TABS,A9270,1.250000,0.080000,0.330000,self_pay,0.230000


In [74]:
df_melted = df_melted.dropna(subset=['charge'])


In [78]:
df_melted.drop(df_melted[df_melted['charge'] == 0].index, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_melted.drop(df_melted[df_melted['charge'] == 0].index, inplace = True)


In [79]:
df_melted

Unnamed: 0,record_id,description,cpt_drg,gross_charge,min,max,payer,charge
0,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,10925.000000,570.039978,3159.110107,aetna,3159.110107
1,72223,"""PLATE ACHORANGE LARGE, LEFT PLP30201""",C1713,18477.500000,964.109985,5343.020020,aetna,5343.020020
2,72502,"""PLATE HIP FEMORAL R,L 317MM 02.03264.115""",C1713,14794.919922,771.960022,4278.149902,aetna,4278.149902
3,72520,"""SCREW CANN MAGNA-FX 7.0 MM X 75MM,32MM 1146-...",C1713,2713.199951,141.570007,784.559998,aetna,784.559998
4,72521,"""SCREW CANN MAGNA-FX 7.0MM X 45MM,16MM 1146-45""",C1713,2713.199951,141.570007,784.559998,aetna,784.559998
...,...,...,...,...,...,...,...,...
936117,8880,ZINC SULFATE 220 (50 ZN) MG PO CAPS,A9270,1.250000,0.080000,0.330000,self_pay,0.230000
936118,81434,ZOLEDRONIC ACID 5 MG/100ML IV SOLN,J3489,125.660004,0.000000,12.770000,self_pay,22.620001
936119,11700,ZOLPIDEM TARTRATE 10 MG PO TABS,A9270,1.250000,0.080000,0.330000,self_pay,0.230000
936120,11701,ZOLPIDEM TARTRATE 5 MG PO TABS,A9270,1.250000,0.080000,0.330000,self_pay,0.230000


In [80]:
len(df_melted.record_id.unique())

41015

In [81]:
df_melted[['record_id', 'description', 'cpt_drg', 'payer', 'charge']].to_parquet('/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-clean-melted.parquet')

In [82]:
%%sql 
SELECT * FROM '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-clean-melted.parquet'

Unnamed: 0,record_id,description,cpt_drg,payer,charge,__index_level_0__
0,71728,"""DEVICE, MYOSURE TISSUE RESECTION""",,aetna,3159.110107,0
1,72223,"""PLATE ACHORANGE LARGE, LEFT PLP30201""",C1713,aetna,5343.020020,1
2,72502,"""PLATE HIP FEMORAL R,L 317MM 02.03264.115""",C1713,aetna,4278.149902,2
3,72520,"""SCREW CANN MAGNA-FX 7.0 MM X 75MM,32MM 1146-...",C1713,aetna,784.559998,3
4,72521,"""SCREW CANN MAGNA-FX 7.0MM X 45MM,16MM 1146-45""",C1713,aetna,784.559998,4
...,...,...,...,...,...,...
932887,8880,ZINC SULFATE 220 (50 ZN) MG PO CAPS,A9270,self_pay,0.230000,936117
932888,81434,ZOLEDRONIC ACID 5 MG/100ML IV SOLN,J3489,self_pay,22.620001,936118
932889,11700,ZOLPIDEM TARTRATE 10 MG PO TABS,A9270,self_pay,0.230000,936119
932890,11701,ZOLPIDEM TARTRATE 5 MG PO TABS,A9270,self_pay,0.230000,936120


## Prompt for getting max and min

````
%%sql 
COPY (
  WITH max_min_charges AS (
  SELECT 
    Primary_Code,
    Code_type,
    description,
    MIN(Minimum_Negotiated_Charge) AS Min_Charge,
    MAX(Maximum_Negotiated_Charge) AS Max_Charge
  FROM '131624096_mount-sinai-hospital_standardcharges.parquet'
  GROUP BY Code_type, Primary_Code, description
),

min_max_product_names AS (
  SELECT
    mmc.Primary_Code,
    mmc.Code_type,
    mmc.description, 
    mmc.Min_Charge,
    mmc.Max_Charge,
    FIRST_VALUE(sc.Product_Name) OVER (PARTITION BY mmc.Primary_Code, mmc.Code_type ORDER BY sc.Minimum_Negotiated_Charge ASC) AS Product_Name_Minimum,
    FIRST_VALUE(sc.Product_Name) OVER (PARTITION BY mmc.Primary_Code, mmc.Code_type ORDER BY sc.Maximum_Negotiated_Charge DESC) AS Product_Name_Maximum
  FROM max_min_charges mmc
  JOIN '131624096_mount-sinai-hospital_standardcharges.parquet' sc
  ON mmc.Primary_Code = sc.Primary_Code AND mmc.Code_type = sc.Code_type AND mmc.description = sc.description
)
SELECT DISTINCT
  Primary_Code,
  Code_Type,
  description,
  Min_Charge,
  Max_Charge,
  Product_Name_Minimum,
  Product_Name_Maximum
FROM min_max_product_names
) TO '/Users/me/projects/beta.payless.health/docs/public/data/131624096_mount-sinai-hospital_standardcharges-subset.parquet' (FORMAT 'parquet');
using this example, please redo the analysis for the following file: /tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-clean-melted.parquet

that contains the following columns: record_id, description, cpt_drg, payer, charge
````

In [83]:
%%sql
COPY (
  WITH max_min_charges AS (
    SELECT 
      record_id,
      description,
      cpt_drg,
      MIN(charge) AS min_charge,
      MAX(charge) AS max_charge
    FROM '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-clean-melted.parquet' 
    GROUP BY record_id, description, cpt_drg
  ),

  min_max_names AS (
    SELECT
      mmc.record_id,
      mmc.cpt_drg,
      mmc.description,
      mmc.min_charge,
      mmc.max_charge,
      FIRST_VALUE(payer) OVER (PARTITION BY mmc.record_id ORDER BY charge ASC) AS name_minimum,  
      FIRST_VALUE(payer) OVER (PARTITION BY mmc.record_id ORDER BY charge DESC) AS name_maximum
    FROM max_min_charges mmc
    JOIN '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-clean-melted.parquet' sc
    ON mmc.record_id = sc.record_id AND mmc.description = sc.description
  )

  SELECT DISTINCT
    record_id,
    cpt_drg, 
    description,
    min_charge,
    max_charge,
    name_minimum,
    name_maximum
  FROM min_max_names
) TO '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-analysis.parquet' (FORMAT 'parquet');

Unnamed: 0,Success


In [84]:
%%sql 
SELECT * FROM '/tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-analysis.parquet'

Unnamed: 0,record_id,cpt_drg,description,min_charge,max_charge,name_minimum,name_maximum
0,21635,C1776,INSERT TIBIAL BRNG LRG SZ 4 LT OXFORD,570.530029,3161.860107,keystone_first,aetna
1,21670,C1776,INSERT TIBIAL ARTC SZ 6 10MM ROTPLT SIGMA RPF,786.650024,4359.580078,keystone_first,aetna
2,21709,C1713,PLATE VA-LCP 2.4/2.7MM FIRST TMT 39MM LT,384.779999,2132.399902,keystone_first,aetna
3,21722,C1713,SCREW BONE LCK 2.7 X 24MM,55.520000,307.670013,keystone_first,aetna
4,21810,C1713,ROD SPINAL 40MM CRV,99.139999,549.409973,keystone_first,aetna
...,...,...,...,...,...,...,...
41722,28672,C1713,SCREW COMP 2.4 X 32MM HDLS LNG THRD,130.830002,725.030029,keystone_first,aetna
41723,28688,C1713,NAIL SCREW TI TROCH FIX 11.0 X 85MM STRL,194.080002,1075.579956,keystone_first,aetna
41724,28697,C1713,PLATE LCP 4.5MM CONDYLAR CRVD 16HL 350MM LT,688.359985,3814.860107,keystone_first,aetna
41725,28710,,FORCEPS PK CUTTING 5MM 33CM,327.149994,1813.060059,keystone_first,aetna


In [85]:
!cp /tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-analysis.parquet /Users/me/projects/beta.payless.health/docs/public/data/

In [86]:
ls -lh /tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-analysis.parquet

-rw-r--r--  1 me  wheel   1.5M Aug 24 19:39 /tmp/231352213_StLukesHospitalBethlehemCampus_standardcharges-analysis.parquet
