# NewYork-Presbyterian Hospital

Shortname: `newyork-presbyterian`

https://www.nyp.org/patients-visitors/paying-for-care/hospital-price-transparency/standard-charges

In [1]:
# Load duckdb, which lets us efficiently load large files
import duckdb

# Load pandas, which lets us manipulate dataframes
import pandas as pd

# Import jupysql Jupyter extension to create SQL cells
%load_ext sql

# Set configrations on jupysql to directly output data to Pandas and to simplify the output that is printed to the notebook.
%config SqlMagic.autopandas = True

%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

# Allow named parameters (python variables) in SQL cells
%config SqlMagic.named_parameters=True

# Connect jupysql to DuckDB using a SQLAlchemy-style connection string. Either connect to an in memory DuckDB, or a file backed db.
%sql duckdb:///:memory:

In [3]:
!wget https://nyp.widen.net/s/jkfljbb2gq/133957095_newyorkpresbyterianhospital_standardcharges -P /tmp/

--2023-09-22 11:55:08--  https://nyp.widen.net/s/jkfljbb2gq/133957095_newyorkpresbyterianhospital_standardcharges
Resolving nyp.widen.net (nyp.widen.net)... 108.138.128.20, 108.138.128.66, 108.138.128.37, ...
Connecting to nyp.widen.net (nyp.widen.net)|108.138.128.20|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://cf-store.widencdn.net/nyp/f/1/9/f19a7183-5518-45a2-993f-d209f9f2944c.zip?response-content-disposition=attachment%3B%20filename%3D%22133957095_NewYorkPresbyterianHospital_standardcharges.zip%22&response-content-type=application%2Fzip&Expires=1695405096&Signature=bqofLi~Ynpa0NR8uT3C8jqadFL~YkbPXuTA5v5K63HUVSW-kPKYThJBWYQuAg5WNnQcR-40qhYA1tMy4Kak8lOdfmuLejJmc8nGI2Pb3oSS~kO6oRpgcB8sD074YsVFyfj3bfsTmuuZ6hQL3VHYud6bDRIQpD72r3zuwGOPQy0eR4xgSjOp2PykctcR3f8IKy7d1VRu0tMmITJ~iC7uzCDzcqLrfmeMANl~jTDJ~6CHq3m6UT9bQlXBI63o1iqhhwBUctOGuHk04dtKks3XnrCVX6Z6vA01FY39yS6BgrAM7GwmAiqkEJIyUwzAGiJRwi8tmiyTRCm-mCYVzL-Ca0w__&Key-Pair-Id=APKAJD5XONOBVWWOA65A [

In [1]:
!unzip /tmp/133957095_newyorkpresbyterianhospital_standardcharges && mv 133957095_NewYorkPresbyterianHospital_standardcharges.json /tmp/

Archive:  /tmp/133957095_newyorkpresbyterianhospital_standardcharges
  inflating: 133957095_NewYorkPresbyterianHospital_standardcharges.json  


In [4]:
!head -c 1000 /tmp/133957095_newyorkpresbyterianhospital_standardcharges.json

[{"Code (CPT/DRG)": 96360, "Description": "HC IV INFUSION HYDRATION INITIAL 31 MIN-1HR", "Rev Code": "0260", "Inpatient/Outpatient": "Inpatient/Outpatient", "Gross Charges": 866.0, "Discounted Cash Price": 866.0, "Aetna": 641.706, "Cigna": 494.486, "Empire Blue Cross Blue Shield": 640.84, "Emblem Health": 1388.424, "United Health Group": 260.25, "Aetna Medicare": 253.04825, "AgeWell Medicare": 316.3103125, "Emblem Medicare": 253.04825, "Empire Medicare": 253.04825, "Fidelis Medicare": 253.04825, "Healthfirst Medicare": 260.6396975, "UHC Community Plan/United Medicare": 255.5787325, "VNS Medicare": 260.6396975, "WellCare Medicare": 261.90493875, "1199": 433, "Affinity Molina Essential": 773.1477801568, "Affinity Molina Medicaid/CHP": 343.6212356252, "Amida Care Medicaid": 395.164420969, "Emblem Medicaid/CHP": 395.164420969, "Empire Healthplus Essential": 773.1477801568, "Empire Healthplus Exchange": 859.0530890631, "Empire Healthplus Medicaid/CHP": 343.6212356252, "Fidelis Essential/Exc

In [5]:
%%sql
SELECT * FROM read_json_auto('/tmp/133957095_newyorkpresbyterianhospital_standardcharges.json', records=true, maximum_object_size=500000000)

Unnamed: 0,Code (CPT/DRG),Description,Rev Code,Inpatient/Outpatient,Gross Charges,Discounted Cash Price,Aetna,Cigna,Empire Blue Cross Blue Shield,Emblem Health,...,Consumer Health Network,Devon,Equian,First Health,Magnacare,Multiplan/Beechstreet/PHCS,QHM,Worldwide,Minimum Negotiated Charge,Maximum Negotiated Charge
0,96360,HC IV INFUSION HYDRATION INITIAL 31 MIN-1HR,0260,Inpatient/Outpatient,866.000000,866.000000,641.706,494.486,640.84,1388.424,...,393.5104,393.5104,393.5104,393.5104,393.5104,393.5104,393.5104,393.5104,253.048250,1388.424000
1,96361,HC IV INFUSION HYDRATION FLUIDS ADDL HR,0260,Inpatient/Outpatient,358.000000,358.000000,265.278,204.418,264.92,1305.112,...,162.6752,162.6752,162.6752,162.6752,162.6752,162.6752,162.6752,162.6752,51.903250,1990.891341
2,96365,HC IV INFUSION FOR THER/PROPH/DIAG INITIAL UP ...,0260,Inpatient/Outpatient,1397.000000,1397.000000,1035.177,797.687,1033.78,1475.508,...,634.7968,634.7968,634.7968,634.7968,634.7968,634.7968,634.7968,634.7968,253.048250,1475.508000
3,96367,HC IV INF THER/PROPH/DIAG ADDL SEQ NEW DRUG UP...,0260,Inpatient/Outpatient,480.000000,480.000000,355.68,274.08,355.2,1325.12,...,218.112,218.112,218.112,218.112,218.112,218.112,218.112,218.112,82.650750,1325.120000
4,96368,HC IV INFUSION FOR THER/PROPH/DIAG CONCURRENT,0260,Inpatient/Outpatient,480.000000,480.000000,355.68,274.08,355.2,1325.12,...,218.112,218.112,218.112,218.112,218.112,218.112,218.112,218.112,156.982800,1325.120000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6188,982,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Multiple,Inpatient,184095.129020,184095.129020,86723.5232,98911.694706,82299.84,115129.89148,...,83652.8266268735,83652.8266268735,83652.8266268735,83652.8266268735,83652.8266268735,83652.8266268735,83652.8266268735,83652.8266268735,12561.144027,115129.891480
6189,983,EXTENSIVE O.R. PROCEDURES UNRELATED TO PRINCIP...,Multiple,Inpatient,85385.941857,85385.941857,57748.8352,65864.888166,63931.4550933333,76664.51828,...,38799.3719796408,38799.3719796408,38799.3719796408,38799.3719796408,38799.3719796408,38799.3719796408,38799.3719796408,38799.3719796408,7833.746854,76664.518280
6190,987,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Multiple,Inpatient,411665.999955,411665.999955,114736.9984,130862.199072,214294.876425,152319.20576,...,187061.0303797436,187061.0303797436,187061.0303797436,187061.0303797436,187061.0303797436,187061.0303797436,187061.0303797436,187061.0303797436,21016.798830,214294.876425
6191,988,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Multiple,Inpatient,126522.871140,126522.871140,58654.7264,66898.093812,91907.6928,77867.13496,...,57491.9926458445,57491.9926458445,57491.9926458445,57491.9926458445,57491.9926458445,57491.9926458445,57491.9926458445,57491.9926458445,10429.428059,91907.692800


In [6]:
%%sql 
COPY (
SELECT * FROM read_json_auto('/tmp/133957095_newyorkpresbyterianhospital_standardcharges.json') 
) TO '/Users/me/data/payless_health/133957095_newyorkpresbyterianhospital_standardcharges.parquet' (COMPRESSION ZSTD)

Unnamed: 0,Success


In [17]:
df = pd.read_parquet('/Users/me/data/payless_health/133957095_newyorkpresbyterianhospital_standardcharges.parquet')

In [19]:
df.columns

Index(['Code (CPT/DRG)', 'Description', 'Rev Code', 'Inpatient/Outpatient',
       'Gross Charges', 'Discounted Cash Price', 'Aetna', 'Cigna',
       'Empire Blue Cross Blue Shield', 'Emblem Health', 'United Health Group',
       'Aetna Medicare', 'AgeWell Medicare', 'Emblem Medicare',
       'Empire Medicare', 'Fidelis Medicare', 'Healthfirst Medicare',
       'UHC Community Plan/United Medicare', 'VNS Medicare',
       'WellCare Medicare', '1199', 'Affinity Molina Essential',
       'Affinity Molina Medicaid/CHP', 'Amida Care Medicaid',
       'Emblem Medicaid/CHP', 'Empire Healthplus Essential',
       'Empire Healthplus Exchange', 'Empire Healthplus Medicaid/CHP',
       'Fidelis Essential/Exchange', 'Fidelis Medicaid/CHP',
       'Healthfirst Essential/Exchange', 'Healthfirst Medicaid/CHP',
       'MVP Medicaid/CHP', 'MVP Essential', 'United Community Plan Essential',
       'United Community Plan Medicaid', 'VNS Medicaid',
       'Consumer Health Network', 'Devon', 'Equian', 'Fir

In [2]:
%%sql 
CREATE TABLE nyp AS SELECT * FROM '/Users/me/data/payless_health/133957095_newyorkpresbyterianhospital_standardcharges.parquet';

Unnamed: 0,Success


In [3]:
%%sql 
WITH unpivot_alias AS (
    UNPIVOT nyp
    ON COLUMNS(* EXCLUDE ('Code (CPT/DRG)', 'Description', 'Rev Code', 'Inpatient/Outpatient'))
    INTO
        NAME payor
        VALUE negotiated_rate
) 
SELECT * FROM unpivot_alias;

Unnamed: 0,Code (CPT/DRG),Description,Rev Code,Inpatient/Outpatient,payor,negotiated_rate
0,96360,HC IV INFUSION HYDRATION INITIAL 31 MIN-1HR,0260,Inpatient/Outpatient,Gross Charges,866.0
1,96360,HC IV INFUSION HYDRATION INITIAL 31 MIN-1HR,0260,Inpatient/Outpatient,Discounted Cash Price,866.0
2,96360,HC IV INFUSION HYDRATION INITIAL 31 MIN-1HR,0260,Inpatient/Outpatient,Aetna,641.706
3,96360,HC IV INFUSION HYDRATION INITIAL 31 MIN-1HR,0260,Inpatient/Outpatient,Cigna,494.486
4,96360,HC IV INFUSION HYDRATION INITIAL 31 MIN-1HR,0260,Inpatient/Outpatient,Empire Blue Cross Blue Shield,640.84
...,...,...,...,...,...,...
265732,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Multiple,Inpatient,Multiplan/Beechstreet/PHCS,35025.8969869067
265733,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Multiple,Inpatient,QHM,35025.8969869067
265734,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Multiple,Inpatient,Worldwide,35025.8969869067
265735,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Multiple,Inpatient,Minimum Negotiated Charge,5995.0715522036


In [24]:
%%sql 
COPY (
WITH unpivot_alias AS (
    UNPIVOT nyp
    ON COLUMNS(* EXCLUDE ('Code (CPT/DRG)', 'Description', 'Rev Code', 'Inpatient/Outpatient'))
    INTO
        NAME payor
        VALUE negotiated_rate
) 
SELECT * FROM unpivot_alias
WHERE payor != 'Minimum Negotiated Charge'
AND payor != 'Maximum Negotiated Charge'
AND payor != 'Gross Charges'
AND negotiated_rate != '"Not separately payable"'
AND negotiated_rate != '"Included in Service Package"'
AND "Code (CPT/DRG)" != '""'
) TO '/Users/me/data/payless_health/133957095_newyorkpresbyterianhospital_standardcharges-unpivoted.parquet' (COMPRESSION ZSTD);

Unnamed: 0,Success


In [34]:
%%sql 
SELECT CAST(regexp_replace("Code (CPT/DRG)", '"', '', 'g') AS VARCHAR) AS billing_code, "Description" AS description, payor, CAST(NULLIF(regexp_replace(negotiated_rate, '[^0-9.]+', '', 'g'), '') AS FLOAT) AS negotiated_rate FROM '/Users/me/data/payless_health/133957095_newyorkpresbyterianhospital_standardcharges-unpivoted.parquet'


Unnamed: 0,billing_code,description,payor,negotiated_rate
0,96360,HC IV INFUSION HYDRATION INITIAL 31 MIN-1HR,Discounted Cash Price,866.000000
1,96360,HC IV INFUSION HYDRATION INITIAL 31 MIN-1HR,Aetna,641.705994
2,96360,HC IV INFUSION HYDRATION INITIAL 31 MIN-1HR,Cigna,494.485992
3,96360,HC IV INFUSION HYDRATION INITIAL 31 MIN-1HR,Empire Blue Cross Blue Shield,640.840027
4,96360,HC IV INFUSION HYDRATION INITIAL 31 MIN-1HR,Emblem Health,1388.423950
...,...,...,...,...
216547,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,First Health,35025.898438
216548,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Magnacare,35025.898438
216549,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,Multiplan/Beechstreet/PHCS,35025.898438
216550,989,NON-EXTENSIVE O.R. PROCEDURES UNRELATED TO PRI...,QHM,35025.898438


In [39]:
%%sql 
COPY (
SELECT regexp_replace("Code (CPT/DRG)", '"', '', 'g') AS billing_code, "Description" AS description, payor AS health_insurance_name, CAST(NULLIF(regexp_replace(negotiated_rate, '[^0-9.]+', '', 'g'), '') AS FLOAT) AS negotiated_rate FROM '/Users/me/data/payless_health/133957095_newyorkpresbyterianhospital_standardcharges-unpivoted.parquet'
WHERE billing_code != '""' AND negotiated_rate IS NOT NULL
) TO '/Users/me/data/payless_health/EIN_133957095_newyork-presbyterian.parquet' (COMPRESSION ZSTD);

Unnamed: 0,Success


In [40]:
!cp /Users/me/data/payless_health/EIN_133957095_newyork-presbyterian.parquet /Users/me/projects/payless.health/docs/public/data/