In [1]:
import polars as pl
from polars import col as c
import polars.selectors as cs
from config import BASE_PARQUET_PATH

In [2]:
[print(x) for x in BASE_PARQUET_PATH.iterdir()]

C:\Users\mwine\Projects\AUGEST_2025\dev\PRA-FRONT-END\PRA_NEW\data\benchmark_table.parquet
C:\Users\mwine\Projects\AUGEST_2025\dev\PRA-FRONT-END\PRA_NEW\data\hcpcs_desc_table.parquet
C:\Users\mwine\Projects\AUGEST_2025\dev\PRA-FRONT-END\PRA_NEW\data\hospital_price_table.parquet
C:\Users\mwine\Projects\AUGEST_2025\dev\PRA-FRONT-END\PRA_NEW\data\hospital_table.parquet
C:\Users\mwine\Projects\AUGEST_2025\dev\PRA-FRONT-END\PRA_NEW\data\ndc_name_table.parquet
C:\Users\mwine\Projects\AUGEST_2025\dev\PRA-FRONT-END\PRA_NEW\data\unique_lob_names.parquet
C:\Users\mwine\Projects\AUGEST_2025\dev\PRA-FRONT-END\PRA_NEW\data\unique_plan_names.parquet


[None, None, None, None, None, None, None]

In [3]:
def load_hospital_price_table() -> pl.LazyFrame:
    return pl.scan_parquet(BASE_PARQUET_PATH / "hospital_price_table.parquet")

def load_hcpcs_desc_table() -> pl.LazyFrame:
    return pl.scan_parquet(BASE_PARQUET_PATH / "hcpcs_desc_table.parquet")

def load_ndc_name_table() -> pl.LazyFrame:
    return pl.scan_parquet(BASE_PARQUET_PATH / "ndc_name_table.parquet")

def load_hospital_table() -> pl.LazyFrame:
    return pl.scan_parquet(BASE_PARQUET_PATH / "hospital_table.parquet")

def unique_hospital_ct() -> pl.Expr:
    return c.hospital_id.n_unique().alias('unique_hospital_ct')

def hcpcs_name() -> pl.Expr:
    return c.hcpcs_desc.str.split(' - ').list.get(1).str.split('[').list.get(0).alias('hcpcs_name')

def unique_type_of_measurements() -> pl.Expr:
    return cs.matches('(?i)type.*meas').str.to_lowercase().str.strip_chars().unique().alias('unique_type_of_measurements')

def calculate_price_pct() -> pl.Expr:
    return (cs.matches('(?i)calculated').sum().truediv(pl.len()).round(4)).alias('calculate_price_pct')

def price_stats() -> list[pl.Expr]:
    return (
        [
            c.standard_charge_negotiated_dollar.min().round(2).alias('min_price'),
            c.standard_charge_negotiated_dollar.max().round(2).alias('max_price'),
            c.standard_charge_negotiated_dollar.mean().round(2).alias('mean_price'),
            c.standard_charge_negotiated_dollar.std().round(2).alias('std_price'),
        ]
    )

def pct_negotiated_gt_cash() -> pl.Expr:
    # sum boolean true values and divide by total count to get percentage
    return (c.standard_charge_negotiated_dollar.gt(c.standard_charge_discounted_cash).sum().truediv(pl.len()).round(4).alias('pct_price_gt_cash_price'))

(
load_hospital_price_table()
.filter(c.hcpcs.str.starts_with('J'))
.group_by(c.hcpcs)
.agg(
    unique_hospital_ct(),
    unique_type_of_measurements(),
    unique_type_of_measurements().len().alias('unique_measure_ct'),
    calculate_price_pct(),
    *price_stats(),
    pct_negotiated_gt_cash(),

)
.join(load_hcpcs_desc_table(), on='hcpcs')
.with_columns(hcpcs_name())
.head(10)
.collect(engine='streaming')
.to_pandas()
)

Unnamed: 0,hcpcs,unique_hospital_ct,unique_type_of_measurements,unique_measure_ct,calculate_price_pct,min_price,max_price,mean_price,std_price,pct_price_gt_cash_price,hcpcs_desc,hcpcs_name
0,J9209,466,"[gram, unit, ml, ea, me, unknown]",6,0.3655,0.0,13000.0,98.55,228.83,0.3372,"J9209 - Mesna injection [Mesna, Mesnex]",Mesna injection
1,J2792,283,"[unit, unknown, gram, me, ml, f2, ea]",7,0.3882,0.01,55893.25,1756.01,4232.95,0.3324,"J2792 - Rho(d) immune globulin h, sd [Rho, Win...","Rho(d) immune globulin h, sd"
2,J3315,265,"[unit, ml, ea, me, unknown]",5,0.2407,0.01,32436.5,2557.16,3481.87,0.3252,"J3315 - Triptorelin pamoate [Trelstar, Triptor...",Triptorelin pamoate
3,J3095,238,"[unit, unknown, ea, me, ml]",5,0.2751,0.01,133472.0,1205.16,8575.58,0.3693,"J3095 - Telavancin injection [Telavancin, Viba...",Telavancin injection
4,J1645,239,"[f2, unknown, ml, ea, unit]",5,0.2531,0.01,28362.0,208.99,535.24,0.353,"J1645 - Dalteparin sodium [Dalteparin, Fragmin]",Dalteparin sodium
5,J2798,202,"[me, ea, unit, ml, unknown]",5,0.0263,1.55,43275.54,907.11,2719.09,0.3255,"J2798 - Inj., perseris, 0.5 mg [Perseris, Risp...","Inj., perseris, 0.5 mg"
6,J1302,183,"[me, unknown, ml]",3,0.1812,0.01,25988.63,1664.07,3277.35,0.3028,"J1302 - Inj, sutimlimab-jome, 10 mg [Enjaymo, ...","Inj, sutimlimab-jome, 10 mg"
7,J2356,310,"[unknown, unit, ml, me, ea]",5,0.416,0.01,619225.0,5070.2,7774.97,0.333,"J2356 - Inj tezepelumab-ekko, 1mg [Tezepelumab...","Inj tezepelumab-ekko, 1mg"
8,J0517,425,"[ml, me, gram, ea, unit, unknown]",6,0.3774,0.01,10000000000.0,497816.93,69707097.28,0.3126,"J0517 - Inj., benralizumab, 1 mg [Benralizumab...","Inj., benralizumab, 1 mg"
9,J7503,226,"[unit, unknown, me, ea]",4,0.3933,0.01,4463.2,212.4,567.51,0.2787,"J7503 - Tacrol envarsus ex rel oral [Envarsus,...",Tacrol envarsus ex rel oral


In [4]:
(
load_hospital_price_table()
.head()
.collect(engine='streaming')
.to_pandas()
)

Unnamed: 0,hospital_id,description,setting,drug_unit_of_measurement,drug_type_of_measurement,standard_charge_gross,standard_charge_discounted_cash,standard_charge_negotiated_dollar,plan_name,payer_name,standard_charge_methodology,standard_charge_negotiated_percentage,hcpcs,ndc,calculated_negotiated_dollars,mapped_plan_name,mapped_lob_name
0,00117837-839b-4714-98d0-4395a91d8d9a,BUMETANIDE 0.25 MG/ML IJ SOLN,outpatient,2.0,ml,75.0,45.0,1.44,PPO,UNITED_HEALTHCARE_CHOICE_PLU_3112014,fee schedule,,J1939,68462046954,False,United Health Care,
1,00117837-839b-4714-98d0-4395a91d8d9a,BUMETANIDE 0.25 MG/ML IJ SOLN,outpatient,2.0,ml,75.0,45.0,22.8,PPO,CIGNA_3020001,percent of total billed charges,30.4,J1939,68462046954,True,Cigna,
2,00117837-839b-4714-98d0-4395a91d8d9a,BUMETANIDE 0.25 MG/ML IJ SOLN,outpatient,2.0,ml,75.0,45.0,13.75,HMO,SENTARA_VANTAGE_HMO_4984001,percent of total billed charges,18.33,J1939,68462046954,True,Vantage,
3,00117837-839b-4714-98d0-4395a91d8d9a,BUMETANIDE 0.25 MG/ML IJ SOLN,outpatient,2.0,ml,75.0,45.0,22.8,HMO,CIGNA_FLEXCARE_HMO_3020048,percent of total billed charges,30.4,J1939,68462046954,True,Cigna,
4,00117837-839b-4714-98d0-4395a91d8d9a,BUMETANIDE 0.25 MG/ML IJ SOLN,outpatient,2.0,ml,75.0,45.0,13.27,PPO,AETNA_3004001,percent of total billed charges,17.7,J1939,68462046954,True,Aetna,


In [5]:
(
load_hospital_price_table()
.filter(c.ndc.is_not_null())
.join(load_ndc_name_table(), on='ndc')
.group_by(c.product)
.agg(
    unique_hospital_ct(),
    unique_type_of_measurements(),
    unique_type_of_measurements().len().alias('unique_measure_ct'),
    calculate_price_pct(),
)
.collect(engine='streaming')
.sort('unique_hospital_ct', descending=True)
.to_pandas()
.head(10)
)

Unnamed: 0,product,unique_hospital_ct,unique_type_of_measurements,unique_measure_ct,calculate_price_pct
0,Eliquis Oral Tablet 2.5 MG,349,"[ml, ea, gram, unit, me]",5,0.4457
1,Eliquis Oral Tablet 5 MG,346,"[gram, unit, unknown, me, ml, ea]",6,0.491
2,Entresto Oral Tablet 24-26 MG,330,"[unit, unknown, me, gram, ml, ea]",6,0.4441
3,Xarelto Oral Tablet 10 MG,325,"[unit, me, ea, gram]",4,0.5996
4,Xifaxan Oral Tablet 550 MG,307,"[unit, ea, me, unknown]",4,0.6033
5,Entresto Oral Tablet 49-51 MG,280,"[ml, gram, ea, unit, me, unknown]",6,0.448
6,HumaLOG Injection Solution 100 UNIT/ML,275,"[unit, ea, ml]",3,0.4305
7,Cyanocobalamin Injection Solution 1000 MCG/ML,275,"[ml, me, ea, unit, gram]",5,0.2943
8,Jardiance Oral Tablet 10 MG,274,"[gram, me, ea, unit]",4,0.5955
9,diphenhydrAMINE HCl Injection Solution 50 MG/ML,273,"[gram, ea, me, unit, ml]",5,0.3385
