In [1]:
from data_loaders import DataLoaders as dl
import polars as pl
from polars import col as c
import polars.selectors as cs
from asp_ndc import add_standard_drug_name
import analysis
import importlib

### Product Overview Report
- unique drug names is based on GPI-10 generic name from Medispan
- A single ndc was mapped to each HCPCS code based on the ASP NDC crosswalk file from CMS
    - the NDC was then mapped to MEDISPAN GPI-10 generic name
- All NDCs were mapped to MEDISPAN GPI-10 generic name
- The record (line) coalesced the HCPCS GPI-10 mapping and NDC GPI-10 mapping to get a drug name
    - HCPCS mapping was preferred over NDC mapping

In [4]:
(
    (
    dl()
    .load_hospital_price_table()
    .pipe(add_standard_drug_name)
    .select(
        pl.len().alias('total_lines'),
        c.description.filter(c.drug_name.is_not_null()).len().alias('lines_w_drug_names'),
        c.description.filter(c.hcpcs.is_not_null()).len().alias('hcpcs_lines'),
        c.description.filter(c.ndc.is_not_null()).len().alias('ndc_lines'),
        c.drug_name.n_unique().alias('unique_drug_names'),
        c.hcpcs.n_unique().alias('unique_hcpcs'),
        c.hcpcs.filter(c.drug_name.is_not_null()).n_unique().alias('hcpcs_with_drug_name'),
        c.ndc.n_unique().alias('unique_ndcs'),
    )
    .collect(engine="streaming")
    .to_pandas()
    )
)

Unnamed: 0,total_lines,lines_w_drug_names,hcpcs_lines,ndc_lines,unique_drug_names,unique_hcpcs,hcpcs_with_drug_name,unique_ndcs
0,31098523,27696191,30729426,20996834,2400,1327,1094,70482


In [9]:
# Top 20 drugs by number of lines
(
dl()
    .load_hospital_price_table()
    .pipe(add_standard_drug_name)
    .filter(c.drug_name.is_not_null())
    .group_by('drug_name')
    .agg(pl.len().alias('num_lines'), c.hospital_id.n_unique().alias('num_hospitals'))
    .sort('num_lines', descending=True)
    .head(20)
    .collect(engine="streaming")
    .to_pandas()
)

Unnamed: 0,drug_name,num_lines,num_hospitals
0,Enoxaparin Sodium,461376,1086
1,Vancomycin HCl,443059,1108
2,Heparin Sodium (Porcine),428414,1093
3,Sodium Chloride,360219,1036
4,Morphine Sulfate,330885,1109
5,Ceftriaxone Sodium,304519,1136
6,Bupivacaine HCl,297263,841
7,Potassium Chloride,289857,1071
8,Midazolam HCl,286896,1085
9,Magnesium Sulfate,266519,1081


In [11]:
# Top 20 HCPCS by number of lines with drug names
(
dl()
    .load_hospital_price_table()
    .pipe(add_standard_drug_name)
    .filter(c.drug_name.is_not_null())
    .filter(c.hcpcs.is_not_null())
    .group_by('hcpcs', 'drug_name')
    .agg(pl.len().alias('num_lines'), c.hospital_id.n_unique().alias('num_hospitals'))
    .sort('num_lines', descending=True)
    .head(20)
    .collect(engine="streaming")
    .to_pandas()
)

Unnamed: 0,hcpcs,drug_name,num_lines,num_hospitals
0,J1650,Enoxaparin Sodium,461280,1086
1,J1644,Heparin Sodium (Porcine),404896,1087
2,J3370,Vancomycin HCl,363166,1091
3,J0696,Ceftriaxone Sodium,296247,1136
4,J2250,Midazolam HCl,284768,1084
5,J3480,Potassium Chloride,283855,1071
6,J0665,Bupivacaine HCl,272751,805
7,J3475,Magnesium Sulfate,266406,1081
8,J3010,Fentanyl Citrate,256025,1076
9,J2543,Piperacillin Sodium-Tazobactam Sodium,249975,1069
