In [9]:
# Imports

import polars as pl

from compute_top_spending_cohorts import compute_top_spending_cohorts
from compute_emerging_cohorts import compute_emerging_cohorts
from compute_outlier_cohorts import compute_outlier_cohorts

from cohorts_diseases import get_cohort_diseases_icd_level_1, get_cohort_diseases_icd_level_2, get_cohort_diseases_chronic, get_cohort_diseases_trigger_level_2

from cohorts_procedures import get_cohort_procedures

from cohorts_facilities import get_cohort_facilities

from cohorts_drugs import get_cohort_drugs_usage

from cohorts_demographics import get_cohort_demographics_ages, get_cohort_demographics_relationships, get_cohort_demographics_genders

from cohorts_providers import get_cohort_individual_medical_provider, get_cohort_medical_provider_speciality, get_cohort_individual_rx_provider

In [10]:
# Polars display configurations

pl.Config.set_float_precision(2)
pl.Config.set_fmt_float("full")
pl.Config.set_fmt_str_lengths(200)
pl.Config.set_tbl_cell_numeric_alignment("RIGHT")
pl.Config.set_tbl_hide_dataframe_shape(False)
pl.Config.set_tbl_rows(20)
pl.Config.set_tbl_width_chars(8000)
pl.Config.set_thousands_separator(",")

polars.config.Config

In [11]:
# Constants

eg_nid = "PS" 
reference_date = "2025-05-10"

In [12]:
# df1 = pl.DataFrame({"spend": top_spending_diseases_icd_level_2})
# df2 = pl.DataFrame({"spend": top_spending_procedures})
# df3 = pl.DataFrame({"spend": top_spending_drugs})
# df4 = pl.DataFrame({"spend": top_spending_providers})

# print(df1)

# results = pl.concat([df1, df2, df3, df4], how="vertical_relaxed")

# results_flat = results.select([
#     pl.col("spend").struct.field("cohort_name").alias("cohort_name"),
#     pl.col("spend").struct.field("spend").cast(pl.Float64).alias("spend")
# ])

# filtered_sorted = (
#     results_flat
#     .filter(
#         (pl.col("cohort_name") != "*") & (pl.col("cohort_name") != "None")
#     )
#     .sort("spend", descending=True)
#     .head(3)   
# )


### By Shreesh - type 1

In [None]:
def new_top_n_rows(df: pl.DataFrame, analysis_type):
    df = df.filter(pl.col("cohort_name") != "*", pl.col("cohort_name") != "None")
    if analysis_type == "top spending":
        df = df.sort("spend", descending=True)
        df = df.head(3)
        return df
    elif analysis_type == "surge in spending" or analysis_type == "emerging spending":
        df = df.sort("pct_increase", descending=True)
        df = df.head(3)
        return df
    else:
        print("Invalid analysis type. Please choose 'top spending', 'surge in spending', or 'emerging spending'.")
       
       return None



In [None]:
def new_top_n_rows(df: pl.DataFrame):
    # df = df.filter(pl.col("cohort_name") != "*", pl.col("cohort_name") != "None")
    # if analysis_type == "top spending":
    #     df = df.sort("spend", descending=True)
    #     df = df.head(3)
    #     return df
    # elif analysis_type == "surge in spending" or analysis_type == "emerging spending":
    #     df = df.sort("pct_increase", descending=True)
    #     df = df.head(3)
    #     return df
    # else:
    #     print("Invalid analysis type. Please choose 'top spending', 'surge in spending', or 'emerging spending'.")
       
    #    return None

    if len(df.columns) == 2: # top spending



[(1, 6, 'a'), (2, 7, 'b'), (3, 8, 'c')]

In [None]:
x = 4
y = [1, 2, 3]
1 in y

int x = 1;
x=1


False

In [14]:
# all_top_spending_dataframes = pl.concat([
#     surge_in_spending_diseases_icd_level_2.sort("pct_increase",descending=True).head(10), 
#     surge_in_spending_procedures.sort("pct_increase",descending=True).head(10), 
#     # surge_in_spending_drugs.sort("pct_increase",descending=True).head(10), 
#     # surge_in_spending_providers_medical_speciality.sort("pct_increase",descending=True).head(10)])
# ])

# # print(all_top_spending_dataframes.unique())

# new_top_n_rows(all_top_spending_dataframes, "surge in spending")

# TOP SPENDING COHORTS - TOP 3 EXPENSES

# Top Spending Cohorts - Diseases (ICD Level 2)

In [15]:
top_spending_diseases_icd_level_2 = compute_top_spending_cohorts(
    claims_df=get_cohort_diseases_icd_level_2(eg_nid=eg_nid), reference_date=reference_date,
    number_of_rows=5
).collect()

##### Top 5

In [16]:
print(top_spending_diseases_icd_level_2)

shape: (5, 2)
┌────────────────────────────────────────────────────────────────────┬────────────┐
│ cohort_name                                                        ┆      spend │
│ ---                                                                ┆        --- │
│ str                                                                ┆        f64 │
╞════════════════════════════════════════════════════════════════════╪════════════╡
│  Malignant neoplasms of lymphoid, hematopoietic and related tissue ┆ 506,320.65 │
│  Osteoarthritis                                                    ┆ 330,809.30 │
│  Encounters for other specific health care                         ┆ 311,173.55 │
│  Hemorrhagic and hematological disorders of newborn                ┆ 297,557.61 │
│  Acute kidney failure and chronic kidney disease                   ┆ 291,109.99 │
└────────────────────────────────────────────────────────────────────┴────────────┘


---

# Top Spending Cohorts - Procedures (Type of Service)



In [17]:
top_spending_procedures = compute_top_spending_cohorts(
    claims_df=get_cohort_procedures(eg_nid=eg_nid),
    reference_date=reference_date,
    number_of_rows=6
).collect()

##### Top 5

In [18]:
print(top_spending_procedures.filter(pl.col("cohort_name") != "*"))

shape: (5, 2)
┌────────────────────────────────────────────────────┬──────────────┐
│ cohort_name                                        ┆        spend │
│ ---                                                ┆          --- │
│ str                                                ┆          f64 │
╞════════════════════════════════════════════════════╪══════════════╡
│ E&M (Office/Outpatient Services)                   ┆ 1,058,813.02 │
│ E&M (Behavioral Health Services)                   ┆   304,546.09 │
│ Other (Ambulance)                                  ┆   277,621.42 │
│ Treatment (Injections and Infusions (nononcolog... ┆   248,133.55 │
│ Anesthesia (Anesthesia)                            ┆   205,667.55 │
└────────────────────────────────────────────────────┴──────────────┘


---

# Top Spending Cohorts - Drug Classes

In [19]:
top_spending_drugs = compute_top_spending_cohorts(
    claims_df=get_cohort_drugs_usage(eg_nid=eg_nid),
    reference_date=reference_date,
    number_of_rows=5
).collect()

##### Top 5

In [20]:
print(top_spending_drugs)

shape: (5, 2)
┌───────────────────────────────────────┬──────────────┐
│ cohort_name                           ┆        spend │
│ ---                                   ┆          --- │
│ str                                   ┆          f64 │
╞═══════════════════════════════════════╪══════════════╡
│ Immunosuppressants                    ┆ 2,312,603.13 │
│ Drugs Used In Diabetes                ┆   589,535.72 │
│ Other Nervous System Drugs            ┆   187,774.15 │
│ Drugs For Obstructive Airway Diseases ┆   120,708.26 │
│ Analgesics                            ┆    77,971.92 │
└───────────────────────────────────────┴──────────────┘


---

# Top Sepnding Cohorts - Providers (Medical Speciality)

In [21]:
top_spending_providers = compute_top_spending_cohorts(
    claims_df=get_cohort_medical_provider_speciality(eg_nid=eg_nid),
    reference_date=reference_date,
    number_of_rows=6
).collect()

##### Top 5

In [22]:
print(top_spending_providers.filter(pl.col("cohort_name") != "None"))

shape: (5, 2)
┌─────────────────────────────────────────────┬──────────────┐
│ cohort_name                                 ┆        spend │
│ ---                                         ┆          --- │
│ str                                         ┆          f64 │
╞═════════════════════════════════════════════╪══════════════╡
│ Hospital Short Term  General and Specialty  ┆ 1,706,251.37 │
│ Hospital Psychiatric Unit                   ┆   931,914.47 │
│ Ambulance Service Provider                  ┆   531,101.09 │
│ Hospital Rehabilitation Unit                ┆   413,872.97 │
│ Physician Assistant                         ┆   314,240.19 │
└─────────────────────────────────────────────┴──────────────┘


---

In [23]:
# df1 = pl.DataFrame({"pct_increase": surge_in_spending_diseases_icd_level_2})
# df2 = pl.DataFrame({"pct_increase": surge_in_spending_procedures})
# df3 = pl.DataFrame({"pct_increase": surge_in_spending_drugs})
# df4 = pl.DataFrame({"pct_increase": surge_in_spending_providers_medical_speciality})

# results = pl.concat([df1, df2, df3, df4], how="vertical_relaxed")

# results_flat = results.select([
#     pl.col("pct_increase").struct.field("cohort_name").alias("cohort_name"),
#     pl.col("pct_increase").struct.field("pct_increase").cast(pl.Float64).alias("pct_increase")
# ])

# filtered_sorted = (
#     results_flat
#     .filter(
#         (pl.col("cohort_name") != "*") & (pl.col("cohort_name") != "None")
#     )
#     .sort("pct_increase", descending=True)
#     .head(3)   
# )




# TOP SURGE SPENDING COHORTS - TOP 3 BY % INCREASE

In [24]:

#print(filtered_sorted)

# Surge in Spending Cohorts - Diseases (ICD Level 2)



In [25]:
surge_in_spending_diseases_icd_level_2 = compute_outlier_cohorts(
    claims_df=get_cohort_diseases_icd_level_2(eg_nid=eg_nid), reference_date=reference_date,
    number_of_rows=20   
).collect()



##### Top 10

In [26]:
#print(surge_in_spending_diseases_icd_level_2.sort("pct_increase",descending=True).head(10))
if (
    surge_in_spending_diseases_icd_level_2 is not None and
    surge_in_spending_diseases_icd_level_2.shape[0] > 0 and
    "pct_increase" in surge_in_spending_diseases_icd_level_2.columns
):
    print(
        surge_in_spending_diseases_icd_level_2
        .sort("pct_increase", descending=True)
        .head(10)
    )
else:
    print(" Surge in spending diseases data is not available, due to the lack of sufficient amount of historical data. ")


shape: (10, 4)
┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────────┬──────────────┬──────────────┐
│ cohort_name                                                                                                                    ┆ avg_3m_spend ┆ avg_3y_spend ┆ pct_increase │
│ ---                                                                                                                            ┆          --- ┆          --- ┆          --- │
│ str                                                                                                                            ┆          f64 ┆          f64 ┆          f64 │
╞════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╪══════════════╪══════════════╪══════════════╡
│  Diseases of veins, lymphatic vessels and lymph nodes, not elsewhere classified                        

---

# Surge in Spending Cohorts - Procedures (Type of Service)



In [27]:
surge_in_spending_procedures = compute_outlier_cohorts(
   claims_df=get_cohort_procedures(eg_nid=eg_nid),
   reference_date=reference_date,
   number_of_rows=15
).collect()

In [28]:
print(surge_in_spending_procedures.sort("pct_increase",descending=True).head(10))

shape: (10, 4)
┌────────────────────────────────────────────────────┬──────────────┬──────────────┬──────────────┐
│ cohort_name                                        ┆ avg_3m_spend ┆ avg_3y_spend ┆ pct_increase │
│ ---                                                ┆          --- ┆          --- ┆          --- │
│ str                                                ┆          f64 ┆          f64 ┆          f64 │
╞════════════════════════════════════════════════════╪══════════════╪══════════════╪══════════════╡
│ Procedure (Musculoskeletal)                        ┆     2,820.55 ┆       916.43 ┆       207.78 │
│ Procedure (Musculoskeletal)                        ┆              ┆              ┆              │
│ Treatment (Spinal Manipulation)                    ┆     2,656.77 ┆     2,614.85 ┆         1.60 │
│ E&M (Behavioral Health Services)                   ┆    11,930.44 ┆    11,826.16 ┆         0.88 │
│ Other (Vision, Hearing, and Speech Services)       ┆     4,932.87 ┆     4,904.90 ┆ 

In [29]:
#surge_in_spending_procedures = compute_outlier_cohorts(
 #   claims_df=get_cohort_procedures(eg_nid=eg_nid),
  #  reference_date=reference_date,
   # number_of_rows=15
#).collect()

#disease_claims_df = get_cohort_diseases_icd_level_2(eg_nid=eg_nid)


#if disease_claims_df is not None and disease_claims_df.collect().height > 0:
 #   outlier_lazy = compute_outlier_cohorts(
  #      claims_df=disease_claims_df,
   #     reference_date=reference_date,
    #    number_of_rows=20
    #)
    #if outlier_lazy is not None:
     #   surge_in_spending_procedures = outlier_lazy.collect()
    #else:
     #   print(" compute_outlier_cohorts returned None. Not enough history for outlier detection.")
      #  surge_in_spending_procedures = pl.DataFrame()
#else:
 #   print(" No historical disease claims available.")
  #  surge_in_spending_procedures = pl.DataFrame()
disease_claims_df = get_cohort_diseases_icd_level_2(eg_nid=eg_nid)


if disease_claims_df is not None and disease_claims_df.collect().height > 0:
    outlier_lazy = compute_outlier_cohorts(
        claims_df=disease_claims_df,
        reference_date=reference_date,
        number_of_rows=20
    )
    if outlier_lazy is not None:
        surge_in_spending_procedures = outlier_lazy.collect()
    else:
        #print(" compute_outlier_cohorts returned None. Not enough history for outlier detection.")
        surge_in_spending_procedures = pl.DataFrame()
else:
    print(" No historical disease claims available.")
    surge_in_spending_procedures = pl.DataFrame()




##### Top 10

In [30]:
#print(surge_in_spending_procedures.sort("pct_increase",descending=True).filter(pl.col("cohort_name") != "*").head(10))
#print(surge_in_spending_procedures)
#print(cleaned_df.sort("pct_increase", descending=True).head(10))
if (
    surge_in_spending_procedures is not None and
    surge_in_spending_procedures.shape[0] > 0 and
    "pct_increase" in surge_in_spending_procedures.columns
):
    print(
        surge_in_spending_procedures
        .sort("pct_increase", descending=True)
        .head(10)
    )
else:
    print(" Surge in spending procedures data is not available, due to the lack of sufficient amount of historical data.")



shape: (10, 4)
┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────────┬──────────────┬──────────────┐
│ cohort_name                                                                                                                    ┆ avg_3m_spend ┆ avg_3y_spend ┆ pct_increase │
│ ---                                                                                                                            ┆          --- ┆          --- ┆          --- │
│ str                                                                                                                            ┆          f64 ┆          f64 ┆          f64 │
╞════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╪══════════════╪══════════════╪══════════════╡
│  Diseases of veins, lymphatic vessels and lymph nodes, not elsewhere classified                        

---

# Surge in Spending Cohorts - Drug Classes



In [31]:
#surge_in_spending_drugs = compute_outlier_cohorts(
 #   claims_df=get_cohort_drugs_usage(eg_nid=eg_nid),
  #  reference_date=reference_date,
   # number_of_rows=10
#).collect()
Drugs_claims_df = get_cohort_drugs_usage(eg_nid=eg_nid)


if disease_claims_df is not None and disease_claims_df.collect().height > 0:
    outlier_lazy = compute_outlier_cohorts(
        claims_df=disease_claims_df,
        reference_date=reference_date,
        number_of_rows=20
    )
    if outlier_lazy is not None:
        surge_in_spending_drugs = outlier_lazy.collect()
    else:
        #print(" compute_outlier_cohorts returned None. Not enough history for outlier detection.")
        surge_in_spending_drugs = pl.DataFrame()
else:
    print(" No historical disease claims available.")
    surge_in_spending_drugs = pl.DataFrame()

##### Top 10

In [32]:
#print(surge_in_spending_drugs.sort("pct_increase",descending=True).head(10))
if (
    surge_in_spending_drugs is not None and
    surge_in_spending_drugs.shape[0] > 0 and
    "pct_increase" in surge_in_spending_drugs.columns
):
    print(
        surge_in_spending_drugs
        .sort("pct_increase", descending=True)
        .head(10)
    )
else:
    print(" Surge in spending drugs data is not available, due to the lack of sufficient amount of historical data.")




shape: (10, 4)
┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────────┬──────────────┬──────────────┐
│ cohort_name                                                                                                                    ┆ avg_3m_spend ┆ avg_3y_spend ┆ pct_increase │
│ ---                                                                                                                            ┆          --- ┆          --- ┆          --- │
│ str                                                                                                                            ┆          f64 ┆          f64 ┆          f64 │
╞════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╪══════════════╪══════════════╪══════════════╡
│  Diseases of veins, lymphatic vessels and lymph nodes, not elsewhere classified                        

---

# Surge in Spending Cohorts - Providers (Medical Speciality)



In [33]:
#surge_in_spending_providers_medical_speciality = compute_outlier_cohorts(
 #   claims_df=get_cohort_medical_provider_speciality(eg_nid=eg_nid),
  #  reference_date=reference_date,
   # number_of_rows=20
#).collect()
Providers_claims_df = get_cohort_medical_provider_speciality(eg_nid=eg_nid)


if disease_claims_df is not None and disease_claims_df.collect().height > 0:
    outlier_lazy = compute_outlier_cohorts(
        claims_df=disease_claims_df,
        reference_date=reference_date,
        number_of_rows=20
    )
    if outlier_lazy is not None:
        surge_in_spending_providers_medical_speciality = outlier_lazy.collect()
    else:
        #print(" compute_outlier_cohorts returned None. Not enough history for outlier detection.")
        surge_in_spending_providers_medical_speciality = pl.DataFrame()
else:
    print(" No historical disease claims available.")
    surge_in_spending_providers_medical_speciality = pl.DataFrame()

##### Top 10

In [34]:
#print(surge_in_spending_providers_medical_speciality.sort("pct_increase",descending=True).filter(pl.col("cohort_name") != "None").head(10))
if (
    surge_in_spending_providers_medical_speciality is not None and
    surge_in_spending_providers_medical_speciality.shape[0] > 0 and
    "pct_increase" in surge_in_spending_providers_medical_speciality.columns
):
    print(
        surge_in_spending_providers_medical_speciality
        .sort("pct_increase", descending=True)
        .head(10)
    )
else:
    print(" Surge in spending providers (medical speciality) data is not available, due to the lack of sufficient amount of historical data.")



shape: (10, 4)
┌────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────────┬──────────────┬──────────────┐
│ cohort_name                                                                                                                    ┆ avg_3m_spend ┆ avg_3y_spend ┆ pct_increase │
│ ---                                                                                                                            ┆          --- ┆          --- ┆          --- │
│ str                                                                                                                            ┆          f64 ┆          f64 ┆          f64 │
╞════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╪══════════════╪══════════════╪══════════════╡
│  Diseases of veins, lymphatic vessels and lymph nodes, not elsewhere classified                        

---

In [35]:
# df1 = pl.DataFrame({"pct_increase": emerging_spending_diseases_icd_level_2})
# df2 = pl.DataFrame({"pct_increase": emerging_spending_procedures})
# df3 = pl.DataFrame({"pct_increase": emerging_spending_drugs})
# df4 = pl.DataFrame({"pct_increase": emerging_spending_providers_medical_speciality})

# results = pl.concat([df1, df2, df3, df4], how="vertical_relaxed")

# results_flat = results.select([
#     pl.col("pct_increase").struct.field("cohort_name").alias("cohort_name"),
#     pl.col("pct_increase").struct.field("pct_increase").cast(pl.Float64).alias("pct_increase")
# ])

# filtered_sorted = (
#     results_flat
#     .filter(
#         (pl.col("cohort_name") != "*") & (pl.col("cohort_name") != "None")
#     )
#     .sort("pct_increase", descending=True)
#     .head(3)   
# )

# TOP 3 EMERGING COHORTS - TOP 3 BY % INCREASE

In [36]:
#print(filtered_sorted)

# Emerging Spending Cohorts - Diseases (ICD Level 2)


In [37]:
#emerging_spending_diseases_icd_level_2 = compute_emerging_cohorts(
 #   claims_df=get_cohort_diseases_icd_level_2(eg_nid=eg_nid), reference_date=reference_date,
  #  number_of_rows=10
#).collect()
disease_claims_df = get_cohort_diseases_icd_level_2(eg_nid=eg_nid)

if disease_claims_df is not None and disease_claims_df.collect().height > 0:
    emerging_lazy = compute_emerging_cohorts(
        claims_df=disease_claims_df,
        reference_date=reference_date,
        number_of_rows=10
    )
    if emerging_lazy is not None:
        emerging_spending_diseases_icd_level_2 = emerging_lazy.collect()
    else:
       # print(" compute_emerging_cohorts returned None. Not enough history for emerging cohort detection.")
        emerging_spending_diseases_icd_level_2 = pl.DataFrame()
else:
    print(" No historical disease claims available for emerging cohort detection.")
    emerging_spending_diseases_icd_level_2 = pl.DataFrame()


##### Top 10

In [38]:
#print(emerging_spending_diseases_icd_level_2.sort("pct_increase",descending=True).head(10))

if emerging_spending_diseases_icd_level_2 is not None and emerging_spending_diseases_icd_level_2.height > 0:
    print(emerging_spending_diseases_icd_level_2.sort("pct_increase", descending=True).head(10))
else:
    print(" Emergent spending diseases icd level 2 data is not available, due to the lack of sufficient amount of historical data.")


shape: (10, 4)
┌────────────────────────────────────────────────────────────────────────────────────────────┬──────────────┬──────────────┬──────────────┐
│ cohort_name                                                                                ┆ avg_1y_spend ┆ avg_3y_spend ┆ pct_increase │
│ ---                                                                                        ┆          --- ┆          --- ┆          --- │
│ str                                                                                        ┆          f64 ┆          f64 ┆          f64 │
╞════════════════════════════════════════════════════════════════════════════════════════════╪══════════════╪══════════════╪══════════════╡
│  Edema, proteinuria and hypertensive disorders in pregnancy, childbirth and the puerperium ┆    10,722.29 ┆     3,664.82 ┆       192.57 │
│  Malignant neoplasms of lymphoid, hematopoietic and related tissue                         ┆    39,685.13 ┆    14,522.33 ┆       173.27 │
│  Is

---

# Emerging Spending Cohorts - Procedures (Type of Service)



In [39]:
#emerging_spending_procedures = compute_emerging_cohorts(
 #   claims_df=get_cohort_procedures(eg_nid=eg_nid),
  #  reference_date=reference_date,
   # number_of_rows=11
#).collect()
disease_claims_df = get_cohort_procedures(eg_nid=eg_nid)

if disease_claims_df is not None and disease_claims_df.collect().height > 0:
    emerging_lazy = compute_emerging_cohorts(
        claims_df=disease_claims_df,
        reference_date=reference_date,
        number_of_rows=10
    )
    if emerging_lazy is not None:
        emerging_spending_procedures = emerging_lazy.collect()
    else:
        #print(" compute_emerging_cohorts returned None. Not enough history for emerging cohort detection.")
        emerging_spending_procedures = pl.DataFrame()
else:
    print(" No historical disease claims available for emerging cohort detection.")
    emerging_spending_procedures = pl.DataFrame()

##### Top 10

In [40]:
# print(emerging_spending_procedures.sort("pct_increase",descending=True).filter(pl.col("cohort_name")!= "*").head(10))


if emerging_spending_procedures is not None and emerging_spending_procedures.height > 0:
    print(emerging_spending_procedures.sort("pct_increase", descending=True).head(10))
else:
    print(" Emergent spending procedures data is not available, due to the lack of sufficient amount of historical data.")



shape: (10, 4)
┌────────────────────────────────────────────────────┬──────────────┬──────────────┬──────────────┐
│ cohort_name                                        ┆ avg_1y_spend ┆ avg_3y_spend ┆ pct_increase │
│ ---                                                ┆          --- ┆          --- ┆          --- │
│ str                                                ┆          f64 ┆          f64 ┆          f64 │
╞════════════════════════════════════════════════════╪══════════════╪══════════════╪══════════════╡
│ Treatment (Chemotherapy)                           ┆    15,565.31 ┆     5,737.62 ┆       171.29 │
│ *                                                  ┆   283,509.14 ┆   251,926.98 ┆        12.54 │
│ Treatment (Physical, Occupational, and Speech T... ┆     6,128.89 ┆     5,857.13 ┆         4.64 │
│ E&M (Behavioral Health Services)                   ┆    12,369.28 ┆    11,826.16 ┆         4.59 │
│ Procedure (Other Organ Systems)                    ┆     6,264.91 ┆     6,029.97 ┆ 

---

# Emerging Spending Cohorts - Drug Classes



In [41]:
#emerging_spending_drugs = compute_emerging_cohorts(
 #   claims_df=get_cohort_drugs_usage(eg_nid=eg_nid),
  #  reference_date=reference_date,
   # number_of_rows=10
#).collect()

drugs_claims_df = get_cohort_drugs_usage(eg_nid=eg_nid)

if drugs_claims_df is not None and drugs_claims_df.collect().height > 0:
    emerging_lazy = compute_emerging_cohorts(
        claims_df=disease_claims_df,
        reference_date=reference_date,
        number_of_rows=10
    )
    if emerging_lazy is not None:
        emerging_spending_drugs = emerging_lazy.collect()
    else:
        #print(" compute_emerging_cohorts returned None. Not enough history for emerging cohort detection.")
        emerging_spending_drugs = pl.DataFrame()
else:
    print(" No historical disease claims available for emerging cohort detection.")
    emerging_spending_drugs = pl.DataFrame()

##### Top 10

In [42]:
#print(emerging_spending_drugs.sort("pct_increase",descending=True).head(10))

if emerging_spending_drugs is not None and emerging_spending_drugs.height > 0:
    print(emerging_spending_drugs.sort("pct_increase", descending=True).head(10))
else:
    print("Emergent spending drugs data is not available, due to the lack of sufficient amount of historical data.")

shape: (10, 4)
┌────────────────────────────────────────────────────┬──────────────┬──────────────┬──────────────┐
│ cohort_name                                        ┆ avg_1y_spend ┆ avg_3y_spend ┆ pct_increase │
│ ---                                                ┆          --- ┆          --- ┆          --- │
│ str                                                ┆          f64 ┆          f64 ┆          f64 │
╞════════════════════════════════════════════════════╪══════════════╪══════════════╪══════════════╡
│ Treatment (Chemotherapy)                           ┆    15,565.31 ┆     5,737.62 ┆       171.29 │
│ *                                                  ┆   283,509.14 ┆   251,926.98 ┆        12.54 │
│ Treatment (Physical, Occupational, and Speech T... ┆     6,128.89 ┆     5,857.13 ┆         4.64 │
│ E&M (Behavioral Health Services)                   ┆    12,369.28 ┆    11,826.16 ┆         4.59 │
│ Procedure (Other Organ Systems)                    ┆     6,264.91 ┆     6,029.97 ┆ 

---

# Emerging Spending Cohorts - Providers (Medical Speciality)



In [43]:
#emerging_spending_providers_medical_speciality = compute_emerging_cohorts(
 #   claims_df=get_cohort_medical_provider_speciality(eg_nid=eg_nid),
  #  reference_date=reference_date,
   # number_of_rows=11
#).collect()

Providers_claims_df = get_cohort_medical_provider_speciality(eg_nid=eg_nid)

if Providers_claims_df is not None and Providers_claims_df.collect().height > 0:
    emerging_lazy = compute_emerging_cohorts(
        claims_df=Providers_claims_df,
        reference_date=reference_date,
        number_of_rows=10
    )
    if emerging_lazy is not None:
        emerging_spending_providers_medical_speciality = emerging_lazy.collect()
    else:
       # print(" compute_emerging_cohorts returned None. Not enough history for emerging cohort detection.")
        emerging_spending_providers_medical_speciality = pl.DataFrame()
else:
    print(" No historical disease claims available for emerging cohort detection.")
    emerging_spending_providers_medical_speciality = pl.DataFrame()

##### Top 10

In [44]:
#print(emerging_spending_providers_medical_speciality.sort("pct_increase",descending=True).filter(pl.col("cohort_name") != "None").head(10))

if emerging_spending_providers_medical_speciality is not None and emerging_spending_providers_medical_speciality.height > 0:
    print(emerging_spending_providers_medical_speciality.sort("pct_increase", descending=True).head(10))
else:
    print(" Emergent spending data providers (medical speciality) is not available, due to the lack of sufficient amount of historical data.")

shape: (10, 4)
┌─────────────────────────────────────────────┬──────────────┬──────────────┬──────────────┐
│ cohort_name                                 ┆ avg_1y_spend ┆ avg_3y_spend ┆ pct_increase │
│ ---                                         ┆          --- ┆          --- ┆          --- │
│ str                                         ┆          f64 ┆          f64 ┆          f64 │
╞═════════════════════════════════════════════╪══════════════╪══════════════╪══════════════╡
│ Pharmacy                                    ┆    18,563.87 ┆    11,189.42 ┆        65.91 │
│ Physician General Surgery                   ┆     9,569.09 ┆     7,234.33 ┆        32.27 │
│ Hospital Rehabilitation Unit                ┆    18,852.60 ┆    14,265.73 ┆        32.15 │
│ Hospital Psychiatric Unit                   ┆    37,013.58 ┆    28,212.85 ┆        31.19 │
│ None                                        ┆   160,384.83 ┆   135,456.16 ┆        18.40 │
│ Critical Access Hospital                    ┆    10,0

---

In [47]:
import polars as pl

# top_spending_diseases_icd_level_2 = top_spending_diseases_icd_level_2
# top_spending_procedures = top_spending_procedures.select([pl.col("cohort_name"),pl.col("spend")])
# top_spending_drugs = top_spending_drugs.select([pl.col("cohort_name"),pl.col("spend")])
# top_spending_providers = top_spending_providers.select([pl.col("cohort_name"),pl.col("spend")])

results = pl.concat([top_spending_diseases_icd_level_2,top_spending_procedures,top_spending_drugs,top_spending_providers], how="vertical")
results = results.filter(pl.col("cohort_name") != "*").filter(pl.col("cohort_name") != "None")

top3 = results.sort("spend", descending=True).head(3)
print(top3)



shape: (3, 2)
┌─────────────────────────────────────────────┬──────────────┐
│ cohort_name                                 ┆        spend │
│ ---                                         ┆          --- │
│ str                                         ┆          f64 │
╞═════════════════════════════════════════════╪══════════════╡
│ Immunosuppressants                          ┆ 2,312,603.13 │
│ Hospital Short Term  General and Specialty  ┆ 1,706,251.37 │
│ E&M (Office/Outpatient Services)            ┆ 1,058,813.02 │
└─────────────────────────────────────────────┴──────────────┘
