###  Managed Care Sales Performance

In [32]:
import polars as pl
import gc
import pandas as pd
from datetime import datetime, timedelta,date
import json
import numpy as np

In [33]:
# load variables from JSON
with open('vars_wk.json', 'r') as json_file:
    js = json.load(json_file)

bucket = js['bucket']
# data_date = js['data_date']
# monthly_data_date = js['monthly_data_date']
data_date = '20240712'
monthly_data_date = '202406'
QTD = 3
YTD = 6 
#TODO: CONNECT TO JSON LATER

dflib = f's3://{bucket}/BIT/dataframes/'
pln = f's3://{bucket}/PYADM/weekly/archive/{data_date}/plantrak/' 
mpln = f's3://{bucket}/PYADM/monthly/archive/{monthly_data_date}/plantrak/'

In [34]:
# Utility Functions -
def load(df, lib=dflib):
    globals()[df] = pl.read_parquet(f'{lib}{df}.parquet')

In [35]:
# Imporing Dependencies
prod_mapping = pl.read_csv(f's3://{bucket}/BIT/docs/productmapping_pybit.txt',separator='|')
geo_code_mapper = pl.from_pandas(pd.read_excel(f's3://{bucket}/BIT/docs/geo_id_full.xlsx'))
load('mp_spec_seg_dec')
load('MASTER_UNI')
fetch_products = ['LI1','LI2','LI3','TRU','AMT','LAC','MOT','LUB','IRL']

---

Formulary

In [36]:
# import forumlary -
fm_monthly = pl.read_parquet(mpln+'FORMULARY.parquet',columns = ['IMS_PLAN_ID','GROUP_TYPE','FORMULARY_GROUP_STATUS','PFAM_CD','PFAM_NAME','IRWD_FGN_NAME','BRAND'])
fm_weekly = pl.read_parquet(pln+'FORMULARY.parquet',columns = ['IMS_PLAN_ID','GROUP_TYPE','FORMULARY_GROUP_STATUS','PFAM_CD','PFAM_NAME','IRWD_FGN_NAME','BRAND'])
payer_names = (
    fm_monthly.select('IRWD_FGN_NAME')
    .vstack(fm_weekly.select('IRWD_FGN_NAME'))
    .unique()
    .sort('IRWD_FGN_NAME')
    .with_row_index(offset=1)
    .rename({'index':'payer_id'})
)
#FORMULARY
group_type_mapping = {
    'HIX' : 'Commercial','Com' : 'Commercial','Cash' : 'Cash','Voucher':'Voucher',
    'FFS' : 'FFS','Mgd Medicaid' : 'Mgd Medicaid','Part D' : 'Part D','MAC A' : 'Others',
}

def classify_plan_class(status):
    status = status.upper()
    if status[:7] == "COVERED" or status[:6] == "ON PDL":
        return "COVERED"
    elif status[:9] == "PREFERRED":
        return "PREFERRED"
    elif status[:13] == "NON-PREFERRED":
        return "NON PREFERRED"
    elif status[:7] == "NON-PDL" or status[:11] == "NOT COVERED":
        return "NOT COVERED"
    else:
        return "N_A"

# Reading 
fm = fm_monthly.with_columns(
        pl.when(pl.col('BRAND')=='IBR')
        .then(pl.lit('IRL'))
        .otherwise(pl.col('BRAND'))
        .alias('BRAND')
)
fm = fm.filter((pl.col('PFAM_CD')==(pl.col('BRAND'))) | (pl.col('BRAND')==''))
fm = (
    fm
    .with_columns(
        pl.col('GROUP_TYPE').map_elements(lambda x: group_type_mapping.get(x,'Others'), return_dtype=pl.Utf8) #NOTE : IF new plan types flow , they will go to Others by default
        .fill_null('Others')
        .alias('plan_type'),
        pl.col('IMS_PLAN_ID').cast(pl.Int64)
    )
    .rename({'IMS_PLAN_ID':'PlanID'})
    .drop('GROUP_TYPE')
    .with_columns(pl.col('FORMULARY_GROUP_STATUS').fill_null(pl.lit('N_A')))
    .with_columns(pl.col('FORMULARY_GROUP_STATUS').map_elements(classify_plan_class,return_dtype=pl.String).alias('plan_class'))
    .drop('FORMULARY_GROUP_STATUS')
    .unique()
)

###############
# HARD CODED - 
fm = fm.with_columns(pl.when(pl.col("PlanID") == 13670614).then(pl.lit('Others')).otherwise(pl.col("plan_type")).alias("plan_type"))
###############
fm2 = (
    fm
    .select('PFAM_CD','IRWD_FGN_NAME','plan_class').unique()
    .group_by(['IRWD_FGN_NAME','PFAM_CD'])
    .agg(
        pl.col('plan_class').unique().str.concat(' / ').alias('plan_class')
    )
    .with_columns(pl.col('plan_class').str.to_titlecase())
)

Plantrak

In [37]:
# Import and prepare Raw data - 
ln = (
    pl.read_parquet(mpln+'LAX_N.parquet',columns=['IID','MonthKey','PFAM_CD','PROD_CD','PlanID','TUF']) #read req cols only
    .rename({'MonthKey':'PeriodKey'})
    .filter(pl.col('PROD_CD').is_in(fetch_products)) #only keep data for BIT products
    .with_columns(pl.col('PeriodKey').cast(pl.Utf8).str.to_date("%Y%m%d")) #Convert Categorical column Back to date
)
date_list = ln['PeriodKey'].unique().sort(descending=True)
# Any PlanIds startign with -0000002 should be excluded
ln = (
    ln
    .with_columns(pl.col('PlanID').cast(pl.Utf8).str.zfill(10).alias('planid_chr'))
    .filter(~pl.col('planid_chr').str.starts_with('000002'))
    .drop('planid_chr')
)

ln = ln.join(
    (pl.DataFrame(date_list).with_row_index(offset = 1).rename({'index':'num_month'})),
    on = 'PeriodKey', how = 'left'
)

ln = (
    ln
    .join(fm.select(['PlanID','IRWD_FGN_NAME']).unique(),on='PlanID',how='left')
)

In [38]:
# Function That Gets Data for each data cut - INPUT ln
filter_cond_dict = {
    '1c' : pl.col('num_month')==1,'1p' : pl.col('num_month')==2,
    '3c' : pl.col('num_month').is_in([1,2,3]),'3p' : pl.col('num_month').is_in([4,5,6]),
    '6c' : pl.col('num_month').is_in([1,2,3,4,5,6]),'6p' : pl.col('num_month').is_in([7,8,9,10,11,12]),
    '12c' : pl.col('num_month').is_in([i for i in range(1,13)]),'12p' : pl.col('num_month').is_in([i for i in range(13,25)]),
    'qtdc' : pl.col('num_month').is_in([i for i in range(1,QTD+1)]),'qtdp' : pl.col('num_month').is_in([i for i in range(4,4+QTD)]),
    'ytdc' : pl.col('num_month').is_in([i for i in range(1,YTD+1)]),'ytdp' : pl.col('num_month').is_in([i for i in range(13,13+YTD)])
}
def get_data_cuts(df):
    result = pl.DataFrame()
    for period,cond in filter_cond_dict.items():
        df_filter = df.filter(cond)
        df_filter = (
            df_filter
            .group_by(['IID','IRWD_FGN_NAME','PFAM_CD','PROD_CD']).agg(pl.col('TUF').sum().alias(f'TUF_{period}'))
        )

        if period == '1c':
            result = df_filter
        else:
            result = result.join(df_filter,on =['IID','IRWD_FGN_NAME','PFAM_CD','PROD_CD'],how = 'outer_coalesce')

    # Pulling in Plan Type -
    result = (
        result
        .join(
            fm.select(['IRWD_FGN_NAME','PFAM_CD','plan_type']).unique(),
            on = ['IRWD_FGN_NAME', 'PFAM_CD'], how = 'left'
        )
        .with_columns(
            pl.col('plan_type').fill_null(pl.lit('Others')),
        )
    )

    # Pulling in Plan Class
    result = (
        result.join(fm2, on=['IRWD_FGN_NAME', 'PFAM_CD'], how='left')
        .with_columns(
            pl.col('plan_class').fill_null(pl.lit('N_a'))
        )
    )

    # Dropping Records with Voucher , FFS , Medicaid
    result = result.filter(
        ~(pl.col('plan_type').is_in(['Voucher','Mgd Medicaid','FFS']))
    )

    #Joining Payer ID-
    result = result.join(payer_names, on ='IRWD_FGN_NAME', how = 'left')
    
    return(result)

ln1 = get_data_cuts(ln)

# adding product_id
ln1 = (
    ln1
    .join(
        prod_mapping.select(['code','product_id','parent_product_id']),
        left_on = 'PROD_CD', right_on='code', how = 'left'
    )
)

In [39]:
# Parent Product Rows - OUTPUT ln2
prod_agg_expn_list = {col : pl.col(col).sum() for col in [f'TUF_{p}' for p in filter_cond_dict.keys()]}
prod_agg_expn_list.update({'plan_type':pl.col('plan_type').first()})

#lin and amt-

ln1_235 = (
    ln1
    .filter(pl.col('parent_product_id').is_in([2,35]))
    .group_by(['IID','IRWD_FGN_NAME','payer_id','parent_product_id'])
    .agg(
        **{**prod_agg_expn_list,'plan_class':pl.col('plan_class').first()}
    )
    .rename({'parent_product_id':'product_id'})
)


#for lax mkt - 
ln1_1 = (
    ln1
    .group_by(['IID','IRWD_FGN_NAME','payer_id'])
    .agg(**prod_agg_expn_list)
    .with_columns(pl.lit(1).alias('product_id').cast(pl.Int64),pl.lit('N_a').alias('plan_class'))
    .select(ln1_235.columns)
)

ln2 = (
    ln1.select(ln1_235.columns)
    .vstack(ln1_235)
    .vstack(ln1_1)
)

RANKING -

In [40]:
# Adding Geo information -
ln2 = ln2.join(mp_spec_seg_dec[['IID','geography_id']],on='IID',how='left').join(geo_code_mapper,on = 'geography_id', how = 'left')

In [41]:
ln2 = ln2.filter(pl.col('payer_id').is_not_null())

In [42]:
# # for top 10 payers ? 

# t1 = ln2.filter(region_geography_id = 5).filter(plan_type = 'Commerical').filter(product_id = 1)

# (
#     t1
#     .group_by('payer_id')
#     .agg(TUF = pl.col('TUF_1c').sum())
#     .with_columns(pl.col("TUF").rank("ordinal",descending=True).alias("rank"))   
#     .filter(pl.col('rank') <= 10)
# )

NOTES ->

- For a given Product
- and a given Geography_ID (can be any terr , region , area nation)
- - I have 7 ROWS
  - One For Each Payer Type : CASH , COMMERCIAL , OTHERS, PART D, PART D and COM, TOTAL and a row with slash N

In [43]:
ln2

IID,IRWD_FGN_NAME,payer_id,product_id,TUF_1c,TUF_1p,TUF_3c,TUF_3p,TUF_6c,TUF_6p,TUF_12c,TUF_12p,TUF_qtdc,TUF_qtdp,TUF_ytdc,TUF_ytdp,plan_type,plan_class,geography_id,region_geography_id,area_geography_id,nation_geography_id
i64,str,u32,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,i64,i64,i64,i64
3613316,"""Magellan Rx Mgt Unspec (Com)""",1880,4,,1.001,1.001,,1.001,,1.001,,1.001,,1.001,,"""Commercial""","""N_a / Preferred""",85,13,3,1
2527959,"""Uhc/Pacificare/Aarp Med D (Pa…",3076,8,0.342,1.222,1.564,,1.564,,1.564,1.397,1.564,,1.564,,"""Part D""","""N_a""",,,,
2106113,"""Bcbs Alabama (Part D)""",369,8,2.68,,2.68,,2.68,,2.68,,2.68,,2.68,,"""Part D""","""N_a""",85,13,3,1
18175778,"""Caremark Unspec (Com)""",661,8,5.37,,5.37,,5.37,,5.37,,5.37,,5.37,,"""Commercial""","""N_a""",39,7,2,1
226106,"""State Of Mississippi Ees (Ms) …",2860,4,,1.005,1.005,2.161,3.166,1.023,4.189,6.152,1.005,2.161,3.166,4.095,"""Commercial""","""Preferred""",,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2756686,"""Elixir Rx Insurance Company (…",1154,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.688,0.0,0.0,0.0,0.0,"""Part D""","""N_a""",,,,
240840,"""Federal Employees/ Fehbp (Com…",1255,1,0.0,0.0,0.0,0.0,0.0,5.874,5.874,4.8,0.0,0.0,0.0,4.8,"""Commercial""","""N_a""",,,,
292420,"""Caremark Unspec (Com)""",661,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""Commercial""","""N_a""",,,,
3788902,"""Blue Cross Capital/Cbc (Part …",481,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.784,0.0,0.0,0.0,0.0,"""Part D""","""N_a""",40,7,2,1


# Logic to Follow for Creating Drill Down (Geo to -> Payer Types)

In [23]:
# group it by ->
t1 = ln2.group_by(['geography_id', 'product_id', 'plan_type']).agg(TUF = pl.col('TUF_6c').sum())
t1_total = ln2.group_by(['geography_id', 'product_id']).agg(TUF = pl.col('TUF_6c').sum(),plan_type = pl.lit('Total')).select(t1.columns)
t1_total2 = ln2.filter(pl.col('plan_type').is_in(['Commerical','Part D'])).group_by(['geography_id', 'product_id']).agg(TUF = pl.col('TUF_6c').sum(),plan_type = pl.lit('Part D and Commercial')).select(t1.columns)

t1 = t1.vstack(t1_total).vstack(t1_total2)

# Logic to follow for Creating Second Drill Down (Payer Type -> top 10 Plans)

In [57]:
t1 = (
    ln2
    .group_by(['geography_id', 'product_id', 'plan_type','payer_id','IRWD_FGN_NAME'])
    .agg(TUF = pl.col('TUF_6c').sum())
    .with_columns(
        pl.col("TUF")
        .rank("ordinal",descending=True)
        .over(['geography_id', 'product_id', 'plan_type'])
        .alias("rank")
    )
    .filter(pl.col('rank')<=5)
)

In [59]:
check = (
    t1
    .filter(geography_id = 98)
    .filter(product_id = 1)
    .filter(plan_type = 'Commerical')
)

In [61]:
check.join(ln2, on = ['geography_id',	'product_id'	,'plan_type',	'payer_id'],how = 'left')

geography_id,product_id,plan_type,payer_id,IRWD_FGN_NAME,TUF,rank,IID,IRWD_FGN_NAME_right,TUF_1c,TUF_1p,TUF_3c,TUF_3p,TUF_6c,TUF_6p,TUF_12c,TUF_12p,TUF_qtdc,TUF_qtdp,TUF_ytdc,TUF_ytdp,plan_class,region_geography_id,area_geography_id,nation_geography_id
i64,i64,str,u32,str,f64,u32,i64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,i64,i64,i64
98,1,"""Commerical""",87,"""Aetna Inc (Com)""",1805.268,4,2542469,"""Aetna Inc (Com)""",0.0,0.0,0.322,0.0,0.322,0.322,0.644,0.0,0.322,0.0,0.322,0.0,"""N_a""",14,3,1
98,1,"""Commerical""",87,"""Aetna Inc (Com)""",1805.268,4,1619397,"""Aetna Inc (Com)""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.488,0.0,0.0,0.0,0.556,"""N_a""",14,3,1
98,1,"""Commerical""",87,"""Aetna Inc (Com)""",1805.268,4,261801,"""Aetna Inc (Com)""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,14.464,0.0,0.0,0.0,11.42,"""N_a""",14,3,1
98,1,"""Commerical""",87,"""Aetna Inc (Com)""",1805.268,4,127412,"""Aetna Inc (Com)""",6.229,3.585,10.815,20.501,31.316,8.334,39.65,21.613,10.815,20.501,31.316,18.503,"""N_a""",14,3,1
98,1,"""Commerical""",87,"""Aetna Inc (Com)""",1805.268,4,2360509,"""Aetna Inc (Com)""",0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,"""N_a""",14,3,1
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
98,1,"""Commerical""",125,"""All Other Third Party (Com)""",1796.554,5,2058963,"""All Other Third Party (Com)""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.0,0.0,0.0,0.3,"""N_a""",14,3,1
98,1,"""Commerical""",125,"""All Other Third Party (Com)""",1796.554,5,476038,"""All Other Third Party (Com)""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.01,0.0,0.0,0.0,1.01,"""N_a""",14,3,1
98,1,"""Commerical""",125,"""All Other Third Party (Com)""",1796.554,5,19188,"""All Other Third Party (Com)""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,"""N_a""",14,3,1
98,1,"""Commerical""",125,"""All Other Third Party (Com)""",1796.554,5,3694641,"""All Other Third Party (Com)""",0.0,0.0,0.0,0.444,0.444,0.0,0.444,0.0,0.0,0.444,0.444,0.0,"""N_a""",14,3,1
