## Doc 360 Managed Care

In [1]:
import polars as pl
import gc
import pandas as pd
from datetime import datetime, timedelta,date
import json
import numpy as np

In [2]:
# Run configuration: bucket name and data-date stamps are read from vars_wk.json.
with open('vars_wk.json', 'r') as json_file:
    js = json.load(json_file)

bucket, data_date, monthly_data_date = (
    js[key] for key in ('bucket', 'data_date', 'monthly_data_date')
)

# S3 prefixes used by the rest of the notebook.
dflib = f's3://{bucket}/BIT/dataframes/'  # shared parquet dataframes read by load()
pln = f's3://{bucket}/PYADM/weekly/archive/{data_date}/plantrak/'  # weekly archive for data_date
mpln = f's3://{bucket}/PYADM/monthly/archive/{monthly_data_date}/plantrak/'  # monthly archive for monthly_data_date

In [3]:
# Utility Functions -
def load(df, lib=dflib):
    """Read `{lib}{df}.parquet` and bind the frame to a module-level global named `df`.

    Parameters
    ----------
    df : str
        Base name of the parquet file; also the name of the global variable created.
    lib : str
        Path prefix the file is read from. Defaults to the value `dflib` had when
        this function was defined.

    Returns
    -------
    None. The DataFrame is only reachable through the global it creates.
    """
    parquet_path = f'{lib}{df}.parquet'
    frame = pl.read_parquet(parquet_path)
    globals()[df] = frame

In [4]:
# Importing dependencies: lookup tables and shared dataframes.
prod_mapping = pl.read_csv(
    f's3://{bucket}/BIT/docs/productmapping_pybit.txt',
    separator='|',
)
geo_code_mapper = pl.from_pandas(
    pd.read_excel(f's3://{bucket}/BIT/docs/geo_id_full.xlsx')
)

# load() creates globals with these names (mp_spec_seg_dec, MASTER_UNI).
load('mp_spec_seg_dec')
load('MASTER_UNI')

# Product codes kept when the Plantrak data is filtered on PROD_CD.
fetch_products = [
    'LI1', 'LI2', 'LI3',
    'TRU', 'AMT', 'LAC',
    'MOT', 'LUB', 'IRL',
]

---

Formulary -
---

In [5]:
# FORMULARY
# Maps a raw GROUP_TYPE value to the plan_type label used in this notebook.
# Values not listed here are sent to 'Others' where the mapping is applied.
group_type_mapping = dict([
    ('HIX', 'Commercial'),
    ('Com', 'Commercial'),
    ('Cash', 'Cash'),
    ('Voucher', 'Voucher'),
    ('FFS', 'FFS'),
    ('Mgd Medicaid', 'Mgd Medicaid'),
    ('Part D', 'Part D'),
    ('MAC A', 'Others'),
])

def classify_plan_class(status):
    """Collapse a formulary status string into one coverage class.

    The match is case-insensitive and looks only at the start of the string.

    Parameters
    ----------
    status : str
        Raw formulary status text. Must be a string; `None` raises AttributeError.

    Returns
    -------
    str
        One of "COVERED", "PREFERRED", "NON PREFERRED", "NOT COVERED",
        or "N_A" when no known prefix matches.
    """
    label = status.upper()
    if label.startswith(("COVERED", "ON PDL")):
        return "COVERED"
    if label.startswith("PREFERRED"):
        return "PREFERRED"
    if label.startswith("NON-PREFERRED"):
        return "NON PREFERRED"
    if label.startswith(("NON-PDL", "NOT COVERED")):
        return "NOT COVERED"
    return "N_A"

# Read only the formulary columns that are used below.
fm = pl.read_parquet(
    pln+'FORMULARY.parquet',
    columns = [
        'IMS_PLAN_ID', 'GROUP_TYPE', 'FORMULARY_GROUP_STATUS',
        'PFAM_CD', 'PFAM_NAME', 'IRWD_FGN_NAME', 'BRAND',
    ],
)

# Relabel brand code 'IBR' as 'IRL'; every other brand code passes through unchanged.
fm = fm.with_columns(
    BRAND = pl.when(pl.col('BRAND').eq('IBR')).then(pl.lit('IRL')).otherwise(pl.col('BRAND'))
)

# Keep rows whose product family equals the brand, plus rows with an empty-string brand.
fm = fm.filter(pl.col('PFAM_CD').eq(pl.col('BRAND')) | pl.col('BRAND').eq(''))

fm = (
    fm
    .with_columns(
        pl.col('IMS_PLAN_ID').cast(pl.Int64),
        # NOTE: group types missing from group_type_mapping (and nulls) land in 'Others'.
        plan_type = (
            pl.col('GROUP_TYPE')
            .map_elements(lambda x: group_type_mapping.get(x,'Others'), return_dtype=pl.Utf8)
            .fill_null('Others')
        ),
    )
    .rename({'IMS_PLAN_ID':'PlanID'})
    .drop('GROUP_TYPE')
    # Missing statuses are treated as 'N_A' before classification.
    .with_columns(
        plan_class = (
            pl.col('FORMULARY_GROUP_STATUS')
            .fill_null(pl.lit('N_A'))
            .map_elements(classify_plan_class, return_dtype=pl.String)
        )
    )
    .drop('FORMULARY_GROUP_STATUS')
    .unique()
)

###############
# HARD CODED -
# PlanID 13670614 is forced to plan_type 'Others' regardless of its GROUP_TYPE.
fm = fm.with_columns(
    plan_type = pl.when(pl.col("PlanID") == 13670614).then(pl.lit('Others')).otherwise(pl.col("plan_type"))
)
###############

# fm2: one row per payer (IRWD_FGN_NAME) and product family (PFAM_CD), with every
# distinct coverage class for that pair joined into a single ' / '-separated label.
# The classes are sorted before joining: unique() alone gives no ordering guarantee,
# so without the sort the same data could yield 'Covered / Preferred' on one run
# and 'Preferred / Covered' on the next.
# Title-casing turns 'N_A' into 'N_a' and 'NON PREFERRED' into 'Non Preferred'.
fm2 = (
    fm
    .select('PFAM_CD','IRWD_FGN_NAME','plan_class').unique()
    .group_by(['IRWD_FGN_NAME','PFAM_CD'])
    .agg(
        pl.col('plan_class').unique().sort().str.concat(' / ').alias('plan_class')
    )
    .with_columns(pl.col('plan_class').str.to_titlecase())
)

Plantrak -
---

In [6]:
# Output -> ln1
# TUF at PROD, IID, PLAN level.
#
# Steps, in order:
#   1. read only the required columns and rename MonthKey to PeriodKey
#   2. keep only the products listed in fetch_products
#   3. parse PeriodKey (via its string form, format %Y%m%d) into a date
#   4. exclude PlanIDs whose 10-character zero-padded form starts with '000002'
#      NOTE(review): the original comment gave the excluded prefix as "0000002"
#      (seven characters) while the code tests '000002' (six) - confirm which is intended.
#   5. attach the payer name (IRWD_FGN_NAME) from the formulary by PlanID
#      NOTE: PlanIDs absent from the formulary keep a null payer name.
#      NOTE(review): if one PlanID carries several payer names in fm, this left
#      join repeats the volume row once per name - verify PlanID maps to one name.
ln = (
    pl.read_parquet(
        mpln+'LAX_N.parquet',
        columns=['IID','MonthKey','PFAM_CD','PROD_CD','PlanID','TUF'],
    )
    .rename({'MonthKey':'PeriodKey'})
    .filter(pl.col('PROD_CD').is_in(fetch_products))
    .with_columns(pl.col('PeriodKey').cast(pl.Utf8).str.to_date("%Y%m%d"))
    .filter(~pl.col('PlanID').cast(pl.Utf8).str.zfill(10).str.starts_with('000002'))
    .join(fm.select(['PlanID','IRWD_FGN_NAME']).unique(), on='PlanID', how='left')
)

# Distinct reporting periods present in the data, newest first.
# Nulls are dropped so that a missing PeriodKey can never occupy a slot in the
# list and silently shift every window below.
# NOTE(review): windows are counted over the periods that actually appear in the
# data, not over calendar months - a month missing from the extract widens the span.
date_list = ln['PeriodKey'].drop_nulls().unique().sort(descending=True)

# Three consecutive 6-period windows are indexed below (positions 0-17).
# Fail with an explicit message instead of a bare index error when history is short.
if len(date_list) < 18:
    raise ValueError(
        'Need at least 18 distinct PeriodKey values to build the current, prior and '
        f'prior-prior 6-period windows; found {len(date_list)}.'
    )


def _sum_tuf_window(frame, newest, oldest, out_col):
    """Sum TUF per IID / payer / PFAM_CD / PROD_CD for PeriodKey in [oldest, newest].

    Parameters
    ----------
    frame : polars DataFrame with PeriodKey, TUF and the four grouping columns.
    newest, oldest : inclusive PeriodKey bounds of the window.
    out_col : name given to the summed TUF column.
    """
    return (
        frame
        .filter((pl.col('PeriodKey') <= newest) & (pl.col('PeriodKey') >= oldest))
        .group_by(['IID','IRWD_FGN_NAME','PFAM_CD','PROD_CD'])
        .agg(pl.col('TUF').sum().alias(out_col))
    )


# current 6 months - (date_list[0] is the newest period, so the upper bound is a no-op)
ln_c = _sum_tuf_window(ln, date_list[0], date_list[5], 'TUF_c')

# prior 6 months -
ln_p = _sum_tuf_window(ln, date_list[6], date_list[11], 'TUF_p')

# prior prior 6 months - (need this for vol_change_ind)
ln_p2 = _sum_tuf_window(ln, date_list[12], date_list[17], 'TUF_p2')

# combining to get master dataset -
# contains current 6m, prior 6m and prior-prior 6m volume at IID - PAYER - PROD level.
# Outer joins keep keys that appear in only some windows; their missing volumes stay null.
ln1 = (
    ln_c
    .join(ln_p, on =['IID','IRWD_FGN_NAME','PFAM_CD','PROD_CD'], how = 'outer_coalesce')
    .join(ln_p2, on =['IID','IRWD_FGN_NAME','PFAM_CD','PROD_CD'], how = 'outer_coalesce')
)

In [7]:
# Output -> ln2
# Adds plan_type and plan_class to ln1, then drops the excluded plan types.
#
# NOTE: the plan_type join increases the row count (noted by the original author).
# NOTE(review): presumably a payer / PFAM_CD pair can carry several plan_type
# values in fm; each match repeats the volume row, and those volumes are summed
# later - confirm this is intended.
ln2 = (
    ln1
    # plan type per payer / product family; unmatched rows become 'Others'
    .join(
        fm.select(['IRWD_FGN_NAME','PFAM_CD','plan_type']).unique(),
        on = ['IRWD_FGN_NAME', 'PFAM_CD'],
        how = 'left',
    )
    .with_columns(pl.col('plan_type').fill_null(pl.lit('Others')))
    # combined coverage-class label from fm2; unmatched rows become 'N_a'
    .join(fm2, on=['IRWD_FGN_NAME', 'PFAM_CD'], how='left')
    .with_columns(pl.col('plan_class').fill_null(pl.lit('N_a')))
    # drop records with Voucher, Mgd Medicaid and FFS plan types
    .filter(pl.col('plan_type').is_in(['Voucher','Mgd Medicaid','FFS']).not_())
)

In [8]:
# Output -> ln3
# Product mapping and parent-product roll-up rows.

# Translate PROD_CD into product_id / parent_product_id and keep the working columns.
ln3 = (
    ln2
    .join(
        prod_mapping.select(['code','product_id','parent_product_id']),
        left_on = 'PROD_CD',
        right_on = 'code',
        how = 'left',
    )
    .select([
        'IID','IRWD_FGN_NAME','product_id','parent_product_id',
        'TUF_c','TUF_p','TUF_p2','plan_type','plan_class',
    ])
)

# Roll-up rows for parent products 2 and 35 ("lin and amt" per the original comment):
# volumes are summed per IID / payer / parent; plan_type and plan_class take the
# first value seen in each group.
ln3_235 = (
    ln3
    .filter(pl.col('parent_product_id').is_in([2,35]))
    .group_by(['IID','IRWD_FGN_NAME','parent_product_id'])
    .agg(
        pl.col('TUF_c','TUF_p','TUF_p2').sum(),
        pl.col('plan_type','plan_class').first(),
    )
    .rename({'parent_product_id':'product_id'})
)

# Roll-up row across every product ("lax mkt" per the original comment), stored
# as product_id 1 with plan_class fixed to 'N_a'.
ln3_1 = (
    ln3
    .group_by(['IID','IRWD_FGN_NAME'])
    .agg(
        pl.col('TUF_c','TUF_p','TUF_p2').sum(),
        pl.col('plan_type').first(),
    )
    .with_columns(
        pl.lit(1).cast(pl.Int64).alias('product_id'),
        pl.lit('N_a').alias('plan_class'),
    )
    .select(ln3_235.columns)  # same column order as ln3_235 so the frames can be stacked
)

# Stack product-level rows, parent roll-ups and the all-product roll-up.
ln3 = ln3.drop('parent_product_id').vstack(ln3_235).vstack(ln3_1)

In [9]:
# Output -> ln4
# Ranking -
# Keeps, for every IID, the five payers with the largest current volume in the
# all-product roll-up (ln3_1), then expands each kept IID / payer pair to one row
# per product_id in prod_mapping and attaches volumes and coverage class.
# NOTE(review): rank("ordinal") breaks ties by row order, and ln3_1 comes out of a
# group_by, so which payer wins a tie at rank 5 may differ between runs.
# NOTE(review): null-payer rows take part in the ranking and are only removed at
# the end of ln4, so an IID can end up with fewer than five payers - confirm intended.

ibsc_ranking = (
    ln3_1 # picking dataset with lax mkt rollups- 
    .with_columns(pl.col("TUF_c").rank("ordinal",descending=True).over(["IID"]).alias("rank"))
    .filter(pl.col('rank') <= 5)
    .sort(['IID','rank'],maintain_order=True)
    .select(['IID','IRWD_FGN_NAME','plan_type']) # Not Keeping Plan Class here as it differs by product
    # Constant key 'fl' on both sides pairs every kept row with every product_id.
    # NOTE(review): prod_mapping.product_id is not de-duplicated here; repeated ids
    # would repeat rows - verify the mapping file has one row per product_id.
    .with_columns(fl = 1)
    .join(prod_mapping.select('product_id').with_columns(fl = 1),on = 'fl',how = 'left')
    .drop('fl')
)

ln4 = (
    ibsc_ranking
    # volumes for the IID / payer / product combination (null when there is no volume row)
    .join(
        ln3.select(['IID','IRWD_FGN_NAME','product_id','TUF_c','TUF_p','TUF_p2']),
        on = ['IID','IRWD_FGN_NAME','product_id'], how = 'left'
    )
    # coverage class is looked up per payer / product, independent of IID
    .join(
        ln3.select(['IRWD_FGN_NAME','product_id','plan_class']).unique(),
        on = ['IRWD_FGN_NAME','product_id'], how = 'left'
    )
    # missing volumes are treated as zero
    .with_columns(pl.col('TUF_c').fill_null(pl.lit(0)),pl.col('TUF_p').fill_null(pl.lit(0)),pl.col('TUF_p2').fill_null(pl.lit(0)))
    # rows without a payer name are dropped from the output
    .filter(pl.col('IRWD_FGN_NAME').is_not_null())
)

Formula Based Columns -
---

In [10]:
#Output -> ln5

In [11]:
# volume growth percentage -
#   vol_growth_prc     = TUF_c / TUF_p  - 1   (current vs prior 6 months)
#   pri_vol_growth_prc = TUF_p / TUF_p2 - 1   (prior vs prior-prior 6 months)
#   prc_change         = vol_growth_prc - pri_vol_growth_prc
# Volumes were null-filled with 0 upstream, so a zero denominator yields
# +inf, -inf or NaN; all of these are turned into null. -inf is included
# so that a negative numerator over a zero denominator cannot leak into
# prc_change as an infinite value and trigger a spurious growth indicator.
# A vol_growth_prc of exactly -1.0 (current volume of zero) is also nulled.
ln5 = (
    ln4
    .with_columns(
        ((pl.col('TUF_c') / pl.col('TUF_p')) - 1).alias('vol_growth_prc').round(10)
    )
    .with_columns(
        ((pl.col('TUF_p') / pl.col('TUF_p2')) - 1).alias('pri_vol_growth_prc').round(10)
    )
    .with_columns(
        pl.col('vol_growth_prc').replace([np.inf,-np.inf,np.nan,-1.0],None),
        pl.col('pri_vol_growth_prc').replace([np.inf,-np.inf,np.nan],None)
    )
    .with_columns(
        (pl.col('vol_growth_prc')-pl.col('pri_vol_growth_prc')).alias('prc_change')
    )
)

In [12]:
# shr-
def get_shr(df):
    """Add a `shr` column: each row's TUF_c divided by the product_id == 1 TUF_c
    of the same IID / IRWD_FGN_NAME pair.

    Parameters
    ----------
    df : polars DataFrame with IID, IRWD_FGN_NAME, product_id and TUF_c columns.

    Returns
    -------
    polars DataFrame
        `df` plus `shr`, rounded to 10 decimals; inf and NaN results are set to null.
    """
    # Denominator: current volume of the product_id == 1 row per IID / payer.
    market_volume = (
        df
        .filter(pl.col('product_id')==1)
        .select('IID', 'IRWD_FGN_NAME', pl.col('TUF_c').alias('lax_TUF_c'))
    )

    with_share = (
        df
        .join(market_volume, on=['IID','IRWD_FGN_NAME'], how='left')
        .with_columns(shr = (pl.col('TUF_c')/pl.col('lax_TUF_c')).round(10))
        .drop('lax_TUF_c')
    )

    # Division by a zero denominator gives inf / NaN; report those as null.
    return with_share.with_columns(pl.col('shr').replace([np.inf,np.nan],None))


ln5 = get_shr(ln5)

In [13]:
# vol growth ind -
# 'P' when prc_change is above +0.02, 'Q' when it is below -0.02, otherwise null
# (including when prc_change itself is null).
# The trailing argument-less .drop() was removed: it dropped nothing.
ln5 = (
    ln5
    .with_columns(
        pl.when(pl.col('prc_change') > 0.02).then(pl.lit('P'))
        .when(pl.col('prc_change') < -0.02).then(pl.lit('Q'))
        .otherwise(None).alias('vol_growth_ind')
    )
)

In [14]:
# FILTER STEP -
# An IID / payer pair is kept when its product_id == 1 row has a non-zero
# current volume, or a non-null vol_growth_prc, or a non-null shr.
keep_flag_dataset = (
    ln5
    .filter(pl.col('product_id')==1)
    .select(
        'IID',
        'IRWD_FGN_NAME',
        pl.when(
            (pl.col('TUF_c')!=0)
            | pl.col('vol_growth_prc').is_not_null()
            | pl.col('shr').is_not_null()
        ).then(1).otherwise(0).alias('keep_flag'),
    )
)

# Apply the pair-level flag to every product row; pairs without a flag row are dropped too.
ln5_f = (
    ln5
    .join(keep_flag_dataset, on=['IID','IRWD_FGN_NAME'], how='left')
    .filter(pl.col('keep_flag') == 1)
    .drop('keep_flag')
)

Format and Table Fixes -
---

In [15]:
# Output -> ln6
# Feed Creation -
# Shapes ln5_f into the export layout: missing / non-finite metrics are written
# as the literal text '\N' (backslash followed by N), plan_type is upper-cased,
# columns are renamed to the feed headers and only the feed columns are kept.
# NOTE(review): the replace() calls below swap float values for a string, which
# changes the column dtype; whether that is allowed depends on the installed
# polars version - confirm before upgrading polars.
ln6 = (
    ln5_f
    .drop(['TUF_p'])
    .with_columns(
        # all expressions in this call read the ORIGINAL columns of ln5_f
        pl.col('vol_growth_prc').replace([np.nan, np.inf, -np.inf,None], '\\N'),
        # the indicator is blanked whenever the growth percentage itself is missing / non-finite
        # NOTE(review): matching null through is_in([..., None]) is version-dependent in polars - verify
        pl.when(pl.col('vol_growth_prc').is_in([np.nan, np.inf, -np.inf,None])).then(pl.lit('\\N')).otherwise(pl.col('vol_growth_ind')).alias('vol_growth_ind'),
        # a share of exactly 0 is also written as '\N'
        pl.col('shr').replace([np.nan, np.inf, -np.inf,None,0], '\\N'),
        # a current volume of exactly 0 (after rounding to 3 decimals) is written as '\N'
        pl.col('TUF_c').round(3).replace(0.0,'\\N'),
        pl.col('plan_type').str.to_uppercase()
    )
    .rename(
        {
            'IID' : 'DOCTORID',
            'IRWD_FGN_NAME' : 'PAYER',
            'plan_type' : 'PAYERTYPE',
            'plan_class' : 'COVERAGESTATUS',
            'TUF_c' : 'VOL',
            'product_id' : 'PRODUCT_ID',
            'vol_growth_prc' : 'VOL_GROWTH_PRC',
            'shr' : 'SHR',
            'vol_growth_ind' : 'VOL_GROWTH_IND'
        }
    )
    # final feed column order; every other working column is dropped here
    .select(['DOCTORID','PRODUCT_ID','PAYER','PAYERTYPE','COVERAGESTATUS','VOL','VOL_GROWTH_PRC','SHR','VOL_GROWTH_IND'])
)

# PDRP override -
# For doctors whose PDRPOptOutFlag is 'Y' in MASTER_UNI, every metric column is
# replaced by the literal '\N'. Doctors with no MASTER_UNI match are left unchanged.
pdrp = MASTER_UNI.select(['IID','PDRPOptOutFlag'])
override_columns = ['VOL','VOL_GROWTH_PRC','SHR','VOL_GROWTH_IND']
expression_list = [
    pl.when(pl.col('PDRPOptOutFlag').eq('Y'))
    .then(pl.lit('\\N'))
    .otherwise(pl.col(metric))
    .alias(metric)
    for metric in override_columns
]
ln6 = (
    ln6
    .join(pdrp, left_on='DOCTORID', right_on='IID', how='left')
    .with_columns(expression_list)
    .drop('PDRPOptOutFlag')
)

In [16]:
# Exporting feeds -
# Writes ln6 as a pipe-delimited text file with CRLF line endings and no index column.
# NOTE(review): the destination bucket is hard-coded here, whereas every input
# path is built from the `bucket` value in vars_wk.json - confirm this is deliberate.
OUT = 's3://vortex-staging-a65ced90/BIT/output/Doc360/'
ln6.to_pandas().to_csv(
    f'{OUT}Doc360_ManagedCare_Feed.txt',
    sep='|',
    lineterminator='\r\n',
    index=False,
)
print('Doc360_ManagedCare_Feed.txt Exported !')

Doc360_ManagedCare_Feed.txt Exported !


---