### Weekly_LinzessSnapshot_SalesActivity_Feed

In [1]:
import polars as pl
import pandas as pd
import gc
import numpy as np
import json

In [2]:
# Read the run settings from vars_wk.json
with open('vars_wk.json', 'r') as json_file:
    js = json.load(json_file)

# Unpack the settings referenced by the cells below
bucket, num_weeks_rx, num_weeks_calls, data_date = (
    js['bucket'],
    js['num_weeks_rx'],
    js['num_weeks_calls'],
    js['data_date'],
)

# S3 prefixes derived from the settings:
#   dflib - parquet dataframes library
#   xpn   - xponent archive folder for this data_date
dflib = f's3://{bucket}/BIT/dataframes/'
xpn = f's3://{bucket}/PYADM/weekly/archive/{data_date}/xponent/'

In [3]:
# Utility Functions -
def load(df, lib=dflib):
    """Read ``{lib}{df}.parquet`` with polars and bind it to a global named ``df``.

    Parameters
    ----------
    df : str
        Base name of the parquet file (without extension). The resulting
        DataFrame is stored in ``globals()`` under this same name.
    lib : str
        Path prefix prepended to the file name. Defaults to ``dflib``.

    Returns
    -------
    None
        The frame is published through ``globals()`` rather than returned.
    """
    parquet_path = f'{lib}{df}.parquet'
    frame = pl.read_parquet(parquet_path)
    globals()[df] = frame

In [4]:
# Importing dependencies
fetch_products = ['LI1','LI2','LI3','TRU','AMT','LAC','MOT','LUB','IRL']

# Reference documents read straight from the docs folder
prod_mapping = pl.read_csv(
    f's3://{bucket}/BIT/docs/productmapping_pybit.txt',
    separator='|',
)
geo_code_mapper = pl.from_pandas(
    pd.read_excel(f's3://{bucket}/BIT/docs/geo_id_full.xlsx')
)

# Parquet frames; load() binds each one to a global of the same name
load('mp_spec_seg_dec')
load('temp_calls')
load('temp_samples')
load('temp_abbv')

# Fixes for vortex import -> probably caused by Polars upgrades:
# force SalesRepIID to Int64 on each of the three activity frames.
rep_id_as_int64 = pl.col('SalesRepIID').cast(pl.Int64)
temp_calls = temp_calls.with_columns(rep_id_as_int64)
temp_samples = temp_samples.with_columns(rep_id_as_int64)
temp_abbv = temp_abbv.with_columns(rep_id_as_int64)



### Functions -

In [5]:
# 13 wks IW Calls, QTD IW Calls -
def process_1(df,limit,name):
    """Left-join a per-attendee distinct call count from ``temp_calls`` onto ``df``.

    Parameters
    ----------
    df : polars.DataFrame
        Frame carrying an ``IID`` column to join against.
    limit : int
        Only rows of ``temp_calls`` with ``call_week <= limit`` are counted.
    name : str
        Name of the count column added to the result.

    Returns
    -------
    polars.DataFrame
        ``df`` plus ``name``; rows with no matching attendee get null.
    """
    calls_in_window = temp_calls.filter(pl.col('call_week') <= limit)
    calls_per_attendee = calls_in_window.group_by(['AttendeeIID']).agg(
        pl.col('CallID').n_unique().alias(name)
    )
    return df.join(
        calls_per_attendee,
        left_on='IID',
        right_on='AttendeeIID',
        how='left',
    )

# abbv_visit
def process_1_5(df):
    """Left-join ``abbv_visit`` onto ``df``.

    ``abbv_visit`` is the number of distinct ``CallID`` values per
    ``AttendeeIID`` in ``temp_abbv``, restricted to rows with
    ``call_week <= num_weeks_calls``. Rows of ``df`` whose ``IID`` has no
    match get null.
    """
    distinct_calls = pl.col('CallID').n_unique().alias('abbv_visit')
    abbv_in_window = temp_abbv.filter(pl.col('call_week') <= num_weeks_calls)
    visits_per_attendee = abbv_in_window.group_by('AttendeeIID').agg(distinct_calls)
    return df.join(
        visits_per_attendee,
        left_on='IID',
        right_on='AttendeeIID',
        how='left',
    )

# Total IW Samples, Total Samples IW + ABBV (no samples for ABBV so it's the same value)
# NOTE : the samples are summed across all 3 dosages here
def process_2(df):
    """Left-join per-attendee sample totals from ``temp_samples`` onto ``df``.

    Sums ``CallProductQuantity`` per ``AttendeeIID`` over rows with
    ``sample_week <= num_weeks_calls`` and exposes the same total under two
    column names, ``total_iw_samples`` and ``total_iw_abbv_samples``.
    Rows of ``df`` whose ``IID`` has no match get null in both columns.
    """
    quantity_sum = pl.col('CallProductQuantity').sum()
    samples_in_window = temp_samples.filter(pl.col('sample_week') <= num_weeks_calls)
    # Both output columns carry the identical sum; order matches the feed layout.
    samples_per_attendee = samples_in_window.group_by('AttendeeIID').agg(
        total_iw_samples=quantity_sum,
        total_iw_abbv_samples=quantity_sum,
    )
    return df.join(
        samples_per_attendee,
        left_on='IID',
        right_on='AttendeeIID',
        how='left',
    )

# Trx Per Sample IW + ABBV -
def process_3(df):
    """Add ``trx_per_sample`` to ``df``.

    Reads ``LAX.parquet`` from the ``xpn`` folder, keeps product codes
    LI1/LI2/LI3, sums columns ``TUF1`` .. ``TUF{num_weeks_rx}`` per row and
    then per ``IID``, and divides that total by ``total_iw_abbv_samples``.
    ``df`` must therefore already contain ``IID`` and
    ``total_iw_abbv_samples``. The intermediate ``rx_qtd`` column is dropped.
    """
    # Weekly Rx columns to aggregate
    weekly_cols = [f'TUF{week}' for week in range(1, num_weeks_rx + 1)]

    rx_rows = pl.read_parquet(
        xpn+'LAX.parquet',
        columns=['IID', 'PROD_CD'] + weekly_cols,
    ).filter(pl.col('PROD_CD').is_in(['LI1','LI2','LI3']))

    rx_per_iid = (
        rx_rows
        .with_columns(rx_qtd=pl.sum_horizontal(weekly_cols))
        .group_by('IID')
        .agg(rx_qtd=pl.col('rx_qtd').sum())
    )

    # NOTE(review): a total_iw_abbv_samples of 0 makes this a division by
    # zero - confirm how the feed should represent that case.
    ratio = pl.col('rx_qtd') / pl.col('total_iw_abbv_samples')
    with_ratio = df.join(rx_per_iid, on='IID', how='left').with_columns(
        trx_per_sample=ratio
    )
    return with_ratio.drop('rx_qtd')


In [6]:
def get_feed(temp1):
    """Convert the metrics frame into the feed layout.

    Nulls in the listed metric columns are replaced with the literal marker
    ``\\N`` and the columns are renamed to the feed's names.

    The previous version applied the null fill to ``globals()['temp1']``
    while building the result from the local ``temp1`` argument, so the fill
    never reached the returned frame (and the call failed if no global
    ``temp1`` existed). The fill is now applied to the argument itself and
    no global is touched.

    Parameters
    ----------
    temp1 : polars.DataFrame
        Frame containing ``IID``, ``geography_id`` and the metric columns
        named in ``rnm_cols`` below.

    Returns
    -------
    polars.DataFrame
        New frame with feed column names; ``temp1`` itself is not modified.
    """
    null_marker = '\\N'
    # NOTE(review): 'abbv_visit' is not in this list, so its nulls are left
    # as-is - confirm whether that is intentional.
    fill_cols = ['iw_calls_13wks','qtd_iw_calls','total_iw_samples','total_iw_abbv_samples','trx_per_sample']

    # Cast to string first so that filling numeric columns with a text marker
    # does not rely on implicit supertype coercion.
    filled = temp1.with_columns(
        [pl.col(col).cast(pl.Utf8).fill_null(null_marker) for col in fill_cols]
    )

    #renaming columns according to feed
    rnm_cols = {
        'IID':'Physician_ID',
        'geography_id':'Geography_id',
        'iw_calls_13wks':'SA_NumberMetric6',
        'qtd_iw_calls':'SA_NumberMetric1',
        'abbv_visit':'SA_NumberMetric2',
        'total_iw_samples':'SA_NumberMetric3',
        'total_iw_abbv_samples':'SA_NumberMetric4',
        'trx_per_sample':'SA_NumberMetric5'
    }
    final_feed = filled.rename(rnm_cols)
    return final_feed

In [7]:
# Processing and Exporting !
# Builds the metrics frame twice - once per (call-window, feed-number) pair -
# and writes each result as a pipe-delimited text file under OUT.
# NOTE(review): OUT hard-codes the bucket name while the other paths in this
# notebook are built from `bucket` - confirm this is intentional.
OUT = 's3://vortex-staging-a65ced90/BIT/output/LinzessSnapshot/Weekly/'
# wc = call-week limit for the first calls metric, PN = feed number used in
# the file name: (4 -> P2), (13 -> P3).
for wc,PN in zip([4,13],[2,3]):
    # Function Calls-
    # Start from the IID / geography_id columns, then left-join each metric.
    temp1 = mp_spec_seg_dec.select(['IID','geography_id'])
    # The column is named 'iw_calls_13wks' in both passes, including wc = 4.
    temp1 = process_1(temp1,wc,'iw_calls_13wks')
    temp1 = process_1(temp1,num_weeks_calls,'qtd_iw_calls')
    temp1 = process_1_5(temp1)
    temp1 = process_2(temp1)
    # process_3 needs total_iw_abbv_samples, so it must run after process_2.
    temp1 = process_3(temp1)
    feed_dataset = get_feed(temp1)
    #Exporting Feeds-
    # NOTE(review): to_csv is called without index=False, so pandas writes its
    # row index as an extra leading column - confirm the consumer expects it.
    # NOTE(review): the file name says MetricPerformance while the notebook
    # title says SalesActivity - confirm the intended feed name.
    feed_dataset.to_pandas().to_csv(f'{OUT}Weekly_LinzessSnapshot_MetricPerformance_P{PN}_Feed.txt', sep='|')
    print(f'LS Metric Performance {PN} Exported !')

LS Metric Performance 2 Exported !


LS Metric Performance 3 Exported !
