### Weekly_LinzessSnapshot_ProfileInfo_Feed

In [1]:
import polars as pl
import pandas as pd
import gc
import numpy as np
import json

In [2]:
# Read the weekly run configuration from the JSON file in the working directory.
with open('vars_wk.json', 'r') as json_file:
    js = json.load(json_file)

# Settings this notebook takes from the configuration:
#   bucket          - S3 bucket name interpolated into every input path below
#   IBSC_ptype_file - base name (no extension) of the IBSC payer-type Excel file
bucket, IBSC_ptype_file = js['bucket'], js['IBSC_ptype_file']

# S3 prefix that load() uses by default when reading parquet dataframes.
dflib = f's3://{bucket}/BIT/dataframes/'

In [3]:
# Utility functions
def load(df, lib=dflib):
    """Read ``<lib><df>.parquet`` with polars and bind it as a global named ``df``.

    Parameters
    ----------
    df : str
        Base name of the parquet file (without the ``.parquet`` extension).
        The same string is used as the name of the global variable that
        receives the loaded DataFrame.
    lib : str
        Path prefix placed directly in front of the file name. Defaults to
        the value ``dflib`` had when this function was defined.

    Returns
    -------
    None
        The frame is published through ``globals()`` instead of being returned.
    """
    parquet_path = f'{lib}{df}.parquet'
    frame = pl.read_parquet(parquet_path)
    globals()[df] = frame

In [4]:
# Importing dependencies

# Pipe-delimited product mapping text file.
prod_mapping = pl.read_csv(
    f's3://{bucket}/BIT/docs/productmapping_pybit.txt',
    separator='|',
)

# Excel reference files are read with pandas, then converted to polars frames.
geo_code_mapper = pl.from_pandas(
    pd.read_excel(f's3://{bucket}/BIT/docs/geo_id_full.xlsx')
)
ibsc_ptype = pl.from_pandas(
    pd.read_excel(f's3://{bucket}/BIT/docs/{IBSC_ptype_file}.xlsx')
)

# Parquet frames: load() binds each one as a global with the same name.
load('mp_spec_seg_dec')
load('MASTER_UNI')

# Product codes (not referenced by the cells visible in this notebook).
fetch_products = [
    'LI1', 'LI2', 'LI3',
    'TRU', 'AMT', 'LAC', 'MOT', 'LUB', 'IRL',
]

In [5]:
# Processing: build one profile row per MASTER_UNI record, with a combined
# address and physician name, then attach the segment/specialty and IBSC lookups.
address_line_cols = ['AddressLine1', 'AddressLine2', 'AddressLine3', 'AddressLine4']
name_cols = ['FirstName', 'LastName']

temp1 = (
    MASTER_UNI
    .select(
        ['IID', *name_cols, 'CREDENTIAL', *address_line_cols,
         'CityName', 'StateCode', 'ZIP', 'DECILE']
    )
    .with_columns(
        # NOTE(review): polars documents that concat_str propagates nulls by
        # default, so a null in any address line (or name part) would make the
        # whole Address (or Physician_Name) null - confirm the inputs hold empty
        # strings rather than nulls, or that this is the intended result.
        pl.concat_str(
            [pl.col(part) for part in address_line_cols],
            separator=' ',
        ).alias('Address'),
        pl.concat_str(
            [pl.col(part) for part in name_cols],
            separator=' ',
        ).alias('Physician_Name'),
    )
    # Left joins keep every MASTER_UNI row even when a lookup has no match.
    .join(mp_spec_seg_dec, on='IID', how='left')
    .join(ibsc_ptype, on='IID', how='left')
    # The raw address/name parts are no longer needed once combined; the
    # lower-case 'decile' column is dropped while upper-case 'DECILE' is kept.
    .drop([*address_line_cols, *name_cols, 'decile'])
)

In [6]:
# Converting to feed-ready data

# Source column name -> feed column name.
column_mapping = {
    "IID": "Physician_ID",
    "CREDENTIAL": "Credential",
    "CityName": "City",
    "StateCode": "State",
    "ZIP": "Zip",
    "DECILE": "Decile",
    "Address": "Address",
    "Physician_Name": "Physician_Name",
    "specialty_group": "Specialty",
    "segment": "Segment",
    "geography_id": "Geography_id",
    "IBSC_VALUE": "IBSCPrimaryPayerType",
}

# Column order required by the feed.
req_cols = [
    "Physician_Name", "Physician_ID", "Geography_id", "Segment",
    "Specialty", "Decile", "Address", "City", "State", "Zip",
    "IBSCPrimaryPayerType", "Credential",
]

final_feed = (
    temp1
    .rename(column_mapping)
    .with_columns(
        # Recode Segment values to match SAS (change dated 06/21).
        # NOTE(review): there is no otherwise() branch, so any Segment value
        # other than the three handled here comes out as null - confirm that
        # this is intended.
        pl.when(pl.col('Segment') == 'ALG-ONLY-TARGET').then(pl.lit('AGNT'))
        .when(pl.col('Segment') == 'Target').then(pl.lit('T'))
        .when(pl.col('Segment') == 'Non-Target').then(pl.lit('NT'))
        .alias('Segment')
    )
    # Keep only the feed columns, in feed order.
    .select(req_cols)
)

In [7]:
# Exporting feeds
# NOTE(review): the output bucket is hard-coded here, while every input path
# uses `bucket` from vars_wk.json - confirm whether this should follow the
# configuration as well.
OUT = 's3://vortex-staging-a65ced90/BIT/output/LinzessSnapshot/Weekly/'

# Write the feed as a pipe-delimited text file.
# index=False keeps pandas from writing its row index as an extra, unnamed
# leading column, so the file contains exactly the columns listed in req_cols.
final_feed.to_pandas().to_csv(
    f'{OUT}Weekly_LinzessSnapshot_ProfileInfo_Feed.txt',
    sep='|',
    index=False,
)
print('LS Profile Info Exported !')

LS Profile Info Exported !
