# Quarterly Specialty Inclusion Exclusion File

In [1]:
import pandas as pd
import polars as pl
import numpy as np

In [2]:
# Run configuration - update these values for every quarterly refresh.
# QTR is the quarter being produced and QTRP the quarter before it; they are
# interpolated into the SPEC / SPEC_P reference folders below.
QTR = '2025Q1'
QTRP = '2024Q4'
# UPDATE names the raw-delivery folder the workbook is read from (looks like a
# YYYYMMDD date). Original note: data is received on Monday of the frozen week.
UPDATE = '20241111' #Receive data Monday, Frozen week;

# INPT is the raw workbook file name (without the .xlsx extension) and TAB is
# the worksheet read from it.
INPT = 'Q12025_Specialty Inclusion Exclusion_formatted'
TAB = 'IRWD Specialty List'

# SPEC is the reference folder the current quarter's parquet is written to;
# SPEC_P is the prior quarter's folder, read during QC.
# NOTE(review): 'quaterly' is the spelling used inside these S3 keys - do not
# "correct" it here without confirming the bucket layout.
SPEC = f's3://vortex-staging-a65ced90/PYADM/quaterly/{QTR}/reference/'
SPEC_P = f's3://vortex-staging-a65ced90/PYADM/quaterly/{QTRP}/reference/'

---

In [3]:
# Load the raw specialty workbook: pandas reads the requested worksheet and the
# resulting frame is handed over to polars for the rest of the notebook.
raw_workbook_uri = f's3://vortex-staging-a65ced90/PYADM/raw/{UPDATE}/quaterly_raw/{INPT}.xlsx'
raw_sheet_pdf = pd.read_excel(raw_workbook_uri, sheet_name=TAB)
SPECIALTYEXCLUSION = pl.from_pandas(raw_sheet_pdf)

In [4]:
# Build the specialty inclusion table from the raw sheet:
#   * rows without a specialty code are dropped,
#   * code and description are upper-cased,
#   * the LIN exclusion flag becomes 'N' (EXCLUDE) / 'Y' (INCLUDE); any other
#     value yields a null SPEC_INCL,
#   * exact duplicate rows are removed.
specialty_cd_expr = pl.col('I_Spclty_Cd').str.to_uppercase().alias('SPECIALTY_CD')
specialty_desc_expr = pl.col('I_Spclty_Desc').str.to_uppercase().alias('SPECIALTY_DESCRIPTION')
lin_flag = pl.col('LIN_Specialty_Excln')
spec_incl_expr = (
    pl.when(lin_flag.eq('EXCLUDE')).then(pl.lit('N'))
    .when(lin_flag.eq('INCLUDE')).then(pl.lit('Y'))
    .otherwise(None)
    .alias('SPEC_INCL')
)

rows_with_code = SPECIALTYEXCLUSION.filter(pl.col('I_Spclty_Cd').is_not_null())
SPEC_INCL_LIN = (
    rows_with_code
    .select(specialty_cd_expr, specialty_desc_expr, spec_incl_expr)
    .unique()
)

In [5]:
# QC - compare the current specialty list with the prior quarter's export.
# Review the displayed rows manually and log any updates.
#
# Output columns:
#   OLD_NEW   '1_0' code only in the prior file, '0_1' code only in the
#             current file, '1_1' code present in both
#   DIFF_INCL 1 when the inclusion flag changed, else 0
#   DIFF_DESC 1 when the description changed, else 0
# Only rows with at least one difference are kept.
PRIOR_SPEC_INCL_LIN = pl.read_parquet(f'{SPEC_P}qtrspec_SPEC_INCL_LIN.parquet')

SPEC_INCL_DIFF_LIN = (
    # Tag each side with a constant marker before joining. Presence of a code
    # in the old/new file is then read from the marker rather than from
    # SPEC_INCL, which is legitimately null when the source flag is neither
    # INCLUDE nor EXCLUDE and would otherwise mislabel such codes as
    # added/removed.
    PRIOR_SPEC_INCL_LIN
    .with_columns(pl.lit(True).alias('_IN_PRIOR'))
    .join(
        SPEC_INCL_LIN.with_columns(pl.lit(True).alias('_IN_CURRENT')),
        on = 'SPECIALTY_CD',
        # NOTE(review): newer polars releases appear to spell this
        # how='full', coalesce=True - confirm against the pinned version
        # before changing it.
        how = 'outer_coalesce'
    )
    .with_columns(
        pl.when(pl.col('_IN_PRIOR').is_not_null() & pl.col('_IN_CURRENT').is_null()).then(pl.lit('1_0'))
        .when(pl.col('_IN_PRIOR').is_null() & pl.col('_IN_CURRENT').is_not_null()).then(pl.lit('0_1'))
        .otherwise(pl.lit('1_1')).alias('OLD_NEW'),

        # eq_missing treats null == null as equal. A plain `==` returns null
        # there, which `when` treats as false, so values that are null in both
        # quarters would be reported as a difference.
        pl.when(pl.col('SPEC_INCL').eq_missing(pl.col('SPEC_INCL_right'))).then(0)
        .otherwise(1).alias('DIFF_INCL'),
        pl.when(pl.col('SPECIALTY_DESCRIPTION').eq_missing(pl.col('SPECIALTY_DESCRIPTION_right'))).then(0)
        .otherwise(1).alias('DIFF_DESC')
    )
    .filter(
        (pl.col('DIFF_INCL') == 1) | (pl.col('DIFF_DESC') == 1) | (pl.col('OLD_NEW') != '1_1')
    )
    # Remove the helper markers so the result keeps its original columns.
    .drop(['_IN_PRIOR', '_IN_CURRENT'])
)
SPEC_INCL_DIFF_LIN

SPECIALTY_CD,SPECIALTY_DESCRIPTION,SPEC_INCL,SPECIALTY_DESCRIPTION_right,SPEC_INCL_right,OLD_NEW,DIFF_INCL,DIFF_DESC
str,str,str,str,str,str,i32,i32


In [6]:
# Export - write the current quarter's specialty table to its reference
# folder; the polars frame is converted to pandas for the parquet write.
spec_incl_out_uri = f'{SPEC}qtrspec_SPEC_INCL_LIN.parquet'
spec_incl_pdf = SPEC_INCL_LIN.to_pandas()
spec_incl_pdf.to_parquet(spec_incl_out_uri)