#### Cluster Test File

Use Case: Run this file once a new version of PALET has been pushed to its respective cluster on either the PROD or VAL server.

Purpose: This file does not return DataFrames; it is used to confirm that our assertions about the PALET library are correct.

Functionality Testing:
1. Paletable objects, e.g. Enrollment, Eligibility, etc.
2. Counts for enrollment
3. By groups, e.g. byMonth(), byState(), etc.
4. Time units, i.e. month, year, full month, partial month
5. Filtering by Chronic Condition

In [0]:
# Only required for the std cluster; if testing dedicated PALET clusters, do not run this cell.
import sys

PALET_LIB_PATH = '/dbfs/FileStore/shared_uploads/akira/lib'

# Guard the append so that re-running this cell does not stack duplicate sys.path entries.
if PALET_LIB_PATH not in sys.path:
    sys.path.append(PALET_LIB_PATH)

In [0]:
from palet.Enrollment import Enrollment
from palet.Eligibility import Eligibility
import pandas as pd
from palet.Diagnoses import Diagnoses
from palet.ServiceCategory import ServiceCategory

#### Paletable Objects

In [0]:
# Create the Enrollment object that the later by-group cells reuse.
enrollapi = Enrollment()

# isinstance checks the class itself instead of comparing against its printed repr string.
assert isinstance(enrollapi, Enrollment), 'The Paletable object is not an Enrollment object.'
print('The Paletable object is an Enrollment object.')

In [0]:
# Create the Eligibility object that the later by-group cells reuse.
eligapi = Eligibility()

# isinstance checks the class itself instead of comparing against its printed repr string.
assert isinstance(eligapi, Eligibility), 'The Paletable object is not an Eligibility object.'
print('The Paletable object is an Eligibility object.')

#### Enrollment Counts
Work in progress

Yearly Counts

In [0]:
# Build a year-grouped Enrollment query, then fetch its results.
api = Enrollment()
api = api.byYear()
df = api.fetch()

In [0]:
# Keep only the two enrollment count columns that are compared against expected values.
count_columns = ['mdcd_enrollment', 'chip_enrollment']
df = df[count_columns]

df

Unnamed: 0,mdcd_enrollment,chip_enrollment
0,37533400,1680784
1,87618078,3162475
2,94942910,3789852
3,95748823,4259367
4,94605172,4556629
5,93217105,4363985
6,93339719,3974170
7,97208839,3510349


In [0]:
# Expected yearly enrollment counts, one list per column, in row order.
year = {
    'mdcd_enrollment': [
        37533400, 87618078, 94942910, 95748823,
        94605172, 93217105, 93339719, 97208839,
    ],
    'chip_enrollment': [
        1680784, 3162475, 3789852, 4259367,
        4556629, 4363985, 3974170, 3510349,
    ],
}

In [0]:
# The built-in all() over a DataFrame iterates its column labels (non-empty strings, always
# truthy), so the original check could never fail. Compare labels, shape and every value instead.
expected_counts = pd.DataFrame(year)
counts_match = (
    list(df.columns) == list(expected_counts.columns)
    and df.shape == expected_counts.shape
    and bool((df.values == expected_counts.values).all())
)
assert counts_match, 'Yearly counts for Medicaid enrollment do not match the expected counts.'
print('Yearly counts for Medicaid enrollment match the expected counts.')

#### By Groups - Filters

Enrollment object with by groups

In [0]:
# Apply the month grouping to the existing Enrollment object and fetch the result.
# NOTE(review): enrollapi is reused by later cells whose expected columns still include the
# earlier groupings, so by-groups appear to accumulate on it — confirm.
month_query = enrollapi.byMonth()
df = month_query.fetch()

df.dtypes

In [0]:
# Expected column -> dtype-name mapping for the byMonth() result, in column order.
d = dict(
    counter='object',
    year='object',
    month='int32',
    mdcd_enrollment='int64',
    chip_enrollment='int64',
    mdcd_pct_mom='float64',
    chip_pct_mom='float64',
    mdcd_pct_yoy='float64',
    chip_pct_yoy='float64',
)
# The index is taken straight from the dict keys, so the order follows the mapping above.
ser = pd.Series(data=d, index=list(d))
ser

In [0]:
# Compare labels first: pandas raises ValueError (not AssertionError) when Series with
# different labels are compared, which would hide the message below on a column mismatch.
actual_dtypes = df.dtypes
same_columns = list(actual_dtypes.index) == list(ser.index)
assert same_columns and (ser == actual_dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')

In [0]:
# Apply the race/ethnicity grouping to the existing Enrollment object and fetch the result.
race_query = enrollapi.byRaceEthnicity()
df = race_query.fetch()

df.dtypes

In [0]:
# Expected column -> dtype-name mapping for the byRaceEthnicity() result, in column order.
d = dict(
    counter='object',
    race_ethncty_flag='string',
    year='object',
    month='int32',
    mdcd_enrollment='int64',
    chip_enrollment='int64',
    mdcd_pct_mom='float64',
    chip_pct_mom='float64',
    mdcd_pct_yoy='float64',
    chip_pct_yoy='float64',
    race='object',
)
# The index is taken straight from the dict keys, so the order follows the mapping above.
ser = pd.Series(data=d, index=list(d))
ser

In [0]:
# Compare labels first: pandas raises ValueError (not AssertionError) when Series with
# different labels are compared, which would hide the message below on a column mismatch.
actual_dtypes = df.dtypes
same_columns = list(actual_dtypes.index) == list(ser.index)
assert same_columns and (ser == actual_dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')

In [0]:
# Apply the enrollment-type grouping to the existing Enrollment object and fetch the result.
enrollment_type_query = enrollapi.byEnrollmentType()
df = enrollment_type_query.fetch()

df.dtypes

In [0]:
# Expected column -> dtype-name mapping for the byEnrollmentType() result, in column order.
d = dict(
    counter='object',
    race_ethncty_flag='string',
    enrollment_type='object',
    year='object',
    month='int32',
    mdcd_enrollment='int64',
    chip_enrollment='int64',
    mdcd_pct_mom='float64',
    chip_pct_mom='float64',
    mdcd_pct_yoy='float64',
    chip_pct_yoy='float64',
    race='object',
    enrollment_type_label='object',
)
# The index is taken straight from the dict keys, so the order follows the mapping above.
ser = pd.Series(data=d, index=list(d))
ser

In [0]:
# Compare labels first: pandas raises ValueError (not AssertionError) when Series with
# different labels are compared, which would hide the message below on a column mismatch.
actual_dtypes = df.dtypes
same_columns = list(actual_dtypes.index) == list(ser.index)
assert same_columns and (ser == actual_dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')

## Eligibility object with by groups

In [0]:
# Apply the state grouping to the existing Eligibility object and fetch the result.
state_query = eligapi.byState()
df = state_query.fetch()
df.dtypes

In [0]:
# Show the SQL text reported by the Eligibility object, for manual inspection.
eligibility_sql = eligapi.sql()
print(eligibility_sql)

In [0]:
# Expected column -> dtype-name mapping for the Eligibility byState() result, in column order.
d = dict(
    SUBMTG_STATE_CD='string',
    da_run_id='int32',
    de_fil_dt='object',
    month='int32',
    elgblty_grp_cd='object',
    benes='int64',
    mdcd_enrlmt='int64',
    chip_enrlmt='int64',
    year='object',
    mdcd_pct_mom='float64',
    chip_pct_mom='float64',
    mdcd_pct_yoy='float64',
    chip_pct_yoy='float64',
    STNAME='object',
    STABBREV='object',
    eligibility_category='object',
)
# The index is taken straight from the dict keys, so the order follows the mapping above.
ser = pd.Series(data=d, index=list(d))
ser

In [0]:
# Compare labels first: pandas raises ValueError (not AssertionError) when Series with
# different labels are compared, which would hide the message below on a column mismatch.
actual_dtypes = df.dtypes
same_columns = list(actual_dtypes.index) == list(ser.index)
assert same_columns and (ser == actual_dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')

In [0]:
# Apply the gender grouping to the existing Eligibility object and fetch the result.
gender_query = eligapi.byGender()
df = gender_query.fetch()
df.dtypes

In [0]:
# Expected column -> dtype-name mapping for the Eligibility byGender() result, in column order.
d = dict(
    SUBMTG_STATE_CD='string',
    gndr_cd='string',
    da_run_id='int32',
    de_fil_dt='object',
    month='int32',
    elgblty_grp_cd='object',
    benes='int64',
    mdcd_enrlmt='int64',
    chip_enrlmt='int64',
    year='object',
    mdcd_pct_mom='float64',
    chip_pct_mom='float64',
    mdcd_pct_yoy='float64',
    chip_pct_yoy='float64',
    STNAME='object',
    STABBREV='object',
    eligibility_category='object',
)
# The index is taken straight from the dict keys, so the order follows the mapping above.
ser = pd.Series(data=d, index=list(d))
ser

In [0]:
# Compare labels first: pandas raises ValueError (not AssertionError) when Series with
# different labels are compared, which would hide the message below on a column mismatch.
actual_dtypes = df.dtypes
same_columns = list(actual_dtypes.index) == list(ser.index)
assert same_columns and (ser == actual_dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')

In [0]:
# Apply the income-bracket grouping to the existing Eligibility object and fetch the result.
income_query = eligapi.byIncomeBracket()
df = income_query.fetch()
df.dtypes

In [0]:
# Expected column -> dtype-name mapping for the Eligibility byIncomeBracket() result, in column order.
d = dict(
    SUBMTG_STATE_CD='string',
    gndr_cd='string',
    incm_cd='string',
    da_run_id='int32',
    de_fil_dt='object',
    month='int32',
    elgblty_grp_cd='object',
    benes='int64',
    mdcd_enrlmt='int64',
    chip_enrlmt='int64',
    year='object',
    mdcd_pct_mom='float64',
    chip_pct_mom='float64',
    mdcd_pct_yoy='float64',
    chip_pct_yoy='float64',
    STNAME='object',
    STABBREV='object',
    eligibility_category='object',
    income='object',
)
# The index is taken straight from the dict keys, so the order follows the mapping above.
ser = pd.Series(data=d, index=list(d))
ser

In [0]:
# Compare labels first: pandas raises ValueError (not AssertionError) when Series with
# different labels are compared, which would hide the message below on a column mismatch.
actual_dtypes = df.dtypes
same_columns = list(actual_dtypes.index) == list(ser.index)
assert same_columns and (ser == actual_dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')

#### Time Units

In [0]:
# New Enrollment object grouped by month; fetch and show the result.
api = Enrollment()
api = api.byMonth()
df = api.fetch()
display(df)

counter,year,month,mdcd_enrollment,chip_enrollment,mdcd_pct_mom,chip_pct_mom,mdcd_pct_yoy,chip_pct_yoy
In Month,2014,1,14975535,486478,,,,
In Month,2014,2,15432733,497923,0.031,0.024,,
In Month,2014,3,15886865,504583,0.029,0.013,,
In Month,2014,4,17186024,571423,0.082,0.132,,
In Month,2014,5,17367637,568343,0.011,-0.005,,
In Month,2014,6,17534955,567161,0.01,-0.002,,
In Month,2014,7,24315774,1066883,0.387,0.881,,
In Month,2014,8,24629738,1044634,0.013,-0.021,,
In Month,2014,9,24913112,1016549,0.012,-0.027,,
In Month,2014,10,31079041,1155004,0.247,0.136,,


In [0]:
# An empty frame would make .all() vacuously True, so also require at least one row.
assert not df.empty and (df['counter'] == 'In Month').all(), 'Enrollment object does not return the expected time unit'
print('Enrollment object returns the expected time unit')

In [0]:
# New Enrollment object grouped by year; fetch and show the result.
api = Enrollment()
api = api.byYear()
df = api.fetch()
display(df)

counter,year,mdcd_enrollment,chip_enrollment,mdcd_pct_yoy,chip_pct_yoy
In Year,2014,37533400,1680784,,
In Year,2015,87618078,3162475,1.334,0.882
In Year,2016,94942910,3789852,0.084,0.198
In Year,2017,95748823,4259367,0.008,0.124
In Year,2018,94605172,4556629,-0.012,0.07
In Year,2019,93217105,4363985,-0.015,-0.042
In Year,2020,93339719,3974170,0.001,-0.089
In Year,2021,97208839,3510349,0.041,-0.117


In [0]:
# An empty frame would make .all() vacuously True, so also require at least one row.
assert not df.empty and (df['counter'] == 'In Year').all(), 'Enrollment object does not return the expected time unit'
print('Enrollment object returns the expected time unit')

In [0]:
# Start from a new Enrollment object.
api = Enrollment()

# Set the time unit directly on the object; the assertion in the next cell expects
# every 'counter' value to read 'Full Month'.
api.timeunit = 'full'

df = api.fetch()

display(df)

counter,year,month,mdcd_enrollment,chip_enrollment,mdcd_pct_mom,chip_pct_mom,mdcd_pct_yoy,chip_pct_yoy
Full Month,2014,1,14900095,482406,,,,
Full Month,2014,2,15368205,489868,0.031,0.015,,
Full Month,2014,3,15817541,495619,0.029,0.012,,
Full Month,2014,4,17118449,563774,0.082,0.138,,
Full Month,2014,5,17296918,560674,0.01,-0.005,,
Full Month,2014,6,17467292,560079,0.01,-0.001,,
Full Month,2014,7,24204044,1053983,0.386,0.882,,
Full Month,2014,8,24502331,1030881,0.012,-0.022,,
Full Month,2014,9,24795474,1002708,0.012,-0.027,,
Full Month,2014,10,30892228,1135328,0.246,0.132,,


In [0]:
# An empty frame would make .all() vacuously True, so also require at least one row.
assert not df.empty and (df['counter'] == 'Full Month').all(), 'Enrollment object does not return the expected time unit'
print('Enrollment object returns the expected time unit')

In [0]:
# Start from a new Enrollment object.
api = Enrollment()

# Set the time unit directly on the object; the assertion in the next cell expects
# every 'counter' value to read 'Partial Month'.
api.timeunit = 'partial'

df = api.fetch()

display(df)

counter,year,month,mdcd_enrollment,chip_enrollment,mdcd_pct_mom,chip_pct_mom,mdcd_pct_yoy,chip_pct_yoy
Partial Month,2014,1,75440,4072,,,,
Partial Month,2014,2,64528,8055,-0.145,0.978,,
Partial Month,2014,3,69324,8964,0.074,0.113,,
Partial Month,2014,4,67575,7649,-0.025,-0.147,,
Partial Month,2014,5,70719,7669,0.047,0.003,,
Partial Month,2014,6,67663,7082,-0.043,-0.077,,
Partial Month,2014,7,111730,12900,0.651,0.822,,
Partial Month,2014,8,127407,13753,0.14,0.066,,
Partial Month,2014,9,117638,13841,-0.077,0.006,,
Partial Month,2014,10,186813,19676,0.588,0.422,,


In [0]:
# An empty frame would make .all() vacuously True, so also require at least one row.
assert not df.empty and (df['counter'] == 'Partial Month').all(), 'Enrollment object does not return the expected time unit'
print('Enrollment object returns the expected time unit')

#### Context (Object) Changing

In [0]:
# Start the context-switch check from an Enrollment object.
conapi = Enrollment()

# isinstance checks the class itself instead of comparing against its printed repr string.
assert isinstance(conapi, Enrollment), 'The Paletable object is not an Enrollment object.'
print('The Paletable object is an Enrollment object.')

In [0]:
# Pass the existing object into the Eligibility constructor; the result must be an Eligibility.
# NOTE(review): [6278, 6280] are presumably run ids — confirm against the PALET API.
conapi = Eligibility([6278, 6280], conapi)

# isinstance checks the class itself instead of comparing against its printed repr string.
assert isinstance(conapi, Eligibility), 'The Paletable object is not an Eligibility object.'
print('The Paletable object is an Eligibility object.')

#### Chronic Condition
Work in progress

Filtering and marking by one chronic condition

In [0]:
# Diagnosis codes used for the 'AFib' marker.
# NOTE(review): these look like ICD-10 I21–I23 (myocardial infarction) codes rather than
# atrial fibrillation (I48.x) — confirm the list matches the intended condition.
AFib = [
    'I230', 'I231', 'I232', 'I233', 'I234', 'I235', 'I236', 'I237', 'I238',
    'I213', 'I214', 'I219',
    'I220', 'I221', 'I222', 'I228', 'I229',
    'I21A1', 'I21A9',
    'I2101', 'I2102', 'I2109', 'I2111', 'I2119', 'I2121', 'I2129',
]

In [0]:
# Build the monthly query first, then the diagnosis filter, then attach it under the
# 'AFib' label (same evaluation order as a single chained expression).
monthly_enrollment = Enrollment([6280]).byMonth()
afib_inpatient = Diagnoses.where(ServiceCategory.inpatient, AFib)
ccapi = monthly_enrollment.mark(afib_inpatient, 'AFib')

In [0]:
# Fetch the marked query; the next cell asserts that the marker column is present.
df = ccapi.fetch()
display(df)

counter,year,month,AFib,mdcd_enrollment,chip_enrollment,mdcd_pct_mom,chip_pct_mom,mdcd_pct_yoy,chip_pct_yoy
In Month,2021,1,0,87572662,2853684,,,,
In Month,2021,1,1,185953,29,-0.998,-1.0,-0.998,-1.0
In Month,2021,2,0,88066483,2842216,472.595,98006.448,,
In Month,2021,2,1,184838,32,-0.998,-1.0,-0.998,-1.0
In Month,2021,3,0,88729652,2824540,479.04,88265.875,,
In Month,2021,3,1,184298,31,-0.998,-1.0,-0.998,-1.0
In Month,2021,4,0,89307790,2823082,483.584,91066.161,,
In Month,2021,4,1,183007,30,-0.998,-1.0,-0.998,-1.0
In Month,2021,5,0,89828588,2815974,489.848,93864.8,,
In Month,2021,5,1,181300,30,-0.998,-1.0,-0.998,-1.0


In [0]:
# The label passed to mark() should appear as a column in the fetched frame.
# The messages previously misspelled "condition".
assert 'AFib' in df.columns, 'Marked chronic condition is not present in DataFrame'
print('Marked chronic condition is present in DataFrame')

In [0]:
# Diagnosis codes used for the 'Diabetes' marker, grouped by code prefix.
# The list was previously one physical line that had been broken in the middle of two
# string literals ('E103393' and 'E13621'), which is a syntax error; it is rejoined here.
Diabetes = [
    # E08
    'E0800', 'E0801', 'E0810', 'E0811', 'E0821', 'E0822', 'E0829',
    'E08311', 'E08319', 'E08321', 'E083211', 'E083212', 'E083213', 'E083219',
    'E08329', 'E083291', 'E083292', 'E083293', 'E083299',
    'E08331', 'E083311', 'E083312', 'E083313', 'E083319',
    'E08339', 'E083391', 'E083392', 'E083393', 'E083399',
    'E08341', 'E083411', 'E083412', 'E083413', 'E083419',
    'E08349', 'E083491', 'E083492', 'E083493', 'E083499',
    'E08351', 'E083511', 'E083512', 'E083513', 'E083519',
    'E083521', 'E083522', 'E083523', 'E083529',
    'E083531', 'E083532', 'E083533', 'E083539',
    'E083541', 'E083542', 'E083543', 'E083549',
    'E083551', 'E083552', 'E083553', 'E083559',
    'E08359', 'E083591', 'E083592', 'E083593', 'E083599',
    'E0836', 'E0837X1', 'E0837X2', 'E0837X3', 'E0837X9', 'E0839',
    'E0840', 'E0841', 'E0842', 'E0843', 'E0844', 'E0849',
    'E0851', 'E0852', 'E0859',
    'E08610', 'E08618', 'E08620', 'E08621', 'E08622', 'E08628', 'E08630', 'E08638', 'E08641', 'E08649',
    'E0865', 'E0869', 'E088', 'E089',
    # E09
    'E0900', 'E0901', 'E0910', 'E0911', 'E0921', 'E0922', 'E0929',
    'E09311', 'E09319', 'E09321', 'E093211', 'E093212', 'E093213', 'E093219',
    'E09329', 'E093291', 'E093292', 'E093293', 'E093299',
    'E09331', 'E093311', 'E093312', 'E093313', 'E093319',
    'E09339', 'E093391', 'E093392', 'E093393', 'E093399',
    'E09341', 'E093411', 'E093412', 'E093413', 'E093419',
    'E09349', 'E093491', 'E093492', 'E093493', 'E093499',
    'E09351', 'E093511', 'E093512', 'E093513', 'E093519',
    'E093521', 'E093522', 'E093523', 'E093529',
    'E093531', 'E093532', 'E093533', 'E093539',
    'E093541', 'E093542', 'E093543', 'E093549',
    'E093551', 'E093552', 'E093553', 'E093559',
    'E09359', 'E093591', 'E093592', 'E093593', 'E093599',
    'E0936', 'E0937X1', 'E0937X2', 'E0937X3', 'E0937X9', 'E0939',
    'E0940', 'E0941', 'E0942', 'E0943', 'E0944', 'E0949',
    'E0951', 'E0952', 'E0959',
    'E09610', 'E09618', 'E09620', 'E09621', 'E09622', 'E09628', 'E09630', 'E09638', 'E09641', 'E09649',
    'E0965', 'E0969', 'E098', 'E099',
    # E10
    'E1010', 'E1011', 'E1021', 'E1022', 'E1029',
    'E10311', 'E10319', 'E10321', 'E103211', 'E103212', 'E103213', 'E103219',
    'E10329', 'E103291', 'E103292', 'E103293', 'E103299',
    'E10331', 'E103311', 'E103312', 'E103313', 'E103319',
    'E10339', 'E103391', 'E103392', 'E103393', 'E103399',
    'E10341', 'E103411', 'E103412', 'E103413', 'E103419',
    'E10349', 'E103491', 'E103492', 'E103493', 'E103499',
    'E10351', 'E103511', 'E103512', 'E103513', 'E103519',
    'E10359',
    'E1036', 'E1037X1', 'E1037X2', 'E1037X3', 'E1037X9', 'E1039',
    'E1040', 'E1041', 'E1042', 'E1043', 'E1044', 'E1049',
    'E1051', 'E1052', 'E1059',
    'E10610', 'E10618', 'E10620', 'E10621', 'E10622', 'E10628', 'E10630', 'E10638', 'E10641', 'E10649',
    'E1065', 'E1069', 'E108', 'E109',
    # E11
    'E1100', 'E1101', 'E1110', 'E1111', 'E1121', 'E1122', 'E1129',
    'E11311', 'E11319', 'E11321', 'E113211', 'E113212', 'E113213', 'E113219',
    'E11329', 'E113291', 'E113292', 'E113293', 'E113299',
    'E11331', 'E113311', 'E113312', 'E113313', 'E113319',
    'E11339', 'E113391', 'E113392', 'E113393', 'E113399',
    'E11341', 'E113411', 'E113412', 'E113413', 'E113419',
    'E11349', 'E113491', 'E113492', 'E113493', 'E113499',
    'E11351', 'E113511', 'E113512', 'E113513', 'E113519',
    'E113521', 'E113522', 'E113523', 'E113529',
    'E113531', 'E113532', 'E113533', 'E113539',
    'E113541', 'E113542', 'E113543', 'E113549',
    'E113551', 'E113552', 'E113553', 'E113559',
    'E11359', 'E113591', 'E113592', 'E113593', 'E113599',
    'E1136', 'E1137X1', 'E1137X2', 'E1137X3', 'E1137X9', 'E1139',
    'E1140', 'E1141', 'E1142', 'E1143', 'E1144', 'E1149',
    'E1151', 'E1152', 'E1159',
    'E11610', 'E11618', 'E11620', 'E11621', 'E11622', 'E11628', 'E11630', 'E11638', 'E11641', 'E11649',
    'E1165', 'E1169', 'E118', 'E119',
    # E13
    'E1300', 'E1301', 'E1310', 'E1311', 'E1321', 'E1322', 'E1329',
    'E13311', 'E13319', 'E13321', 'E133211', 'E133212', 'E133213', 'E133219',
    'E13329', 'E133291', 'E133292', 'E133293', 'E133299',
    'E13331', 'E133311', 'E133312', 'E133313', 'E133319',
    'E13339', 'E133391', 'E133392', 'E133393', 'E133399',
    'E13341', 'E133411', 'E133412', 'E133413', 'E133419',
    'E13349', 'E133491', 'E133492', 'E133493', 'E133499',
    'E13351', 'E133511', 'E133512', 'E133513', 'E133519',
    'E133521', 'E133522', 'E133523', 'E133529',
    'E133531', 'E133532', 'E133533', 'E133539',
    'E133541', 'E133542', 'E133543', 'E133549',
    'E133551', 'E133552', 'E133553', 'E133559',
    'E13359',
    'E1336', 'E1339',
    'E1340', 'E1341', 'E1342', 'E1343', 'E1344', 'E1349',
    'E1351', 'E1352', 'E1359',
    'E13610', 'E13618', 'E13620', 'E13621', 'E13622', 'E13628', 'E13630', 'E13638', 'E13641', 'E13649',
    'E1365', 'E1369', 'E138', 'E139',
]

In [0]:
# Monthly enrollment marked twice: once with the Diabetes code list and once with the
# AFib code list, each paired with the same (service category, number) tuples.
ccapi = (
    Enrollment([6280])
    .byMonth()
    .mark(
        Diagnoses.within(
            [(ServiceCategory.inpatient, 1), (ServiceCategory.other_services, 2)],
            Diabetes,
        ),
        'Diabetes',
    )
    .mark(
        Diagnoses.within(
            [(ServiceCategory.inpatient, 1), (ServiceCategory.other_services, 2)],
            AFib,
        ),
        'AFib',
    )
)

In [0]:
# Fetch the doubly marked query; the next cell asserts that both marker columns are present.
df = ccapi.fetch()
display(df)

counter,year,month,Diabetes,AFib,mdcd_enrollment,chip_enrollment,mdcd_pct_mom,chip_pct_mom,mdcd_pct_yoy,chip_pct_yoy
In Month,2021,1,0,0,86632910,2851703,,,,
In Month,2021,1,0,1,109130,26,-0.999,-1.0,-0.999,-1.0
In Month,2021,1,1,0,939752,1981,7.611,75.192,7.611,75.192
In Month,2021,1,1,1,76823,3,-0.918,-0.998,-0.918,-0.998
In Month,2021,2,0,0,87124677,2840215,1133.096,946737.333,,
In Month,2021,2,0,1,108111,29,-0.999,-1.0,-0.999,-1.0
In Month,2021,2,1,0,941806,2001,7.711,68.0,7.711,68.0
In Month,2021,2,1,1,76727,3,-0.919,-0.999,-0.919,-0.999
In Month,2021,3,0,0,87785703,2822487,1143.131,940828.0,,
In Month,2021,3,0,1,107570,28,-0.999,-1.0,-0.999,-1.0


In [0]:
# Both labels passed to mark() should appear as columns in the fetched frame.
# The messages previously misspelled "conditions".
assert 'AFib' in df.columns and 'Diabetes' in df.columns, 'Marked chronic conditions are not present in DataFrame'
print('Marked chronic conditions are present in DataFrame')