#### Cluster Test File - Readmits

Use Case: Run this file once a new version of PALET has been pushed to its respective cluster on either the PROD or VAL server.

This file is used to confirm that our assertions about the PALET library are correct. 

Test Run - Spark, No Pandas - Focusing in on One State
1. Readmits using calculate()
2. Readmits using having()

Unit Testing and Assertions:
1. Type check
2. having()
3. calculate()
4. Time units
5. By groups

In [0]:
# only required for std cluster, if testing dedicated PALET clusters do not run this cell
# Adds a shared DBFS upload directory to the module search path; presumably this is
# where the PALET library lives on a standard cluster.
# NOTE(review): the path is hard-coded to a single user's upload folder — confirm it is still current.
import sys
sys.path.append('/dbfs/FileStore/shared_uploads/akira/lib')

In [0]:
from palet.Enrollment import Enrollment
from palet.Readmits import Readmits
import pandas as pd

#### Test Run

In [0]:
# rate = Enrollment().calculate(Readmits.allcause(30))
# Build the calculate() variant of the readmits query, restricted to a single state (NY).
rate = Enrollment().byState(['NY']).calculate(Readmits.allcause(30))

In [0]:
# having = Enrollment().having(Readmits.allcause(30))
# Build the having() variant of the readmits query, restricted to a single state (NY).
having = Enrollment().byState(['NY']).having(Readmits.allcause(30))

In [0]:
# Execute the calculate() query.
# Presumably fetch(False) skips the pandas conversion (this section is titled "Spark, No Pandas") — confirm.
rates = rate.fetch(False)

In [0]:
# Execute the having() query.
# Presumably fetch(False) skips the pandas conversion (this section is titled "Spark, No Pandas") — confirm.
havings = having.fetch(False)

Readmits with calculate()

In [0]:
# Render the calculate() result for visual inspection.
display(rates)

counter,SUBMTG_STATE_CD,year,month,readmits,admits,readmit_rate,mdcd_enrollment,chip_enrollment
In Month,36,2018,1,15129.0,87832.0,0.1722,6174153,589313
In Month,36,2018,2,13382.0,77192.0,0.1734,6173103,593022
In Month,36,2018,3,14253.0,82789.0,0.1722,6178846,606553
In Month,36,2018,4,13704.0,80020.0,0.1713,6180162,612719
In Month,36,2018,5,13951.0,83480.0,0.1671,6183503,617029
In Month,36,2018,6,13279.0,79651.0,0.1667,6181981,624423
In Month,36,2018,7,13931.0,80982.0,0.172,6187954,628209
In Month,36,2018,8,14009.0,82908.0,0.169,6192131,297788
In Month,36,2018,9,13178.0,77958.0,0.169,6187541,299907
In Month,36,2018,10,14160.0,83996.0,0.1686,6197415,302370


Readmits with having()

In [0]:
# Render the having() result for visual inspection.
display(havings)

counter,SUBMTG_STATE_CD,year,month,mdcd_enrollment,chip_enrollment
In Month,36,2018,1,727094,3516
In Month,36,2018,2,746499,3776
In Month,36,2018,3,765452,4040
In Month,36,2018,4,781528,4222
In Month,36,2018,5,795230,4338
In Month,36,2018,6,805569,4456
In Month,36,2018,7,813285,4518
In Month,36,2018,8,819256,4570
In Month,36,2018,9,821424,4629
In Month,36,2018,10,822553,4615


#### Unit Testing

Type Check

In [0]:
# Standalone Readmits object used by the type check in the next cell.
readmits = Readmits.allcause(30)

In [0]:
# Type check: use isinstance against the imported class rather than comparing the
# textual repr of the type, which breaks if the module path or repr format changes.
assert isinstance(readmits, Readmits), 'The Paletable object is not a Readmits object.'
print('The Paletable object is a Readmits object.')

In [0]:
# Enrollment query constrained by having(Readmits); used by the type check in the next cell.
enrolleereadmits = Enrollment().having(Readmits.allcause(30))

In [0]:
# Type check: having() should hand back the Enrollment object. isinstance replaces
# the brittle comparison against the textual repr of the type.
assert isinstance(enrolleereadmits, Enrollment), 'The Paletable object is not an Enrollment object.'
print('The Paletable object is an Enrollment object.')

having()

In [0]:
# Rebuild `having` without a state filter for the having() constraint check below.
having = Enrollment().having(Readmits.allcause(30))

In [0]:
# The readmit indicator aggregation must appear in the having-constraints of the query.
# Search for the fragment instead of slicing at fixed character offsets: any change in
# the surrounding text would shift the offsets and fail the test for the wrong reason.
constraints_text = str(having.having_constraints)
expected_fragment = 'sum(readmit_ind) as has_readmit'

assert expected_fragment in constraints_text, 'Enrollment counts are not representative of enrollees who were readmitted during the given time period.'
print('Enrollment counts are representative of enrollees who were readmitted during the given time period.')

calculate()

In [0]:
# Rebuild `rate` without a state filter for the calculate() column check below.
rate = Enrollment().calculate(Readmits.allcause(30))

In [0]:
# Execute the query with fetch()'s default arguments and show the column dtypes.
df = rate.fetch()
df.dtypes

In [0]:
# Every calculated readmit column must be present. Each name is tested individually:
# `'readmits' and 'admits' and 'readmit_rate' in df.columns` only tests the last name,
# because the non-empty string literals before it are simply truthy.
assert all(col in df.columns for col in ('readmits', 'admits', 'readmit_rate')), 'Calculated readmit columns are not present in DataFrame'
print('Calculated readmit columns are present in DataFrame')

Time Units

In [0]:
# Time-unit case: period='year' combined with having().
api = Enrollment(period='year').having(Readmits.allcause(30))

In [0]:
# The period passed to Enrollment must be reflected in the resulting object's timeunit.
assert api.timeunit == 'year', 'Enrollment object and Readmits sub-object are not being counted by the proper time unit.'
print('Enrollment object and Readmits sub-object are being counted by the proper time unit.')

In [0]:
# Time-unit case: period='month' combined with having().
api = Enrollment(period='month').having(Readmits.allcause(30))

In [0]:
# The period passed to Enrollment must be reflected in the resulting object's timeunit.
assert api.timeunit == 'month', 'Enrollment object and Readmits sub-object are not being counted by the proper time unit.'
print('Enrollment object and Readmits sub-object are being counted by the proper time unit.')

In [0]:
# Time-unit case: period='full' combined with calculate().
api = Enrollment(period='full').calculate(Readmits.allcause(30))

In [0]:
# The period passed to Enrollment must be reflected in the resulting object's timeunit.
assert api.timeunit == 'full', 'Enrollment object and Readmits sub-object are not being counted by the proper time unit.'
print('Enrollment object and Readmits sub-object are being counted by the proper time unit.')

In [0]:
# Time-unit case: period='partial' combined with calculate().
api = Enrollment(period='partial').calculate(Readmits.allcause(30))

In [0]:
# The period passed to Enrollment must be reflected in the resulting object's timeunit.
assert api.timeunit == 'partial', 'Enrollment object and Readmits sub-object are not being counted by the proper time unit.'
print('Enrollment object and Readmits sub-object are being counted by the proper time unit.')

by groups - having()

In [0]:
# Baseline for the having() by-group checks: yearly period, one state (CA).
# The cells that follow keep chaining additional by* groupings onto this same `having` object.
having = Enrollment(period='year').byState(['CA']).having(Readmits.allcause(30))

# fetch() with default arguments; the result exposes .dtypes and is compared against pandas dtype names below.
df = having.fetch()
display(df)

counter,SUBMTG_STATE_CD,year,mdcd_enrollment,chip_enrollment,mdcd_pct_yoy,chip_pct_yoy,STNAME,STABBREV
In Year,6,2018,1292878,6019,,,California,CA
In Year,6,2019,1187907,6265,-0.081,0.041,California,CA
In Year,6,2020,1147065,5301,-0.034,-0.154,California,CA
In Year,6,2021,1101320,3835,-0.04,-0.277,California,CA
In Year,6,2022,39747,161,-0.964,-0.958,California,CA


In [0]:
# Show the actual column dtypes for comparison with the expected schema defined next.
df.dtypes

In [0]:
# Expected column -> dtype-name mapping for the yearly, by-state having() result.
# The pairs are listed in the expected column order.
d = dict([
    ('counter', 'object'),
    ('SUBMTG_STATE_CD', 'string'),
    ('year', 'object'),
    ('mdcd_enrollment', 'int64'),
    ('chip_enrollment', 'int64'),
    ('mdcd_pct_yoy', 'float64'),
    ('chip_pct_yoy', 'float64'),
    ('STNAME', 'object'),
    ('STABBREV', 'object'),
])
# The index is taken from the dict's insertion order rather than a second, hand-copied key list.
ser = pd.Series(data=d, index=list(d.keys()))
ser

In [0]:
# Compare column labels (and order) before comparing dtypes: pandas raises ValueError when
# two Series with different labels are compared, which would bypass the assertion message.
assert list(ser.index) == list(df.columns) and (ser == df.dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')

In [0]:
# Add a by-month grouping to the existing `having` query and re-fetch.
# NOTE(review): the expected schemas in later cells retain columns from earlier groupings,
# so the by* calls appear to accumulate on `having` — confirm; cell order matters here.
df = having.byMonth().fetch()
df.dtypes

In [0]:
# Expected column -> dtype-name mapping for the having() result after byMonth().
# The pairs are listed in the expected column order.
d = dict([
    ('counter', 'object'),
    ('SUBMTG_STATE_CD', 'string'),
    ('year', 'object'),
    ('month', 'int32'),
    ('mdcd_enrollment', 'int64'),
    ('chip_enrollment', 'int64'),
    ('mdcd_pct_mom', 'float64'),
    ('chip_pct_mom', 'float64'),
    ('mdcd_pct_yoy', 'float64'),
    ('chip_pct_yoy', 'float64'),
    ('STNAME', 'object'),
    ('STABBREV', 'object'),
])
# The index is taken from the dict's insertion order rather than a second, hand-copied key list.
ser = pd.Series(data=d, index=list(d.keys()))
ser

In [0]:
# Compare column labels (and order) before comparing dtypes: pandas raises ValueError when
# two Series with different labels are compared, which would bypass the assertion message.
assert list(ser.index) == list(df.columns) and (ser == df.dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')

In [0]:
# Add a by-gender grouping to the existing `having` query and re-fetch.
# NOTE(review): the expected schema below still contains `month`, so this grouping appears
# to stack on top of the earlier byMonth() call — confirm.
df = having.byGender().fetch()
df.dtypes

In [0]:
# Expected column -> dtype-name mapping for the having() result after byGender().
# The pairs are listed in the expected column order.
d = dict([
    ('counter', 'object'),
    ('SUBMTG_STATE_CD', 'string'),
    ('gndr_cd', 'string'),
    ('year', 'object'),
    ('month', 'int32'),
    ('mdcd_enrollment', 'int64'),
    ('chip_enrollment', 'int64'),
    ('mdcd_pct_mom', 'float64'),
    ('chip_pct_mom', 'float64'),
    ('mdcd_pct_yoy', 'float64'),
    ('chip_pct_yoy', 'float64'),
    ('STNAME', 'object'),
    ('STABBREV', 'object'),
])
# The index is taken from the dict's insertion order rather than a second, hand-copied key list.
ser = pd.Series(data=d, index=list(d.keys()))
ser

In [0]:
# Compare column labels (and order) before comparing dtypes: pandas raises ValueError when
# two Series with different labels are compared, which would bypass the assertion message.
assert list(ser.index) == list(df.columns) and (ser == df.dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')

In [0]:
# Add a by-coverage-type grouping to the existing `having` query and re-fetch.
# NOTE(review): the expected schema below still contains `gndr_cd` and `month`, so this
# grouping appears to stack on top of the earlier by* calls — confirm.
df = having.byCoverageType().fetch()
df.dtypes

In [0]:
# Expected column -> dtype-name mapping for the having() result after byCoverageType().
# The pairs are listed in the expected column order.
d = dict([
    ('counter', 'object'),
    ('SUBMTG_STATE_CD', 'string'),
    ('gndr_cd', 'string'),
    ('coverage_type', 'object'),
    ('year', 'object'),
    ('month', 'int32'),
    ('mdcd_enrollment', 'int64'),
    ('chip_enrollment', 'int64'),
    ('mdcd_pct_mom', 'float64'),
    ('chip_pct_mom', 'float64'),
    ('mdcd_pct_yoy', 'float64'),
    ('chip_pct_yoy', 'float64'),
    ('STNAME', 'object'),
    ('STABBREV', 'object'),
    ('coverage_type_label', 'object'),
])
# The index is taken from the dict's insertion order rather than a second, hand-copied key list.
ser = pd.Series(data=d, index=list(d.keys()))
ser

In [0]:
# Compare column labels (and order) before comparing dtypes: pandas raises ValueError when
# two Series with different labels are compared, which would bypass the assertion message.
assert list(ser.index) == list(df.columns) and (ser == df.dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')

by groups - calculate()

In [0]:
# Baseline for the calculate() by-group checks: one state (WA), filtered with byYear([2020]).
# NOTE(review): the output recorded beneath this cell shows 2018 rows despite byYear([2020]) —
# confirm whether the filter is applied or the saved output is stale.
rate = Enrollment().byYear([2020]).byState(['WA']).calculate(Readmits.allcause(30))

# The time unit is overridden directly on the object after the query is built.
rate.timeunit = 'month'

df = rate.fetch()
display(df)

counter,SUBMTG_STATE_CD,year,month,readmits,admits,readmit_rate,mdcd_enrollment,chip_enrollment,mdcd_pct_mom,chip_pct_mom,mdcd_pct_yoy,chip_pct_yoy,STNAME,STABBREV
In Month,53,2018,1,2696.0,18167.0,0.1484,1810312,62681,,,,,Washington,WA
In Month,53,2018,2,2316.0,15511.0,0.1493,1804649,63586,-0.003,0.014,,,Washington,WA
In Month,53,2018,3,2672.0,17372.0,0.1538,1801657,64928,-0.002,0.021,,,Washington,WA
In Month,53,2018,4,2514.0,16455.0,0.1528,1792933,61984,-0.005,-0.045,,,Washington,WA
In Month,53,2018,5,2531.0,16662.0,0.1519,1792875,62665,-0.0,0.011,,,Washington,WA
In Month,53,2018,6,2359.0,15844.0,0.1489,1785377,63404,-0.004,0.012,,,Washington,WA
In Month,53,2018,7,2453.0,16303.0,0.1505,1783677,64655,-0.001,0.02,,,Washington,WA
In Month,53,2018,8,2413.0,16288.0,0.1481,1777973,66185,-0.003,0.024,,,Washington,WA
In Month,53,2018,9,2214.0,14833.0,0.1493,1770788,66993,-0.004,0.012,,,Washington,WA
In Month,53,2018,10,2393.0,15696.0,0.1525,1769913,67931,-0.0,0.014,,,Washington,WA


In [0]:
# Show the actual column dtypes for comparison with the expected schema defined next.
df.dtypes

In [0]:
# Expected column -> dtype-name mapping for the monthly, by-state calculate() result.
# The pairs are listed in the expected column order.
d = dict([
    ('counter', 'object'),
    ('SUBMTG_STATE_CD', 'string'),
    ('year', 'object'),
    ('month', 'int32'),
    ('readmits', 'float64'),
    ('admits', 'float64'),
    ('readmit_rate', 'float64'),
    ('mdcd_enrollment', 'int64'),
    ('chip_enrollment', 'int64'),
    ('mdcd_pct_mom', 'float64'),
    ('chip_pct_mom', 'float64'),
    ('mdcd_pct_yoy', 'float64'),
    ('chip_pct_yoy', 'float64'),
    ('STNAME', 'object'),
    ('STABBREV', 'object'),
])
# The index is taken from the dict's insertion order rather than a second, hand-copied key list.
ser = pd.Series(data=d, index=list(d.keys()))
ser

In [0]:
# Compare column labels (and order) before comparing dtypes: pandas raises ValueError when
# two Series with different labels are compared, which would bypass the assertion message.
assert list(ser.index) == list(df.columns) and (ser == df.dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')

In [0]:
# Add a by-ethnicity grouping to the existing `rate` query and re-fetch.
# NOTE(review): the next cell's expected schema retains `ethncty_cd` after byEligibilityType(),
# so the by* calls appear to accumulate on `rate` — confirm; cell order matters here.
df = rate.byEthnicity().fetch()
df.dtypes

In [0]:
# Expected column -> dtype-name mapping for the calculate() result after byEthnicity().
# The pairs are listed in the expected column order.
d = dict([
    ('counter', 'object'),
    ('SUBMTG_STATE_CD', 'string'),
    ('ethncty_cd', 'string'),
    ('year', 'object'),
    ('month', 'int32'),
    ('readmits', 'float64'),
    ('admits', 'float64'),
    ('readmit_rate', 'float64'),
    ('mdcd_enrollment', 'int64'),
    ('chip_enrollment', 'int64'),
    ('mdcd_pct_mom', 'float64'),
    ('chip_pct_mom', 'float64'),
    ('mdcd_pct_yoy', 'float64'),
    ('chip_pct_yoy', 'float64'),
    ('STNAME', 'object'),
    ('STABBREV', 'object'),
    ('ethnicity', 'object'),
])
# The index is taken from the dict's insertion order rather than a second, hand-copied key list.
ser = pd.Series(data=d, index=list(d.keys()))
ser

In [0]:
# Compare column labels (and order) before comparing dtypes: pandas raises ValueError when
# two Series with different labels are compared, which would bypass the assertion message.
assert list(ser.index) == list(df.columns) and (ser == df.dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')

In [0]:
# Add a by-eligibility-type grouping to the existing `rate` query and re-fetch.
# NOTE(review): the expected schema below still contains `ethncty_cd`/`ethnicity`, so this
# grouping appears to stack on top of the earlier byEthnicity() call — confirm.
df = rate.byEligibilityType().fetch()
df.dtypes

In [0]:
# Expected column -> dtype-name mapping for the calculate() result after byEligibilityType().
# The pairs are listed in the expected column order.
d = dict([
    ('counter', 'object'),
    ('SUBMTG_STATE_CD', 'string'),
    ('ethncty_cd', 'string'),
    ('eligibility_type', 'object'),
    ('year', 'object'),
    ('month', 'int32'),
    ('readmits', 'float64'),
    ('admits', 'float64'),
    ('readmit_rate', 'float64'),
    ('mdcd_enrollment', 'int64'),
    ('chip_enrollment', 'int64'),
    ('mdcd_pct_mom', 'float64'),
    ('chip_pct_mom', 'float64'),
    ('mdcd_pct_yoy', 'float64'),
    ('chip_pct_yoy', 'float64'),
    ('STNAME', 'object'),
    ('STABBREV', 'object'),
    ('ethnicity', 'object'),
    ('eligibility_category', 'object'),
])
# The index is taken from the dict's insertion order rather than a second, hand-copied key list.
ser = pd.Series(data=d, index=list(d.keys()))
ser

In [0]:
# Compare column labels (and order) before comparing dtypes: pandas raises ValueError when
# two Series with different labels are compared, which would bypass the assertion message.
assert list(ser.index) == list(df.columns) and (ser == df.dtypes).all(), 'The columns or data types returned do not match the expected values.'
print('DataFrame contains the correct columns.')