In [195]:
# Execute the shared setup notebook inside this kernel. Names used further down
# without any import in this notebook (SyntheticBuilder, Transition, np)
# presumably come from it -- TODO confirm.
%run task_setup.ipynb

Foundation: 2.11.030
Transition: 3.02.034


-------------------------
## Build Feedback synthetic file

In [196]:
# Create the SyntheticBuilder component for the 'feedback' task. Every intent,
# connector and column description below is registered on this instance.
builder = SyntheticBuilder.from_env('feedback')

In [197]:
# Short alias for the builder's intent model; the frame_starter, get_* and
# correlate_* calls below go through it.
# NOTE(review): some later cells use `builder.tools` instead -- presumably the
# same object, confirm and pick one spelling.
tools = builder.intent_model

In [198]:
# Reset the intents held by the builder's property manager and persist that
# state before re-declaring them, so the intent report further down should only
# list what this notebook registers (presumed from the method names -- confirm).
builder.pm.reset_intents()
builder.pm_persist()

------------------------
### Load the watchlist

In [199]:
# Register a connector named 'feedback' that points at the plan-executor
# watchlist parquet on S3.
# NOTE(review): the bucket path and the dated file name (09-03-2021) are
# hard-coded; consider lifting them into a config constant so a new extract
# does not require editing this call.
builder.add_connector_uri(connector_name='feedback', uri="s3://project-hadron-cs-repo/domain/helloworld/data/feedback/watchlist/plan_executor_results_09-03-2021.parquet", template_aligned=False)

In [200]:
# Start the working frame from the 'feedback' canonical (the connector added
# above). The step is recorded under the name 'watchlist', which is how it
# appears as the first entry of the simulated intent pipeline later on.
df = tools.frame_starter(canonical='feedback', column_name='watchlist')
builder.add_column_description(column_name='watchlist', description="extracted watchlist from the plan executor")

In [201]:
# Row count of the loaded watchlist; passed as `size=` to the get_* generators
# below so the generated columns line up with the frame.
sample_size = df.shape[0]

In [218]:
# Summarise the current state of df.
# NOTE(review): this cell's execution count (218) is later than the cells that
# follow it (203-217), so the saved output reflects df AFTER those
# modifications, not the freshly loaded watchlist. Re-run the notebook top to
# bottom to get a report of the raw load here.
builder.canonical_report(df)

Unnamed: 0,Attributes (13),dType,%_Null,%_Dom,Count,Unique,Observations
0,activation_id,object,2.5%,83.4%,1727,3,Sample: 5d0001a7-b61d-4d73-83f7-071eabb6312e | 9d0ae5a5-7783-44ec-bcb7-618478f748a4
1,feedback_dt,datetime64[ns],0.0%,42.0%,1727,5,max=2021-03-10 12:03:00 | min=2021-03-08 08:20:00 | yr mean= 2021
2,feedback_id,object,0.0%,0.1%,1727,1727,Sample: 20210313001331 | 20210312184410 | 20210313060844 | 20210313023016 | 20210313015540
3,has_interacted,int64,0.0%,95.4%,1727,2,max=1 | min=0 | mean=0.95 | dominant=1
4,has_interacted_dt,datetime64[ns],4.6%,4.6%,1727,1554,max=2021-03-10 12:02:23 | min=2021-03-08 06:50:06 | yr mean= 2021
5,has_permission,int64,0.0%,94.7%,1727,2,max=1 | min=0 | mean=0.95 | dominant=1
6,intervention_cat,int64,0.0%,83.4%,1727,3,max=3 | min=1 | mean=1.19 | dominant=1
7,intervention_id,object,0.0%,83.4%,1727,3,Sample: Outreach nurse contacts high flu risk member | Prompt member to call the nurseline for infor...
8,plan_id,object,0.0%,83.4%,1727,3,Sample: ZvJqpKwGV1 | lvXqVd5N7b | WDK5ZPq3mo
9,profile_id,object,0.0%,0.3%,1727,968,Sample: 105849998 | 122981494 | 143595989 | 134568991 | 147884488


### Modify the watchlist

In [203]:
# Overwrite plan_id in place with the result of correlate_choice on itself
# (list_size=1, random_choice=False). Per the description below, the intent is
# to keep a single id out of each list of plan ids.
df['plan_id'] = tools.correlate_choice(df, header='plan_id', list_size=1, random_choice=False, column_name='plan_id')
builder.add_column_description(column_name='plan_id', description="temporary modification of the Plan id, taking only one id from the list")

In [204]:
# Inspect the distinct intervention_id labels and their frequencies; the three
# labels shown are the ones mapped to numeric categories in the next cell.
df.intervention_id.value_counts()

Outreach nurse contacts high flu risk member                           1440
Prompt member to call the nurseline for information about flu shots     243
Do nothing                                                               44
Name: intervention_id, dtype: int64

In [205]:
# Intervention labels in the order they are mapped to categories 1, 2 and 3.
selection = [
    'Outreach nurse contacts high flu risk member',
    'Prompt member to call the nurseline for information about flu shots',
    'Do nothing',
]

# Position i in `selection` maps to category i + 1, i.e. {0: 1, 1: 2, 2: 3}.
category_by_position = {position: position + 1 for position in range(len(selection))}

# default_action=4 is presumably applied to any label not listed above -- confirm.
df['intervention_cat'] = tools.correlate_categories(
    df,
    header='intervention_id',
    correlations=selection,
    actions=category_by_position,
    default_action=4,
    column_name='intervention_cat',
)
builder.add_column_description(
    column_name='intervention_cat',
    description="for clarity changing the intervention_id string to a numberic category",
)

**adjust watch start**

In [206]:
# (start, until) bounds handed to get_datetime, one pair per intervention
# category 1, 2, 3. Presumably day offsets relative to the run date -- confirm.
watch_start_windows = [(-4, -3), (-3, -2), (-2, -1)]

actions = {
    idx: tools.action2dict(method='get_datetime', start=start, until=until, ignore_time=True, date_format="%Y-%m-%d %H:%M")
    for idx, (start, until) in enumerate(watch_start_windows)
}

# First of two steps for watch_start (intent_order=0): generate the date per category.
df['watch_start'] = tools.correlate_categories(
    df,
    header='intervention_cat',
    correlations=[1, 2, 3],
    actions=actions,
    column_name='watch_start',
    intent_order=0,
)
builder.add_column_description(
    column_name='watch_start',
    description="modify the watch start to place it in the past",
)

In [207]:
# Offset applied to the generated watch_start, one per intervention category 1, 2, 3.
watch_start_offsets = [
    {'hours':8, 'minutes': 20},
    {'hours':12, 'minutes': 3},
    {'hours':7, 'minutes': 31},
]

actions = {
    idx: tools.action2dict(method='correlate_dates', header='watch_start', offset=offset)
    for idx, offset in enumerate(watch_start_offsets)
}

# Second step for watch_start (intent_order=1): shift the value produced by order 0.
df['watch_start'] = tools.correlate_categories(
    df,
    header='intervention_cat',
    correlations=[1, 2, 3],
    actions=actions,
    column_name='watch_start',
    intent_order=1,
)

**adjust watch end**

In [208]:
# All three categories use the same get_datetime bounds (start=-1, until=0);
# each key still gets its own action dict, as in the hand-written version.
actions = {
    idx: tools.action2dict(method='get_datetime', start=-1, until=0, ignore_time=True, date_format="%Y-%m-%d %H:%M")
    for idx in range(3)
}

# First of two steps for watch_stop (intent_order=0).
# NOTE(review): default_action='' is passed here but not for watch_start --
# confirm whether the difference is deliberate.
df['watch_stop'] = tools.correlate_categories(
    df,
    header='intervention_cat',
    correlations=[1, 2, 3],
    actions=actions,
    default_action='',
    column_name='watch_stop',
    intent_order=0,
)
builder.add_column_description(
    column_name='watch_stop',
    description="modify the watch end after the watch_start",
)

In [209]:
# Offset applied to the generated watch_stop, one per intervention category 1, 2, 3.
# NOTE(review): hours are negative while minutes are positive. If the
# components are summed (as pandas DateOffset keyword arguments would be) the
# net shifts are -6h15m, -3h57m and -0h37m -- confirm this is intended.
watch_stop_offsets = [
    {'hours':-7, 'minutes': 45},
    {'hours':-4, 'minutes': 3},
    {'hours':-1, 'minutes': 23},
]

actions = {
    idx: tools.action2dict(method='correlate_dates', header='watch_stop', offset=offset)
    for idx, offset in enumerate(watch_stop_offsets)
}

# Second step for watch_stop (intent_order=1): shift the value produced by order 0.
df['watch_stop'] = tools.correlate_categories(
    df,
    header='intervention_cat',
    correlations=[1, 2, 3],
    actions=actions,
    default_action='',
    column_name='watch_stop',
    intent_order=1,
)

------------------------
### Feedback Date

In [210]:
# Jitter size (in units of 'D') per intervention category 1, 2, 3; everything
# else about the correlate_dates action is shared.
jitter_by_category = [3, 2, 1]

actions = {
    idx: tools.action2dict(method='correlate_dates', header='watch_start', jitter=jitter, jitter_units='D', jitter_freq=[0, 1])
    for idx, jitter in enumerate(jitter_by_category)
}

# feedback_dt is derived from watch_start, with the action chosen by category.
df['feedback_dt'] = tools.correlate_categories(
    df,
    header='intervention_cat',
    correlations=[1, 2, 3],
    actions=actions,
    column_name='feedback_dt',
)
builder.add_column_description(
    column_name='feedback_dt',
    description="The date when feedback was recieved",
)

------------------------
### Add the generated (`get_*`) columns

In [212]:
# feedback_id: one generated datetime per row, rendered as a %Y%m%d%H%M%S
# string and ordered ascending. at_most=1 presumably caps each value at one
# occurrence, which is what makes the ids unique within this batch -- confirm.
# NOTE(review): the description claims uniqueness across all batches; nothing
# in this call guarantees that beyond the second-resolution timestamp.
df['feedback_id'] = tools.get_datetime(start=0, until=1, at_most=1, date_format="%Y%m%d%H%M%S", ordered='asc', size=sample_size, column_name='feedback_id')
builder.add_column_description(column_name='feedback_id', description="a unique reference id from the feedback system unique across all feeback batches")

In [213]:
# Random 1/0 flag per row, weighted 20:1 in favour of 1 (the saved report shows
# a mean of about 0.95).
df['has_permission'] = tools.get_category(selection=[1, 0], relative_freq=[20,1], size=sample_size, column_name='has_permission')
builder.add_column_description(column_name='has_permission', description="If the profile has their permissions set favourably for this intervention")

In [214]:
# Step 1 for has_interacted (intent_order=0): random 1/0 flag per row, weighted
# 40:1 in favour of 1. A later cell overrides it for the 'Do nothing' category.
df['has_interacted'] = tools.get_category(selection=[1, 0], relative_freq=[40,1], size=sample_size, column_name='has_interacted', intent_order=0)
builder.add_column_description(column_name='has_interacted', description="If the member has interacted with the intervention, e.g. contacted, enacted upon an action or read a notification")

In [215]:
# Rows whose intervention_cat is 3 (the 'Do nothing' label in the mapping above)
# cannot have been interacted with.
selection = [builder.tools.select2dict(column='intervention_cat', condition="@ == 3")]

# For all other rows keep the value already in has_interacted.
default = builder.tools.action2dict(method='@header', header='has_interacted')

# Selected rows get 0, the rest keep their generated flag.
# NOTE(review): intent_order jumps from 0 to 2 for has_interacted (there is no
# order 1) -- harmless if orders are only sorted, but confirm it is intentional.
df['has_interacted'] = builder.tools.correlate_selection(df, selection=selection, action=0, default_action=default, column_name='has_interacted', intent_order=2)

In [217]:
# The date of the interaction: only rows flagged has_interacted == 1 get one.
selection = [builder.tools.select2dict(column='has_interacted', condition="@ == 1")]

# Selected rows: derive the timestamp from feedback_dt with a jitter of 3 in
# units of 'h'. Every other row: the constant NaN, i.e. a missing value.
# `np` is not imported in this notebook; presumably provided by task_setup.
action = builder.tools.action2dict(method='correlate_dates', header='feedback_dt', jitter=3, jitter_units='h', jitter_freq=[1,0])
default = builder.tools.action2dict(method='@constant', value=np.nan)

df['has_interacted_dt'] = builder.tools.correlate_selection(df, selection=selection, action=action, default_action=default, column_name='has_interacted_dt', intent_order=0)
builder.add_column_description(column_name='has_interacted_dt', description="a recorded date time when the recorded interaction occured")

------------------

In [219]:
# Intervention labels, in the same order as the reporting systems listed below.
selection = [
    'Outreach nurse contacts high flu risk member',
    'Prompt member to call the nurseline for information about flu shots',
    'Do nothing',
]

# Position i in `selection` maps to the i-th system: {0: 'CSS', 1: 'SydneyCare', 2: 'N/A'}.
actions = dict(enumerate(['CSS', 'SydneyCare', 'N/A']))

df['reporting_system'] = tools.correlate_categories(
    df,
    header='intervention_id',
    correlations=selection,
    actions=actions,
    column_name='reporting_system',
)
builder.add_column_description(
    column_name='reporting_system',
    description="The system that reported the feedback",
)

------------------
### Inferred feedback

In [220]:
# Intended meaning: for members who interacted, whether an action was observed.
# NOTE(review): as written this is a placeholder -- the selection is empty and
# both action and default_action are 0, so every row gets 0 (the saved
# canonical report shows max=0, min=0). As a result the two columns derived
# from `inferred_change == 1` below are entirely empty/null.
df['inferred_change'] = builder.tools.correlate_selection(df, selection = [], action=0, default_action=0, column_name='inferred_change', intent_order=0)
builder.add_column_description(column_name='inferred_change', description="An inferred action on the intervention from a transitive system e.g. claim made")


In [221]:
# Which channel the observed action was on: only rows with inferred_change == 1
# get a channel.
selection = [builder.tools.select2dict(column='inferred_change', condition="@ == 1")]

# Channel is drawn from three options weighted 1 : 0.1 : 0.01.
action = builder.tools.action2dict(method='get_category', selection=['claims', 'gov', 'external'], relative_freq=[1, 0.1, 0.01])

# Rows that are not selected get the empty string.
df['change_channel'] = builder.tools.correlate_selection(df, selection=selection, action=action, default_action='', column_name='change_channel', intent_order=0)
builder.add_column_description(column_name='change_channel', description="The channel from which the observation came")

In [222]:
# When the observed action happened: only rows with inferred_change == 1 get a
# timestamp.
selection = [builder.tools.select2dict(column='inferred_change', condition="@ == 1")]

# Derived from has_interacted_dt with offset=2 and a jitter of 1 in units of
# 'D' (an integer offset is presumably interpreted as days -- confirm).
action = builder.tools.action2dict(method='correlate_dates', header='has_interacted_dt', offset=2, jitter=1, jitter_units='D', jitter_freq=[0, 1])

# Rows that are not selected get NaN. `np` is presumably provided by task_setup.
df['change_dt'] = builder.tools.correlate_selection(df, selection=selection, action=action, default_action=np.nan, column_name='change_dt', intent_order=0)
builder.add_column_description(column_name='change_dt', description="The timestamp associated with the observed action")


------------------

In [223]:
# List every intent registered on the builder (level, order, method and
# parameters) as a plain, unstyled table for review.
builder.report_intent(stylise=False)

Unnamed: 0,level,order,intent,parameters,creator
0,change_channel,0,correlate_selection,"[selection=[{'column': 'inferred_change', 'condition': '@ == 1'}], action={'method': 'get_category', 'selection': ['claims', 'gov', 'external'], 'relative_freq': [1, 0.1, 0.01]}, default_action=''...",doatridge
1,change_dt,0,correlate_selection,"[selection=[{'column': 'inferred_change', 'condition': '@ == 1'}], action={'method': 'correlate_dates', 'header': 'has_interacted_dt', 'offset': 2, 'jitter': 1, 'jitter_units': 'D', 'jitter_freq':...",doatridge
2,feedback_dt,0,correlate_categories,"[header='intervention_cat', correlations=[1, 2, 3], actions={0: {'method': 'correlate_dates', 'header': 'watch_start', 'jitter': 3, 'jitter_units': 'D', 'jitter_freq': [0, 1]}, 1: {'method': 'corr...",doatridge
3,feedback_id,0,get_datetime,"[start=0, until=1, at_most=1, ordered='asc', date_format='%Y%m%d%H%M%S', column_name='feedback_id']",doatridge
4,has_interacted,0,get_category,"[selection=[1, 0], relative_freq=[40, 1], column_name='has_interacted']",doatridge
5,has_interacted,2,correlate_selection,"[selection=[{'column': 'intervention_cat', 'condition': '@ == 3'}], action=0, default_action={'method': '@header', 'header': 'has_interacted'}, column_name='has_interacted']",doatridge
6,has_interacted_dt,0,correlate_selection,"[selection=[{'column': 'has_interacted', 'condition': '@ == 1'}], action={'method': 'correlate_dates', 'header': 'feedback_dt', 'jitter': 3, 'jitter_units': 'h', 'jitter_freq': [1, 0]}, default_ac...",doatridge
7,has_permission,0,get_category,"[selection=[1, 0], relative_freq=[20, 1], column_name='has_permission']",doatridge
8,inferred_change,0,correlate_selection,"[selection=[], action=0, default_action=0, column_name='inferred_change']",doatridge
9,intervention_cat,0,correlate_categories,"[header='intervention_id', correlations=['Outreach nurse contacts high flu risk member', 'Prompt member to call the nurseline for information about flu shots', 'Do nothing'], actions={0: 1, 1: 2, ...",doatridge


In [224]:
# Summarise df after all synthetic columns have been added in this session.
builder.canonical_report(df)

Unnamed: 0,Attributes (17),dType,%_Null,%_Dom,Count,Unique,Observations
0,activation_id,object,2.5%,83.4%,1727,3,Sample: 5d0001a7-b61d-4d73-83f7-071eabb6312e | 9d0ae5a5-7783-44ec-bcb7-618478f748a4
1,change_channel,object,100.0%,100.0%,1727,1,Sample:
2,change_dt,float64,100.0%,0.0%,1727,1,max=nan | min=nan | mean=nan | dominant=[]
3,feedback_dt,datetime64[ns],0.0%,42.0%,1727,5,max=2021-03-10 12:03:00 | min=2021-03-08 08:20:00 | yr mean= 2021
4,feedback_id,object,0.0%,0.1%,1727,1727,Sample: 20210313001331 | 20210312184410 | 20210313060844 | 20210313023016 | 20210313015540
5,has_interacted,int64,0.0%,95.4%,1727,2,max=1 | min=0 | mean=0.95 | dominant=1
6,has_interacted_dt,datetime64[ns],4.6%,4.6%,1727,1554,max=2021-03-10 12:02:23 | min=2021-03-08 06:50:06 | yr mean= 2021
7,has_permission,int64,0.0%,94.7%,1727,2,max=1 | min=0 | mean=0.95 | dominant=1
8,inferred_change,int64,0.0%,100.0%,1727,1,max=0 | min=0 | mean=0.0 | dominant=0
9,intervention_cat,int64,0.0%,83.4%,1727,3,max=3 | min=1 | mean=1.19 | dominant=1


In [225]:
# With simulate=True the call returns the column/order/method listing shown
# below, which is used here to check the sequence the intents will run in.
tools.run_intent_pipeline(simulate=True)

Unnamed: 0,column,order,method
0,watchlist,0,frame_starter
1,feedback_id,0,get_datetime
2,has_permission,0,get_category
3,plan_id,0,correlate_choice
4,intervention_cat,0,correlate_categories
5,watch_start,0,correlate_categories
6,watch_start,1,correlate_categories
7,watch_stop,0,correlate_categories
8,watch_stop,1,correlate_categories
9,feedback_dt,0,correlate_categories


In [226]:
# Run the builder's component pipeline, starting from the 'feedback' canonical.
# Presumably this persists the synthetic result that the Transition component
# loads in the next section -- confirm against the connector contracts.
builder.run_component_pipeline(canonical='feedback')

In [227]:
# Show the column descriptions registered via add_column_description above.
builder.report_column_catalog()

Unnamed: 0,column_name,description
0,change_channel,The channel from which the observation came
1,change_dt,The timestamp associated with the observed action
2,feedback_dt,The date when feedback was recieved
3,feedback_id,a unique reference id from the feedback system unique across all feeback batches
4,has_interacted,"If the member has interacted with the intervention, e.g. contacted, enacted upon an action or read a notification"
5,has_interacted_dt,a recorded date time when the recorded interaction occured
6,has_permission,If the profile has their permissions set favourably for this intervention
7,inferred_change,An inferred action on the intervention from a transitive system e.g. claim made
8,intervention_cat,for clarity changing the intervention_id string to a numberic category
9,plan_id,"temporary modification of the Plan id, taking only one id from the list"


-------------------------
-------------------------
## Intervention Feedback

In [228]:
# Create the Transition component for the same 'feedback' task; it is used
# below to clean and type the synthetic output.
tr = Transition.from_env('feedback')

In [229]:
# Same reset-then-persist step as for the builder, applied to the Transition
# component's property manager before its intents are declared.
tr.pm.reset_intents()
tr.pm_persist()

In [230]:
# Rebind df to the Transition component's source canonical. From this point on
# df is no longer the in-memory frame built in the synthetic section.
df = tr.load_source_canonical()

In [231]:
# Clean the headers, renaming member_id to profile_id where that column exists.
df = tr.intent_model.auto_clean_header(df, rename_map={'member_id': 'profile_id'})
# Let the transition intent model apply its automatic typing (the report below
# shows category, bool and datetime dtypes after this step).
df = tr.intent_model.auto_transition(df)
# Drop the helper columns matching these patterns: the watch_* columns and
# intervention_cat, which were only needed to drive the synthetic correlations.
df = tr.intent_model.to_remove(df, regex=['watch_', 'intervention_cat'])

In [232]:
# Pass the builder's column catalog (column_name -> description) to the
# Transition component as attribute text, with the columns still present in df
# given as `constraints`. Relies on `builder` from the synthetic section still
# being alive in this kernel.
tr.upload_attributes(builder.report_column_catalog(stylise=False), label_key='column_name', text_key='description', constraints=list(df.columns))

## Run the Pipeline, save the Schema and show the Schema Report

In [233]:
# run the pipeline
tr.run_component_pipeline()

# report the canonical
# NOTE(review): this reports the in-memory df produced by the intent calls
# above, not whatever run_component_pipeline() just produced.
tr.canonical_report(df)

Unnamed: 0,Attributes (13),dType,%_Null,%_Dom,Count,Unique,Observations
0,activation_id,category,0.0%,83.4%,1727,3,Sample: 5d0001a7-b61d-4d73-83f7-071eabb6312e | 9d0ae5a5-7783-44ec-bcb7-618478f748a4 | None
1,change_channel,category,0.0%,100.0%,1727,1,Sample: nan
2,change_dt,datetime64[ns],100.0%,0.0%,1727,1,max=NaT | min=NaT | yr mean= nan
3,feedback_dt,category,0.0%,42.4%,1727,5,Sample: 2021-03-09 08:20:00 | 2021-03-08 08:20:00 | 2021-03-10 12:03:00 | 2021-03-09 12:03:00 | 2021...
4,feedback_id,int64,0.0%,0.1%,1727,1727,"max=20210313075030 | min=20210312075235 | mean=20210312451121.84 | dominant=[20210312075235, 20210312075300]"
5,has_interacted,bool,0.0%,94.7%,1727,2,False | True
6,has_interacted_dt,datetime64[ns],5.3%,5.3%,1727,1540,max=2021-03-10 12:02:56 | min=2021-03-08 06:50:39 | yr mean= 2021
7,has_permission,bool,0.0%,95.0%,1727,2,True | False
8,inferred_change,category,0.0%,100.0%,1727,1,Sample: 0
9,intervention_id,category,0.0%,83.4%,1727,3,Sample: Outreach nurse contacts high flu risk member | Prompt member to call the nurseline for infor...


In [234]:
# Show each attribute of df with its dtype and the uploaded description;
# columns that were never described in the builder catalog show a blank.
tr.report_attributes(df)

Unnamed: 0,Attributes (13),dType,Description
0,activation_id,category,
1,change_channel,category,The channel from which the observation came
2,change_dt,datetime64[ns],The timestamp associated with the observed action
3,feedback_dt,category,The date when feedback was recieved
4,feedback_id,int64,a unique reference id from the feedback system unique across all feeback batches
5,has_interacted,bool,"If the member has interacted with the intervention, e.g. contacted, enacted upon an action or read a notification"
6,has_interacted_dt,datetime64[ns],a recorded date time when the recorded interaction occured
7,has_permission,bool,If the profile has their permissions set favourably for this intervention
8,inferred_change,category,An inferred action on the intervention from a transitive system e.g. claim made
9,intervention_id,category,
