In [1]:
%run task_setup.ipynb

Foundation: 2.11.030
Transition: 3.02.034


-------------------------
## Build Feedback synthetic file

In [2]:
# Create the SyntheticBuilder component for the 'feedback' task via its from_env factory.
builder = SyntheticBuilder.from_env('feedback')

In [3]:
# Short alias for the builder's intent model; the get_* / correlate_* calls below go through it.
tools = builder.intent_model

In [4]:
# Reset the intents held by the builder's property manager, then persist that state,
# so the intents recorded by the cells below start from a clean slate
# (presumably this clears intents left over from an earlier run — confirm).
builder.pm.reset_intents()
builder.pm_persist()

------------------------
### Load the watchlist

In [5]:
# Register the 'feedback' connector against the watchlist location.
# ${PLAN_WATCHLIST} is kept as a literal placeholder inside the URI
# (presumably substituted from the environment at load time — confirm).
builder.add_connector_uri(
    connector_name='feedback',
    uri="s3://project-hadron-cs-repo/domain/helloworld/data/feedback/watchlist/${PLAN_WATCHLIST}",
    template_aligned=False,
)

In [6]:
# Start the working frame from the 'feedback' connector; the intent is recorded under 'watchlist'.
df = tools.frame_starter(
    canonical='feedback',
    column_name='watchlist',
)
builder.add_column_description(
    column_name='watchlist',
    description="extracted watchlist from the plan executor",
)

In [7]:
# Row count of the watchlist; reused as `size` for the generated (get_*) columns further down.
sample_size = len(df.index)

In [8]:
# Profile the loaded watchlist (dtype, null %, dominance, unique counts) before modifying it.
builder.canonical_report(df)

Unnamed: 0,Attributes (7),dType,%_Null,%_Dom,Count,Unique,Observations
0,activation_id,object,2.5%,83.4%,1727,3,Sample: 5d0001a7-b61d-4d73-83f7-071eabb6312e | 9d0ae5a5-7783-44ec-bcb7-618478f748a4
1,intervention_id,object,0.0%,83.4%,1727,3,Sample: Outreach nurse contacts high flu risk member | Prompt member to call the nurseline for infor...
2,plan_id,object,0.0%,83.4%,1727,3,Sample: ['ZvJqpKwGV1' '9jqVgsoBeR' '9jqVdT45eR' 'Ga5Zds8qON' '9wDNs8tmBA'  'wWEs1F7tnB' 'wErsmfotVq'...
3,profile_id,object,0.0%,0.3%,1727,968,Sample: 105849998 | 134568991 | 143595989 | 122981494 | 131819492
4,watch_contract,object,0.0%,100.0%,1727,1,Sample: ABC
5,watch_start,datetime64[ns],0.0%,83.4%,1727,3,max=2021-03-09 15:20:02.445625 | min=2021-03-09 15:19:58.963942 | yr mean= 2021
6,watch_stop,datetime64[ns],0.0%,83.4%,1727,3,max=2021-03-09 17:20:01.421872 | min=2021-03-09 15:19:58.963948 | yr mean= 2021


### Modify the watchlist

In [9]:
# Reduce each plan_id list to a single id (list_size=1, random_choice disabled).
df['plan_id'] = tools.correlate_choice(
    df,
    header='plan_id',
    list_size=1,
    random_choice=False,
    column_name='plan_id',
)
builder.add_column_description(
    column_name='plan_id',
    description="temporary modification of the Plan id, taking only one id from the list",
)

In [10]:
# Distribution of intervention labels; these are the values mapped to numeric categories next.
df['intervention_id'].value_counts()

Outreach nurse contacts high flu risk member                           1440
Prompt member to call the nurseline for information about flu shots     243
Do nothing                                                               44
Name: intervention_id, dtype: int64

In [11]:
# The three intervention labels, in the order they map to categories 1, 2 and 3.
selection = [
    'Outreach nurse contacts high flu risk member',
    'Prompt member to call the nurseline for information about flu shots',
    'Do nothing',
]

# Position in `selection` -> numeric category (0 -> 1, 1 -> 2, 2 -> 3); default_action=4 is passed for anything else.
category_by_position = {position: position + 1 for position in range(len(selection))}

df['intervention_cat'] = tools.correlate_categories(
    df,
    header='intervention_id',
    correlations=selection,
    actions=category_by_position,
    default_action=4,
    column_name='intervention_cat',
)
builder.add_column_description(
    column_name='intervention_cat',
    description="for clarity changing the intervention_id string to a numberic category",
)

**adjust watch start**

In [12]:
# One (start, until) window per intervention category 1, 2, 3 — all negative, i.e. in the past.
day_windows = [(-4, -3), (-3, -2), (-2, -1)]
actions = {
    position: tools.action2dict(method='get_datetime', start=start, until=until, ignore_time=True, date_format="%Y-%m-%d %H:%M")
    for position, (start, until) in enumerate(day_windows)
}

# First pass (intent_order=0): replace watch_start with a date drawn from the category's window.
df['watch_start'] = tools.correlate_categories(
    df,
    header='intervention_cat',
    correlations=[1, 2, 3],
    actions=actions,
    column_name='watch_start',
    intent_order=0,
)
builder.add_column_description(
    column_name='watch_start',
    description="modify the watch start to place it in the past",
)

In [13]:
# (hours, minutes) offset applied to watch_start for intervention categories 1, 2, 3 respectively.
hour_minute_offsets = [(8, 20), (12, 3), (7, 31)]
actions = {
    position: tools.action2dict(method='correlate_dates', header='watch_start', offset={'hours': hours, 'minutes': minutes})
    for position, (hours, minutes) in enumerate(hour_minute_offsets)
}

# Second pass (intent_order=1): shift the watch_start produced by the first pass by the category's offset.
df['watch_start'] = tools.correlate_categories(
    df,
    header='intervention_cat',
    correlations=[1, 2, 3],
    actions=actions,
    column_name='watch_start',
    intent_order=1,
)

**adjust watch end**

In [14]:
# All three intervention categories share the same window (start=-1, until=0).
actions = {
    position: tools.action2dict(method='get_datetime', start=-1, until=0, ignore_time=True, date_format="%Y-%m-%d %H:%M")
    for position in range(3)
}

# First pass (intent_order=0): replace watch_stop; rows outside categories 1-3 get the empty-string default.
df['watch_stop'] = tools.correlate_categories(
    df,
    header='intervention_cat',
    correlations=[1, 2, 3],
    actions=actions,
    default_action='',
    column_name='watch_stop',
    intent_order=0,
)
builder.add_column_description(
    column_name='watch_stop',
    description="modify the watch end after the watch_start",
)

In [15]:
# (hours, minutes) offset applied to watch_stop for intervention categories 1, 2, 3 respectively.
hour_minute_offsets = [(-7, 45), (-4, 3), (-1, 23)]
actions = {
    position: tools.action2dict(method='correlate_dates', header='watch_stop', offset={'hours': hours, 'minutes': minutes})
    for position, (hours, minutes) in enumerate(hour_minute_offsets)
}

# Second pass (intent_order=1): shift the watch_stop produced by the first pass by the category's offset.
df['watch_stop'] = tools.correlate_categories(
    df,
    header='intervention_cat',
    correlations=[1, 2, 3],
    actions=actions,
    default_action='',
    column_name='watch_stop',
    intent_order=1,
)

------------------------
### Feedback Date

In [16]:
# Jitter size (in 'D' units) used when deriving feedback_dt from watch_start, per intervention category 1, 2, 3.
jitter_by_category = [3, 2, 1]
actions = {
    position: tools.action2dict(method='correlate_dates', header='watch_start', jitter=jitter, jitter_units='D', jitter_freq=[0, 1])
    for position, jitter in enumerate(jitter_by_category)
}

df['feedback_dt'] = tools.correlate_categories(
    df,
    header='intervention_cat',
    correlations=[1, 2, 3],
    actions=actions,
    column_name='feedback_dt',
)
builder.add_column_description(
    column_name='feedback_dt',
    description="The date when feedback was recieved",
)

------------------------
### Add the generated (`get_*`) columns

In [17]:
# feedback_id: ascending timestamp-formatted ids, one per watchlist row.
# at_most=1 is passed so that (presumably) no value is issued more than once — confirm.
df['feedback_id'] = tools.get_datetime(
    start=0,
    until=1,
    at_most=1,
    date_format="%Y%m%d%H%M%S",
    ordered='asc',
    size=sample_size,
    column_name='feedback_id',
)
builder.add_column_description(
    column_name='feedback_id',
    description="a unique reference id from the feedback system unique across all feeback batches",
)

In [18]:
# action_success: 1 or 0, weighted 99:1 towards 1.
df['action_success'] = tools.get_category(
    selection=[1, 0],
    relative_freq=[99,1],
    size=sample_size,
    column_name='action_success',
)
builder.add_column_description(
    column_name='action_success',
    description="If the profile has their permissions set favourably for this intervention",
)

In [19]:
# action_reward, first pass (intent_order=0): 1 or 0, weighted 40:1 towards 1.
df['action_reward'] = tools.get_category(
    selection=[1, 0],
    relative_freq=[40,1],
    size=sample_size,
    column_name='action_reward',
    intent_order=0,
)
builder.add_column_description(
    column_name='action_reward',
    description="If the member has interacted with the intervention, e.g. contacted, enacted upon an action or read a notification",
)

In [20]:
# action_reward, second pass (intent_order=1): force the reward to 0 where the intervention
# category is 3 ('Do nothing') OR the action did not succeed; every other row takes the
# value from the existing action_reward column via the '@header' default.
selection = [
    builder.tools.select2dict(column='intervention_cat', condition="@ == 3"),
    builder.tools.select2dict(column='action_success', condition="@ == 0", logic='OR'),
]
action = builder.tools.action2dict(method='@constant', value=0)
default = builder.tools.action2dict(method='@header', header='action_reward')

df['action_reward'] = builder.tools.correlate_selection(
    df,
    selection=selection,
    action=action,
    default_action=default,
    column_name='action_reward',
    intent_order=1,
)

In [21]:
# The date of the interaction: only rows with action_reward == 1 get a timestamp,
# derived from feedback_dt with a jitter in hours; all other rows are left as NaN.
selection = [
    builder.tools.select2dict(column='action_reward', condition="@ == 1"),
]
action = builder.tools.action2dict(
    method='correlate_dates',
    header='feedback_dt',
    jitter=3,
    jitter_units='h',
    jitter_freq=[1,0],
)
default = builder.tools.action2dict(method='@constant', value=np.nan)

df['action_reward_dt'] = builder.tools.correlate_selection(
    df,
    selection=selection,
    action=action,
    default_action=default,
    column_name='action_reward_dt',
)
builder.add_column_description(
    column_name='action_reward_dt',
    description="a recorded date time when the recorded interaction occured",
)

------------------

In [22]:
# Map each intervention label to the system that reports its feedback (matched by position).
selection = [
    'Outreach nurse contacts high flu risk member',
    'Prompt member to call the nurseline for information about flu shots',
    'Do nothing',
]
actions = dict(enumerate(['CSS', 'SydneyCare', 'N/A']))

df['reporting_system'] = tools.correlate_categories(
    df,
    header='intervention_id',
    correlations=selection,
    actions=actions,
    column_name='reporting_system',
)
builder.add_column_description(
    column_name='reporting_system',
    description="The system that reported the feedback",
)

------------------
### Inferred feedback

In [23]:
# If the member has interacted (action_reward == 1), decide whether there is also an
# observed action: 1 or 0 drawn with relative weights 0.001 : 1, so 1 is rare.
# Rows without an interaction are set to the constant 0.
selection = [builder.tools.select2dict(column='action_reward', condition="@ == 1")]

action = builder.tools.action2dict(method='get_category', selection=[1, 0], relative_freq=[0.001,1])
default = builder.tools.action2dict(method='@constant', value=0)

df['inferred_reward'] = builder.tools.correlate_selection(df, selection = selection, action=action, default_action=default, column_name='inferred_reward', intent_order=0)
# The original description stopped mid-sentence ("a signal to indicate the "), leaving an
# unfinished entry in the column catalog and the transition attribute report.
# NOTE(review): the completed wording is inferred from this cell's logic — confirm with the data owner.
builder.add_column_description(column_name='inferred_reward', description="a signal to indicate the reward was inferred from an observed action")


In [24]:
# Confidence of the inferred causation: rows with inferred_reward == 1 get a number
# between 0 and 1.0 (precision 2, weighted by relative_freq); all other rows get 0.
selection = [
    builder.tools.select2dict(column='inferred_reward', condition="@ == 1"),
]
action = builder.tools.action2dict(
    method='get_number',
    from_value=0,
    to_value=1.0,
    precision=2,
    relative_freq=[0.01, 0.01, 0.1, 1, 3, 2, 0.1],
)
default = builder.tools.action2dict(method='@constant', value=0)

df['inferred_causation'] = builder.tools.correlate_selection(
    df,
    selection=selection,
    action=action,
    default_action=default,
    column_name='inferred_causation',
    intent_order=0,
)
builder.add_column_description(
    column_name='inferred_causation',
    description="a confidence value of the inferred causation of the action to the reward",
)

In [25]:
# Which channel the observed action was on: rows with inferred_reward == 1 get a
# weighted pick from the channel list; all other rows get 'N/A'.
selection = [
    builder.tools.select2dict(column='inferred_reward', condition="@ == 1"),
]
action = builder.tools.action2dict(
    method='get_category',
    selection=['BI systems', 'Claims', 'Remmitance', 'Contact Center Systems', 'Government', 'External', 'Others'],
    relative_freq=[4, 2, 2, 1, 0.5, 0.1, 0.01],
)

df['inferred_channel'] = builder.tools.correlate_selection(
    df,
    selection=selection,
    action=action,
    default_action='N/A',
    column_name='inferred_channel',
    intent_order=0,
)
builder.add_column_description(
    column_name='inferred_channel',
    description="The channel from which the observation came",
)

In [26]:
# Timestamp of the inferred reward: rows with inferred_reward == 1 get a date derived
# from action_reward_dt (offset plus a jitter in 'D' units); all other rows are left as NaN.
selection = [
    builder.tools.select2dict(column='inferred_reward', condition="@ == 1"),
]
action = builder.tools.action2dict(
    method='correlate_dates',
    header='action_reward_dt',
    offset=1,
    jitter=1,
    jitter_units='D',
    jitter_freq=[0, 1],
)
default = builder.tools.action2dict(method='@constant', value=np.nan)

df['inferred_reward_dt'] = builder.tools.correlate_selection(
    df,
    selection=selection,
    action=action,
    default_action=default,
    column_name='inferred_reward_dt',
    intent_order=0,
)
builder.add_column_description(
    column_name='inferred_reward_dt',
    description="The timestamp associated with the inferred reward",
)


------------------

In [27]:
# List every intent recorded so far (level, order, intent, parameters) as a plain, unstyled table.
builder.report_intent(stylise=False)

Unnamed: 0,level,order,intent,parameters,creator
0,action_reward,0,get_category,"[selection=[1, 0], relative_freq=[40, 1], column_name='action_reward']",doatridge
1,action_reward,1,correlate_selection,"[selection=[{'column': 'intervention_cat', 'condition': '@ == 3'}, {'column': 'action_success', 'condition': '@ == 0', 'logic': 'OR'}], action={'method': '@constant', 'value': 0}, default_action={...",doatridge
2,action_reward_dt,0,correlate_selection,"[selection=[{'column': 'action_reward', 'condition': '@ == 1'}], action={'method': 'correlate_dates', 'header': 'feedback_dt', 'jitter': 3, 'jitter_units': 'h', 'jitter_freq': [1, 0]}, default_act...",doatridge
3,action_success,0,get_category,"[selection=[1, 0], relative_freq=[99, 1], column_name='action_success']",doatridge
4,feedback_dt,0,correlate_categories,"[header='intervention_cat', correlations=[1, 2, 3], actions={0: {'method': 'correlate_dates', 'header': 'watch_start', 'jitter': 3, 'jitter_units': 'D', 'jitter_freq': [0, 1]}, 1: {'method': 'corr...",doatridge
5,feedback_id,0,get_datetime,"[start=0, until=1, at_most=1, ordered='asc', date_format='%Y%m%d%H%M%S', column_name='feedback_id']",doatridge
6,inferred_causation,0,correlate_selection,"[selection=[{'column': 'inferred_reward', 'condition': '@ == 1'}], action={'method': 'get_number', 'from_value': 0, 'to_value': 1.0, 'precision': 2, 'relative_freq': [0.01, 0.01, 0.1, 1, 3, 2, 0.1...",doatridge
7,inferred_channel,0,correlate_selection,"[selection=[{'column': 'inferred_reward', 'condition': '@ == 1'}], action={'method': 'get_category', 'selection': ['BI systems', 'Claims', 'Remmitance', 'Contact Center Systems', 'Government', 'Ex...",doatridge
8,inferred_reward,0,correlate_selection,"[selection=[{'column': 'action_reward', 'condition': '@ == 1'}], action={'method': 'get_category', 'selection': [1, 0], 'relative_freq': [0.001, 1]}, default_action={'method': '@constant', 'value'...",doatridge
9,inferred_reward_dt,0,correlate_selection,"[selection=[{'column': 'inferred_reward', 'condition': '@ == 1'}], action={'method': 'correlate_dates', 'header': 'action_reward_dt', 'offset': 1, 'jitter': 1, 'jitter_units': 'D', 'jitter_freq': ...",doatridge


In [28]:
# Profile the in-notebook frame after all modifications and generated columns have been added.
builder.canonical_report(df)

Unnamed: 0,Attributes (18),dType,%_Null,%_Dom,Count,Unique,Observations
0,action_reward,int64,0.0%,93.1%,1727,2,max=1 | min=0 | mean=0.93 | dominant=1
1,action_reward_dt,datetime64[ns],6.9%,6.9%,1727,1526,max=2021-03-10 12:02:21 | min=2021-03-08 06:50:06 | yr mean= 2021
2,action_success,int64,0.0%,98.9%,1727,2,max=1 | min=0 | mean=0.99 | dominant=1
3,activation_id,object,2.5%,83.4%,1727,3,Sample: 5d0001a7-b61d-4d73-83f7-071eabb6312e | 9d0ae5a5-7783-44ec-bcb7-618478f748a4
4,feedback_dt,datetime64[ns],0.0%,42.8%,1727,5,max=2021-03-10 12:03:00 | min=2021-03-08 08:20:00 | yr mean= 2021
5,feedback_id,object,0.0%,0.1%,1727,1727,Sample: 20210313092356 | 20210313121239 | 20210312232138 | 20210313075106 | 20210313011249
6,inferred_causation,float64,0.0%,99.9%,1727,3,max=0.75 | min=0.0 | mean=0.0 | dominant=0.0
7,inferred_channel,object,0.0%,99.9%,1727,2,Sample: N/A | BI systems
8,inferred_reward,int64,0.0%,99.9%,1727,2,max=1 | min=0 | mean=0.0 | dominant=0
9,inferred_reward_dt,datetime64[ns],99.9%,99.9%,1727,3,max=2021-03-10 10:41:24 | min=2021-03-10 06:58:14 | yr mean= 2021


In [29]:
# With simulate=True this returns the ordered list of intents (column, order, method) shown below
# (presumably without generating any data — confirm).
tools.run_intent_pipeline(simulate=True)

Unnamed: 0,column,order,method
0,watchlist,0,frame_starter
1,feedback_id,0,get_datetime
2,action_success,0,get_category
3,plan_id,0,correlate_choice
4,intervention_cat,0,correlate_categories
5,watch_start,0,correlate_categories
6,watch_start,1,correlate_categories
7,watch_stop,0,correlate_categories
8,watch_stop,1,correlate_categories
9,feedback_dt,0,correlate_categories


In [30]:
# Run the builder's component pipeline, passing the 'feedback' connector name as the canonical.
builder.run_component_pipeline(canonical='feedback')

In [31]:
# Show the column descriptions registered through add_column_description in the cells above.
builder.report_column_catalog()

Unnamed: 0,column_name,description
0,action_reward,"If the member has interacted with the intervention, e.g. contacted, enacted upon an action or read a notification"
1,action_reward_dt,a recorded date time when the recorded interaction occured
2,action_success,If the profile has their permissions set favourably for this intervention
3,feedback_dt,The date when feedback was recieved
4,feedback_id,a unique reference id from the feedback system unique across all feeback batches
5,inferred_causation,a confidence value of the inferred causation of the action to the reward
6,inferred_channel,The channel from which the observation came
7,inferred_reward,a signal to indicate the
8,inferred_reward_dt,The timestamp associated with the inferred reward
9,intervention_cat,for clarity changing the intervention_id string to a numberic category


-------------------------
-------------------------
## Intervention Feedback

In [32]:
# Create the Transition component for the 'feedback' task via its from_env factory.
tr = Transition.from_env('feedback')

In [33]:
# Reset the intents held by the transition's property manager, then persist that state,
# so the cleaner intents recorded below start from a clean slate
# (presumably this clears intents left over from an earlier run — confirm).
tr.pm.reset_intents()
tr.pm_persist()

In [34]:
# Load the transition's source canonical; note this rebinds `df`, replacing the builder-side frame.
df = tr.load_source_canonical()

In [35]:
# Cleaning steps, applied in order; each call takes and returns the frame.
# 1. clean the headers, with a rename map from 'member_id' to 'profile_id'
df = tr.cleaners.auto_clean_header(df, rename_map={'member_id': 'profile_id'})
# 2. let the cleaner pick column types automatically (the report below shows bool / category / int64 results)
df = tr.cleaners.auto_transition(df)
# 3. convert columns whose name matches '_dt' to a date type
df = tr.cleaners.to_date_type(df, regex=['_dt'])
# 4. drop the columns matching 'watch_' and 'intervention_cat'; they are absent from the report below
df = tr.cleaners.to_remove(df, regex=['watch_', 'intervention_cat'])

In [36]:
# Copy the builder's column catalog onto the transition: 'column_name' supplies the label,
# 'description' the text, and the current df columns are passed as the constraints
# (presumably restricting the upload to columns that still exist — confirm).
tr.upload_attributes(
    builder.report_column_catalog(stylise=False),
    label_key='column_name',
    text_key='description',
    constraints=list(df.columns),
)

## Run the Pipeline, save the Schema and show the Schema Report

In [37]:
# run the transition component pipeline (its return value, if any, is not captured here)
tr.run_component_pipeline()

# report the canonical
# NOTE(review): this profiles the in-notebook `df` produced by the cleaner cell above,
# not anything returned by run_component_pipeline — confirm that is intended.
tr.canonical_report(df)

Unnamed: 0,Attributes (14),dType,%_Null,%_Dom,Count,Unique,Observations
0,action_reward,bool,0.0%,94.0%,1727,2,False | True
1,action_reward_dt,datetime64[ns],6.0%,6.0%,1727,1532,max=2021-03-10 12:02:40 | min=2021-03-08 06:50:00 | yr mean= 2021
2,action_success,bool,0.0%,98.9%,1727,2,True | False
3,activation_id,category,0.0%,83.4%,1727,3,Sample: 5d0001a7-b61d-4d73-83f7-071eabb6312e | 9d0ae5a5-7783-44ec-bcb7-618478f748a4 | None
4,feedback_dt,datetime64[ns],0.0%,42.2%,1727,5,max=2021-03-10 12:03:00 | min=2021-03-08 08:20:00 | yr mean= 2021
5,feedback_id,int64,0.0%,0.1%,1727,1727,"max=20210313123659 | min=20210312123814 | mean=20210312640857.9 | dominant=[20210312123814, 20210312123840]"
6,inferred_causation,category,0.0%,99.9%,1727,2,Sample: 0.0 | 0.43
7,inferred_channel,category,0.0%,99.9%,1727,2,Sample: N/A | Remmitance
8,inferred_reward,bool,0.0%,99.9%,1727,2,False | True
9,inferred_reward_dt,datetime64[ns],99.9%,99.9%,1727,2,max=2021-03-09 07:25:40 | min=2021-03-09 07:25:40 | yr mean= 2021


In [38]:
# Show each column of df with its dtype and the description uploaded from the builder's catalog.
tr.report_attributes(df)

Unnamed: 0,Attributes (14),dType,Description
0,action_reward,bool,"If the member has interacted with the intervention, e.g. contacted, enacted upon an action or read a notification"
1,action_reward_dt,datetime64[ns],a recorded date time when the recorded interaction occured
2,action_success,bool,If the profile has their permissions set favourably for this intervention
3,activation_id,category,
4,feedback_dt,datetime64[ns],The date when feedback was recieved
5,feedback_id,int64,a unique reference id from the feedback system unique across all feeback batches
6,inferred_causation,category,a confidence value of the inferred causation of the action to the reward
7,inferred_channel,category,The channel from which the observation came
8,inferred_reward,bool,a signal to indicate the
9,inferred_reward_dt,datetime64[ns],The timestamp associated with the inferred reward
