In [1]:
%run ../base_setup.ipynb

Foundation: 2.11.018
Transition: 3.01.007
Engines   : 2.03.028


-------------------------
## Build Intervention Feedback raw synthetic file

In [2]:
builder = SyntheticBuilder.from_env('action_feedback')

In [3]:
tools = builder.intent_model

In [4]:
builder.pm.reset_intents()
builder.pm_persist()

--------------------------

In [5]:
sample_size = 1000

------------------------

**get the data from the members dataset on s3**

In [6]:
# Set up the members data
factory_members = "s3://project-hadron-cs-repo/factory/healthcare/members/factory_transition_members_dataset_v05.parquet"
builder.add_connector_uri(connector_name='members', uri=factory_members, profile_name='cogscaledev')

In [7]:
# pass an empty dataframe  of sample_size
df = tools.canonical2dict(method='@empty', size=sample_size)

# Get the columns from the Members file
df = builder.tools.model_concat(df, other='members', as_rows=False, headers=['member_id'], column_name='member_reference')
builder.add_column_description(column_name='member_reference', description="member reference data taken from the members distribution")

--------------------------

In [8]:
builder.canonical_report(df)

Unnamed: 0,Attributes (1),dType,%_Null,%_Dom,Count,Unique,Observations
0,member_id,int64,0.0%,0.1%,1000,1000,"max=999180789 | min=100080999 | mean=552519344.42 | dominant=[100080999, 101849499]"


--------------------------

In [9]:
# feedback_id
df['feedback_id'] = tools.get_datetime(start=0, until=1, at_most=1, date_format="%Y%m%d%H%S", ordered='asc', size=sample_size, column_name='feedback_id')
builder.add_column_description(column_name='feedback_id', description="a unique reference id from the feedback system unique across all feeback batches")

In [10]:
df['feedback_dt'] = tools.get_datetime(start=-18, until=-1, at_most=1, ordered=True, relative_freq=[1,4,8,3,4,6,1,0.5,0.1], size=sample_size, column_name='feedback_dt')
builder.add_column_description(column_name='feedback_dt', description="a date time when the feedback was constructed")

------------------

In [11]:
# reporting system
df['reporting_system'] = tools.get_category(selection=['SydneyCare', 'RCP', 'Agent Assist', 'CCS', 'RCS Analytics', 'Google Analytics'], relative_freq=[5, 1, 1, 8], size=sample_size, column_name='reporting_system')
builder.add_column_description(column_name='reporting_system', description="The name or reference of the system the feedback originated from")

In [12]:
# The reporting system codes
selection = [builder.tools.select2dict(column='reporting_system', condition="@ == 'CCS'")]

action = builder.tools.action2dict(method='get_sample', sample_name='complaint')
default = builder.tools.action2dict(method='get_string_pattern', pattern="lddd-dddd", choice_only=False)

df['reporting_code'] = builder.tools.correlate_selection(df, selection=selection, action=action, default_action=default, column_name='reporting_code', intent_order=0)
builder.add_column_description(column_name='reporting_code', description="any reference code asocciated with the feedback response from that system")

------------------

In [13]:
# intervention id
df['actionable_id'] = tools.get_category(selection=['47613313', '61284651', '92188326'], relative_freq=[5,2,1], size=sample_size, column_name='actionable_id')
builder.add_column_description(column_name='actionable_id', description="an intervention reference id to the intervation this feedback relates too")

------------------

In [14]:
df['has_permission'] = tools.get_category(selection=[1, 0], relative_freq=[20,1], size=sample_size, column_name='has_permission')
builder.add_column_description(column_name='has_permission', description="If the profile has their permissions set favourably for this intervention")

In [15]:
df['has_interacted'] = tools.get_category(selection=[1, 0], relative_freq=[40,1], size=sample_size, column_name='has_interacted')
builder.add_column_description(column_name='has_interacted', description="If the member has interacted with the intervention, e.g. contacted, enacted upon an action or read a notification")

In [16]:
df['reason_msg_id'] = tools.get_number(from_value=101, to_value=121, relative_freq=[10,3,2,1,1,1,1,1], size=sample_size, column_name='reason_msg_id')
builder.add_column_description(column_name='reason_msg_id', description="A reference id to a finite set of internal message responses groupings relating to the feedback")

----------------------

In [17]:
# The date of the interaction
selection = [builder.tools.select2dict(column='has_interacted', condition="@ == 1")]

action = builder.tools.action2dict(method='correlate_dates', header='feedback_dt', offset=-6, jitter=3, jitter_units='D', date_format="%Y-%m-%d %H:%M:%S")
default = builder.tools.action2dict(method='@constant', value='')

df['has_interacted_dt'] = builder.tools.correlate_selection(df, selection=selection, action=action, default_action=default, column_name='has_interacted_dt', intent_order=0)
builder.add_column_description(column_name='has_interacted_dt', description="a recorded date time when the recorded interaction occured")

----------------------

In [18]:
# The response feedback to the intervention from the member
selection = [builder.tools.select2dict(column='has_interacted', condition="@ == 1")]


action = builder.tools.action2dict(method='get_category', selection=['Positive', 'Negative', 'Neutral', 'No Response'], relative_freq=[5, 2, 10, 50])
default = builder.tools.action2dict(method='@constant', value='NA')

df['profile_feedback'] = builder.tools.correlate_selection(df, selection=selection, action=action, default_action=default, column_name='profile_feedback', intent_order=0)
builder.add_column_description(column_name='profile_feedback', description="The feedback response to the intervention directly from the profile though the channel preference")

In [19]:
# A star rating of the member
selection = [builder.tools.select2dict(column='has_interacted', condition="@ == 1")]


action = builder.tools.action2dict(method='get_category', selection=[1,2,3,4,5], relative_freq=[1,3,8,10,3])
default = builder.tools.action2dict(method='@constant', value=0)

df['profile_rating'] = builder.tools.correlate_selection(df, selection=selection, action=action, default_action=default, column_name='profile_rating', intent_order=0)
builder.add_column_description(column_name='profile_rating', description="The feedback rating score provided at feedback time")

------------------
### Inferred feedback

In [20]:
# if has interacted then if there is an observed action
selection = [builder.tools.select2dict(column='has_interacted', condition="@ == 1")]

action = builder.tools.action2dict(method='get_category', selection=[1, 0], relative_freq=[6, 4])

df['observed_action'] = builder.tools.correlate_selection(df, selection=selection, action=action, default_action=0, column_name='observed_action', intent_order=0)
builder.add_column_description(column_name='observed_action', description="An inferred action on the intervention from a transitive system e.g. claim made")


In [21]:
# which channel the observed action was on
selection = [builder.tools.select2dict(column='observed_action', condition="@ == 1")]

action = builder.tools.action2dict(method='get_category', selection=['claims', 'gov', 'external'], relative_freq=[1, 0.1, 0.01])

df['observed_channel'] = builder.tools.correlate_selection(df, selection=selection, action=action, default_action='', column_name='observed_channel', intent_order=0)
builder.add_column_description(column_name='observed_channel', description="The channel from which the observation came")

In [22]:
# which channel the observed action was on
selection = [builder.tools.select2dict(column='observed_action', condition="@ == 1")]

action = builder.tools.action2dict(method='correlate_dates', header='has_interacted_dt', offset=2, jitter=1, jitter_units='D', date_format="%Y-%m-%d %H:%M:%S")

df['observed_dt'] = builder.tools.correlate_selection(df, selection=selection, action=action, default_action='', column_name='observed_dt', intent_order=0)
builder.add_column_description(column_name='observed_dt', description="The timestamp associated with the observed action")


device information - what was the member contact device (web, iphone, etc.)
observed_action
observed_channel
observable_dt

------------------

In [23]:
builder.run_synthetic_pipeline(size=sample_size)

In [24]:
builder.report_column_catalog()

Unnamed: 0,column_name,description
0,actionable_id,an intervention reference id to the intervation this feedback relates too
1,feedback_dt,a date time when the feedback was constructed
2,feedback_id,a unique reference id from the feedback system unique across all feeback batches
3,has_interacted,"If the member has interacted with the intervention, e.g. contacted, enacted upon an action or read a notification"
4,has_interacted_dt,a recorded date time when the recorded interaction occured
5,has_permission,If the profile has their permissions set favourably for this intervention
6,member_reference,member reference data taken from the members distribution
7,observed_action,An inferred action on the intervention from a transitive system e.g. claim made
8,observed_channel,The channel from which the observation came
9,observed_dt,The timestamp associated with the observed action


-------------------------
-------------------------
## Intervention Feedback

In [25]:
tr = Transition.from_env('action_feedback')

In [26]:
tr.pm.reset_intents()
tr.pm_persist()

In [27]:
df = tr.load_source_canonical()

In [28]:
df = tr.intent_model.auto_clean_header(df, rename_map={'member_id': 'profile_id'})
df = tr.intent_model.auto_transition(df)
df = tr.intent_model.to_remove(df, headers=['city', 'channel_pref'])

In [29]:
tr.upload_attributes(builder.report_column_catalog(stylise=False), label_key='column_name', text_key='description', constraints=list(df.columns))

## Run the Pipeline  save the Schema and show the Schema Report

In [30]:
# run the pipeline
tr.run_transition_pipeline()

# report the canonical
tr.canonical_report(df)

Unnamed: 0,Attributes (15),dType,%_Null,%_Dom,Count,Unique,Observations
0,actionable_id,category,0.0%,62.2%,1000,3,Sample: 47613313 | 61284651 | 92188326
1,feedback_dt,datetime64[ns],0.0%,0.1%,1000,1000,max=2021-02-10 06:50:57.835914 | min=2021-01-24 22:24:17.835832 | yr mean= 2021
2,feedback_id,int64,0.0%,0.4%,1000,530,"max=202102122159 | min=202102112120 | mean=202102120373.64 | dominant=[202102120020, 202102120225]"
3,has_interacted,bool,0.0%,96.9%,1000,2,True | False
4,has_interacted_dt,object,3.1%,3.1%,1000,969,Sample: | 2021-01-21 15:17:37 | 2021-01-23 11:30:28 | 2021-01-24 13:48:47 | 2021-01-28 23:09:21
5,has_permission,bool,0.0%,95.8%,1000,2,True | False
6,observed_action,bool,0.0%,59.8%,1000,2,False | True
7,observed_channel,category,0.0%,54.6%,1000,4,Sample: claims | nan | gov | external
8,observed_dt,object,40.2%,40.2%,1000,599,Sample: | 2021-01-27 01:44:38 | 2021-01-22 23:05:58 | 2021-01-29 10:13:42 | 2021-01-25 16:15:54
9,profile_feedback,category,0.0%,72.5%,1000,5,Sample: No Response | Neutral | Positive | NA | Negative


In [31]:
tr.report_attributes(df)

Unnamed: 0,Attributes (15),dType,Description
0,actionable_id,category,an intervention reference id to the intervation this feedback relates too
1,feedback_dt,datetime64[ns],a date time when the feedback was constructed
2,feedback_id,int64,a unique reference id from the feedback system unique across all feeback batches
3,has_interacted,bool,"If the member has interacted with the intervention, e.g. contacted, enacted upon an action or read a notification"
4,has_interacted_dt,object,a recorded date time when the recorded interaction occured
5,has_permission,bool,If the profile has their permissions set favourably for this intervention
6,observed_action,bool,An inferred action on the intervention from a transitive system e.g. claim made
7,observed_channel,category,The channel from which the observation came
8,observed_dt,object,The timestamp associated with the observed action
9,profile_feedback,category,The feedback response to the intervention directly from the profile though the channel preference
