# Download the data files

In [113]:
# Local file names of the two CSV datasets used throughout the notebook.
interactions_filename, users_filename = 'INTERACTIONS.csv', 'USERS.csv'

The user dataset downloaded in the next cell originally comes from a public dataset on the Kaggle website.

In [114]:
# Download the customer (users) CSV and save it locally as USERS.csv.
!wget https://aws-contact-center-blog.s3.amazonaws.com/predict-customer-intent/customer11k.csv  -O USERS.csv

--2021-11-17 01:06:49--  https://aws-contact-center-blog.s3.amazonaws.com/predict-customer-intent/customer11k.csv
Resolving aws-contact-center-blog.s3.amazonaws.com (aws-contact-center-blog.s3.amazonaws.com)... 52.92.129.201
Connecting to aws-contact-center-blog.s3.amazonaws.com (aws-contact-center-blog.s3.amazonaws.com)|52.92.129.201|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 456961 (446K) [text/csv]
Saving to: ‘USERS.csv’


2021-11-17 01:06:49 (39.9 MB/s) - ‘USERS.csv’ saved [456961/456961]



# Organize Python imports

In [115]:
import boto3
import json
from datetime import datetime
from datetime import timedelta
from array import array
from calendar import monthrange
from random import seed
import random
from random import randint
import numpy as np
import pandas as pd
import csv
import string 
import time
from sagemaker import get_execution_role
import sagemaker

# Generate customer interactions Data

In this section we use the same customer IDs as available in the user data set and synthetically generate the interaction events for those customers to be used as training data. For real-world use cases, you will get these interaction events from customer interaction channels such as ISV, Web, Mobile apps.

In [116]:
###################
## Declare constants
###################

# First customer ID, as per the user dataset we have.
starting_cust_id = 30908

# Last customer ID that synthetic interactions may be generated for.
ending_cust_id = starting_cust_id + 11000

# How many interaction sequences to generate; each sequence belongs to one
# randomly chosen customer and holds one or more events.
max_interactions_sequence_count = 8000

# Starting date of the dataset: a little more than one year back from now.
event_start_date = datetime.now() - timedelta(days=380)

# Convert to epoch seconds. datetime.timestamp() is used rather than
# strftime('%s'), which is a platform-specific extension and is not available
# on every operating system.
epoch_event_start_date = int(event_start_date.timestamp())

# Sequence start times are spread over one year (number of seconds in 365 days).
event_time_range = 31536000

# Bounds, in seconds, for the random gap between two consecutive events.
max_time_between_events = 1059200 # a little over 12 days
min_time_between_events = 3900 # 65 minutes


# Let's create a fixed set of possible interactions.
# There are 6 different types of customer intents (primary intents), and each
# intent has a sequence of calls of different types (sub-intents).
# These fixed interaction sequences are used to generate the synthetic data.

intent_type = 6 # number of primary intents
interaction_sequence_length = 5 # number of sub-intents within a primary intent

# call_matrix is a 6 x 5 matrix: one row per primary intent, one column per
# sub-intent, in the order the calls happen.
# E.g. HL is the Home Loan intent and HL_NE is new account opening in the
# Home Loan product.
call_matrix = [
    ['HL_NE','HL_AS','HL_DS','HL_RP','HL_CS'],  # sub-intent sequence for primary intent HL (home loan)
    ['CC_NE','CC_CA','CC_AA','CC_LC','CC_CS'],  # sub-intent sequence for primary intent CC (credit card)
    ['AC_NE','AC_AS','AC_PA','AC_AA','AC_CS'],  # sub-intent sequence for primary intent AC (retail account)
    ['FD_NE','FD_NA','FD_AD','FD_PW','FD_CS'],  # sub-intent sequence for primary intent FD (fixed deposit)
    ['LI_NE','LI_AS','LI_PA','LI_SW','LI_CS'],  # sub-intent sequence for primary intent LI (life insurance)
    ['TA_NE','TA_AS','TA_AA','TA_FN','TA_CS'],  # sub-intent sequence for primary intent TA (trading account)
]

# Guard against the matrix and its declared dimensions drifting apart.
assert len(call_matrix) == intent_type
assert all(len(sub_intents) == interaction_sequence_length for sub_intents in call_matrix)

# One of these values is picked at random for every sequence to decide how many
# calls the customer makes for the chosen primary intent. Repeated entries make
# short sequences more likely than long ones. The values are kept as strings;
# the generator converts the picked value with int().
random_sequence_length = ['1','1','1','2','2','2','2','2','2','3','3','3','3','3','3','4','4','5','5','6','7']

###################
## Generate interactions event sequences
###################

# Columns written to the interactions CSV:
# USER_ID (customer id), ITEM_ID (sub-intent code), TIMESTAMP (epoch seconds)

# Latest epoch second at which a sequence is allowed to start.
time_lapse_between_event = epoch_event_start_date + event_time_range

# The with-statement closes the file, so no explicit close() is needed.
with open(interactions_filename, 'w', newline='') as file:
    writer = csv.writer(file, delimiter=',')
    writer.writerow(["USER_ID", "ITEM_ID", "TIMESTAMP"])

    # Generate exactly max_interactions_sequence_count sequences.
    for _ in range(max_interactions_sequence_count):

        # Pick a customer id randomly from the whole range.
        customer_id = randint(starting_cust_id, ending_cust_id)

        # Pick the primary intent shared by all calls of this sequence.
        intent_id = randint(0, intent_type - 1)

        # Randomly select how many calls are recorded for this sequence.
        max_sequence_length = int(random.choice(random_sequence_length))

        # Random start time of the sequence, within the configured time range.
        next_event_time = randint(epoch_event_start_date, time_lapse_between_event)

        # NOTE(review): a drawn value of n yields n + 1 events, capped at the
        # number of sub-intents per intent; this keeps the existing data
        # distribution. Confirm whether exactly n events was intended.
        events_in_sequence = min(max_sequence_length + 1, interaction_sequence_length)

        # Sub-intents are emitted in call_matrix order, each one later in time
        # than the previous one by a random gap.
        for event_sequence_index in range(events_in_sequence):
            writer.writerow([customer_id, call_matrix[intent_id][event_sequence_index], next_event_time])
            next_event_time += randint(min_time_between_events, max_time_between_events)

# Describe the user dataset info

In [117]:
# Load the downloaded users CSV into a DataFrame for inspection.
users_df=pd.read_csv(users_filename)

In [118]:
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() adds a stray "None" line to the output.
users_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11162 entries, 0 to 11161
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   USER_ID    11162 non-null  int64 
 1   deposit    11162 non-null  object
 2   age        11162 non-null  int64 
 3   job        11162 non-null  object
 4   marital    11162 non-null  object
 5   education  11162 non-null  object
dtypes: int64(2), object(4)
memory usage: 523.3+ KB
None


In [119]:
# Preview the first rows of the users dataset.
users_df.head()

Unnamed: 0,USER_ID,deposit,age,job,marital,education
0,30908,yes,59,admin.,married,secondary
1,30909,yes,56,admin.,married,secondary
2,30910,yes,41,technician,married,secondary
3,30911,yes,55,services,married,secondary
4,30912,yes,54,admin.,married,tertiary


# Describe the interactions dataset info

In [120]:
# Load the generated interactions CSV into a DataFrame for inspection.
interactions_df=pd.read_csv(interactions_filename)

In [121]:
# Summary statistics of the numeric columns (USER_ID and TIMESTAMP).
print(interactions_df.describe())

            USER_ID     TIMESTAMP
count  29627.000000  2.962700e+04
mean   36497.523408  1.620739e+09
std     3164.050405  9.076235e+06
min    30909.000000  1.604280e+09
25%    33773.000000  1.612899e+09
50%    36534.000000  1.620637e+09
75%    39248.000000  1.628548e+09
max    41905.000000  1.638956e+09


In [122]:
# Preview the first 20 generated interaction events.
interactions_df.head(20)

Unnamed: 0,USER_ID,ITEM_ID,TIMESTAMP
0,36168,FD_NE,1618655080
1,36168,FD_NA,1618820839
2,36168,FD_AD,1618887060
3,36168,FD_PW,1619290920
4,41412,FD_NE,1625258874
5,41412,FD_NA,1625780865
6,41412,FD_AD,1625819144
7,41412,FD_PW,1626146061
8,41412,FD_CS,1626827324
9,32235,CC_NE,1612652289


# Upload data files to S3

Create a random string to be used as a suffix when naming the programmatically created AWS artifacts such as S3 buckets and Personalize resources. This way subsequent runs of the notebook will not conflict with previously created resources of the same name.

In [123]:
# Draw 5 characters from lowercase letters and digits to form the suffix.
suffix_alphabet = list(string.ascii_lowercase + string.digits)
suffix_characters = np.random.choice(suffix_alphabet, 5)
random_suffix = ''.join(suffix_characters)
print(random_suffix)

l0mid


In [124]:
# Persist random_suffix with IPython's %store magic so it can be restored
# later with `%store -r random_suffix`.
%store random_suffix

Stored 'random_suffix' (str)


In [125]:
# Name the S3 bucket with the random suffix, then create it with the AWS CLI
# ({bucket} is interpolated by IPython into the shell command).
bucket = "personalize-connect-blog-data-"+random_suffix    
!aws s3 mb s3://{bucket}

make_bucket: personalize-connect-blog-data-l0mid


In [126]:
# boto3 clients: `personalize` manages resources (schemas, datasets, solutions,
# campaigns); `personalize_runtime` serves recommendations from a campaign.
personalize, personalize_runtime = (
    boto3.client(service_name=service)
    for service in ('personalize', 'personalize-runtime')
)

In [127]:
# Upload the interactions CSV to the bucket; the object key is the file name.
interactions_s3_object = boto3.Session().resource('s3').Bucket(bucket).Object(interactions_filename)
interactions_s3_object.upload_file(interactions_filename)

In [128]:
# Upload the users CSV to the bucket; the object key is the file name.
users_s3_object = boto3.Session().resource('s3').Bucket(bucket).Object(users_filename)
users_s3_object.upload_file(users_filename)

# Create Personalize service artifacts using Amazon boto3 APIs

## Create Personalize dataset schemas

In [129]:
# Schema names carry the random suffix so each notebook run gets unique names.
users_schema_name = 'personalize-connect-blog-users-schema-{}'.format(random_suffix)
interactions_schema_name = 'personalize-connect-blog-interactions-schema-{}'.format(random_suffix)
print(users_schema_name)

personalize-connect-blog-users-schema-l0mid


In [130]:
# (column name, Avro type) for every column of the users CSV, in file order.
# NOTE(review): the string metadata fields are not marked "categorical" —
# confirm against the Personalize schema documentation whether that is needed
# for them to be used in training.
users_schema_fields = [
    ("USER_ID", "string"),
    ("deposit", "string"),
    ("age", "long"),
    ("job", "string"),
    ("marital", "string"),
    ("education", "string"),
]

# Avro record describing the USERS dataset.
users_schema = {
    "type": "record",
    "name": "Users",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {"name": field_name, "type": field_type}
        for field_name, field_type in users_schema_fields
    ],
    "version": "1.0"
}

# Register the schema with Personalize and keep its ARN for dataset creation.
create_schema_response = personalize.create_schema(
    name=users_schema_name,
    schema=json.dumps(users_schema),
)
users_schema_arn = create_schema_response['schemaArn']
print(json.dumps(create_schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:us-west-2:311381347798:schema/personalize-connect-blog-users-schema-l0mid",
  "ResponseMetadata": {
    "RequestId": "9acfcd8d-bf0f-4ef6-a5ed-bb281a6ba93d",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 17 Nov 2021 01:06:51 GMT",
      "x-amzn-requestid": "9acfcd8d-bf0f-4ef6-a5ed-bb281a6ba93d",
      "content-length": "109",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [131]:
# (column name, Avro type) for every column of the interactions CSV, in file order.
interactions_schema_fields = [
    ("USER_ID", "string"),
    ("ITEM_ID", "string"),
    ("TIMESTAMP", "long"),
]

# Avro record describing the INTERACTIONS dataset.
interactions_schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {"name": field_name, "type": field_type}
        for field_name, field_type in interactions_schema_fields
    ],
    "version": "1.0"
}

# Register the schema with Personalize and keep its ARN for dataset creation.
create_schema_response = personalize.create_schema(
    name=interactions_schema_name,
    schema=json.dumps(interactions_schema),
)
interactions_schema_arn = create_schema_response['schemaArn']

print(json.dumps(create_schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:us-west-2:311381347798:schema/personalize-connect-blog-interactions-schema-l0mid",
  "ResponseMetadata": {
    "RequestId": "853aac6d-7efd-4a47-ac2e-84fefe284540",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 17 Nov 2021 01:06:52 GMT",
      "x-amzn-requestid": "853aac6d-7efd-4a47-ac2e-84fefe284540",
      "content-length": "116",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


## Create dataset group and dataset resources in Personalize

Create dataset group

In [132]:
# Create the dataset group that will contain the datasets of this notebook.
dataset_group_name = "personalize-connect-blog-dataset-" + random_suffix
create_dataset_group_response = personalize.create_dataset_group(name=dataset_group_name)

# The ARN is needed by every later call that refers to this dataset group.
dataset_group_arn = create_dataset_group_response['datasetGroupArn']
print(json.dumps(create_dataset_group_response, indent=2))

{
  "datasetGroupArn": "arn:aws:personalize:us-west-2:311381347798:dataset-group/personalize-connect-blog-dataset-l0mid",
  "ResponseMetadata": {
    "RequestId": "b6f44102-8ebe-433b-acdf-2df90da985de",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 17 Nov 2021 01:06:51 GMT",
      "x-amzn-requestid": "b6f44102-8ebe-433b-acdf-2df90da985de",
      "content-length": "117",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


Check status of dataset group creation.

In [133]:
# Poll once a minute until the dataset group reaches a terminal state,
# giving up after 3 hours.
status = None
failure_reason = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_group_response = personalize.describe_dataset_group(
        datasetGroupArn = dataset_group_arn
    )
    status = describe_dataset_group_response["datasetGroup"]["status"]
    failure_reason = describe_dataset_group_response["datasetGroup"].get("failureReason")
    print("DatasetGroup: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop here on failure or timeout instead of letting later cells fail with a
# less obvious error.
if status != "ACTIVE":
    raise RuntimeError(
        "Dataset group is not ACTIVE (last status: {}, failure reason: {})".format(status, failure_reason)
    )

DatasetGroup: CREATE PENDING
DatasetGroup: ACTIVE


Create INTERACTIONS dataset. 

In [134]:
# Create the INTERACTIONS dataset inside the dataset group, using the
# interactions schema registered earlier.
dataset_type = "INTERACTIONS"
interactions_dataset_name = "personalize-connect-blog-interactions-dataset-" + random_suffix
create_dataset_response = personalize.create_dataset(
    name=interactions_dataset_name,
    schemaArn=interactions_schema_arn,
    datasetGroupArn=dataset_group_arn,
    datasetType=dataset_type,
)

# The dataset ARN is the target of the interactions import job.
interactions_dataset_arn = create_dataset_response['datasetArn']
print(json.dumps(create_dataset_response, indent=2))


{
  "datasetArn": "arn:aws:personalize:us-west-2:311381347798:dataset/personalize-connect-blog-dataset-l0mid/INTERACTIONS",
  "ResponseMetadata": {
    "RequestId": "9916af03-829a-4103-942f-2b6eb93bfdd7",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 17 Nov 2021 01:07:52 GMT",
      "x-amzn-requestid": "9916af03-829a-4103-942f-2b6eb93bfdd7",
      "content-length": "119",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


Check INTERACTIONS dataset creation status

In [135]:
# Poll once a minute until the INTERACTIONS dataset reaches a terminal state,
# giving up after 3 hours.
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_response = personalize.describe_dataset(
        datasetArn = interactions_dataset_arn
    )
    status = describe_dataset_response["dataset"]["status"]
    print("dataset: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop here on failure or timeout instead of letting later cells fail with a
# less obvious error.
if status != "ACTIVE":
    raise RuntimeError("INTERACTIONS dataset is not ACTIVE (last status: {})".format(status))

dataset: CREATE PENDING
dataset: ACTIVE


Create USERS dataset

In [136]:
# Create the USERS dataset inside the dataset group, using the users schema
# registered earlier.
dataset_type = "USERS"
users_dataset_name = "personalize-connect-blog-users-dataset-" + random_suffix
create_dataset_response = personalize.create_dataset(
    name=users_dataset_name,
    schemaArn=users_schema_arn,
    datasetGroupArn=dataset_group_arn,
    datasetType=dataset_type,
)

# The dataset ARN is the target of the users import job.
users_dataset_arn = create_dataset_response['datasetArn']
print(json.dumps(create_dataset_response, indent=2))


{
  "datasetArn": "arn:aws:personalize:us-west-2:311381347798:dataset/personalize-connect-blog-dataset-l0mid/USERS",
  "ResponseMetadata": {
    "RequestId": "0c711f1c-0468-47a2-948a-90511e501c3b",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 17 Nov 2021 01:08:52 GMT",
      "x-amzn-requestid": "0c711f1c-0468-47a2-948a-90511e501c3b",
      "content-length": "112",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


Check USERS dataset creation status

In [137]:
# Poll once a minute until the USERS dataset reaches a terminal state,
# giving up after 3 hours.
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_response = personalize.describe_dataset(
        datasetArn = users_dataset_arn
    )
    status = describe_dataset_response["dataset"]["status"]
    print("dataset: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop here on failure or timeout instead of letting later cells fail with a
# less obvious error.
if status != "ACTIVE":
    raise RuntimeError("USERS dataset is not ACTIVE (last status: {})".format(status))

dataset: CREATE PENDING
dataset: ACTIVE


## Add S3 bucket policy for allowing access from personalize service

The code below adds the necessary access policy to the S3 bucket so that the Amazon Personalize service has read access to the bucket. This is required for Personalize to read and import the training data.

In [138]:
s3 = boto3.client("s3")

# The policy covers the bucket itself (for ListBucket) and every object in it
# (for GetObject).
bucket_arn = "arn:aws:s3:::{}".format(bucket)
bucket_objects_arn = "arn:aws:s3:::{}/*".format(bucket)

# Single statement granting the Personalize service principal read access.
personalize_read_statement = {
    "Sid": "PersonalizeS3BucketAccessPolicy",
    "Effect": "Allow",
    "Principal": {"Service": "personalize.amazonaws.com"},
    "Action": ["s3:GetObject", "s3:ListBucket"],
    "Resource": [bucket_arn, bucket_objects_arn],
}

policy = {
    "Version": "2012-10-17",
    "Id": "PersonalizeS3BucketAccessPolicy",
    "Statement": [personalize_read_statement],
}

# The trailing semicolon suppresses the response repr in the cell output.
s3.put_bucket_policy(Bucket=bucket, Policy=json.dumps(policy));

## Get the ARN for Personalize IAM Role 

Amazon Resource Names (ARNs) are unique IDs of resources. Our CloudFormation template had earlier created an IAM Service Role specifically with permissions to execute Amazon Personalize APIs, have access to S3 etc. This role was associated to this SageMaker notebook so that we can execute the Personalize related tasks from this SageMaker notebook. In order for Personalize service to import data from S3, this role provides the necessary permissions. We pass this role ARN as a reference to Personalize.  

In [139]:
# Look up the IAM role attached to this SageMaker notebook; its ARN is passed
# to Personalize for the dataset import jobs.
sagemaker_session = sagemaker.Session()
role_arn = get_execution_role(sagemaker_session=sagemaker_session)

In [140]:
# Display the role ARN that will be passed to the import jobs.
role_arn

'arn:aws:iam::311381347798:role/service-role/predict-ci-nb-CustomPersonalizeServiceRole-10HRPWAGXBH5V'

## Create data import job in Personalize

These are long-running jobs run by the Personalize service to import the prepared datasets from specified S3 location

In [141]:
# Show the S3 location and the target dataset used by the interactions import
# job. The import job ARNs are not set here: they are taken from the responses
# of the create_dataset_import_job calls, so no ARN from another run or
# account is hard-coded.
print("s3://{}/{}".format(bucket, interactions_filename))
print(interactions_dataset_arn)

s3://personalize-connect-blog-data-l0mid/INTERACTIONS.csv
arn:aws:personalize:us-west-2:311381347798:dataset/personalize-connect-blog-dataset-l0mid/INTERACTIONS


In [142]:
# Start the job that imports the interactions CSV from S3 into the
# INTERACTIONS dataset; Personalize reads the file using role_arn.
interactions_data_location = "s3://{}/{}".format(bucket, interactions_filename)
create_interactions_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName="interactions-dataset-import-" + random_suffix,
    datasetArn=interactions_dataset_arn,
    dataSource={"dataLocation": interactions_data_location},
    roleArn=role_arn,
)

# Keep the job ARN so its status can be polled.
interactions_dataset_import_job_arn = create_interactions_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_interactions_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:us-west-2:311381347798:dataset-import-job/interactions-dataset-import-l0mid",
  "ResponseMetadata": {
    "RequestId": "ae00507c-44f8-41bb-98de-41ac57a21ad0",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 17 Nov 2021 01:09:53 GMT",
      "x-amzn-requestid": "ae00507c-44f8-41bb-98de-41ac57a21ad0",
      "content-length": "121",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [143]:
# Start the job that imports the users CSV from S3 into the USERS dataset;
# Personalize reads the file using role_arn.
users_data_location = "s3://{}/{}".format(bucket, users_filename)
create_users_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName="users-dataset-import-" + random_suffix,
    datasetArn=users_dataset_arn,
    dataSource={"dataLocation": users_data_location},
    roleArn=role_arn,
)

# Keep the job ARN so its status can be polled.
users_dataset_import_job_arn = create_users_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_users_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:us-west-2:311381347798:dataset-import-job/users-dataset-import-l0mid",
  "ResponseMetadata": {
    "RequestId": "6a7fcba4-3458-4f6e-aaa8-22c91f1115c1",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 17 Nov 2021 01:09:53 GMT",
      "x-amzn-requestid": "6a7fcba4-3458-4f6e-aaa8-22c91f1115c1",
      "content-length": "114",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


Check the status of the dataset import jobs.

In [144]:
# Poll once a minute until the USERS import job reaches a terminal state,
# giving up after 3 hours.
status = None
failure_reason = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_users_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = users_dataset_import_job_arn
    )

    dataset_import_job = describe_users_dataset_import_job_response["datasetImportJob"]
    failure_reason = dataset_import_job.get("failureReason")
    # Use the status of the latest job run when the response includes one,
    # otherwise the status of the job itself.
    if "latestDatasetImportJobRun" not in dataset_import_job:
        status = dataset_import_job["status"]
        print("DatasetImportJob: {}".format(status))
    else:
        status = dataset_import_job["latestDatasetImportJobRun"]["status"]
        print("LatestDatasetImportJobRun: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop here on failure or timeout instead of letting later cells train on
# data that was never imported.
if status != "ACTIVE":
    raise RuntimeError(
        "USERS import job is not ACTIVE (last status: {}, failure reason: {})".format(status, failure_reason)
    )

DatasetImportJob: CREATE PENDING
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: ACTIVE


In [145]:
# Poll once a minute until the INTERACTIONS import job reaches a terminal
# state, giving up after 3 hours.
status = None
failure_reason = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:

    interactions_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = interactions_dataset_import_job_arn
    )

    dataset_import_job = interactions_dataset_import_job_response["datasetImportJob"]
    failure_reason = dataset_import_job.get("failureReason")
    # Use the status of the latest job run when the response includes one,
    # otherwise the status of the job itself.
    if "latestDatasetImportJobRun" not in dataset_import_job:
        status = dataset_import_job["status"]
        print("DatasetImportJob: {}".format(status))
    else:
        status = dataset_import_job["latestDatasetImportJobRun"]["status"]
        print("LatestDatasetImportJobRun: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop here on failure or timeout instead of letting later cells train on
# data that was never imported.
if status != "ACTIVE":
    raise RuntimeError(
        "INTERACTIONS import job is not ACTIVE (last status: {}, failure reason: {})".format(status, failure_reason)
    )

DatasetImportJob: ACTIVE


## Create a solution in Personalize 

Use the appropriate recipe for this use case. One can try with different options. Please refer to https://docs.aws.amazon.com/personalize/latest/dg/working-with-predefined-recipes.html 

In [146]:
# Print the ARN of every recipe returned by Personalize.
recipe_list = personalize.list_recipes()
for recipe_summary in recipe_list['recipes']:
    print(recipe_summary['recipeArn'])

arn:aws:personalize:::recipe/aws-hrnn
arn:aws:personalize:::recipe/aws-hrnn-coldstart
arn:aws:personalize:::recipe/aws-hrnn-metadata
arn:aws:personalize:::recipe/aws-personalized-ranking
arn:aws:personalize:::recipe/aws-popularity-count
arn:aws:personalize:::recipe/aws-similar-items
arn:aws:personalize:::recipe/aws-sims
arn:aws:personalize:::recipe/aws-user-personalization


In [147]:
# Recipe used for the solution: aws-user-personalization, one of the ARNs
# listed by list_recipes().
recipe_arn = "arn:aws:personalize:::recipe/aws-user-personalization"

In [148]:
# Create a solution for the dataset group using the selected recipe.
solution_name = 'predict-ci-user-personalization-' + random_suffix
create_solution_response = personalize.create_solution(
    name=solution_name,
    recipeArn=recipe_arn,
    datasetGroupArn=dataset_group_arn,
)

# The solution ARN is needed to create a solution version.
solution_arn = create_solution_response['solutionArn']
print(json.dumps(create_solution_response, indent=2))

{
  "solutionArn": "arn:aws:personalize:us-west-2:311381347798:solution/predict-ci-user-personalization-l0mid",
  "ResponseMetadata": {
    "RequestId": "a032b8fa-e606-4b95-b1b6-2d45c0375bbc",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 17 Nov 2021 01:14:54 GMT",
      "x-amzn-requestid": "a032b8fa-e606-4b95-b1b6-2d45c0375bbc",
      "content-length": "107",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [149]:
# Create a solution version of the solution, using the FULL training mode.
create_solution_version_response = personalize.create_solution_version(
    trainingMode='FULL',
    solutionArn=solution_arn,
)

# The solution version ARN is what a campaign is deployed from.
solution_version_arn = create_solution_version_response['solutionVersionArn']
print(json.dumps(create_solution_version_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-west-2:311381347798:solution/predict-ci-user-personalization-l0mid/a31e0210",
  "ResponseMetadata": {
    "RequestId": "aa5c0cd6-9e5b-486f-9947-293dac9d6691",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 17 Nov 2021 01:14:54 GMT",
      "x-amzn-requestid": "aa5c0cd6-9e5b-486f-9947-293dac9d6691",
      "content-length": "123",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [150]:
# Poll once a minute until the solution version reaches a terminal state,
# giving up after 3 hours.
status = None
failure_reason = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn = solution_version_arn
    )
    status = describe_solution_version_response["solutionVersion"]["status"]
    failure_reason = describe_solution_version_response["solutionVersion"].get("failureReason")
    print("SolutionVersion: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop here on failure or timeout: a campaign needs an ACTIVE solution version.
if status != "ACTIVE":
    raise RuntimeError(
        "Solution version is not ACTIVE (last status: {}, failure reason: {})".format(status, failure_reason)
    )

SolutionVersion: CREATE PENDING
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: ACTIVE


## Deploy a campaign in Personalize for this solution

For a list of Properties and Hyperparameters available for the user personalization recipe, please refer to: https://docs.aws.amazon.com/personalize/latest/dg/native-recipe-new-item-USER_PERSONALIZATION.html#bandit-hyperparameters

In [151]:
# Deploy the trained solution version as a campaign.
campaign_name = "predict-ci-user-personalization-" + random_suffix
# Item exploration setting of the user personalization recipe; see the
# hyperparameter documentation linked above for its meaning.
campaign_config = {"itemExplorationConfig": {"explorationItemAgeCutOff": "2000"}}

create_campaign_response = personalize.create_campaign(
    name=campaign_name,
    solutionVersionArn=solution_version_arn,
    minProvisionedTPS=2,
    campaignConfig=campaign_config,
)

# The campaign ARN is used for every get_recommendations call.
campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:us-west-2:311381347798:campaign/predict-ci-user-personalization-l0mid",
  "ResponseMetadata": {
    "RequestId": "927f7b78-c150-47bf-97c6-1de5bd6490bc",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Wed, 17 Nov 2021 01:37:56 GMT",
      "x-amzn-requestid": "927f7b78-c150-47bf-97c6-1de5bd6490bc",
      "content-length": "107",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [152]:
# Poll once a minute until the campaign reaches a terminal state,
# giving up after 3 hours.
status = None
failure_reason = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = campaign_arn
    )
    status = describe_campaign_response["campaign"]["status"]
    failure_reason = describe_campaign_response["campaign"].get("failureReason")
    print("Campaign: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop here on failure or timeout: recommendations can only be requested from
# an ACTIVE campaign.
if status != "ACTIVE":
    raise RuntimeError(
        "Campaign is not ACTIVE (last status: {}, failure reason: {})".format(status, failure_reason)
    )

Campaign: CREATE PENDING
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: ACTIVE


# Get Customer Intent Predicted 

## Update new interaction data in real-time

Using personalize APIs for updating user interaction data in real-time. This is done using the put_events API offered by Amazon Personalize service.

In [153]:
# Create an event tracker for the dataset group; its tracking ID is required
# by put_events and its ARN is used to poll the tracker status.
event_tracker = 'CallInentTracker'
response = personalize.create_event_tracker(name=event_tracker, datasetGroupArn=dataset_group_arn)
tracking_id, tracking_arn = response['trackingId'], response['eventTrackerArn']
response

{'eventTrackerArn': 'arn:aws:personalize:us-west-2:311381347798:event-tracker/8c5eb3c9',
 'trackingId': '9224e121-c78f-4f6b-aa63-fa3809e71a68',
 'ResponseMetadata': {'RequestId': '778391b8-e4d0-4d49-b5db-adff1d47d7b2',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Wed, 17 Nov 2021 01:48:57 GMT',
   'x-amzn-requestid': '778391b8-e4d0-4d49-b5db-adff1d47d7b2',
   'content-length': '139',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

Create a random session ID for sending interaction events 

In [154]:
# Client used to send real-time events with put_events.
personalize_events = boto3.client(service_name='personalize-events')

# Session ID: 10 random lowercase letters.
letters = string.ascii_lowercase
session_id = ''.join([random.choice(letters) for _ in range(10)])

### Record new events for an existing customer 

Check status of the event tracker

In [155]:
# Poll once a minute until the event tracker reaches a terminal state,
# giving up after 3 hours.
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_event_tracker_response = personalize.describe_event_tracker(
        eventTrackerArn = tracking_arn
    )
    status = describe_event_tracker_response["eventTracker"]["status"]
    print("eventTracker: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop here on failure or timeout instead of sending events to a tracker that
# is not ready.
if status != "ACTIVE":
    raise RuntimeError("Event tracker is not ACTIVE (last status: {})".format(status))

eventTracker: CREATE PENDING
eventTracker: ACTIVE


In [156]:
# Three recent calls of existing customer 35739, oldest first:
# (seconds before now, sub-intent code).
recent_calls = [
    (276480, "AC_NE"),
    (86400, "AC_AS"),
    (1000, "AC_PA"),
]

# Send the calls as real-time events; each event is time-stamped relative to
# the current time and carries the sub-intent as its itemId.
personalize_events.put_events(
    trackingId=tracking_id,
    userId='35739',
    sessionId=session_id,
    eventList=[
        {
            'sentAt': int(time.time() - seconds_ago),
            'eventType': 'call',
            'properties': json.dumps({"itemId": item_id}),
        }
        for seconds_ago, item_id in recent_calls
    ],
)
time.sleep(120) # allow two minutes for Personalize to update the recommendations based on the real-time events

## Customer intent prediction

### A. High confidence score example

A deployed Personalize campaign offers Personalize runtime APIs for getting real-time personalized recommendations. In this section of the notebook, we use Boto APIs to interact with the Personalize runtime APIs. We provide a customer's unique ID and receive that customer's next intent as predicted by Personalize.

In [157]:
# Ask the campaign for the predicted next intents of existing customer 35739.
response = personalize_runtime.get_recommendations(campaignArn=campaign_arn, userId='35739')

# One line per recommended item: item ID followed by its score.
print("Recommended items - score ")
for recommendation in response['itemList']:
    print(recommendation['itemId'], recommendation['score'])

Recommended items - score 
AC_AA 0.9439064
CC_NE 0.0138092
LI_NE 0.0085333
HL_NE 0.007464
TA_NE 0.0071827
FD_NE 0.00682
AC_NE 0.0066388
AC_PA 0.000894
TA_AA 0.0006131
TA_FN 0.0005504
HL_RP 0.0004061
HL_CS 0.0003783
AC_CS 0.0003213
LI_AS 0.0003
TA_AS 0.0001959
CC_CA 0.0001781
CC_LC 0.0001759
AC_AS 0.0001737
LI_CS 0.0001436
CC_CS 0.0001373
HL_DS 0.0001343
FD_NA 0.0001288
TA_CS 0.0001215
CC_AA 0.0001116


### B. Low confidence score example

This returns a low confidence score as the customer ID is new. Personalize does not have any contact history for this customer. 

In [158]:
# Ask the campaign for recommendations for customer ID 89789, for which no
# events were sent in this notebook.
response = personalize_runtime.get_recommendations(campaignArn=campaign_arn, userId='89789')

# One line per recommended item: item ID followed by its score.
print("Recommended items - score ")
for recommendation in response['itemList']:
    print(recommendation['itemId'], recommendation['score'])

Recommended items - score 
HL_NE 0.2017641
AC_NE 0.1764564
FD_NE 0.1739267
CC_NE 0.1654633
TA_NE 0.1425011
LI_NE 0.1273979
HL_DS 0.0011574
FD_CS 0.0007902
CC_CS 0.0007501
AC_AA 0.0007381
TA_AA 0.0006833
FD_PW 0.0006245
HL_CS 0.0005332
LI_CS 0.0005196
LI_AS 0.0004616
FD_AD 0.0004566
HL_AS 0.0004515
AC_CS 0.0004445
TA_CS 0.0004393
HL_RP 0.0004161
CC_AA 0.0004146
CC_LC 0.000384
LI_SW 0.0003837
LI_PA 0.0003789


### C. Record  events for a new customer

In [159]:
# Record a single call (HL_NE, time-stamped one day ago) for new customer 88889.
new_customer_event = {
    'sentAt': int(time.time() - 86400),
    'eventType': 'call',
    'properties': json.dumps({"itemId": "HL_NE"}),
}
personalize_events.put_events(
    trackingId=tracking_id,
    userId='88889',
    sessionId=session_id,
    eventList=[new_customer_event],
)
time.sleep(120) # allow two minutes for Personalize to update the recommendations based on the real-time events

### D. start getting intent prediction in real-time for the new customer

In [160]:
# Ask the campaign for the predicted next intents of new customer 88889,
# whose first event was recorded in the previous cell.
response = personalize_runtime.get_recommendations(campaignArn=campaign_arn, userId='88889')

# One line per recommended item: item ID followed by its score.
print("Recommended items - scores")

for recommendation in response['itemList']:
    print(recommendation['itemId'], recommendation['score'])

Recommended items - scores
HL_AS 0.9971374
LI_NE 0.0005079
HL_DS 0.0004015
CC_NE 0.0002941
HL_NE 0.0002761
FD_NE 0.0002717
AC_NE 0.0002577
TA_NE 0.0002323
TA_AA 9.24e-05
CC_CS 6.93e-05
FD_NA 5.61e-05
HL_RP 5.49e-05
AC_AS 4.17e-05
FD_AD 3.24e-05
FD_PW 3.11e-05
AC_CS 2.7e-05
HL_CS 2.21e-05
CC_AA 1.91e-05
FD_CS 1.66e-05
CC_CA 1.43e-05
LI_CS 1.33e-05
TA_CS 1.31e-05
TA_FN 1.19e-05
LI_AS 8.1e-06


In [161]:
# Display the campaign ARN for reference.
campaign_arn

'arn:aws:personalize:us-west-2:311381347798:campaign/predict-ci-user-personalization-l0mid'

In [162]:
# Display the event tracker's tracking ID for reference.
tracking_id

'9224e121-c78f-4f6b-aa63-fa3809e71a68'