# Download the data files

In [1]:
# Local file names of the two datasets; later cells also use them as the S3 object keys.
users_filename, interactions_filename = 'USERS.csv', 'INTERACTIONS.csv'

In [2]:
!wget https://aconnect-proserve-blogs.s3.amazonaws.com/PredictCustomerIntent/customer11k.csv  -O USERS.csv
!wget https://aconnect-proserve-blogs.s3.amazonaws.com/PredictCustomerIntent/interactions.csv -O INTERACTIONS.csv

--2020-12-07 07:53:18--  https://aconnect-proserve-blogs.s3.amazonaws.com/PredictCustomerIntent/customer11k.csv
Resolving aconnect-proserve-blogs.s3.amazonaws.com (aconnect-proserve-blogs.s3.amazonaws.com)... 52.217.80.92
Connecting to aconnect-proserve-blogs.s3.amazonaws.com (aconnect-proserve-blogs.s3.amazonaws.com)|52.217.80.92|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 456961 (446K) [text/csv]
Saving to: ‘USERS.csv’


2020-12-07 07:53:19 (57.5 MB/s) - ‘USERS.csv’ saved [456961/456961]

--2020-12-07 07:53:19--  https://aconnect-proserve-blogs.s3.amazonaws.com/PredictCustomerIntent/interactions.csv
Resolving aconnect-proserve-blogs.s3.amazonaws.com (aconnect-proserve-blogs.s3.amazonaws.com)... 52.217.80.92
Connecting to aconnect-proserve-blogs.s3.amazonaws.com (aconnect-proserve-blogs.s3.amazonaws.com)|52.217.80.92|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 773307 (755K) [text/csv]
Saving to: ‘INTERACTIONS.csv’


2020-12-0

# Organize Python imports

In [3]:
import boto3
import json
import numpy as np
import pandas as pd
import csv
import string 
import time
from sagemaker import get_execution_role
import sagemaker


# Describe the downloaded data file info

In [4]:
# Load both CSV files through the file-name variables instead of repeating the
# literals, so a renamed file only has to be changed in one place.
users_df = pd.read_csv(users_filename)
interactions_df = pd.read_csv(interactions_filename)

In [5]:
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() emits a stray "None" line after the summary.
users_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11162 entries, 0 to 11161
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   USER_ID    11162 non-null  int64 
 1   deposit    11162 non-null  object
 2   age        11162 non-null  int64 
 3   job        11162 non-null  object
 4   marital    11162 non-null  object
 5   education  11162 non-null  object
dtypes: int64(2), object(4)
memory usage: 523.3+ KB
None


In [6]:
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() emits a stray "None" line after the summary.
interactions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32220 entries, 0 to 32219
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   USER_ID    32220 non-null  int64 
 1   ITEM_ID    32220 non-null  object
 2   TIMESTAMP  32220 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 755.3+ KB
None


In [7]:
# Preview the first five rows of the users table.
users_df.head(n=5)

Unnamed: 0,USER_ID,deposit,age,job,marital,education
0,30908,yes,59,admin.,married,secondary
1,30909,yes,56,admin.,married,secondary
2,30910,yes,41,technician,married,secondary
3,30911,yes,55,services,married,secondary
4,30912,yes,54,admin.,married,tertiary


In [8]:
# Preview the first five rows of the interactions table.
interactions_df.head(n=5)

Unnamed: 0,USER_ID,ITEM_ID,TIMESTAMP
0,35739,AC_NE,1579609263
1,35739,AC_AS,1580612023
2,35739,AC_PA,1581390495
3,35739,AC_AA,1582229479
4,38695,TA_NE,1575001446


# Upload data to S3

Create a random string to use as a suffix when naming the programmatically created AWS artifacts, such as S3 buckets and Personalize resources. This way, subsequent runs of the notebook will not conflict with a previously created resource of the same name.

In [9]:
# Draw five characters (with replacement) from lowercase letters and digits and
# join them into the suffix that is appended to the resource names created below.
suffix_alphabet = list(string.ascii_lowercase + string.digits)
random_suffix = ''.join(np.random.choice(suffix_alphabet, size=5))
print(random_suffix)

lr45h


In [10]:
# Persist random_suffix with IPython's %store magic (it can be restored in another kernel with `%store -r`).
%store random_suffix

Stored 'random_suffix' (str)


In [11]:
bucket = "personalize-connect-blog-data-"+random_suffix    
!aws s3 mb s3://{bucket}

make_bucket: personalize-connect-blog-data-lr45h


In [12]:
# boto3 clients used throughout the notebook: `personalize` for creating and describing
# resources, `personalize_runtime` for fetching recommendations.
personalize_runtime = boto3.client('personalize-runtime')
personalize = boto3.client('personalize')

In [13]:
# Upload the interactions CSV to the bucket; the S3 object key equals the local file name.
boto3.Session().resource('s3').Object(bucket, interactions_filename).upload_file(
    Filename=interactions_filename
)

In [14]:
# Upload the users CSV to the bucket; the S3 object key equals the local file name.
boto3.Session().resource('s3').Object(bucket, users_filename).upload_file(
    Filename=users_filename
)

# Create Personalize service artifacts using Amazon boto3 APIs

## Create Personalize dataset schemas

In [15]:
# Schema names carry the run-specific suffix, like the other resources created in this notebook.
interactions_schema_name = 'personalize-connect-blog-interactions-schema-{}'.format(random_suffix)
users_schema_name = 'personalize-connect-blog-users-schema-{}'.format(random_suffix)
print(users_schema_name)

personalize-connect-blog-users-schema-lr45h


In [16]:
# (field name, Avro type) pairs of the USERS dataset, in schema order.
# NOTE(review): the string metadata fields are not flagged "categorical" -- confirm
# against the Personalize schema documentation whether that is intended.
users_field_types = [
    ("USER_ID", "string"),
    ("deposit", "string"),
    ("age", "long"),
    ("job", "string"),
    ("marital", "string"),
    ("education", "string"),
]

# Avro record schema describing the USERS dataset.
users_schema = {
    "type": "record",
    "name": "Users",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {"name": field_name, "type": avro_type}
        for field_name, avro_type in users_field_types
    ],
    "version": "1.0",
}

# Register the schema (serialized as JSON) and keep its ARN for the dataset creation step.
create_schema_response = personalize.create_schema(
    schema=json.dumps(users_schema),
    name=users_schema_name,
)
users_schema_arn = create_schema_response['schemaArn']
print(json.dumps(create_schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:us-east-1:248025046818:schema/personalize-connect-blog-users-schema-lr45h",
  "ResponseMetadata": {
    "RequestId": "fed6df53-e993-445f-834e-e6e75ce0153c",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 07 Dec 2020 07:53:25 GMT",
      "x-amzn-requestid": "fed6df53-e993-445f-834e-e6e75ce0153c",
      "content-length": "109",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [17]:
# (field name, Avro type) pairs of the INTERACTIONS dataset, in schema order.
interactions_field_types = [
    ("USER_ID", "string"),
    ("ITEM_ID", "string"),
    ("TIMESTAMP", "long"),
]

# Avro record schema describing the INTERACTIONS dataset.
interactions_schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {"name": field_name, "type": avro_type}
        for field_name, avro_type in interactions_field_types
    ],
    "version": "1.0",
}

# Register the schema (serialized as JSON) and keep its ARN for the dataset creation step.
create_schema_response = personalize.create_schema(
    schema=json.dumps(interactions_schema),
    name=interactions_schema_name,
)
interactions_schema_arn = create_schema_response['schemaArn']
print(json.dumps(create_schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:us-east-1:248025046818:schema/personalize-connect-blog-interactions-schema-lr45h",
  "ResponseMetadata": {
    "RequestId": "f9d63d31-4939-4256-899f-e56e87203ff7",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 07 Dec 2020 07:53:25 GMT",
      "x-amzn-requestid": "f9d63d31-4939-4256-899f-e56e87203ff7",
      "content-length": "116",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


## Create dataset group and datasets

Create dataset group

In [18]:
# Create the dataset group that will contain both datasets, and remember its ARN.
dataset_group_name = "personalize-connect-blog-dataset-{}".format(random_suffix)
create_dataset_group_response = personalize.create_dataset_group(name=dataset_group_name)
dataset_group_arn = create_dataset_group_response['datasetGroupArn']
print(json.dumps(create_dataset_group_response, indent=2))

{
  "datasetGroupArn": "arn:aws:personalize:us-east-1:248025046818:dataset-group/personalize-connect-blog-dataset-lr45h",
  "ResponseMetadata": {
    "RequestId": "1b2cdb94-08e1-40d7-b818-b3e9bda8e1f8",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 07 Dec 2020 07:53:25 GMT",
      "x-amzn-requestid": "1b2cdb94-08e1-40d7-b818-b3e9bda8e1f8",
      "content-length": "117",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


Check status of dataset group creation.

In [19]:
# Poll the dataset group once a minute, for at most 3 hours, until it is ACTIVE
# or its creation has failed.
status = None
failure_reason = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_group_response = personalize.describe_dataset_group(
        datasetGroupArn = dataset_group_arn
    )
    dataset_group = describe_dataset_group_response["datasetGroup"]
    status = dataset_group["status"]
    failure_reason = dataset_group.get("failureReason")
    print("DatasetGroup: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop the notebook here on failure or timeout rather than letting a later cell
# fail with a less obvious error.
if status != "ACTIVE":
    raise RuntimeError(
        "Dataset group {} is not ACTIVE (last status: {}, failure reason: {})".format(
            dataset_group_arn, status, failure_reason
        )
    )

DatasetGroup: CREATE PENDING
DatasetGroup: ACTIVE


Create INTERACTIONS dataset. 

In [20]:
# Create the INTERACTIONS dataset in the dataset group, bound to the interactions schema.
dataset_type = "INTERACTIONS"
interactions_dataset_request = dict(
    name="personalize-connect-blog-interactions-dataset-" + random_suffix,
    schemaArn=interactions_schema_arn,
    datasetGroupArn=dataset_group_arn,
    datasetType=dataset_type,
)
create_dataset_response = personalize.create_dataset(**interactions_dataset_request)

interactions_dataset_arn = create_dataset_response['datasetArn']
print(json.dumps(create_dataset_response, indent=2))


{
  "datasetArn": "arn:aws:personalize:us-east-1:248025046818:dataset/personalize-connect-blog-dataset-lr45h/INTERACTIONS",
  "ResponseMetadata": {
    "RequestId": "819242ea-3be4-4c42-948b-e9d5e91bcb1d",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 07 Dec 2020 07:54:25 GMT",
      "x-amzn-requestid": "819242ea-3be4-4c42-948b-e9d5e91bcb1d",
      "content-length": "119",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


Create USERS dataset

In [21]:
# Create the USERS dataset in the dataset group, bound to the users schema.
dataset_type = "USERS"
users_dataset_request = dict(
    name="personalize-connect-blog-users-dataset-" + random_suffix,
    schemaArn=users_schema_arn,
    datasetGroupArn=dataset_group_arn,
    datasetType=dataset_type,
)
create_dataset_response = personalize.create_dataset(**users_dataset_request)

users_dataset_arn = create_dataset_response['datasetArn']
print(json.dumps(create_dataset_response, indent=2))


{
  "datasetArn": "arn:aws:personalize:us-east-1:248025046818:dataset/personalize-connect-blog-dataset-lr45h/USERS",
  "ResponseMetadata": {
    "RequestId": "9dc18ed2-f2b4-42e8-97fe-f362f2a52463",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 07 Dec 2020 07:54:25 GMT",
      "x-amzn-requestid": "9dc18ed2-f2b4-42e8-97fe-f362f2a52463",
      "content-length": "112",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


## Add S3 bucket policy for allowing access from personalize service

The code below adds the necessary access policy to the S3 bucket so that the Amazon Personalize service has read access to the bucket. This is necessary for the Personalize service to read and import the training data.

In [22]:
s3 = boto3.client("s3")

# The policy has to name both the bucket itself and the objects inside it.
bucket_arn = "arn:aws:s3:::{}".format(bucket)

# Bucket policy allowing the Personalize service principal to list the bucket and read its objects.
personalize_read_statement = {
    "Sid": "PersonalizeS3BucketAccessPolicy",
    "Effect": "Allow",
    "Principal": {"Service": "personalize.amazonaws.com"},
    "Action": ["s3:GetObject", "s3:ListBucket"],
    "Resource": [bucket_arn, bucket_arn + "/*"],
}
policy = {
    "Version": "2012-10-17",
    "Id": "PersonalizeS3BucketAccessPolicy",
    "Statement": [personalize_read_statement],
}

# The trailing semicolon suppresses the response output of the cell.
s3.put_bucket_policy(Bucket=bucket, Policy=json.dumps(policy));

## Get the ARN for Personalize IAM Role 

Amazon Resource Names (ARNs) are unique IDs of resources. Our CloudFormation template had earlier created an IAM Service Role specifically with permissions to execute Amazon Personalize APIs, have access to S3 etc. This role was associated with this SageMaker notebook so that we can execute the Personalize-related tasks from this SageMaker notebook. This role provides the permissions the Personalize service needs to import data from S3. We pass this role ARN as a reference to Personalize.

In [23]:
# Look up the execution role ARN of this notebook through the SageMaker SDK;
# it is passed to Personalize as roleArn when the import jobs are created.
sagemaker_session = sagemaker.Session()
role_arn = get_execution_role(sagemaker_session=sagemaker_session)

In [24]:
# Display the resolved role ARN as the cell output.
role_arn

'arn:aws:iam::248025046818:role/service-role/predict-ci-nb-CustomPersonalizeServiceRole-8UCQDPMCCIKH'

## Create data import job

These are long-running jobs run by the Personalize service to import the prepared datasets from specified S3 location

In [25]:
# Start the import of the interactions CSV from S3 into the INTERACTIONS dataset.
interactions_data_location = "s3://{}/{}".format(bucket, interactions_filename)
create_interactions_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName="interactions-dataset-import-" + random_suffix,
    datasetArn=interactions_dataset_arn,
    dataSource={"dataLocation": interactions_data_location},
    roleArn=role_arn,
)

# Keep the job ARN so the job status can be polled later.
interactions_dataset_import_job_arn = create_interactions_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_interactions_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:us-east-1:248025046818:dataset-import-job/interactions-dataset-import-lr45h",
  "ResponseMetadata": {
    "RequestId": "16905ac2-4c98-434d-9777-17252c3290b7",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 07 Dec 2020 07:54:26 GMT",
      "x-amzn-requestid": "16905ac2-4c98-434d-9777-17252c3290b7",
      "content-length": "121",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [26]:
# Start the import of the users CSV from S3 into the USERS dataset.
users_data_location = "s3://{}/{}".format(bucket, users_filename)
create_users_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName="users-dataset-import-" + random_suffix,
    datasetArn=users_dataset_arn,
    dataSource={"dataLocation": users_data_location},
    roleArn=role_arn,
)

# Keep the job ARN so the job status can be polled later.
users_dataset_import_job_arn = create_users_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_users_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:us-east-1:248025046818:dataset-import-job/users-dataset-import-lr45h",
  "ResponseMetadata": {
    "RequestId": "7c841d6a-28ad-43c8-9ed2-c9bb516621f5",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 07 Dec 2020 07:54:26 GMT",
      "x-amzn-requestid": "7c841d6a-28ad-43c8-9ed2-c9bb516621f5",
      "content-length": "114",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


Check the status of the dataset import jobs.

In [27]:
# Poll the users import job once a minute, for at most 3 hours, until it is
# ACTIVE or its creation has failed.
status = None
failure_reason = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_users_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = users_dataset_import_job_arn
    )

    dataset_import_job = describe_users_dataset_import_job_response["datasetImportJob"]
    # Prefer the status of the latest job run when the response carries one.
    if "latestDatasetImportJobRun" not in dataset_import_job:
        status = dataset_import_job["status"]
        print("DatasetImportJob: {}".format(status))
    else:
        status = dataset_import_job["latestDatasetImportJobRun"]["status"]
        print("LatestDatasetImportJobRun: {}".format(status))
    failure_reason = dataset_import_job.get("failureReason")

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop the notebook here on failure or timeout rather than letting a later cell
# fail with a less obvious error.
if status != "ACTIVE":
    raise RuntimeError(
        "Users dataset import job {} is not ACTIVE (last status: {}, failure reason: {})".format(
            users_dataset_import_job_arn, status, failure_reason
        )
    )

DatasetImportJob: CREATE PENDING
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: ACTIVE


In [28]:
# Poll the interactions import job once a minute, for at most 3 hours, until it
# is ACTIVE or its creation has failed.
status = None
failure_reason = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:

    interactions_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = interactions_dataset_import_job_arn
    )

    dataset_import_job = interactions_dataset_import_job_response["datasetImportJob"]
    # Prefer the status of the latest job run when the response carries one.
    if "latestDatasetImportJobRun" not in dataset_import_job:
        status = dataset_import_job["status"]
        print("DatasetImportJob: {}".format(status))
    else:
        status = dataset_import_job["latestDatasetImportJobRun"]["status"]
        print("LatestDatasetImportJobRun: {}".format(status))
    failure_reason = dataset_import_job.get("failureReason")

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop the notebook here on failure or timeout rather than letting a later cell
# fail with a less obvious error.
if status != "ACTIVE":
    raise RuntimeError(
        "Interactions dataset import job {} is not ACTIVE (last status: {}, failure reason: {})".format(
            interactions_dataset_import_job_arn, status, failure_reason
        )
    )

DatasetImportJob: ACTIVE


## Create a solution 

Use the appropriate recipe for this use case. One can try with different options. Please refer to https://docs.aws.amazon.com/personalize/latest/dg/working-with-predefined-recipes.html 

In [29]:
# Print the ARN of every recipe returned by list_recipes, one per line.
recipe_list = personalize.list_recipes()
available_recipe_arns = [recipe_summary['recipeArn'] for recipe_summary in recipe_list['recipes']]
for available_recipe_arn in available_recipe_arns:
    print(available_recipe_arn)

arn:aws:personalize:::recipe/aws-hrnn
arn:aws:personalize:::recipe/aws-hrnn-coldstart
arn:aws:personalize:::recipe/aws-hrnn-metadata
arn:aws:personalize:::recipe/aws-personalized-ranking
arn:aws:personalize:::recipe/aws-popularity-count
arn:aws:personalize:::recipe/aws-sims
arn:aws:personalize:::recipe/aws-user-personalization


In [30]:
# Recipe used for the solution; swap recipe_name to try one of the other recipes listed above.
recipe_name = "aws-user-personalization"
recipe_arn = "arn:aws:personalize:::recipe/{}".format(recipe_name)

In [31]:
# Create the solution: the chosen recipe applied to the dataset group.
solution_name = 'predict-ci-user-personalization-{}'.format(random_suffix)
create_solution_response = personalize.create_solution(
    recipeArn=recipe_arn,
    datasetGroupArn=dataset_group_arn,
    name=solution_name,
)

solution_arn = create_solution_response['solutionArn']
print(json.dumps(create_solution_response, indent=2))

{
  "solutionArn": "arn:aws:personalize:us-east-1:248025046818:solution/predict-ci-user-personalization-lr45h",
  "ResponseMetadata": {
    "RequestId": "56a65c75-19cc-48c9-a995-7616e0e49bcf",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 07 Dec 2020 08:10:28 GMT",
      "x-amzn-requestid": "56a65c75-19cc-48c9-a995-7616e0e49bcf",
      "content-length": "107",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [32]:
# Request a solution version with trainingMode 'FULL' and keep its ARN for polling and deployment.
create_solution_version_response = personalize.create_solution_version(
    trainingMode='FULL',
    solutionArn=solution_arn,
)
solution_version_arn = create_solution_version_response['solutionVersionArn']
print(json.dumps(create_solution_version_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-east-1:248025046818:solution/predict-ci-user-personalization-lr45h/3f3b8980",
  "ResponseMetadata": {
    "RequestId": "3aef9f0b-1549-4e65-a1ad-eed80cb1d8cc",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 07 Dec 2020 08:10:28 GMT",
      "x-amzn-requestid": "3aef9f0b-1549-4e65-a1ad-eed80cb1d8cc",
      "content-length": "123",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [33]:
# Poll the solution version once a minute, for at most 3 hours, until it is
# ACTIVE or its creation has failed.
status = None
failure_reason = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn = solution_version_arn
    )
    solution_version = describe_solution_version_response["solutionVersion"]
    status = solution_version["status"]
    failure_reason = solution_version.get("failureReason")
    print("SolutionVersion: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop the notebook here on failure or timeout rather than letting a later cell
# fail with a less obvious error.
if status != "ACTIVE":
    raise RuntimeError(
        "Solution version {} is not ACTIVE (last status: {}, failure reason: {})".format(
            solution_version_arn, status, failure_reason
        )
    )

SolutionVersion: CREATE PENDING
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGR

## Deploy a campaign

For a list of Properties and Hyperparameters available for the user personalization recipe, please refer to: https://docs.aws.amazon.com/personalize/latest/dg/native-recipe-new-item-USER_PERSONALIZATION.html#bandit-hyperparameters

In [34]:
# Campaign settings: item-exploration config passed through unchanged to create_campaign.
campaign_config = {"itemExplorationConfig": {"explorationItemAgeCutOff": "2000"}}

# Deploy the trained solution version as a campaign with a minimum of 2 provisioned TPS.
create_campaign_response = personalize.create_campaign(
    name="predict-ci-user-personalization-{}".format(random_suffix),
    solutionVersionArn=solution_version_arn,
    minProvisionedTPS=2,
    campaignConfig=campaign_config,
)

campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:us-east-1:248025046818:campaign/predict-ci-user-personalization-lr45h",
  "ResponseMetadata": {
    "RequestId": "6dd16459-fda7-4645-9a04-65552d7bf2c0",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Mon, 07 Dec 2020 08:58:32 GMT",
      "x-amzn-requestid": "6dd16459-fda7-4645-9a04-65552d7bf2c0",
      "content-length": "107",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [35]:
# Poll the campaign once a minute, for at most 3 hours, until it is ACTIVE or
# its creation has failed.
status = None
failure_reason = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = campaign_arn
    )
    campaign = describe_campaign_response["campaign"]
    status = campaign["status"]
    failure_reason = campaign.get("failureReason")
    print("Campaign: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop the notebook here on failure or timeout rather than letting a later cell
# fail with a less obvious error.
if status != "ACTIVE":
    raise RuntimeError(
        "Campaign {} is not ACTIVE (last status: {}, failure reason: {})".format(
            campaign_arn, status, failure_reason
        )
    )

Campaign: CREATE PENDING
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: ACTIVE


# Get Customer Intent Predicted 

## Prediction of intent for existing customer id with existing transactions

### A. High confidence score example

A deployed Personalize campaign offers Personalize runtime APIs for getting real-time personalized recommendations. In this section of the notebook, we will use Boto APIs to interact with the Personalize runtime APIs. We provide a customer's unique ID and receive that customer's next intent as predicted by Personalize.

In [36]:
# Customer whose next intent is requested.
# Also try a few other customer IDs
# 41250
# 35071
# 37604
# 36720
# 37003
customer_id = '35739'

response = personalize_runtime.get_recommendations(
    userId=customer_id,
    campaignArn=campaign_arn,
)

# Print every returned item with its score, in the order the campaign returned them.
print("Recommended items - score ")
for recommendation in response['itemList']:
    print(recommendation['itemId'], recommendation['score'])

Recommended items - score 
AC_CS 0.7790073
LI_NE 0.0539453
HL_NE 0.035067
AC_NE 0.0347238
TA_NE 0.0347189
FD_NE 0.0238516
CC_NE 0.0210973
AC_AA 0.0032689
AC_AS 0.0024376
LI_CS 0.0017669
FD_CS 0.0017241
LI_PA 0.0010326
LI_SW 0.0009471
TA_CS 0.0007477
AC_PA 0.0007411
CC_CS 0.000591
FD_AD 0.0005001
CC_LC 0.000437
FD_PW 0.0004024
CC_AA 0.0003992
HL_DS 0.0003804
HL_CS 0.0002834
LI_AS 0.00026
CC_CA 0.0002511


### B. Low confidence score example

In [37]:
# Request recommendations for user ID '89789' (the low-confidence example).
customer_id = '89789'
response = personalize_runtime.get_recommendations(
    userId=customer_id,
    campaignArn=campaign_arn,
)

# Print every returned item with its score, in the order the campaign returned them.
print("Recommended items - score ")
for recommendation in response['itemList']:
    print(recommendation['itemId'], recommendation['score'])

Recommended items - score 
FD_NE 0.179914
CC_NE 0.1751883
LI_NE 0.1718069
TA_NE 0.1697547
AC_NE 0.1520881
HL_NE 0.1439025
TA_CS 0.0009345
HL_RP 0.0005418
CC_LC 0.0004933
HL_DS 0.0004255
FD_NA 0.0003846
LI_CS 0.0003729
LI_AS 0.000357
CC_CS 0.0003478
LI_SW 0.000317
AC_CS 0.0003048
HL_AS 0.0002988
LI_PA 0.0002733
AC_PA 0.000255
FD_PW 0.000235
AC_AA 0.0002209
FD_AD 0.0001991
FD_CS 0.0001685
CC_AA 0.0001676


## Update new interaction data in real-time

Using the Personalize APIs for updating user interaction data in real time, we can get recommendations after new events have happened. This is done using the put_events API offered by the Amazon Personalize service.

In [38]:
# Name the event tracker with the run-specific suffix, like every other resource
# this notebook creates, so that trackers from different runs can be told apart.
event_tracker = 'CallInentTracker-' + random_suffix

# Create the event tracker for the dataset group; its tracking ID is what
# put_events needs to record new interactions.
response = personalize.create_event_tracker(
    name=event_tracker,
    datasetGroupArn=dataset_group_arn
)
tracking_id = response['trackingId']
response

{'eventTrackerArn': 'arn:aws:personalize:us-east-1:248025046818:event-tracker/cfb0387f',
 'trackingId': '2f1a59f1-2bd3-42dc-8987-45cb83990ed2',
 'ResponseMetadata': {'RequestId': '231ff337-3d5d-4579-b020-f71bddd6645f',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Mon, 07 Dec 2020 09:08:34 GMT',
   'x-amzn-requestid': '231ff337-3d5d-4579-b020-f71bddd6645f',
   'content-length': '139',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

Create a random session ID for sending interaction events 

In [39]:
import random
import string

# Client for the Personalize events API, used below to record interactions.
personalize_events = boto3.client('personalize-events')

# Session ID: ten random lowercase ASCII letters.
letters = string.ascii_lowercase
session_id = ''.join([random.choice(letters) for _ in range(10)])

### Record new events for an existing customer 

In [40]:
# (seconds before now, item ID) for each 'call' event recorded for customer 35739.
past_calls = [
    (604800, "AC_NE"),
    (404800, "AC_AS"),
    (94800, "AC_PA"),
]

# Send all events in a single put_events request; 'properties' is the JSON text
# {"itemId": "<item>"} and 'sentAt' is an epoch timestamp in whole seconds.
personalize_events.put_events(
    trackingId=tracking_id,
    userId='35739',
    sessionId=session_id,
    eventList=[
        {
            'sentAt': int(time.time() - seconds_ago),
            'eventType': 'call',
            'properties': json.dumps({"itemId": item_id}),
        }
        for seconds_ago, item_id in past_calls
    ],
)

{'ResponseMetadata': {'RequestId': '2e129b93-0481-4e0d-9d20-e9ed61532e12',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'date': 'Mon, 07 Dec 2020 09:08:34 GMT',
   'x-amzn-requestid': '2e129b93-0481-4e0d-9d20-e9ed61532e12',
   'content-length': '0',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

### Observe new intent prediction for the same customer, generated based on the new events recorded in real time

In [41]:
# Ask again for customer 35739 after the new events were recorded.
# NOTE(review): the recorded output of this cell is identical to the output obtained
# before the events were sent -- confirm whether events need time to be reflected.
customer_id = '35739'
response = personalize_runtime.get_recommendations(
    userId=customer_id,
    campaignArn=campaign_arn,
)

print("Recommended items - scores")

for recommendation in response['itemList']:
    print(recommendation['itemId'], recommendation['score'])

Recommended items - scores
AC_CS 0.7790073
LI_NE 0.0539453
HL_NE 0.035067
AC_NE 0.0347238
TA_NE 0.0347189
FD_NE 0.0238516
CC_NE 0.0210973
AC_AA 0.0032689
AC_AS 0.0024376
LI_CS 0.0017669
FD_CS 0.0017241
LI_PA 0.0010326
LI_SW 0.0009471
TA_CS 0.0007477
AC_PA 0.0007411
CC_CS 0.000591
FD_AD 0.0005001
CC_LC 0.000437
FD_PW 0.0004024
CC_AA 0.0003992
HL_DS 0.0003804
HL_CS 0.0002834
LI_AS 0.00026
CC_CA 0.0002511


## Record  events for new customer

In [42]:
# Single 'call' event for the new customer 88888, timestamped 86400 seconds (one day) before now.
new_customer_event = {
    'sentAt': int(time.time() - 86400),
    'eventType': 'call',
    'properties': json.dumps({"itemId": "HL_NE"}),
}

personalize_events.put_events(
    trackingId=tracking_id,
    userId='88888',
    sessionId=session_id,
    eventList=[new_customer_event],
)

{'ResponseMetadata': {'RequestId': 'e6aaac3c-726a-452a-a48f-b9e60257eba3',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/json',
   'date': 'Mon, 07 Dec 2020 09:08:35 GMT',
   'x-amzn-requestid': 'e6aaac3c-726a-452a-a48f-b9e60257eba3',
   'content-length': '0',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

### Start getting intent prediction in real-time for the new customer

In [43]:
# Request recommendations for the new customer 88888.
customer_id = '88888'
response = personalize_runtime.get_recommendations(
    userId=customer_id,
    campaignArn=campaign_arn,
)

print("Recommended items - scores")

for recommendation in response['itemList']:
    print(recommendation['itemId'], recommendation['score'])

Recommended items - scores
FD_NE 0.179914
CC_NE 0.1751883
LI_NE 0.1718069
TA_NE 0.1697547
AC_NE 0.1520881
HL_NE 0.1439025
TA_CS 0.0009345
HL_RP 0.0005418
CC_LC 0.0004933
HL_DS 0.0004255
FD_NA 0.0003846
LI_CS 0.0003729
LI_AS 0.000357
CC_CS 0.0003478
LI_SW 0.000317
AC_CS 0.0003048
HL_AS 0.0002988
LI_PA 0.0002733
AC_PA 0.000255
FD_PW 0.000235
AC_AA 0.0002209
FD_AD 0.0001991
FD_CS 0.0001685
CC_AA 0.0001676


In [44]:
# Display the campaign ARN as the cell output.
campaign_arn

'arn:aws:personalize:us-east-1:248025046818:campaign/predict-ci-user-personalization-lr45h'

In [45]:
# Display the event tracker's tracking ID as the cell output.
tracking_id

'2f1a59f1-2bd3-42dc-8987-45cb83990ed2'