# AMAZON PERSONALIZE DEMO - USER RECOMMENDATIONS

In [1]:
# INSTALL BOTO

!pip install --upgrade boto3

Collecting boto3
[?25l  Downloading https://files.pythonhosted.org/packages/8a/b7/9d7dfe81716157bb1a9887ddd66780a228fc5c3ea66917aebfca93210f71/boto3-1.9.190-py2.py3-none-any.whl (128kB)
[K    100% |████████████████████████████████| 133kB 2.1MB/s ta 0:00:01
Collecting botocore<1.13.0,>=1.12.190 (from boto3)
[?25l  Downloading https://files.pythonhosted.org/packages/19/37/251f631b01a62e55e8e290b1a445ce3c8febfbdee43921fe67da457500a2/botocore-1.12.190-py2.py3-none-any.whl (5.6MB)
[K    100% |████████████████████████████████| 5.6MB 4.5MB/s ta 0:00:011
Installing collected packages: botocore, boto3
  Found existing installation: botocore 1.12.172
    Uninstalling botocore-1.12.172:
      Successfully uninstalled botocore-1.12.172
  Found existing installation: boto3 1.9.172
    Uninstalling boto3-1.9.172:
      Successfully uninstalled boto3-1.9.172
Successfully installed boto3-1.9.190 botocore-1.12.190


In [2]:
#IMPORT LIBRARIES
import boto3
import json
import numpy as np
import pandas as pd
import time


In [3]:
# GET BOTO CLIENT FOR PERSONALIZATION
personalize = boto3.client('personalize')
personalize_runtime = boto3.client('personalize-runtime')

In [5]:
# GET THE MOVIELENS DATATSET (use wget instead of curl for linux machine)
!curl -O -N http://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip -o ml-100k.zip


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 4808k  100 4808k    0     0  4141k      0  0:00:01  0:00:01 --:--:-- 4145k
Archive:  ml-100k.zip
   creating: ml-100k/
  inflating: ml-100k/allbut.pl       
  inflating: ml-100k/mku.sh          
  inflating: ml-100k/README          
  inflating: ml-100k/u.data          
  inflating: ml-100k/u.genre         
  inflating: ml-100k/u.info          
  inflating: ml-100k/u.item          
  inflating: ml-100k/u.occupation    
  inflating: ml-100k/u.user          
  inflating: ml-100k/u1.base         
  inflating: ml-100k/u1.test         
  inflating: ml-100k/u2.base         
  inflating: ml-100k/u2.test         
  inflating: ml-100k/u3.base         
  inflating: ml-100k/u3.test         
  inflating: ml-100k/u4.base         
  inflating: ml-100k/u4.test         
  inflating: ml-100k/u5.base         
  inflating: ml-100k/u5.test       

In [16]:
# READ THE DATA INTO PANDAS
df_data = pd.read_csv('./ml-100k/u.data', sep='\t', names=['USER_ID', 'ITEM_ID', 'RATING', 'TIMESTAMP'])

df_data.head(5)

Unnamed: 0,USER_ID,ITEM_ID,RATING,TIMESTAMP
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [17]:
# UPLOAD DATA TO S3 bucket
df_data = df_data[df_data['RATING'] > 4]                # keep only movies rated 4 and above


df_data = df_data[['USER_ID', 'ITEM_ID', 'TIMESTAMP']] # select columns that match the columns in the schema below




df_data.to_csv('movie_lens_data.csv', index=False)

boto3.Session().resource('s3').Bucket('bucket-amazon-personalize').Object('movie_lens_data.csv').upload_file('movie_lens_data.csv')


In [19]:
df_data.head(5)

Unnamed: 0,USER_ID,ITEM_ID,TIMESTAMP
7,253,465,891628467
11,286,1014,879781125
12,200,222,876042340
16,122,387,879270459
26,38,95,892430094


In [22]:
# Create Schema
data_schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {
            "name": "USER_ID",
            "type": "string"
        },
        {
            "name": "ITEM_ID",
            "type": "string"
        },
        {
            "name": "TIMESTAMP",
            "type": "long"
        }
    ],
    "version": "1.0"
}

schema_response = personalize.create_schema(
    name = "personalize-demo1",
    schema = json.dumps(data_schema)
)

schema_arn = schema_response['schemaArn']
print(json.dumps(schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:us-east-1:966730287427:schema/personalize-demo1",
  "ResponseMetadata": {
    "RequestId": "b5a06164-ec52-4022-ae07-d02393f59da8",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Fri, 19 Jul 2019 18:02:21 GMT",
      "x-amzn-requestid": "b5a06164-ec52-4022-ae07-d02393f59da8",
      "content-length": "83",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [23]:
#CREATE DATASET GROUPS
dataset_group_response = personalize.create_dataset_group(
    name = "personalize-demo-dataset"
)

dataset_group_arn = dataset_group_response['datasetGroupArn']
print(json.dumps(dataset_group_response, indent=2))

{
  "datasetGroupArn": "arn:aws:personalize:us-east-1:966730287427:dataset-group/personalize-demo-dataset",
  "ResponseMetadata": {
    "RequestId": "a5d07192-1494-4bf4-a9f4-4e463dafb6ad",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Fri, 19 Jul 2019 18:03:40 GMT",
      "x-amzn-requestid": "a5d07192-1494-4bf4-a9f4-4e463dafb6ad",
      "content-length": "103",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [24]:
# Wait for Dataset Group to Have ACTIVE Status¶
max_time = time.time() + 1*60*60 # 1 hours
while time.time() < max_time:
    describe_dataset_group_response = personalize.describe_dataset_group(
        datasetGroupArn = dataset_group_arn
    )
    status = describe_dataset_group_response["datasetGroup"]["status"]
    print("DatasetGroup: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(40)

DatasetGroup: CREATE PENDING
DatasetGroup: ACTIVE


### Create Dataset

In [25]:
dataset_type = "INTERACTIONS"
create_dataset_response = personalize.create_dataset(
    name = "DEMO-dataset",
    datasetType = dataset_type,
    datasetGroupArn = dataset_group_arn,
    schemaArn = schema_arn
)

dataset_arn = create_dataset_response['datasetArn']
print(json.dumps(create_dataset_response, indent=2))

{
  "datasetArn": "arn:aws:personalize:us-east-1:966730287427:dataset/personalize-demo-dataset/INTERACTIONS",
  "ResponseMetadata": {
    "RequestId": "19a3d38b-3abb-4bc5-82c6-0204891bfe9a",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Fri, 19 Jul 2019 18:05:31 GMT",
      "x-amzn-requestid": "19a3d38b-3abb-4bc5-82c6-0204891bfe9a",
      "content-length": "105",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [None]:
#Attach Policy to S3 Bucket

s3 = boto3.client("s3")

policy = {
    "Version": "2012-10-17",
    "Id": "PersonalizeS3BucketAccessPolicy",
    "Statement": [
        {
            "Sid": "PersonalizeS3BucketAccessPolicy",
            "Effect": "Allow",
            "Principal": {
                "Service": "personalize.amazonaws.com"
            },
            "Action": [
                "s3:GetObject",
                "s3:ListBucket"
            ],
            "Resource": [
                "arn:aws:s3:::{}".format('bucket-amazon-personalize'),
                "arn:aws:s3:::{}/*".format('bucket-amazon-personalize')
            ]
        }
    ]
}

s3.put_bucket_policy(Bucket=bucket, Policy=json.dumps(policy))

In [31]:
#Create Personalize Role incase you do not have created already
# I have created a role already so I am going to skip below step and use role directly


role_arn ='arn:aws:iam::966730287427:role/service-role/AmazonPersonalize-ExecutionRole-1561001485447'


'''
iam = boto3.client("iam")

role_name = "PersonalizeRole"
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
          "Effect": "Allow",
          "Principal": {
            "Service": "personalize.amazonaws.com"
          },
          "Action": "sts:AssumeRole"
        }
    ]
}

create_role_response = iam.create_role(
    RoleName = role_name,
    AssumeRolePolicyDocument = json.dumps(assume_role_policy_document)
)

# AmazonPersonalizeFullAccess provides access to any S3 bucket with a name that includes "personalize" or "Personalize" 
# if you would like to use a bucket with a different name, please consider creating and attaching a new policy
# that provides read access to your bucket or attaching the AmazonS3ReadOnlyAccess policy to the role
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess"
iam.attach_role_policy(
    RoleName = role_name,
    PolicyArn = policy_arn
)

time.sleep(60) # wait for a minute to allow IAM role policy attachment to propagate

role_arn = create_role_response["Role"]["Arn"]
print(role_arn)
'''



In [32]:
#Create Dataset Import Job



create_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName = "dataset-import-job",
    datasetArn = dataset_arn,
    dataSource = {
        "dataLocation": "s3://{}/{}".format('bucket-amazon-personalize', 'movie_lens_data.csv')
    },
    roleArn = role_arn
)

dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']
print(json.dumps(create_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:us-east-1:966730287427:dataset-import-job/dataset-import-job",
  "ResponseMetadata": {
    "RequestId": "1b17172f-ecc3-4f31-881d-8ab541c163b5",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Fri, 19 Jul 2019 18:16:43 GMT",
      "x-amzn-requestid": "1b17172f-ecc3-4f31-881d-8ab541c163b5",
      "content-length": "106",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [33]:
#Wait for Dataset Import Job to Have ACTIVE Status

max_time = time.time() + 2*60*60 # 2 hours
while time.time() < max_time:
    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = dataset_import_job_arn
    )
    status = describe_dataset_import_job_response["datasetImportJob"]['status']
    print("DatasetImportJob: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(40)

DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: ACTIVE


### Select Recipe

In [34]:
# Select Recipe - A machine learning model that best fits your requirement
list_recipes_response = personalize.list_recipes()
recipe_arn = "arn:aws:personalize:::recipe/aws-hrnn" 
list_recipes_response

{'recipes': [{'name': 'aws-hrnn',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-hrnn',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 9, 20, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2019, 6, 19, 20, 39, 17, 65000, tzinfo=tzlocal())},
  {'name': 'aws-hrnn-coldstart',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-hrnn-coldstart',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 9, 20, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2019, 6, 19, 20, 39, 17, 64000, tzinfo=tzlocal())},
  {'name': 'aws-hrnn-metadata',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-hrnn-metadata',
   'status': 'ACTIVE',
   'creationDateTime': datetime.datetime(2019, 6, 9, 20, 0, tzinfo=tzlocal()),
   'lastUpdatedDateTime': datetime.datetime(2019, 6, 19, 20, 39, 17, 64000, tzinfo=tzlocal())},
  {'name': 'aws-personalized-ranking',
   'recipeArn': 'arn:aws:personalize:::recipe/aws-personalized-ranking',
   's

In [35]:
# Create a solution

create_solution_response = personalize.create_solution(
    name = "personalized-solution",
    datasetGroupArn = dataset_group_arn,
    recipeArn = recipe_arn
)

solution_arn = create_solution_response['solutionArn']
print(json.dumps(create_solution_response, indent=2))

{
  "solutionArn": "arn:aws:personalize:us-east-1:966730287427:solution/personalized-solution",
  "ResponseMetadata": {
    "RequestId": "ebb6fead-7ede-4974-9203-f0e514d29962",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Fri, 19 Jul 2019 18:29:51 GMT",
      "x-amzn-requestid": "ebb6fead-7ede-4974-9203-f0e514d29962",
      "content-length": "91",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [38]:
#Wait for Dataset Import Job to Have ACTIVE Status
create_solution_version_response = personalize.create_solution_version(solutionArn = solution_arn)

solution_version_arn = create_solution_version_response['solutionVersionArn']

print(json.dumps(create_solution_version_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-east-1:966730287427:solution/personalized-solution/aec1e753",
  "ResponseMetadata": {
    "RequestId": "7251b627-c8c0-43ec-88bd-6aeb2afc7aa6",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Fri, 19 Jul 2019 18:36:30 GMT",
      "x-amzn-requestid": "7251b627-c8c0-43ec-88bd-6aeb2afc7aa6",
      "content-length": "107",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [39]:
#Wait for Solution Version to Have ACTIVE Status
max_time = time.time() + 2*60*60
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn = solution_version_arn
    )
    status = describe_solution_version_response["solutionVersion"]["status"]
    print("SolutionVersion: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(40)

SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_P

In [40]:
# Get Solution Metrics
get_solution_metrics_response = personalize.get_solution_metrics(
    solutionVersionArn = solution_version_arn
)

print(json.dumps(get_solution_metrics_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-east-1:966730287427:solution/personalized-solution/aec1e753",
  "metrics": {
    "coverage": 0.1142,
    "mean_reciprocal_rank_at_25": 0.0678,
    "normalized_discounted_cumulative_gain_at_10": 0.0986,
    "normalized_discounted_cumulative_gain_at_25": 0.1159,
    "normalized_discounted_cumulative_gain_at_5": 0.0662,
    "precision_at_10": 0.0174,
    "precision_at_25": 0.0098,
    "precision_at_5": 0.0163
  },
  "ResponseMetadata": {
    "RequestId": "6a9c79df-626e-44b7-8d6c-24ceb93613ad",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Fri, 19 Jul 2019 19:15:39 GMT",
      "x-amzn-requestid": "6a9c79df-626e-44b7-8d6c-24ceb93613ad",
      "content-length": "405",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [41]:
# Create Campaign
create_campaign_response = personalize.create_campaign(
    name = "personalized-campaign",
    solutionVersionArn = solution_version_arn,
    minProvisionedTPS = 1
)

campaign_arn = create_campaign_response['campaignArn']
print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:us-east-1:966730287427:campaign/personalized-campaign",
  "ResponseMetadata": {
    "RequestId": "7ee5409a-5c83-409a-9def-3f8d9f865176",
    "HTTPStatusCode": 200,
    "HTTPHeaders": {
      "content-type": "application/x-amz-json-1.1",
      "date": "Fri, 19 Jul 2019 19:16:44 GMT",
      "x-amzn-requestid": "7ee5409a-5c83-409a-9def-3f8d9f865176",
      "content-length": "91",
      "connection": "keep-alive"
    },
    "RetryAttempts": 0
  }
}


In [42]:
#Wait for Campaign to Have ACTIVE Status
max_time = time.time() + 1*60*60 
while time.time() < max_time:
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = campaign_arn
    )
    status = describe_campaign_response["campaign"]["status"]
    print("Campaign: {}".format(status))
    
    if status == "ACTIVE" or status == "CREATE FAILED":
        break
        
    time.sleep(40)

Campaign: CREATE PENDING
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: ACTIVE


### Get Recommendations

In [57]:
response = personalize_runtime.get_recommendations(
    campaignArn = campaign_arn,
    userId = '710',
    numResults= 50
)

print("Recommendationed Items: ")
for item in response['itemList']:
    print (item['itemId'])

Recommendationed Items: 
191
318
135
200
132
12
143
56
197
64
173
211
527
427
144
82
423
204
180
238
97
185
168
195
205
265
50
187
651
182
603
208
69
528
215
183
23
216
194
660
193
496
170
179
153
89
432
568
210
228
