### Get the Personalize boto3 Client

In [15]:
import boto3

import json
import numpy as np
import pandas as pd
import time

# Fetch the Personalize service-model definitions. `wget -N` only re-downloads
# when the remote copy is newer than the local file.
!wget -N https://s3-us-west-2.amazonaws.com/personalize-cli-json-models/personalize.json
!wget -N https://s3-us-west-2.amazonaws.com/personalize-cli-json-models/personalize-runtime.json
# Register both models with the AWS CLI under the service names used below.
# NOTE(review): presumably this is what lets boto3 resolve 'personalize' and
# 'personalize-runtime' in this environment -- confirm against the installed SDK.
!aws configure add-model --service-model file://`pwd`/personalize.json --service-name personalize
!aws configure add-model --service-model file://`pwd`/personalize-runtime.json --service-name personalize-runtime

# Control-plane client (schemas, datasets, solutions, campaigns) and runtime
# client (recommendations), both pinned to the us-west-2 endpoints.
personalize = boto3.client(service_name='personalize', endpoint_url='https://personalize.us-west-2.amazonaws.com', region_name='us-west-2')
personalize_runtime = boto3.client(service_name='personalize-runtime', endpoint_url='https://personalize-runtime.us-west-2.amazonaws.com', region_name='us-west-2')

--2019-05-05 19:06:43--  https://s3-us-west-2.amazonaws.com/personalize-cli-json-models/personalize.json
Resolving s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)... 52.218.217.152
Connecting to s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)|52.218.217.152|:443... connected.
HTTP request sent, awaiting response... 304 Not Modified
File ‘personalize.json’ not modified on server. Omitting download.

--2019-05-05 19:06:43--  https://s3-us-west-2.amazonaws.com/personalize-cli-json-models/personalize-runtime.json
Resolving s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)... 52.218.217.152
Connecting to s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)|52.218.217.152|:443... connected.
HTTP request sent, awaiting response... 304 Not Modified
File ‘personalize-runtime.json’ not modified on server. Omitting download.



### Specify a Bucket and Data Output Location

In [19]:
# S3 destination for the training data; both values are reused by later cells.
bucket = 'personalize-peerjako'        # replace with the name of your S3 bucket
filename = 'DEMO-movie-lens-100k.csv'  # replace with the name to save the dataset under

### Download, Prepare, and Upload Training Data

#### Download and Explore the Dataset

In [20]:
# Download (only if newer than the local copy) and unpack the MovieLens 100k
# archive; `unzip -o` overwrites existing files without prompting.
!wget -N http://files.grouplens.org/datasets/movielens/ml-100k.zip
!unzip -o ml-100k.zip
# u.data is tab-separated with no header row, so column names are supplied here.
data = pd.read_csv('./ml-100k/u.data', sep='\t', names=['USER_ID', 'ITEM_ID', 'RATING', 'TIMESTAMP'])
# Global display option: show at most 5 rows when a DataFrame is rendered.
pd.set_option('display.max_rows', 5)
data

--2019-05-05 19:08:16--  http://files.grouplens.org/datasets/movielens/ml-100k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.34.235
Connecting to files.grouplens.org (files.grouplens.org)|128.101.34.235|:80... connected.
HTTP request sent, awaiting response... 304 Not Modified
File ‘ml-100k.zip’ not modified on server. Omitting download.

Archive:  ml-100k.zip
  inflating: ml-100k/allbut.pl       
  inflating: ml-100k/mku.sh          
  inflating: ml-100k/README          
  inflating: ml-100k/u.data          
  inflating: ml-100k/u.genre         
  inflating: ml-100k/u.info          
  inflating: ml-100k/u.item          
  inflating: ml-100k/u.occupation    
  inflating: ml-100k/u.user          
  inflating: ml-100k/u1.base         
  inflating: ml-100k/u1.test         
  inflating: ml-100k/u2.base         
  inflating: ml-100k/u2.test         
  inflating: ml-100k/u3.base         
  inflating: ml-100k/u3.test         
  inflating: ml-100k/u4.base         
  inflat

Unnamed: 0,USER_ID,ITEM_ID,RATING,TIMESTAMP
0,196,242,3,881250949
1,186,302,3,891717742
...,...,...,...,...
99998,13,225,2,882399156
99999,12,203,3,879959583


#### Prepare and Upload Data

In [21]:
# Keep only interactions whose RATING exceeds 3.6, and only the columns that
# the Interactions schema below declares.
schema_columns = ['USER_ID', 'ITEM_ID', 'TIMESTAMP']
data = data.loc[data['RATING'] > 3.6, schema_columns]

# Write the CSV locally, then upload it to the bucket under the same name.
data.to_csv(filename, index=False)
s3_object = boto3.Session().resource('s3').Bucket(bucket).Object(filename)
s3_object.upload_file(filename)

### Create Schema

In [22]:
# Avro record describing the interactions CSV: one (name, type) pair per column,
# in the same order as the columns written to the file.
interaction_fields = [
    ("USER_ID", "string"),
    ("ITEM_ID", "string"),
    ("TIMESTAMP", "long"),
]

schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": [
        {"name": field_name, "type": field_type}
        for field_name, field_type in interaction_fields
    ],
    "version": "1.0"
}

# Register the schema; the service expects the definition as a JSON string.
create_schema_response = personalize.create_schema(
    name="DEMO-schema",
    schema=json.dumps(schema)
)
schema_arn = create_schema_response['schemaArn']

print(json.dumps(create_schema_response, indent=2))

{
  "schemaArn": "arn:aws:personalize:us-west-2:586768923447:schema/DEMO-schema", 
  "ResponseMetadata": {
    "RetryAttempts": 0, 
    "HTTPStatusCode": 200, 
    "RequestId": "674d837f-4187-43f9-a0a9-5616a469d8fb", 
    "HTTPHeaders": {
      "date": "Sun, 05 May 2019 19:08:28 GMT", 
      "x-amzn-requestid": "674d837f-4187-43f9-a0a9-5616a469d8fb", 
      "content-length": "77", 
      "content-type": "application/x-amz-json-1.1", 
      "connection": "keep-alive"
    }
  }
}


### Create and Wait for Dataset Group

#### Create Dataset Group

In [23]:
# Create the dataset group and keep its ARN for the cells that follow.
create_dataset_group_response = personalize.create_dataset_group(name="DEMO-dataset-group")
dataset_group_arn = create_dataset_group_response['datasetGroupArn']

print(json.dumps(create_dataset_group_response, indent=2))

{
  "datasetGroupArn": "arn:aws:personalize:us-west-2:586768923447:dataset-group/DEMO-dataset-group", 
  "ResponseMetadata": {
    "RetryAttempts": 0, 
    "HTTPStatusCode": 200, 
    "RequestId": "05860312-92e9-4a72-bde2-23c800c0a323", 
    "HTTPHeaders": {
      "date": "Sun, 05 May 2019 19:08:38 GMT", 
      "x-amzn-requestid": "05860312-92e9-4a72-bde2-23c800c0a323", 
      "content-length": "97", 
      "content-type": "application/x-amz-json-1.1", 
      "connection": "keep-alive"
    }
  }
}


#### Wait for Dataset Group to Have ACTIVE Status

In [24]:
# Poll the dataset group once a minute until it reaches a terminal status,
# giving up after three hours.
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_group_response = personalize.describe_dataset_group(
        datasetGroupArn = dataset_group_arn
    )
    status = describe_dataset_group_response["datasetGroup"]["status"]
    print("DatasetGroup: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop the notebook here on failure or timeout instead of letting later cells
# run against a dataset group that never became ACTIVE.
if status != "ACTIVE":
    raise RuntimeError(
        "DatasetGroup did not become ACTIVE (last status: {}, failureReason: {})".format(
            status,
            describe_dataset_group_response["datasetGroup"].get("failureReason")
        )
    )

DatasetGroup: ACTIVE


### Create Dataset

In [25]:
# Create the interactions dataset inside the dataset group, bound to the schema
# registered earlier.
dataset_type = "INTERACTIONS"

create_dataset_response = personalize.create_dataset(
    name="DEMO-dataset",
    schemaArn=schema_arn,
    datasetGroupArn=dataset_group_arn,
    datasetType=dataset_type
)
dataset_arn = create_dataset_response['datasetArn']

print(json.dumps(create_dataset_response, indent=2))

{
  "ResponseMetadata": {
    "RetryAttempts": 0, 
    "HTTPStatusCode": 200, 
    "RequestId": "97c40129-e404-45d5-9d10-4606d698a501", 
    "HTTPHeaders": {
      "date": "Sun, 05 May 2019 19:09:51 GMT", 
      "x-amzn-requestid": "97c40129-e404-45d5-9d10-4606d698a501", 
      "content-length": "99", 
      "content-type": "application/x-amz-json-1.1", 
      "connection": "keep-alive"
    }
  }, 
  "datasetArn": "arn:aws:personalize:us-west-2:586768923447:dataset/DEMO-dataset-group/INTERACTIONS"
}


### Prepare, Create, and Wait for Dataset Import Job

#### Attach policy to S3 bucket

In [26]:
s3 = boto3.client("s3")

# The policy covers the bucket itself and every object inside it.
bucket_arn = "arn:aws:s3:::{}".format(bucket)
objects_arn = "arn:aws:s3:::{}/*".format(bucket)

# Allow the Personalize service principal to list the bucket and read objects.
policy = {
    "Version": "2012-10-17",
    "Id": "PersonalizeS3BucketAccessPolicy",
    "Statement": [
        {
            "Sid": "PersonalizeS3BucketAccessPolicy",
            "Effect": "Allow",
            "Principal": {"Service": "personalize.amazonaws.com"},
            "Action": ["s3:GetObject", "s3:ListBucket"],
            "Resource": [bucket_arn, objects_arn]
        }
    ]
}

# Trailing semicolon keeps the API response out of the cell output.
s3.put_bucket_policy(Bucket=bucket, Policy=json.dumps(policy));

#### Create S3 Read Only Access Role

In [30]:
iam = boto3.client("iam")

role_name = "PersonalizeS3Role"

# Trust policy: lets the Personalize service principal assume this role.
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"Service": "personalize.amazonaws.com"},
            "Action": "sts:AssumeRole"
        }
    ]
}

create_role_response = iam.create_role(
    RoleName=role_name,
    AssumeRolePolicyDocument=json.dumps(assume_role_policy_document)
)

# Attach the AWS-managed policies to the role, in the same order as before.
for managed_policy_arn in (
    "arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess",
    "arn:aws:iam::aws:policy/CloudWatchFullAccess",
):
    iam.attach_role_policy(RoleName=role_name, PolicyArn=managed_policy_arn)

role_arn = create_role_response["Role"]["Arn"]
print(role_arn)

arn:aws:iam::586768923447:role/PersonalizeS3Role


#### Create Dataset Import Job

In [31]:
# Location of the CSV uploaded earlier; the import job reads it using the role
# created above.
data_location = "s3://{}/{}".format(bucket, filename)

create_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName="DEMO-dataset-import-job",
    datasetArn=dataset_arn,
    dataSource={"dataLocation": data_location},
    roleArn=role_arn
)
dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']

print(json.dumps(create_dataset_import_job_response, indent=2))

{
  "datasetImportJobArn": "arn:aws:personalize:us-west-2:586768923447:dataset-import-job/DEMO-dataset-import-job", 
  "ResponseMetadata": {
    "RetryAttempts": 0, 
    "HTTPStatusCode": 200, 
    "RequestId": "d7096254-63ec-4789-9807-378f86fe22ac", 
    "HTTPHeaders": {
      "date": "Sun, 05 May 2019 19:17:11 GMT", 
      "x-amzn-requestid": "d7096254-63ec-4789-9807-378f86fe22ac", 
      "content-length": "111", 
      "content-type": "application/x-amz-json-1.1", 
      "connection": "keep-alive"
    }
  }
}


#### Wait for Dataset Import Job to Have ACTIVE Status

In [32]:
# Poll the import job once a minute until it reaches a terminal status,
# giving up after three hours.
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn = dataset_import_job_arn
    )
    status = describe_dataset_import_job_response["datasetImportJob"]['status']
    print("DatasetImportJob: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop the notebook here on failure or timeout instead of training a solution
# on a dataset whose import never completed.
if status != "ACTIVE":
    raise RuntimeError(
        "DatasetImportJob did not become ACTIVE (last status: {}, failureReason: {})".format(
            status,
            describe_dataset_import_job_response["datasetImportJob"].get("failureReason")
        )
    )

DatasetImportJob: CREATE PENDING
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: CREATE IN_PROGRESS
DatasetImportJob: ACTIVE


### Select Recipe

In [33]:
# aws-hrnn selected for demo purposes; the listing is displayed only so the
# available recipes can be inspected.
recipe_arn = "arn:aws:personalize:::recipe/aws-hrnn"

list_recipes_response = personalize.list_recipes()
list_recipes_response

{'ResponseMetadata': {'HTTPHeaders': {'connection': 'keep-alive',
   'content-length': '989',
   'content-type': 'application/x-amz-json-1.1',
   'date': 'Sun, 05 May 2019 19:43:22 GMT',
   'x-amzn-requestid': '6ec6af07-78cb-47c3-8c05-71d2d8eff852'},
  'HTTPStatusCode': 200,
  'RequestId': '6ec6af07-78cb-47c3-8c05-71d2d8eff852',
  'RetryAttempts': 0},
 u'recipes': [{u'creationDateTime': datetime.datetime(2018, 11, 26, 0, 0, tzinfo=tzlocal()),
   u'lastUpdatedDateTime': datetime.datetime(1970, 1, 1, 0, 0, tzinfo=tzlocal()),
   u'name': u'aws-hrnn',
   u'recipeArn': u'arn:aws:personalize:::recipe/aws-hrnn',
   u'status': u'ACTIVE'},
  {u'creationDateTime': datetime.datetime(2018, 11, 26, 0, 0, tzinfo=tzlocal()),
   u'lastUpdatedDateTime': datetime.datetime(1970, 1, 1, 0, 0, tzinfo=tzlocal()),
   u'name': u'aws-hrnn-coldstart',
   u'recipeArn': u'arn:aws:personalize:::recipe/aws-hrnn-coldstart',
   u'status': u'ACTIVE'},
  {u'creationDateTime': datetime.datetime(2018, 11, 26, 0, 0, tzinfo

### Create and Wait for Solution

#### Create Solution

In [34]:
# Create a solution that applies the chosen recipe to the dataset group.
create_solution_response = personalize.create_solution(
    name="DEMO-solution",
    recipeArn=recipe_arn,
    datasetGroupArn=dataset_group_arn
)
solution_arn = create_solution_response['solutionArn']

print(json.dumps(create_solution_response, indent=2))

{
  "solutionArn": "arn:aws:personalize:us-west-2:586768923447:solution/DEMO-solution", 
  "ResponseMetadata": {
    "RetryAttempts": 0, 
    "HTTPStatusCode": 200, 
    "RequestId": "086c9b31-e4da-43a5-baa5-09e8338784ca", 
    "HTTPHeaders": {
      "date": "Sun, 05 May 2019 19:43:27 GMT", 
      "x-amzn-requestid": "086c9b31-e4da-43a5-baa5-09e8338784ca", 
      "content-length": "83", 
      "content-type": "application/x-amz-json-1.1", 
      "connection": "keep-alive"
    }
  }
}


#### Create Solution Version

In [35]:
# Request a new version of the solution and keep its ARN for polling below.
create_solution_version_response = personalize.create_solution_version(solutionArn=solution_arn)
solution_version_arn = create_solution_version_response['solutionVersionArn']

print(json.dumps(create_solution_version_response, indent=2))

{
  "solutionVersionArn": "arn:aws:personalize:us-west-2:586768923447:solution/DEMO-solution/e5987ace", 
  "ResponseMetadata": {
    "RetryAttempts": 0, 
    "HTTPStatusCode": 200, 
    "RequestId": "053abba6-858f-436d-bb44-c7ec649c9ad6", 
    "HTTPHeaders": {
      "date": "Sun, 05 May 2019 19:43:31 GMT", 
      "x-amzn-requestid": "053abba6-858f-436d-bb44-c7ec649c9ad6", 
      "content-length": "99", 
      "content-type": "application/x-amz-json-1.1", 
      "connection": "keep-alive"
    }
  }
}


#### Wait for Solution Version to Have ACTIVE Status

In [None]:
# Poll the solution version once a minute until it reaches a terminal status,
# giving up after three hours.
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn = solution_version_arn
    )
    status = describe_solution_version_response["solutionVersion"]["status"]
    print("SolutionVersion: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop the notebook here on failure or timeout instead of requesting metrics
# or a campaign for a solution version that never became ACTIVE.
if status != "ACTIVE":
    raise RuntimeError(
        "SolutionVersion did not become ACTIVE (last status: {}, failureReason: {})".format(
            status,
            describe_solution_version_response["solutionVersion"].get("failureReason")
        )
    )

SolutionVersion: CREATE PENDING
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS
SolutionVersion: CREATE IN_PROGRESS


#### Get Metrics of Solution

In [None]:
# Fetch the metrics for the solution version and pretty-print the raw response.
get_solution_metrics_response = personalize.get_solution_metrics(solutionVersionArn=solution_version_arn)
print(json.dumps(get_solution_metrics_response, indent=2))

{
    "_user_history_length_10_pct_quantile": 12.0, 
    "_user_history_length_mean": 53.18681318681319, 
    "normalized_discounted_cumulative_gain_at_10": 0.06490878963255765, 
    "_num_unique_items": 1448.0, 
    "normalized_discounted_cumulative_gain_at_5": 0.0486887533854219, 
    "_user_history_length_50_pct_quantile": 34.0, 
    "precision_at_10": 0.009890109890109891, 
    "mean_reciprocal_rank": 0.05396875069177629, 
    "coverage": 0.26035911602209943, 
    "precision_at_25": 0.008351648351648353, 
    "precision_at_5": 0.01098901098901099, 
    "normalized_discounted_cumulative_gain_at_25": 0.09189543814574382, 
    "_user_history_length_90_pct_quantile": 140.0, 
    "_num_evaluation_users": 91.0
  }, 
  "solutionVersionArn": "arn:aws:personalize:us-west-2:237539672711:solution/DEMO-solution/702e0792", 
  "ResponseMetadata": {
    "RetryAttempts": 0, 
    "HTTPStatusCode": 200, 
    "RequestId": "5b5f4f4f-5249-4c0e-9f83-45e3fe22f09f", 
    "HTTPHeaders": {
      "date": "Tu

### Create and Wait for Campaign

#### Create Campaign

In [None]:
# Deploy the solution version as a campaign with a minimum provisioned
# throughput of 1 TPS.
create_campaign_response = personalize.create_campaign(
    name="DEMO-campaign",
    minProvisionedTPS=1,
    solutionVersionArn=solution_version_arn
)
campaign_arn = create_campaign_response['campaignArn']

print(json.dumps(create_campaign_response, indent=2))

{
  "campaignArn": "arn:aws:personalize:us-west-2:237539672711:campaign/DEMO-campaign", 
  "ResponseMetadata": {
    "RetryAttempts": 0, 
    "HTTPStatusCode": 200, 
    "RequestId": "527e97ba-683c-4dc7-8218-00716f22c904", 
    "HTTPHeaders": {
      "date": "Tue, 04 Dec 2018 00:54:17 GMT", 
      "x-amzn-requestid": "527e97ba-683c-4dc7-8218-00716f22c904", 
      "content-length": "83", 
      "content-type": "application/x-amz-json-1.1", 
      "connection": "keep-alive"
    }
  }
}


#### Wait for Campaign to Have ACTIVE Status

In [None]:
# Poll the campaign once a minute until it reaches a terminal status,
# giving up after three hours.
status = None
max_time = time.time() + 3*60*60 # 3 hours
while time.time() < max_time:
    describe_campaign_response = personalize.describe_campaign(
        campaignArn = campaign_arn
    )
    status = describe_campaign_response["campaign"]["status"]
    print("Campaign: {}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

# Stop the notebook here on failure or timeout instead of requesting
# recommendations from a campaign that never became ACTIVE.
if status != "ACTIVE":
    raise RuntimeError(
        "Campaign did not become ACTIVE (last status: {}, failureReason: {})".format(
            status,
            describe_campaign_response["campaign"].get("failureReason")
        )
    )

Campaign: CREATE PENDING
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: CREATE IN_PROGRESS
Campaign: ACTIVE


### Get Recommendations

#### Select a User and an Item

In [None]:
# u.item is pipe-delimited with no header; only the first two columns (item id
# and title) are needed. The file is ISO-8859-1 (Latin-1) encoded, so the
# encoding is given explicitly: the default UTF-8 decoding fails on accented
# titles under Python 3.
items = pd.read_csv('./ml-100k/u.item', sep='|', usecols=[0, 1], header=None, encoding='latin-1')
items.columns = ['ITEM_ID', 'TITLE']

# Pick one random interaction row; the third column (TIMESTAMP) is not needed.
user_id, item_id, _ = data.sample().values[0]
# Look the title up by column name instead of relying on it being the last column.
item_title = items.loc[items['ITEM_ID'] == item_id, 'TITLE'].iloc[0]
print("USER: {}".format(user_id))
print("ITEM: {}".format(item_title))

items

USER: 711
ITEM: Silence of the Lambs, The (1991)


Unnamed: 0,ITEM_ID,TITLE
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
...,...,...
1680,1681,You So Crazy (1994)
1681,1682,Scream of Stone (Schrei aus Stein) (1991)


#### Call GetRecommendations

In [None]:
# Ask the campaign for recommendations for the sampled user/item pair.
# The runtime API takes the ids as strings.
get_recommendations_response = personalize_runtime.get_recommendations(
    campaignArn = campaign_arn,
    userId = str(user_id),
    itemId = str(item_id)
)

item_list = get_recommendations_response['itemList']
# Item ids come back as strings; convert with the builtin int() because the
# np.int alias was deprecated in NumPy 1.20 and removed in 1.24.
title_list = [
    items.loc[items['ITEM_ID'] == int(item['itemId']), 'TITLE'].iloc[0]
    for item in item_list
]

print("Recommendations: {}".format(json.dumps(title_list, indent=2)))

Recommendations: [
  "Godfather, The (1972)", 
  "Contact (1997)", 
  "Titanic (1997)", 
  "Star Wars (1977)", 
  "Fargo (1996)", 
  "Liar Liar (1997)", 
  "Evita (1996)", 
  "Jerry Maguire (1996)", 
  "Scream (1996)", 
  "Devil's Advocate, The (1997)", 
  "Full Monty, The (1997)", 
  "Conspiracy Theory (1997)", 
  "Edge, The (1997)", 
  "Sense and Sensibility (1995)", 
  "English Patient, The (1996)", 
  "Twelve Monkeys (1995)", 
  "L.A. Confidential (1997)", 
  "As Good As It Gets (1997)", 
  "In & Out (1997)", 
  "Rock, The (1996)", 
  "Return of the Jedi (1983)", 
  "Amistad (1997)", 
  "Men in Black (1997)", 
  "Truth About Cats & Dogs, The (1996)", 
  "Alien: Resurrection (1997)"
]
