### Get the Personalize boto3 Client

In [None]:
import boto3

import json
import numpy as np
import pandas as pd
import time
from datetime import datetime

# Control-plane client: creates the schema, datasets, solution and campaign below.
personalize = boto3.client(service_name='personalize')
# Runtime client: used at the end of the notebook to request recommendations.
personalize_runtime = boto3.client(service_name='personalize-runtime')

### Specify a Bucket and Data Output Location

In [None]:
# Name of the S3 bucket that receives the training data -- replace with your own bucket.
bucket = 'your-s3-bucket'
# Local file name (also used as the S3 object key) the prepared dataset is saved under.
filename = 'AmazonReviews.csv'

### Download, Prepare, and Upload Training Data

#### Download and Explore the Dataset

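Copy `reviews.csv` from Rumi's notebook folder (`../sagemaker-rec-engine-demo/`), count its lines, and keep only the last 3,998,345 lines as `headlessReviews.csv` (a copy without the header row).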

In [None]:
# Copy the raw reviews file from the sibling demo folder into the working directory.
!cp ../sagemaker-rec-engine-demo/reviews.csv .
# Print the total number of lines in the copied file.
!wc -l reviews.csv
# Keep only the last 3,998,345 lines.
# NOTE(review): the hard-coded count presumably equals (total lines - 1) so that only the
# header row is dropped -- confirm against the `wc -l` output above if the file changes.
!tail -3998345 reviews.csv > headlessReviews.csv

In [None]:
# headlessReviews.csv is read without a header row, so the column names are supplied here.
review_columns = ['INDEX', 'USER_ID', 'ITEM_ID', 'PRODUCT_TITLE', 'RATING', 'TIMESTAMP']
data = pd.read_csv('headlessReviews.csv', sep=',', names=review_columns)

# Show at most five rows when a DataFrame is displayed in the notebook.
pd.set_option('display.max_rows', 5)
data

In [None]:
# Convert the 'YYYY-MM-DD' review dates to Unix epoch time in whole *seconds*, which is
# what the `long` TIMESTAMP field of the interactions schema is expected to hold.
# A plain `.astype(int)` on a datetime column yields nanoseconds since the epoch (and its
# integer width is platform-dependent), so the offset from the epoch is divided by a
# one-second Timedelta instead; this is independent of the column's internal resolution.
parsed_timestamps = pd.to_datetime(data['TIMESTAMP'], format='%Y-%m-%d')
data['TIMESTAMP'] = (parsed_timestamps - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)

#### Prepare and Upload Data

In [None]:
# Optional filter (disabled): keep only interactions rated above 3.6.
# data = data[data['RATING'] > 3.6]

# Keep just the columns declared in the interactions schema, in schema order.
schema_columns = ['USER_ID', 'ITEM_ID', 'TIMESTAMP']
data = data[schema_columns]
data.to_csv(filename, index=False)

# Upload the local CSV to the bucket under the same name.
s3_resource = boto3.Session().resource('s3')
s3_resource.Bucket(bucket).Object(filename).upload_file(filename)

### Create Schema

In [None]:
# Avro schema describing the interactions CSV: one (name, type) pair per column.
interaction_fields = [
    {"name": field_name, "type": field_type}
    for field_name, field_type in (
        ("USER_ID", "string"),
        ("ITEM_ID", "string"),
        ("TIMESTAMP", "long"),
    )
]
schema = {
    "type": "record",
    "name": "Interactions",
    "namespace": "com.amazonaws.personalize.schema",
    "fields": interaction_fields,
    "version": "1.0",
}

# Register the schema under a timestamped name so repeated runs do not collide.
schema_name = datetime.now().strftime("%Y%m%d-%H%M%S-schema")
create_schema_response = personalize.create_schema(
    name=schema_name,
    schema=json.dumps(schema),
)

print(json.dumps(create_schema_response, indent=2))
schema_arn = create_schema_response['schemaArn']

### Create and Wait for Dataset Group

#### Create Dataset Group

In [None]:
# Create a dataset group with a timestamped name and remember its ARN for later cells.
dataset_group_name = datetime.now().strftime("%Y%m%d-%H%M%S-dataset-group")
create_dataset_group_response = personalize.create_dataset_group(name=dataset_group_name)

print(json.dumps(create_dataset_group_response, indent=2))
dataset_group_arn = create_dataset_group_response['datasetGroupArn']

#### Wait for Dataset Group to Have ACTIVE Status

In [None]:
# Poll the dataset group once a minute, for at most three hours, until it reaches a
# terminal status (either ACTIVE or CREATE FAILED).
terminal_statuses = ("ACTIVE", "CREATE FAILED")
max_time = time.time() + 3 * 60 * 60
while time.time() < max_time:
    describe_dataset_group_response = personalize.describe_dataset_group(
        datasetGroupArn=dataset_group_arn,
    )
    status = describe_dataset_group_response["datasetGroup"]["status"]
    print("DatasetGroup:", status)

    if status in terminal_statuses:
        break

    time.sleep(60)

### Create Dataset

In [None]:
# Create the interactions dataset inside the dataset group, bound to the schema above.
dataset_type = "INTERACTIONS"
dataset_request = {
    "name": datetime.now().strftime("%Y%m%d-%H%M%S-dataset"),
    "datasetType": dataset_type,
    "datasetGroupArn": dataset_group_arn,
    "schemaArn": schema_arn,
}
create_dataset_response = personalize.create_dataset(**dataset_request)

print(json.dumps(create_dataset_response, indent=2))
dataset_arn = create_dataset_response['datasetArn']

### Prepare, Create, and Wait for Dataset Import Job

#### Attach Policy to S3 Bucket

In [None]:
s3 = boto3.client("s3")

# Single statement letting the Personalize service principal list the bucket and read
# the objects stored in it.
bucket_arn = "arn:aws:s3:::{}".format(bucket)
personalize_access_statement = {
    "Sid": "PersonalizeS3BucketAccessPolicy",
    "Effect": "Allow",
    "Principal": {"Service": "personalize.amazonaws.com"},
    "Action": ["s3:GetObject", "s3:ListBucket"],
    "Resource": [bucket_arn, bucket_arn + "/*"],
}

policy = {
    "Version": "2012-10-17",
    "Id": "PersonalizeS3BucketAccessPolicy",
    "Statement": [personalize_access_statement],
}

s3.put_bucket_policy(Bucket=bucket, Policy=json.dumps(policy))

#### Create Personalize Role

In [None]:
iam = boto3.client("iam")

# Trust policy: allow the Personalize service to assume the role.
assume_role_policy_document = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"Service": "personalize.amazonaws.com"},
            "Action": "sts:AssumeRole",
        },
    ],
}

# Timestamped role name so repeated runs create distinct roles.
role_name = "PersonalizeRole" + datetime.now().strftime("%Y%m%d-%H%M%S")
create_role_response = iam.create_role(
    RoleName=role_name,
    AssumeRolePolicyDocument=json.dumps(assume_role_policy_document),
)
role_arn = create_role_response["Role"]["Arn"]

# AmazonPersonalizeFullAccess provides access to any S3 bucket with a name that includes
# "personalize" or "Personalize". If you would like to use a bucket with a different name,
# please consider creating and attaching a new policy that provides read access to your
# bucket, or attaching the AmazonS3ReadOnlyAccess policy to the role.
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonPersonalizeFullAccess"
iam.attach_role_policy(RoleName=role_name, PolicyArn=policy_arn)

# Wait for a minute to allow the IAM role policy attachment to propagate.
time.sleep(60)

print(role_arn)

#### Add S3 Access to the Newly Created Role

In [None]:
# Give the Personalize role access to the training data in S3.
# Only read access is attached: the dataset import job in this notebook reads the CSV
# from the bucket and never writes to it, so AmazonS3FullAccess would be broader than
# needed (least privilege).
iam.attach_role_policy(
    PolicyArn='arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess',
    RoleName=role_name
)

# It takes a few moments for these changes to take effect
time.sleep(60)

#### Create Dataset Import Job

In [None]:
# Start importing the uploaded CSV into the interactions dataset, using the role
# created above to read from S3.
import_source = {"dataLocation": "s3://{}/{}".format(bucket, filename)}
import_job_name = datetime.now().strftime("%Y%m%d-%H%M%S-import-job")

create_dataset_import_job_response = personalize.create_dataset_import_job(
    jobName=import_job_name,
    datasetArn=dataset_arn,
    dataSource=import_source,
    roleArn=role_arn,
)

print(json.dumps(create_dataset_import_job_response, indent=2))
dataset_import_job_arn = create_dataset_import_job_response['datasetImportJobArn']

#### Wait for Dataset Import Job to Have ACTIVE Status

In [None]:
# Check the import job every 60 seconds and stop as soon as it is ACTIVE or has failed;
# give up after three hours.
max_time = time.time() + 3 * 60 * 60
while True:
    if time.time() >= max_time:
        break

    describe_dataset_import_job_response = personalize.describe_dataset_import_job(
        datasetImportJobArn=dataset_import_job_arn,
    )
    status = describe_dataset_import_job_response["datasetImportJob"]['status']
    print("DatasetImportJob: " + "{}".format(status))

    if status in ("ACTIVE", "CREATE FAILED"):
        break

    time.sleep(60)

### Select Recipe

In [None]:
# aws-hrnn selected for our purpose; the full recipe list is fetched only for display.
recipe_arn = "arn:aws:personalize:::recipe/aws-hrnn"
list_recipes_response = personalize.list_recipes()
list_recipes_response

### Create and Wait for Solution

#### Create Solution

In [None]:
# Create a solution that pairs the dataset group with the selected recipe.
solution_name = datetime.now().strftime("%Y%m%d-%H%M%S-solution")
create_solution_response = personalize.create_solution(
    name=solution_name,
    datasetGroupArn=dataset_group_arn,
    recipeArn=recipe_arn,
)

print(json.dumps(create_solution_response, indent=2))
solution_arn = create_solution_response['solutionArn']

#### Create Solution Version

In [None]:
# Request a new version of the solution and keep its ARN for the cells that follow.
create_solution_version_response = personalize.create_solution_version(solutionArn=solution_arn)

print(json.dumps(create_solution_version_response, indent=2))
solution_version_arn = create_solution_version_response['solutionVersionArn']

#### Wait for Solution Version to Have ACTIVE Status

In [None]:
# Poll the solution version once a minute (three-hour cap) until it is ACTIVE or has failed.
three_hours = 3 * 60 * 60
max_time = time.time() + three_hours
while time.time() < max_time:
    describe_solution_version_response = personalize.describe_solution_version(
        solutionVersionArn=solution_version_arn,
    )
    status = describe_solution_version_response["solutionVersion"]["status"]
    print("SolutionVersion:", status)

    still_creating = not (status == "ACTIVE" or status == "CREATE FAILED")
    if not still_creating:
        break

    time.sleep(60)

#### Get Metrics of Solution

In [None]:
# Fetch the metrics of the solution version and pretty-print the raw response.
get_solution_metrics_response = personalize.get_solution_metrics(solutionVersionArn=solution_version_arn)
metrics_json = json.dumps(get_solution_metrics_response, indent=2)
print(metrics_json)

### Create and Wait for Campaign

#### Create Campaign

In [None]:
# Deploy the solution version as a campaign with a minimum provisioned throughput of 1 TPS.
campaign_name = datetime.now().strftime("%Y%m%d-%H%M%S-campaign")
create_campaign_response = personalize.create_campaign(
    name=campaign_name,
    solutionVersionArn=solution_version_arn,
    minProvisionedTPS=1,
)

print(json.dumps(create_campaign_response, indent=2))
campaign_arn = create_campaign_response['campaignArn']

#### Wait for Campaign to Have ACTIVE Status

In [None]:
# Wait (up to three hours, checking every minute) for the campaign to become ACTIVE
# or to report CREATE FAILED.
finished_statuses = {"ACTIVE", "CREATE FAILED"}
max_time = time.time() + 3 * 60 * 60
while time.time() < max_time:
    describe_campaign_response = personalize.describe_campaign(campaignArn=campaign_arn)
    status = describe_campaign_response["campaign"]["status"]
    print("Campaign: %s" % status)

    if status in finished_statuses:
        break

    time.sleep(60)

### Get Recommendations

#### Select a User and an Item

In [None]:
# Load the full review table (including product titles) for looking up users and items.
# NOTE(review): 'reviews_raw.csv' is not created by any earlier cell visible in this
# notebook (the download step produces reviews.csv / headlessReviews.csv) -- confirm the
# file exists and has no header row.
item_columns = ['INDEX', 'USER_ID', 'ITEM_ID', 'PRODUCT_TITLE', 'RATING', 'TIMESTAMP']
items = pd.read_csv('reviews_raw.csv', sep=',', names=item_columns)

In [None]:
# Pick one review row (position 200) as the example user/item pair.
# The row is kept as a pandas Series so its fields can be read by column name;
# Series.to_json() would return a plain JSON string, and indexing that string with
# a column name raises TypeError.
item = items.iloc[200]
user_id = str(item['USER_ID'])
item_id = str(item['ITEM_ID'])
item_title = item['PRODUCT_TITLE']

print("USER: {}".format(user_id))
print("ITEM: {}".format(item_title))

In [None]:
# Disabled alternative for selecting a random user/item pair.
# NOTE(review): the './ml-100k/u.item' path suggests this is left over from a
# MovieLens-based version of the notebook -- confirm and consider deleting it.
# items = pd.read_csv('./ml-100k/u.item', sep='|', usecols=[0,1], header=None, encoding='latin1')
# items.columns = ['ITEM_ID', 'TITLE']

# user_id, item_id, _ = data.sample().values[0]
# item_title = items.loc[items['ITEM_ID'] == item_id].values[0][-1]
# print("USER: {}".format(user_id))
# print("ITEM: {}".format(item_title))

# Display the items table loaded earlier.
items

#### Call GetRecommendations

In [None]:
# Ask the campaign for recommendations for the selected user/item.
get_recommendations_response = personalize_runtime.get_recommendations(
    campaignArn = campaign_arn,
    userId = user_id,
    itemId = item_id
)

# Each entry of 'itemList' is a dict carrying the recommended item's id under 'itemId'.
item_list = get_recommendations_response['itemList']

# Map item id -> product title once, instead of scanning the DataFrame per recommendation.
# Ids are compared as strings because the service returns them as strings.
titles_by_item_id = dict(zip(items['ITEM_ID'].astype(str), items['PRODUCT_TITLE']))

# Translate every recommended id into its title (falling back to the id itself when the
# item is not present in `items`). Plain strings keep the list JSON-serializable; the
# previous version looked up the *selected* item_id for every entry and collected
# DataFrames, which json.dumps cannot encode.
title_list = [
    str(titles_by_item_id.get(recommended['itemId'], recommended['itemId']))
    for recommended in item_list
]

print("Recommendations: {}".format(json.dumps(title_list, indent=2)))

## Cleanup

In [None]:
# Delete campaign
# personalize.delete_campaign( campaignArn=campaign_arn )
    
# Delete solution
# personalize.delete_solution( solutionArn = solution_arn )

# Delete dataset-import-job - No way to do that currently 

# Delete dataset - takes a few moments
# personalize.delete_dataset( datasetArn = dataset_arn )
# time.sleep(10)

# Delete schema 
# personalize.delete_schema(schemaArn=schema_arn)

# Delete dataset group
# personalize.delete_dataset_group( datasetGroupArn = dataset_group_arn )
# time.sleep(10)


# Delete Personalize role
# attached_policies = iam.list_attached_role_policies( RoleName = role_name)
# for policy in attached_policies["AttachedPolicies"]:
#     print(policy["PolicyArn"])
#     iam.detach_role_policy( RoleName=role_name, PolicyArn=policy["PolicyArn"] )
# iam.delete_role( RoleName = role_name )

# Delete s3 bucket
# Delete newly-created SageMaker role
