# Amazon Personalize Generic Module - Inference Layer

## Environment Setup

In [None]:
!pip install -U -q dvc dvc[gdrive]
!dvc pull

In [None]:
import sys
sys.path.insert(0,'./code')

In [None]:
!pip install -q boto3

In [None]:
!mkdir -p ~/.aws && cp /content/drive/MyDrive/AWS/d01_admin/* ~/.aws

In [None]:
%reload_ext autoreload
%autoreload 2

---

In [None]:
import time
from time import sleep
import json
from datetime import datetime
import uuid
import random

import boto3
import pandas as pd

In [None]:
# Control-plane client; used further down to create the event tracker
personalize = boto3.client('personalize')
# Runtime client; used for the get_recommendations calls
personalize_runtime = boto3.client('personalize-runtime')
# Events client; used for the put_events calls
personalize_events = boto3.client(service_name='personalize-events')

In [None]:
import pickle
from generic_modules.import_model import personalize_model

In [None]:
# Restore the pickled SIMS model description from the artifacts folder.
# NOTE(review): pickle.load executes code embedded in the file, so this
# artifact must come from a trusted source.
with open('./artifacts/etc/personalize_model_sims.pkl', 'rb') as model_file:
    sims_model = pickle.load(model_file)

# Show the attributes (ARNs) stored on the restored object
vars(sims_model)

{'campaign_arn': 'arn:aws:personalize:us-east-1:746888961694:campaign/personalize-poc-sims-campaign',
 'dataset_group_arn': 'arn:aws:personalize:us-east-1:746888961694:dataset-group/immersion-day-dataset-group-movielens-latest',
 'filter_arns': ['arn:aws:personalize:us-east-1:746888961694:filter/watched',
  'arn:aws:personalize:us-east-1:746888961694:filter/unwatched'],
 'solution_arn': 'arn:aws:personalize:us-east-1:746888961694:solution/personalize-poc-sims',
 'solution_version_arn': 'arn:aws:personalize:us-east-1:746888961694:solution/personalize-poc-sims/edb3d46e'}

In [None]:
# Restore the pickled user-personalization model description.
# NOTE(review): pickle.load executes code embedded in the file, so this
# artifact must come from a trusted source.
with open('./artifacts/etc/personalize_model_pers.pkl', 'rb') as model_file:
    pers_model = pickle.load(model_file)

# Show the attributes (ARNs) stored on the restored object
vars(pers_model)

{'campaign_arn': 'arn:aws:personalize:us-east-1:746888961694:campaign/personalize-poc-userpersonalization',
 'dataset_group_arn': 'arn:aws:personalize:us-east-1:746888961694:dataset-group/immersion-day-dataset-group-movielens-latest',
 'filter_arns': ['arn:aws:personalize:us-east-1:746888961694:filter/watched',
  'arn:aws:personalize:us-east-1:746888961694:filter/unwatched'],
 'solution_arn': 'arn:aws:personalize:us-east-1:746888961694:solution/personalize-poc-sims',
 'solution_version_arn': 'arn:aws:personalize:us-east-1:746888961694:solution/personalize-poc-sims/edb3d46e'}

## Interact with Campaigns

First, let's create a supporting function to help make sense of the results returned by a Personalize campaign. Personalize returns only an item_id. This is great for keeping data compact, but it means you need to query a database or lookup table to get a human-readable result for the notebooks. We will create a helper function to return a human-readable result from the Movielens dataset.

Start by loading in the dataset which we can use for our lookup table.

In [None]:
# Create a dataframe for the items by reading in the correct source CSV.
# The MovieLens files are UTF-8 encoded; decoding them as latin-1 garbles
# accented titles (e.g. "AmÃ©lie" instead of "Amélie").
# movieId is parsed as an integer so that the index matches the integer keys
# used by the lookups in this notebook (items_df.loc[589], get_movie_by_id).
items_df = pd.read_csv('./data/bronze/ml-latest-small/movies.csv',
                       sep=',', usecols=[0,1], encoding='utf-8',
                       dtype={'movieId': 'int64', 'title': 'str'},
                       index_col=0)

# Render some sample data
items_df.head(5)

Unnamed: 0_level_0,title
movieId,Unnamed: 1_level_1
1,Toy Story (1995)
2,Jumanji (1995)
3,Grumpier Old Men (1995)
4,Waiting to Exhale (1995)
5,Father of the Bride Part II (1995)


In [None]:
# Look up a single title directly by its (row label, column) pair
movie_id_example = 589
title = items_df.loc[movie_id_example, 'title']
print(title)

Terminator 2: Judgment Day (1991)


In [None]:
def get_movie_by_id(movie_id, movie_df=items_df):
    """Return the title stored for ``movie_id``, or a fallback message.

    Parameters
    ----------
    movie_id : value convertible with ``int()``
        The item id, e.g. ``"1197"`` or ``1197``.
    movie_df : pandas.DataFrame
        Lookup table with a ``title`` column, indexed by integer movie id.
        Defaults to ``items_df``.

    Returns
    -------
    The title, or the string ``"Error obtaining title"`` when the id cannot
    be converted to an integer or is not present in ``movie_df``.
    """
    try:
        return movie_df.loc[int(movie_id)]['title']
    # Only the expected failures are handled: a non-numeric id (ValueError),
    # an id of an unsupported type such as None (TypeError), or an unknown
    # id / missing column (KeyError). Anything else is allowed to surface.
    except (KeyError, ValueError, TypeError):
        return "Error obtaining title"

In [None]:
# Exercise the helper with three inputs, in this order:
#   "1197"        - a known good id (The Princess Bride)
#   "987.9393939" - a bad type of value
#   "Steve"       - a really bad value
for candidate_id in ("1197", "987.9393939", "Steve"):
    print(get_movie_by_id(movie_id=candidate_id))

Princess Bride, The (1987)
Error obtaining title
Error obtaining title


### SIMS
SIMS requires just an item as input, and it will return items which users interact with in similar ways to their interaction with the input item. In this particular case the item is a movie.

In [None]:
# Ask the SIMS campaign for items similar to item 589
# (the movie looked up earlier as movie_id_example)
get_recommendations_response = personalize_runtime.get_recommendations(
    campaignArn = sims_model.campaign_arn,
    itemId = str(589),
)

# The response carries only item ids; translate each one into a title
item_list = get_recommendations_response['itemList']
for item in item_list:
    print(get_movie_by_id(movie_id=item['itemId']))

Jurassic Park (1993)
Braveheart (1995)
Terminator, The (1984)
Fugitive, The (1993)
Speed (1994)
Crimson Tide (1995)
GoldenEye (1995)
Batman (1989)
Clear and Present Danger (1994)
True Lies (1994)
Mask, The (1994)
Die Hard: With a Vengeance (1995)
In the Line of Fire (1993)
Lion King, The (1994)
Forrest Gump (1994)
Ghost (1990)
Apollo 13 (1995)
Star Trek: Generations (1994)
Cliffhanger (1993)
Firm, The (1993)
Die Hard (1988)
Mission: Impossible (1996)
Seven (a.k.a. Se7en) (1995)
Indiana Jones and the Last Crusade (1989)
Mrs. Doubtfire (1993)


In [None]:
# Update DF rendering
pd.set_option('display.max_rows', 30)

def get_new_recommendations_df(recommendations_df, movie_ID):
    """Add one column of SIMS recommendations for ``movie_ID``.

    The SIMS campaign is queried with ``movie_ID`` as the item; the titles of
    the returned items form a new column whose header is the title of
    ``movie_ID``. The column is appended to the right of
    ``recommendations_df`` and the combined frame is returned.
    """
    # The title of the seed movie becomes the header of the new column
    column_title = get_movie_by_id(movie_ID)
    # Query the SIMS campaign with the seed movie
    response = personalize_runtime.get_recommendations(
        campaignArn=sims_model.campaign_arn,
        itemId=str(movie_ID),
    )
    # Translate every returned item id into a readable title
    titles = [get_movie_by_id(entry['itemId']) for entry in response['itemList']]
    new_column = pd.DataFrame(titles, columns=[column_title])
    # Place the new column next to the ones collected so far
    return pd.concat([recommendations_df, new_column], axis=1)

In [None]:
samples = items_df.sample(5)
samples

Unnamed: 0_level_0,title
movieId,Unnamed: 1_level_1
27793,Starship Troopers 2: Hero of the Federation (2...
113862,"Guest, The (2014)"
5735,Faces of Death (1978)
2498,My Favorite Martian (1999)
74342,"Captain Newman, M.D. (1963)"


In [None]:
# Start from an empty frame and add one column of SIMS recommendations
# for each of the sampled movies
sims_recommendations_df = pd.DataFrame()
movies = samples.index.tolist()

for movie in movies:
    sims_recommendations_df = get_new_recommendations_df(sims_recommendations_df, movie)

sims_recommendations_df

Unnamed: 0,Starship Troopers 2: Hero of the Federation (2004),"Guest, The (2014)",Faces of Death (1978),My Favorite Martian (1999),"Captain Newman, M.D. (1963)"
0,"Shawshank Redemption, The (1994)","Shawshank Redemption, The (1994)","Shawshank Redemption, The (1994)",Moonraker (1979),Willy/Milly (1986)
1,Forrest Gump (1994),Forrest Gump (1994),Forrest Gump (1994),Beverly Hills Cop (1984),Dr. Phibes Rises Again (1972)
2,Pulp Fiction (1994),Pulp Fiction (1994),Pulp Fiction (1994),Lucy (2014),Tears for Sale (2008)
3,"Silence of the Lambs, The (1991)","Silence of the Lambs, The (1991)","Silence of the Lambs, The (1991)","Matrix, The (1999)",Spellbound (2011)
4,Braveheart (1995),Braveheart (1995),Braveheart (1995),,Jimmy Carr: Telling Jokes (2009)
5,"Matrix, The (1999)","Matrix, The (1999)","Matrix, The (1999)",,Maz Jobrani: Immigrant (2017)
6,Schindler's List (1993),Schindler's List (1993),Schindler's List (1993),,Runaway Brain (1995)
7,Star Wars: Episode IV - A New Hope (1977),Star Wars: Episode IV - A New Hope (1977),Star Wars: Episode IV - A New Hope (1977),,L.A. Slasher (2015)
8,Jurassic Park (1993),Jurassic Park (1993),Jurassic Park (1993),,Tenchi MuyÃ´! In Love (1996)
9,Terminator 2: Judgment Day (1991),Terminator 2: Judgment Day (1991),Terminator 2: Judgment Day (1991),,The Editor (2015)


This is a good time to think about the hyperparameters of the Personalize recipes. The SIMS recipe has a popularity_discount_factor hyperparameter (see documentation). Leveraging this hyperparameter allows you to control the nuance you see in the results. This parameter and its behavior will be unique to every dataset you encounter, and depends on the goals of the business. You can iterate on the value of this hyperparameter until you are satisfied with the results, or you can start by leveraging Personalize's hyperparameter optimization (HPO) feature. For more information on hyperparameters and HPO tuning, see the documentation.

### User Personalization Model

HRNN is one of the more advanced algorithms provided by Amazon Personalize. It supports personalization of the items for a specific user based on their past behavior and can intake real time events in order to alter recommendations for a user without retraining.

Since HRNN relies on having a sampling of users, let's select 3 random users. Since MovieLens does not include user data, we will select 3 random numbers from the range of user IDs in the dataset.

In [None]:
users = random.sample(range(1, 600), 3)
users

[260, 583, 287]

In [None]:
# Update DF rendering
pd.set_option('display.max_rows', 30)

def get_new_recommendations_df_users(recommendations_df, user_id):
    """Add one column of personalized recommendations for ``user_id``.

    The user-personalization campaign is queried for ``user_id``; the titles
    of the returned items form a new column headed by ``user_id``, which is
    appended to the right of ``recommendations_df``. The combined frame is
    returned.
    """
    # Query the user-personalization campaign for this user
    response = personalize_runtime.get_recommendations(
        campaignArn=pers_model.campaign_arn,
        userId=str(user_id),
    )
    # Translate every returned item id into a readable title
    titles = [get_movie_by_id(entry['itemId']) for entry in response['itemList']]
    new_column = pd.DataFrame(titles, columns=[user_id])
    # Place the new column next to the ones collected so far
    return pd.concat([recommendations_df, new_column], axis=1)

In [None]:
recommendations_df_users = pd.DataFrame()
# `users` is the list of randomly chosen user ids created above

for user in users:
    recommendations_df_users = get_new_recommendations_df_users(recommendations_df_users, user)

recommendations_df_users

Unnamed: 0,260,583,287
0,Bowling for Columbine (2002),27 Dresses (2008),Apocalypse Now (1979)
1,Apollo 13 (1995),300 (2007),Forrest Gump (1994)
2,Dead Poets Society (1989),Austin Powers: International Man of Mystery (1...,Schindler's List (1993)
3,Fahrenheit 9/11 (2004),Yes Man (2008),"Pianist, The (2002)"
4,Amadeus (1984),Notting Hill (1999),"Graduate, The (1967)"
5,Baraka (1992),Step Brothers (2008),Full Metal Jacket (1987)
6,"Lord of the Rings: The Two Towers, The (2002)",Juno (2007),Apollo 13 (1995)
7,Hearts of Darkness: A Filmmakers Apocalypse (1...,Blades of Glory (2007),"Boot, Das (Boat, The) (1981)"
8,Pink Floyd: The Wall (1982),"Hangover, The (2009)",Amadeus (1984)
9,Schindler's List (1993),Forgetting Sarah Marshall (2008),Hotel Rwanda (2004)


## Static and Dynamic Filters
Let's interact with the static filters we created in the previous notebook, and utilize dynamic filters in real time.

A few common use cases for dynamic filters in Video On Demand are:

Categorical filters based on Item Metadata (that aren't range based) - Often your item metadata will have information about the title such as Genre, Keyword, Year, Director, Actor etc. Filtering on these can provide recommendations within that data, such as action movies, Steven Spielberg movies, movies from 1995 etc.

Events - you may want to filter out certain events and provide results based on those events, such as moving a title from a "suggestions to watch" recommendation to a "watch again" recommendation.

Now let's apply item filters to see recommendations for one of these users under each of our static filters (here, `watched` and `unwatched`).

In [None]:
def get_new_recommendations_df_by_static_filter(recommendations_df, user_id, filter_arn):
    """Add one column of filtered recommendations for ``user_id``.

    The user-personalization campaign is queried for ``user_id`` with
    ``filter_arn`` applied. The titles of the returned items form a new
    column named after the part of the ARN that follows the first ``/``;
    the column is appended to the right of ``recommendations_df`` and the
    combined frame is returned.
    """
    # Query the campaign with the static filter applied
    response = personalize_runtime.get_recommendations(
        campaignArn=pers_model.campaign_arn,
        userId=str(user_id),
        filterArn=filter_arn
    )
    # Translate every returned item id into a readable title
    titles = [get_movie_by_id(entry['itemId']) for entry in response['itemList']]
    # Use the segment after the first '/' of the ARN as the column header
    column_title = filter_arn.split('/')[1]
    new_column = pd.DataFrame(titles, columns=[column_title])
    # Place the new column next to the ones collected so far
    return pd.concat([recommendations_df, new_column], axis=1)

In [None]:
def get_new_recommendations_df_by_dynamicfilter(recommendations_df, user_id, genre_filter_arn, filter_values):
    """Add one column of recommendations for ``user_id`` under a dynamic filter.

    Parameters
    ----------
    recommendations_df : pandas.DataFrame
        Frame collected so far; the new column is appended to its right.
    user_id
        User to get recommendations for (converted with ``str``).
    genre_filter_arn : str
        ARN of the filter passed as ``filterArn``.
    filter_values : str
        Value substituted for the filter's ``GENRE`` placeholder. It is also
        used as the header of the new column.

    Returns
    -------
    pandas.DataFrame
        ``recommendations_df`` with the new column added.
    """
    # Get the recommendations. The GENRE value is sent wrapped in double
    # quotes, exactly as the original string concatenation produced it.
    get_recommendations_response = personalize_runtime.get_recommendations(
        campaignArn = pers_model.campaign_arn,
        userId = str(user_id),
        filterArn = genre_filter_arn,
        filterValues = { "GENRE": f'"{filter_values}"' }
    )
    # Build a new dataframe of recommendations (item ids -> titles)
    item_list = get_recommendations_response['itemList']
    recommendation_list = [get_movie_by_id(item['itemId']) for item in item_list]
    # The previously computed-but-unused `filter_name` was removed: it served
    # no purpose and could raise IndexError for an ARN without a '/'.
    new_rec_DF = pd.DataFrame(recommendation_list, columns = [filter_values])
    # Add this dataframe to the old one
    recommendations_df = pd.concat([recommendations_df, new_rec_DF], axis=1)
    return recommendations_df

In [None]:
pers_model.filter_arns

['arn:aws:personalize:us-east-1:746888961694:filter/watched',
 'arn:aws:personalize:us-east-1:746888961694:filter/unwatched']

In [None]:
# Choose the user explicitly (the last of the sampled ids) instead of
# depending on the `user` loop variable left over from an earlier cell.
user = users[-1]

# One column of recommendations per static filter stored on the model
recommendations_df_decade_shelves = pd.DataFrame()
for filter_arn in pers_model.filter_arns:
    recommendations_df_decade_shelves = get_new_recommendations_df_by_static_filter(recommendations_df_decade_shelves, user, filter_arn)

recommendations_df_decade_shelves

Unnamed: 0,watched,unwatched
0,Apocalypse Now (1979),Forrest Gump (1994)
1,Schindler's List (1993),"Pianist, The (2002)"
2,Sideways (2004),"Graduate, The (1967)"
3,Saving Private Ryan (1998),Full Metal Jacket (1987)
4,"Amelie (Fabuleux destin d'AmÃ©lie Poulain, Le)...",Apollo 13 (1995)
5,"Usual Suspects, The (1995)","Boot, Das (Boat, The) (1981)"
6,"Breakfast Club, The (1985)",Amadeus (1984)
7,"Truman Show, The (1998)",Hotel Rwanda (2004)
8,Requiem for a Dream (2000),Blade Runner (1982)
9,Traffic (2000),Black Hawk Down (2001)


## Real-Time Events

In [None]:
# Start by creating an event tracker. The call is given the dataset group ARN
# (not a campaign ARN), so the tracker belongs to the dataset group.
# NOTE(review): re-running this cell may fail if a tracker already exists for
# this dataset group - confirm against the CreateEventTracker documentation.

response = personalize.create_event_tracker(
    name='MovieTracker',
    datasetGroupArn=pers_model.dataset_group_arn
)

print(response['eventTrackerArn'])
print(response['trackingId'])

# Keep both identifiers; TRACKING_ID is what send_movie_click passes to put_events
TRACKING_ID = response['trackingId']
event_tracker_arn = response['eventTrackerArn']

arn:aws:personalize:us-east-1:746888961694:event-tracker/30109ae4
b3e19ad6-ec08-440c-988d-44ba91a2464d


We will create some code that simulates a user interacting with a particular item. After running this code, you will get recommendations that differ from the results above.

We start by creating some methods for the simulation of real time events.

In [None]:
session_dict = {}

def send_movie_click(USER_ID, ITEM_ID, EVENT_TYPE):
    """
    Simulate a user interaction by sending one event to the
    Amazon Personalize event tracker.

    USER_ID    -- user the event belongs to (converted with str)
    ITEM_ID    -- item the user interacted with (converted with str)
    EVENT_TYPE -- event type label, e.g. 'click' (converted with str)

    A session id is created the first time a user is seen and stored in the
    module-level ``session_dict`` so later events of that user reuse it.
    """
    # Configure Session. An explicit membership test replaces the former
    # bare ``except``, which would also have hidden unrelated errors.
    user_key = str(USER_ID)
    if user_key not in session_dict:
        session_dict[user_key] = str(uuid.uuid1())
    session_ID = session_dict[user_key]

    # Configure Properties: they are sent as a JSON-encoded string
    event_json = json.dumps({"itemId": str(ITEM_ID)})

    # Make Call
    personalize_events.put_events(
        trackingId = TRACKING_ID,
        userId = user_key,
        sessionId = session_ID,
        eventList = [{
            'sentAt': int(time.time()),
            'eventType': str(EVENT_TYPE),
            'properties': event_json
        }]
    )

def get_new_recommendations_df_users_real_time(recommendations_df, user_id, item_id, event_type):
    """Send one event for ``user_id`` and record the recommendations that follow.

    An event of ``event_type`` on ``item_id`` is sent through
    ``send_movie_click``; the user-personalization campaign is then queried
    for ``user_id``. The returned titles form a new column, headed by the
    title of ``item_id``, that is appended to the right of
    ``recommendations_df``. The combined frame is returned.
    """
    # Title of the movie being interacted with (header of the new column)
    column_title = get_movie_by_id(item_id)
    print('sending event ' + event_type + ' for ' + column_title)
    # Simulate the interaction
    send_movie_click(USER_ID=user_id, ITEM_ID=item_id, EVENT_TYPE=event_type)
    # Fetch the recommendations again (a base recommendation frame should
    # already exist and be passed in as recommendations_df)
    response = personalize_runtime.get_recommendations(
        campaignArn=pers_model.campaign_arn,
        userId=str(user_id),
    )
    # Translate every returned item id into a readable title
    titles = [get_movie_by_id(entry['itemId']) for entry in response['itemList']]
    new_column = pd.DataFrame(titles, columns=[column_title])
    # Place the new column next to the ones collected so far
    return pd.concat([recommendations_df, new_column], axis=1)

At this point, we haven't generated any real-time events yet; we have only set up the code. To compare the recommendations before and after the real-time events, let's pick one user and generate the original recommendations for them.

In [None]:
# First pick a user
user_id = user

# Get recommendations for the user
get_recommendations_response = personalize_runtime.get_recommendations(
        campaignArn = pers_model.campaign_arn,
        userId = str(user_id),
    )

# Build a new dataframe for the recommendations
item_list = get_recommendations_response['itemList']
recommendation_list = []
for item in item_list:
    artist = get_movie_by_id(item['itemId'])
    recommendation_list.append(artist)
user_recommendations_df = pd.DataFrame(recommendation_list, columns = [user_id])
user_recommendations_df

Unnamed: 0,287
0,Apocalypse Now (1979)
1,Forrest Gump (1994)
2,Schindler's List (1993)
3,"Pianist, The (2002)"
4,"Graduate, The (1967)"
5,Full Metal Jacket (1987)
6,Apollo 13 (1995)
7,"Boot, Das (Boat, The) (1981)"
8,Amadeus (1984)
9,Hotel Rwanda (2004)


Ok, so now we have a list of recommendations for this user before we have applied any real-time events. Now let's pick 3 random movies which we will simulate our user interacting with, and then see how this changes the recommendations.

In [None]:
# Next generate 3 random movies
movies = items_df.sample(3).index.tolist()
# Note this will take about 15 seconds to complete due to the sleeps
# (3 events x 5 seconds; presumably the pause gives each event time to be
# processed before the next one - TODO confirm).
# Each run appends three more columns to user_recommendations_df, so
# re-running this cell keeps widening the frame.
for movie in movies:
    user_recommendations_df = get_new_recommendations_df_users_real_time(user_recommendations_df, user_id, movie,'click')
    time.sleep(5)

sending event click for Gaslight (1944)
sending event click for Terminator 2: Judgment Day (1991)
sending event click for Boss of It All, The (DirektÃ¸ren for det hele) (2006)


Now we can look at how the click events changed the recommendations.

In [None]:
user_recommendations_df

Unnamed: 0,287,Gaslight (1944),Terminator 2: Judgment Day (1991),"Boss of It All, The (DirektÃ¸ren for det hele) (2006)"
0,Apocalypse Now (1979),Apocalypse Now (1979),Gaslight (1944),Terminator 2: Judgment Day (1991)
1,Forrest Gump (1994),Forrest Gump (1994),Rosemary's Baby (1968),Planet of the Apes (1968)
2,Schindler's List (1993),Schindler's List (1993),Wait Until Dark (1967),"Day the Earth Stood Still, The (1951)"
3,"Pianist, The (2002)","Pianist, The (2002)","Night of the Hunter, The (1955)",Jurassic Park (1993)
4,"Graduate, The (1967)","Graduate, The (1967)","Sorry, Wrong Number (1948)","Terminator, The (1984)"
5,Full Metal Jacket (1987),Full Metal Jacket (1987),Carnival of Souls (1962),In the Line of Fire (1993)
6,Apollo 13 (1995),Apollo 13 (1995),"Insider, The (1999)",Braveheart (1995)
7,"Boot, Das (Boat, The) (1981)","Boot, Das (Boat, The) (1981)",Lord of the Flies (1963),Vertigo (1958)
8,Amadeus (1984),Amadeus (1984),"Day the Earth Stood Still, The (1951)",Night of the Living Dead (1968)
9,Hotel Rwanda (2004),Hotel Rwanda (2004),Cape Fear (1962),Twelve Monkeys (a.k.a. 12 Monkeys) (1995)


## Generic Module

In [None]:
import boto3
import json
import time


class personalize_inference:
    """Convenience wrapper around the inference-side Amazon Personalize calls.

    The object stores the ARNs it works with and, after
    ``setup_connection()``, three boto3 clients (``personalize``,
    ``personalize_runtime``, ``personalize_events``). The clients are left
    out when the object is pickled and are reset to ``None`` when it is
    unpickled; call ``setup_connection()`` again after loading.
    """

    def __init__(self,
                 dataset_group_arn = None,
                 campaign_arn = None,
                 event_tracker_arn = None,
                 role_arn=None,
                 solution_version_arn=None,
                 batch_job_arn=None,
                 event_tracker_id=None
                 ):
        """Store the identifiers; no connection is opened here.

        ``event_tracker_id`` is the tracking id used by ``put_events``. It was
        previously read from an undefined name, which made every construction
        fail with NameError; it is now an optional trailing parameter.
        """
        self.personalize = None
        self.personalize_runtime = None
        self.personalize_events = None
        self.dataset_group_arn = dataset_group_arn
        self.campaign_arn = campaign_arn
        self.event_tracker_arn = event_tracker_arn
        self.event_tracker_id = event_tracker_id
        self.role_arn = role_arn
        self.solution_version_arn = solution_version_arn
        self.batch_job_arn = batch_job_arn

    def setup_connection(self):
        """Create the three boto3 clients and report the outcome via print."""
        try:
            self.personalize = boto3.client('personalize')
            self.personalize_runtime = boto3.client('personalize-runtime')
            self.personalize_events = boto3.client(service_name='personalize-events')
            print("SUCCESS | We can communicate with Personalize!")
        # Still best-effort, but no longer intercepts KeyboardInterrupt/SystemExit
        except Exception:
            print("ERROR | Connection can't be established!")

    def get_recommendations(self, itemid=None, userid=None, k=5,
                            filter_arn=None, filter_values=None):
        """Query the campaign and return the service response.

        Only the arguments that were actually supplied are sent: an omitted
        ``itemid``/``userid`` is no longer turned into the string ``'None'``
        and omitted filter arguments are not sent as ``None``.

        Returns the response of the runtime client's ``get_recommendations``.
        """
        request = {'campaignArn': self.campaign_arn, 'numResults': k}
        if itemid is not None:
            request['itemId'] = str(itemid)
        if userid is not None:
            request['userId'] = str(userid)
        if filter_arn is not None:
            request['filterArn'] = filter_arn
        if filter_values is not None:
            request['filterValues'] = filter_values
        return self.personalize_runtime.get_recommendations(**request)

    def get_rankings(self, userid=None, inputlist=None):
        """Re-rank ``inputlist`` for ``userid`` and return the service response."""
        return self.personalize_runtime.get_personalized_ranking(
            campaignArn = self.campaign_arn,
            userId = str(userid),
            inputList = inputlist
            )

    def create_event_tracker(self, name=None):
        """Create an event tracker for the dataset group and remember its ids."""
        response = self.personalize.create_event_tracker(
            name=name,
            datasetGroupArn=self.dataset_group_arn
            )
        self.event_tracker_arn = response['eventTrackerArn']
        self.event_tracker_id = response['trackingId']

    def put_events(self, userid=None, sessionid=None, eventlist=None):
        """Send ``eventlist`` for the given user/session using the stored tracking id."""
        self.personalize_events.put_events(
            trackingId = self.event_tracker_id,
            userId = userid,
            sessionId = sessionid,
            eventList = eventlist
            )

    def create_batch_job(self, jobname=None, input_path=None, output_path=None):
        """Start a batch inference job and remember its ARN in ``batch_job_arn``."""
        response = self.personalize.create_batch_inference_job(
            solutionVersionArn = self.solution_version_arn,
            jobName = jobname,
            roleArn = self.role_arn,
            jobInput = {"s3DataSource": {"path": input_path}},
            jobOutput = {"s3DataDestination": {"path": output_path}}
            )
        self.batch_job_arn = response['batchInferenceJobArn']

    def check_batch_job_status(self):
        """Return the ``status`` field of the stored batch inference job."""
        batch_job_response = self.personalize.describe_batch_inference_job(
            batchInferenceJobArn = self.batch_job_arn
            )
        return batch_job_response["batchInferenceJob"]['status']

    def __getstate__(self):
        """Return the picklable state: every attribute except the boto3 clients."""
        attributes = self.__dict__.copy()
        # pop() with a default tolerates instances that lack a client attribute
        attributes.pop('personalize', None)
        attributes.pop('personalize_runtime', None)
        attributes.pop('personalize_events', None)
        return attributes

    def __setstate__(self, state):
        """Restore pickled state and recreate the client slots as ``None``.

        Without this, an unpickled object had no client attributes at all and
        failed with AttributeError before ``setup_connection()`` was called.
        """
        self.__dict__.update(state)
        self.personalize = None
        self.personalize_runtime = None
        self.personalize_events = None