In [1]:
import sagemaker
from sagemaker import get_execution_role

# SageMaker SDK Documentation: http://sagemaker.readthedocs.io/en/latest/estimators.html

  from pandas.core.computation.check import NUMEXPR_INSTALLED


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
# S3 URI of the model artifact (model.tar.gz); it is passed as `model_data`
# when the Model object is created further down.
s3_model_output_location = 's3://sagemaker-us-east-1-190250733572/movie/model/fm-movie-1-039-b7b84b16/output/model.tar.gz'

In [3]:
# SageMaker session reused below for the default-bucket lookup, the region
# used to resolve the algorithm image, and the Model object.
sess = sagemaker.Session()

In [4]:
# Show the name of this session's default bucket.
sess.default_bucket()

'sagemaker-us-east-1-190250733572'

## Deploy Model

In [5]:
# Look up the execution role; it is handed to the Model as `role` below.
role = get_execution_role()
# Bare expression so the notebook displays the resolved value.
role

'arn:aws:iam::190250733572:role/sagemaker-operator'

In [6]:
# Resolve the image URI registered for the "factorization-machines" algorithm
# in the region this session is configured for.
image_uri = sagemaker.image_uris.retrieve(
    framework="factorization-machines",
    region=sess.boto_region_name,
)
# Bare expression so the notebook displays the resolved URI.
image_uri

'382416733822.dkr.ecr.us-east-1.amazonaws.com/factorization-machines:1'

In [8]:
# Describe the model to SageMaker: the algorithm image, the artifact in S3,
# the role to run under, and the session to issue calls through.
model = sagemaker.model.Model(
    name='fm-movie-model-sgmksdk',
    image_uri=image_uri,
    model_data=s3_model_output_location,
    role=role,
    sagemaker_session=sess,
)

In [9]:
# Deploy the model to an endpoint named 'fm-movie-from-sgmksdk',
# requesting a single ml.m5.xlarge instance.
model.deploy(
    endpoint_name='fm-movie-from-sgmksdk',
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
)

----------!

In [19]:
# Feature dimension: number of unique users + number of unique movies in our
# dataset. The value is read from the file that was used for training.
with open(r'ml-latest-small/movie_dimension.txt', 'r') as f:
    dim_text = f.read().strip()

# int() raises ValueError on empty or non-numeric content. The explicit range
# check rejects 0 and negatives, so an unusable dimension is caught here rather
# than being sent to the endpoint as the request 'shape' later on.
parsed_dim = int(dim_text)
if parsed_dim <= 0:
    raise ValueError(
        f"movie_dimension.txt must contain a positive integer, got {dim_text!r}"
    )

# Only bind the global once the value is known to be usable.
dim_movie = parsed_dim

In [20]:
# Client for the endpoint deployed above. The session created earlier is passed
# explicitly so the predictor and the Model share one session configuration
# instead of the predictor constructing a second, default session.
predictor = sagemaker.Predictor(
    endpoint_name='fm-movie-from-sgmksdk',
    sagemaker_session=sess,
)

In [21]:
import json

def fm_sparse_serializer(data):
    """Serialize rows of active feature indices into a JSON request body.

    Each row becomes one entry of ``instances`` shaped as
    ``{'data': {'features': {'keys': [...], 'shape': [dim_movie], 'values': [...]}}}``
    where ``keys`` holds the row's indices and every value is ``1``.

    Args:
        data: Iterable of rows. A row may be a NumPy array (anything exposing
            ``tolist()``) or a plain sequence of indices such as a list or tuple.

    Returns:
        str: The JSON document ``{"instances": [...]}``.

    Note:
        ``dim_movie`` is read from the notebook's global namespace at call time.
    """
    instances = []
    for row in data:
        # tolist() turns NumPy scalars into built-in numbers that json can
        # encode; plain sequences are simply copied into a list.
        keys = row.tolist() if hasattr(row, 'tolist') else list(row)
        instances.append({
            'data': {
                'features': {
                    'keys': keys,
                    'shape': [dim_movie],
                    # Every listed index is "on", so each value is 1.
                    'values': [1] * len(keys),
                }
            }
        })
    return json.dumps({'instances': instances})

In [22]:
from sagemaker.deserializers import JSONDeserializer

In [23]:
# Give the predictor its own serializer object instead of overwriting
# `serialize` / `content_type` on whichever serializer instance it was
# constructed with; that way no other object holding that instance is affected.
from sagemaker.serializers import SimpleBaseSerializer


class FMSparseSerializer(SimpleBaseSerializer):
    """Serializer that delegates to ``fm_sparse_serializer``."""

    def serialize(self, data):
        """Return the JSON request body for ``data`` (rows of feature indices)."""
        return fm_sparse_serializer(data)


# Requests are sent as JSON and responses are parsed from JSON.
predictor.serializer = FMSparseSerializer(content_type='application/json')
predictor.deserializer = JSONDeserializer()

In [24]:
import numpy as np

In [25]:
# Quick check: show the request body produced for a single row whose active
# feature indices are 341 and 1416.
fm_sparse_serializer([np.array([341,1416])])

'{"instances": [{"data": {"features": {"keys": [341, 1416], "shape": [10334], "values": [1, 1]}}}]}'

In [26]:
# Spot-check the endpoint with the first few entries of the test file.
# The movie dataset is updated regularly, so instead of hard-coding a user id
# and a movie id, the actual values are read from the file.
#
# Each row is in this format: ['2.5', '426:1', '943:1']
#                              ActualRating, UserID, MovieID

with open(r'ml-latest-small/user_movie_test.svm','r') as f:
    # zip() against range(3) caps the loop at three lines and simply ends early
    # when the file holds fewer, instead of indexing into an empty row at EOF.
    for i, line in zip(range(3), f):
        rating = line.split()
        if len(rating) < 3:
            # Blank or incomplete line: nothing to send to the endpoint.
            continue
        print(f"Movie {rating}")
        # Tokens look like '<index>:1'; keep only the index part.
        userID = rating[1].split(':')[0]
        movieID = rating[2].split(':')[0]
        predicted_rating = predictor.predict([np.array([int(userID),int(movieID)])])
        print(f'  Actual Rating:\t{rating[0]}')
        print(f"  Predicted Rating:\t{predicted_rating['predictions'][0]['score']}")
        print()

Movie ['2.5', '426:1', '943:1']
  Actual Rating:	2.5
  Predicted Rating:	2.909193277359009

Movie ['3', '110:1', '10120:1']
  Actual Rating:	3
  Predicted Rating:	2.811187744140625

Movie ['4', '304:1', '1554:1']
  Actual Rating:	4
  Predicted Rating:	4.064101219177246



In [27]:
# Clean up: delete the endpoint this predictor points at ('fm-movie-from-sgmksdk').
predictor.delete_endpoint()

In [28]:
# Clean up: delete the model behind the `model` object created earlier.
model.delete_model()