In [2]:
"""
The following code uses the MovieLens 100k example to run inference against an endpoint deployed on SageMaker
to determine whether a movie is suitable for a user in the dataset.
This example uses two of the users in the test set to determine a score for each movie/user combination.
"""

import boto3, csv, json
import numpy as np
from scipy.sparse import lil_matrix

# Dataset dimensions: number of distinct users and movies.
nbUsers = 943
nbMovies = 1682

# One empty list per user, keyed by the stringified index 0 .. nbUsers-1.
# NOTE(review): keys are 0-based here, while loadDataset treats user ids as
# 1-based (it subtracts 1) — confirm which convention consumers expect.
moviesByUser = {str(userIndex): [] for userIndex in range(nbUsers)}

def loadDataset(filename, lines, columns, userCount=None):
    """Load a tab-separated ratings file into one-hot features and binary labels.

    Every non-empty row of the file must hold exactly four fields:
    userId, movieId, rating, timestamp (the timestamp is ignored).
    Row ``i`` of the feature matrix gets a 1 in column ``userId - 1`` and a 1
    in column ``userCount + movieId - 1``. The label is 1 when the rating is
    >= 4 and 0 otherwise.

    Parameters
    ----------
    filename : str
        Path of the tab-separated file to read.
    lines : int
        Number of rows to allocate in the feature matrix; must be at least
        the number of data rows in the file.
    columns : int
        Number of feature columns to allocate.
    userCount : int, optional
        Number of user columns that precede the movie columns. When omitted,
        the module-level ``nbUsers`` is used (the original behaviour).

    Returns
    -------
    X : scipy.sparse.lil_matrix
        float32 matrix of shape ``(lines, columns)``.
    Y : numpy.ndarray
        1-D float32 array with one label per row read.

    Raises
    ------
    ValueError
        If a non-empty row does not have exactly four fields, or an id/rating
        cannot be parsed as a number.
    IndexError
        If a row or column index falls outside the allocated matrix.
    """
    if userCount is None:
        # Fall back to the notebook-level constant so existing calls keep working.
        userCount = nbUsers
    userCount = int(userCount)

    # Features are one-hot encoded in a sparse matrix, allocated as float32 up front.
    X = lil_matrix((lines, columns), dtype='float32')
    # Labels are collected in a list and converted once at the end.
    Y = []
    # newline='' is what the csv module asks for when it is handed a file object.
    with open(filename, 'r', newline='') as f:
        # Skip blank lines (e.g. a trailing newline) instead of failing on unpacking.
        samples = (row for row in csv.reader(f, delimiter='\t') if row)
        for line, (userId, movieId, rating, _timestamp) in enumerate(samples):
            X[line, int(userId) - 1] = 1
            X[line, userCount + int(movieId) - 1] = 1
            # float() accepts both "4" and "4.0"; pandas writes the latter when
            # the rating column is stored as float.
            Y.append(1 if float(rating) >= 4 else 0)

    return X, np.array(Y, dtype='float32')

In [5]:
import pandas as pd
# Source ratings file and the single user whose ratings will be scored.
RATINGS_CSV = '/project/DataCollection/ratings.csv'
TARGET_USER_ID = 11

# Dimensions of the one-hot layout: the feature width is users plus movies.
nbUsers = 943
nbMovies = 1682
nbFeatures = nbUsers + nbMovies

# Read every rating, then keep only the target user's rows.
load = pd.read_csv(RATINGS_CSV)
load = load[load['userId'] == TARGET_USER_ID]
nbRatings = len(load)

# Write the filtered rows as headerless, index-free, tab-separated text.
load.to_csv('temp.pred', sep='\t', header=False, index=False)

In [353]:
# Encode the rows written to 'temp.pred' as a sparse feature matrix plus labels.
X_test, Y_test = loadDataset(filename='temp.pred', lines=nbRatings, columns=nbFeatures)

# Convert the sparse matrix to a dense array for serialisation.
data = X_test.toarray()

# This code serialises the data for use by the SageMaker API; in this case it sends the dense (expanded) form of the sparse matrix.
# See https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-inference.html for more examples of dense and sparse formats.
def fm_serializer(data):
    """Serialise feature rows into a JSON request body.

    ``data`` is an iterable of rows, each exposing ``tolist()`` (for example
    the rows of a NumPy array). The result is a JSON string of the form
    ``{"instances": [{"features": [...]}, ...]}`` with one entry per row.
    """
    instances = [{'features': row.tolist()} for row in data]
    return json.dumps({'instances': instances})


# Serialise the dense feature rows into the JSON body sent to the endpoint.
payload = fm_serializer(data)

# Credentials are deliberately NOT passed here: boto3 resolves them through its
# default credential chain (environment variables such as AWS_ACCESS_KEY_ID /
# AWS_SECRET_ACCESS_KEY, the shared ~/.aws configuration, or an attached IAM role).
# SECURITY: an access key pair used to be hardcoded in this call. Treat that key
# as compromised and rotate/deactivate it in IAM.
runtime_client = boto3.client('sagemaker-runtime', region_name='eu-west-2')

# The endpoint trained on the 100k dataset and deployed to AWS.
endpoint_name = 'factorization-machines-2020-03-14-18-20-08-738'
response = runtime_client.invoke_endpoint(EndpointName=endpoint_name,
                                          ContentType='application/json',
                                          Accept='application/json',
                                          Body=payload)

# the scoring returned for the datapoints for the user and movie
#print(response)
#print(response['Body'].read())

In [354]:
# Read the response body, then decode the bytes as UTF-8 text.
raw_body = response['Body'].read()
text = raw_body.decode("utf-8")

In [355]:
import json

# Parse the JSON response text and display its 'predictions' entry.
d = json.loads(text)
predictions = d['predictions']
predictions

[{'score': 0.4113192558288574, 'predicted_label': 0.0},
 {'score': 0.5253480076789856, 'predicted_label': 1.0},
 {'score': 0.4893607199192047, 'predicted_label': 0.0},
 {'score': 0.4042651951313019, 'predicted_label': 0.0},
 {'score': 0.5581875443458557, 'predicted_label': 1.0},
 {'score': 0.45601779222488403, 'predicted_label': 0.0},
 {'score': 0.3240577280521393, 'predicted_label': 0.0},
 {'score': 0.15418727695941925, 'predicted_label': 0.0},
 {'score': 0.24585725367069244, 'predicted_label': 0.0},
 {'score': 0.23056110739707947, 'predicted_label': 0.0},
 {'score': 0.4726409912109375, 'predicted_label': 0.0},
 {'score': 0.7106837630271912, 'predicted_label': 1.0},
 {'score': 0.498005747795105, 'predicted_label': 0.0},
 {'score': 0.5535253286361694, 'predicted_label': 1.0},
 {'score': 0.47566720843315125, 'predicted_label': 0.0},
 {'score': 0.44765618443489075, 'predicted_label': 0.0},
 {'score': 0.39757952094078064, 'predicted_label': 0.0},
 {'score': 0.26378437876701355, 'predicted

In [356]:
# Tabulate the predictions for display.
predictions_df = pd.DataFrame(d['predictions'])
predictions_df

Unnamed: 0,score,predicted_label
0,0.411319,0.0
1,0.525348,1.0
2,0.489361,0.0
3,0.404265,0.0
4,0.558188,1.0
...,...,...
176,0.407398,0.0
177,0.273137,0.0
178,0.396227,0.0
179,0.594568,1.0
