<h2>Factorization Machines - Movie Recommendation Model</h2>
Input Features: [userId, movieId] <br>
Target: rating <br>

In [1]:
import numpy as np
import pandas as pd

# Define IAM role
import boto3
import re
import sagemaker
from sagemaker import get_execution_role

# SageMaker SDK Documentation: http://sagemaker.readthedocs.io/en/latest/estimators.html

  from pandas.core.computation.check import NUMEXPR_INSTALLED


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


## Upload Data to S3

In [2]:
# Specify your bucket name and the object keys of the training and test files
bucket_name = 'sagemaker-us-east-1-190250733572'
training_file_key = 'movie/user_movie_train.recordio'
test_file_key = 'movie/user_movie_test.recordio'

# Full S3 URIs: model output location, training file, test file
s3_model_output_location = f's3://{bucket_name}/movie/model'
s3_training_file_location = f's3://{bucket_name}/{training_file_key}'
s3_test_file_location = f's3://{bucket_name}/{test_file_key}'

In [3]:
# Dimension = number of unique users + number of unique movies in our dataset.
# Start at 0, then replace it with the value stored in the file used for training.
dim_movie = 0

with open(r'ml-latest-small/movie_dimension.txt','r') as dimension_file:
    dimension_text = dimension_file.read()
    dim_movie = int(dimension_text)

In [4]:
# Display the dimension value that was read from the file
dim_movie

10334

In [5]:
# Show the three S3 URIs (model output, training file, test file), one per line
print(s3_model_output_location,
      s3_training_file_location,
      s3_test_file_location,
      sep='\n')

s3://sagemaker-us-east-1-190250733572/movie/model
s3://sagemaker-us-east-1-190250733572/movie/user_movie_train.recordio
s3://sagemaker-us-east-1-190250733572/movie/user_movie_test.recordio


In [6]:
# Writing to and reading from S3 is just as easy.
# Files are referred to as objects in S3.
# A file name is referred to as a key name in S3.
# Files stored in S3 are automatically replicated across 3 different availability zones
# in the region where the bucket was created.

# http://boto3.readthedocs.io/en/latest/guide/s3.html
def write_to_s3(filename, bucket, key):
    """Upload the local file `filename` to the S3 bucket `bucket` under object key `key`.

    The file is opened in binary mode and handed to ``upload_fileobj``;
    whatever that call returns is passed back to the caller.
    """
    with open(filename, 'rb') as local_file:
        s3_object = boto3.Session().resource('s3').Bucket(bucket).Object(key)
        return s3_object.upload_fileobj(local_file)

In [7]:
# Upload the local training file to the bucket under training_file_key
write_to_s3(r'ml-latest-small/user_movie_train.recordio',bucket_name,training_file_key)

In [8]:
# Upload the local test file to the bucket under test_file_key
write_to_s3(r'ml-latest-small/user_movie_test.recordio',bucket_name,test_file_key)

## Training Algorithm Docker Image
### AWS Maintains a separate image for every region and algorithm

In [9]:
# Spot instances - save up to 90% of training cost compared to on-demand instances
# Reference: https://github.com/aws-samples/amazon-sagemaker-managed-spot-training/blob/main/xgboost_built_in_managed_spot_training_checkpointing/xgboost_built_in_managed_spot_training_checkpointing.ipynb

# If you are still on the two-month free tier you can use an on-demand instance by setting:
#   use_spot_instances = False

# This notebook trains on spot instances
use_spot_instances = True
max_run = 3600 # in seconds

job_name = 'fm-movie-v4'

# Values that only apply to spot training; both stay None for on-demand
max_wait = None # in seconds
checkpoint_s3_uri = None

if use_spot_instances:
    max_wait = 3600
    checkpoint_s3_uri = f's3://{bucket_name}/movie/checkpoints/{job_name}'

print(f'Checkpoint uri: {checkpoint_s3_uri}')

Checkpoint uri: s3://sagemaker-us-east-1-190250733572/movie/checkpoints/fm-movie-v4


In [10]:
# Create the SageMaker session object
sess = sagemaker.Session()

In [11]:
# Display the default bucket associated with this session
sess.default_bucket()

'sagemaker-us-east-1-190250733572'

In [12]:
# Look up the IAM execution role
role = get_execution_role()

In [13]:
# This role contains the permissions needed to train and deploy models.
# The SageMaker service is trusted to assume this role.
# Print the role for reference.
print(role)

arn:aws:iam::190250733572:role/sagemaker-operator


In [14]:
# SDK 2 uses image_uris.retrieve to look up the container image location
# https://sagemaker.readthedocs.io/en/stable/api/utility/image_uris.html#sagemaker.image_uris.retrieve

# Factorization-machines image for the region of the current session
container = sagemaker.image_uris.retrieve(framework="factorization-machines",
                                          region=sess.boto_region_name)

print(f'Using FM Container {container}')

Using FM Container 382416733822.dkr.ecr.us-east-1.amazonaws.com/factorization-machines:1


In [15]:
# Display the container image URI
container

'382416733822.dkr.ecr.us-east-1.amazonaws.com/factorization-machines:1'

## Build Model

In [16]:
# Training job configuration:
#   - type and number of instances to use
#   - S3 location where the final artifacts are stored
#   - spot settings (max run / max wait / checkpoint location) defined earlier
#
#   Reference: http://sagemaker.readthedocs.io/en/latest/estimators.html
#
# Note: the SDK 2.x version does not require the train prefix for instance count and type

estimator = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    output_path=s3_model_output_location,
    sagemaker_session=sess,
    base_job_name=job_name,
    use_spot_instances=use_spot_instances,
    max_run=max_run,
    max_wait=max_wait,
    checkpoint_s3_uri=checkpoint_s3_uri,
)

### New Configuration after Model Tuning
### Refer to Hyperparameter Tuning Lecture on how to optimize hyperparameters

In [17]:
# Hyperparameters for the factorization-machines regressor.
# feature_dim comes from dim_movie; the remaining values are the tuned configuration.
estimator.set_hyperparameters(**{
    'feature_dim': dim_movie,
    'num_factors': 8,
    'predictor_type': 'regressor',
    'mini_batch_size': 994,
    'epochs': 91,
    'bias_init_method': 'normal',
    'bias_lr': 0.21899531189430518,
    'factors_init_method': 'normal',
    'factors_lr': 5.357593337770278e-05,
    'linear_init_method': 'normal',
    'linear_lr': 0.00021524948053767607,
})

In [18]:
# Display the hyperparameters currently set on the estimator
estimator.hyperparameters()

{'feature_dim': 10334,
 'num_factors': 8,
 'predictor_type': 'regressor',
 'mini_batch_size': 994,
 'epochs': 91,
 'bias_init_method': 'normal',
 'bias_lr': 0.21899531189430518,
 'factors_init_method': 'normal',
 'factors_lr': 5.357593337770278e-05,
 'linear_init_method': 'normal',
 'linear_lr': 0.00021524948053767607}

### Train the model

In [19]:
# Train with the new hyperparameters.
# Each channel name is mapped to the S3 location of its data file.
# Reference: Supported channels by algorithm
#   https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-algo-docker-registry-paths.html
estimator.fit(inputs={
    'train': s3_training_file_location,
    'test': s3_test_file_location,
})

INFO:sagemaker:Creating training-job with name: fm-movie-v4-2024-01-25-02-30-45-220


2024-01-25 02:30:45 Starting - Starting the training job...
2024-01-25 02:31:00 Starting - Preparing the instances for training.........
2024-01-25 02:32:24 Downloading - Downloading input data...
2024-01-25 02:32:49 Downloading - Downloading the training image.....................
2024-01-25 02:36:40 Training - Training image download completed. Training in progress...[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
[34m[01/25/2024 02:36:56 INFO 140081874167616] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-conf.json: {'epochs': 1, 'mini_batch_size': '1000', 'use_bias': 'true', 'use_linear': 'true', 'bias_lr': '0.1', 'linear_lr': '0.001', 'factors_lr': '0.0001', 'bias_wd': '0.01', 'linear_wd': '0.001', 'factors_wd': '0.00001', 'bias_init_method': 'normal', 'bias_init_sigma': '0.01', 'linear_init_method': 'norma

## Deploy Model

In [20]:
# Deploy the trained model to an endpoint that reuses the job name
# Ref: http://sagemaker.readthedocs.io/en/latest/estimators.html
predictor = estimator.deploy(
    endpoint_name=job_name,
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
)

INFO:sagemaker:Creating model with name: fm-movie-v4-2024-01-25-03-07-25-569
INFO:sagemaker:Creating endpoint-config with name fm-movie-v4
INFO:sagemaker:Creating endpoint with name fm-movie-v4


-----------!

## Run Predictions
### Dense and Sparse Formats
https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-inference.html

In [21]:
import json

def fm_sparse_serializer(data, feature_dim=None):
    """Serialize rows of feature indices into the sparse JSON request format.

    Args:
        data: iterable of rows. Each row is a 1-D NumPy array or a plain
            list/tuple holding the indices of the features that are set.
        feature_dim: value reported in each instance's ``shape``. When
            omitted, the notebook-level ``dim_movie`` is used.

    Returns:
        A JSON string of the form ``{"instances": [...]}`` with one entry per
        row; every index listed in ``keys`` is paired with the value 1.
    """
    if feature_dim is None:
        # Default to the dimension loaded earlier in the notebook
        feature_dim = dim_movie

    instances = []
    for row in data:
        # np.asarray accepts lists/tuples as well as arrays, and tolist()
        # converts NumPy integers to built-in ints that json can encode
        keys = np.asarray(row).tolist()
        instances.append({'data': {'features': {'keys': keys,
                                                'shape': [feature_dim],
                                                'values': [1] * len(keys)}}})
    return json.dumps({'instances': instances})

In [22]:
# SDK 2
from sagemaker.deserializers import JSONDeserializer

In [24]:
# Reference example:
# https://github.com/aws/amazon-sagemaker-examples/blob/master/introduction_to_amazon_algorithms/factorization_machines_mnist/factorization_machines_mnist.ipynb

# Responses are parsed as JSON
predictor.deserializer = JSONDeserializer()

# Requests go through the custom serializer and are labelled as JSON
predictor.serializer.content_type = 'application/json'
predictor.serializer.serialize = fm_sparse_serializer

In [25]:
import numpy as np

In [26]:
# Show the serialized request for a single row with two feature indices
fm_sparse_serializer([np.array([341,1416])])

'{"instances": [{"data": {"features": {"keys": [341, 1416], "shape": [10334], "values": [1, 1]}}}]}'

In [27]:
# Try the endpoint on a few entries from the test file.
# The movie dataset is updated regularly, so instead of hard-coding a user id
# and a movie id, use actual values from the file.

# Each row is in this format: ['2.5', '426:1', '943:1']
# ActualRating, UserID, MovieID

with open(r'ml-latest-small/user_movie_test.svm','r') as f:
    for i in range(3):
        rating = f.readline().split()
        print(f"Movie {rating}")
        # Keep only the part before ':' in each feature token
        userID = rating[1].partition(':')[0]
        movieID = rating[2].partition(':')[0]
        request_row = np.array([int(userID), int(movieID)])
        predicted_rating = predictor.predict([request_row])
        print(f'  Actual Rating:\t{rating[0]}')
        predicted_score = predicted_rating['predictions'][0]['score']
        print(f"  Predicted Rating:\t{predicted_score}")
        print()

Movie ['2.5', '426:1', '943:1']
  Actual Rating:	2.5
  Predicted Rating:	2.8515758514404297

Movie ['3', '110:1', '10120:1']
  Actual Rating:	3
  Predicted Rating:	3.107433795928955

Movie ['4', '304:1', '1554:1']
  Actual Rating:	4
  Predicted Rating:	4.020742416381836



## Summary

1. Ensure Training, Test and Validation data are in S3 Bucket
2. Select Algorithm Container Registry Path - Path varies by region
3. Configure Estimator for training - Specify Algorithm container, instance count, instance type, model output location
4. Specify algorithm specific hyper parameters
5. Train model
6. Deploy model - Specify instance count, instance type and endpoint name
7. Run Predictions