### Factorization Machines - Movie Recommendation Model
Input features: ['userId','movieId']
Target: rating

In [2]:
import numpy as np
import pandas as pd

#Define IAM role
import boto3
import re
import sagemaker
from sagemaker import get_execution_role

#### Upload Data to S3

In [3]:
### Import s3 bucket name as environment variable

import os

# Load KEY=VALUE pairs from the local .env file into the process environment.
# The file is parsed in pure Python rather than via `!cat`, so the cell does
# not depend on IPython shell magics or on a Unix shell being available.
with open('./.env') as env_file:
    for raw_line in env_file:
        line = raw_line.strip()
        # Skip blank lines, comment lines, and anything that is not KEY=VALUE.
        if not line or line.startswith('#') or '=' not in line:
            continue
        # Split on the first '=' only, so a value may itself contain '='.
        key, value = line.split('=', 1)
        os.environ[key.strip()] = value.strip()

In [4]:
# Bucket name comes from the environment; object keys are fixed.
bucket_name = os.environ['BUCKET_NAME']
training_file_key = 'movie/user_movie_train.recordio'
test_file_key = 'movie/user_movie_test.recordio'

# Full S3 URIs for the model output and for the train/test data files.
s3_model_output_location = f's3://{bucket_name}/movie/model'
s3_training_file_location = f's3://{bucket_name}/{training_file_key}'
s3_test_file_location = f's3://{bucket_name}/{test_file_key}'

In [5]:
# Read dimension: number of unique users + number of unique movies in our dataset

# Placeholder until the stored value is read from disk.
dim_movie = 0

# Update movie dimension - from file used for training
with open(r'ml-latest-small/movie_dimension.txt') as dimension_file:
    raw_dimension = dimension_file.read()
    dim_movie = int(raw_dimension)

In [6]:
# Display the dimension value read from movie_dimension.txt as a sanity check.
dim_movie

10334

In [7]:
# Writing to and reading from S3
# Files are referred to as objects in S3.
# A file name is referred to as a key name in S3.
# Files stored in S3 are automatically replicated across 3 different availability zones
# in the region where the bucket was created.

def write_to_s3(filename, bucket, key):
    """Upload a local file to an S3 bucket under the given object key.

    Args:
        filename: Path of the local file to upload; it is opened in binary mode.
        bucket: Name of the destination S3 bucket.
        key: Object key the file is stored under.
    """
    with open(filename, 'rb') as source_file:
        s3_resource = boto3.Session().resource('s3')
        target_object = s3_resource.Bucket(bucket).Object(key)
        target_object.upload_fileobj(source_file)

In [8]:
# Upload the training file first, then the test file, each under its own S3 key.
for local_path, object_key in (
    (r'ml-latest-small/user_movie_train.recordio', training_file_key),
    (r'ml-latest-small/user_movie_test.recordio', test_file_key),
):
    write_to_s3(local_path, bucket_name, object_key)

### Training Algorithm Docker Image

In [9]:
# We use spot instances for training

use_spot_instances = True
max_run = 3600
max_wait = 3600

job_name = 'fm-movie-v4'

# A checkpoint location is only set when spot instances are enabled.
checkpoint_s3_uri = (
    f's3://{bucket_name}/movie/checkpoints/{job_name}'
    if use_spot_instances
    else None
)

In [10]:
# Create the SageMaker session used for the region lookup and by the estimator,
# then display it to confirm it was created.
sess = sagemaker.Session()
sess

<sagemaker.session.Session at 0x7f391ebda590>

In [11]:
# IAM execution role that is passed to the estimator.
role = get_execution_role()

In [12]:
# Use factorization-machines: retrieve the algorithm's container image URI
# for the session's region and print it.

container = sagemaker.image_uris.retrieve("factorization-machines",sess.boto_region_name)
print(f'using FM container {container}')

using FM container 382416733822.dkr.ecr.us-east-1.amazonaws.com/factorization-machines:1


#### Build Model

In [13]:
# Configure the training job:
#   - the type and number of instances to use
#   - the S3 location where the final artifacts need to be stored
#
# SDK 2.x does not require the `train` prefix for instance count and type.

estimator = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    output_path=s3_model_output_location,
    sagemaker_session=sess,
    base_job_name=job_name,
    use_spot_instances=use_spot_instances,
    max_run=max_run,
    max_wait=max_wait,
    checkpoint_s3_uri=checkpoint_s3_uri,
)

#### New Configuration after Model tuning

In [14]:
# Hyperparameter values from the tuned configuration, collected in one mapping
# and applied to the estimator in a single call.
tuned_hyperparameters = {
    'feature_dim': dim_movie,
    'num_factors': 8,
    'predictor_type': 'regressor',
    'mini_batch_size': 994,
    'epochs': 91,
    'bias_init_method': 'normal',
    'bias_lr': 0.21899531189430518,
    'factors_init_method': 'normal',
    'factors_lr': 5.357593337770278e-05,
    'linear_init_method': 'normal',
    'linear_lr': 0.00021524948053767607,
}
estimator.set_hyperparameters(**tuned_hyperparameters)

In [15]:
# Display the hyperparameters currently set on the estimator.
estimator.hyperparameters()

{'feature_dim': 10334,
 'num_factors': 8,
 'predictor_type': 'regressor',
 'mini_batch_size': 994,
 'epochs': 91,
 'bias_init_method': 'normal',
 'bias_lr': 0.21899531189430518,
 'factors_init_method': 'normal',
 'factors_lr': 5.357593337770278e-05,
 'linear_init_method': 'normal',
 'linear_lr': 0.00021524948053767607}

#### Train the model

In [16]:
# Map each input channel name to its S3 data location, then start training.
data_channels = {
    'train': s3_training_file_location,
    'test': s3_test_file_location,
}
estimator.fit(data_channels)

INFO:sagemaker:Creating training-job with name: fm-movie-v4-2024-05-15-21-26-16-668


2024-05-15 21:26:16 Starting - Starting the training job...
2024-05-15 21:26:31 Starting - Preparing the instances for training...
2024-05-15 21:27:00 Downloading - Downloading input data...
2024-05-15 21:27:20 Downloading - Downloading the training image..................
2024-05-15 21:30:36 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
[34m[05/15/2024 21:30:46 INFO 140500196235072] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-conf.json: {'epochs': 1, 'mini_batch_size': '1000', 'use_bias': 'true', 'use_linear': 'true', 'bias_lr': '0.1', 'linear_lr': '0.001', 'factors_lr': '0.0001', 'bias_wd': '0.01', 'linear_wd': '0.001', 'factors_wd': '0.00001', 'bias_init_method': 'normal', 'bias_init_sigma': '0.01', 'linear_init_method': 'normal', 'linea

[34m[2024-05-15 21:30:55.270] [tensorio] [info] epoch_stats={"data_pipeline": "/opt/ml/input/data/train", "epoch": 58, "duration": 273, "num_examples": 72, "num_bytes": 4517440}[0m
[34m[05/15/2024 21:30:55 INFO 140500196235072] #quality_metric: host=algo-1, epoch=28, train rmse <loss>=1.036385629770208[0m
[34m[05/15/2024 21:30:55 INFO 140500196235072] #quality_metric: host=algo-1, epoch=28, train mse <loss>=1.074095173594191[0m
[34m[05/15/2024 21:30:55 INFO 140500196235072] #quality_metric: host=algo-1, epoch=28, train absolute_loss <loss>=0.8023887513175844[0m
[34m#metrics {"StartTime": 1715808654.9939668, "EndTime": 1715808655.2704835, "Dimensions": {"Algorithm": "factorization-machines", "Host": "algo-1", "Operation": "training"}, "Metrics": {"update.time": {"sum": 275.8975028991699, "count": 1, "min": 275.8975028991699, "max": 275.8975028991699}}}[0m
[34m[05/15/2024 21:30:55 INFO 140500196235072] #progress_metric: host=algo-1, completed 31.86813186813187 % of epochs[0m


[34m[2024-05-15 21:31:05.210] [tensorio] [info] epoch_stats={"data_pipeline": "/opt/ml/input/data/train", "epoch": 120, "duration": 279, "num_examples": 72, "num_bytes": 4517440}[0m
[34m[05/15/2024 21:31:05 INFO 140500196235072] #quality_metric: host=algo-1, epoch=59, train rmse <loss>=0.9795268099351381[0m
[34m[05/15/2024 21:31:05 INFO 140500196235072] #quality_metric: host=algo-1, epoch=59, train mse <loss>=0.9594727713817082[0m
[34m[05/15/2024 21:31:05 INFO 140500196235072] #quality_metric: host=algo-1, epoch=59, train absolute_loss <loss>=0.7465603438474816[0m
[34m#metrics {"StartTime": 1715808664.9280512, "EndTime": 1715808665.2108254, "Dimensions": {"Algorithm": "factorization-machines", "Host": "algo-1", "Operation": "training"}, "Metrics": {"update.time": {"sum": 282.35435485839844, "count": 1, "min": 282.35435485839844, "max": 282.35435485839844}}}[0m
[34m[05/15/2024 21:31:05 INFO 140500196235072] #progress_metric: host=algo-1, completed 65.93406593406593 % of epoch


2024-05-15 21:31:34 Uploading - Uploading generated training model
2024-05-15 21:31:34 Completed - Training job completed
Training seconds: 274
Billable seconds: 121
Managed Spot Training savings: 55.8%


### Deploy Model

In [26]:
# Deploy the trained model to an endpoint that reuses the job name.
endpoint_settings = {
    'initial_instance_count': 1,
    'instance_type': 'ml.m5.xlarge',
    'endpoint_name': job_name,
}
predictor = estimator.deploy(**endpoint_settings)

INFO:sagemaker:Creating model with name: fm-movie-v4-2024-05-15-21-44-16-905
INFO:sagemaker:Creating endpoint-config with name fm-movie-v4
INFO:sagemaker:Creating endpoint with name fm-movie-v4


----------!

#### Run Predictions
#### Dense and Sparse Formats

https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-inference.html

In [18]:
import json

def fm_sparse_serializer(data, feature_dim=None):
    """Serialize rows of active feature indices into a sparse JSON request.

    Each row lists the column indices that are set for one instance; every
    listed index is given the value 1.

    Args:
        data: Iterable of rows. Each row may be a NumPy array or any plain
            sequence of indices (list, tuple, ...).
        feature_dim: Total width of the feature vector, emitted as ``shape``.
            When omitted, the notebook-level ``dim_movie`` is used.

    Returns:
        A JSON string of the form
        ``{"instances": [{"data": {"features": {"keys": [...], "shape": [N], "values": [...]}}}]}``.
    """
    if feature_dim is None:
        # Fall back to the dimension loaded earlier in the notebook.
        feature_dim = dim_movie

    instances = []
    for row in data:
        # np.asarray accepts arrays and plain sequences alike; tolist() yields
        # native Python numbers, which json can encode (NumPy scalars cannot be).
        column_list = np.asarray(row).tolist()
        value_list = [1] * len(column_list)

        instances.append({'data': {'features': {'keys': column_list,
                                                'shape': [int(feature_dim)],
                                                'values': value_list}}})

    return json.dumps({'instances': instances})

In [21]:
# Show the JSON payload produced for a single row of two feature indices.
fm_sparse_serializer([np.array([341,1416])])

'{"instances": [{"data": {"features": {"keys": [341, 1416], "shape": [10334], "values": [1, 1]}}}]}'

In [27]:
# SDK 2
from sagemaker.deserializers import JSONDeserializer

In [28]:
# Specify custom serializer: replace the serialize method on the predictor's
# existing serializer and set its content type to JSON.

request_serializer = predictor.serializer
request_serializer.serialize = fm_sparse_serializer
request_serializer.content_type = 'application/json'

# Use a JSON deserializer for responses.
predictor.deserializer = JSONDeserializer()

In [29]:
# Request a prediction for one row of two feature indices.
predictor.predict([np.array([341,1416])])

{'predictions': [{'score': 2.8631527423858643}]}

In [32]:
# Test with a few entries from the test file.
# NOTE(review): each line appears to be "<rating> <userId>:1 <movieId>:1",
# judging by the sample output - confirm against the file itself.

sample_count = 5

with open(r'ml-latest-small/user_movie_test.svm','r') as f:

    for i in range(sample_count):
        line = f.readline()
        if not line:
            # Reached end of file before sample_count lines were read.
            break

        tokens = line.split()
        if len(tokens) < 3:
            # Blank or incomplete line: nothing to predict, so skip it
            # instead of failing with an IndexError.
            continue

        print(f'Movie {tokens}')
        # Each feature token is "<index>:<value>"; only the index is needed.
        userId = tokens[1].split(':')[0]
        movieId = tokens[2].split(':')[0]
        predicted_rating = predictor.predict([np.array([int(userId), int(movieId)])])
        print(f'    Actual Rating: \t{tokens[0]}')
        print(f"    Predicted Rating: \t{predicted_rating['predictions'][0]['score']}")

Movie ['2.5', '426:1', '943:1']
    Actual Rating: 	2.5
    Predicted Rating: 	2.8515758514404297
Movie ['3', '110:1', '10120:1']
    Actual Rating: 	3
    Predicted Rating: 	3.107433795928955
Movie ['4', '304:1', '1554:1']
    Actual Rating: 	4
    Predicted Rating: 	4.020742416381836
Movie ['5', '273:1', '867:1']
    Actual Rating: 	5
    Predicted Rating: 	4.1318769454956055
Movie ['2', '18:1', '3373:1']
    Actual Rating: 	2
    Predicted Rating: 	2.294043779373169


In [None]:
# Remove the endpoint created by `estimator.deploy` once predictions are done.
predictor.delete_endpoint()