# Recommendation System (training)

In [1]:
import numpy as np
import pandas as pd
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.deserializers import JSONDeserializer

In [2]:
# S3 layout for this experiment. anime_train.recordio and anime_test.recordio
# are expected to already be uploaded under the train/ and test/ prefixes of
# this bucket before the notebook is run.
bucket_name = "sagemaker-tutorial-rnd"
prefix = "Recommendation"

# Data channels that the training job reads from.
train_input = sagemaker.TrainingInput(
    s3_data=f"s3://{bucket_name}/{prefix}/train/anime_train.recordio"
)
test_input = sagemaker.TrainingInput(
    s3_data=f"s3://{bucket_name}/{prefix}/test/anime_test.recordio"
)

# Destination prefix for the saved model.
model_output = f"s3://{bucket_name}/{prefix}/saved_model"

# Show which AWS region the default SageMaker session resolves to.
current_region = sagemaker.Session().boto_region_name
print(current_region)

ap-southeast-1


In [3]:
# Width of the input feature vector: one column per distinct user id plus one
# column per distinct anime id.
# NOTE(review): these counts are hard-coded for the current dataset and must be
# updated whenever the data changes — confirm against the preprocessing step.
NUM_DISTINCT_USERS = 69
NUM_DISTINCT_ANIME = 3098
dim = NUM_DISTINCT_USERS + NUM_DISTINCT_ANIME

# Create the SageMaker session once and reuse it for the image lookup, the
# role lookup and the estimator, so all three agree on the same region and
# configuration instead of each building its own Session.
sagemaker_session = sagemaker.Session()

# Resolve the built-in factorization-machines training image for this region.
container = sagemaker.image_uris.retrieve(
    "factorization-machines", sagemaker_session.boto_region_name)

# Prefix for training job names; also reused later as the endpoint name.
# NOTE(review): "recommentation" looks like a typo for "recommendation". It is
# left unchanged because the resulting job/endpoint name may be referenced
# elsewhere — confirm before renaming.
base_job_name = "recommentation-anime"

# Build the estimator: a single on-demand ml.m5.xlarge instance, capped at one
# hour of training, writing the model under `model_output`.
# To switch to managed spot training, set use_spot_instances=True and also pass
# max_wait=3600 and
# checkpoint_s3_uri=f"s3://{bucket_name}/{prefix}/checkpoints/{base_job_name}".
recommendation_estimator = sagemaker.estimator.Estimator(
    container,
    role=get_execution_role(sagemaker_session=sagemaker_session),
    sagemaker_session=sagemaker_session,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    output_path=model_output,
    use_spot_instances=False,
    max_run=3600,
    base_job_name=base_job_name,
)

# Hyperparameters for the factorization-machines training job.
# `epochs` and `mini_batch_size` override the container defaults shown in the
# training log (epochs=1, mini_batch_size=1000).
fm_hyperparameters = {
    "feature_dim": dim,              # users + anime columns, computed above
    "num_factors": 8,
    "predictor_type": "regressor",
    "mini_batch_size": 2000,
    "epochs": 100,
}
recommendation_estimator.set_hyperparameters(**fm_hyperparameters)

In [4]:
# Launch the training job, mapping each channel name to its S3 input.
# The job's log is streamed into the cell output while it runs.
data_channels = {
    'train': train_input,
    'test': test_input,
}
recommendation_estimator.fit(inputs=data_channels)

2022-02-12 11:13:50 Starting - Starting the training job...
2022-02-12 11:14:14 Starting - Preparing the instances for trainingProfilerReport-1644664430: InProgress
......
2022-02-12 11:15:18 Downloading - Downloading input data
2022-02-12 11:15:18 Training - Downloading the training image......
2022-02-12 11:16:14 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
  from collections import Mapping, MutableMapping, Sequence[0m
  """[0m
  """[0m
[34m[02/12/2022 11:16:18 INFO 140146954667840 integration.py:636] worker started[0m
[34m[02/12/2022 11:16:18 INFO 140146954667840] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/algorithm/resources/default-conf.json: {'epochs': 1, 'mini_batch_size': '1000', 'use_bias': 'true', 'use_linear': 'true', 'bias_lr': '0.1', 'linear_lr': '0.001', 'factors_lr': '0.0001', 'bias_wd': '0.01', '

In [5]:
# Host the trained model on a single ml.m5.xlarge instance, behind an endpoint
# that shares its name with `base_job_name`.
# NOTE(review): the endpoint name is fixed, so re-running this cell while that
# endpoint still exists will presumably fail — confirm and delete the old
# endpoint first if so.
endpoint_settings = dict(
    endpoint_name=base_job_name,
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
)
recommendation_predictor = recommendation_estimator.deploy(**endpoint_settings)

-----!