# Train and Deploy a Neural Collaborative Filtering Model

In this notebook, you will run code cells to:

1. inspect the training script [ncf.py](./ncf.py)  
2. train a model using the SageMaker [TensorFlow estimator](https://sagemaker.readthedocs.io/en/stable/frameworks/tensorflow/sagemaker.tensorflow.html)  
3. deploy and host the trained model as an endpoint using Amazon SageMaker Hosting Services  
4. perform batch inference by calling the model endpoint  
5. upload the resulting recommendations to the DynamoDB `customers` table

In [None]:
%%capture

#################
## Code Cell 1 ##
#################

# Install some required libraries
!pip install -U sagemaker
!pip install tensorflow==2.6.2
!pip install dynamo-pandas[boto3]

# Restore the previously stored unique products data frame
import pandas as pd
import pickle
df_unique_products = pd.read_pickle('products.pkl')
df_unique_products.info()

In [None]:
#################
## Code Cell 2 ##
#################

# In the last notebook (data-preparation-notebook.ipynb), we stored two variables.
# Let's restore those variables here. These variables are inputs for the model training process.

# Import some required libraries.
import tensorflow as tf
import sagemaker
import logging

# Setup logging
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())

# Print version numbers
logger.info(f"TesnorFlow version:{tf.__version__}")
logger.info(f'[Using SageMaker version: {sagemaker.__version__}]')

# Get stored variables
%store -r n_customer
%store -r n_product

print(n_customer)
print(n_product)

In [None]:
#################
## Code Cell 3 ##
#################

# import additional required libraries
import os
import json
import numpy as np

from sagemaker.tensorflow.serving import TensorFlowModel
from sagemaker import get_execution_role
from sagemaker.tensorflow import TensorFlow

# Create the SageMaker session, then look up the execution role and the
# session's default S3 bucket. `prefix` is the S3 key prefix under which the
# model artifact is stored later in the notebook.
prefix = 'models'
sagemaker_session = sagemaker.Session()

role = get_execution_role()
print(f"execution role ARN: {role}")

bucket_name = sagemaker_session.default_bucket()
print(f"default bucket name: {bucket_name}")

In [None]:
#################
## Code Cell 4 ##
#################

# specify the location of the training data we uploaded to S3 from the previous notebook
training_data_uri = os.path.join(f's3://{bucket_name}', 'data')

print(training_data_uri)

# inspect the training script using `pygmentize` magic
!pygmentize 'ncf.py'

In [None]:
#################
## Code Cell 5 ##
#################

# Training infrastructure and model hyperparameters.
# For demo purposes the number of epochs is set to 1.

training_script = 'ncf.py'

instance_type = 'ml.m5.2xlarge'     # type of instance to use for training
num_of_instance = 1                 # number of instances to use for training

# Hyperparameters for the training job; n_user / n_item come from the
# variables restored with %store in an earlier cell.
training_parameters = dict(
    epochs=1,
    batch_size=256,
    n_user=n_customer,
    n_item=n_product,
)

# Training framework specification
tensorflow_version = '2.6.2'
python_version = 'py38'
distributed_training_spec = dict(parameter_server=dict(enabled=True))

In [None]:
#################
## Code Cell 6 ##
#################

# Gather the estimator configuration defined in the previous cell in one
# place, then build the TensorFlow estimator from it.
estimator_settings = {
    'entry_point': training_script,
    'role': role,
    'instance_count': num_of_instance,
    'instance_type': instance_type,
    'framework_version': tensorflow_version,
    'py_version': python_version,
    'distribution': distributed_training_spec,
    'hyperparameters': training_parameters,
}
ncf_estimator = TensorFlow(**estimator_settings)

# Kick off the training job, using the training data stored in S3 as input.
ncf_estimator.fit(training_data_uri)

## Deploy the Endpoint to Amazon SageMaker

In [None]:
#################
## Code Cell 7 ##
#################

# Once the model is trained, we can deploy it using Amazon SageMaker Hosting Services.
# The deployment itself happens in the next cell, where the model is hosted on one
# ml.m5.2xlarge instance as a tensorflow-serving endpoint.
# This enables us to invoke the endpoint like how we use Tensorflow serving.
# Read more about Tensorflow serving using the link below
# https://www.tensorflow.org/tfx/tutorials/serving/rest_simple


# This cell only prepares the model artifact: it copies the artifact produced by the
# training job (ncf_estimator.model_data, presumably an S3 URI - confirm) to
# s3://<default bucket>/<prefix>/model.tar.gz using the AWS CLI.
# `output` is reused by the next cell as the model_data location.
tf_model = ncf_estimator.model_data
output = f's3://{bucket_name}/{prefix}/model.tar.gz'
!aws s3 cp {tf_model} {output}

In [None]:
#################
## Code Cell 8 ##
#################

# TensorFlow Serving container image used to host the model.
# The registry region is taken from the current SageMaker session instead of
# being hard-coded to us-east-1, so the image is resolved in the same region
# as the endpoint. In us-east-1 the resulting URI is identical to the previous
# hard-coded value.
# NOTE(review): a few regions use a different registry account or domain;
# `sagemaker.image_uris.retrieve(...)` handles those cases - confirm if needed.
# NOTE(review): the tag selects the GPU build of the image while the endpoint
# below runs on a CPU instance type (ml.m5.2xlarge) - confirm this is intended.
aws_region = sagemaker_session.boto_region_name
IMAGE_URI = f'763104351884.dkr.ecr.{aws_region}.amazonaws.com/tensorflow-inference:2.6-gpu-py38-cu112-ubuntu20.04-v1'

# S3 prefix that holds the model artifact (not used by the calls below).
model_data_prefix = f's3://{bucket_name}/{prefix}/'

# Wrap the model artifact copied to S3 in the previous cell (`output`).
model = TensorFlowModel(
    model_data=output,
    role=role,
    image_uri=IMAGE_URI
)

# Instance type used to host the endpoint.
instance_type = 'ml.m5.2xlarge'

# Create the endpoint; `predictor` is used later to invoke it.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type=instance_type
)


## Invoke the Endpoint by passing it some test data

In [None]:
#################
## Code Cell 9 ##
#################

# Define a function to read testing data
def _load_testing_data(base_dir):
    """ load testing data """
    df_test = np.load(os.path.join(base_dir, 'test.npy'),allow_pickle=True)
    user_test, item_test, y_test = np.split(np.transpose(df_test).flatten(), 3)
    return user_test, item_test, y_test

In [None]:
##################
## Code Cell 10 ##
##################

# Read the testing data (./data/test.npy) from local disk using the helper
# defined in the previous cell.
user_test, item_test, test_labels = _load_testing_data('./data/')

# One-hot encode the user and item ids for model input.
# `depth` is the number of customers / products restored with %store earlier.
test_user_data = tf.one_hot(user_test, depth=n_customer).numpy().tolist()
test_item_data = tf.one_hot(item_test, depth=n_product).numpy().tolist()
    
# The tensor returned by tf.one_hot is converted to a nested Python list with
# .numpy().tolist(), as done above, e.g.:
# tf.one_hot(user_test, depth=n_customer).numpy().tolist()

In [None]:
##################
## Code Cell 11 ##
##################

# Run batch inference against the endpoint, `batch_size` samples per request.
batch_size = 100
y_pred = []
for start in range(0, len(test_user_data), batch_size):
    stop = start + batch_size

    # Build the request body in the format TensorFlow Serving accepts:
    # one dict per sample, holding the two model inputs.
    instances = [
        {'input_1': user_vec, 'input_2': item_vec}
        for user_vec, item_vec in zip(test_user_data[start:stop], test_item_data[start:stop])
    ]

    # Invoke the model endpoint to make inference.
    response = predictor.predict({"instances": instances})

    # Keep the first value of every returned prediction.
    y_pred += [row[0] for row in response['predictions']]

In [None]:
##################
## Code Cell 12 ##
##################

# --- prediction probability view ---
# Show a few raw prediction values before any threshold is applied.
print('This is what the raw prediction output looks like')
print(y_pred[:5],end='\n\n\n')

# --- user item pair prediction view, with threshold of 0.5 applied ---
# Values >= 0.5 become 1, everything else becomes 0.
binary_predictions = (np.array(y_pred) >= 0.5).astype(int)

# One row per (user, item) pair: the three sequences are stacked as rows and
# then transposed into columns.
pred_df = pd.DataFrame([user_test, item_test, binary_predictions]).T

# Add column headers to the data frame
pred_df.columns = ['CustomerId', 'ProductId', 'prediction']

print('We can convert the output to user-item pair as shown below, and add back the ProductName data')
# Inner join with the unique products frame on ProductId.
df_combined = pred_df.merge(df_unique_products, how='inner', on='ProductId')
display(df_combined)

## Upload the recommendations dataframe to the DynamoDB customers table.

In [None]:
##################
## Code Cell 13 ##
##################

# Useful functions to interact with DynamoDB directly.
from dynamo_pandas import put_df, get_df, keys

# Create a new Pandas dataframe grouping by CustomerId; Include only recommended products
# (rows where prediction == 1). Each customer gets one row holding a list of products.

## DIY Note ##
## Initially load the recommendations to DynamoDB as ProductIds. In the DIY section 
## of the lab you will be required to edit the below line and apply the (list) function to the ProductName column.
recommend_df = df_combined.query('prediction == 1').groupby('CustomerId').ProductId.apply(list).to_frame().reset_index()

# Rename column ProductId to 'recommended'. When using put_df helper function, the
# columns names of the dataframe and dynamoDB table must match verbatim.

## DIY Note ##
## Do not forget to rename 'ProductId' below to 'ProductName' when completing the DIY section.
recommend_df = recommend_df.rename(columns={'ProductId': 'recommended'})

# BatchWrite to DynamoDB customers table.
# A single write of the full frame is sufficient: a put replaces any existing
# item with the same key, so writing a CustomerId-only frame first would be
# overwritten immediately by this write.
put_df(recommend_df, table='customers')
