# Modeling - Ranking_3

The ranking stage takes the outputs of the retrieval model and fine-tunes them to select the best possible handful of recommendations. Its task is to narrow down the set of items the user may be interested in to a shortlist of likely candidates.

In [1]:
# install libraries
! pip install -q tensorflow-recommenders
! pip install -q --upgrade tensorflow-datasets
! pip install -q scann

[K     |████████████████████████████████| 89 kB 5.9 MB/s 
[K     |████████████████████████████████| 4.7 MB 17.3 MB/s 
[K     |████████████████████████████████| 10.4 MB 12.4 MB/s 
[K     |████████████████████████████████| 578.0 MB 14 kB/s 
[K     |████████████████████████████████| 438 kB 84.1 MB/s 
[K     |████████████████████████████████| 1.7 MB 56.3 MB/s 
[K     |████████████████████████████████| 5.9 MB 45.4 MB/s 
[?25h

In [2]:
# import libraries
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_recommenders as tfrs

# import interactive table 
from google.colab import data_table
data_table.enable_dataframe_formatter()

# set seed
tf.random.set_seed(42)

In [3]:
# mount G-Drive and load data
from google.colab import drive
drive.mount('/content/drive')

# load data subset 
gdrive_path = '/content/drive/MyDrive/ModelingData'
path = os.path.join(gdrive_path, "ratings")

ratings = tf.data.Dataset.load(path)

Mounted at /content/drive


### Preparing the dataset

In [4]:
# Select the basic features.
ratings = ratings.map(lambda x: {
    'product_title': x['data']['product_title'], 
    'customer_id': x['data']['customer_id'], 
    'star_rating': x['data']['star_rating']
})


In [5]:
# train-test split: 80-20
tf.random.set_seed(42)
shuffled = ratings.shuffle(92_096, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(92_096)
test = shuffled.skip(92_096).take(23_024)

In [6]:
# vocabulary to map raw feature values to embedding vectors
product_titles = ratings.batch(50_000).map(lambda x: x['product_title'])
customer_ids = ratings.batch(110_000).map(lambda x: x['customer_id'])

unique_product_titles = np.unique(np.concatenate(list(product_titles)))
unique_customer_ids = np.unique(np.concatenate(list(customer_ids)))

unique_product_titles[:10]

array([b'! Set 7 Colors Small S Replacement Bands + 1pc Free Small Grey Band With Clasp for Fitbit FLEX Only /No tracker/ 1pc Teal (Blue/Grey) 1pc Purple / Pink 1pc Red (Tangerine) 1pc Green 1pc Slate (Blue/Grey) 1pc Black 1pc Navy (Blue) Bands Wireless Activity Bracelet Sport Wristband Fit Bit Flex Bracelet Sport Arm Band Armband',
       b'! Small S 1pc Green 1pc Teal (Blue/Green) 1pc Red (Tangerine) Replacement Bands + 1pc Free Small Grey Band With Clasp for Fitbit FLEX Only /No tracker/ Wireless Activity Bracelet Sport Wristband Fit Bit Flex Bracelet Sport Arm Band Armband',
       b'! Small S 1pc Teal (Blue/Green) 1pc Purple / Pink Replacement Bands + 1pc Free Small Grey Band With Clasp for Fitbit FLEX Only /No tracker/ Wireless Activity Bracelet Sport Wristband Fit Bit Flex Bracelet Sport Arm Band Armband',
       b'"""SEASON SPECIAL"""THE ORIGINAL HEAVY DUTY BIG GRIZZLY COT-HEAVY DUTY QUALITY w/ IPHONE Holder & Drink Holder-High Quality Product-10 YEARS WARRANTY-84\xe2\x80\x9d L

### Implementing a model

#### Architecture

In [7]:
class RankingModel(tf.keras.Model):

  def __init__(self):
    super().__init__()
    embedding_dimension = 32

    # Compute embeddings for users.
    self.user_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary = unique_customer_ids, mask_token=None),
      tf.keras.layers.Embedding(len(unique_customer_ids) + 1, embedding_dimension)
    ])

    # Compute embeddings for products.
    self.product_embeddings = tf.keras.Sequential([
      tf.keras.layers.StringLookup(
        vocabulary=unique_product_titles, mask_token=None),
      tf.keras.layers.Embedding(len(unique_product_titles) + 1, embedding_dimension)
    ])

    # Compute predictions.
    self.ratings = tf.keras.Sequential([
      # Learn multiple dense layers.
      tf.keras.layers.Dense(256, activation="relu"),
      tf.keras.layers.Dense(64, activation="relu"),
      # Make rating predictions in the final layer.
      tf.keras.layers.Dense(1)
  ])

  def call(self, inputs):

    user_id, product_title = inputs

    user_embedding = self.user_embeddings(user_id)
    product_embedding = self.product_embeddings(product_title)

    return self.ratings(tf.concat([user_embedding, product_embedding], axis=1))

#### Loss and Metrics

In [8]:
# using Ranking object to wrap together the loss function and metrics
task = tfrs.tasks.Ranking(
  loss = tf.keras.losses.MeanSquaredError(),
  metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

#### The full model

In [9]:
class AmazonRankingModel(tfrs.models.Model):

  def __init__(self):
    super().__init__()
    self.ranking_model: tf.keras.Model = RankingModel()
    self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
      loss = tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )

  def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
    return self.ranking_model(
        (features["customer_id"], features["product_title"]))

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    labels = features.pop("star_rating")

    rating_predictions = self(features)

    # The task computes the loss and the metrics.
    return self.task(labels=labels, predictions=rating_predictions)

### Fitting and evaluation

In [10]:
model_ranking = AmazonRankingModel()
model_ranking.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [11]:
# shuffle, batch and cache the training and test data

cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

In [12]:
model_ranking.fit(cached_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f7bfa06b910>

In [13]:
model_ranking.evaluate(cached_test, return_dict=True)



{'root_mean_squared_error': 1.2444056272506714,
 'loss': 1.612769603729248,
 'regularization_loss': 0,
 'total_loss': 1.612769603729248}

### Testing and ranking model

In [19]:
test_ratings = {}
test_product_titles = ['WaterVault Thermos Water Bottle - Double Insulated Copper Plated Stainless Steel - Keeps Hot 12 Hours, Cold up to 36 - BPA-Free (12oz, 17oz, 26oz, 1 liter) Assorted Colors',
       'Bomber Irie Bomb Floating Sunglasses',
       'Vader Bicycle Cycling Bike Road Offroad MTB Mountain Saddle Seat']
for product_title in test_product_titles:
  test_ratings[product_title] = model_ranking({
      "customer_id": np.array(["52228204"]),
      "product_title": np.array([product_title])
  })

print("Ratings:")
for title, score in sorted(test_ratings.items(), key=lambda x: x[1], reverse=True):
  print(f"{title}: {score}")

Ratings:
WaterVault Thermos Water Bottle - Double Insulated Copper Plated Stainless Steel - Keeps Hot 12 Hours, Cold up to 36 - BPA-Free (12oz, 17oz, 26oz, 1 liter) Assorted Colors: [[4.6428814]]
Vader Bicycle Cycling Bike Road Offroad MTB Mountain Saddle Seat: [[4.397792]]
Bomber Irie Bomb Floating Sunglasses: [[4.1441364]]


### Exporting for serving

In [15]:
# model serving: saving the model to G-Drive

# Export the query model.
gdrive_path = '/content/drive/MyDrive/Models'
path = os.path.join(gdrive_path, "model_ranking")

# Save model
tf.saved_model.save(model_ranking, path)



In [16]:
loaded = tf.saved_model.load(path)

loaded({"customer_id": np.array(["52228204"]), "product_title": ['WaterVault Thermos Water Bottle - Double Insulated Copper Plated Stainless Steel - Keeps Hot 12 Hours, Cold up to 36 - BPA-Free (12oz, 17oz, 26oz, 1 liter) Assorted Colors']}).numpy()

array([[4.6428814]], dtype=float32)

### Convert the model to TensorFlow Lite

TensorFlow Lite allows a model to be ran on any device. 

In [17]:
converter = tf.lite.TFLiteConverter.from_saved_model(path)
tflite_model = converter.convert()
open("converted_model.tflite", "wb").write(tflite_model)

25364012

In [21]:
interpreter = tf.lite.Interpreter(model_path="converted_model.tflite")
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Test the model.
if input_details[0]["name"] == "serving_default_movie_title:0":
  interpreter.set_tensor(input_details[0]["index"], np.array(["Stearns Sospenders Manual Inflatable Life Jacket"]))
  interpreter.set_tensor(input_details[1]["index"], np.array(["52228204"]))
else:
  interpreter.set_tensor(input_details[0]["index"], np.array(["52228204"]))
  interpreter.set_tensor(input_details[1]["index"], np.array(["Stearns Sospenders Manual Inflatable Life Jacket"]))

interpreter.invoke()

rating = interpreter.get_tensor(output_details[0]['index'])
print(rating)

[[4.192669]]
