# Tensorflow Recommenders: Ranking

In [1]:
import os
import pprint
import tempfile

from typing import Dict, Text

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import tensorflow_recommenders as tfrs

### Get Data

In [2]:
ratings = tfds.load('movie_lens/100k-ratings', split='train')

ratings = ratings.map(lambda x: {
    'movie_title': x['movie_title'],
    'user_id': x['user_id'],
    'user_rating': x['user_rating']
})

In [3]:
# split train and test data
tf.random.set_seed(42)

shuffled = ratings.shuffle(10000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80000)
test = shuffled.skip(80000).take(20000)

In [4]:
# figure out unique user ids and movie titles

user_ids = ratings.batch(1_000_000).map(lambda x: x['user_id'])
movie_titles = ratings.batch(1_000_000).map(lambda x: x['movie_title'])

unique_user_ids = np.unique(np.concatenate(list(user_ids)))
unique_movie_titles = np.unique(np.concatenate(list(movie_titles)))

## Creating a Model

In [13]:
class RankingModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        embedding_dimension = 32
        
        # computer user embedding
        self.user_embeddings = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
        ])
        
        # compute movie embedding
        
        self.movie_embeddings = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(vocabulary=unique_movie_titles, mask_token=None),
            tf.keras.layers.Embedding(len(unique_movie_titles) + 1, embedding_dimension)
        ])
        
        # compute predictions
        self.ratings = tf.keras.Sequential([
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(1)
        ])
        
    def call(self, user_id, movie_title):
        user_embedding = self.user_embeddings(user_id)
        movie_embedding = self.movie_embeddings(movie_title)
            
        return self.ratings(tf.concat([user_embedding, movie_embedding], axis=1))

In [14]:
RankingModel()(['42'],["One Flew Over the Cuckoo's Nest (1975)"])

Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


Consider rewriting this model with the Functional API.


<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.03740937]], dtype=float32)>

### Loss and Metrics

In [15]:
task = tfrs.tasks.Ranking(
    loss = tf.keras.losses.MeanSquaredError(),
    metrics = [tf.keras.metrics.RootMeanSquaredError()]
)

## Full Model

In [23]:
class MovieLensModel(tfrs.models.Model):
    def __init__(self):
        super().__init__()
        self.ranking_model: tf.keras.models = RankingModel()
        self.task: tf.keras.layers.Layers = tfrs.tasks.Ranking(
            loss = tf.keras.losses.MeanSquaredError(),
            metrics = [tf.keras.metrics.RootMeanSquaredError()]
        )
        
    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        rating_predictions = self.ranking_model(features['user_id'], features['movie_title'])
        return self.task(labels=features['user_rating'], predictions=rating_predictions)

### Fitting and Evaluating

In [24]:
model = MovieLensModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [25]:
# shuffle batch and cache training and evaluation data
cached_train = train.shuffle(100_000).batch(8196).cache()
cached_test = test.batch(4096).cache()

### Train the Model

In [26]:
model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f75cc269150>

In [27]:
model.evaluate(cached_test, return_dict=True)



{'root_mean_squared_error': 1.1146851778030396,
 'loss': 1.239869236946106,
 'regularization_loss': 0,
 'total_loss': 1.239869236946106}