# TensorFlow Recommenders: Quickstart

To fit a simple matrix factorization model on the Movielens 100K dataset, the following is enough:

### Import TFRS

In [13]:
from typing import Dict, Text

import tensorflow as tf
import tensorflow_datasets as tfds

import tensorflow_recommenders as tfrs

### Read the data

In [14]:
# Read the 'train' split of both Movielens 100K tables: `movies` supplies the
# retrieval candidates, `ratings` supplies the user/movie interactions.
movies = tfds.load(name='movie_lens/100k-movies', split='train')
ratings = tfds.load(name='movie_lens/100k-ratings', split='train')

def keep_only_id_features(example):
  """Project an example onto its two ID fields.

  Args:
    example: Mapping that contains at least the keys 'movie_id' and
      'user_id'; any other entries are ignored.

  Returns:
    A new dict holding only 'movie_id' and 'user_id' (in that order), so that
    nothing but the ID features remains when the examples are batched.

  Raises:
    KeyError: If either ID key is absent from `example`.
  """
  id_only = {}
  for field in ('movie_id', 'user_id'):
    id_only[field] = example[field]
  return id_only

# Strip every feature except the two IDs from each rating example.
ratings = ratings.map(keep_only_id_features)

### Define a model

In [15]:
class Model(tfrs.Model):
  """Matrix factorization retrieval model over user and movie IDs.

  Users and movies each get an embedding table. The string IDs found in the
  data are parsed to integers and used directly as row indices into those
  tables. The loss and the top-K metrics come from a TFRS retrieval task whose
  candidate set is the module-level `movies` dataset.

  Args:
    num_users: `input_dim` of the user embedding table. Every parsed user ID
      is used as a row index and therefore has to be smaller than this.
    num_movies: `input_dim` of the movie embedding table; same constraint for
      parsed movie IDs.
    embedding_dim: Width of both embedding tables. Both sides share it because
      their embeddings are passed to the same retrieval task.
  """

  def __init__(self,
               num_users: int = 2000,
               num_movies: int = 2000,
               embedding_dim: int = 64):
    super().__init__()

    # Set up user representation.
    self.user_model = tf.keras.layers.Embedding(
        input_dim=num_users, output_dim=embedding_dim)
    # Set up movie representation.
    self.item_model = tf.keras.layers.Embedding(
        input_dim=num_movies, output_dim=embedding_dim)
    # Set up a retrieval task and evaluation metrics over the
    # entire dataset of candidates.
    # Only the ID is extracted per element; the embedding lookup then runs on
    # whole batches of 128 IDs. The raw `movies` examples are not batched
    # directly because their other features may not be batchable.
    self.task = tfrs.tasks.RetrievalTask(
        corpus_metrics=tfrs.metrics.FactorizedTopK(
            candidates=movies.map(lambda x: self._to_index(x["movie_id"]))
            .batch(128)
            .map(self.item_model)
        )
    )

  @staticmethod
  def _to_index(ids: tf.Tensor) -> tf.Tensor:
    """Parses a tensor of numeric ID strings into int32 embedding indices."""
    # Parse straight to an integer dtype so the embedding lookup receives
    # integer indices instead of floats that must be cast implicitly.
    return tf.strings.to_number(ids, out_type=tf.int32)

  def compute_loss(self, features: Dict[Text, tf.Tensor], training: bool=False) -> tf.Tensor:
    """Computes the retrieval loss for one batch of (user, movie) pairs.

    Args:
      features: Batch with string-valued "user_id" and "movie_id" entries.
      training: Accepted for interface compatibility; not used here.

    Returns:
      The value returned by the retrieval task for the two sets of embeddings.
    """
    user_embeddings = self.user_model(self._to_index(features["user_id"]))
    movie_embeddings = self.item_model(self._to_index(features["movie_id"]))

    return self.task(user_embeddings, movie_embeddings)

### Fit and evaluate the model

In [16]:
# Seed TensorFlow's global RNG *before* the model is constructed. Model()
# already maps the item embedding over the candidates dataset in its
# constructor, which may create weights at that point; seeding first keeps
# every weight initialization, as well as the shuffle below, under the seed.
SEED = 42
tf.random.set_seed(SEED)

model = Model()
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

# Randomly shuffle data and split between train and test.
# reshuffle_each_iteration=False freezes the shuffled order, so the take/skip
# partitions below stay disjoint every time the datasets are iterated.
shuffled = ratings.shuffle(100_000, seed=SEED, reshuffle_each_iteration=False)

train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

# Train.
model.fit(train.batch(4096), epochs=5)

# Evaluate.
model.evaluate(test.batch(4096), return_dict=True)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


{'factorized_top_k_3': array([5.0000e-05, 4.5000e-04, 1.5500e-03, 5.5050e-02, 1.5355e-01],
       dtype=float32),
 'factorized_top_k_3/top_1_categorical_accuracy': 4.999999873689376e-05,
 'factorized_top_k_3/top_5_categorical_accuracy': 0.00044999999227002263,
 'factorized_top_k_3/top_10_categorical_accuracy': 0.001550000044517219,
 'factorized_top_k_3/top_50_categorical_accuracy': 0.05505000054836273,
 'factorized_top_k_3/top_100_categorical_accuracy': 0.15354999899864197,
 'loss': 30127.83984375}