# Matrix Factorization Recommendation System

In [1]:
# Import packages
import os
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

from typing import Dict, Text

tf.__version__

'2.7.0'

## Load data

In [2]:
os.listdir("/database/tensorflow-datasets/")

['movielens', 'datasets', 'tiny_shakespeare', 'imdb_reviews', 'downloads']

In [3]:
# Load data
ratings = tfds.load("movielens/100k-ratings", split="train", data_dir="/database/tensorflow-datasets/")
movies = tfds.load("movielens/100k-movies", split="train", data_dir="/database/tensorflow-datasets/")

# Select basic features
ratings = ratings.map(lambda x: {
	"movie_title": x["movie_title"],
	"user_id": x["user_id"]
})
movies = movies.map(lambda x: x["movie_title"])

2021-12-06 17:21:37.372985: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-06 17:21:37.377627: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-06 17:21:37.378099: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-06 17:21:37.378805: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

## Build Vocabulary

In [4]:
uid_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
uid_vocabulary.adapt(ratings.map(lambda x: x["user_id"]))

In [5]:
title_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
title_vocabulary.adapt(movies)

## Define model

In [6]:
# Define recommender
class MovieRecommender(tfrs.Model):
	def __init__(self, user_model: tf.keras.Model, movie_model: tf.keras.Model, task: tfrs.tasks.Retrieval) -> None:
		super().__init__()
		self.user_model = user_model
		self.movie_model = movie_model
		self.task = task
	
	def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
		user_embedding = self.user_model(features["user_id"])
		movie_embedding = self.movie_model(features["movie_title"])
		return self.task(user_embedding, movie_embedding)

In [7]:
# Define user model
user_model = tf.keras.Sequential([
	uid_vocabulary,
	tf.keras.layers.Embedding(uid_vocabulary.vocabulary_size(), 100)
])

# Define movie model
movie_model = tf.keras.Sequential([
	title_vocabulary,
	tf.keras.layers.Embedding(title_vocabulary.vocabulary_size(), 100)
])

In [8]:
# Define recommendation ovbjective task
task  = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
	movies.batch(128).map(movie_model)
))

## Learn and evaluate

In [9]:
# Create a retrieval model
model = MovieRecommender(user_model, movie_model, task)

# Compile model
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))

In [10]:
model.fit(ratings.batch(4096), epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f622068b550>

## Brute-force retrieval

In [11]:
# Generate index
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
index.index_from_dataset(movies.batch(100).map(lambda title: (title, model.movie_model(title))))

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x7f622032efa0>

In [12]:
# Get some recommendations
i, titles = index(np.array(["939"]))
print("Top 5 recommendations:", titles[0, :5])

Top 5 recommendations: tf.Tensor(
[b'Children of the Corn: The Gathering (1996)' b'Batman & Robin (1997)'
 b"Fathers' Day (1997)" b'That Old Feeling (1997)' b'Girl 6 (1996)'], shape=(5,), dtype=string)
