In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
from typing import Dict, Text
from tensorflow_recommenders.metrics import FactorizedTopK
from tensorflow_recommenders.tasks import Retrieval

In [2]:
# MovieLens 100k: the "train" split of the ratings and of the movie catalogue,
# both read through TFDS with ../dataset as the data directory.
ratings100: tf.data.Dataset = tfds.load(
    'movie_lens/100k-ratings',
    data_dir='../dataset',
    split="train",
)
movies100: tf.data.Dataset = tfds.load(
    'movie_lens/100k-movies',
    data_dir='../dataset',
    split="train",
)

In [3]:
# MovieLens 25m: the "train" split of the ratings and of the movie catalogue,
# both read through TFDS with ../dataset as the data directory.
ratings25m: tf.data.Dataset = tfds.load(
    'movie_lens/25m-ratings',
    data_dir='../dataset',
    split="train",
)
movies25m: tf.data.Dataset = tfds.load(
    'movie_lens/25m-movies',
    data_dir='../dataset',
    split="train",
)

In [4]:
# One NumPy iterator per ratings dataset, used below to peek at raw records.
# These are stateful: every next() call consumes one record.
X100, X25m = (
    ratings100.as_numpy_iterator(),
    ratings25m.as_numpy_iterator(),
)

In [5]:
# Number of rating records in the 100k ratings dataset.
len(ratings100)

100000

In [6]:
# Count the distinct user ids that occur in the 100k ratings.
np.unique(
    list(ratings100.map(lambda rating: rating['user_id']).as_numpy_iterator())
).size

943

In [7]:
# Count the distinct movie ids that occur in the 100k ratings.
np.unique(
    list(ratings100.map(lambda rating: rating['movie_id']).as_numpy_iterator())
).size

1682

In [8]:
# Peek at one 100k rating record. X100 is a stateful iterator, so re-running
# this cell advances it and shows a different record each time.
next(X100)

{'bucketized_user_age': 45.0,
 'movie_genres': array([7], dtype=int64),
 'movie_id': b'357',
 'movie_title': b"One Flew Over the Cuckoo's Nest (1975)",
 'raw_user_age': 46.0,
 'timestamp': 879024327,
 'user_gender': True,
 'user_id': b'138',
 'user_occupation_label': 4,
 'user_occupation_text': b'doctor',
 'user_rating': 4.0,
 'user_zip_code': b'53211'}

In [9]:
# Peek at one 25m rating record. X25m is a stateful iterator, so re-running
# this cell advances it and shows a different record each time.
next(X25m)

{'movie_genres': array([4, 5], dtype=int64),
 'movie_id': b'7367',
 'movie_title': b'Ladykillers, The (2004)',
 'timestamp': 1198090049,
 'user_id': b'58198',
 'user_rating': 3.5}

In [10]:
class FlickPickModel(tf.keras.Model):
    """Keras model that bundles the layers used by the recommender.

    The constructor only stores the three layers it receives; the training
    and evaluation steps delegate to ``tf.keras.Model``.

    NOTE(review): this class defines no ``call`` and no loss computation, so
    the inherited steps presumably cannot run a forward pass yet -- confirm
    whether that is meant to be added here.

    Args:
        user_layer: layer stored as ``self.user_layer``.
        rating_layer: layer stored as ``self.rating_layer``.
        retrieval_layer: layer stored as ``self.retrieval_layer``.
    """

    def __init__(self,
            user_layer: tf.keras.layers.Layer,
            rating_layer: tf.keras.layers.Layer,
            retrieval_layer: tf.keras.layers.Layer):
        super().__init__()

        self.user_layer = user_layer
        self.rating_layer = rating_layer
        self.retrieval_layer = retrieval_layer

    def train_step(self, data):
        """Run one training step on ``data`` and return the parent's result.

        The value must be returned: Keras builds the per-batch logs from
        whatever ``train_step`` hands back, and ``None`` leaves them empty.
        """
        return super().train_step(data)

    def test_step(self, data):
        """Run one evaluation step on ``data`` and return the parent's result.

        Delegates to the parent's ``test_step`` (not ``train_step``) so that
        evaluation does not run a training step.
        """
        return super().test_step(data)



In [11]:
def construct_model(
        movies: tf.data.Dataset,
        num_embeddings: int = 2000,
        embedding_dim: int = 64,
        learning_rate: float = 0.025,
        candidate_batch_size: int = 128) -> tf.keras.Model:
    """Build and compile a ``FlickPickModel`` whose retrieval metric ranks ``movies``.

    Args:
        movies: dataset of candidate movies. Assumes each element is a feature
            dict with a string ``movie_id`` holding a decimal integer, like the
            ``movie_id`` of the rating records -- TODO confirm for the movie
            datasets.
        num_embeddings: ``input_dim`` of both embedding tables. Parsed ids are
            used directly as row indices, so every id must be smaller than
            this value; larger catalogues need a larger table.
        embedding_dim: ``output_dim`` of both embedding tables.
        learning_rate: learning rate passed to the Adam optimizer.
        candidate_batch_size: batch size used when embedding the candidates
            for the ``FactorizedTopK`` metric.

    Returns:
        The compiled ``FlickPickModel``.
    """
    user_model = tf.keras.layers.Embedding(
        input_dim=num_embeddings, output_dim=embedding_dim)
    item_model = tf.keras.layers.Embedding(
        input_dim=num_embeddings, output_dim=embedding_dim)

    def embed_candidates(features):
        # An Embedding layer needs integer indices, not the whole feature
        # dict, so pick the id out and parse the string into an int32 index.
        movie_index = tf.strings.to_number(
            features["movie_id"], out_type=tf.int32)
        return item_model(movie_index)

    candidates = movies.batch(candidate_batch_size).map(embed_candidates)
    retrieval = Retrieval(metrics=FactorizedTopK(candidates=candidates))

    model = FlickPickModel(user_model, item_model, retrieval)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate))

    return model
