In [2]:
import tensorflow as tf
import pandas as pd
from pprint import pprint
import numpy as np
import tensorflow_recommenders as tfrs
import tensorflow_datasets as tfds

In [10]:
# Load the "train" split of the two MovieLens 100k datasets in one pass:
# first the ratings dataset, then the movies dataset.
ratings, movies = (
    tfds.load(dataset_name, split="train")
    for dataset_name in ("movielens/100k-ratings", "movielens/100k-movies")
)

In [11]:
# Project every rating record down to the three fields kept for modelling.
ratings = ratings.map(
    lambda record: {
        key: record[key] for key in ("movie_title", "user_id", "timestamp")
    }
)

# Reduce every movie record to its bare title tensor.
# NOTE: `ratings` and `movies` are rebound here, so this cell is not idempotent:
# running it a second time without re-running the loading cell would index an
# already-projected title tensor with "movie_title".
movies = movies.map(lambda record: record["movie_title"])

In [13]:
# Materialise every rating timestamp into a single NumPy array, pulling the
# values out of the dataset in batches of 100.
timestamps = np.concatenate(
    [batch for batch in ratings.map(lambda x: x["timestamp"]).batch(100)]
)

# Extremes of the observed timestamps; they bound the bucket boundaries below.
max_timestamps = np.max(timestamps)
min_timestamps = np.min(timestamps)

# 1000 evenly spaced boundaries spanning the observed timestamp range.
timestamp_buckets = np.linspace(min_timestamps, max_timestamps, num=1000)

# Vocabularies: distinct movie titles and distinct user ids, each gathered
# from the corresponding dataset in batches of 1000.
unique_movie_titles = np.unique(
    np.concatenate([batch for batch in movies.batch(1000)])
)
unique_user_ids = np.unique(
    np.concatenate(
        [batch for batch in ratings.batch(1000).map(lambda x: x["user_id"])]
    )
)

In [14]:
class UserModel(tf.keras.Model):
    """User tower that embeds a user id and, optionally, the rating timestamp.

    Relies on the module-level ``unique_user_ids``, ``timestamp_buckets`` and
    ``timestamps`` arrays computed in an earlier cell.

    Args:
        use_timestamp: When truthy, the output also contains a bucketised
            timestamp embedding and a normalised scalar timestamp feature.
    """

    def __init__(self, use_timestamp):
        # Keras requires the base initialiser to run before any layer is
        # assigned as an attribute of the model.
        super().__init__()

        # Stored on the instance so `call` can branch on it.
        self._use_timestamp = use_timestamp

        # User id string -> vocabulary index -> 32-dim embedding.
        # The extra embedding row (+1) is for the lookup's out-of-vocabulary index.
        self.user_embeddings = tf.keras.Sequential([
            tf.keras.layers.StringLookup(vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, 32),
        ])

        if use_timestamp:
            # Timestamp -> bucket index -> 32-dim embedding. N boundaries yield
            # N + 1 buckets, hence the +1 on the embedding's input dimension.
            self.timestamp_embedding = tf.keras.Sequential([
                tf.keras.layers.Discretization(timestamp_buckets.tolist()),
                tf.keras.layers.Embedding(len(timestamp_buckets) + 1, 32),
            ])

            # Scalar normalisation (axis=None) whose statistics are computed
            # from all timestamps seen in the ratings.
            self.normalized_timestamp = tf.keras.layers.Normalization(axis=None)
            self.normalized_timestamp.adapt(timestamps)

    def call(self, inputs):
        """Compute the user representation for a batch of features.

        Args:
            inputs: Mapping with a ``"user_id"`` entry and, when timestamps are
                enabled, a ``"timestamp"`` entry. Entries are expected to be
                batched 1-D tensors, since the concatenation on axis 1 assumes
                rank-2 embeddings.

        Returns:
            The 32-dim user embedding alone when timestamps are disabled;
            otherwise the user embedding, the 32-dim timestamp-bucket embedding
            and the normalised timestamp (reshaped to one column) concatenated
            along axis 1, i.e. 65 features per example.
        """
        # The ratings dataset exposes the key "user_id" (singular).
        user_embedding = self.user_embeddings(inputs["user_id"])

        if not self._use_timestamp:
            return user_embedding

        return tf.concat(
            [
                user_embedding,
                self.timestamp_embedding(inputs["timestamp"]),
                # Reshape to (batch, 1) so it can be concatenated on axis 1.
                tf.reshape(self.normalized_timestamp(inputs["timestamp"]), (-1, 1)),
            ],
            axis=1,
        )