In [16]:
import tensorflow as tf 
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs 
import numpy as np

In [17]:
%%capture

ratings_raw = tfds.load("movielens/100k-ratings", split="train")
ratings = ratings_raw.map(lambda x: {
    "movie_title" : x['movie_title'],
    'timestamp' : x['timestamp'],
    'user_id': x['user_id']
    })


movies_raw = tfds.load("movielens/100k-movies", split="train")
movies = movies_raw.map(lambda x: {
    'movie_title' : x['movie_title']
})

In [18]:
next(movies.as_numpy_iterator())
movies

<MapDataset element_spec={'movie_title': TensorSpec(shape=(), dtype=tf.string, name=None)}>

In [19]:
# need these to initialize embedding layers in future steps

unique_users = np.unique(np.concatenate(list(ratings.map(lambda x: x['user_id']).batch(1000))))
unique_items = np.unique(np.concatenate(list(movies.map(lambda x: x['movie_title']).batch(1000))))

In [20]:
# need these to initialize timestamp embedding layers in future steps

timestamps = np.concatenate(list(ratings.map(lambda x: x["timestamp"]).batch(100)))

max_timestamp = timestamps.max()
min_timestamp = timestamps.min()

timestamp_buckets = np.linspace(
    min_timestamp, max_timestamp, num=1000,
)

In [21]:
'''
this handles embedding user Identifiers and contextual data.
time stamp is used as the contexual information here.
using timestamp is 
'''

class UserModel(tf.keras.Model):
    def __init__(self, use_timestamp):
        super().__init__()

        self.use_timestamp = use_timestamp

        self.embed_user_id = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary = unique_users,
                mask_token =None
            ),
            tf.keras.layers.Embedding(
                input_dim = len(unique_users)+1,
                output_dim = 32
            )
        ])

        if self.use_timestamp:
            self.embed_timestamp = tf.keras.Sequential([
                tf.keras.layers.Discretization(
                    bin_boundaries = list(timestamp_buckets)
                ),

                tf.keras.layers.Embedding(
                    input_dim = len(list(timestamp_buckets))+1 ,
                    output_dim = 32
                )
            ])

            self.normalize_timestamp = tf.keras.layers.Normalization(
                axis = None #calcuate a scaler mean and variance 
            )
            self.normalize_timestamp.adapt(timestamps)

    
    def call(self, inputs):

        if self.use_timestamp:
            user_id_embed = self.embed_user_id(inputs['user_id'])
            timestamp_embed = self.embed_timestamp(inputs['timestamp'])
            norm_timestamp = tf.reshape(self.normalize_timestamp(inputs['timestamp']), (-1,1)) #(-1,1) means first dimension to be infered

            return tf.concat([user_id_embed, timestamp_embed, norm_timestamp], axis = 1) #concatenate vertically
            
        return self.embed_user_id(inputs['user_id'])

In [22]:
'''
this handles embedding item Identifiers and contextual data.
movie title itself is used as the contexual information here.
using timestamp is 
'''

class ItemModel(tf.keras.Model):
    def __init__(self):
        super().__init__()

        self.max_tokens = 10000

        self.embed_item_id = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary = unique_items,
                mask_token =None
            ),
            tf.keras.layers.Embedding(
                input_dim = len(unique_items)+1,
                output_dim = 32
            )
        ])


        self.textvectorizer = tf.keras.layers.TextVectorization(
            max_tokens = self.max_tokens
        )

        self.embed_item_title = tf.keras.Sequential([
            self.textvectorizer,

            tf.keras.layers.Embedding(
                input_dim = self.max_tokens,
                output_dim = 32,
                mask_zero = True
            ),

            tf.keras.layers.GlobalAveragePooling1D() # reduces dimensionality to 1d (embedding layer embeddeds each word in a title one by one)
        ])

        self.textvectorizer.adapt(unique_items)
    
    def call(self, inputs):

        return tf.concat([
            self.embed_item_id(inputs['movie_title']),
            self.embed_item_title(inputs['movie_title'])
        ],
        axis = 1)
        
        # return self.embed_item_title(inputs['movie_title'])

In [23]:
# test_item = next(movies.batch(10).take(1).as_numpy_iterator())
# test_user = next(ratings.batch(10).take(1).as_numpy_iterator())
# # test_item['movie_title']
# item_model = ItemModel()
# user_model = UserModel(use_timestamp=True)

# user_out = user_model(test_user)
# item_out = item_model(test_item)

In [24]:
# concatenated_tensor = tf.concat([user_out, item_out], axis=1)
# concatenated_tensor

In [25]:
# next(ratings_raw.batch(10).take(1).as_numpy_iterator())
# next(ratings_raw.take(1).as_numpy_iterator())

In [26]:
class RatingModel(tf.keras.Model):
    def __init__(self, use_timestamp):
        super().__init__()
        self.use_timestamp = use_timestamp
        self.user_model = UserModel(use_timestamp= self.use_timestamp)
        self.item_model = ItemModel()

        self.rating_NN = tf.keras.Sequential([
            tf.keras.layers.Dense(254, activation = 'relu'),
            tf.keras.layers.Dense(64, activation = 'relu'),
            tf.keras.layers.Dense(1)
        ])

    def call(self, inputs):

        user_vec = self.user_model(inputs)
        item_vec = self.item_model(inputs)

        return self.rating_NN(tf.concat([user_vec, item_vec], axis = 1))


In [57]:
rating_model = RatingModel(use_timestamp = True)
rating_model(test_row)

<tf.Tensor: shape=(8192, 1), dtype=float32, numpy=
array([[0.01499823],
       [0.03705404],
       [0.09719963],
       ...,
       [0.06178984],
       [0.01390735],
       [0.02128595]], dtype=float32)>

In [65]:
class recommender(tfrs.models.Model):
    def __init__(self, use_timestamp):
        super().__init__()

        self.use_timestamp = use_timestamp
        self.rating_model = RatingModel(use_timestamp = self.use_timestamp)

        self.task = tfrs.tasks.Ranking(
            loss = tf.keras.losses.MeanAbsoluteError(),
            metrics = [tf.keras.metrics.RootMeanSquaredError()]
        )

    def call(self, inputs):
        return self.rating_model(inputs)

    def compute_loss(self, inputs, training=False): 
        
        # rating_pred = self.rating_model(inputs)
        rating_pred = self(inputs)

        return self.task(
            label = inputs['user_rating'],
            predictions = rating_pred
        )



In [73]:
from typing import Dict, Text

class MovielensModel(tfrs.models.Model):

  def __init__(self, use_timestamp):
    super().__init__()
    self.use_timestamp = use_timestamp
    self.ranking_model: tf.keras.Model = RatingModel(use_timestamp = self.use_timestamp)
    self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
      loss = tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )

  def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
    return self.ranking_model(
        (features["user_id"], features["movie_title"]))

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    labels = features.pop("user_rating")

    rating_predictions = self(features)

    # The task computes the loss and the metrics.
    return self.task(labels=labels, predictions=rating_predictions)

# Training

In [49]:
# alternative

ratings_1 = tfds.load("movielens/100k-ratings", split="train")

ratings_1 = ratings_1.map(lambda x: {
    "movie_title": x["movie_title"],
    "user_id": x["user_id"],
    "user_rating": x["user_rating"],
    "timestamp": x["timestamp"]
})

In [50]:
tf.random.set_seed(42)
shuffled = ratings_1.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

In [51]:
cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

In [55]:
test_row = next(cached_train.take(1).as_numpy_iterator())
test_row

{'movie_title': array([b'Brazil (1985)', b'Dave (1993)', b"Muriel's Wedding (1994)", ...,
        b'Flirting With Disaster (1996)', b'Gandhi (1982)',
        b'Muppet Treasure Island (1996)'], dtype=object),
 'user_id': array([b'387', b'389', b'911', ..., b'643', b'537', b'82'], dtype=object),
 'user_rating': array([5., 4., 5., ..., 4., 4., 1.], dtype=float32),
 'timestamp': array([886479771, 880087850, 892839846, ..., 891447696, 886031860,
        884714456], dtype=int64)}

In [74]:
reco_model = MovielensModel(use_timestamp= True)
reco_model.compile(optimizer = tf.keras.optimizers.Adam())

reco_model.fit(cached_train, epochs = 5)

Epoch 1/5


TypeError: in user code:

    File "c:\Users\bpadmin\anaconda3\envs\tensorflow_cuda\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\bpadmin\anaconda3\envs\tensorflow_cuda\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\bpadmin\anaconda3\envs\tensorflow_cuda\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\bpadmin\anaconda3\envs\tensorflow_cuda\lib\site-packages\tensorflow_recommenders\models\base.py", line 68, in train_step
        loss = self.compute_loss(inputs, training=True)
    File "C:\Users\naradaw\AppData\Local\Temp\ipykernel_43080\3740315027.py", line 21, in compute_loss
        rating_predictions = self(features)
    File "c:\Users\bpadmin\anaconda3\envs\tensorflow_cuda\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\naradaw\AppData\Local\Temp\__autograph_generated_filecg9lbgf1.py", line 12, in tf__call
        retval_ = ag__.converted_call(ag__.ld(self).ranking_model, ((ag__.ld(features)['user_id'], ag__.ld(features)['movie_title']),), None, fscope)
    File "C:\Users\naradaw\AppData\Local\Temp\__autograph_generated_filenvuhzovw.py", line 10, in tf__call
        user_vec = ag__.converted_call(ag__.ld(self).user_model, (ag__.ld(inputs),), None, fscope)
    File "C:\Users\naradaw\AppData\Local\Temp\__autograph_generated_filehshrryen.py", line 41, in tf__call
        ag__.if_stmt(ag__.ld(self).use_timestamp, if_body, else_body, get_state, set_state, ('do_return', 'retval_'), 2)
    File "C:\Users\naradaw\AppData\Local\Temp\__autograph_generated_filehshrryen.py", line 20, in if_body
        user_id_embed = ag__.converted_call(ag__.ld(self).embed_user_id, (ag__.ld(inputs)['user_id'],), None, fscope)

    TypeError: Exception encountered when calling layer "movielens_model_1" "                 f"(type MovielensModel).
    
    in user code:
    
        File "C:\Users\naradaw\AppData\Local\Temp\ipykernel_43080\3740315027.py", line 16, in call  *
            (features["user_id"], features["movie_title"]))
        File "c:\Users\bpadmin\anaconda3\envs\tensorflow_cuda\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler  **
            raise e.with_traceback(filtered_tb) from None
        File "C:\Users\naradaw\AppData\Local\Temp\__autograph_generated_filenvuhzovw.py", line 10, in tf__call
            user_vec = ag__.converted_call(ag__.ld(self).user_model, (ag__.ld(inputs),), None, fscope)
        File "C:\Users\naradaw\AppData\Local\Temp\__autograph_generated_filehshrryen.py", line 41, in tf__call
            ag__.if_stmt(ag__.ld(self).use_timestamp, if_body, else_body, get_state, set_state, ('do_return', 'retval_'), 2)
        File "C:\Users\naradaw\AppData\Local\Temp\__autograph_generated_filehshrryen.py", line 20, in if_body
            user_id_embed = ag__.converted_call(ag__.ld(self).embed_user_id, (ag__.ld(inputs)['user_id'],), None, fscope)
    
        TypeError: Exception encountered when calling layer "rating_model_7" "                 f"(type RatingModel).
        
        in user code:
        
            File "C:\Users\naradaw\AppData\Local\Temp\ipykernel_43080\1993724450.py", line 16, in call  *
                user_vec = self.user_model(inputs)
            File "c:\Users\bpadmin\anaconda3\envs\tensorflow_cuda\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler  **
                raise e.with_traceback(filtered_tb) from None
            File "C:\Users\naradaw\AppData\Local\Temp\__autograph_generated_filehshrryen.py", line 41, in tf__call
                ag__.if_stmt(ag__.ld(self).use_timestamp, if_body, else_body, get_state, set_state, ('do_return', 'retval_'), 2)
            File "C:\Users\naradaw\AppData\Local\Temp\__autograph_generated_filehshrryen.py", line 20, in if_body
                user_id_embed = ag__.converted_call(ag__.ld(self).embed_user_id, (ag__.ld(inputs)['user_id'],), None, fscope)
        
            TypeError: Exception encountered when calling layer "user_model_7" "                 f"(type UserModel).
            
            in user code:
            
                File "C:\Users\naradaw\AppData\Local\Temp\ipykernel_43080\2174534949.py", line 45, in call  *
                    user_id_embed = self.embed_user_id(inputs['user_id'])
            
                TypeError: tuple indices must be integers or slices, not str
            
            
            Call arguments received by layer "user_model_7" "                 f"(type UserModel):
              • inputs=('tf.Tensor(shape=(None,), dtype=string)', 'tf.Tensor(shape=(None,), dtype=string)')
        
        
        Call arguments received by layer "rating_model_7" "                 f"(type RatingModel):
          • inputs=('tf.Tensor(shape=(None,), dtype=string)', 'tf.Tensor(shape=(None,), dtype=string)')
    
    
    Call arguments received by layer "movielens_model_1" "                 f"(type MovielensModel):
      • features={'movie_title': 'tf.Tensor(shape=(None,), dtype=string)', 'user_id': 'tf.Tensor(shape=(None,), dtype=string)', 'timestamp': 'tf.Tensor(shape=(None,), dtype=int64)'}


# Sandbox

In [None]:
test = next(ratings.take(1).as_numpy_iterator())
test

{'movie_title': b"One Flew Over the Cuckoo's Nest (1975)",
 'timestamp': 879024327,
 'user_id': b'138'}

In [None]:
type(test)

dict

In [None]:
o, t, tr = test
o, t, tr

('movie_title', 'timestamp', 'user_id')

In [None]:
import tensorflow as tf

tensor1 = tf.constant([[[1, 2],
                       [3, 4]],
                       [[5, 6],
                       [7, 8]]])


tensor2 = tf.constant([[[5, 6],
                       [7, 8]],
                       [[1, 2],
                       [3, 4]]])

In [None]:
concatenated_tensor = tf.concat([tensor1, tensor2], axis=2)
concatenated_tensor

<tf.Tensor: shape=(2, 2, 4), dtype=int32, numpy=
array([[[1, 2, 5, 6],
        [3, 4, 7, 8]],

       [[5, 6, 1, 2],
        [7, 8, 3, 4]]])>