# importing necessary libreries

In [1]:
! pip install -q tensorflow-recommenders

In [3]:
import tensorflow as tf
import numpy as np
import pprint
import tensorflow_recommenders as tfrs


In [4]:
movies=tf.data.experimental.load('/content/drive/MyDrive/datasets/movielens_movies')
ratings=tf.data.experimental.load('/content/drive/MyDrive/datasets/movielens_ratings')
for x in ratings.take(1).as_numpy_iterator():
  pprint.pprint(x)

{'bucketized_user_age': 45.0,
 'movie_genres': array([7]),
 'movie_id': b'357',
 'movie_title': b"One Flew Over the Cuckoo's Nest (1975)",
 'raw_user_age': 46.0,
 'timestamp': 879024327,
 'user_gender': True,
 'user_id': b'138',
 'user_occupation_label': 4,
 'user_occupation_text': b'doctor',
 'user_rating': 4.0,
 'user_zip_code': b'53211'}


In [6]:
all_data=ratings.map(lambda x:{
    'age':x['raw_user_age'],
    'time':x['timestamp'],
    'user_id':x['user_id'],
    'movie_id':x['movie_id'],
    'gender':x['user_gender'],
    'occupation':x['user_occupation_text'],
    'movie_title':x['movie_title'],
    'genre':x['movie_genres'][0],
    'rating':x['user_rating']
})
user_data=ratings.map(lambda x:{
    'age':x['raw_user_age'],
    'time':x['timestamp'],
    'gender':x['user_gender'],
    'occupation':x['user_occupation_text'],
    'movie_title':x['movie_title'],
    'genre':x['movie_genres'][0],
    'rating':x['user_rating']
})
movie_data=movies.map(lambda x: {'movie_title':x['movie_title'],
                      'genre':float(x['movie_genres'][0])})
movie_title=movies.map(lambda x:x['movie_title']
                     )

In [7]:
for x in user_data.take(1).as_numpy_iterator():
  pprint.pprint(x)
print('-------------------------------')
for x in movie_data.take(1).as_numpy_iterator():
  pprint.pprint(x)

{'age': 46.0,
 'gender': True,
 'genre': 7,
 'movie_title': b"One Flew Over the Cuckoo's Nest (1975)",
 'occupation': b'doctor',
 'rating': 4.0,
 'time': 879024327}
-------------------------------
{'genre': 4.0, 'movie_title': b'You So Crazy (1994)'}


# creating vocabulary for string features

In [8]:
movie_titles=user_data.map(lambda x:x['movie_title'])
unique_movie_titles=np.unique(np.concatenate(list(movie_titles.batch(1_000))))
user_occupation=user_data.map(lambda x :x['occupation'])
unique_user_occupation=np.unique(np.concatenate(list(user_occupation.batch(1_000))))
movie_genres=user_data.map(lambda x :x['genre'])
unique_movie_genres=np.unique(np.concatenate(list(movie_genres.batch(1_000))))
user_gender=movie_genres=user_data.map(lambda x :x['gender'])
unique_user_gender=np.unique(np.concatenate(list(user_gender.batch(1_000))))

In [9]:
timestamp=user_data.map(lambda x : x['time'])
min_timestamp=np.unique(np.concatenate(list(timestamp.batch(1_000)))).min()
max_timestamp=np.unique(np.concatenate(list(timestamp.batch(1_000)))).max()
time_bucket=np.linspace(min_timestamp,max_timestamp,1000)
ages=user_data.map(lambda x : x['age'])

# spliting data to train and test

In [10]:
tf.random.set_seed(123)
shuffled=user_data.shuffle(1_000,reshuffle_each_iteration=False)

train=shuffled.take(80_000)
test=shuffled.skip(80_000).take(10_000)

# creating seprate models

In [11]:
embedding_dim=128
# age normalizer
age_normalizer=tf.keras.layers.Normalization(
    axis=None
)
age_normalizer.adapt(ages)

In [12]:
# gender model
gender_model=tf.keras.Sequential(
    [tf.keras.layers.IntegerLookup(vocabulary=unique_user_gender),
     tf.keras.layers.Embedding(len(unique_user_gender)+1,embedding_dim)
    ]
)

In [13]:
# genre model
genre_normalizer=tf.keras.layers.Normalization(axis=None)
genre_normalizer.adapt(movie_genres)
genre_model=tf.keras.Sequential([tf.keras.layers.IntegerLookup(vocabulary=unique_movie_genres),
    tf.keras.layers.Embedding(len(unique_movie_genres)+1,embedding_dim)
     
    ]
)

In [14]:

# movie model
movie_model=tf.keras.Sequential(
    [tf.keras.layers.StringLookup(vocabulary=unique_movie_titles),
     tf.keras.layers.Embedding(len(unique_movie_titles)+1,embedding_dim)
    ]
)

In [15]:
# occupation model
occupation_model=tf.keras.Sequential(
    [tf.keras.layers.StringLookup(vocabulary=unique_user_occupation),
     tf.keras.layers.Embedding(len(unique_user_occupation)+1,embedding_dim)
    ]
)

In [16]:

# time normalizer
time_model=tf.keras.Sequential(
    [tf.keras.layers.Discretization(time_bucket.tolist()),
     tf.keras.layers.Embedding(len(time_bucket)+1,embedding_dim)
    ]
)
time_normalizer=tf.keras.layers.Normalization(axis=None)
time_normalizer.adapt(timestamp)

In [17]:
# rating model
rating_model=tf.keras.Sequential(
    [tf.keras.layers.Dense(128,activation='tanh'),
     tf.keras.layers.Dense(64,activation='tanh'),
     tf.keras.layers.Dense(1)
     
    ]
)

# query tower
* user model

In [18]:
class UserModel(tf.keras.Model):
  def __init__(self):
    super().__init__()
    self.age_normalizer=age_normalizer
    self.gender_model=gender_model
    self.occupation_model=occupation_model
    self.time_model=time_model
    self.time_normalizer=time_normalizer

  def call(self,inputs):
    out=tf.concat(
        [tf.reshape(self.age_normalizer(inputs['age']),(-1,1)),
         self.gender_model(inputs['gender']),
         self.occupation_model(inputs['occupation']),
         self.time_model(inputs['time']),
         tf.reshape(self.time_normalizer(inputs['time']),(-1,1))
        ],axis=1
    )
    return out

In [19]:
class QueryTower(tf.keras.Model):
  def __init__(self,layers):
    super().__init__()
    self.user_model=UserModel()
    self.dense_model=tf.keras.Sequential()
    for layer in layers[:-1]:
      self.dense_model.add(tf.keras.layers.Dense(layer,activation='relu'))
    for layer in layers[-1:]:
      self.dense_model.add(tf.keras.layers.Dense(layer,activation='relu'))
  def call(self,inputs):
    v=self.user_model(inputs)
    return self.dense_model(v)

# candidate tower

In [20]:
class MovieModel(tf.keras.Model):
  def __init__(self):
    super().__init__()
    self.movie_model=movie_model
    self.genre_model=genre_model
    self.genre_normalizer=genre_normalizer
  def call(self,inputs):
    out=tf.concat(
        [self.movie_model(inputs['movie_title']),
         self.genre_model(inputs['genre']),
         tf.reshape(self.genre_normalizer(inputs['genre']),(-1,1))
        ],axis=1
    )
    return out

In [21]:
class CandidateTower(tf.keras.Model):
  def __init__(self,layers):
    super().__init__()
    self.movie_model=MovieModel()
    self.dense=tf.keras.Sequential()
    for layer in layers[:-1]:
      self.dense.add(tf.keras.layers.Dense(layer,activation='relu'))
    for layer in layers[-1:]:
      self.dense.add(tf.keras.layers.Dense(layer))
  def call(self,inputs):
    x=self.movie_model(inputs)
    return self.dense(x)

# rating model

In [22]:
class Rating(tf.keras.Model):
  def __init__(self,layers):
    super().__init__()
    self.candidate_tower=CandidateTower(layers)
    self.query_tower=QueryTower(layers)
    self.rating_model=rating_model
  def call(self,inputs):
    candidate_embedings=self.candidate_tower(inputs)
    query_embeddings=self.query_tower(inputs)
    rating_input=tf.concat([candidate_embedings,query_embeddings],axis=1)
    rates=self.rating_model(rating_input)
    return rates

In [24]:
class Ranker(tfrs.models.Model):
  def __init__(self,layers):
    super(Ranker,self).__init__()
    self.rating_model=Rating(layers)
    self.task = tfrs.tasks.Ranking(
          loss = tf.keras.losses.MeanSquaredError(),
          metrics=[tf.keras.metrics.RootMeanSquaredError()]
        )
  def compute_loss(self,features,training=True):
    labels=features.pop('rating')
    pred=self.rating_model(features)
    return self.task(labels=labels, predictions=pred)

In [25]:
model = Ranker([64,32,16])
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

# preparing data to train model

In [26]:
cached_train = train.shuffle(100_000).batch(2048).cache().prefetch(tf.data.AUTOTUNE)
cached_test = test.batch(4096).cache().prefetch(tf.data.AUTOTUNE)

# training

In [29]:
num_epochs = 5
model = Ranker([64])
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))

one_layer_history = model.fit(
    cached_train,
    validation_data=cached_test,
    validation_freq=5,
    epochs=num_epochs,
    verbose=1)

model.evaluate(cached_test, return_dict=True)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


{'loss': 1.2623728513717651,
 'regularization_loss': 0,
 'root_mean_squared_error': 1.120743751525879,
 'total_loss': 1.2623728513717651}

# building model with call method

In [31]:
class Ranker(tfrs.models.Model):
  def __init__(self,layers):
    super(Ranker,self).__init__()
    self.rating_model=Rating(layers)
    self.task = tfrs.tasks.Ranking(
          loss = tf.keras.losses.MeanSquaredError(),
          metrics=[tf.keras.metrics.RootMeanSquaredError()]
        )
  def call(self,inputs):
    rate=self.rating_model(inputs)
    return rate
  def compute_loss(self,features,
                   training=True
                  #Whether the model is in training mode
                   ):
    labels=features.pop('rating')
    pred=self(features)
    return self.task(labels=labels, predictions=pred)

In [32]:
num_epochs = 5
model = Ranker([64])
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))

one_layer_history = model.fit(
    cached_train,
    validation_data=cached_test,
    validation_freq=5,
    epochs=num_epochs,
    verbose=0)

model.evaluate(cached_test, return_dict=True)



{'loss': 1.248848795890808,
 'regularization_loss': 0,
 'root_mean_squared_error': 1.112566351890564,
 'total_loss': 1.248848795890808}

# testing model

In [33]:
test_data=[i for i in train.batch(1).take(20)]

for test in test_data:
  prediction=model(test)
  actual_label=test['rating']
  movie_title=test['movie_title']
  print('user : one random user')
  print(f' movie : {movie_title}')
  print(f'actual rate is {actual_label}\n predicted rate is {prediction}')
  print('------------------------')

user : one random user
 movie : [b'Shining, The (1980)']
actual rate is [2.]
 predicted rate is [[3.4873476]]
------------------------
user : one random user
 movie : [b'Speed (1994)']
actual rate is [4.]
 predicted rate is [[3.3322208]]
------------------------
user : one random user
 movie : [b'Terminator 2: Judgment Day (1991)']
actual rate is [3.]
 predicted rate is [[3.4740837]]
------------------------
user : one random user
 movie : [b'Army of Darkness (1993)']
actual rate is [4.]
 predicted rate is [[3.0966947]]
------------------------
user : one random user
 movie : [b'Evita (1996)']
actual rate is [4.]
 predicted rate is [[3.5705984]]
------------------------
user : one random user
 movie : [b'Jurassic Park (1993)']
actual rate is [5.]
 predicted rate is [[3.4242237]]
------------------------
user : one random user
 movie : [b'Kiss of Death (1995)']
actual rate is [3.]
 predicted rate is [[3.5719059]]
------------------------
user : one random user
 movie : [b'Glimmer Man, T