<a href="https://colab.research.google.com/github/ramin40/movie-recommender-ranker-model/blob/main/Copy_of_deep_ranker_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# importing necessary libraries

In [None]:
! pip install -q tensorflow-recommenders

[?25l[K     |███▉                            | 10 kB 19.2 MB/s eta 0:00:01[K     |███████▋                        | 20 kB 10.5 MB/s eta 0:00:01[K     |███████████▌                    | 30 kB 7.0 MB/s eta 0:00:01[K     |███████████████▎                | 40 kB 3.6 MB/s eta 0:00:01[K     |███████████████████             | 51 kB 4.0 MB/s eta 0:00:01[K     |███████████████████████         | 61 kB 4.5 MB/s eta 0:00:01[K     |██████████████████████████▊     | 71 kB 4.4 MB/s eta 0:00:01[K     |██████████████████████████████▌ | 81 kB 5.0 MB/s eta 0:00:01[K     |████████████████████████████████| 85 kB 2.3 MB/s 
[?25h

In [None]:
import tensorflow as tf
import numpy as np
import pprint
import tensorflow_recommenders as tfrs


In [None]:
# Load the previously saved MovieLens movie and rating datasets from disk.
movies = tf.data.experimental.load(
    '/content/drive/MyDrive/datasets/movielens_movies'
)
ratings = tf.data.experimental.load(
    '/content/drive/MyDrive/datasets/movielens_ratings'
)

# Pretty-print a single raw rating record to inspect the available features.
for raw_example in ratings.take(1).as_numpy_iterator():
  pprint.pprint(raw_example)

{'bucketized_user_age': 45.0,
 'movie_genres': array([7]),
 'movie_id': b'357',
 'movie_title': b"One Flew Over the Cuckoo's Nest (1975)",
 'raw_user_age': 46.0,
 'timestamp': 879024327,
 'user_gender': True,
 'user_id': b'138',
 'user_occupation_label': 4,
 'user_occupation_text': b'doctor',
 'user_rating': 4.0,
 'user_zip_code': b'53211'}


In [None]:
def _to_user_features(x):
  """Select the rating-level features used by the ranker and cast numerics to float32.

  Casting is done with an explicit `tf.cast` instead of the Python `float()`
  builtin, which only works on tensors when AutoGraph rewrites the call.

  Args:
    x: one raw rating record (a dict of tensors).

  Returns:
    A dict with keys 'age', 'time', 'gender', 'occupation', 'movie_title',
    'genre' and 'rating'.
  """
  return {
      'age': tf.cast(x['raw_user_age'], tf.float32),
      # float32 cannot represent every timestamp exactly
      # (e.g. 879024327 is displayed as 879024300.0 after the cast).
      'time': tf.cast(x['timestamp'], tf.float32),
      'gender': tf.cast(x['user_gender'], tf.float32),
      'occupation': x['user_occupation_text'],
      'movie_title': x['movie_title'],
      # Only the first listed genre of each movie is kept.
      'genre': tf.cast(x['movie_genres'][0], tf.float32),
      'rating': tf.cast(x['user_rating'], tf.float32),
  }


def _to_movie_features(x):
  """Keep the title and the first genre (as float32) of one raw movie record."""
  return {
      'movie_title': x['movie_title'],
      'genre': tf.cast(x['movie_genres'][0], tf.float32),
  }


user_data = ratings.map(_to_user_features)

movie_data = movies.map(_to_movie_features)
movie_title = movies.map(lambda x: x['movie_title'])

In [None]:
# Display one transformed rating record ...
for user_example in user_data.take(1).as_numpy_iterator():
  pprint.pprint(user_example)

print('-------------------------------')

# ... and one transformed movie record.
for movie_example in movie_data.take(1).as_numpy_iterator():
  pprint.pprint(movie_example)

{'age': 46.0,
 'gender': 1.0,
 'genre': 7.0,
 'movie_title': b"One Flew Over the Cuckoo's Nest (1975)",
 'occupation': b'doctor',
 'rating': 4.0,
 'time': 879024300.0}
-------------------------------
{'genre': 4.0, 'movie_title': b'You So Crazy (1994)'}


# creating vocabulary for string features

In [None]:
def _unique_values(dataset):
  """Return the sorted unique element values of `dataset` as a NumPy array.

  The dataset is read in batches of 1,000 elements, the batches are
  concatenated, and `np.unique` removes the duplicates.
  """
  return np.unique(np.concatenate(list(dataset.batch(1_000))))


movie_titles = user_data.map(lambda x: x['movie_title'])
unique_movie_titles = _unique_values(movie_titles)

user_occupation = user_data.map(lambda x: x['occupation'])
unique_user_occupation = _unique_values(user_occupation)

# `movie_genres` is used again later (genre_normalizer.adapt), so it must keep
# pointing at the genre feature.
movie_genres = user_data.map(lambda x: x['genre'])
unique_movie_genres = _unique_values(movie_genres)

# Previously a chained assignment also rebound `movie_genres` to this gender
# dataset; only `user_gender` is assigned here.
user_gender = user_data.map(lambda x: x['gender'])
unique_user_gender = _unique_values(user_gender)

In [None]:
timestamp = user_data.map(lambda x: x['time'])

# Read the timestamps once and reuse the array for both extremes; sorting or
# de-duplicating is not needed to find a minimum or maximum.
all_timestamps = np.concatenate(list(timestamp.batch(1_000)))
min_timestamp = all_timestamps.min()
max_timestamp = all_timestamps.max()

# 1000 evenly spaced boundaries spanning the observed timestamp range.
time_bucket = np.linspace(min_timestamp, max_timestamp, 1000)

ages = user_data.map(lambda x: x['age'])

# splitting data into train and test

In [None]:
# Fix the global TensorFlow seed before building the shuffled dataset.
tf.random.set_seed(123)

# Shuffle once and keep that order on every pass, so `take`/`skip` below
# always see the same sequence.
# NOTE(review): the shuffle buffer holds 1,000 elements; whether that is much
# smaller than the dataset cannot be confirmed from this cell.
shuffled = user_data.shuffle(
    buffer_size=1_000,
    reshuffle_each_iteration=False,
)

# First 80,000 shuffled examples for training, the following 10,000 for testing.
train = shuffled.take(count=80_000)
test = shuffled.skip(count=80_000).take(count=10_000)

# creating separate models

In [None]:
# Output width passed to every Embedding layer defined in the cells below.
embedding_dim = 128

# age normalizer: axis=None makes the layer use one scalar mean/variance,
# which is learned here from the `ages` dataset.
age_normalizer = tf.keras.layers.Normalization(axis=None)
age_normalizer.adapt(ages)

In [None]:
# gender model: map each gender value to a vocabulary index, then embed it.
gender_model = tf.keras.Sequential()
gender_model.add(
    tf.keras.layers.IntegerLookup(vocabulary=unique_user_gender)
)
# One extra embedding row on top of the vocabulary size.
gender_model.add(
    tf.keras.layers.Embedding(len(unique_user_gender) + 1, embedding_dim)
)

In [None]:
# genre normalizer: scalar normalization fitted on the `movie_genres` dataset
# defined in the vocabulary cell.
genre_normalizer = tf.keras.layers.Normalization(axis=None)
genre_normalizer.adapt(movie_genres)

# genre model: map each genre value to a vocabulary index, then embed it.
genre_model = tf.keras.Sequential()
genre_model.add(
    tf.keras.layers.IntegerLookup(vocabulary=unique_movie_genres)
)
# One extra embedding row on top of the vocabulary size.
genre_model.add(
    tf.keras.layers.Embedding(len(unique_movie_genres) + 1, embedding_dim)
)

In [None]:

# movie model: map each title string to a vocabulary index, then embed it.
movie_model = tf.keras.Sequential()
movie_model.add(
    tf.keras.layers.StringLookup(vocabulary=unique_movie_titles)
)
# One extra embedding row on top of the vocabulary size.
movie_model.add(
    tf.keras.layers.Embedding(len(unique_movie_titles) + 1, embedding_dim)
)

In [None]:
# occupation model: map each occupation string to a vocabulary index, then embed it.
occupation_model = tf.keras.Sequential()
occupation_model.add(
    tf.keras.layers.StringLookup(vocabulary=unique_user_occupation)
)
# One extra embedding row on top of the vocabulary size.
occupation_model.add(
    tf.keras.layers.Embedding(len(unique_user_occupation) + 1, embedding_dim)
)

In [None]:

# time model: bucketize the timestamp with the `time_bucket` boundaries and
# embed the resulting bucket index.
time_model = tf.keras.Sequential()
time_model.add(
    tf.keras.layers.Discretization(bin_boundaries=time_bucket.tolist())
)
# One more embedding row than there are boundaries.
time_model.add(
    tf.keras.layers.Embedding(len(time_bucket) + 1, embedding_dim)
)

# time normalizer: scalar normalization fitted on the `timestamp` dataset.
time_normalizer = tf.keras.layers.Normalization(axis=None)
time_normalizer.adapt(timestamp)

In [None]:
# rating model: two tanh hidden layers (128 and 64 units) followed by a
# single linear output unit.
rating_model = tf.keras.Sequential()
rating_model.add(tf.keras.layers.Dense(128, activation='tanh'))
rating_model.add(tf.keras.layers.Dense(64, activation='tanh'))
rating_model.add(tf.keras.layers.Dense(1))

# query tower
* user model

In [None]:
class UserModel(tf.keras.Model):
  """Turns the user-side features of a batch into one concatenated vector per row.

  The feature layers are the module-level instances (`age_normalizer`,
  `gender_model`, `occupation_model`, `time_model`, `time_normalizer`), so
  every `UserModel` shares the same layer objects.
  """

  def __init__(self):
    super().__init__()
    self.age_normalizer = age_normalizer
    self.gender_model = gender_model
    self.occupation_model = occupation_model
    self.time_model = time_model
    self.time_normalizer = time_normalizer

  def call(self, inputs):
    """Concatenate the user feature representations along axis 1.

    Args:
      inputs: dict providing the keys 'age', 'gender', 'occupation' and 'time'.

    Returns:
      The concatenation of: normalized age, gender embedding, occupation
      embedding, time-bucket embedding and normalized time.
    """
    # The normalizer outputs are reshaped to a single column so they can be
    # concatenated next to the embedding outputs.
    age_column = tf.reshape(self.age_normalizer(inputs['age']), (-1, 1))
    gender_embedding = self.gender_model(inputs['gender'])
    occupation_embedding = self.occupation_model(inputs['occupation'])
    time_embedding = self.time_model(inputs['time'])
    time_column = tf.reshape(self.time_normalizer(inputs['time']), (-1, 1))

    user_features = [
        age_column,
        gender_embedding,
        occupation_embedding,
        time_embedding,
        time_column,
    ]
    return tf.concat(user_features, axis=1)

In [None]:
class QueryTower(tf.keras.Model):
  """User feature model followed by a stack of dense layers.

  Args:
    layers: sequence of unit counts, one Dense layer per entry. All layers but
      the last use ReLU; the last layer is linear, matching `CandidateTower`.
  """

  def __init__(self,layers):
    super().__init__()
    self.user_model=UserModel()
    self.dense_model=tf.keras.Sequential()
    # Hidden layers: ReLU activation.
    for layer in layers[:-1]:
      self.dense_model.add(tf.keras.layers.Dense(layer,activation='relu'))
    # Output layer: no activation. It previously also used ReLU, which made
    # this second loop redundant and clipped the tower output at zero, unlike
    # the candidate tower.
    for layer in layers[-1:]:
      self.dense_model.add(tf.keras.layers.Dense(layer))

  def call(self,inputs):
    """Return the dense-stack output for the user features found in `inputs`."""
    v=self.user_model(inputs)
    return self.dense_model(v)

# candidate tower

In [None]:
class MovieModel(tf.keras.Model):
  """Turns the movie-side features of a batch into one concatenated vector per row.

  The feature layers are the module-level instances (`movie_model`,
  `genre_model`, `genre_normalizer`), so every `MovieModel` shares the same
  layer objects.
  """

  def __init__(self):
    super().__init__()
    self.movie_model = movie_model
    self.genre_model = genre_model
    self.genre_normalizer = genre_normalizer

  def call(self, inputs):
    """Concatenate the movie feature representations along axis 1.

    Args:
      inputs: dict providing the keys 'movie_title' and 'genre'.

    Returns:
      The concatenation of: title embedding, genre embedding and normalized
      genre value.
    """
    title_embedding = self.movie_model(inputs['movie_title'])
    genre_embedding = self.genre_model(inputs['genre'])
    # Reshaped to a single column so it can sit next to the embeddings.
    genre_column = tf.reshape(self.genre_normalizer(inputs['genre']), (-1, 1))

    movie_features = [title_embedding, genre_embedding, genre_column]
    return tf.concat(movie_features, axis=1)

In [None]:
class CandidateTower(tf.keras.Model):
  """Movie feature model followed by a stack of dense layers.

  Args:
    layers: sequence of unit counts, one Dense layer per entry. Every layer
      except the last uses ReLU; the last one has no activation.
  """

  def __init__(self, layers):
    super().__init__()
    self.movie_model = MovieModel()
    self.dense = tf.keras.Sequential()

    last_index = len(layers) - 1
    for index, units in enumerate(layers):
      # Only the final entry is left without an activation function.
      activation = 'relu' if index < last_index else None
      self.dense.add(tf.keras.layers.Dense(units, activation=activation))

  def call(self, inputs):
    """Return the dense-stack output for the movie features found in `inputs`."""
    movie_features = self.movie_model(inputs)
    return self.dense(movie_features)

# rating model

In [None]:
class Rating(tf.keras.Model):
  """Predicts a rating from the candidate-tower and query-tower outputs.

  Args:
    layers: unit counts forwarded unchanged to both `CandidateTower` and
      `QueryTower`.

  NOTE(review): `self.rating_model` is the module-level `rating_model`
  instance, so every `Rating` object refers to the same layer object.
  """

  def __init__(self, layers):
    super().__init__()
    self.candidate_tower = CandidateTower(layers)
    self.query_tower = QueryTower(layers)
    self.rating_model = rating_model

  def call(self, inputs):
    """Run both towers on `inputs` and score their concatenated outputs.

    Args:
      inputs: feature dict handed to both towers.

    Returns:
      The output of `rating_model` applied to the candidate output followed
      by the query output, joined along axis 1.
    """
    candidate_out = self.candidate_tower(inputs)
    query_out = self.query_tower(inputs)
    joined = tf.concat([candidate_out, query_out], axis=1)
    return self.rating_model(joined)

In [None]:
class Ranker(tfrs.models.Model):
  """TFRS model that trains `Rating` with a mean-squared-error ranking task.

  Args:
    layers: unit counts forwarded to `Rating` (and from there to both towers).
  """

  def __init__(self,layers):
    super(Ranker,self).__init__()
    self.rating_model=Rating(layers)
    self.task = tfrs.tasks.Ranking(
          loss = tf.keras.losses.MeanSquaredError(),
          metrics=[tf.keras.metrics.RootMeanSquaredError()]
        )

  def compute_loss(self,features,training=True):
    """Compute the ranking loss for one batch.

    Args:
      features: dict of batch features; must contain the label under 'rating'.
      training: whether the model is in training mode (currently unused).

    Returns:
      The value returned by the ranking task for these labels and predictions.

    Raises:
      KeyError: if `features` has no 'rating' entry.
    """
    # Read the label without popping it, so the caller's dict is left intact.
    labels=features['rating']
    model_inputs={name:value for name,value in features.items() if name!='rating'}
    pred=self.rating_model(model_inputs)
    return self.task(labels=labels, predictions=pred)

In [None]:
# Build and compile a ranker whose towers each get the layer spec [10].
# NOTE: `model` is assigned again in the training cell before `fit` is called.
model = Ranker(layers=[10])
model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1),
)

# preparing data to train model

In [None]:
# Training pipeline: shuffle -> batch -> cache -> prefetch.
cached_train = (
    train
    .shuffle(100_000)
    .batch(2048)
    .cache()
    .prefetch(tf.data.AUTOTUNE)
)

# Evaluation pipeline: batch -> cache -> prefetch (no shuffling).
cached_test = (
    test
    .batch(4096)
    .cache()
    .prefetch(tf.data.AUTOTUNE)
)

# training

In [None]:
num_epochs = 300

# Fresh ranker with the layer spec [64], optimised with Adagrad.
model = Ranker(layers=[64])
model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1),
)

# Train silently; validation on the test pipeline runs every 5th epoch.
one_layer_history = model.fit(
    x=cached_train,
    epochs=num_epochs,
    verbose=0,
    validation_data=cached_test,
    validation_freq=5,
)

# Final metrics on the test pipeline, displayed as the cell output.
model.evaluate(x=cached_test, return_dict=True)



{'loss': 0.9185203909873962,
 'regularization_loss': 0,
 'root_mean_squared_error': 0.9607136249542236,
 'total_loss': 0.9185203909873962}

# building model with call method

In [None]:
class Ranker(tfrs.models.Model):
  """TFRS ranking model that can also be called directly to get predictions.

  Args:
    layers: unit counts forwarded to `Rating` (and from there to both towers).
  """

  def __init__(self,layers):
    super(Ranker,self).__init__()
    self.rating_model=Rating(layers)
    self.task = tfrs.tasks.Ranking(
          loss = tf.keras.losses.MeanSquaredError(),
          metrics=[tf.keras.metrics.RootMeanSquaredError()]
        )

  def call(self,inputs):
    """Return the predicted rating for every example in the `inputs` dict."""
    rate=self.rating_model(inputs)
    return rate

  def compute_loss(self,features,
                   training=True
                   ):
    """Compute the ranking loss for one batch.

    Args:
      features: dict of batch features; must contain the label under 'rating'.
      training: whether the model is in training mode (currently unused).

    Returns:
      The value returned by the ranking task for these labels and predictions.

    Raises:
      KeyError: if `features` has no 'rating' entry.
    """
    # Read the label without popping it, so the caller's dict is left intact.
    labels=features['rating']
    model_inputs={name:value for name,value in features.items() if name!='rating'}
    pred=self(model_inputs)
    return self.task(labels=labels, predictions=pred)

In [None]:
num_epochs = 100

# Fresh ranker (the version with a `call` method) with the layer spec [64].
model = Ranker(layers=[64])
model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1),
)

# Train silently; validation on the test pipeline runs every 5th epoch.
one_layer_history = model.fit(
    x=cached_train,
    epochs=num_epochs,
    verbose=0,
    validation_data=cached_test,
    validation_freq=5,
)

# Final metrics on the test pipeline, displayed as the cell output.
model.evaluate(x=cached_test, return_dict=True)



{'loss': 0.9656700491905212,
 'regularization_loss': 0,
 'root_mean_squared_error': 0.986902117729187,
 'total_loss': 0.9656700491905212}

# testing model

In [None]:
# Take 20 single-example batches from the held-out `test` split (this cell
# previously sampled from `train`, i.e. from data the model was fitted on).
test_data=list(test.batch(1).take(20))

# The loop variables use their own names so the `test` dataset and the
# `movie_title` dataset defined in earlier cells are not overwritten, which
# keeps those cells re-runnable.
for sample in test_data:
  prediction=model(sample)
  actual_label=sample['rating']
  sample_title=sample['movie_title']
  print('user : one random user')
  print(f' movie : {sample_title}')
  print(f'actual rate is {actual_label}\n predicted rate is {prediction}')
  print('------------------------')

user : one random user
 movie : [b'Shining, The (1980)']
actual rate is [2.]
 predicted rate is [[4.0236855]]
------------------------
user : one random user
 movie : [b'Speed (1994)']
actual rate is [4.]
 predicted rate is [[3.5547466]]
------------------------
user : one random user
 movie : [b'Terminator 2: Judgment Day (1991)']
actual rate is [3.]
 predicted rate is [[3.703743]]
------------------------
user : one random user
 movie : [b'Army of Darkness (1993)']
actual rate is [4.]
 predicted rate is [[3.034646]]
------------------------
user : one random user
 movie : [b'Evita (1996)']
actual rate is [4.]
 predicted rate is [[4.3348827]]
------------------------
user : one random user
 movie : [b'Jurassic Park (1993)']
actual rate is [5.]
 predicted rate is [[3.836258]]
------------------------
user : one random user
 movie : [b'Kiss of Death (1995)']
actual rate is [3.]
 predicted rate is [[2.7258916]]
------------------------
user : one random user
 movie : [b'Glimmer Man, The 