In [2]:
!pip install tensorflow-recommenders
!pip install --upgrade tensorflow-datasets





In [3]:
from typing import Dict, Text

import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

In [4]:
# Train split of the MovieLens 100K ratings dataset.
ratings = tfds.load(name="movielens/100k-ratings", split="train")

In [5]:
# Train split of the MovieLens 100K movie catalogue.
movies = tfds.load(name="movielens/100k-movies", split="train")

In [6]:
# Size the catalogue three ways: by iterating, via len(), and via the
# dataset's cardinality tensor. All three are printed for comparison.
cnt = sum(1 for _ in movies)

print(cnt)
print(len(movies))
print(movies.cardinality())

# One record as eager tensors ...
for e in movies.take(1):
    print(e)

# ... and the first 20 records as plain NumPy values.
for i in movies.take(20).as_numpy_iterator():
    print(i)

1682
1682
tf.Tensor(1682, shape=(), dtype=int64)
{'movie_genres': <tf.Tensor: shape=(1,), dtype=int64, numpy=array([4])>, 'movie_id': <tf.Tensor: shape=(), dtype=string, numpy=b'1681'>, 'movie_title': <tf.Tensor: shape=(), dtype=string, numpy=b'You So Crazy (1994)'>}
{'movie_genres': array([4]), 'movie_id': b'1681', 'movie_title': b'You So Crazy (1994)'}
{'movie_genres': array([4, 7]), 'movie_id': b'1457', 'movie_title': b'Love Is All There Is (1996)'}
{'movie_genres': array([1, 3]), 'movie_id': b'500', 'movie_title': b'Fly Away Home (1996)'}
{'movie_genres': array([0]), 'movie_id': b'838', 'movie_title': b'In the Line of Duty 2 (1987)'}
{'movie_genres': array([7]), 'movie_id': b'1648', 'movie_title': b'Niagara, Niagara (1997)'}
{'movie_genres': array([5]), 'movie_id': b'547', 'movie_title': b"Young Poisoner's Handbook, The (1995)"}
{'movie_genres': array([7]), 'movie_id': b'387', 'movie_title': b'Age of Innocence, The (1993)'}
{'movie_genres': array([7]), 'movie_id': b'1495', 'movie_t

In [7]:
# Keep only the two features that MovieLensModel.compute_loss reads.
ratings = ratings.map(
    lambda x: {
        'movie_title': x['movie_title'],
        'user_id': x['user_id'],
    }
)

# Peek at the first 20 projected records.
for i in ratings.take(20).as_numpy_iterator():
    print(i)

{'movie_title': b"One Flew Over the Cuckoo's Nest (1975)", 'user_id': b'138'}
{'movie_title': b'Strictly Ballroom (1992)', 'user_id': b'92'}
{'movie_title': b'Very Brady Sequel, A (1996)', 'user_id': b'301'}
{'movie_title': b'Pulp Fiction (1994)', 'user_id': b'60'}
{'movie_title': b'Scream 2 (1997)', 'user_id': b'197'}
{'movie_title': b'Crash (1996)', 'user_id': b'601'}
{'movie_title': b'Aladdin (1992)', 'user_id': b'710'}
{'movie_title': b'True Romance (1993)', 'user_id': b'833'}
{'movie_title': b'Bob Roberts (1992)', 'user_id': b'916'}
{'movie_title': b'Starship Troopers (1997)', 'user_id': b'940'}
{'movie_title': b'Sphere (1998)', 'user_id': b'611'}
{'movie_title': b'Tin Men (1987)', 'user_id': b'707'}
{'movie_title': b'Arrival, The (1996)', 'user_id': b'699'}
{'movie_title': b'Beavis and Butt-head Do America (1996)', 'user_id': b'16'}
{'movie_title': b'Last Dance (1996)', 'user_id': b'314'}
{'movie_title': b'In the Line of Fire (1993)', 'user_id': b'217'}
{'movie_title': b'Lone Sta

In [8]:
# Reduce every movie record to its title string.
# The guard keeps the cell safe to re-run: once `movies` already yields bare
# tensors there is no 'movie_title' key left to index, and mapping a second
# time would raise.
if not isinstance(movies.element_spec, tf.TensorSpec):
    movies = movies.map(lambda x: x['movie_title'])

# First five titles as eager tensors, then as raw bytes.
for i in movies.take(5):
    print(i)
for i in movies.take(5).as_numpy_iterator():
    print(i)

tf.Tensor(b'You So Crazy (1994)', shape=(), dtype=string)
tf.Tensor(b'Love Is All There Is (1996)', shape=(), dtype=string)
tf.Tensor(b'Fly Away Home (1996)', shape=(), dtype=string)
tf.Tensor(b'In the Line of Duty 2 (1987)', shape=(), dtype=string)
tf.Tensor(b'Niagara, Niagara (1997)', shape=(), dtype=string)
b'You So Crazy (1994)'
b'Love Is All There Is (1996)'
b'Fly Away Home (1996)'
b'In the Line of Duty 2 (1987)'
b'Niagara, Niagara (1997)'


In [9]:
# Create both lookup layers first (user layer, then movie layer), then fit
# each vocabulary from the corresponding string feature.
user_ids_vocab = tf.keras.layers.experimental.preprocessing.StringLookup(
    mask_token=None
)
movies_ids_vocab = tf.keras.layers.experimental.preprocessing.StringLookup(
    mask_token=None
)

user_ids_vocab.adapt(ratings.map(lambda rating: rating['user_id']))
movies_ids_vocab.adapt(movies)


In [10]:
emb_dim = 64

# Each tower maps a raw string feature to a vocabulary index and then to an
# emb_dim-dimensional vector. The embedding tables are sized from the adapted
# vocabularies.
user_model = tf.keras.Sequential([
    user_ids_vocab,
    tf.keras.layers.Embedding(user_ids_vocab.vocabulary_size(), emb_dim),
])
movies_model = tf.keras.Sequential([
    movies_ids_vocab,
    tf.keras.layers.Embedding(movies_ids_vocab.vocabulary_size(), emb_dim),
])

# FactorizedTopK is given the candidate corpus: every movie title, embedded by
# movies_model. batch(128) only sets how many titles are embedded per step; it
# does not restrict which candidates the true movie is ranked against when the
# top-K metrics are computed.
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(movies.batch(128).map(movies_model))
)


In [11]:
class MovieLensModel(tfrs.Model):
    """Retrieval model that pairs a user tower with a movie tower."""

    def __init__(
        self,
        user_model: tf.keras.Model,
        movies_model: tf.keras.Model,
        task: tfrs.tasks.Retrieval,
    ):
        """Store the two towers and the task used to score them.

        Args:
            user_model: model applied to the 'user_id' feature.
            movies_model: model applied to the 'movie_title' feature.
            task: callable that turns (user embeddings, movie embeddings)
                into the value returned by `compute_loss`.
        """
        super().__init__()

        self.user_model = user_model
        self.movies_model = movies_model

        self.task = task

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Embed one batch of features and return the task's result.

        Args:
            features: mapping that must contain 'user_id' and 'movie_title'.
            training: accepted for interface compatibility; not used here.

        Returns:
            Whatever `self.task` returns for the two embedding batches.
        """
        user_ids, titles = features['user_id'], features['movie_title']

        # User tower is evaluated before the movie tower, as arguments are
        # evaluated left to right.
        return self.task(self.user_model(user_ids), self.movies_model(titles))
    

In [12]:
# Assemble the model from the two towers and the retrieval task.
model = MovieLensModel(user_model=user_model, movies_model=movies_model, task=task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.5))

# Train for 5 epochs on batches of 4096 ratings. The call is left as the
# cell's final expression so the returned History object is displayed.
model.fit(ratings.batch(4096), epochs=5)

Epoch 1/5
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f83306e3580>

In [13]:
# Show every device TensorFlow can see (no device-type filter).
tf.config.list_physical_devices(device_type=None)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [21]:
# default k = 10
# Brute-force top-k lookup driven by the trained user tower (k is left at its
# default of 10).
index = tfrs.layers.factorized_top_k.BruteForce(query_model=model.user_model)

# Candidates are the movie embeddings, computed 100 titles at a time; the
# unbatched titles are the identifiers returned for each hit.
index.index(
    candidates=movies.batch(100).map(movies_model),
    identifiers=movies,
)

# Query for user '42'. The scores are bound to `_` on purpose: a later cell
# displays `_`.
_, titles = index(np.array(['42']))

print("top rec. = ", titles[0, :3])

top rec. =  tf.Tensor(
[b'Rent-a-Kid (1995)' b'Just Cause (1995)'
 b'Winnie the Pooh and the Blustery Day (1968)'], shape=(3,), dtype=string)


In [22]:
# All recommended titles for the single query row.
titles[0, :]

<tf.Tensor: shape=(10,), dtype=string, numpy=
array([b'Rent-a-Kid (1995)', b'Just Cause (1995)',
       b'Winnie the Pooh and the Blustery Day (1968)',
       b'Murder in the First (1995)', b'House Arrest (1996)',
       b'Only You (1994)', b'Two if by Sea (1996)', b'Old Yeller (1957)',
       b'Unstrung Heroes (1995)', b'Father of the Bride Part II (1995)'],
      dtype=object)>

In [23]:
# Display the scores for user '42' by querying the index directly instead of
# reading `_`, which IPython normally rebinds to the most recent cell output.
index(np.array(['42']))[0]

<tf.Tensor: shape=(1, 10), dtype=float32, numpy=
array([[4.5950384, 3.8992772, 3.7086987, 3.4556668, 3.3144298, 3.3136063,
        3.1960196, 3.1284142, 3.086831 , 2.9352067]], dtype=float32)>