In [7]:
import pandas as pd
import sys
from typing import Dict, Text

import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs


In [9]:
class MovieLensModel(tfrs.Model):
  """Retrieval model that pairs a user model with a movie model.

  Deriving from `tfrs.Model` helps reduce boilerplate; under the hood these
  are still plain Keras Models.
  """

  def __init__(
      self,
      user_model: tf.keras.Model,
      movie_model: tf.keras.Model,
      task: tfrs.tasks.Retrieval):
    """Stores the two sub-models and the task that scores their outputs.

    Args:
      user_model: Model applied to `features["UserId"]` in `compute_loss`.
      movie_model: Model applied to `features["ItemId"]` in `compute_loss`.
      task: Callable invoked with (user embeddings, movie embeddings); its
        result is what `compute_loss` returns.
    """
    super().__init__()

    # User-side and movie-side representations.
    self.user_model = user_model
    self.movie_model = movie_model

    # Retrieval objective.
    self.task = task

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    """Returns the task's output for one batch of features.

    Args:
      features: Mapping that must contain the "UserId" and "ItemId" entries.
      training: Accepted but not used by this method.
    """
    user_ids = features["UserId"]
    item_ids = features["ItemId"]

    query_vectors = self.user_model(user_ids)
    candidate_vectors = self.movie_model(item_ids)

    return self.task(query_vectors, candidate_vectors)

In [8]:
# Ratings and content are JSON-lines files; targets is a plain CSV.
Ratings, Content = (
    pd.read_json(path, lines=True)
    for path in ('ratings.jsonl', 'content.jsonl')
)
Target = pd.read_csv('targets.csv')


In [11]:
# Keep a fixed subset of columns from each frame, then expose every frame as a
# tf.data.Dataset whose elements are dicts keyed by column name.
# NOTE(review): `Ratings` and `Content` are rebound from DataFrames to Datasets
# here, so this cell cannot be re-run without first re-running the load cell.
ratings_util = Ratings[["UserId", "ItemId", "Rating"]]
Ratings = tf.data.Dataset.from_tensor_slices(
    {column: ratings_util[column] for column in ratings_util.columns})
content_util = Content[[
    "ItemId", "Title", "Year", "Rated",
    "Released", "Runtime", "Genre", "Director",
]]
Content = tf.data.Dataset.from_tensor_slices(
    {column: content_util[column] for column in content_util.columns})

2023-11-16 17:25:25.410949: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-11-16 17:25:25.412088: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2211] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [12]:
# Training examples only carry the (ItemId, UserId) pair; the rating is dropped.
ratings = Ratings.map(lambda row: {"ItemId": row["ItemId"], "UserId": row["UserId"]})

# Candidates are represented by their ItemId alone.
movies = Content.map(lambda row: row["ItemId"])

In [13]:
# Learn the UserId vocabulary from the ids that appear in the ratings.
user_ids_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
user_ids_vocabulary.adapt(ratings.map(lambda row: row["UserId"]))

In [14]:
# Vocabulary over the candidate ids. Despite the name, `movies` yields ItemId
# values (not titles), so this lookup is keyed by ItemId.
movie_titles_vocabulary = tf.keras.layers.StringLookup(mask_token=None)
movie_titles_vocabulary.adapt(movies)

In [15]:
# Define user and movie models.
# User model: raw UserId string -> vocabulary index -> 64-dimensional embedding.
# `vocabulary_size()` replaces the deprecated `vocab_size()` alias; the size it
# reports includes the OOV index, so every index the lookup can emit has an
# embedding row.
user_model = tf.keras.Sequential([
    user_ids_vocabulary,
    tf.keras.layers.Embedding(user_ids_vocabulary.vocabulary_size(), 64)
])



In [16]:
# Movie model: raw ItemId string -> vocabulary index -> 64-dimensional embedding.
# `vocabulary_size()` replaces the deprecated `vocab_size()` alias; the size it
# reports includes the OOV index, so every index the lookup can emit has an
# embedding row.
movie_model = tf.keras.Sequential([
    movie_titles_vocabulary,
    tf.keras.layers.Embedding(movie_titles_vocabulary.vocabulary_size(), 64)
])



In [17]:
# Define your objectives.
# The top-K metric is given the embeddings of every candidate item, computed
# by running the movie model over the ids in batches of 128.
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(
        candidates=movies.batch(128).map(movie_model)))

In [18]:
# Create a retrieval model.
model = MovieLensModel(user_model=user_model, movie_model=movie_model, task=task)
# Adagrad with a learning rate of 0.5.
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.5))


In [19]:
# Train for 3 epochs, feeding the rating pairs in batches of 4096.
model.fit(x=ratings.batch(4096), epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7fcf951d41f0>

In [31]:
# Use brute-force search to set up retrieval using the trained representations.
index = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
# Each indexed batch pairs the raw ItemIds with their embeddings, so the ids
# are what the index is given as candidate identifiers.
index.index_from_dataset(
    movies.batch(100).map(lambda item_ids: (item_ids, model.movie_model(item_ids))))


<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x7fcf951d7280>

In [65]:
def printaResposta(Target, index, k=100, csv_filename='output.csv'):
    """Write the top-k recommendations for every target user to a CSV file.

    The file gets a `UserId,ItemId` header followed by one line per
    (user, recommended item) pair. Users appear in the order returned by
    `Target['UserId'].unique()`; items appear in the order returned by
    `index`.

    Args:
        Target: DataFrame with a 'UserId' column.
        index: Callable invoked as `index(np.array([user]), k=k)`. It must
            return a `(scores, ids)` pair where `ids[0]` holds the
            recommended ids for that user, as bytes or str.
        k: Number of recommendations requested per user.
        csv_filename: Path of the CSV file to create or overwrite.
    """
    unique_users = Target['UserId'].unique()
    # Explicit UTF-8: the ids are decoded as UTF-8 below, so the file must not
    # fall back to the platform's default encoding. Write-only mode is enough.
    with open(csv_filename, 'w', encoding='utf-8') as csv_file:
        # Write the header.
        csv_file.write('UserId,ItemId\n')
        for user in unique_users:
            _, titles = index(np.array([user]), k=k)
            rows = []
            for item in np.asarray(titles[0])[:k]:
                # Accept either raw bytes or already-decoded ids.
                item_id = item.decode('utf-8') if isinstance(item, bytes) else str(item)
                # f-string formatting also copes with non-string user ids.
                rows.append(f'{user},{item_id}\n')
            csv_file.writelines(rows)

In [66]:
# Generate the submission file: writes the top-100 recommendations for each
# distinct UserId in `Target` to output.csv.
printaResposta(Target, index)

In [35]:
# Get some recommendations.
# Sanity-check a single user. The printed label is built from the same values
# used for the query, so it cannot drift from what is actually shown (the
# previous label said "Top 3" while 100 items were requested and printed).
sample_user = "c4ca4238a0"
sample_k = 100
_, titles = index(np.array([sample_user]), k=sample_k)
recommendations = [item.decode('utf-8') for item in np.array(titles[0, :sample_k])]
print(f"Top {sample_k} recommendations for user {sample_user}: {recommendations}")

Top 3 recommendations for user c4ca4238a0: ['91766eac45', '7f40c37991', '9115120446', 'b5f293e3f4', '2fd0adbb9d', 'd0daf9fa5d', 'c4584e0af4', 'e073da97f6', '01a1674741', '8727f95452', 'f05fb18fe7', '4f66cc3378', '98fc59b03f', 'cb5a7896b8', 'd24a75007a', '507981f7ce', 'b4205cecc7', '2d2c68f273', '89c86ad4bb', 'de8c722cd9', '1906277065', '02d10cb1b3', '948e6832fc', 'fe725df7db', '762e2a4a6b', '7ec76c268b', 'f25dfcdaaa', '828c20f94b', 'b58b2000c1', '981a5b345d', '0bdd128ea2', '6ecc305295', 'cbdadb0e00', '4ec485f5d3', '8d1bb8f3db', '53fc77d38d', '4646d64c74', '9c28a6c7e0', 'ed1dd7b5e0', '770aab654c', '69cc3755fe', 'cac4fd72a8', 'c1bac1d55e', '7bcc2cda6f', 'e201b119d0', 'c97cb0ab01', 'c3bb921443', 'e5112e4098', '236afad3d6', '28cd2832ff', '331f53b3b5', '50ab04b31f', 'e560c33025', '26519932e1', '8306baf487', '046abd1a53', '1514742238', '5b70cbefdd', '8feb35769b', 'a49ceb96ea', 'b02596b935', '0bbb0307ab', '95ad4d4e2a', '4582a9f881', '45b34fa033', '8cefcfbbb1', 'e121c55253', '324a87b636', '12b