In [30]:
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv(r'd:\naomy\LIA-FastAPI-MySQL\data\data.csv')
df.head()

Unnamed: 0,id,title,author,description,genre,classification,pages,owner_id
0,1,Matadouro cinco,Kurt Vonnegut,Um livro que zomba da estupidez humana com um ...,Science Fiction,5,288,1
1,2,Cama de gato,Kurt Vonnegut,Mais um livro incrível que zomba da condição h...,Science Fiction,5,280,1
2,3,Cem anos de solidão,Gabriel Garcia Marquez,Um realismo fantástico lindo e cativante,Fiction Novel,5,448,1
3,4,Cem anos de solidão,Gabriel Garcia Marquez,,Fiction Novel,5,448,2
4,5,Sobre os ossos dos mortos,Olga Tockarzuck,Uma história sobre nossa relação com a naturez...,Fiction Novel,5,256,1


In [31]:
df.drop(columns=['title', 'author', 'description', 'genre', 'pages'], inplace=True)
df.rename(columns={'id':'book_id'},inplace=True)
df.head()

Unnamed: 0,book_id,classification,owner_id
0,1,5,1
1,2,5,1
2,3,5,1
3,4,5,2
4,5,5,1


In [4]:
# df1 = df.loc[df['owner_id'] == 1]
# df2 = df.loc[df['owner_id'] == 2]

#df_new = df.pivot_table(index='title', columns='owner_id', values='classification').fillna(0)
#df_new.head()

rated_books = (df[['owner_id', 'book_id']].groupby('owner_id', as_index=False).aggregate(lambda x: list(x)))
rated_books.head()

Unnamed: 0,owner_id,book_id
0,1,"[1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1..."
1,2,"[4, 62, 78, 79, 82, 83, 84, 85, 86, 87, 88, 89..."


In [12]:
books_ratings = df.groupby('title', as_index=False).agg({'classification': ['count', 'mean']})

books_ratings.head()

Unnamed: 0_level_0,title,classification,classification
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean
0,1984,2,3.0
1,A balada do black Tom,1,3.0
2,A hora do lobisomen,1,3.0
3,A longa e sombria hora do chá da alma,1,4.0
4,A menina que roubava livros,2,4.5


In [32]:
import tensorflow.compat.v1 as tf
import collections
tf.disable_v2_behavior()
tf.logging.set_verbosity(tf.logging.ERROR)

def build_rating_sparse_tensor(ratings_df):
  """
  Args:
    ratings_df: a pd.DataFrame with `user_id`, `movie_id` and `rating` columns.
  Returns:
    a tf.SparseTensor representing the ratings matrix.
  """
  indices = df[['owner_id', 'book_id']].values
  values = df['classification'].values
  return tf.SparseTensor(
      indices=indices,
      values=values,
      dense_shape=[df.shape[0], df.shape[0]])

In [33]:
def split_dataframe(df, holdout_fraction=0.1):
  """Splits a DataFrame into training and test sets.
  Args:
    df: a dataframe.
    holdout_fraction: fraction of dataframe rows to use in the test set.
  Returns:
    train: dataframe for training
    test: dataframe for testing
  """
  test = df.sample(frac=holdout_fraction, replace=False)
  train = df[~df.index.isin(test.index)]
  return train, test

In [34]:

def sparse_mean_square_error(sparse_ratings, user_embeddings, movie_embeddings):
  """
  Args:
    sparse_ratings: A SparseTensor rating matrix, of dense_shape [N, M]
    user_embeddings: A dense Tensor U of shape [N, k] where k is the embedding
      dimension, such that U_i is the embedding of user i.
    movie_embeddings: A dense Tensor V of shape [M, k] where k is the embedding
      dimension, such that V_j is the embedding of movie j.
  Returns:
    A scalar Tensor representing the MSE between the true ratings and the
      model's predictions.
  """
  predictions = tf.gather_nd(
      tf.matmul(user_embeddings, movie_embeddings, transpose_b=True),
      sparse_ratings.indices)
  loss = tf.losses.mean_squared_error(sparse_ratings.values, predictions)
  return loss

In [35]:
class CFModel(object):
  """Simple class that represents a collaborative filtering model"""
  def __init__(self, embedding_vars, loss, metrics=None):
    """Initializes a CFModel.
    Args:
      embedding_vars: A dictionary of tf.Variables.
      loss: A float Tensor. The loss to optimize.
      metrics: optional list of dictionaries of Tensors. The metrics in each
        dictionary will be plotted in a separate figure during training.
    """
    self._embedding_vars = embedding_vars
    self._loss = loss
    self._metrics = metrics
    self._embeddings = {k: None for k in embedding_vars}
    self._session = None

  @property
  def embeddings(self):
    """The embeddings dictionary."""
    return self._embeddings

  def train(self, num_iterations=100, learning_rate=1.0, plot_results=True,
            optimizer=tf.train.GradientDescentOptimizer):
    """Trains the model.
    Args:
      iterations: number of iterations to run.
      learning_rate: optimizer learning rate.
      plot_results: whether to plot the results at the end of training.
      optimizer: the optimizer to use. Default to GradientDescentOptimizer.
    Returns:
      The metrics dictionary evaluated at the last iteration.
    """
    with self._loss.graph.as_default():
      opt = optimizer(learning_rate)
      train_op = opt.minimize(self._loss)
      local_init_op = tf.group(
          tf.variables_initializer(opt.variables()),
          tf.local_variables_initializer())
      if self._session is None:
        self._session = tf.Session()
        with self._session.as_default():
          self._session.run(tf.global_variables_initializer())
          self._session.run(tf.tables_initializer())
          tf.train.start_queue_runners()

    with self._session.as_default():
      local_init_op.run()
      iterations = []
      metrics = self._metrics or ({},)
      metrics_vals = [collections.defaultdict(list) for _ in self._metrics]

      # Train and append results.
      for i in range(num_iterations + 1):
        _, results = self._session.run((train_op, metrics))
        if (i % 10 == 0) or i == num_iterations:
          print("\r iteration %d: " % i + ", ".join(
                ["%s=%f" % (k, v) for r in results for k, v in r.items()]),
                end='')
          iterations.append(i)
          for metric_val, result in zip(metrics_vals, results):
            for k, v in result.items():
              metric_val[k].append(v)

      for k, v in self._embedding_vars.items():
        self._embeddings[k] = v.eval()

      if plot_results:
        # Plot the metrics.
        num_subplots = len(metrics)+1
        fig = plt.figure()
        fig.set_size_inches(num_subplots*10, 8)
        for i, metric_vals in enumerate(metrics_vals):
          ax = fig.add_subplot(1, num_subplots, i+1)
          for k, v in metric_vals.items():
            ax.plot(iterations, v, label=k)
          ax.set_xlim([1, num_iterations])
          ax.legend()
      return results

In [36]:
def build_model(ratings, embedding_dim=3, init_stddev=1.):
  """
  Args:
    ratings: a DataFrame of the ratings
    embedding_dim: the dimension of the embedding vectors.
    init_stddev: float, the standard deviation of the random initial embeddings.
  Returns:
    model: a CFModel.
  """
  # Split the ratings DataFrame into train and test.
  train_ratings, test_ratings = split_dataframe(df)
  # SparseTensor representation of the train and test datasets.
  A_train = build_rating_sparse_tensor(train_ratings)
  A_test = build_rating_sparse_tensor(test_ratings)
  # Initialize the embeddings using a normal distribution.
  U = tf.Variable(tf.random_normal(
      [A_train.dense_shape[0], embedding_dim], stddev=init_stddev))
  V = tf.Variable(tf.random_normal(
      [A_train.dense_shape[1], embedding_dim], stddev=init_stddev))
  train_loss = sparse_mean_square_error(A_train, U, V)
  test_loss = sparse_mean_square_error(A_test, U, V)
  metrics = {
      'train_error': train_loss,
      'test_error': test_loss
  }
  embeddings = {
      "user_id": U,
      "movie_id": V
  }
  return CFModel(embeddings, train_loss, [metrics])

In [37]:
model = build_model(df, embedding_dim=30, init_stddev=0.5)
model.train(num_iterations=1000, learning_rate=10.)

InvalidArgumentError: Graph execution error:

Detected at node 'GatherNd_4' defined at (most recent call last):
    File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.752.0_x64__qbz5n2kfra8p0\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.752.0_x64__qbz5n2kfra8p0\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\traitlets\config\application.py", line 982, in launch_instance
      app.start()
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
      self.io_loop.start()
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\tornado\platform\asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.752.0_x64__qbz5n2kfra8p0\lib\asyncio\base_events.py", line 595, in run_forever
      self._run_once()
    File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.752.0_x64__qbz5n2kfra8p0\lib\asyncio\base_events.py", line 1881, in _run_once
      handle._run()
    File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.752.0_x64__qbz5n2kfra8p0\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\ipkernel.py", line 392, in do_execute
      res = shell.run_cell(
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\zmqshell.py", line 531, in run_cell
      return super().run_cell(*args, **kwargs)
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\IPython\core\interactiveshell.py", line 2940, in run_cell
      result = self._run_cell(
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\IPython\core\interactiveshell.py", line 2995, in _run_cell
      return runner(coro)
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\IPython\core\interactiveshell.py", line 3194, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\IPython\core\interactiveshell.py", line 3373, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\IPython\core\interactiveshell.py", line 3433, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\naomy\AppData\Local\Temp\ipykernel_3724\2982579836.py", line 1, in <module>
      model = build_model(df, embedding_dim=30, init_stddev=0.5)
    File "C:\Users\naomy\AppData\Local\Temp\ipykernel_3724\1492954806.py", line 20, in build_model
      train_loss = sparse_mean_square_error(A_train, U, V)
    File "C:\Users\naomy\AppData\Local\Temp\ipykernel_3724\3879905694.py", line 13, in sparse_mean_square_error
      predictions = tf.gather_nd(
Node: 'GatherNd_4'
indices[118] = [2, 120] does not index into param shape [119,119], node name: GatherNd_4
	 [[{{node GatherNd_4}}]]

Original stack trace for 'GatherNd_4':
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.752.0_x64__qbz5n2kfra8p0\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.752.0_x64__qbz5n2kfra8p0\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\traitlets\config\application.py", line 982, in launch_instance
    app.start()
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
    self.io_loop.start()
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\tornado\platform\asyncio.py", line 215, in start
    self.asyncio_loop.run_forever()
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.752.0_x64__qbz5n2kfra8p0\lib\asyncio\base_events.py", line 595, in run_forever
    self._run_once()
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.752.0_x64__qbz5n2kfra8p0\lib\asyncio\base_events.py", line 1881, in _run_once
    handle._run()
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.752.0_x64__qbz5n2kfra8p0\lib\asyncio\events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
    await self.process_one()
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
    await dispatch(*args)
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
    await result
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\kernelbase.py", line 729, in execute_request
    reply_content = await reply_content
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\ipkernel.py", line 392, in do_execute
    res = shell.run_cell(
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\ipykernel\zmqshell.py", line 531, in run_cell
    return super().run_cell(*args, **kwargs)
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\IPython\core\interactiveshell.py", line 2940, in run_cell
    result = self._run_cell(
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\IPython\core\interactiveshell.py", line 2995, in _run_cell
    return runner(coro)
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
    coro.send(None)
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\IPython\core\interactiveshell.py", line 3194, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\IPython\core\interactiveshell.py", line 3373, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\IPython\core\interactiveshell.py", line 3433, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\naomy\AppData\Local\Temp\ipykernel_3724\2982579836.py", line 1, in <module>
    model = build_model(df, embedding_dim=30, init_stddev=0.5)
  File "C:\Users\naomy\AppData\Local\Temp\ipykernel_3724\1492954806.py", line 20, in build_model
    train_loss = sparse_mean_square_error(A_train, U, V)
  File "C:\Users\naomy\AppData\Local\Temp\ipykernel_3724\3879905694.py", line 13, in sparse_mean_square_error
    predictions = tf.gather_nd(
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\tensorflow\python\util\traceback_utils.py", line 150, in error_handler
    return fn(*args, **kwargs)
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\tensorflow\python\util\dispatch.py", line 1176, in op_dispatch_handler
    return dispatch_target(*args, **kwargs)
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\tensorflow\python\ops\array_ops.py", line 5724, in gather_nd
    return gen_array_ops.gather_nd(params, indices, name=name)
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 4594, in gather_nd
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 795, in _apply_op_helper
    op = g._create_op_internal(op_type_name, inputs, dtypes=None,
  File "d:\naomy\LIA-FastAPI-MySQL\.venv\lib\site-packages\tensorflow\python\framework\ops.py", line 3798, in _create_op_internal
    ret = Operation(
