In [1]:
import pandas as pd
import numpy as np
import collections
from mpl_toolkits.mplot3d import Axes3D
from IPython import display
from matplotlib import pyplot as plt
import sklearn
import sklearn.manifold
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
tf.logging.set_verbosity(tf.logging.ERROR)

Instructions for updating:
non-resource variables are not supported in the long term


In [52]:

# Add some convenience functions to Pandas DataFrame.
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.3f}'.format
def mask(df, key, function):
  """Returns a filtered dataframe, by applying function to key"""
  return df[function(df[key])]

def flatten_cols(df):
  df.columns = [' '.join(col).strip() for col in df.columns.values]
  return df

pd.DataFrame.mask = mask
pd.DataFrame.flatten_cols = flatten_cols

USER_RATINGS = False

In [2]:
## Importing the Necessary Datasets
artists = pd.read_table('../data/artists.dat', sep="\t")
user_artists_ratings = pd.read_csv('../data/user_artists_ratings.csv')
user_artists_ratings.drop('Unnamed: 0', axis=1, inplace=True)

## Random Methods

In [3]:
def split_dataframe(df, holdout_fraction=0.1):
    """Splits a DataFrame into training and test sets.
    Args:
      df: a dataframe.
      holdout_fraction: fraction of dataframe rows to use in the test set.
    Returns:
      train: dataframe for training
      test: dataframe for testing
    """
    test = df.sample(frac=holdout_fraction, replace=False)
    train = df[~df.index.isin(test.index)]
    return train, test

# Sparse Representation of the Matrix

In [76]:
def build_rating_sparse_tensor(ratings):
    
    indices = ratings[['userID', 'artistID']].values
    values = ratings['rating'].values
    
    return tf.SparseTensor(
      indices=indices,
      values=values,
      #dense_shape=[len(ratings.userID.unique()), len(ratings.artistID.unique())],
      dense_shape=[len(ratings.userID.unique()), artists.shape[0]])


In [132]:
user_artists_ratings['userID'].shape[0]

92834

In [66]:
sparse_matrix = build_rating_sparse_tensor(user_artists_ratings)

In [67]:
sparse_matrix.shape

TensorShape([Dimension(17632), Dimension(1892)])

N=1892, M=17632

In [18]:
user_embeddings = pd.DataFrame(user_artists_ratings['userID']).drop_duplicates()

In [19]:
len(user_embeddings)

1892

In [20]:
artist_embeddings = pd.DataFrame(user_artists_ratings['artistID']).drop_duplicates()

In [21]:
len(artist_embeddings)

17632

In [68]:
def sparse_mean_square_error(sparse_ratings, user_embeddings, artist_embeddings):
    """
    Args:
        sparse_ratings: A SparseTensor rating matrix, of dense_shape [N, M]
        user_embeddings: A dense Tensor U of shape [N, k] where k is the embedding
        dimension, such that U_i is the embedding of user i.
        movie_embeddings: A dense Tensor V of shape [M, k] where k is the embedding
        dimension, such that V_j is the embedding of movie j.
    Returns:
        A scalar Tensor representing the MSE between the true ratings and the
        model's predictions.
    """
    predictions = tf.gather_nd(
        tf.matmul(user_embeddings, artist_embeddings, transpose_b=True),
        sparse_ratings.indices)
    loss = tf.losses.mean_squared_error(sparse_ratings.values, predictions)
    return loss

In [69]:
print(sparse_mean_square_error(sparse_matrix, user_embeddings, artist_embeddings))

Tensor("mean_squared_error_14/value:0", shape=(), dtype=float32)


## Matrix Factorization Model

In [70]:
class CFModel(object):
    """Simple class that represents a collaborative filtering model"""
    def __init__(self, embedding_vars, loss, metrics=None):
        """Initializes a CFModel.
        Args:
          embedding_vars: A dictionary of tf.Variables.
          loss: A float Tensor. The loss to optimize.
          metrics: optional list of dictionaries of Tensors. The metrics in each
            dictionary will be plotted in a separate figure during training.
        """
        self._embedding_vars = embedding_vars
        self._loss = loss
        self._metrics = metrics
        self._embeddings = {k: None for k in embedding_vars}
        self._session = None

    @property
    def embeddings(self):
        """The embeddings dictionary."""
        return self._embeddings

    def train(self, num_iterations=100, learning_rate=1.0, plot_results=True,
              optimizer=tf.train.GradientDescentOptimizer):
        """Trains the model.
        Args:
          iterations: number of iterations to run.
          learning_rate: optimizer learning rate.
          plot_results: whether to plot the results at the end of training.
          optimizer: the optimizer to use. Default to GradientDescentOptimizer.
        Returns:
          The metrics dictionary evaluated at the last iteration.
        """
        with self._loss.graph.as_default():
            opt = optimizer(learning_rate)
            train_op = opt.minimize(self._loss)
            local_init_op = tf.group(
                tf.variables_initializer(opt.variables()),
                tf.local_variables_initializer())
            if self._session is None:
                self._session = tf.Session()
                with self._session.as_default():
                    self._session.run(tf.global_variables_initializer())
                    self._session.run(tf.tables_initializer())
                    tf.train.start_queue_runners()

        with self._session.as_default():
            local_init_op.run()
            iterations = []
            metrics = self._metrics or ({},)
            metrics_vals = [collections.defaultdict(list) for _ in self._metrics]

            # Train and append results.
            for i in range(num_iterations + 1):
                _, results = self._session.run((train_op, metrics))
                if (i % 10 == 0) or i == num_iterations:
                    print("\r iteration %d: " % i + ", ".join(
                        ["%s=%f" % (k, v) for r in results for k, v in r.items()]),
                          end='')
                    iterations.append(i)
                    for metric_val, result in zip(metrics_vals, results):
                        for k, v in result.items():
                            metric_val[k].append(v)

            for k, v in self._embedding_vars.items():
                self._embeddings[k] = v.eval()

            if plot_results:
                # Plot the metrics.
                num_subplots = len(metrics)+1
                fig = plt.figure()
                fig.set_size_inches(num_subplots*10, 8)
                for i, metric_vals in enumerate(metrics_vals):
                    ax = fig.add_subplot(1, num_subplots, i+1)
                    for k, v in metric_vals.items():
                        ax.plot(iterations, v, label=k)
                    ax.set_xlim([1, num_iterations])
                    ax.legend()
            return results

In [71]:
def build_model(ratings, embedding_dim=3, init_stddev=1.):
    """
    Args:
      ratings: a DataFrame of the ratings
      embedding_dim: the dimension of the embedding vectors.
      init_stddev: float, the standard deviation of the random initial embeddings.
    Returns:
      model: a CFModel.
    """
    # Split the ratings DataFrame into train and test
    train_ratings, test_ratings = split_dataframe(ratings)
    # SparseTensor representation of the train and test datasets.
    A_train = build_rating_sparse_tensor(train_ratings)
    A_test = build_rating_sparse_tensor(test_ratings)
    # Initialize the embeddings using a normal distribution.
    U = tf.Variable(tf.random_normal(
        [A_train.dense_shape[0], embedding_dim], stddev=init_stddev))
    V = tf.Variable(tf.random_normal(
        [A_train.dense_shape[1], embedding_dim], stddev=init_stddev))
    train_loss = sparse_mean_square_error(A_train, U, V)
    test_loss = sparse_mean_square_error(A_test, U, V)
    metrics = {
        'train_error': train_loss,
        'test_error': test_loss
    }
    embeddings = {
        "userID": U,
        "artistID": V
    }
    return CFModel(embeddings, train_loss, [metrics])

In [101]:
def build_model(ratings, embedding_dim=3, init_stddev=1.):
    """
    Args:
      ratings: a DataFrame of the ratings
      embedding_dim: the dimension of the embedding vectors.
      init_stddev: float, the standard deviation of the random initial embeddings.
    Returns:
      model: a CFModel.
    """
    # Split the ratings DataFrame into train and test
    train_ratings, test_ratings = split_dataframe(ratings)
    # Re-indexing the train_ratings and test_ratings dataframes
    train_ratings.reset_index(drop=True, inplace=True)
    test_ratings.reset_index(drop=True, inplace=True)
    # SparseTensor representation of the train and test datasets.
    A_train = build_rating_sparse_tensor(train_ratings)
    A_test = build_rating_sparse_tensor(test_ratings)
    # Initialize the embeddings using a normal distribution.
    U = tf.Variable(tf.random_normal(
        [A_train.dense_shape[0], embedding_dim], stddev=init_stddev))
    V = tf.Variable(tf.random_normal(
        [A_train.dense_shape[1], embedding_dim], stddev=init_stddev))
    train_loss = sparse_mean_square_error(A_train, U, V)
    test_loss = sparse_mean_square_error(A_test, U, V)
    metrics = {
        'train_error': train_loss,
        'test_error': test_loss
    }
    embeddings = {
        "userID": U,
        "artistID": V
    }
    return CFModel(embeddings, train_loss, [metrics])

In [72]:
# Creating mapping for re-indexing of userIDs
new_id = 0
user_id_mapping = {}
for userID in user_artists_ratings.userID.unique():
    user_id_mapping[userID] = new_id
    new_id += 1

# Creating mapping for re-indexing of artistIDs
new_id = 0
artist_id_mapping = {}
for artistID in user_artists_ratings.artistID.unique():
    artist_id_mapping[artistID] = new_id
    new_id += 1

In [73]:
user_artists_ratings = user_artists_ratings.replace({"userID": user_id_mapping})
user_artists_ratings = user_artists_ratings.replace({"artistID": artist_id_mapping})

In [74]:
# Normalising the star ratings
user_artists_ratings['rating'] = user_artists_ratings['rating'] / 5

In [77]:
model = build_model(user_artists_ratings, embedding_dim=30, init_stddev=0.5)
model.train(num_iterations=1000, learning_rate=10.)

InvalidArgumentError: indices[8706] = [1891, 4373] does not index into param shape [1891,17632], node name: GatherNd_16
	 [[node GatherNd_16
 (defined at C:\Users\laram\AppData\Local\Temp/ipykernel_38260/3075660840.py:13)
]]

Errors may have originated from an input operation.
Input Source operations connected to node GatherNd_16:
In[0] MatMul_16 (defined at C:\Users\laram\AppData\Local\Temp/ipykernel_38260/3075660840.py:14)	
In[1] SparseTensor_14/indices (defined at C:\Users\laram\AppData\Local\Temp/ipykernel_38260/561548729.py:6)

Operation defined at: (most recent call last)
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\runpy.py", line 197, in _run_module_as_main
>>>     return _run_code(code, main_globals, None,
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\runpy.py", line 87, in _run_code
>>>     exec(code, run_globals)
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
>>>     app.launch_new_instance()
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
>>>     app.start()
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\kernelapp.py", line 667, in start
>>>     self.io_loop.start()
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
>>>     self.asyncio_loop.run_forever()
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\asyncio\base_events.py", line 596, in run_forever
>>>     self._run_once()
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\asyncio\base_events.py", line 1890, in _run_once
>>>     handle._run()
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\asyncio\events.py", line 80, in _run
>>>     self._context.run(self._callback, *self._args)
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
>>>     await self.process_one()
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
>>>     await dispatch(*args)
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
>>>     await result
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
>>>     reply_content = await reply_content
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\ipkernel.py", line 345, in do_execute
>>>     res = shell.run_cell(code, store_history=store_history, silent=silent)
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\zmqshell.py", line 532, in run_cell
>>>     return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\IPython\core\interactiveshell.py", line 2898, in run_cell
>>>     result = self._run_cell(
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\IPython\core\interactiveshell.py", line 2944, in _run_cell
>>>     return runner(coro)
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
>>>     coro.send(None)
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\IPython\core\interactiveshell.py", line 3169, in run_cell_async
>>>     has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\IPython\core\interactiveshell.py", line 3361, in run_ast_nodes
>>>     if (await self.run_code(code, result,  async_=asy)):
>>> 
>>>   File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\IPython\core\interactiveshell.py", line 3441, in run_code
>>>     exec(code_obj, self.user_global_ns, self.user_ns)
>>> 
>>>   File "C:\Users\laram\AppData\Local\Temp/ipykernel_38260/1258562315.py", line 1, in <module>
>>>     model = build_model(user_artists_ratings, embedding_dim=30, init_stddev=0.5)
>>> 
>>>   File "C:\Users\laram\AppData\Local\Temp/ipykernel_38260/3439161932.py", line 21, in build_model
>>>     test_loss = sparse_mean_square_error(A_test, U, V)
>>> 
>>>   File "C:\Users\laram\AppData\Local\Temp/ipykernel_38260/3075660840.py", line 13, in sparse_mean_square_error
>>>     predictions = tf.gather_nd(
>>> 

Original stack trace for 'GatherNd_16':
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
    app.start()
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\kernelapp.py", line 667, in start
    self.io_loop.start()
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\asyncio\base_events.py", line 596, in run_forever
    self._run_once()
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\asyncio\base_events.py", line 1890, in _run_once
    handle._run()
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\asyncio\events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
    await self.process_one()
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
    await dispatch(*args)
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
    await result
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
    reply_content = await reply_content
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\ipkernel.py", line 345, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\ipykernel\zmqshell.py", line 532, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\IPython\core\interactiveshell.py", line 2898, in run_cell
    result = self._run_cell(
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\IPython\core\interactiveshell.py", line 2944, in _run_cell
    return runner(coro)
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\IPython\core\interactiveshell.py", line 3169, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\IPython\core\interactiveshell.py", line 3361, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\IPython\core\interactiveshell.py", line 3441, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\laram\AppData\Local\Temp/ipykernel_38260/1258562315.py", line 1, in <module>
    model = build_model(user_artists_ratings, embedding_dim=30, init_stddev=0.5)
  File "C:\Users\laram\AppData\Local\Temp/ipykernel_38260/3439161932.py", line 21, in build_model
    test_loss = sparse_mean_square_error(A_test, U, V)
  File "C:\Users\laram\AppData\Local\Temp/ipykernel_38260/3075660840.py", line 13, in sparse_mean_square_error
    predictions = tf.gather_nd(
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\tensorflow\python\util\traceback_utils.py", line 150, in error_handler
    return fn(*args, **kwargs)
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\tensorflow\python\util\dispatch.py", line 1096, in op_dispatch_handler
    return dispatch_target(*args, **kwargs)
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\tensorflow\python\ops\array_ops.py", line 5451, in gather_nd
    return gen_array_ops.gather_nd(params, indices, name=name)
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 3855, in gather_nd
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 744, in _apply_op_helper
    op = g._create_op_internal(op_type_name, inputs, dtypes=None,
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\tensorflow\python\framework\ops.py", line 3697, in _create_op_internal
    ret = Operation(
  File "C:\Users\laram\AppData\Local\Continuum\anaconda3\envs\ca4015\lib\site-packages\tensorflow\python\framework\ops.py", line 2101, in __init__
    self._traceback = tf_stack.extract_stack_for_node(self._c_op)


## Inspect the Embeddings

In [49]:
DOT = 'dot'
COSINE = 'cosine'
def compute_scores(query_embedding, item_embeddings, measure=DOT):
    """Computes the scores of the candidates given a query.
    Args:
        query_embedding: a vector of shape [k], representing the query embedding.
        item_embeddings: a matrix of shape [N, k], such that row i is the embedding
            of item i.
    measure: a string specifying the similarity measure to be used. Can be
        either DOT or COSINE.
    Returns:
        scores: a vector of shape [N], such that scores[i] is the score of item i.
    """
    u = query_embedding
    V = item_embeddings
    if measure == COSINE:
        V = V / np.linalg.norm(V, axis=1, keepdims=True)
        u = u / np.linalg.norm(u)
    scores = u.dot(V.T)
    return scores

In [63]:
def user_recommendations(model, measure=DOT, exclude_rated=False, k=6):
    if USER_RATINGS:
        scores = compute_scores(
            model.embeddings["userID"][943], model.embeddings["artistID"], measure)
        score_key = measure + ' score'
        df = pd.DataFrame({
            score_key: list(scores),
            'movie_id': user_artist_rating['movie_id'],
            'titles': movies['title'],
            'genres': movies['all_genres'],
        })
        if exclude_rated:
            # remove movies that are already rated
            rated_movies = ratings[ratings.user_id == "943"]["movie_id"].values
            df = df[df.movie_id.apply(lambda movie_id: movie_id not in rated_movies)]
        display.display(df.sort_values([score_key], ascending=False).head(k))  

def artist_neighbors(model, name_substring, measure=DOT, k=6):
    # Search for movie ids that match the given substring.
    ids =  artists[artists['name'].str.contains(name_substring)].index.values
    names = artists.iloc[ids]['name'].values
    if len(names) == 0:
        raise ValueError("Found no movies with title %s" % name_substring)
    print("Nearest neighbors of : %s." % names[0])
    if len(names) > 1:
        print("[Found more than one matching movie. Other candidates: {}]".format(
            ", ".join(names[1:])))
    artist_id = ids[0]
    scores = compute_scores(
        model.embeddings["artistID"][artist_id], model.embeddings["artistID"],
        measure)
    score_key = measure + ' score'
    df = pd.DataFrame({
        score_key: list(scores),
        'names': artists['name'] #,
        #'genres': movies['all_genres']
    })
    display.display(df.sort_values([score_key], ascending=False).head(k))

In [61]:
new_id = 0
id_mapping = {}
for artistID in artists.id.unique():
    id_mapping[artistID] = new_id
    new_id += 1

In [None]:
artists = artists.replace({"id": id_mapping})

In [64]:
artist_neighbors(model, "Britney Spears", DOT)


Nearest neighbors of : Britney Spears.
[Found more than one matching movie. Other candidates: Michael Jackson & Britney Spears, Britney Spears vs Avril Lavigne, Britney Spears㞦, Britney Spears⊼, Britney Spears씨, Panic! at the Disco feat. Britney Spears and Gwen Stefani]


TypeError: 'NoneType' object is not subscriptable

In [57]:
artists

Unnamed: 0,id,name,url,pictureURL
0,0,MALICE MIZER,http://www.last.fm/music/MALICE+MIZER,http://userserve-ak.last.fm/serve/252/10808.jpg
1,1,Diary of Dreams,http://www.last.fm/music/Diary+of+Dreams,http://userserve-ak.last.fm/serve/252/3052066.jpg
2,2,Carpathian Forest,http://www.last.fm/music/Carpathian+Forest,http://userserve-ak.last.fm/serve/252/40222717...
3,3,Moi dix Mois,http://www.last.fm/music/Moi+dix+Mois,http://userserve-ak.last.fm/serve/252/54697835...
4,4,Bella Morte,http://www.last.fm/music/Bella+Morte,http://userserve-ak.last.fm/serve/252/14789013...
...,...,...,...,...
17627,17627,Diamanda Galás,http://www.last.fm/music/Diamanda+Gal%C3%A1s,http://userserve-ak.last.fm/serve/252/16352971...
17628,17628,Aya RL,http://www.last.fm/music/Aya+RL,http://userserve-ak.last.fm/serve/252/207445.jpg
17629,17629,Coptic Rain,http://www.last.fm/music/Coptic+Rain,http://userserve-ak.last.fm/serve/252/344868.jpg
17630,17630,Oz Alchemist,http://www.last.fm/music/Oz+Alchemist,http://userserve-ak.last.fm/serve/252/29297695...
