In [20]:
# !pip3 install implicit
import implicit
import pandas as pd
import numpy as np
import scipy.sparse as sparse

from glob import glob
from IPython.core.interactiveshell import InteractiveShell

# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'

In [21]:
# Collect every CSV in the data directory.
# NOTE(review): the listing shown below is not alphabetical, so the order of
# `paths` should not be relied on; anything indexing into it by position is
# tied to this particular machine's directory order.
paths = glob('../data/*.csv')
paths

['../data/links.csv',
 '../data/tags.csv',
 '../data/movies_metadata.csv',
 '../data/ratings.csv',
 '../data/movies.csv']

In [22]:
# Load each table by its explicit file name. Indexing into the glob() result
# (paths[0], paths[1], ...) is fragile because glob returns files in arbitrary,
# filesystem-dependent order, so the same index can point at a different CSV
# on another machine.
links_df = pd.read_csv('../data/links.csv')
tags_df = pd.read_csv('../data/tags.csv')
ratings_df = pd.read_csv('../data/ratings.csv')
movies_df = pd.read_csv('../data/movies.csv')

In [23]:
# Load the pre-built sparse interaction matrix saved in SciPy's .npz format.
# NOTE(review): presumably rows are users and columns are movies (name and the
# later `.T` before fitting suggest so) — confirm against the cell that built it.
user_item = sparse.load_npz('../data/user_item.npz')

In [24]:
# Show the matrix repr (shape, dtype, number of stored elements) as a sanity check.
user_item

<610x9724 sparse matrix of type '<class 'numpy.float64'>'
	with 100836 stored elements in Compressed Sparse Row format>

In [25]:
# Build an ALS matrix-factorisation model: 10 latent factors, 20 alternating
# sweeps, L2 regularisation of 0.1, trained on 4 threads.
model = implicit.als.AlternatingLeastSquares(
    factors=10,
    iterations=20,
    regularization=0.1,
    num_threads=4,
)

# Fit on the transposed matrix.
# NOTE(review): this assumes an implicit release whose fit() expects an
# item x user matrix (pre-0.5 behaviour) — confirm the installed version.
model.fit(user_item.T)

100%|██████████| 20.0/20 [00:02<00:00,  8.92it/s]


In [26]:
def similar_items(item_id, movies_table, movies, N=5, als_model=None):
    """
    Look up the movies most similar to a given movie according to an ALS model.

    Input
    -----

    item_id: int
        MovieID in the movies table

    movies_table: DataFrame
        DataFrame with movie ids, movie title and genre; must contain a
        'movieId' column to join on

    movies: np.array or list
        Mapping between movieID in the movies_table and id in the item user
        matrix: position i holds the movieID of matrix index i

    N: int
        Number of similar movies to return

    als_model: fitted model exposing similar_items(index, N=...), optional
        Model to query. Defaults to the notebook-level `model` so existing
        calls keep working.

    Output
    -----

    recommendation: DataFrame
        DataFrame with selected movie in first row and similar movies for N next rows

    Raises
    -----

    ValueError
        If item_id is not present in the movies mapping

    """
    # Fall back to the globally fitted model when none is supplied
    if als_model is None:
        als_model = model

    # Get movie user index from the mapping; np.asarray lets this work for both
    # lists and arrays (ndarray has no .index method)
    movie_ids = np.asarray(movies)
    matches = np.flatnonzero(movie_ids == item_id)
    if matches.size == 0:
        raise ValueError(f"{item_id!r} is not in the movies mapping")
    user_item_id = int(matches[0])

    # Ask for N + 1 results because the queried movie itself comes back first
    similars = als_model.similar_items(user_item_id, N=N + 1)

    # Older implicit releases return a list of (id, score) pairs; releases
    # from 0.5 on return a tuple of (ids, scores) arrays. Accept both.
    if isinstance(similars, tuple):
        similar_idx = similars[0]
    else:
        similar_idx = [pair[0] for pair in similars]
    similar_idx = np.asarray(similar_idx, dtype=int)

    # Convert matrix indices back to movieIDs via the mapping
    ids = pd.DataFrame({'movieId': movie_ids[similar_idx]})

    # Add movie title and genres; a left join keeps the similarity ordering
    recommendation = pd.merge(ids, movies_table, on='movieId', how='left')

    return recommendation

In [11]:
# Keep only the IMDb id and poster path columns of the metadata table.
# NOTE(review): `meta_df` is not defined in any cell visible above and this
# cell's execution count is out of sequence — confirm it is loaded before this
# point so the notebook survives Restart & Run All.
image_data = meta_df[['imdb_id', 'poster_path']]