In [None]:
!pip install -U --quiet mlfoundry
!pip install --quiet implicit

## Let's load the datasets

In [None]:
# Load the datasets from GitHub straight into pandas DataFrames.
import pandas as pd

# Per-movie metadata; later cells read its 'overview', 'original_title' and 'id' columns.
movie_meta_df = pd.read_csv('https://raw.githubusercontent.com/srihari-tf/recommender-system-tfy/master/movies_metadata.csv')
# Movie keywords (not referenced by the other cells visible in this notebook).
keywords_df = pd.read_csv('https://raw.githubusercontent.com/srihari-tf/recommender-system-tfy/master/keywords.csv')
# User ratings; later cells read its 'userId', 'movieId' and 'rating' columns.
ratings_df = pd.read_csv("https://raw.githubusercontent.com/srihari-tf/recommender-system-tfy/master/ratings_small.csv")

In [None]:
# Download local copies of two of the CSVs into the current working directory
# (`curl -O` keeps the remote file name). The last cell of the notebook logs
# files with these names as run artifacts.
!curl -O https://raw.githubusercontent.com/srihari-tf/recommender-system-tfy/master/movies_metadata.csv
!curl -O https://raw.githubusercontent.com/srihari-tf/recommender-system-tfy/master/ratings_small.csv

## Let's take a look at the movie metadata

In [None]:
# Keep only the columns the content-based part of the notebook needs.
# Copy *after* selecting them: the result is then an independent frame (so the
# column assignment below does not target a slice of `movie_meta_df`), and the
# unused metadata columns are not duplicated in memory.
movie_desc_df = movie_meta_df[['overview', 'original_title', 'id']].copy()
# Replace missing overviews with the empty string so every row holds text.
movie_desc_df['overview'] = movie_desc_df['overview'].fillna('')

In [None]:
# helper functions
def get_movie_row_by_index(n):
  """Return the row of `movie_desc_df` at integer position `n`."""
  row = movie_desc_df.iloc[n]
  return row

def get_movie_row_by_movie_id(id, df=None):
  """Return the first row whose 'id' column equals `id`.

  Args:
    id: Movie id to look up; it is compared as a string, so `862` and `'862'`
      are equivalent.
    df: Frame to search. Defaults to the notebook-level `movie_desc_df`.

  Returns:
    The first matching row as a pandas Series.

  Raises:
    KeyError: If no row has the requested id.
  """
  if df is None:
    df = movie_desc_df
  movie_id = str(id)
  # Compare on the string form of the column so the lookup works whether the
  # ids were parsed as text or as integers.
  matches = df[df['id'].astype(str) == movie_id]
  if matches.empty:
    raise KeyError("no movie with id %s" % movie_id)
  # `.iloc[0]` selects the first matching *row*; plain `[0]` would look for a
  # column labelled 0 and fail.
  return matches.iloc[0]

In [None]:
import tensorflow_hub as hub

module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
# Keep the encoder under its own name: `model` is rebound further down in the
# notebook to the collaborative-filtering model, which would otherwise make
# `embed` call the wrong object once that cell has run.
sentence_encoder = hub.load(module_url)
model = sentence_encoder  # original name, kept so existing references still resolve
print("module %s loaded" % module_url)
def embed(input):
  """Run the loaded TF-Hub sentence encoder on `input` and return its output."""
  return sentence_encoder(input)

In [None]:
import numpy as np
from scipy.spatial import distance

def get_most_similar_movie_index(embeddings, n, n_movies = 1):
  """Return the positions of the rows most similar to row `n`.

  Similarity is ranked by cosine distance (smallest first) between
  `embeddings[n]` and every row of `embeddings`.

  Args:
    embeddings: 2-D array-like with one embedding per row.
    n: Position of the query row (negative positions count from the end).
    n_movies: Maximum number of neighbours to return.

  Returns:
    NumPy array with up to `n_movies` row positions, nearest first, never
    containing the query row itself.

  Raises:
    IndexError: If `n` is out of range for `embeddings`.
  """
  # Normalise `n` to a non-negative position so it can be compared with the
  # positions produced by argsort below.
  query_index = range(len(embeddings))[n]
  distances = distance.cdist([embeddings[query_index]], embeddings, 'cosine')[0]
  # Stable sort keeps rows with equal distance in their original order.
  ranked = np.argsort(distances, kind='stable')
  # Remove the query row explicitly rather than assuming it sorts first: rows
  # with an identical embedding tie with it at the smallest distance, so slicing off
  # element 0 could drop a real neighbour and return the query itself.
  ranked = ranked[ranked != query_index]
  return ranked[:n_movies]

In [None]:
# Embed every movie overview and convert the encoder output to a NumPy array.
embeddings = embed(movie_desc_df['overview']).numpy()

In [None]:
# Positions of the 10 overviews closest to the overview at position 0.
get_most_similar_movie_index(embeddings, 0, n_movies=10)

## Train a collaborative filtering model using `implicit`

In [None]:
# Preview the first rows of the ratings data.
ratings_df.head()

In [None]:
# Convert both id columns to pandas categoricals (in place, on `ratings_df`).
# The helper functions and matrix construction below rely on the integer
# category codes (`.cat.codes`) and the code -> id lookup (`.cat.categories`).
ratings_df['userId'] = ratings_df['userId'].astype("category")
ratings_df['movieId'] = ratings_df['movieId'].astype("category")

In [None]:
#helper functions
def get_movie_id_from_cat_code(cat_code):
  """Look up the movieId category stored at position `cat_code`."""
  movie_categories = ratings_df['movieId'].cat.categories
  return movie_categories[cat_code]

def get_user_id_from_cat_code(cat_code):
  """Look up the userId category stored at position `cat_code`."""
  user_categories = ratings_df['userId'].cat.categories
  return user_categories[cat_code]

def get_cat_code_from_user_id(user_id):
  """Return the position of `user_id` within the userId categories."""
  user_categories = ratings_df['userId'].cat.categories
  return user_categories.get_loc(user_id)

def get_cat_code_from_movie_id(movie_id):
  """Return the position of `movie_id` within the movieId categories."""
  movie_categories = ratings_df['movieId'].cat.categories
  return movie_categories.get_loc(movie_id)

In [None]:
# Try the two movie helpers: id -> category code, then category code -> id.
# NOTE(review): 949 and 6892 look like a matching id/code pair for this dataset
# (a round trip) — confirm against the printed output.
print(get_cat_code_from_movie_id(949))
print(get_movie_id_from_cat_code(6892))

In [None]:
# Preview the ratings again, now that the id columns are categorical.
ratings_df.head()

In [None]:
# Pieces of the sparse ratings matrix: the values, plus the integer category
# codes of userId (row coordinate) and movieId (column coordinate).
ratings = ratings_df['rating']
rows, cols = (ratings_df[column].cat.codes for column in ('userId', 'movieId'))

In [None]:
# Show the first few values, row codes and column codes side by side.
print(ratings.head())
print(rows.head())
print(cols.head())

In [None]:
from scipy.sparse import coo_matrix

# Sparse matrix in coordinate format: entry (rows[i], cols[i]) holds ratings[i],
# i.e. rows are user category codes and columns are movie category codes.
r = coo_matrix((ratings, (rows, cols)))

In [None]:
# Spot-check one entry: row 0 is user category code 0, column 906 is movie
# category code 906. Index the CSR form directly instead of materialising the
# whole dense matrix just to read a single cell.
r.tocsr()[0, 906]

In [None]:
import implicit

# Train an ALS matrix-factorisation model with 25 latent factors on the
# user x movie ratings matrix. A fixed `random_state` makes the fitted factors,
# and therefore the recommendations, reproducible across runs.
# Note: this rebinds the notebook-level name `model`.
model = implicit.als.AlternatingLeastSquares(factors=25, random_state=42)
model.fit(r)

In [None]:
def get_recommendation_for_user(user_id):
  """Return the movie ids the fitted `model` recommends for `user_id`.

  The user id is translated to its category code, the matching row of the
  ratings matrix `r` is passed to `model.recommend`, and the first element of
  its result (the recommended movie category codes) is mapped back to movie ids.
  """
  user_cat_code = get_cat_code_from_user_id(user_id)
  user_ratings_row = r.tocsr().getrow(user_cat_code)
  recommended_codes = model.recommend(user_cat_code, user_ratings_row)[0]
  movie_ids = []
  for cat_code in recommended_codes:
    movie_ids.append(get_movie_id_from_cat_code(cat_code))
  return movie_ids

In [None]:
# Movie ids recommended for the user whose userId is 1.
get_recommendation_for_user(1)

## Save the model and log it to MLFoundry

In [None]:
import mlfoundry
# Authenticate, then create a run in the "movie-recommendation" project;
# the cells below log the model and the data files to this run.
mlfoundry.login()
run = mlfoundry.get_client().create_run(project_name="movie-recommendation")

In [None]:
# Log the trained recommendation model to the run under the name "reco-implicit".
# NOTE(review): `model` is an `implicit` ALS model, not a scikit-learn estimator;
# presumably framework="sklearn" is used for its generic serialization — confirm
# that the logged model loads back correctly.
model_version = run.log_model(
    name="reco-implicit",
    model=model,
    framework="sklearn",
    description="model trained for movie recommendation"
)

In [None]:
# Log the CSV copies downloaded earlier with `curl -O` as run artifacts.
# The paths are relative to the working directory, which is where curl saved
# the files, so this also works when the notebook does not run from /content.
run.log_artifact('ratings_small.csv')
run.log_artifact('movies_metadata.csv')