In [None]:
import pickle

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.decomposition import NMF

In [None]:
# Read the four CSV tables under ./data/ml-latest-small into DataFrames
# (movies, ratings, links, tags - in that order).
movie_df, rating_df, link_df, tag_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/ml-latest-small/movies.csv',
        './data/ml-latest-small/ratings.csv',
        './data/ml-latest-small/links.csv',
        './data/ml-latest-small/tags.csv',
    )
)

In [None]:
# Inspect the ratings table as loaded from ratings.csv.
rating_df

In [None]:
# Switch the id columns to snake_case on both tables
# (movieId -> movie_id, userId -> user_id).
rating_df = rating_df.rename(columns={'movieId': 'movie_id', 'userId': 'user_id'})
movie_df = movie_df.rename(columns={'movieId': 'movie_id'})


In [None]:
# Number of ratings each movie received: one row per movie_id,
# with the count stored in the 'rating' column.
rating_count = rating_df[['movie_id', 'rating']].groupby('movie_id').count()
rating_count

In [None]:
# Index of the movie_ids that were rated more than 20 times.
popular_movies = rating_count.loc[rating_count['rating'].gt(20)].index
popular_movies


In [None]:
# Restrict the ratings to popular movies. The explicit copy makes `df`
# independent of `rating_df`, so the id columns can be overwritten later.
df = rating_df.loc[rating_df['movie_id'].isin(popular_movies)].copy()
df

In [None]:
# Compare (rows, columns) of the ratings before and after the popularity filter.
rating_df.shape, df.shape

In [None]:
# The original user and movie ids are not sequential, so both get re-numbered.
# Start by collecting the distinct user ids in order of first appearance.
user_ids = pd.unique(df['user_id'])
user_ids

In [None]:
# Map every original user id to its position in `user_ids` (0, 1, 2, ...)
# and overwrite the column with the new ids.
# NOTE: this rewrites df['user_id'] in place - run the cell once per fresh `df`;
# a second run would look up already-remapped ids in the old-id map.
user_id_map = dict(zip(user_ids, range(len(user_ids))))
df['user_id'] = df['user_id'].map(user_id_map)
df

In [None]:
# Inspect the movies table.
movie_df

In [None]:
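# Explicit-loop equivalent of the dict comprehension that builds movie_id_map
# (kept for reference only; not executed):
#movie_id_map = {}
#for key, value in enumerate(movie_ids):
#    movie_id_map[value] = key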

In [None]:
# Same treatment for movies: distinct original movie ids of the popular
# movies, in order of first appearance in `df`.
movie_ids = pd.unique(df['movie_id'])
movie_ids


In [None]:
# Map every original movie id to its position in `movie_ids` (0, 1, 2, ...)
# and overwrite the column with the new ids.
# NOTE: this rewrites df['movie_id'] in place - run the cell once per fresh `df`.
movie_id_map = dict(zip(movie_ids, range(len(movie_ids))))
df['movie_id'] = df['movie_id'].map(movie_id_map)
df

In [None]:
# Keep only the movies that survived the popularity filter (`movie_ids` holds
# their original ids). The explicit .copy() makes `movies` an independent
# frame, so assigning to its 'movie_id' column afterwards does not raise
# pandas' SettingWithCopyWarning or risk writing into a view of `movie_df`.
movies = movie_df[movie_df['movie_id'].isin(movie_ids)].copy()
movies

In [None]:
# Swap the original movie ids for the new consecutive ids from movie_id_map.
remapped_movie_ids = movies['movie_id'].map(movie_id_map)
movies['movie_id'] = remapped_movie_ids
movies

In [None]:
# Titles ordered by the new movie_id; this ordering is what labels the
# columns of the ratings table and of the model output further down.
movie_title = movies.sort_values(by='movie_id').loc[:, 'title']
movie_title

In [None]:
# Sparse ratings matrix: row index = user_id, column index = movie_id,
# stored value = rating. Cells without a rating are implicit zeros.
# (csr_matrix is imported in the notebook's import cell.)
R = csr_matrix((df['rating'], (df['user_id'], df['movie_id'])))

In [None]:
# Dimensions of the sparse matrix: (number of user rows, number of movie columns).
R.shape

In [None]:
# Dense representation of R, shown for visual inspection.
R.todense()

In [None]:
# Dense user x movie ratings table with the movie titles as column labels.
Rating = pd.DataFrame(data=R.toarray(), columns=movie_title)
Rating

### NMF recommender function
1. Implement a recommender **function** that recommends movies to a new user based on the NMF model!

In [None]:
 #load the model
# NOTE(security): unpickling can execute arbitrary code embedded in the file,
# so 'nmf_1000.pkl' must come from a trusted source.
# NOTE(review): presumably this model was fitted on a matrix whose columns are
# in the same order as `movie_title` - confirm how the pickle was produced.
with open('nmf_1000.pkl', 'rb') as file:
    nmf_model = pickle.load(file)

In [None]:
# Example input for the recommender: movie title -> rating given by the new user.
new_user_query = {
    "Toy Story (1995)": 1,
    "Joe Dirt (2001)": 2,
    "Heat (1995)": 3.5,
    "Little Nicky (2000)": 5,
}

In [None]:
def recommend_nmf(new_user_query, nmf_model, ranked=10, movie_titles=None):
    """
    Recommend the top-ranked unseen movies for a new user with a trained NMF model.

    Parameters
    ----------
    new_user_query : dict
        Maps movie title -> rating given by the new user. Titles that are not
        among ``movie_titles`` are ignored.
    nmf_model : fitted estimator
        Must expose ``components_`` (the feature x movie matrix Q) and
        ``transform`` (projects a user-rating row onto the latent features),
        e.g. a fitted ``sklearn.decomposition.NMF``.
    ranked : int, default 10
        Maximum number of recommendations to return.
    movie_titles : sequence of str, optional
        Movie titles in the same order as the model's columns. When omitted,
        the notebook-level ``movie_title`` Series is used.

    Returns
    -------
    list
        Up to ``ranked`` movie titles, highest predicted score first,
        excluding every title that appears in ``new_user_query``.
    """
    if movie_titles is None:
        # Backward-compatible default: the title Series built earlier in the notebook.
        movie_titles = movie_title
    titles = pd.Index(movie_titles)

    # 1. construct the new user's rating row: one column per known title,
    #    0 for every movie the user has not rated.
    user_ratings = pd.Series(new_user_query, dtype=float).reindex(titles).fillna(0.0)
    new_user_dataframe = user_ratings.to_frame(name='new_user').T

    # 2. scoring: project the user onto the latent features (P), then
    #    reconstruct predicted ratings for every movie as P . Q.
    P_new_user_matrix = nmf_model.transform(new_user_dataframe)
    R_hat_new_user_matrix = np.dot(P_new_user_matrix, nmf_model.components_)

    # 3. ranking: label the single result row with the titles, drop the movies
    #    the user already rated, and sort by predicted score.
    scores = pd.Series(np.asarray(R_hat_new_user_matrix)[0], index=titles)
    # errors='ignore': queried titles unknown to the model were never scored,
    # so there is nothing to drop for them.
    unseen_scores = scores.drop(labels=list(new_user_query), errors='ignore')
    ranking = unseen_scores.sort_values(ascending=False).index.to_list()

    # Honour the requested number of recommendations (previously fixed at 3).
    return ranking[:ranked]

In [None]:
# Example run: rate four movies as a new user and ask for ten recommendations.
new_user_query = {
    "Toy Story (1995)": 1,
    "Joe Dirt (2001)": 2,
    "Heat (1995)": 3.5,
    "Little Nicky (2000)": 5,
}

recommend_nmf(new_user_query=new_user_query, nmf_model=nmf_model, ranked=10)