In [1]:
import os 
import pandas as pd 
import numpy as np 
import networkx as nx 
from itertools import permutations
from math import factorial
from sklearn.model_selection import train_test_split
from functools import reduce



# Project root; every resource path below is resolved against it.
ROOT = os.getenv('ROOT_FOLDER')
if ROOT is None:
    # os.path.join(None, ...) would otherwise fail with an opaque TypeError.
    raise EnvironmentError("ROOT_FOLDER environment variable is not set")

# NOTE(review): these graphs are pickle files; unpickling can execute arbitrary
# code, so only load files from a trusted source.
# NOTE(review): nx.read_gpickle was removed in NetworkX 3.0 -- confirm the
# pinned NetworkX version before re-running this notebook.
item_item_graph = nx.read_gpickle(os.path.join(ROOT, 'app/resources/item_item_graph.p'))
user_item_graph = nx.read_gpickle(os.path.join(ROOT, 'app/resources/user_item_graph.p'))
user_item_graph_test = nx.read_gpickle(os.path.join(ROOT, 'app/resources/user_item_graph_test.p'))

# Movie metadata; the movieId, title and genres columns are read by top_recommendations.
movies = pd.read_csv(os.path.join(ROOT, 'app/resources/movie.csv'))

In [2]:


def preference_vector(user, user_item_graph):
    """Build a uniform preference vector over the items linked to ``user``.

    Edge weights are ignored on purpose: the model is assumed to be
    ratings agnostic, so every linked item gets the same share.

    Args:
        user: node identifying the user in ``user_item_graph``.
        user_item_graph: graph object exposing ``degree(node)`` and
            ``neighbors(node)``.

    Returns:
        dict mapping each neighbour of ``user`` to ``1 / degree(user)``;
        empty when ``user`` has no neighbours.
    """
    degree = user_item_graph.degree(user)

    weights = {}
    for item in user_item_graph.neighbors(user):
        # The division happens per item, so an isolated user never divides by zero.
        weights[item] = 1 / degree
    return weights



def get_page_rank_scores(users, item_item_graph, user_item_graph):
    """Compute personalized PageRank scores of not-yet-linked items per user.

    For each user, PageRank is run on ``item_item_graph`` with the
    personalization vector produced by ``preference_vector``; items the user
    is already linked to in ``user_item_graph`` are removed from the result.

    Args:
        users: iterable of user nodes present in ``user_item_graph``.
        item_item_graph: graph passed to ``nx.pagerank``.
        user_item_graph: graph whose neighbours of a user are that user's
            already-linked items.

    Returns:
        dict mapping each user to a ``{item: score}`` dict restricted to
        items that are not neighbours of the user.
    """
    scores = {}
    for user in users:
        # Build the exclusion set once per user; rebuilding a list of
        # neighbours for every scored item made the filter quadratic.
        seen_items = set(user_item_graph.neighbors(user))

        ranks = nx.pagerank(
            item_item_graph,
            personalization=preference_vector(user, user_item_graph),
        )
        scores[user] = {
            movie: score
            for movie, score in ranks.items()
            if movie not in seen_items
        }
    return scores


def top_recommendations(results, k, mapping_file):
    """Print the ``k`` highest-scoring movies for the first user in ``results``.

    Args:
        results: mapping of user id -> ``{movie id: score}``. Only the first
            key (in iteration order) is reported; other users are ignored.
        k: maximum number of movies to print.
        mapping_file: object exposing ``movieId``, ``title`` and ``genres``
            as parallel iterables (e.g. the ``movies`` DataFrame).

    Returns:
        None. One line per movie is printed; nothing is printed when
        ``results`` is empty. A movie id missing from the mapping prints an
        empty title and ``None`` as its genre.
    """
    if not results:
        # Nothing to report; previously this raised IndexError.
        return

    userid = next(iter(results))
    top_movies = sorted(results[userid].items(), key=lambda x: x[1], reverse=True)[:k]

    title_mapping = dict(zip(mapping_file.movieId, mapping_file.title))
    genre_mapping = dict(zip(mapping_file.movieId, mapping_file.genres))

    for movieid, score in top_movies:
        # Ids are cast to int so they match the keys of the two mappings.
        movie_key = int(movieid)
        print(f"movieid:{movie_key} movie title:{title_mapping.get(movie_key, '')}"
              f" movie genre:{genre_mapping.get(movie_key)} score:{round(score, 6)}")
    


### Collect the user nodes to draw sample user ids from

In [3]:
# Collect every node whose "bipartite" attribute equals 0; these ids are passed as users below.
# NOTE(review): presumably 0 marks the user side of the bipartite graph -- confirm against how the graph was built.
nn = [node for node,bipartite in user_item_graph.nodes(data="bipartite") if bipartite==0]


### Inference for one user id

In [4]:
# Peek at the first ten collected user ids.
nn[:10]

[55420, 7051, 40499, 91897, 136751, 54494, 127235, 66667, 24006, 134898]

In [7]:
%%time
# Score only the first collected user id, then print that user's five highest-scoring movies.
r = get_page_rank_scores(nn[:1], item_item_graph, user_item_graph)
top_recommendations(r, 5, movies)


movieid:50158 movie title:Stomp the Yard (2007) movie genre:Drama|Musical score:0.001804
movieid:61210 movie title:Mutant Chronicles (2008) movie genre:Action|Adventure|Sci-Fi score:0.001752
movieid:844 movie title:Story of Xinghua, The (Xinghua san yue tian) (1994) movie genre:Drama score:0.001738
movieid:61991 movie title:Miracle at St. Anna (2008) movie genre:Drama|Mystery|War score:0.001728
movieid:33558 movie title:Snow Walker, The (2003) movie genre:Adventure|Drama score:0.001722
CPU times: user 1.19 s, sys: 42.1 ms, total: 1.23 s
Wall time: 1.23 s
