In [1]:
import pandas as pd
import numpy as np
import json
import pickle
import sklearn.metrics
from sklearn import preprocessing
from scipy.sparse import coo_matrix

In [2]:
with open("utility_matrix.pkl", "rb") as f:
    utility = pickle.load(f)

In [3]:
U_csc = utility.tocsc()

In [4]:
U_csr = utility.tocsr()

# Calculate similarities

In [7]:
our = U_csr.getrow(0)
sims = sklearn.metrics.pairwise.cosine_similarity(our,U_csr)
sims = np.squeeze(sims)
sims.shape

(3087508,)

# Get the user's neighborhood

In [8]:
asort = sims.argsort()
k_most_similar_idx = asort[::-1][:30]
k_sims = sims[k_most_similar_idx]
k_sims

array([1.        , 1.        , 1.        , 1.        , 0.70710678,
       0.5       , 0.5       , 0.4472136 , 0.4472136 , 0.4472136 ,
       0.40824829, 0.40824829, 0.37796447, 0.37796447, 0.37796447,
       0.35355339, 0.35355339, 0.35355339, 0.35355339, 0.33333333,
       0.33333333, 0.31622777, 0.31622777, 0.31622777, 0.31622777,
       0.31622777, 0.30151134, 0.30151134, 0.30151134, 0.30151134])

In [9]:
ratings_k_similar = np.squeeze(np.array([U_csr.getrow(r).todense() for r in k_most_similar_idx]))

In [10]:
ratings_k_similar_weighted = (k_sims[:,np.newaxis] * ratings_k_similar)
ratings_k_similar_weighted.shape

(30, 3272179)

In [11]:
predicted_ratings = ratings_k_similar_weighted.mean(axis=0)

In [12]:
b = 10
b_best_items_idx = np.argsort(predicted_ratings)[::-1][:b]

In [13]:
# These are the recommendations!
b_best_items_idx

array([285355, 159175,   1180, 172890, 268876, 541791, 478913,  69081,
       588249, 575384])

In [32]:
with open("items_encoder.pkl", "rb") as f:
    items_encoder = pickle.load(f)
    
with open("users_encoder.pkl", "rb") as f:
    users_encoder = pickle.load(f)

In [37]:
rec_paper_ids = items_encoder.inverse_transform(b_best_items_idx)
rec_paper_ids

array([1528791703,  593890627,    1707499, 1021226038, 1520821213,
       1896976133, 1730741578,  102370257, 1965355328, 1963698925])

In [34]:
with open("id2title.pkl", "rb") as f:
    id2title = pickle.load(f)
    
with open("id2name.pkl", "rb") as f:
    id2name = pickle.load(f)
    
with open("name2id.pkl", "rb") as f:
    name2id = pickle.load(f)
    
with open("title2id.pkl", "rb") as f:
    title2id = pickle.load(f)

In [31]:
"Deep Residual Learning for Image Recognition" in id2title.values()

True

In [38]:
for i in rec_paper_ids:
    print(id2title[i])

Querying a polynomial object-relational constraint database in model-based diagnosis
A set of critical heuristics for value sensitive designers and users of persuasive systems.
A Novel Approach for Adaptive EEG Artefact Rejection and EOG Gaze Estimation
Narrative Cognition in Interactive Systems: Suspense-Surprise and the P300 ERP Component
ZHARP: three-dimensional motion tracking from a single image plane
Exploiting compression and approximation paradigms for effective and efficient online analytical processing over sensor network readings in data grid environments
Be Brief, And They Shall Learn: Generating Concise Language Feedback for a Computer Tutor
A genetic algorithm vs. local search methods for solving the orienteering problem in large networks
Revisiting clientelism: A network analysis of problem-solving networks in Argentina
Computers in Third World schools: the example of China
