## Build a recommendator system

In [7]:
%matplotlib inline

import itertools
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")

import math as mt
import time
import re

from surprise import Reader, Dataset, SVD, evaluate
from scipy.sparse import csr_matrix, csc_matrix

from sparsesvd import sparsesvd
from scipy.sparse.linalg import *

import Recommenders_pro as Recommenders
import Evaluation_pro as Evaluation


In [8]:
ratings = pd.read_csv("../data/processed/rating.csv")
users   = pd.read_csv("../data/processed/users.csv", index_col="userId")
blogs = pd.read_csv("../data/processed/blogs.csv", index_col="id")

ratings["rating"] = ratings["rating"].astype(float)

# ratings to pivot table
rating_piot = pd.pivot_table(ratings, values="rating", index="userId", columns="movieId")

## Popularity model

In [9]:
## create an instance of popularity recommenders class
pm = Recommenders.popularity_recommender_py()
pm.create(ratings, 'userId', 'movieId')


# blogs.loc[pm.recommend("")["movieId"][:5].values]["title"].to_dict()

pm.recommend("")

Unnamed: 0,userId,movieId,score,rank
26,,e11c38f98977,12,1.0
27,,eacf251d78ad,11,2.0
5,,40347c6ed448,9,3.0
1,,1a0a0f6e69cb,8,4.0
9,,66771889da10,8,5.0
13,,95b0bf768745,8,6.0
7,,6052b0cc43ac,7,7.0
30,,eea0907cd0ae,7,8.0
12,,7588781774b3,6,9.0
29,,ebbcb4723c0e,6,10.0


## methods to compute singular value decomposition

In [76]:
## constants defining the dimensions of our user rating matrix
MAX_PID = 32
MAX_UID = 27

#Compute SVD of the user ratings matrix
def computeSVD(urm, k):
    U, s, Vt = sparsesvd(urm, k)
    
    dim = (len(s), len(s))
    S = np.zeros(dim, dtype=np.float32)
    for i in range(0, len(s)):
        S[i, i] = mt.sqrt(s[i])
    
    U = csc_matrix(np.transpose(U), dtype=np.float32)
    S = csc_matrix(S, dtype=np.float32)
    Vt = csc_matrix(Vt, dtype=np.float32)
    
    return U, S, Vt

#Compute estimated rating for the test user
def computeEstimatedRatings(urm, U, S, Vt, uTest, K, test, MAX_UID, MAX_PID):
    rightTerm = S*Vt
    
    estimatedRatings = np.zeros(shape=(MAX_UID, MAX_PID), dtype=np.float16)
    for userTest in uTest:
        prod = U[userTest,:]*rightTerm
        
        estimatedRatings[userTest,:] = prod.todense()
        recom = (-estimatedRatings[userTest,:]).argsort()[:250]
    return recom



In [106]:
#Used in SVD calculation (number of latent factors)
K=2

#Initialize a sample user rating matrix
urm = rating_piot.fillna(0).values
urm_a = urm
urm = csc_matrix(urm, dtype=np.float32)

#Compute SVD of the input user ratings matrix
U, S, Vt = computeSVD(urm, K)

#Test user set as user_id 4 with ratings [0, 0, 5, 0]
uTest = [5]
print("User id for whom recommendations are needed: %d" % uTest[0])

#Get estimated rating for test user
print("Predictied ratings:")
uTest_recommended_items = computeEstimatedRatings(urm, U, S, Vt, uTest, K, True, MAX_UID, MAX_PID)
# mTest_recommended_items = computeEstimatedRatings(urm_m, U_m, S_m, Vt_m, mTest, K, True, MAX_UID_m, MAX_PID_m)


print(uTest_recommended_items[:5])
# print(mTest_recommended_items[:25])

User id for whom recommendations are needed: 5
Predictied ratings:
[27  9 12  5 25]


In [80]:
blogs.iloc[uTest_recommended_items[:5]]['title'].to_dict()

{'ecfca5ab7518': 'How to Be Creative',
 '6e63ea497ad3': 'How to Be an Ethical Sugar Daddy',
 '2e42c3206223': 'How the Internet Made Us Believe in a Flat Earth',
 'd2314d321403': 'The Truth About Finding a Satisfying Career',
 'a25946836300': 'How Tucker Carlson Saved My Life'}

In [17]:
# urm_a

In [16]:
# users.reset_index(inplace=True)

In [15]:
# users[users["userId"] == "a8634106cb64"].index[0]

In [14]:
# ' '.join(blogs['tags'].values)

In [13]:
# type(blogs['tags'][0])

In [12]:
# blogs["tags"][0]

In [11]:
# Users

In [10]:
# users