In [1]:
import numpy as np
import pandas as pd
import datetime
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse
from sklearn.model_selection import train_test_split

In [27]:
class NMF():
    def __init__(self, rating_matrix, K, lam=.02, X_init = None, W_init = None, lr=.005, n_epoches=20, print_every=2):
        self.rating_matrix=rating_matrix
        self.K = K # number of features
        self.lam=lam
        self.lr=lr
        self.n_epoches=n_epoches
        self.print_every=print_every
        self.users = rating_matrix[:, 0]
        self.items = rating_matrix[:, 1]
        self.ratings = rating_matrix[:, 2]
        self.n_users = int(np.max(self.users)) + 1# Number of users, +1 because indices starts from 0
        self.n_items = int(np.max(self.items)) + 1# Number of items
        self.n_ratings = len(rating_matrix) # number of ratings
        self.X = .1*np.random.randn(self.n_items, K) if X_init is None else X_init
        self.b = np.zeros(self.n_items)
        self.W = .1*np.random.randn(self.n_users, K) if W_init is None else W_init
        self.d = np.zeros(self.n_users)
        self.mu = np.mean(self.ratings)
    
    def fit(self):
        for epoch in range(self.n_epoches):
            idx_ratings = np.random.permutation(self.n_ratings)
            for n in idx_ratings:
                u, i, rating = self.users[n], self.items[n], self.ratings[n]
                error = np.dot(self.X[i], self.W[u]) + self.b[i] + self.d[u] + self.mu - rating
                self.X[i] -= self.lr * (error * self.W[u] + self.lam * self.X[i])
                self.b[i] -= self.lr * (error + self.lam * self.b[i]) # ?
                self.W[u] -= self.lr * (error * self.X[i] + self.lam * self.W[u])
                self.d[u] -= self.lr * (error + self.lam * self.d[u]) # ?
            
            if (epoch + 1) % self.print_every == 0:
                rmse = self.evaluate_RMSE(self.rating_matrix)
                print('Iteration {}, loss: {:.4f}, RMSE = {:.4f}'.format(epoch + 1, self.loss(), rmse))
                
    def loss(self):
        L = 0
        for n in range(self.n_ratings):
            u, i, rating = self.users[n], self.items[n], self.ratings[n]
            L+=0.5*(np.dot(self.X[i], self.W[u]) + self.b[i] + self.d[u] + self.mu - rating)**2 + .5*self.lam*(self.b[i]**2 + self.d[u]**2 + np.sum(self.X[i]**2) + np.sum(self.W[u]**2))
        L/=self.n_ratings
        #L+=0.5*self.lam*(np.sum(self.X**2) + np.sum(self.W**2))
        return L
                
    def pred(self, user_id, item_id):
        pred = np.dot(self.X[item_id], self.W[user_id]) + self.b[item_id] + self.d[user_id] + self.mu
        return max(0, min(5, pred))
                
    def evaluate_RMSE(self, rating_matrix):
        tot = 0
        for idx in range(len(rating_matrix)):
            err = self.pred(rating_matrix[idx, 0], rating_matrix[idx, 1]) - rating_matrix[idx, 2]
            tot += np.sum(err*err)
        return np.sqrt(tot/len(rating_matrix))

# Applied in MovieLens 100K

In [3]:
rating_base = pd.read_csv('../Content-Based/ml-100k/ua.base', sep='\t', names=['user_id', 'item id', 'rating', 'timestamp'])
rating_test = pd.read_csv('../Content-Based/ml-100k/ua.test', sep='\t', names=['user_id', 'item id', 'rating', 'timestamp'])
rating_base.head()

Unnamed: 0,user_id,item id,rating,timestamp
0,1,1,5,874965758
1,1,2,3,876893171
2,1,3,4,878542960
3,1,4,3,876893119
4,1,5,3,889751712


In [4]:
rating_train = rating_base.as_matrix()
rating_test = rating_test.as_matrix()

In [5]:
rating_train[:, :2] -= 1
rating_test[:, :2] -= 1

In [29]:
recommender = NMF(rating_train, K = 100, lam = .1, lr = .005, n_epoches=50, print_every=5)
recommender.fit()

Iteration 5, loss: 0.4854, RMSE = 0.9131
Iteration 10, loss: 0.4567, RMSE = 0.8912
Iteration 15, loss: 0.4425, RMSE = 0.8761
Iteration 20, loss: 0.4320, RMSE = 0.8597
Iteration 25, loss: 0.4230, RMSE = 0.8420
Iteration 30, loss: 0.4155, RMSE = 0.8243
Iteration 35, loss: 0.4091, RMSE = 0.8076
Iteration 40, loss: 0.4036, RMSE = 0.7917
Iteration 45, loss: 0.3989, RMSE = 0.7767
Iteration 50, loss: 0.3949, RMSE = 0.7630


In [15]:
recommender.X

array([[-0.09115018,  0.0391809 , -0.00173673, ..., -0.01602382,
         0.07480827, -0.03061768],
       [-0.00500204, -0.01827754, -0.03268735, ..., -0.05494982,
        -0.08099138, -0.03753803],
       [ 0.05250853, -0.02206965,  0.07094046, ..., -0.05714843,
        -0.00055036, -0.0913247 ],
       ...,
       [-0.1039583 ,  0.07682033, -0.10928394, ...,  0.09998052,
         0.03175551,  0.14215109],
       [-0.02250635,  0.10323281,  0.0019595 , ..., -0.01634393,
        -0.0352844 , -0.10013444],
       [-0.04630697,  0.00860786, -0.0385849 , ...,  0.00778307,
         0.0844827 , -0.06646943]])

In [10]:
recommender.W

array([[ 0.09386343,  0.04506018, -0.11717322, ..., -0.13959729,
        -0.18067205,  0.04896217],
       [ 0.02177766,  0.03481139, -0.09291538, ..., -0.12469851,
         0.07443468, -0.11951526],
       [ 0.09293721, -0.06385368,  0.21894225, ...,  0.13581335,
        -0.00633399, -0.02194369],
       ...,
       [ 0.14665205, -0.05652938,  0.03229272, ..., -0.07097061,
        -0.12297571,  0.08758564],
       [-0.04615017, -0.07192247,  0.02170987, ..., -0.04528887,
        -0.00556666, -0.03014689],
       [ 0.06398043,  0.23854669, -0.00985581, ..., -0.14389396,
         0.07956757,  0.12645999]])