In [4]:
import numpy

def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
    '''
    Approximate the rating matrix R by the product P x Q^T using
    stochastic gradient descent with L2 regularization.

    Only cells with R[i][j] > 0 are treated as observed ratings; all other
    cells are ignored during training and in the reported error.

    R: rating matrix (rows indexed like P, columns indexed like Q)
    P: |U| * K (initial user features matrix)
    Q: |D| * K (initial item features matrix)
    K: number of latent features that are updated
    steps: maximum number of passes over the observed ratings
    alpha: learning rate
    beta: regularization parameter

    Returns (P, Q, e): the trained |U| * K and |D| * K factor matrices and
    the regularized squared error of the last completed pass (0 when
    steps == 0). The P and Q passed in are not modified.
    '''
    # Train on float copies so the caller's initial matrices stay untouched.
    P = numpy.array(P, dtype=float)
    Q = numpy.array(Q, dtype=float).T  # kept as K x |D| while training

    # The set of observed cells never changes, so collect it only once.
    observed = [(i, j)
                for i in range(len(R))
                for j in range(len(R[i]))
                if R[i][j] > 0]

    e = 0  # keeps the return value defined when steps == 0
    for step in range(steps):
        for i, j in observed:
            # prediction error for this rating
            eij = R[i][j] - numpy.dot(P[i, :], Q[:, j])

            # Both factor updates must use the values from *before* this
            # step, so snapshot the user row before overwriting it.
            p_old = P[i, :K].copy()
            P[i, :K] += alpha * (2 * eij * Q[:K, j] - beta * p_old)
            Q[:K, j] += alpha * (2 * eij * p_old - beta * Q[:K, j])

        # Regularized squared error over the observed ratings.
        e = 0
        for i, j in observed:
            e = e + pow(R[i][j] - numpy.dot(P[i, :], Q[:, j]), 2)
            e = e + (beta / 2) * (numpy.sum(P[i, :K] ** 2) + numpy.sum(Q[:K, j] ** 2))

        # Stop early once the error drops below the 0.001 tolerance.
        if e < 0.001:
            break

    return P, Q.T, e


In [5]:
# Small hand-written rating matrix: one row per user, one column per movie,
# 0 where no rating was given.
R = numpy.array([
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [1, 1, 0, 5],
    [1, 0, 0, 4],
    [0, 1, 5, 4],
    [2, 1, 3, 0],
])

# N: number of users (rows), M: number of movies (columns)
N, M = len(R), len(R[0])
# K: number of latent features
K = 3

# Random starting points for the user and movie factor matrices
P = numpy.random.rand(N, K)
Q = numpy.random.rand(M, K)

nP, nQ, error = matrix_factorization(R, P, Q, K)
# Dense reconstruction of the rating matrix from the learned factors
nR = nP @ nQ.T

print(R)
print(nR)
print("Error: " , error)

[[5 3 0 1]
 [4 0 0 1]
 [1 1 0 5]
 [1 0 0 4]
 [0 1 5 4]
 [2 1 3 0]]
[[5.01375573 2.89934627 5.4375737  1.00338849]
 [3.96763613 2.3583031  4.4330484  0.99988245]
 [1.05718948 0.86758506 4.65958826 4.97111058]
 [0.98131159 0.77247376 3.85631804 3.97986631]
 [2.02371593 1.11705557 4.95147629 4.00109806]
 [1.91477295 1.16224041 2.98496922 1.60767941]]
Error:  1.3923554687130955


In [6]:
import pandas as pd

# Read the ratings table and replace missing values with 0.
df = pd.read_csv("data/matrix-factorization.csv").fillna(0)
# Display a view keyed by movie; the result is not stored, so df itself
# keeps its "movies" column and its original index.
df.set_index("movies")

Unnamed: 0_level_0,user_0,user_1,user_2,user_3,user_4,user_5,user_6,user_7,user_8,user_9
movies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
movie_0,4.0,5.0,4.0,3.0,5.0,1.0,0.0,3.0,2.0,0.0
movie_1,1.0,0.0,1.0,0.0,2.0,4.0,5.0,4.0,0.0,4.0
movie_2,0.0,1.0,3.0,0.0,0.0,5.0,5.0,0.0,4.0,3.0
movie_3,0.0,0.0,0.0,2.0,0.0,0.0,4.0,5.0,0.0,4.0
movie_4,4.0,4.0,0.0,5.0,0.0,3.0,1.0,4.0,0.0,0.0
movie_5,5.0,4.0,4.0,3.0,4.0,2.0,1.0,0.0,0.0,1.0
movie_6,2.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,5.0,4.0
movie_7,1.0,2.0,1.0,0.0,0.0,4.0,0.0,4.0,4.0,0.0
movie_8,0.0,0.0,1.0,0.0,2.0,0.0,5.0,4.0,0.0,5.0
movie_9,5.0,5.0,3.0,3.0,5.0,2.0,0.0,0.0,2.0,1.0


In [7]:

# Drop the first column of df and keep the remaining columns as the
# rating matrix.
R2 = df.values[:, 1:]
print(R2)

# ---- factorize the rating matrix taken from the CSV ----
R2 = numpy.array(R2)
# N2: number of rows of R2, M2: number of columns of R2
N2, M2 = len(R2), len(R2[0])
# K: number of latent features
K = 4

# Random starting factors: one row per R2 row, and one row per R2 column
P2 = numpy.random.rand(N2, K)
Q2 = numpy.random.rand(M2, K)

nP2, nQ2, error = matrix_factorization(R2, P2, Q2, K)
# Dense reconstruction from the learned factors
nR2 = nP2 @ nQ2.T

print(error)
print(R2)
print(nR2)

[[4.0 5.0 4.0 3.0 5.0 1.0 0.0 3.0 2.0 0.0]
 [1.0 0.0 1.0 0.0 2.0 4.0 5.0 4.0 0.0 4.0]
 [0.0 1.0 3.0 0.0 0.0 5.0 5.0 0.0 4.0 3.0]
 [0.0 0.0 0.0 2.0 0.0 0.0 4.0 5.0 0.0 4.0]
 [4.0 4.0 0.0 5.0 0.0 3.0 1.0 4.0 0.0 0.0]
 [5.0 4.0 4.0 3.0 4.0 2.0 1.0 0.0 0.0 1.0]
 [2.0 0.0 0.0 2.0 3.0 0.0 0.0 0.0 5.0 4.0]
 [1.0 2.0 1.0 0.0 0.0 4.0 0.0 4.0 4.0 0.0]
 [0.0 0.0 1.0 0.0 2.0 0.0 5.0 4.0 0.0 5.0]
 [5.0 5.0 3.0 3.0 5.0 2.0 0.0 0.0 2.0 1.0]]
8.389467438583468
[[4.0 5.0 4.0 3.0 5.0 1.0 0.0 3.0 2.0 0.0]
 [1.0 0.0 1.0 0.0 2.0 4.0 5.0 4.0 0.0 4.0]
 [0.0 1.0 3.0 0.0 0.0 5.0 5.0 0.0 4.0 3.0]
 [0.0 0.0 0.0 2.0 0.0 0.0 4.0 5.0 0.0 4.0]
 [4.0 4.0 0.0 5.0 0.0 3.0 1.0 4.0 0.0 0.0]
 [5.0 4.0 4.0 3.0 4.0 2.0 1.0 0.0 0.0 1.0]
 [2.0 0.0 0.0 2.0 3.0 0.0 0.0 0.0 5.0 4.0]
 [1.0 2.0 1.0 0.0 0.0 4.0 0.0 4.0 4.0 0.0]
 [0.0 0.0 1.0 0.0 2.0 0.0 5.0 4.0 0.0 5.0]
 [5.0 5.0 3.0 3.0 5.0 2.0 0.0 0.0 2.0 1.0]]
[[4.61731988 4.91833339 3.52043155 2.94069069 4.76770242 1.24718721
  0.79788857 3.04748012 1.84905091 0.69029518]
 [0.9

In [8]:
# Wrap the reconstructed matrix in a DataFrame that reuses df's row labels
# and every df column name after the first one.
new_df = pd.DataFrame(
    data=nR2,
    index=df.index.tolist(),
    columns=df.columns.tolist()[1:],
)
new_df

Unnamed: 0,user_0,user_1,user_2,user_3,user_4,user_5,user_6,user_7,user_8,user_9
0,4.61732,4.918333,3.520432,2.940691,4.767702,1.247187,0.797889,3.04748,1.849051,0.690295
1,0.900198,1.729411,1.068539,0.335434,2.127125,4.05579,4.866907,3.847356,3.320056,4.110003
2,2.350904,1.030291,2.962313,0.880519,2.648708,4.936906,4.981814,3.531754,4.022971,3.013962
3,2.897509,4.455073,2.218681,2.022651,4.198254,3.584528,4.035976,4.897386,3.637375,4.006151
4,4.065733,4.027106,3.316693,4.815866,3.829318,3.019273,0.958164,3.984718,5.292207,2.358525
5,4.570664,3.970446,3.86181,3.243811,4.325497,2.069266,1.062608,2.994988,2.859966,0.787053
6,2.111296,2.429031,2.121591,2.083434,2.849573,4.560432,4.259305,4.417485,4.86641,4.115063
7,1.038004,1.942363,1.06382,1.272478,2.054815,3.949471,4.102594,3.938896,4.032916,4.151426
8,0.682357,1.738065,0.860715,0.698514,1.984072,4.450435,5.042741,4.231506,4.067667,4.725542
9,4.615271,5.025122,3.554596,2.902403,4.912977,1.667154,1.335857,3.421307,2.164158,1.145453


In [9]:
user = "user_2"
# user = "user_6"

# Row labels of df where this user's rating is 0.
recommendations = df[df[user] == 0].index.to_list()

# `recommendations` holds index labels, so select rows by label (.loc)
# rather than by position (.iloc); the two only coincide while the index is
# the default 0..n-1 range. Selecting [user] as a list keeps a one-column
# DataFrame, which is then ordered from highest to lowest predicted rating.
final = new_df.loc[recommendations, [user]].sort_values(by=[user], ascending=False)
final

Unnamed: 0,user_2
4,3.316693
3,2.218681
6,2.121591
