In [1]:
import numpy

def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
    '''
    R: rating matrix
    P: |U| * K (User features matrix)
    Q: |D| * K (Item features matrix)
    K: latent features
    steps: iterations
    alpha: learning rate
    beta: regularization parameter'''
    Q = Q.T

    for step in range(steps):
        for i in range(len(R)):
            for j in range(len(R[i])):
                if R[i][j] > 0:
                    # calculate error
                    eij = R[i][j] - numpy.dot(P[i,:],Q[:,j])

                    for k in range(K):
                        # calculate gradient with a and beta parameters
                        P[i][k] = P[i][k] + alpha * (2 * eij * Q[k][j] - beta * P[i][k])
                        Q[k][j] = Q[k][j] + alpha * (2 * eij * P[i][k] - beta * Q[k][j])

        eR = numpy.dot(P,Q)
        e = 0

        for i in range(len(R)):
            for j in range(len(R[i])):
                if R[i][j] > 0:
                    e = e + pow(R[i][j] - numpy.dot(P[i,:],Q[:,j]), 2)

                    for k in range(K):
                        e = e + (beta/2) * (pow(P[i][k],2) + pow(Q[k][j],2))
        # 0.001: local minimum
        if e < 0.001:
            break

    return P, Q.T, e


In [2]:
R = [
     [5,3,0,1],
     [4,0,0,1],
     [1,1,0,5],
     [1,0,0,4],
     [0,1,5,4],
     [2,1,3,0],
    ]

R = numpy.array(R)
# N: num of User
N = len(R)
# M: num of Movie
M = len(R[0])
# Num of Features
K = 3

P = numpy.random.rand(N,K)
Q = numpy.random.rand(M,K)


nP, nQ, error = matrix_factorization(R, P, Q, K)
nR = numpy.dot(nP, nQ.T)

print(R)
print(nR)
print("Error: " , error)

[[5 3 0 1]
 [4 0 0 1]
 [1 1 0 5]
 [1 0 0 4]
 [0 1 5 4]
 [2 1 3 0]]
[[4.97492328 2.97862442 3.83813106 1.00051375]
 [3.97408947 2.45952223 3.12848618 1.00061829]
 [1.02065869 0.95546176 4.07692246 4.96766696]
 [0.98656849 0.6912365  3.57560421 3.98056069]
 [2.3651569  1.07793443 4.95400872 4.00270027]
 [2.00994224 0.93124392 3.01200331 1.87909626]]
Error:  1.3288902767725237


In [3]:
import pandas as pd

df = pd.read_csv("data/matrix-factorization.csv")
df = df.fillna(0)
df.set_index("movies")

Unnamed: 0_level_0,user_0,user_1,user_2,user_3,user_4,user_5,user_6,user_7,user_8,user_9
movies,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
movie_0,4.0,5.0,4.0,3.0,5.0,1.0,0.0,3.0,2.0,0.0
movie_1,1.0,0.0,1.0,0.0,2.0,4.0,5.0,4.0,0.0,4.0
movie_2,0.0,1.0,3.0,0.0,0.0,5.0,5.0,0.0,4.0,3.0
movie_3,0.0,0.0,0.0,2.0,0.0,0.0,4.0,5.0,0.0,4.0
movie_4,4.0,4.0,0.0,5.0,0.0,3.0,1.0,4.0,0.0,0.0
movie_5,5.0,4.0,4.0,3.0,4.0,2.0,1.0,0.0,0.0,1.0
movie_6,2.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,5.0,4.0
movie_7,1.0,2.0,1.0,0.0,0.0,4.0,0.0,4.0,4.0,0.0
movie_8,0.0,0.0,1.0,0.0,2.0,0.0,5.0,4.0,0.0,5.0
movie_9,5.0,5.0,3.0,3.0,5.0,2.0,0.0,0.0,2.0,1.0


In [4]:

# R2 = df.values[1:,:]
R2 = df.values[:,1:]
print(R2)

################################################
R2 = numpy.array(R2)
# N: num of User
N2 = len(R2)
# M: num of Movie
M2 = len(R2[0])
# Num of Features
K = 4

 
P2 = numpy.random.rand(N2,K)
Q2 = numpy.random.rand(M2,K)


nP2, nQ2, error = matrix_factorization(R2, P2, Q2, K)
nR2 = numpy.dot(nP2, nQ2.T)

print(error)
print(R2)
print(nR2)

[[4.0 5.0 4.0 3.0 5.0 1.0 0.0 3.0 2.0 0.0]
 [1.0 0.0 1.0 0.0 2.0 4.0 5.0 4.0 0.0 4.0]
 [0.0 1.0 3.0 0.0 0.0 5.0 5.0 0.0 4.0 3.0]
 [0.0 0.0 0.0 2.0 0.0 0.0 4.0 5.0 0.0 4.0]
 [4.0 4.0 0.0 5.0 0.0 3.0 1.0 4.0 0.0 0.0]
 [5.0 4.0 4.0 3.0 4.0 2.0 1.0 0.0 0.0 1.0]
 [2.0 0.0 0.0 2.0 3.0 0.0 0.0 0.0 5.0 4.0]
 [1.0 2.0 1.0 0.0 0.0 4.0 0.0 4.0 4.0 0.0]
 [0.0 0.0 1.0 0.0 2.0 0.0 5.0 4.0 0.0 5.0]
 [5.0 5.0 3.0 3.0 5.0 2.0 0.0 0.0 2.0 1.0]]
9.433447258185158
[[4.0 5.0 4.0 3.0 5.0 1.0 0.0 3.0 2.0 0.0]
 [1.0 0.0 1.0 0.0 2.0 4.0 5.0 4.0 0.0 4.0]
 [0.0 1.0 3.0 0.0 0.0 5.0 5.0 0.0 4.0 3.0]
 [0.0 0.0 0.0 2.0 0.0 0.0 4.0 5.0 0.0 4.0]
 [4.0 4.0 0.0 5.0 0.0 3.0 1.0 4.0 0.0 0.0]
 [5.0 4.0 4.0 3.0 4.0 2.0 1.0 0.0 0.0 1.0]
 [2.0 0.0 0.0 2.0 3.0 0.0 0.0 0.0 5.0 4.0]
 [1.0 2.0 1.0 0.0 0.0 4.0 0.0 4.0 4.0 0.0]
 [0.0 0.0 1.0 0.0 2.0 0.0 5.0 4.0 0.0 5.0]
 [5.0 5.0 3.0 3.0 5.0 2.0 0.0 0.0 2.0 1.0]]
[[ 4.70407619  4.56848759  3.87455991  3.13865241  4.63976503  1.23653804
  -0.62396534  3.01513154  1.82592394  0.57049

In [5]:
# print(df.index.to_list())
# print(df.columns)

# temp_df = temp_df.set_index("movies")
# print("Index: " , df.index.to_list())
# print("Columns: " , df.columns.to_list())

new_df = pd.DataFrame(nR2, index=df.index.to_list(), columns=df.columns.to_list()[1:])
new_df

Unnamed: 0,user_0,user_1,user_2,user_3,user_4,user_5,user_6,user_7,user_8,user_9
0,4.704076,4.568488,3.87456,3.138652,4.639765,1.236538,-0.623965,3.015132,1.825924,0.570495
1,0.842952,1.049908,1.127331,1.67505,1.900101,4.290816,4.640161,4.017132,3.796331,4.061367
2,1.373995,1.164279,2.825209,0.777051,1.917142,4.570669,5.13985,4.505357,4.285811,2.98045
3,2.263253,2.423822,2.32279,2.281732,3.168614,4.498143,4.251577,4.760718,4.204489,3.786212
4,3.955445,4.193306,2.492231,4.828995,4.965316,2.724783,1.195902,4.031211,2.869436,3.81343
5,4.355754,4.14404,4.119177,2.784448,4.385925,2.144963,0.732375,3.660514,2.60045,1.107304
6,2.071089,2.208331,2.541852,1.86949,3.018041,5.307613,5.356411,5.355731,4.894152,4.090852
7,1.295348,1.66902,1.081189,1.466547,2.221213,4.188736,4.123089,4.020621,3.706542,3.480668
8,0.648431,0.831962,0.93631,2.387312,1.980721,4.40416,4.903157,4.10905,3.876435,4.96655
9,4.780192,5.108412,2.981469,3.089638,4.950926,1.864207,-0.365196,3.423497,2.200827,0.947297


In [6]:
user = "user_2"
# user = "user_6"

recommendations = df[df[user] == 0].index.to_list()

temp_final = new_df.iloc[recommendations][user]
final = pd.DataFrame(temp_final.to_frame())

final.sort_values(by=[user],inplace=True,ascending = False)
final

Unnamed: 0,user_2
6,2.541852
4,2.492231
3,2.32279
