In [1]:
import numpy as np
import pandas as pd

# 数据准备

In [2]:
#评分矩阵R
R = np.array([[4,0,2,0,1],
              [0,2,3,0,0],
              [1,0,2,4,0],
              [5,0,0,3,1],
              [0,0,1,5,1],
              [0,3,2,4,1],])

R

array([[4, 0, 2, 0, 1],
       [0, 2, 3, 0, 0],
       [1, 0, 2, 4, 0],
       [5, 0, 0, 3, 1],
       [0, 0, 1, 5, 1],
       [0, 3, 2, 4, 1]])

# 算法实现

In [3]:
"""
@输入参数
R:M*N的评分矩阵
K：隐特征向量维度
max_iter:最大迭代次数
alpha:步长
lamda:正则化系数

@输出
分解之后的P、Q
P：初始化用户特征矩阵M*K
Q：初始化物品特征矩阵N*K
"""

#给定超参数

K=5
max_iter=5000
alpha=0.0002
lamda=0.004

#核心算法
def LFM_grad_desc(R,K,max_iter,alpha,lamda):
    #基本维度参数定义
    M=len(R)
    N=len(R[0])
    
    #P,Q初始值，随机生产
    P=np.random.rand(M,K)
    Q=np.random.rand(N,K)
    Q=Q.T
    
    #开始迭代
    for step in range(max_iter):
        #对所有用户u,物品i做遍历，对应的特征向量Pu、Qi梯度下降
        for u in range(M):
            for i in range(N):
                #排除评分为零的项（用户未看过）
                if R[u][i]>0:
                    eui=np.dot(P[u,:],Q[:,i])-R[u][i]
                    #代入公式，按照梯度下降算法更新当期的Pu、Qi
                    for k in range(K):
                        P[u][k]=P[u][k]-alpha*(2*eui*Q[k][i]+2*lamda*P[u][k])
                        Q[k][i]=Q[k][i]-alpha*(2*eui*Q[k][i]+2*lamda*Q[k][i])
                        
                        
        #u,i遍历完成，所有特征向量更新完成，可以得到P，Q，可以计算预测评分矩阵
        predR=np.dot(P,Q)
        
        #计算损失函数
        cost=0
        for u in range(M):
            for i in range(N):
                if R[u][i]>0:
                    cost+=(np.dot(P[u,:],Q[:,i])-R[u][i])**2
                    #加上正则化项
                    for k in range(K):
                        cost+=lamda*(P[u][k]**2+Q[k][i]**2)
                        
        if cost<0.0001:
            break
                    
                    
    return P,Q.T,cost


# 测试

In [4]:
P,Q,cost=LFM_grad_desc(R,K,max_iter,alpha,lamda)
print("P")
print(P)
print("Q")
print(Q)
print("cost")
print(cost)

predR=P.dot(Q.T)

print("原始值：")
print(R)
print("预测值：")
predR

P
[[ 0.96939949  0.76463904  0.71202142  0.98194911  0.3325981 ]
 [ 0.71799332  1.1296045   1.30291719  1.15045024  0.49324286]
 [ 0.56937799  0.18417822  0.88928239 -0.34351978  1.40828819]
 [ 0.77542152  1.56776237  0.35185496  1.19666577 -0.08591357]
 [ 1.86901823  0.5272786   0.13112224 -0.02013641  0.36925239]
 [ 1.02414323 -0.04335391  0.6735116   0.647399    1.00273154]]
Q
[[1.2213625  1.35814861 0.08665355 1.58797354 0.40196236]
 [1.06416293 0.04776199 0.43703544 0.26615032 1.16001441]
 [0.41634089 0.24565159 0.73397177 0.62936096 0.81996585]
 [2.02012743 0.81306215 0.62833945 0.10240217 1.55219419]
 [0.40799495 0.18199108 0.56828621 0.03122836 0.25351457]]
cost
0.9886373559722076
原始值：
[[4 0 2 0 1]
 [0 2 3 0 0]
 [1 0 2 4 0]
 [5 0 0 3 1]
 [0 0 1 5 1]
 [0 3 2 4 1]]
预测值：


array([[3.97718193, 2.02646294, 2.00475858, 3.64421127, 1.05428267],
       [4.54915291, 2.26579655, 2.66121433, 4.07096823, 1.39991639],
       [1.04319631, 2.54556228, 1.87355746, 4.0094952 , 1.11748307],
       [4.97255409, 1.27265937, 1.64890314, 3.05140951, 0.81723073],
       [3.12668316, 2.49440763, 1.29401684, 4.85784394, 1.02600699],
       [2.68144407, 2.71762151, 2.13973452, 4.07957343, 1.06712685]])