In [1]:
import numpy as np
import pandas as pd

In [2]:
def sgd(data_matrix, k, alpha, lam, max_cycles):
    """Factorize a user-item matrix with stochastic gradient descent.

    Only entries greater than 0 are treated as observed ratings. For every
    observed entry (i, j) the objective term is

        (r_ij - p_i . q_j)^2 + lam / 2 * (|p_i|^2 + |q_j|^2)

    and the reported loss is the sum of these terms over all observed entries.

    Args:
    - data_matrix: mat or 2-D array, user-item matrix (entries <= 0 are skipped)
    - k: int, number of latent factors
    - alpha: float, learning rate
    - lam: float, regularization weight
    - max_cycles: int, maximum number of passes over the observed entries
    Returns:
    p, q: mat, factor matrices of shape (m, k) and (k, n)
    """
    ratings = np.asarray(data_matrix)
    m, n = ratings.shape

    # initiate p & q (p first, then q, drawn from NumPy's global RNG)
    p = np.random.random((m, k))
    q = np.random.random((k, n))

    # Coordinates of the observed ratings, in row-major order.
    rows, cols = np.nonzero(ratings > 0)
    observed = list(zip(rows.tolist(), cols.tolist()))
    values = ratings[rows, cols]

    # start training
    for step in range(max_cycles):
        for i, j in observed:
            # Copy the user factors: the item update below must use the value
            # from *before* this step, not the freshly updated one.
            p_i = p[i, :].copy()
            q_j = q[:, j]
            error = ratings[i, j] - np.dot(p_i, q_j)
            p[i, :] = p_i + alpha * (2 * error * q_j - lam * p_i)
            q[:, j] = q_j + alpha * (2 * error * p_i - lam * q_j)

        # calculate loss function, accumulated over every observed rating
        p_obs = p[rows]
        q_obs = q[:, cols].T
        residual = values - np.sum(p_obs * q_obs, axis=1)
        loss = float(np.sum(residual ** 2)
                     + lam * np.sum(p_obs ** 2 + q_obs ** 2) / 2)

        if loss < 0.001:
            break
        if step % 100 == 0:
            print("\titer: %d, loss: %f" % (step, loss))

    # np.asmatrix replaces np.mat, which no longer exists in NumPy 2.x.
    return np.asmatrix(p), np.asmatrix(q)

In [None]:
def prediction(data_matrix, p, q, user, top_n=50):
    """Score the items a user has not interacted with and return the best ones.

    Args:
    - data_matrix: mat or 2-D array, original user-item matrix (0 = no interaction)
    - p: mat or 2-D array, user factors of shape (m, k)
    - q: mat or 2-D array, item factors of shape (k, n)
    - user: int, row index of the user in data_matrix / p
    - top_n: int, maximum number of recommendations to return (default 50)

    Returns:
    - predict: list of (item_index, score) tuples, highest score first
    """
    # Work on plain ndarrays so both np.matrix and np.ndarray inputs behave
    # the same (for an ndarray, `*` would be element-wise, not a dot product).
    interactions = np.asarray(data_matrix)[user]
    scores = np.dot(np.asarray(p)[user], np.asarray(q))

    # Only items with no recorded interaction are candidates.
    unseen = np.flatnonzero(interactions == 0)
    predict = [(int(j), float(scores[j])) for j in unseen]

    # Sort by score, largest first, and keep the top_n entries.
    return sorted(predict, key=lambda d: d[1], reverse=True)[:top_n]

In [None]:
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
train = pd.read_csv('./ml-100k/u1.base', sep='\t', names=r_cols, encoding='latin-1')
test = pd.read_csv('./ml-100k/u1.test', sep='\t', names=r_cols, encoding='latin-1')


def _to_rating_matrix(ratings, n_users=943, n_items=1682):
    """Build a dense (n_users, n_items) rating matrix from a ratings table.

    Args:
    - ratings: DataFrame with 'user_id', 'movie_id' and 'rating' columns;
      ids are 1-based and are shifted to 0-based row/column indices
    - n_users: int, number of rows of the result
    - n_items: int, number of columns of the result

    Returns:
    - matrix: ndarray of floats, 0 where no rating is present
    """
    matrix = np.zeros([n_users, n_items])
    # One vectorized assignment instead of a Python loop that re-materializes
    # `ratings.values` several times per row.
    user_idx = ratings['user_id'].values - 1
    item_idx = ratings['movie_id'].values - 1
    matrix[user_idx, item_idx] = ratings['rating'].values
    return matrix


train_list = _to_rating_matrix(train)
test_list = _to_rating_matrix(test)

In [None]:
# sgd() draws its initial factors from NumPy's global RNG; seed it so the
# learned factors (and every number derived from them) are reproducible.
np.random.seed(42)
p, q = sgd(train_list, k=5, alpha=0.01, lam=0.2, max_cycles=200)

In [None]:
user = 100

# Items this user rated in the held-out test split. prediction() only returns
# items that are unrated in train_list, so comparing against train_list (as
# the previous version did, with an index counter that was never advanced)
# could never produce a meaningful hit.
a = np.flatnonzero(test_list[user]).tolist()
relevant = set(a)

# Count how many of the recommended items the user actually rated in test.
t = sum(1 for item, _score in prediction(train_list, p, q, user) if item in relevant)

# Fraction of hits among the 50 recommendations prediction() returns by default.
print(t / 50)