In [1]:
import numpy as np

In [2]:
# 加载训练集、测试集数据（行为用户，列为物品，train[u][i]=rate，test同上）
def load_movielens(path='../ml-100k', k="1"):
    # get movie titles
    movies = {}
    prefs_shape = []
    for line in open(path + '/u.item', encoding='latin-1'):
        id, title = line.split('|')[0:2]
        movies[id] = title
    # load data
    for line in open(path + '/u.info', encoding='latin-1'):
        prefs_shape.append(int(line.split(' ')[0:1][0]))
    train = np.zeros(shape=prefs_shape[0:2], dtype=np.int)
    test = np.zeros(shape=prefs_shape[0:2], dtype=np.int)
    for line in open(path + '/u' + k + '.base', encoding='latin-1'):
        user, movieid, rating, ts = line.split('\t')
        train[int(user)-1][int(movieid)-1] = int(rating)
    for line in open(path + '/u' + k + '.test', encoding='latin-1'):
        user, movieid, rating, ts = line.split('\t')
        test[int(user)-1][int(movieid)-1] = int(rating)
    return train, test
train, test = load_movielens()

In [3]:
# 建立物品相似度矩阵（行列皆为物品）
def item_similarity(train):
    import math
    # 建立物品相似数目的矩阵（行列皆为物品）
    item_simil = np.zeros(shape=[1682, 1682], dtype=np.int)
    for u in range(train.shape[0]):
        for i1 in range(train.shape[1]):
            for i2 in range(i1+1,train.shape[1]):
                if train[u][i1] and train[u][i2]:
                    item_simil[i1][i2] += 1
    for i1 in range(train.shape[1]):
        for i2 in range(i1+1,train.shape[1]):
            item_simil[i2][i1] = item_simil[i1][i2]
    # 由物品相似数目的矩阵得到物品的相似度矩阵（行列皆为物品）
    i_similar = np.zeros(shape=[1682, 1682], dtype=np.float32)
    for i1 in range(item_simil.shape[1]):
        for i2 in range(item_simil.shape[1]):
            if i1 != i2:
                i_similar[i1][i2] = item_simil[i1][i2] / math.sqrt((train.T[i1]>0).sum() * (train.T[i2]>0).sum())
    return i_similar
w = item_similarity(train)



In [4]:
print(w)

[[ 0.          0.36900961  0.35401464 ...,  0.          0.05109761
   0.05109761]
 [ 0.36900961  0.          0.24791192 ...,  0.          0.09759001
   0.09759001]
 [ 0.35401464  0.24791192  0.         ...,  0.          0.          0.11547005]
 ..., 
 [ 0.          0.          0.         ...,  0.          0.          0.        ]
 [ 0.05109761  0.09759001  0.         ...,  0.          0.          0.        ]
 [ 0.05109761  0.09759001  0.11547005 ...,  0.          0.          0.        ]]


In [5]:
# 基于物品相似性为用户 user 推荐物品
def user_based_recommend(data, w, user):
    """基于物品相似性为用户 user 推荐物品

    Args:
    - data: mat, 物品用户矩阵
    - w: mat, 用户之间的相似度
    - user: int, 用户编号

    :return: predict, list, 推荐列表
    """
    m, n = np.shape(data)
    interaction = data[int(user)].T  # 获取用户user评分信息

    # 找到用户user没互动过的物品加入not_inter
    not_inter = []
    for i in range(n):
        if interaction[i] == 0:  # 没评分即为没互动
            not_inter.append(i)
    # 对没有互动过的物品进行预测
    predict = {}
    for x in not_inter:
        item = np.copy(data[:, x])  # 找到所有用户对电影x的评分信息
        for i in range(m):  # 对每一个用户
            if item[i] != 0:
                if x not in predict:
                    predict[x] = w[int(user), i] * item[i]
                else:
                    predict[x] = predict[x] + w[int(user), i] + item[i]
    return sorted(predict.items(), key=lambda d: d[1], reverse=True)

def top_k(predict, n):
    """为用户推荐前 n 个物品

    Args:
    - predict: list, 排好序的物品列表
    - k: int, 推荐的物品个数

    :return: top_recom, list, top n 个物品
    """
    top_recom = []
    len_result = len(predict)
    if n >= len_result:
        top_recom = predict
    else:
        for i in range(n):
            top_recom.append(predict[i])
    return top_recom

In [6]:
def recall(train, test, w):
    hit = 0
    all = 0
    for u in range(train.shape[0]):
        tu = test[u]
        predict = user_based_recommend(train, w, u)
        T = (tu>0).sum()
        pre_tu = top_k(predict, 5)
        for item, _ in pre_tu:
            if item in tu:
                hit += 1
        all += T        
    return hit / (all * 1.0)
recall(train, test, w)

0.073249999999999996

In [7]:
def precision(train, test, w):
    hit = 0
    all = 0
    for u in range(train.shape[0]):
        tu = test[u]
        predict = user_based_recommend(train, w, u)
        pre_tu = top_k(predict, 5)
        for item, _ in pre_tu:
            if item in tu:
                hit += 1
        all += 5
    return hit / (all * 1.0)
precision(train, test, w)

0.3107104984093319

In [8]:
def coverage(train, test, w):
    recommend_items = set()
    all_items = set()
    for u in range(train.shape[0]):
        for i in range(train.shape[1]):
            all_items.add(i)
        predict = user_based_recommend(train, w, u)
        pre_tu = top_k(predict, 5)
        for item, _ in pre_tu:
            recommend_items.add(item)
    return len(recommend_items) / (len(all_items) * 1.0)
coverage(train, test, w)

0.03507728894173603

In [9]:
def coverage(train, test, w):
    recommend_items = set()
    all_items = set()
    for u in range(train.shape[0]):
        for i in range(train.shape[1]):
            all_items.add(i)
        predict = user_based_recommend(train, w, u)
        pre_tu = top_k(predict, 5)
        for item, _ in pre_tu:
            recommend_items.add(item)
    return len(recommend_items) / (len(all_items) * 1.0)
coverage(train, test, w)

0.03507728894173603