#  基于物品的协同过滤算法

读取数据

In [2]:
import math
import numpy as np

def load_movielens(path='./ml-100k'):
    """Load MovieLens ratings as nested per-user dictionaries.

    Reads ``u.item`` (pipe-separated; the first two fields are movie id and
    title) and ``u.data`` (tab-separated: user, movie id, rating, timestamp)
    from the directory ``path``.

    Args:
        path: Directory containing ``u.item`` and ``u.data``. May be a
            string or a ``pathlib.Path``.

    Returns:
        dict: ``{user_id: {movie_title: rating}}`` where ``user_id`` and
        ``movie_title`` are strings and ``rating`` is a float. Ratings are
        keyed by title, so two movie ids that share a title overwrite each
        other within one user's dictionary.

    Raises:
        KeyError: If ``u.data`` references a movie id missing from ``u.item``.
        ValueError: If a non-blank line does not have the expected fields.
    """
    # Map movie id -> title.
    movies = {}
    with open(f'{path}/u.item', encoding='latin-1') as item_file:
        for line in item_file:
            if not line.strip():
                continue  # tolerate blank / trailing lines
            movie_id, title = line.split('|')[0:2]
            movies[movie_id] = title

    # Build user -> {title: rating}.
    prefs = {}
    with open(f'{path}/u.data', encoding='latin-1') as data_file:
        for line in data_file:
            if not line.strip():
                continue  # tolerate blank / trailing lines
            user, movieid, rating, _timestamp = line.split('\t')
            prefs.setdefault(user, {})[movies[movieid]] = float(rating)
    return prefs

# Load every user's ratings, then keep only user '7' and list the titles
# that user has rated.
prefs = load_movielens()
prefs = prefs['7']
print(prefs.keys())

dict_keys(['Crumb (1994)', 'Vertigo (1958)', "Jackie Chan's First Strike (1996)", 'Adventures of Priscilla, Queen of the Desert, The (1994)', 'Return of the Pink Panther, The (1974)', 'Duck Soup (1933)', 'Bringing Up Baby (1938)', 'East of Eden (1955)', 'High Noon (1952)', 'Quiet Man, The (1952)', 'Miracle on 34th Street (1994)', 'Shining, The (1980)', 'Misérables, Les (1995)', 'Waterworld (1995)', 'Dragonheart (1996)', 'Angels in the Outfield (1994)', 'Thin Blue Line, The (1988)', 'Somewhere in Time (1980)', 'Delicatessen (1991)', 'Grease (1978)', 'Cat People (1982)', 'Crucible, The (1996)', 'Brady Bunch Movie, The (1995)', 'Hudsucker Proxy, The (1994)', 'Cliffhanger (1993)', 'Star Trek: The Wrath of Khan (1982)', 'Willy Wonka and the Chocolate Factory (1971)', 'Englishman Who Went Up a Hill, But Came Down a Mountain, The (1995)', 'Kalifornia (1993)', 'Diabolique (1996)', 'Killing Fields, The (1984)', 'The Innocent (1994)', 'Seventh Seal, The (Sjunde inseglet, Det) (1957)', 'Piano, Th

建立模型：基于物品的ItemCF-IUF算法

In [1]:
def item_based_recommend(data, w, user):
    """Rank the items ``user`` has not interacted with by predicted score.

    The score of an unrated item ``x`` is the similarity-weighted sum of the
    user's existing ratings: ``sum_j w[x, j] * data[j, user]`` over every
    item ``j`` whose rating is non-zero.

    Args:
        data: 2-D item-by-user matrix (rows are items, columns are users);
            a zero entry means "no interaction". Accepts ``np.matrix`` or a
            plain ``ndarray``.
        w: 2-D item-by-item similarity matrix (``np.matrix`` or ``ndarray``).
        user: int, column index of the target user in ``data``.

    Returns:
        list of ``(item_index, score)`` tuples sorted by score, highest
        first. Only items the user has not rated appear. The list is empty
        when the user has rated nothing or has rated everything.
    """
    # np.asarray gives a uniform ndarray view for both np.matrix and ndarray
    # inputs, so the column can be handled as a flat 1-D vector.
    ratings = np.asarray(data)[:, user].ravel()
    similarity = np.asarray(w)

    rated = np.flatnonzero(ratings != 0)    # items usable as evidence
    unrated = np.flatnonzero(ratings == 0)  # candidate items to score

    # Without any rated item there is nothing to predict from, and without
    # any unrated item there is nothing to recommend.
    if rated.size == 0 or unrated.size == 0:
        return []

    # One matrix-vector product replaces the nested per-item Python loops.
    scores = similarity[np.ix_(unrated, rated)] @ ratings[rated]

    # sorted() is stable, so ties keep ascending item-index order.
    return sorted(zip(unrated.tolist(), scores),
                  key=lambda d: d[1], reverse=True)

在ml-100k上进行测试