In [None]:
import numpy as np
import random

In [None]:
def load_movielens(path='./ml-100k'):
    # get movie titles
    movies = {}
    for line in open(path + '/u.item', encoding='latin-1'):
        id, title = line.split('|')[0:2]
        movies[id] = id
    # load data
    prefs = {}
    for line in open(path + '/u.data', encoding='latin-1'):
        user, movieid, rating, ts = line.split('\t')
        prefs.setdefault(user, {})
        prefs[user][movies[movieid]] = float(rating)
    return prefs

In [None]:
prefs = load_movielens()
print(prefs['88'])

In [None]:
def split_data(data, M, k, seed):
    test = []
    train = []
    random.seed(seed)
    for user in data:
        if random.randint(0, M) == k:
            test.append([user, data[user]])
        else:
            train.append([user, data[user]])
    return train, test

In [None]:
train, test = split_data(prefs, 8, 1, 1)
def changement(data):
    x = []
    y = []
    for i in range(len(data)):
        for j in range(1682):
            x.append(data[i][1].get(str(j+1), 0.0))
        y.append(x)
        x = []
    y = np.array(y)
    return y
train = changement(train)
test = changement(test)

In [None]:
def cos_sim(x, y):
    ton = x * y.T
    c = len(ton[ton>0])
    a = len(x[x>0])
    b = len(y[y>0])
    denominator = np.sqrt(a*b)
    if denominator>0:
        res = c / denominator
    else:
        res = 0
    return res

In [None]:
def similarity(data):
    """计算矩阵中任意两行之间的相似度
    Args:
    - data: mat, 任意矩阵

    :return: w, mat, 任意两行之间的相似度
    """

    m = np.shape(data)[0]  # 用户的数量
    # 初始化相似矩阵
    w = np.mat(np.zeros((m, m)))

    for i in range(m):
        for j in range(i, m):
            if not j == i:
                # 计算任意两行之间的相似度
                w[i, j] = cos_sim(data[i], data[j])
                w[j, i] = w[i, j]
            else:
                w[i, j] = 0
    return w

In [None]:
w = similarity(train)

In [16]:
def item_based_recommend(data, w, user):
    """
    基于物品相似度为用户 user 推荐物品

    Args:
    - data: mat, 物品用户矩阵
    - w: mat, 物品与物品之间的相似性
    - user: int, 用户编号

    :return: predict, list, 推荐列表
    """

    m, n = np.shape(data)  # m: 物品数量 n: 用户数量
    interaction = data[:, user].T  # 用户 user 互动物品信息

    # 找到用户 user 没有互动的商品
    not_iter = []
    for i in range(m):
        if interaction[i] == 0:  # 用户 user 未打分项
            not_iter.append(i)

    # 对没有互动过的物品进行预测
    predict = {}
    for x in not_iter:
        item = np.copy(interaction)  # 获取用户 user 对物品的互动信息
        for j in range(m):   # 对每一个物品
            if item[j] != 0:  # 利用互动过的物品预测
                if x not in predict:
                    predict[x] = w[x, j] * item[j]
                else:
                    predict[x] = predict[x] + w[x, j] * item[j]
    # 按照预测的大小从大到小排序
    return sorted(predict.items(), key=lambda d: d[1], reverse=True)

In [18]:
predict = item_based_recommend(train, w, 5)
predict

[(415, 32.099192552287796),
 (75, 30.382292195423638),
 (46, 30.250015070529169),
 (518, 30.046075471839281),
 (19, 29.917161175661857),
 (15, 29.872875043860965),
 (355, 29.715752067356402),
 (94, 29.597534276776006),
 (277, 29.419469109184035),
 (470, 29.400821906606573),
 (587, 29.017222010071347),
 (307, 28.853466588411866),
 (492, 28.743158987073162),
 (61, 28.690041816125191),
 (471, 28.528276572127925),
 (786, 28.5126501976554),
 (391, 28.499638221831113),
 (656, 28.472587107661951),
 (18, 28.397447059484755),
 (359, 28.358401492779191),
 (140, 28.29046192152726),
 (37, 28.279212933208349),
 (6, 28.270383390751018),
 (147, 28.230293896800195),
 (52, 28.146977966183925),
 (371, 28.112206931846785),
 (301, 27.890190836964763),
 (752, 27.841426967392863),
 (12, 27.705286985402562),
 (295, 27.347704913108441),
 (92, 27.312041430451046),
 (154, 27.235569372878995),
 (302, 27.176650547189517),
 (14, 27.084773198090875),
 (131, 27.047913876764198),
 (806, 27.007613534418756),
 (782, 26

In [None]:
# top_n 
def top_k(predict, n):
    top_recom = []
    len_result = len(predict)
    if n>= len_result:
        top_recom = predict
    else:
        for i in range(n):
            top_recom.append(predict[i])
    return top_recom
top_k(predict, 5)