In [1]:
import math
from collections import defaultdict
from itertools import combinations

import numpy as np

In [2]:
# Input data format: {user_id: {item_id: rating}}.
ratings = {
    "A": {"i1": 1, "i2": 1, "i3": 1},
    "B": {"i1": 1, "i3": 1, "i4": 1},
    "C": {"i2": 1, "i4": 1, "i5": 1},
    "D": {"i1": 1, "i2": 1, "i4": 1, "i5": 1},
    "E": {"i3": 1, "i4": 1},
}


In [None]:
class ItemCF:
    """Item-based collaborative filtering recommender.

    Item-to-item cosine similarities are computed once, at construction
    time, from how often two items appear in the same user's ratings.
    ``recommend`` then scores the items a user has not interacted with.
    """

    def __init__(self, user_item_ratings):
        """
        Store the ratings and precompute the item similarity table.

        :param user_item_ratings: dict, {user_id: {item_id: rating}}
        """
        self.user_item_ratings = user_item_ratings
        self.item_similarity = None
        self._build_item_similarity()

    def _build_item_similarity(self):
        """Compute the cosine similarity of every pair of co-occurring items.

        Fills ``self.item_similarity`` with ``{item_i: {item_j: w_ij}}`` where
        ``w_ij = C[i][j] / sqrt(N[i] * N[j])``. ``C[i][j]`` is the number of
        users who interacted with both items and ``N[i]`` is the number of
        users who interacted with item ``i``. Pairs that never co-occur are
        not stored.
        """
        co_counts = defaultdict(lambda: defaultdict(int))  # C[i][j]
        item_popularity = defaultdict(int)                 # N[i]

        # Count per user: two items co-occur when the same user has both.
        # (Pairing the users of each item instead would yield a user-user
        # table that ``recommend`` could never look up by item id.)
        for items in self.user_item_ratings.values():
            for item in items:
                item_popularity[item] += 1
            # Unordered pairs; store both directions so lookups are symmetric.
            for i, j in combinations(items, 2):
                co_counts[i][j] += 1
                co_counts[j][i] += 1

        self.item_similarity = defaultdict(dict)
        for i, related_items in co_counts.items():
            for j, c_ij in related_items.items():
                # Both popularity counts are >= 1 here because the pair was
                # observed for at least one user, so the division is safe.
                norm = math.sqrt(item_popularity[i] * item_popularity[j])
                self.item_similarity[i][j] = c_ij / norm

    def recommend(self, user, K=10, N=5):
        """
        Recommend up to N items the user has not interacted with.

        :param user: user id
        :param K: number of most similar items considered for each item the
                  user has rated
        :param N: number of recommendations to return (Top-N)
        :return: [(item, score), ...] sorted by descending score; an empty
                 list when the user is unknown
        """
        if user not in self.user_item_ratings:
            return []

        user_ratings = self.user_item_ratings[user]
        scores = defaultdict(float)

        for i, rating in user_ratings.items():
            # ``.get`` avoids inserting an empty entry into the defaultdict.
            neighbours = self.item_similarity.get(i)
            if not neighbours:
                continue
            # The K items most similar to i.
            top_k = sorted(
                neighbours.items(),
                key=lambda pair: pair[1],
                reverse=True,
            )[:K]

            for j, sim in top_k:
                if j in user_ratings:
                    continue  # never recommend something already seen
                # Weight the similarity by the user's rating of item i.
                scores[j] += sim * rating

        return sorted(scores.items(), key=lambda pair: pair[1], reverse=True)[:N]


