In [1]:
# Movie ratings keyed by critic name; each value maps a movie title to that
# critic's numeric rating. Not every critic has rated every movie (e.g. 'Toby'
# has only three ratings), so consumers must work on the titles two critics
# have in common.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}


In [2]:
# Look up one critic's ratings: the result is a dict of movie title -> rating.
critics['Lisa Rose']

{'Lady in the Water': 2.5,
 'Snakes on a Plane': 3.5,
 'Just My Luck': 3.0,
 'Superman Returns': 3.5,
 'You, Me and Dupree': 2.5,
 'The Night Listener': 3.0}

In [3]:
from math import sqrt

# Euclidean distance between the points (4.5, 1) and (4, 2). These values match
# Toby's and Mick LaSalle's ratings for 'Snakes on a Plane' and
# 'You, Me and Dupree' in `critics`.
sqrt(pow(4.5 - 4, 2) + pow(1 - 2, 2))

1.118033988749895

In [4]:
# Reciprocal of the distance above, so that a smaller distance gives a larger
# score. Note: sim_distance uses 1 / (1 + distance) instead, which stays finite
# when the distance is 0.
1 / sqrt(pow(4.5 - 4, 2) + pow(1 - 2, 2))

0.8944271909999159

欧几里德相似度

$$\frac{1}{\sqrt{(x_1 - x_2)^2 + (y_1 - y_2)^2}}$$

注意：下面的 `sim_distance` 在分母上加了 1，即 $\frac{1}{1 + d}$（$d$ 为上式中的欧几里德距离），以避免距离为 0 时除以零。

In [5]:
def sim_distance(prefs: dict[str, dict[str, float]], person1: str, person2: str) -> float:
    """Return a distance-based similarity score for ``person1`` and ``person2``.

    The score is ``1 / (1 + d)``, where ``d`` is the Euclidean distance between
    the two people's ratings over the items both of them have rated.

    Args:
        prefs: Mapping of person name -> {item -> rating}.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        ``0`` when the two people share no rated items; otherwise a value in
        ``(0, 1]``, where ``1.0`` means all shared ratings are identical.

    Raises:
        KeyError: If a looked-up person is missing from ``prefs``.
    """
    # Items rated by both people, kept in person1's iteration order.
    common = [title for title in prefs[person1] if title in prefs[person2]]

    # Nothing in common: there is no basis for comparison.
    if not common:
        return 0

    mine, theirs = prefs[person1], prefs[person2]

    # Sum of squared rating differences over the shared items.
    squared_gap_total = sum((mine[title] - theirs[title]) ** 2 for title in common)

    return 1 / (1 + sqrt(squared_gap_total))

In [6]:
# Distance-based similarity between Lisa Rose and Gene Seymour.
sim_distance(critics, 'Lisa Rose', 'Gene Seymour')

0.29429805508554946

皮尔逊相关系数



In [7]:
def sim_pearson(prefs: dict[str, dict[str, float]], person1: str, person2: str) -> float:
    """Return the Pearson correlation coefficient of two people's ratings.

    Only items rated by both people are considered.

    Args:
        prefs: Mapping of person name -> {item -> rating}.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        The correlation computed over the shared items (mathematically in
        ``[-1, 1]``), or ``0.0`` when it is undefined: the two people share no
        items, or at least one of them gave the same rating to every shared
        item (zero variance).

    Raises:
        KeyError: If a looked-up person is missing from ``prefs``.
    """
    # Items rated by both people.
    shared = [item for item in prefs[person1] if item in prefs[person2]]

    n = len(shared)
    # No overlap means no evidence of similarity, so report 0 rather than a
    # perfect correlation.
    if n == 0:
        return 0.0

    ratings1, ratings2 = prefs[person1], prefs[person2]

    # Plain sums of each person's shared ratings.
    sum1 = sum(ratings1[it] for it in shared)
    sum2 = sum(ratings2[it] for it in shared)

    # Sums of squares.
    sum1sq = sum(ratings1[it] ** 2 for it in shared)
    sum2sq = sum(ratings2[it] ** 2 for it in shared)

    # Sum of products.
    psum = sum(ratings1[it] * ratings2[it] for it in shared)

    num = psum - (sum1 * sum2 / n)

    # n times the variance of each person's shared ratings. Rounding can push a
    # true zero slightly below zero, which would make sqrt() raise, so each
    # term is checked on its own before taking the root.
    spread1 = sum1sq - sum1 ** 2 / n
    spread2 = sum2sq - sum2 ** 2 / n
    if spread1 <= 0 or spread2 <= 0:
        return 0.0

    den = sqrt(spread1 * spread2)
    if den == 0:
        return 0.0

    return num / den

In [8]:
# Pearson correlation between Lisa Rose's and Gene Seymour's ratings.
sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')

0.39605901719066977