In [1]:
# Load the critics' ratings from the local `data` module, then look up a single
# rating (indexed first by critic name, then by movie title) as a sanity check.
from data import critics
critics['Lisa Rose']['Snakes on a Plane']

3.5

### 尋找相似用戶

In [2]:
# 歐式距離
from math import sqrt

# Distance-based similarity score between p1 and p2
def os_distance(data, p1, p2):
    """Score how close p1's and p2's ratings are on the items both have rated.

    Args:
        data: mapping of name -> {item: numeric rating}.
        p1, p2: keys of ``data`` to compare.

    Returns:
        ``1 / (1 + d)`` where ``d`` is the sum of squared rating differences
        over the shared items (note: the squared distance, no square root is
        taken), or 0 when the two have no item in common.
    """
    # Items rated by both, in p1's iteration order.
    common = [item for item in data[p1] if item in data[p2]]

    # Nothing to compare: report no similarity.
    if not common:
        return 0

    squared_gap = sum((data[p1][item] - data[p2][item]) ** 2 for item in common)

    return 1/(1+squared_gap)

In [3]:
# Distance-based similarity score for two critics
os_distance(critics, 'Lisa Rose', 'Gene Seymour')

0.14814814814814814

In [4]:
# Pearson correlation coefficient

def sim_pearson(data, p1, p2):
    """Return the Pearson correlation between p1's and p2's ratings.

    Only items rated by both p1 and p2 take part in the computation.

    Args:
        data: mapping of name -> {item: numeric rating}.
        p1, p2: keys of ``data`` to compare.

    Returns:
        The correlation coefficient r, or 0 when r is undefined: the two
        share no items, or the shared ratings of either side have no spread.
    """
    # Items rated by both p1 and p2.
    shared = [item for item in data[p1] if item in data[p2]]

    n = len(shared)
    # Without shared items every term below would divide by zero.
    if n == 0:
        return 0

    # Sums of the ratings
    sum1 = sum([data[p1][it] for it in shared])
    sum2 = sum([data[p2][it] for it in shared])

    # Sums of the squares
    sum1Sq = sum([pow(data[p1][it], 2) for it in shared])
    sum2Sq = sum([pow(data[p2][it], 2) for it in shared])

    # Sum of the products
    pSum = sum([data[p1][it]*data[p2][it] for it in shared])

    # Numerator of r
    num = pSum - (sum1*sum2/n)

    # Per-side spread terms. They are non-negative mathematically, but
    # floating-point rounding can push a (near-)constant rating set slightly
    # below zero, which would make sqrt() raise; treat that like zero spread.
    spread1 = sum1Sq - pow(sum1, 2)/n
    spread2 = sum2Sq - pow(sum2, 2)/n
    if spread1 <= 0 or spread2 <= 0:
        return 0

    den = sqrt(spread1*spread2)
    # The product of two tiny positive spreads can still underflow to 0.
    if den == 0:
        return 0

    return num/den

In [5]:
# Pearson correlation between two critics' ratings
sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')

0.39605901719066977

### User CF

In [6]:
# Recommend critics (the ones whose ratings are most similar to this user's)

def topMatches(data, person, n=5, similarity=sim_pearson):
    """Return the entries of ``data`` most similar to ``person``.

    Args:
        data: mapping of name -> {item: rating}.
        person: key of ``data`` to find matches for; it is excluded from
            the result.
        n: maximum number of matches to return.
        similarity: callable ``(data, a, b) -> score``.

    Returns:
        A list of ``(score, name)`` tuples in descending order, at most
        ``n`` long.
    """
    scored = [(similarity(data, person, candidate), candidate)
              for candidate in data
              if candidate != person]

    # Ascending tuple sort, then flipped so the best match comes first.
    best_first = sorted(scored)[::-1]

    return best_first[0: n]

In [7]:
# Critics most similar to 'Toby' (defaults: top 5, Pearson similarity)
topMatches(critics, 'Toby')

[(0.9912407071619299, 'Lisa Rose'),
 (0.9244734516419049, 'Mick LaSalle'),
 (0.8934051474415647, 'Claudia Puig'),
 (0.66284898035987, 'Jack Matthews'),
 (0.38124642583151164, 'Gene Seymour')]

In [8]:
# Recommend movies, scoring each one with a similarity-weighted rating.
# Returns the weighted average of everyone else's ratings, normalized and
# sorted, as recommendations for the given critic.
# Final score: r = sum(rating * sim) / sum(sim)

def getRecommendations(data, person, similarity=sim_pearson):
    """Rank the items ``person`` has not rated by a predicted score.

    Every other entry with a positive similarity to ``person`` contributes
    its ratings, weighted by that similarity; each item's weighted total is
    then divided by the sum of the similarities that contributed to it.

    Args:
        data: mapping of name -> {item: rating}.
        person: key of ``data`` to recommend for.
        similarity: callable ``(data, a, b) -> score``.

    Returns:
        A list of ``(predicted_score, item)`` tuples, highest score first.
        Only items missing from ``data[person]`` or rated 0 there appear.
    """
    weighted = {}
    weights = {}

    for other in data:
        # Never compare the person with themselves.
        if other==person:
            continue

        sim = similarity(data, person, other)

        # Zero or negative similarity contributes nothing.
        if sim<=0:
            continue

        for item, score in data[other].items():
            unseen = item not in data[person] or data[person][item]==0
            if not unseen:
                continue

            # Running sum of similarity * rating
            weighted[item] = weighted.get(item, 0) + score*sim

            # Running sum of the similarities behind that total
            weights[item] = weights.get(item, 0) + sim

    # Normalize each total by its similarity sum
    ranked = [(total/weights[item], item) for item, total in weighted.items()]

    # Ascending tuple sort, flipped so the best score comes first
    return sorted(ranked)[::-1]

In [9]:
# Movies 'Toby' has not rated, ranked by similarity-weighted score
getRecommendations(critics, 'Toby')

[(3.3477895267131017, 'The Night Listener'),
 (2.8325499182641614, 'Lady in the Water'),
 (2.530980703765565, 'Just My Luck')]

### Item CF
<p>當沒有蒐集到關於用戶的足夠資訊時，可以通過查看哪些人喜歡某一特定物品，以及這些人喜歡哪些其他物品來決定相似度。因此只需將之前字典裡的人與物品進行對換，就可以復用之前相關方法</p>

In [10]:
# Swap the people and the items in the dictionary

def transformData(data):
    """Invert a two-level mapping.

    Args:
        data: mapping of person -> {item: value}.

    Returns:
        A new mapping of item -> {person: value} holding the same values.
    """
    flipped = {}
    for person, ratings in data.items():
        for item, value in ratings.items():
            flipped.setdefault(item, {})[person] = value

    return flipped

In [11]:
# Recommend movies: invert the critics data so it is keyed by movie, then rank
# the other movies by similarity to 'The Night Listener'

movies = transformData(critics)
topMatches(movies, 'The Night Listener')

[(0.5555555555555556, 'Just My Luck'),
 (-0.1798471947990544, 'Superman Returns'),
 (-0.250000000000002, 'You, Me and Dupree'),
 (-0.5663521139548527, 'Snakes on a Plane'),
 (-0.6123724356957927, 'Lady in the Water')]

In [12]:
# Recommend critics for a movie: run on the movie-keyed data, the ranked
# entries are critics who have not rated 'You, Me and Dupree'
getRecommendations(movies, 'You, Me and Dupree')

[(3.1637361366111816, 'Michael Phillips')]