# 集合知プログラミング

/Users/runa/python/user_lib に保存してある関数の説明

def sim_distance / def sim_pearson / def topMatches / def getRecommendations

ユークリッド距離によるスコア / ピアソン関数によるスコア / 評者をランキングする / アイテムを推薦する

In [None]:
#データ
critics = {'Lisa Rose': {'Lady in the Water': 2.5, 'Sanakes on a Plane': 3.5, 'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5, 'The Night Listener': 3.0},
                 'Gene Seymour': {'Lady in the Water': 3.0, 'Sanakes on a Plane': 3.5, 'Just My Luck': 1.5, 'Superman Returns': 5.0, 'You, Me and Dupree': 3.5, 'The Night Listener': 3.0},
                 'Michael Phillips': {'Lady in the Water': 2.5, 'Sanakes on a Plane': 3.0, 'Superman Returns': 3.5, 'The Night Listener': 4.0},
                 'Claudia Puig': {'Sanakes on a Plane': 3.5, 'Just My Luck': 3.0, 'Superman Returns': 4.0, 'You, Me and Dupree': 2.5, 'The Night Listener': 4.5},
                 'Mick LaSalle': {'Lady in the Water': 3.0, 'Sanakes on a Plane': 4.0, 'Just My Luck': 2.0, 'Superman Returns': 3.0, 'You, Me and Dupree': 2.0, 'The Night Listener': 3.0},
                 'Jack Matthews': {'Lady in the Water': 3.0, 'Sanakes on a Plane': 4.0, 'Just My Luck': 2.0, 'Superman Returns': 5.0, 'You, Me and Dupree': 3.5, 'The Night Listener': 3.0},
                 'Toby': {'Sanakes on a Plane': 4.5, 'Superman Returns': 4.0, 'You, Me and Dupree': 1.0}}

In [2]:
critics['Lisa Rose']['Lady in the Water']

2.5

In [3]:
critics['Toby']['Snakes on a Plane'] = 4.5

In [4]:
critics['Toby']

{'Sanakes on a Plane': 4.5,
 'Snakes on a Plane': 4.5,
 'Superman Returns': 4.0,
 'You, Me and Dupree': 1.0}

# ユークリッド距離によるスコア

In [5]:
from math import sqrt

In [18]:
def sim_distance(prefs, person1, person2):
    si={}
    for item in prefs[person1]:
        if item in prefs[person2]: si[item]=1
            
    if len(si)==0: return 0
    
    sum_of_squares=sum([pow(prefs[person1][item]-prefs[person2][item], 2) for item in prefs[person1] if item in prefs[person2]])
    
    return 1/(1+ sum_of_squares)

In [19]:
sim_distance(critics, 'Lisa Rose', 'Gene Seymour')

0.14814814814814814

# ピアソン相関によるスコア

In [41]:
def sim_pearson(prefs, p1, p2):
    si={}
    for item in prefs[p1]:
        if item in prefs[p2]: si[item]=1

    n=len(si)
            
    if n==0: return 0
    
    sum1=sum([prefs[p1][it] for it in si])
    sum2=sum([prefs[p2][it] for it in si])
    
    sum1Sq=sum([pow(prefs[p1][it], 2) for it in si])
    sum2Sq=sum([pow(prefs[p2][it], 2) for it in si])
    
    pSum=sum([prefs[p1][it]*prefs[p2][it] for it in si])
    
    num=pSum-(sum1*sum2/n)
    den=sqrt((sum1Sq-pow(sum1, 2)/n)*(sum2Sq-pow(sum2, 2)/n))
    
    if den==0: return 0
    
    r=num/den
    
    return r

In [42]:
sim_person(critics, 'Lisa Rose', 'Gene Seymour')

0.39605901719066977

# 評者をランキングする

In [43]:
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    scores=[(similarity(prefs, person, other), other) for other in prefs if other!=person]
    
    scores.sort()
    scores.reverse()
    return scores[0:n]

In [45]:
topMatches(critics, 'Toby', n=3)

[(0.9912407071619299, 'Lisa Rose'),
 (0.9244734516419049, 'Mick LaSalle'),
 (0.8934051474415647, 'Claudia Puig')]

# アイテムを推薦する

In [55]:
def getRecommendations(prefs, person, similarity=sim_pearson):
    totals={}
    simSums={}
    for other in prefs:
        if other==person: continue
        sim=similarity(prefs, person, other)
        
        if sim<=0: continue
            
        for item in prefs[other]:
            if item not in prefs[person] or prefs[person][item]==0:
                totals.setdefault(item, 0)
                totals[item]+=prefs[other][item]*sim
                
                simSums.setdefault(item, 0)
                simSums[item]+=sim
                
        rankings=[(total/simSums[item], item) for item,total in totals.items()]
        
        rankings.sort()
        rankings.reverse()
        
        return rankings

In [56]:
getRecommendations(critics, 'Toby')

[(3.0, 'The Night Listener'),
 (3.0, 'Lady in the Water'),
 (2.0, 'Just My Luck')]