In [1]:
from numpy import *
from numpy import linalg as la


def loadExData():
    """Return the small 7x5 example rating table as a list of row lists.

    Each call builds a fresh nested list.  The estimators in this notebook
    index the table as ``[user, item]`` and treat a 0 entry as "not rated".
    """
    rows = [
        [0, 0, 0, 2, 2],
        [0, 0, 0, 3, 3],
        [0, 0, 0, 1, 1],
        [1, 1, 1, 0, 0],
        [2, 2, 2, 0, 0],
        [5, 5, 5, 0, 0],
        [1, 1, 1, 0, 0],
    ]
    return rows


def loadExData2():
    """Return the larger 11x11 example rating table as a list of row lists.

    Each call builds a fresh nested list; 0 entries are the ones the
    estimators in this notebook treat as "not rated".
    """
    rows = [
        [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
        [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
        [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
        [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
        [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
        [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
        [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
        [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
        [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
        [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
        [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0],
    ]
    return rows


def ecludSim(inA, inB):
    """Euclidean-distance similarity, ``1 / (1 + ||inA - inB||)``.

    Parameters
    ----------
    inA, inB : array-like
        Two rating vectors of the same shape (column matrices, 1-D arrays
        or plain sequences).  They are treated as real-valued.

    Returns
    -------
    float
        A value in (0, 1]; exactly 1.0 when both vectors are identical and
        approaching 0 as they move apart.
    """
    # Cast to float *before* subtracting: with unsigned or narrow integer
    # dtypes the difference would otherwise wrap around (e.g. uint8 1 - 3
    # gives 254) and inflate the distance.
    diff = asarray(inA, dtype=float) - asarray(inB, dtype=float)
    return 1.0 / (1.0 + la.norm(diff))


def pearsSim(inA, inB):
    """Pearson-correlation similarity rescaled from [-1, 1] to [0, 1].

    Inputs with fewer than three entries short-circuit to 1.0.  Otherwise
    the off-diagonal entry of ``corrcoef(inA, inB, rowvar=0)`` is mapped
    through ``0.5 + 0.5 * r``.
    """
    if len(inA) >= 3:
        correlation = corrcoef(inA, inB, rowvar=0)
        return 0.5 + 0.5 * correlation[0][1]
    return 1.0


def cosSim(inA, inB):
    """Cosine similarity rescaled from [-1, 1] to [0, 1].

    Parameters
    ----------
    inA, inB : array-like
        Two vectors with the same number of elements.  Column matrices,
        row matrices, 1-D arrays and plain sequences are all accepted; each
        input is flattened before use.

    Returns
    -------
    float
        ``0.5 + 0.5 * cos(angle)``: 1.0 for parallel vectors, 0.5 for
        orthogonal ones, 0.0 for anti-parallel ones.

    Notes
    -----
    The denominator is the product of the two norms, so an all-zero input
    divides by zero; NumPy is expected to yield NaN with a runtime warning
    in that case rather than raising.
    """
    vecA = asarray(inA, dtype=float).ravel()
    vecB = asarray(inB, dtype=float).ravel()
    # dot() of two 1-D vectors is already a scalar, which avoids the
    # deprecated float() conversion of a 1x1 matrix product.
    num = float(dot(vecA, vecB))
    denom = la.norm(vecA) * la.norm(vecB)
    return 0.5 + 0.5 * (num / denom)


# Build the example rating matrix.  asmatrix() is used instead of the old
# `mat` alias, which NumPy 2.0 removed; the resulting object is the same
# numpy.matrix type.
myMat = asmatrix(loadExData())
myMat

matrix([[0, 0, 0, 2, 2],
        [0, 0, 0, 3, 3],
        [0, 0, 0, 1, 1],
        [1, 1, 1, 0, 0],
        [2, 2, 2, 0, 0],
        [5, 5, 5, 0, 0],
        [1, 1, 1, 0, 0]])

In [2]:
# Euclidean similarity of item column 0 against column 4, then against itself
# (identical vectors have distance 0, so the second value is 1.0).
ecludSim(myMat[:, 0], myMat[:, 4]), ecludSim(myMat[:, 0], myMat[:, 0])

(0.12973190755680383, 1.0)

In [3]:
# Cosine similarity of item column 0 against column 4, then against itself.
# Columns 0 and 4 have no non-zero rows in common, so their dot product is 0
# and the rescaled score is 0.5.
cosSim(myMat[:, 0], myMat[:, 4]), cosSim(myMat[:, 0], myMat[:, 0])

(0.5, 1.0)

In [4]:
# Pearson similarity of item column 0 against column 4, then against itself.
pearsSim(myMat[:, 0], myMat[:, 4]), pearsSim(myMat[:, 0], myMat[:, 0])

(0.20596538173840329, 1.0)

In [5]:
# def standEst(dataMat, user, simMeas, item):
#     n = shape(dataMat)[1]
#     simTotal = 0.0
#     ratSimTotal = 0.0
#     for j in range(n):
#         userRating = dataMat[user, j]
#         if userRating == 0: continue
#         overLap = nonzero(logical_and(dataMat[:,item].A>0, \
#                                       dataMat[:,j].A>0))[0]

#         print('dataMat[:,item]=',dataMat[:,item])
#         print('dataMat[:,j]=',dataMat[:,j])
#         print('overLap=', overLap)

#         if len(overLap) == 0: similarity = 0
#         else:            similarity = simMeas(dataMat[overLap,item], \
#                     dataMat[overLap,j])
#         print('the %d and %d similarity is: %f' % (item, j, similarity))
#         simTotal += similarity
#         ratSimTotal += similarity * userRating
#     if simTotal == 0: return 0
#     else: return ratSimTotal / simTotal


def standEst(dataMat, user, simMeas, item):
    """Estimate ``user``'s rating of ``item`` from item-to-item similarity.

    For every other item the user has rated (non-zero entry), the rows in
    which both that item and ``item`` are rated (> 0) are collected, and
    ``simMeas`` is evaluated on those two column slices.  The estimate is
    the similarity-weighted average of the user's own ratings.

    Parameters
    ----------
    dataMat : 2-D matrix indexed as ``[row, column]`` = ``[user, item]``
    user : row index of the user
    simMeas : callable taking two column slices, returning a similarity
    item : column index of the item to estimate

    Returns
    -------
    The weighted average, or 0 when the similarities sum to zero.
    """
    numItems = shape(dataMat)[1]
    weightSum = 0.0
    weightedRatings = 0.0
    for other in range(numItems):
        rating = dataMat[user, other]
        if rating == 0:
            # the user has not rated this item, so it carries no evidence
            continue

        # rows (users) that rated both the target item and this one
        coRaters = [row for row in range(len(dataMat))
                    if dataMat[row, item] > 0 and dataMat[row, other] > 0]

        if coRaters:
            sim = simMeas(dataMat[coRaters, item], dataMat[coRaters, other])
        else:
            sim = 0
        weightSum += sim
        weightedRatings += sim * rating

    if weightSum == 0:
        return 0
    return weightedRatings / weightSum


def svdEst(dataMat, user, simMeas, item, numSV=4):
    """Estimate ``user``'s rating of ``item`` in an SVD-reduced item space.

    The rating table is decomposed with ``la.svd`` and every item (column)
    is projected to ``k`` dimensions via
    ``dataMat.T * U[:, :k] * inverse(diag(Sigma[:k]))``.  Similarities
    between the target item and each other item the user has rated are then
    computed on those reduced vectors, and the estimate is the
    similarity-weighted average of the user's ratings.

    Parameters
    ----------
    dataMat : array-like, indexed as ``[user, item]``
        Converted with ``asmatrix`` so matrices, 2-D arrays and nested
        lists all work.
    user : row index of the user
    simMeas : callable taking two column vectors, returning a similarity
    item : column index of the item to estimate
    numSV : int, optional
        Number of leading singular values to keep (default 4, the value
        previously hard-coded).  It is clamped to the number of singular
        values available, so small tables no longer fail.

    Returns
    -------
    The weighted average, or 0 when the similarities sum to zero.

    Notes
    -----
    The SVD is recomputed on every call.  The diagonal matrix of kept
    singular values is inverted, so an exactly-zero singular value among
    the kept ones is expected to raise ``LinAlgError``.
    """
    dataMat = asmatrix(dataMat)
    n = shape(dataMat)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    U, Sigma, _ = la.svd(dataMat)
    # Never ask for more singular values than the decomposition produced.
    k = min(numSV, len(Sigma))
    SigK = asmatrix(eye(k) * Sigma[:k])
    # One row per item, k columns.
    xformedItems = dataMat.T * U[:, :k] * SigK.I
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T,
                             xformedItems[j, :].T)
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal


def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return up to ``N`` best-scoring unrated items for ``user``.

    Items whose entry in the user's row equals 0 are scored with
    ``estMethod(dataMat, user, simMeas, item)``.  The result is a list of
    ``(item, estimatedScore)`` tuples sorted by score, highest first.  If
    the user has no zero entries, the string ``'you rated everything'`` is
    returned instead of a list.
    """
    userRow = dataMat[user, :].A
    # column indices where the user's rating is 0
    unratedItems = nonzero(userRow == 0)[1]
    if len(unratedItems) == 0:
        return 'you rated everything'

    scored = [(item, estMethod(dataMat, user, simMeas, item))
              for item in unratedItems]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:N]

In [6]:
# Rebuild the example matrix (asmatrix replaces the `mat` alias that NumPy 2.0
# removed) and overwrite four of its zero entries with a rating of 4.
myMat = asmatrix(loadExData())
myMat[0, 1] = myMat[0, 0] = myMat[1, 0] = myMat[2, 0] = 4
myMat

matrix([[4, 4, 0, 2, 2],
        [4, 0, 0, 3, 3],
        [4, 0, 0, 1, 1],
        [1, 1, 1, 0, 0],
        [2, 2, 2, 0, 0],
        [5, 5, 5, 0, 0],
        [1, 1, 1, 0, 0]])

In [7]:
# Recommendations for user 2 with the defaults: cosSim similarity, standEst
# estimator, at most N=3 results.
recommend(myMat, 2)

[(2, 4.0), (1, 2.0)]

In [8]:
# Same user, scored with the Euclidean-distance similarity instead.
recommend(myMat, 2, simMeas=ecludSim)

[(2, 4.0), (1, 2.8)]

In [9]:
# Same user, scored with the Pearson-correlation similarity.
recommend(myMat, 2, simMeas=pearsSim)

[(2, 4.0), (1, 2.0)]

In [10]:
# Recommendations for user 1 using the SVD-based estimator (default cosSim).
recommend(myMat, 1, estMethod=svdEst)

[(1, 3.333333333333333), (2, 3.3333333333333326)]