In [None]:
"""
    代码7-2 带偏执LFM 的推荐系统
"""

class BinasLFM(object):
    def __init__(self,rating_data,F,alpha=0.1,lmbd=0.1,max_iter=500):
        # rating_data 是 list < (user,list<(position,rate)>) 类型

        pass


In [3]:
# 7-0


from numpy import corrcoef,mat,shape,nonzero,logical_and,eye
import numpy.linalg as la

# similarity calculation

def excludSim(inA,inB):
    dis = 1.0/(1.0+la.norm(inA-inB))
    return dis

def pearSim(inA,inB):
    if (len(inA)<3):
        return 1.0
    dis = 0.5+0.5 * corrcoef(inA,inB,rowvar=0)[0][1]

    return dis

def cosSim(inA,inB):
    tmp = float(inA.T * inB)
    dis = 0.5 + 0.5 * tmp/(la.norm(inA) * la.norm(inB))
    return dis

def loadExtData2():
    return [[0,0,0,0,0,4,0,0,0,5],
            [0,0,0,0,0,4,0,0,0,5],
            [0,0,0,0,0,4,0,0,0,5],
            [0,0,0,0,0,4,0,0,0,5],
            [0,0,0,0,0,4,0,0,0,5]]

def svdEst(dataMat,user,simMeas,item,k):
    if (dataMat[user,item]!=0):
        return dataMat[user,item]
    n = shape(dataMat)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    U,S,V = la.svd(dataMat)
    S3 = mat(eye(k) * S[:k])

    xtormdItems = dataMat.T * U[:,:k]*S3.T

    for j in range(n):
        userRating = dataMat[user,j]
        if (userRating == 0 or j == item):
            continue
        simlarity = simMeas(xtormdItems[item,:].T,xtormdItems[j,:].T)
        simTotal += simlarity
        ratSimTotal += simlarity*userRating
    if (simTotal == 0):
        return 0
    else:
        return ratSimTotal / simTotal


def svdRecommend(dataMat,user,N=3,simMean=cosSim,estTestMean=svdEst,k=3):
    unRateItem = nonzero(dataMat[user,:].A == 0)[1]
    if (len(unRateItem)==0):
        print("There is nothing to recommend~")
    
    retScores = []
    for item in unRateItem:
        itemScore = estTestMean(dataMat,user,simMean,item,k=k)
        retScores.append((item,itemScore))

    return sorted(retScores,key=lambda j:j[1],reverse=True)[:N]



myData = mat(loadExtData2())

U,S,V = la.svd(myData)

S *= S

threshild = sum(S)*0.9
k = 0

for i in range(S.shape[0]+1):
    if (sum(S[:i])) >= threshild:
        k = i
        break

svdItems = svdRecommend(myData,user=3,estTestMean=svdEst,k=k)

print(svdItems)






[(0, nan), (1, nan), (2, nan)]


  dis = 0.5 + 0.5 * tmp/(la.norm(inA) * la.norm(inB))


In [45]:
"""
    LFM 推荐系统
"""
import random,math
class LFM:

    def __init__(self,rating_data,F,alpha=0.1,lmbd=0.1,max_iter = 500):
        """
        
        F ： 隐因子的个数
        alpha: 学习率
        lmbd: 正则化
        max_iter： 最大迭代次数
        """

        self.F = F
        self.P = dict() # R = P Q T  Q相当于Q的转置
        self.Q = dict()
        self.alpha = alpha
        self.lmbd = lmbd
        self.max_iter = max_iter
        self.rating_data = rating_data

        """随机初始化矩阵P和Q"""
        for user,rates in self.rating_data:
            self.P[user] = [random.random() / math.sqrt(self.F) for j in range(self.F)]

            for item,_ in rates:
                if item not in self.Q:
                    self.Q[item] = [random.random() / math.sqrt(self.F) for x in range(self.F)]

            
    def train(self):
        """
        随机梯度下降法训练参数P和Q
        """ 
        for step in range(self.max_iter):
            for user,rates in self.rating_data:
                for item,rui in rates:
                    hat_rui = self.predict(user,item)
                    err_ui = rui - hat_rui
                    for f in range(self.F):
                        self.P[user][f] += self.alpha * (err_ui * self.Q[item][f] - self.lmbd * self.P[user][f])
                        self.Q[item][f] += self.alpha * (err_ui * self.P[user][f] - self.lmbd * self.Q[item][f])

            self.alpha *= 0.9

    def predict(self,user,item):
        return sum(self.P[user][f] * self.Q[item][f] for f in range(self.F))
    


if __name__ == '__main__':
    """用户A B C,物品有 abcd"""
    rating_data = list()
    rate_A = [('a',1.0),('b',1.0)]
    rating_data.append(('A',rate_A))
    rate_B = [('B',10.0),('c',10.0)]
    rating_data.append(('B',rate_B))
    rate_C = [('c',1.0),('d',1.0)]
    rating_data.append(('C',rate_C))

    print(rating_data)
    lfm = LFM(rating_data,2)
    lfm.train()
    for item in ['a','b','c','d']:
        print(item,lfm.predict('A',item))
    print(lfm.P)
    print(lfm.Q)

[('A', [('a', 1.0), ('b', 1.0)]), ('B', [('B', 10.0), ('c', 10.0)]), ('C', [('c', 1.0), ('d', 1.0)])]
a 0.8859412654977252
b 0.8268536628900358
c 3.149860829079766
d 0.7210510191567403
{'A': [0.7777722987963129, 0.7371606283640012], 'B': [1.1511095505729154, 2.9303972714190203], 'C': [0.43531823974329803, 0.16911865051889763]}
{'a': [0.4958475571989893, 0.6786645295921804], 'b': [0.6253009441155084, 0.4619236256574345], 'B': [0.756237934742612, 3.092799653435125], 'c': [1.386223026081324, 2.810371145347083], 'd': [0.7171564430867594, 0.22148036335432872]}


In [None]:
"""
    LFM 推荐系统  带偏执项
"""
import random,math
class LFM1:

    def __init__(self,rating_data,F,alpha=0.1,lmbd=0.1,max_iter = 500):
        """
        rating_data : [(user,[(item,rate)])] 类型
        F ： 隐因子的个数
        alpha: 学习率
        lmbd: 正则化
        max_iter： 最大迭代次数
        """

        self.F = F
        self.P = dict() # R = P Q T  Q相当于Q的转置
        self.Q = dict()
        self.bu = dict()
        self.bi = dict()
        self.alpha = alpha
        self.lmbd = lmbd
        self.max_iter = max_iter
        self.rating_data = rating_data
        self.mu = 0.0

        """随机初始化矩阵P和Q"""
        cnt = 0
        for user,rates in self.rating_data:
            self.P[user] = [random.random() / math.sqrt(self.F) for j in range(self.F)]
            self.bu[user] = 0
            cnt += len(rates)
            for item,rate in rates:
                self.mu += rate
                if item not in self.Q:
                    self.Q[item] = [random.random() / math.sqrt(self.F) for x in range(self.F)]
                # 52:31
                self.bi[item] = 0
        
        self.mu /= cnt
            
    def train(self):
        """
        随机梯度下降法训练参数P和Q
        """ 
        for step in range(self.max_iter):
            for user,rates in self.rating_data:
                for item,rui in rates:
                    hat_rui = self.predict(user,item)
                    err_ui = rui - hat_rui
                    for f in range(self.F):
                        self.P[user][f] += self.alpha * (err_ui * self.Q[item][f] - self.lmbd * self.P[user][f])
                        self.Q[item][f] += self.alpha * (err_ui * self.P[user][f] - self.lmbd * self.Q[item][f])

            self.alpha *= 0.9

    def predict(self,user,item):
        return sum(self.P[user][f] * self.Q[item][f] for f in range(self.F))
    


if __name__ == '__main__':
    """用户A B C,物品有 abcd"""
    rating_data = list()
    rate_A = [('a',1.0),('b',1.0)]
    rating_data.append(('A',rate_A))
    rate_B = [('B',1.0),('c',1.0)]
    rating_data.append(('B',rate_B))
    rate_C = [('c',1.0),('d',1.0)]
    rating_data.append(('C',rate_C))

    lfm = LFM(rating_data,2)
    lfm.train()
    for item in ['a','b','c','d']:
        print(item,lfm.predict('A',item))