# In [None]:
import random
import math
import numpy as np


class Model(object):
    """Matrix-factorisation recommender with simple offline metrics.

    Attributes:
        p: user-factor matrix (users x k) set by ``gradDes``; None before.
        q: item-factor matrix (k x items) set by ``gradDes``; None before.
        mateData: dense rating matrix most recently built by ``newList``.
    """

    def __init__(self):
        self.p = None
        self.q = None
        self.mateData = None

    def loadData(self, path):
        """Read movie titles and the ``ua.base`` / ``ua.test`` rating splits.

        Args:
            path: directory containing ``u.item``, ``ua.base`` and ``ua.test``.

        Returns:
            ``(train, test, movies)``. ``train`` and ``test`` map a user id
            to ``{movie id: rating}`` (string ids, float ratings);
            ``movies`` maps a movie id to its title.
        """
        # Movie titles: the first two '|'-separated fields are id and title.
        movies = {}
        with open(path + '/u.item', encoding='latin-1') as handle:
            for line in handle:
                if not line.strip():
                    continue  # tolerate blank / trailing lines
                movie_id, title = line.split('|')[0:2]
                movies[movie_id] = title

        train = {}
        test = {}
        for name, ratings in (('/ua.base', train), ('/ua.test', test)):
            with open(path + name, encoding='latin-1') as handle:
                for line in handle:
                    if not line.strip():
                        continue
                    # Fields are user, movie, rating, timestamp; the
                    # timestamp is not used.
                    user, movieid, rating = line.split('\t')[:3]
                    ratings.setdefault(user, {})[movieid] = float(rating)
        return train, test, movies

    def newList(self, data, userLen=943, movieLen=1682):
        """Build a dense ``userLen x movieLen`` rating matrix from ``data``.

        Ids in ``data`` are 1-based, so user ``u`` / movie ``n`` is stored at
        ``[u - 1, n - 1]``; cells without a rating stay 0. The matrix is also
        stored on ``self.mateData``.

        Args:
            data: mapping user id -> {movie id: rating}.
            userLen: number of rows of the matrix.
            movieLen: number of columns of the matrix.

        Returns:
            The rating matrix as a float ``numpy.ndarray``.
        """
        mateData = np.zeros((userLen, movieLen), dtype=float)
        for u, item in data.items():
            row = int(u) - 1
            for n, r in item.items():
                # Keep the rating as given; going through int() would
                # truncate fractional ratings such as 3.5.
                mateData[row, int(n) - 1] = float(r)
        self.mateData = mateData
        return mateData

    def gradDes(self, dataMatrix, k, alpha, lam, maxCycles):
        """Factorise ``dataMatrix`` into ``p * q`` by stochastic gradient descent.

        Only entries greater than 0 are treated as observed ratings. The
        learned factors are stored on ``self.p`` and ``self.q`` as
        ``numpy.matrix`` objects; nothing is returned.

        Args:
            dataMatrix: 2-D rating matrix (users x items).
            k: number of latent factors.
            alpha: learning rate.
            lam: regularisation weight.
            maxCycles: maximum number of passes over the observed ratings.
        """
        ratings = np.asarray(dataMatrix, dtype=float)
        m, n = ratings.shape

        p = np.random.random((m, k))
        q = np.random.random((k, n))

        # Visit observed cells only, in the row-major order a full scan of
        # the matrix would use.
        rows, cols = np.nonzero(ratings > 0)
        values = ratings[rows, cols]
        observed = list(zip(rows.tolist(), cols.tolist(), values.tolist()))

        print("————————————————starting————————————————")
        for step in range(maxCycles):
            for i, j, rating in observed:
                p_i = p[i]      # view: in-place updates write through to p
                q_j = q[:, j]   # view: in-place updates write through to q
                error = rating - np.dot(p_i, q_j)
                p_i += alpha * (2 * error * q_j - lam * p_i)
                # The item step deliberately sees the already-updated user
                # factors, matching the factor-by-factor update order.
                q_j += alpha * (2 * error * p_i - lam * q_j)

            # Regularised squared error summed over ALL observed ratings.
            p_obs = p[rows]
            q_obs = q[:, cols].T
            residual = values - np.sum(p_obs * q_obs, axis=1)
            loss = float(
                np.sum(residual ** 2)
                + lam * (np.sum(p_obs ** 2) + np.sum(q_obs ** 2)) / 2
            )

            if loss < 0.001:
                break
            print("\titer: %d, loss: %f" % (step, loss))

        # np.asmatrix is available in NumPy 1.x and 2.x (np.mat is not).
        self.p = np.asmatrix(p)
        self.q = np.asmatrix(q)

    def prediction(self, dataMatrix, user):
        """Score every item ``user`` has not rated yet.

        Args:
            dataMatrix: rating matrix; a 0 marks an unrated item.
            user: 1-based user id (int or numeric string).

        Returns:
            List of ``(item index, score)`` pairs sorted by score, highest
            first. Item indices are 0-based column indices.
        """
        row = int(user) - 1
        scores = np.dot(np.asarray(self.p)[row], np.asarray(self.q))
        unrated = np.flatnonzero(np.asarray(dataMatrix)[row] == 0)
        predict = [(int(j), float(scores[j])) for j in unrated]

        # Sort by predicted score, highest first.
        return sorted(predict, key=lambda d: d[1], reverse=True)

    def KTop(self, rank, k):
        """Return the first ``k`` entries of ``rank`` (all of it if shorter)."""
        return rank[:k]

    def _trainMatrix(self, train):
        """Rebuild ``self.mateData`` from ``train``, sized to the factors if trained."""
        if self.p is None or self.q is None:
            return self.newList(train)
        return self.newList(
            train, userLen=np.shape(self.p)[0], movieLen=np.shape(self.q)[1]
        )

    def _recommend(self, user, N):
        """Return the top-``N`` ``(item index, score)`` pairs for ``user``."""
        return self.KTop(self.prediction(self.mateData, user), N)

    def _countHits(self, user, relevant, N):
        """Count top-``N`` recommendations for ``user`` whose id is in ``relevant``."""
        # Recommendations carry 0-based indices; rating dicts use 1-based
        # string ids.
        return sum(
            1 for item, _ in self._recommend(user, N) if str(item + 1) in relevant
        )

    # Recall
    def recall(self, train, test, N):
        """Fraction of held-out ratings that appear in the top-``N`` lists.

        Users missing from ``test`` contribute no hits and no held-out
        items. Returns 0.0 when there are no held-out items at all.
        """
        hit = 0
        total = 0
        self._trainMatrix(train)
        for user in train:
            tu = test.get(user, {})
            hit += self._countHits(user, tu, N)
            total += len(tu)
        return hit / total if total else 0.0

    # Precision
    def precision(self, train, test, N):
        """Fraction of top-``N`` recommendations found in the held-out ratings.

        Every training user counts ``N`` recommendations, including users
        missing from ``test``. Returns 0.0 when nothing is recommended.
        """
        hit = 0
        total = 0
        self._trainMatrix(train)
        for user in train:
            hit += self._countHits(user, test.get(user, {}), N)
            total += N
        return hit / total if total else 0.0

    # Coverage
    def coverage(self, train, test, N):
        """Distinct recommended items divided by distinct items in ``train``.

        Returns 0.0 when ``train`` contains no items.
        """
        recommend_items = set()
        all_items = set()
        self._trainMatrix(train)
        for user, items in train.items():
            all_items.update(items)
            recommend_items.update(item for item, _ in self._recommend(user, N))
        return len(recommend_items) / len(all_items) if all_items else 0.0

    # Popularity
    def popularity(self, train, test, N):
        """Mean of ``log(1 + 1 / count)`` over the recommended items.

        ``count`` is the number of training ratings of the recommended item.
        Items with no training rating have no defined term and are skipped.
        Returns 0.0 when no item contributes.
        """
        item_popularity = {}
        for items in train.values():
            for item in items:
                item_popularity[item] = item_popularity.get(item, 0) + 1

        self._trainMatrix(train)
        ret = 0.0
        n = 0
        for user in train:
            for item, _ in self._recommend(user, N):
                count = item_popularity.get(str(item + 1), 0)
                if count == 0:
                    continue
                # NOTE(review): the usual average-popularity metric is
                # log(1 + count); the existing formula is kept - confirm.
                ret += math.log(1 + 1 / count)
                n += 1
        return ret / n if n else 0.0

if __name__ == "__main__":
    # Demo run: load the MovieLens 100k split, train, then report metrics.
    path = './ml-100k'
    model = Model()
    train, test, movies = model.loadData(path)

    # Factorise the dense training matrix.
    mateData = model.newList(train)
    model.gradDes(mateData, k=5, alpha=0.001, lam=0.01, maxCycles=50)

    # Top-5 recommendations for user 1.
    rank = model.prediction(mateData, 1)
    print(model.KTop(rank, 5))

    # Offline metrics, each evaluated on top-5 lists, in a fixed order.
    for label, metric in (
        ("recall：", model.recall),
        ("precision：", model.precision),
        ("coverage：", model.coverage),
        ('popularity：', model.popularity),
    ):
        print(label, metric(train, test, 5))