# In [None]:
# 基于物品的协同过滤算法

import sys
import random
import math
from operator import itemgetter


class Item_commmend(object):
    """Item-based collaborative filtering recommender with offline metrics.

    The instance is configured from a single dict (see ``__init__``), loads
    ``[user, item]`` interactions, splits them into train/test dicts, builds
    an item-item similarity table and evaluates top-N recommendations with
    recall, precision, coverage and popularity.
    """

    def __init__(self, dataSet):
        """Copy the experiment configuration out of ``dataSet``.

        Args:
            dataSet: dict with the keys ``'data'`` (list of ``[user, item]``
                pairs), ``'M'`` and ``'key'`` (split parameters), ``'seed'``
                (random seed), ``'N'`` (recommendations per user), ``'K'``
                (neighbours per item), ``'W'`` (item similarity table) and
                ``'rank'``.

        Raises:
            KeyError: if any of the keys above is missing.
        """
        self.data = dataSet['data']
        self.M = dataSet['M']
        self.key = dataSet['key']
        self.seed = dataSet['seed']
        self.N = dataSet['N']
        self.K = dataSet['K']
        self.W = dataSet['W']
        self.rank = dataSet['rank']

    def ReadData(self, file):
        """Append ``[user, item]`` pairs parsed from ``file`` to ``self.data``.

        Each line is split on whitespace; the first two fields are kept and
        any further fields are ignored. Blank lines and lines with fewer than
        two fields are skipped instead of raising ``IndexError``.

        Args:
            file: iterable of text lines (an open file or a list of strings).
        """
        for line in file:
            fields = line.split()
            if len(fields) < 2:
                continue
            self.data.append([fields[0], fields[1]])

    def SplitData(self):
        """Randomly split ``self.data`` into train and test interactions.

        The generator is seeded with ``self.seed`` so the split is
        reproducible. An interaction goes to the test set when
        ``random.randint(0, self.M)`` equals ``self.key``; ``randint`` is
        inclusive on both ends, so there are ``M + 1`` possible draws.

        Returns:
            tuple ``(train, test)`` of dicts mapping user -> list of items,
            in the order the interactions appear in ``self.data``.
        """
        test = dict()
        train = dict()
        random.seed(self.seed)
        for user, item in self.data:
            target = test if random.randint(0, self.M) == self.key else train
            # setdefault + append keeps the user's first interaction, which
            # would be lost if the list were only created on first sight.
            target.setdefault(user, []).append(item)
        return train, test

    def UserSimilarityOld(self, train):
        """Compute pairwise user-user cosine similarity by brute force.

        Args:
            train: dict mapping user -> list of items.

        Returns:
            dict ``W`` where ``W[u][v]`` is the number of distinct items
            shared by ``u`` and ``v`` divided by
            ``sqrt(len(train[u]) * len(train[v]))``, or ``0.0`` when either
            user has no items. ``W[u]`` has no entry for ``u`` itself.
        """
        # Build each user's item set once instead of once per user pair.
        item_sets = dict((u, set(items)) for u, items in train.items())
        W = dict()
        for u in train:
            W[u] = dict()
            for v in train:
                if u == v:
                    continue
                norm = math.sqrt(len(train[u]) * len(train[v]) * 1.0)
                if norm == 0:
                    W[u][v] = 0.0
                else:
                    W[u][v] = len(item_sets[u] & item_sets[v]) / norm
        return W

    def ItemSimilarity(self, train):
        """Build the item-item cosine similarity table and store it in ``self.W``.

        ``self.W[i][j]`` is the number of users whose list contains both
        ``i`` and ``j`` divided by ``sqrt(N[i] * N[j])``, where ``N[x]`` is
        the number of occurrences of ``x`` across all users' lists. An item
        that never co-occurs with a different item gets no row in ``self.W``.

        Args:
            train: dict mapping user -> list of items.
                NOTE(review): counts assume a user's list holds no duplicate
                items -- confirm against the data source.
        """
        C = dict()  # C[i][j]: co-occurrence count of items i and j
        N = dict()  # N[i]: occurrence count of item i
        for items in train.values():
            for i in items:
                N[i] = N.get(i, 0) + 1
                for j in items:
                    if i == j:
                        continue
                    row = C.setdefault(i, dict())
                    row[j] = row.get(j, 0) + 1

        W = dict()
        for i, related_items in C.items():
            for j, cij in related_items.items():
                W.setdefault(i, dict())[j] = cij / math.sqrt(N[i] * N[j])

        self.W = W

    def Coverage(self, train, test):
        """Return the share of training items that get recommended to someone.

        Prints the number of distinct recommended items as a side effect.

        Args:
            train: dict mapping user -> list of items.
            test: unused; kept for a signature parallel to the other metrics.

        Returns:
            ``len(recommended) / len(all training items)`` as a float, or
            ``0.0`` when ``train`` contains no items.
        """
        recommned_items = set()
        all_items = set()

        for user in train:
            all_items.update(train[user])
            for item, _score in self.GetRecommendation(user, train):
                recommned_items.add(item)

        print('len: ', len(recommned_items), '\n')
        if not all_items:
            return 0.0
        return len(recommned_items) / (len(all_items) * 1.0)

    def GetRecommendation(self, user, train):
        """Return the top-``self.N`` recommendations for ``user``.

        For every item the user already has, the ``self.K`` most similar
        items in ``self.W`` are taken and their similarities are summed per
        candidate; items the user already has are never recommended.

        Args:
            user: user identifier.
            train: dict mapping user -> list of items.

        Returns:
            list of ``(item, score)`` tuples sorted by descending score, at
            most ``self.N`` long. Empty when the user is unknown or none of
            their items has a similarity row.
        """
        W = self.W
        owned = train.get(user, [])
        seen = set(owned)  # O(1) membership instead of scanning the list
        scores = dict()
        for i in owned:
            # An item without any co-occurrence has no row in W.
            neighbours = sorted(W.get(i, {}).items(),
                                key=itemgetter(1), reverse=True)[0:self.K]
            for j, wj in neighbours:
                if j in seen:
                    continue
                # The first contribution counts too: start from wj, not 0.
                scores[j] = scores.get(j, 0) + wj

        return sorted(scores.items(), key=itemgetter(1),
                      reverse=True)[0:self.N]

    def _count_hits(self, train, test):
        """Tally hits over users that appear in both ``train`` and ``test``.

        Returns:
            tuple ``(hits, test_item_count, user_count)`` where ``hits`` is
            the number of recommended items found in the users' test lists.
        """
        hits = 0
        test_item_count = 0
        user_count = 0
        for user in train:
            if user not in test:
                continue
            held_out = test[user]
            held_out_set = set(held_out)
            for item, _score in self.GetRecommendation(user, train):
                if item in held_out_set:
                    hits += 1
            test_item_count += len(held_out)
            user_count += 1
        return hits, test_item_count, user_count

    def Recall(self, train, test):
        """Return hits divided by the number of held-out test items.

        Only users present in both ``train`` and ``test`` are evaluated.
        Returns ``0.0`` when there is nothing to evaluate.
        """
        hits, test_item_count, _users = self._count_hits(train, test)
        if test_item_count == 0:
            return 0.0
        return hits / (test_item_count * 1.0)

    def Precision(self, train, test):
        """Return hits divided by ``self.N`` times the number of evaluated users.

        Every evaluated user counts for ``self.N`` recommendation slots even
        when fewer recommendations were produced. Returns ``0.0`` when there
        is nothing to evaluate.
        """
        hits, _items, user_count = self._count_hits(train, test)
        slots = user_count * self.N
        if slots == 0:
            return 0.0
        return hits / (slots * 1.0)

    def Popularity(self, train, test):
        """Return the mean ``log(1 + popularity)`` of all recommended items.

        An item's popularity is its number of occurrences in ``train``.

        Args:
            train: dict mapping user -> list of items.
            test: unused; kept for a signature parallel to the other metrics.

        Returns:
            the average as a float, or ``0.0`` when nothing is recommended.
        """
        item_popularity = dict()
        for items in train.values():
            for item in items:
                item_popularity[item] = item_popularity.get(item, 0) + 1

        ret = 0
        n = 0
        for user in train:
            for item, _score in self.GetRecommendation(user, train):
                # .get guards against a similarity table that mentions items
                # absent from this training set.
                ret += math.log(1 + item_popularity.get(item, 0))
                n += 1
        if n == 0:
            return 0.0
        return ret / (n * 1.0)


if __name__ == '__main__':
    # Experiment configuration consumed by Item_commmend.__init__.
    dataSet = {
        'data': [],     # filled by ReadData with [user, item] pairs
        'M': 7,         # upper bound of the random draw used by SplitData
        'key': 1,       # draw value that routes an interaction to the test set
        'seed': 1,      # random seed for the split
        'N': 10,        # recommendations returned per user
        'K': 10,        # most-similar items considered per owned item
        'W': dict(),    # replaced by ItemSimilarity
        'rank': dict(),
    }

    Item_object = Item_commmend(dataSet)
    # The context manager closes the data file even if parsing raises.
    with open('ml-100k/u.data') as file:
        Item_object.ReadData(file)

    train, test = Item_object.SplitData()
    Item_object.ItemSimilarity(train)

    # Compute the evaluation metrics.
    recall = Item_object.Recall(train, test)
    precision = Item_object.Precision(train, test)
    popularity = Item_object.Popularity(train, test)
    coverage = Item_object.Coverage(train, test)

    print('recall: ', recall, '\n')
    print('precision: ', precision, '\n')
    print('Popularity: ', popularity, '\n')
    print('coverage: ', coverage, '\n')