# In [None]:
# 基于用户的协同过滤算法

import sys
import random
import math
from operator import itemgetter
import numpy as np
import datetime

class User_Commend(object):
    """User-based collaborative filtering recommender with offline metrics.

    The constructor takes a single configuration dict with these keys:

    * ``data`` -- list that ``ReadData`` appends ``[user, item]`` pairs to.
    * ``M`` / ``key`` -- control the train/test split (see ``SplitData``).
    * ``seed`` -- seed passed to ``random.seed`` before splitting.
    * ``N`` -- maximum number of items returned per recommendation list.
    * ``K`` -- number of most-similar users consulted per recommendation.
    * ``W`` -- user-user similarity table, ``W[u][v] -> float``; replaced by
      ``UserSimilarity``.
    * ``rank`` -- stored on the instance but not read by any method here.
    """

    def __init__(self, dataSet):
        self.data = dataSet['data']
        self.M = dataSet['M']
        self.key = dataSet['key']
        self.seed = dataSet['seed']
        self.N = dataSet['N']
        self.K = dataSet['K']
        self.W = dataSet['W']
        self.rank = dataSet['rank']

    def ReadData(self, file):
        """Append one ``[user, item]`` pair to ``self.data`` per input line.

        ``file`` is any iterable of text lines. Each line is split on
        whitespace and only its first two fields are kept. Lines with fewer
        than two fields (for example blank lines) are skipped.
        """
        for line in file:
            fields = line.split()
            if len(fields) < 2:
                continue
            self.data.append([fields[0], fields[1]])

    def SplitData(self):
        """Randomly split ``self.data`` into train and test dicts.

        Returns:
            ``(train, test)``, each mapping a user to the list of that user's
            items in that partition, in input order.

        A record goes to the test set when ``random.randint(0, self.M)``
        equals ``self.key``. ``randint`` includes both endpoints, so there
        are ``M + 1`` equally likely buckets.
        """
        test = dict()
        train = dict()
        random.seed(self.seed)
        for user, item in self.data:
            if random.randint(0, self.M) == self.key:
                target = test
            else:
                target = train
            # setdefault + append keeps the user's first item as well.
            target.setdefault(user, []).append(item)
        return train, test

    def UserSimilarity(self, train):
        """Compute the user-user similarity table and store it in ``self.W``.

        For every item shared by users ``u`` and ``v`` the pair receives
        ``1 / log(1 + number_of_users_of_that_item)``, so items shared by
        many users contribute less. The accumulated weight is then divided
        by ``sqrt(N[u] * N[v])``, where ``N[u]`` is the number of distinct
        items of ``u`` in ``train``.

        Users that share no item with anybody get no entry in ``self.W``.
        """
        # Inverted index: item -> set of users who interacted with it.
        item_users = dict()
        for u, items in train.items():
            for i in items:
                item_users.setdefault(i, set()).add(u)

        # Built once, after the inverted index is complete.
        C = dict()  # C[u][v]: accumulated co-occurrence weight
        N = dict()  # N[u]: number of distinct items of user u
        for users in item_users.values():
            weight = 1.0 / math.log(1 + len(users))
            for u in users:
                N[u] = N.get(u, 0) + 1
                for v in users:
                    if u == v:
                        continue
                    row = C.setdefault(u, dict())
                    row[v] = row.get(v, 0) + weight

        W = dict()
        for u, related_users in C.items():
            for v, cuv in related_users.items():
                W.setdefault(u, dict())[v] = cuv / math.sqrt(N[u] * N[v])

        self.W = W

    def Coverage(self, train, test):
        """Return the fraction of distinct train items that get recommended.

        ``test`` is accepted for signature parity with the other metrics and
        is not used. Returns ``0.0`` when ``train`` contains no items.
        """
        recommended_items = set()
        all_items = set()

        for user in train:
            all_items.update(train[user])
            for item, _ in self.GetRecommendation(user, train):
                recommended_items.add(item)

        if not all_items:
            return 0.0
        return len(recommended_items) / (len(all_items) * 1.0)

    def GetRecommendation(self, user, train):
        """Return up to ``self.N`` ``(item, score)`` pairs for ``user``.

        The ``self.K`` users with the highest similarity to ``user`` in
        ``self.W`` are consulted. Every item of theirs that ``user`` has not
        interacted with is scored by the sum of the similarities of the
        neighbours who have it. The result is sorted by descending score.

        A user missing from ``self.W`` or ``train`` yields an empty list.
        """
        rank = dict()
        interacted_items = set(train.get(user, ()))
        neighbours = sorted(self.W.get(user, {}).items(),
                            key=itemgetter(1), reverse=True)[0:self.K]
        for v, wuv in neighbours:
            for i in train.get(v, ()):
                if i in interacted_items:
                    continue
                # The first neighbour's weight counts too.
                rank[i] = rank.get(i, 0) + wuv

        return sorted(rank.items(), key=itemgetter(1), reverse=True)[0:self.N]

    def _count_hits(self, train, test):
        """Yield ``(hits, test_items)`` for each user in both train and test.

        ``hits`` is how many of the user's recommended items appear in that
        user's test items.
        """
        for user in train:
            if user not in test:
                continue
            tu = test[user]
            wanted = set(tu)
            rank = self.GetRecommendation(user, train)
            yield sum(1 for item, _ in rank if item in wanted), tu

    def Recall(self, train, test):
        """Return hits divided by the total number of test items.

        Only users present in both ``train`` and ``test`` are evaluated.
        Returns ``0.0`` when there are no test items to recall.
        """
        hit = 0
        total = 0
        for hits, tu in self._count_hits(train, test):
            hit += hits
            total += len(tu)
        if total == 0:
            return 0.0
        return hit / (total * 1.0)

    def Precision(self, train, test):
        """Return hits divided by ``self.N`` times the evaluated user count.

        The denominator adds ``self.N`` per evaluated user, even when fewer
        than ``self.N`` items were recommended. Returns ``0.0`` when the
        denominator is zero.
        """
        hit = 0
        total = 0
        for hits, _ in self._count_hits(train, test):
            hit += hits
            total += self.N
        if total == 0:
            return 0.0
        return hit / (total * 1.0)

    def Popularity(self, train, test):
        """Return the mean ``log(1 + popularity)`` of all recommended items.

        An item's popularity is the number of times it appears in ``train``.
        ``test`` is accepted for signature parity and is not used. Returns
        ``0.0`` when nothing is recommended.
        """
        item_popularity = dict()
        for items in train.values():
            for item in items:
                item_popularity[item] = item_popularity.get(item, 0) + 1

        ret = 0
        n = 0
        for user in train:
            for item, _ in self.GetRecommendation(user, train):
                ret += math.log(1 + item_popularity[item])
                n += 1
        if n == 0:
            return 0.0
        return ret / (n * 1.0)


if __name__ == '__main__':
    # Script entry point: load the ratings file, split it, build the user
    # similarity table and print the four offline evaluation metrics.

    # Start timestamp; nothing in the visible script reads it back.
    starttime = datetime.datetime.now()
    dataSet = {
        'data': [],
        'M': 8,
        'key': 1,
        'seed': 1,
        'N': 10,
        'K': 80,
        'W': dict(),
        'rank': dict(),
    }

    UserObject = User_Commend(dataSet)
    # The context manager closes the file even if parsing raises.
    with open('ml-100k/u.data') as data_file:
        UserObject.ReadData(data_file)
    UserObject.data = np.array(UserObject.data)

    train, test = UserObject.SplitData()
    UserObject.UserSimilarity(train)

    recall = UserObject.Recall(train, test)
    precision = UserObject.Precision(train, test)
    popularity = UserObject.Popularity(train, test)
    coverage = UserObject.Coverage(train, test)

    print('recall: ', recall, '\n')
    print('precision: ', precision, '\n')
    print('Popularity: ', popularity, '\n')
    print('coverage: ', coverage, '\n')
