In [1]:

# 导入相关的包
import os,math

# 获取数据
class RecommedFrieds:
    '''
    data_path: nodeId.edges 文件的路径
        nodeId.edges ： 里面的数据代表 nodeId 指向的所有数据   每一行A  B   ==>  A->B  nodeId->A  nodeId->B
        method ： 不同的计算用户相似度的方法
    '''
    def __init__(self,data_path,method=1):
        self.data_path = data_path
        self.data_out = self.get_user_out()
        self.data_in = self.get_user_in()

        self.method = method

        if method == 1:
            self.user_sim = self.userSimilarityMethod1()
        elif method == 2:
            self.user_sim = self.userSimilarityMethod2()
        elif method == 3:
            self.user_sim = self.userSimilarityMethod3()

    # Wout ( u , v ) 公式中 out(u) 是在社交网络图中用户 u 指向的其他好友的集合。
    def get_user_out(self):
        (path,filename) = os.path.split(self.data_path)
        (filename,hz) = filename.split(".")
        user_out = dict()
        user_out.setdefault(filename,set())
        # 打开文件，遍历每一行数据
        with open(self.data_path,"r") as f:
            for line in f.readlines():
                (user_A,user_B) = line.split() 
                user_out[filename].add(user_A)
                user_out[filename].add(user_B)
                # A -> B
                user_out.setdefault(user_A,set())
                user_out[user_A].add(user_B)
        
        return user_out


    # in(u)  是在社交网络图中指向用户 u 的用户的集合  
    def get_user_in(self):
        (path,filename) = os.path.split(self.data_path)
        (filename,hz) = filename.split(".")
        user_in = dict()
        # 打开文件，遍历每一行数据
        with open(self.data_path,"r") as f:
            for line in f.readlines():
                (user_A,user_B) = line.split()
                user_in.setdefault(user_A,set())
                user_in.setdefault(user_B,set()) 
                user_in[user_A].add(filename)
                user_in[user_B].add(filename)
                # A -> B
                user_in[user_B].add(user_A)
        
        #print(user_in)
        return user_in

    # W out (u,v) = |out(u) & out(v)| / sqrt(out(u) * out(v))
    def userSimilarityMethod1(self):
        user_sim = {}
        # 用户 A
        for user_A in self.data_out.keys():
            # 用户B
            for user_B in self.data_out.keys():
                if user_A==user_B:
                    continue
                user_sim.setdefault(user_A,{}).setdefault(user_B,0.0)
                user_sim[user_A][user_B] = 1.0 * (len(self.data_out[user_A] & self.data_out[user_B])) / math.sqrt(len(self.data_out[user_A]) * len(self.data_out[user_B]))
        
        return user_sim

    # W in (u,v) = |in(u) & in(v)| / sqrt(in(u) * in(v))
    def userSimilarityMethod2(self):
        user_sim = {}
        # 用户 A
        for user_A in self.data_out.keys():
            # 用户B
            for user_B in self.data_out.keys():
                if user_A==user_B:
                    continue
                user_sim.setdefault(user_A,{}).setdefault(user_B,0.0)
                if (user_A in self.data_in) and (user_B in self.data_in):
                    user_sim[user_A][user_B]  = 1.0 * (len(self.data_in[user_A] & self.data_in[user_B])) / math.sqrt(len(self.data_in[user_A]) * len(self.data_in[user_B]))
        
        return user_sim
        

    # W out,in (u,v) = |out(u) & in(v)| / sqrt(out(u) * in(v))
    def userSimilarityMethod3(self):
        user_sim = {}
        # 用户 A
        for user_A in self.data_out.keys():
            # 用户B
            for user_B in self.data_out.keys():
                if user_A==user_B:
                    continue
                user_sim.setdefault(user_A,{}).setdefault(user_B,0.0)
                if (user_A in self.data_in) and (user_B in self.data_in) and (user_A in self.data_out) and (user_B in self.data_out):
                    user_sim[user_A][user_B]  = 1.0 * (len(self.data_out[user_A] & self.data_in[user_B])) / math.sqrt(len(self.data_out[user_A]) * len(self.data_in[user_B]))
        
        return user_sim

    # 为用户A 推荐好友
    def UserRecommedFrieds(self,userA):
        # 用户A 不认识的朋友排名
        new_firends_rank = dict()
        # 寻找A 不认的朋友B 
        for userB in self.user_sim[userA].keys():
            if self.user_sim[userA][userB] == 0.0:
                new_firends_rank.setdefault(userB,0.0)

        
        for new_firend in new_firends_rank.keys():
            # A 认识的朋友 C &&  C 认识 new_firend
            for userC in self.user_sim[userA].keys():
                if self.user_sim[userA][userC] > 0.0 and self.user_sim[userC][new_firend]>0.0:
                    new_firends_rank[new_firend] += self.user_sim[userA][userC]*self.user_sim[userC][new_firend]

        
        rank = dict(sorted(new_firends_rank.items(), key=lambda d: d[1], reverse=True))
        
        return rank
# # 为用户推荐10名新朋友
def get_nearest_new_firends(rank):
    user_ids = rank.keys()
    if len(user_ids)<10:
        for user_id in user_ids:
            if rank[user_id] > 0.0:
                print("用户：{}  评分：{}".format(user_id,rank[user_id]))
            else:
                break
    else:
        cnt = 0
        for user_id in user_ids:
            if rank[user_id] > 0.0:
                print("用户：{}  评分：{}".format(user_id,rank[user_id]))
            else:
                break
            cnt+=1
            if cnt>10:
                break

if __name__ == '__main__':
    #nodeId.edges 文件路径
    data_path = r"/home/lijianmin/github/github_not_data/twitter/12831.edges"

    # 为用户 1186 推荐新朋友
    userA = "398874773"
    print("采用不同的方式为用户 {} 推荐好友：".format(userA))
    ways = [str(i) for i in range(1,4)]
    ways = ['2']
    for way in ways:
        print("方式 {} :".format(way))
        rf = RecommedFrieds(data_path,method=int(way))
        rank = rf.UserRecommedFrieds(userA)
        get_nearest_new_firends(rank)




采用不同的方式为用户 398874773 推荐好友：
方式 2 :


In [12]:
# 验证

# 导入相关的包
import os, math


# 获取数据
class RecommedFrieds:
    '''
    data_path: nodeId.edges 文件的路径
        nodeId.edges ： 里面的数据代表 nodeId 指向的所有数据   每一行A  B   ==>  A->B  nodeId->A  nodeId->B
        method ： 不同的计算用户相似度的方法
    '''

    def __init__(self, data_path, method=1):
        self.data_path = data_path
        self.data_out = self.get_user_out()
        self.data_in = self.get_user_in()

        self.method = method

        if method == 1:
            self.user_sim = self.userSimilarityMethod1()
        elif method == 2:
            self.user_sim = self.userSimilarityMethod2()
        elif method == 3:
            self.user_sim = self.userSimilarityMethod3()

    # Wout ( u , v ) 公式中 out(u) 是在社交网络图中用户 u 指向的其他好友的集合。
    def get_user_out(self):
        (path, filename) = os.path.split(self.data_path)
        (filename, hz) = filename.split(".")
        user_out = dict()
        user_out.setdefault(filename, set())
        # 打开文件，遍历每一行数据
        with open(self.data_path, "r") as f:
            for line in f.readlines():
                (user_A, user_B) = line.split()
                user_out[filename].add(user_A)
                user_out[filename].add(user_B)
                # A -> B
                user_out.setdefault(user_A, set())
                user_out[user_A].add(user_B)

        return user_out

    # in(u)  是在社交网络图中指向用户 u 的用户的集合
    def get_user_in(self):
        (path, filename) = os.path.split(self.data_path)
        (filename, hz) = filename.split(".")
        user_in = dict()
        # 打开文件，遍历每一行数据
        with open(self.data_path, "r") as f:
            for line in f.readlines():
                (user_A, user_B) = line.split()
                user_in.setdefault(user_A, set())
                user_in.setdefault(user_B, set())
                user_in[user_A].add(filename)
                user_in[user_B].add(filename)
                # A -> B
                user_in[user_B].add(user_A)

        # print(user_in)
        return user_in

    # W out (u,v) = |out(u) & out(v)| / sqrt(out(u) * out(v))
    def userSimilarityMethod1(self):
        user_sim = {}
        # 用户 A
        for user_A in self.data_out.keys():
            # 用户B
            for user_B in self.data_out.keys():
                if user_A == user_B:
                    continue
                user_sim.setdefault(user_A, {}).setdefault(user_B, 0.0)
                user_sim[user_A][user_B] = 1.0 * (len(self.data_out[user_A] & self.data_out[user_B])) / math.sqrt(len(self.data_out[user_A]) * len(self.data_out[user_B]))

        return user_sim

    # W in (u,v) = |in(u) & in(v)| / sqrt(in(u) * in(v))
    def userSimilarityMethod2(self):
        user_sim = {}
        # 用户 A
        for user_A in self.data_in.keys():
            # 用户B
            for user_B in self.data_in.keys():
                if user_A == user_B:
                    continue
                if (user_A in self.data_in) and (user_B in self.data_in):
                    user_sim.setdefault(user_A, {}).setdefault(user_B, 0.0)
                    user_sim[user_A][user_B] = 1.0 * (len(self.data_in[user_A] & self.data_in[user_B])) / math.sqrt(len(self.data_in[user_A]) * len(self.data_in[user_B]))
                    #print(user_sim[user_A][user_B])

        return user_sim



    # W out,in (u,v) = |out(u) & in(v)| / sqrt(out(u) * in(v))
    def userSimilarityMethod3(self):
        user_sim = {}
        # 用户 A
        for user_A in self.data_out.keys():
            # 用户B
            for user_B in self.data_out.keys():
                if user_A == user_B:
                    continue
                user_sim.setdefault(user_A, dict()).setdefault(user_B, 0.0)
                if (user_A in self.data_in) and (user_B in self.data_in) and (user_A in self.data_out) and (user_B in self.data_out):
                    user_sim[user_A][user_B] = 1.0 * (len(self.data_out[user_A] & self.data_in[user_B])) / math.sqrt(
                        len(self.data_out[user_A]) * len(self.data_in[user_B]))

        return user_sim


    def UserRecommedFrieds(self, userA):
        # 用户A 不认识的朋友排名
        rank = {}

        print(len(self.user_sim[userA].keys()))

        for related_user,rate in self.user_sim[userA].items():
            for userC,c_rate in self.user_sim[related_user].items():
                rank.setdefault(userC, 0.0)
                if userA!=userC and userC not in self.user_sim[userA]:
                    rank[userC]+=rate*c_rate

        rank = dict(sorted(rank.items(), key=lambda d: d[1], reverse=True))
        return rank


# # 为用户推荐10名新朋友
def get_nearest_new_firends(rank):
    user_ids = rank.keys()
    if len(user_ids) < 10:
        for user_id in user_ids:
            if rank[user_id] > 0.0:
                print("用户：{}  评分：{}".format(user_id, rank[user_id]))
            else:
                break
    else:
        cnt = 0
        for user_id in user_ids:
            if rank[user_id] > 0.0:
                print("用户：{}  评分：{}".format(user_id, rank[user_id]))
            else:
                break
            cnt += 1
            if cnt > 10:
                break


if __name__ == '__main__':
    # nodeId.edges 文件路径
    data_path = r"/home/lijianmin/github/github_not_data/twitter/12831.edges"

    # 为用户 1186 推荐新朋友
    userA = "398874773"
    print("采用不同的方式为用户 {} 推荐好友：".format(userA))
    ways = [str(i) for i in range(1, 4)]
    ways = ['2']
    for way in ways:
        print("方式 {} :".format(way))
        rf = RecommedFrieds(data_path, method=int(way))
        rank = rf.UserRecommedFrieds(userA)
        get_nearest_new_firends(rank)







采用不同的方式为用户 398874773 推荐好友：
方式 2 :
235


In [4]:
for key,item in rf.data_in.items():
    print(len(item),item)

1 {'12831'}
41 {'27985216', '528', '17633994', '1186', '14172562', '6331462', '380', '10476462', '179339999', '15384741', '5854882', '16119767', '16934483', '22253', '13652832', '67157376', '662423', '20904050', '16461070', '14367669', '79033767', '12831', '19479427', '14305022', '2735631', '10013512', '40198602', '6735', '8479062', '883301', '17459034', '14087951', '15639334', '398874773', '14231571', '68824195', '6160742', '761', '16912257', '586', '3191321'}
3 {'1678471', '12831', '57378470'}
32 {'11178592', '1566521', '8630302', '1186', '326658079', '10476462', '12741', '51518017', '16004268', '13141442', '22253', '16803822', '13652832', '9767472', '14163141', '67157376', '20904050', '14367669', '14819149', '18498878', '14892191', '12831', '14142965', '163449492', '9943672', '15725851', '713263', '19223', '14202711', '663463', '16912257', '5813712'}
25 {'11178592', '528', '728163', '6331462', '16004268', '15384741', '51518017', '13019862', '12725022', '47', '13141442', '9283582', '