In [1]:
from community import community_louvain
import json
import time
import networkx as nx


class CreateGraph:
    """Load a tab-separated edge list and expose it as networkx graphs.

    Each non-blank line of the input file must start with two integer node
    ids separated by a tab; any further tab-separated fields are ignored.

    Attributes set by the constructor:
        edges: list of ``(start_node, end_node)`` tuples in file order.
        nodes: set of every node id that appears in an edge.
        node_num: number of distinct nodes.
        Mod: initialised to ``None``; not used inside this class.
    """

    def __init__(self, path):
        """Read the edge list stored at *path* (UTF-8 text).

        Raises:
            ValueError: if a non-blank line's first two fields are not integers.
            IndexError: if a non-blank line has fewer than two fields.
        """
        edges = []
        nodes = set()
        with open(path, 'r', encoding='utf8') as f:
            for line in f:
                # Blank lines (typically a trailing newline at the end of the
                # file) carry no edge and would otherwise crash int('').
                if not line.strip():
                    continue
                fields = line.strip('\n').split("\t")
                start_node = int(fields[0])
                end_node = int(fields[1])
                nodes.add(start_node)
                nodes.add(end_node)
                edges.append((start_node, end_node))
        self.edges = edges
        self.nodes = nodes
        self.node_num = len(nodes)
        self.Mod = None

    def GetNxDi(self):
        """Return a new ``nx.DiGraph`` built from the loaded edges."""
        graph = nx.DiGraph()
        graph.add_edges_from(self.edges)
        return graph

    def GetNx(self):
        """Return a new undirected ``nx.Graph`` built from the loaded edges."""
        graph = nx.Graph()
        graph.add_edges_from(self.edges)
        return graph

def GetEdges(path, num=-1):
    """Read space-separated integer edges from the UTF-8 text file at *path*.

    Args:
        path: file whose non-blank lines start with two integers separated by
            a single space; any further fields are ignored.
        num: passed straight to ``file.readlines()`` as its size hint. It is
            NOT a line count: reading stops once the total length of the
            lines read so far reaches roughly this many characters. Values
            ``<= 0`` (the default) read the whole file.

    Returns:
        A list of ``(start_node, end_node)`` integer tuples in file order.

    Raises:
        ValueError: if a non-blank line's first two fields are not integers.
        IndexError: if a non-blank line has fewer than two fields.
    """
    with open(path, 'r', encoding='utf8') as f:
        lines = f.readlines(num)
    edges = []
    for line in lines:
        # Skip blank lines instead of crashing on int('').
        if not line.strip():
            continue
        fields = line.strip('\n').split(" ")
        edges.append((int(fields[0]), int(fields[1])))
    return edges


def _pairwise_auc(pos_scores, neg_scores):
    """Return the AUC of *pos_scores* against *neg_scores*.

    The value equals what an exhaustive comparison of every
    (positive, negative) pair yields when a pair counts 1 if the positive
    score is larger and 0.5 on a tie, divided by the number of pairs.
    Sorting the positives once and binary-searching each negative gives the
    same total without enumerating every pair.

    Raises:
        ZeroDivisionError: if either list is empty.
    """
    from bisect import bisect_left, bisect_right

    ranked = sorted(pos_scores)
    n_pos = len(ranked)
    total = 0
    for s in neg_scores:
        lo = bisect_left(ranked, s)    # positives strictly below s
        hi = bisect_right(ranked, s)   # positives below or equal to s
        total += (n_pos - hi) + 0.5 * (hi - lo)
    return total / (n_pos * len(neg_scores))


print("创建图")
G = CreateGraph("../Date/课程设计数据集.txt").GetNx()
G.remove_edges_from(nx.selfloop_edges(G))

print("Louvain社区划分")
time_start = time.time()  # start timing
partition = community_louvain.best_partition(G)
time_c = time.time() - time_start  # elapsed seconds
print('社区划分time cost', time_c, 's')
with open("NC_Mod.json", "w", encoding='utf8') as f:
    json.dump(partition, f)

print("读取测试数据")
# Forward slashes work on every platform (including Windows) and match the
# dataset path used above; the second argument is a readlines() size hint.
test_edges = GetEdges("../Date/test_date.txt", 109580)
non_edges = GetEdges("../Date/non_edges.txt", 976560)
# NOTE(review): the partition above was computed while the test edges were
# still in G, so the community labels have seen the held-out edges — confirm
# this is intended before comparing community-based scores with the others.
G.remove_edges_from(test_edges)

# (tag used in the progress/timing messages, AUC heading, networkx index)
_BASELINE_INDICES = (
    ("JC", "计算JC AUC", nx.jaccard_coefficient),
    ("AA", "计算AA AUC", nx.adamic_adar_index),
    ("PA", "计算AUC", nx.preferential_attachment),
    ("RA", "计算AUC", nx.resource_allocation_index),
)

for _tag, _auc_label, _index in _BASELINE_INDICES:
    print("计算" + _tag + "开始")
    time_start = time.time()  # start timing
    # Each index yields (u, v, score) triples; only the score is needed.
    jc_t = [s for _, _, s in _index(G, test_edges)]
    jc_n = [s for _, _, s in _index(G, non_edges)]
    time_c = time.time() - time_start  # elapsed seconds
    print(_tag + ' time cost', time_c, 's')

    print(_auc_label)
    print(_pairwise_auc(jc_t, jc_n))


def My(node1, node2, comm):
    """Score the candidate link (node1, node2) with the community-aware index.

    Reads the module-level graph ``G``.

    Args:
        node1, node2: end points of the candidate edge; both must be in ``G``.
        comm: mapping from node to community id.

    Returns:
        * the Jaccard coefficient of the pair when the nodes are in different
          communities;
        * 0 when they share a community but are more than 3 hops apart, or
          are not connected at all;
        * otherwise a ratio built from the neighbourhood sizes plus a 0.1
          weighted bonus for same-community 3-hop paths between the nodes.
    """
    if comm[node1] != comm[node2]:
        jc = nx.jaccard_coefficient(G, [(node1, node2)])
        return next(jc)[2]

    try:
        distance = nx.shortest_path_length(G, source=node1, target=node2)
    except nx.NetworkXNoPath:
        # Disconnected pairs (possible once test edges are removed) are
        # treated like any pair farther apart than 3 hops.
        return 0
    if distance > 3:
        return 0

    home = comm[node1]
    score = 0
    # Accumulate over paths node1 - nbr - nbr2 - node2 whose intermediate
    # nodes stay inside the shared community.
    for nbr in G.adj[node1]:
        if comm[nbr] != home or nbr == node2:
            continue
        inv_deg_nbr = 1/G.degree[nbr]
        for nbr2 in G.adj[nbr]:
            if nbr2 != node2 and home == comm[nbr2] and G.has_edge(nbr2, node2):
                score = score + inv_deg_nbr + 1/G.degree[nbr2]

    # NOTE(review): `a` is the size of the UNION of the two neighbourhoods, so
    # this ratio shrinks as the nodes share more neighbours — confirm that a
    # common-neighbour (intersection) count was not intended here.
    a = len(set(G.adj[node1]) | set(G.adj[node2]))
    denominator = len(G.adj[node1]) + len(G.adj[node2]) + 0.1 * score
    if denominator == 0:
        # Only reachable for a neighbourless node paired with itself.
        return 0
    return (a + 0.1 * score)/denominator

from bisect import bisect_left, bisect_right

print("计算Cja开始")
time_start = time.time()  # start timing
jc_t = [My(x, y, partition) for x, y in test_edges]
jc_n = [My(x, y, partition) for x, y in non_edges]
time_c = time.time() - time_start  # elapsed seconds
print('CJA time cost', time_c, 's')

print("计算AUC")
# AUC = (#pairs where test score > non-edge score + 0.5 * #ties) / #pairs.
# Sorting the test scores once and binary-searching each non-edge score gives
# the same total as comparing every pair, without the quadratic loop.
ranked_t = sorted(jc_t)
count = len(jc_t) * len(jc_n)
score = 0
for n in jc_n:
    lo = bisect_left(ranked_t, n)    # test scores strictly below n
    hi = bisect_right(ranked_t, n)   # test scores below or equal to n
    score += (len(ranked_t) - hi) + 0.5 * (hi - lo)
print(score/count)



创建图
Louvain社区划分
社区划分time cost 53.57309174537659 s
读取测试数据
计算JC开始
JC time cost 31.130122661590576 s
计算JC AUC
0.850332799
计算AA开始
AA time cost 30.03346872329712 s
计算AA AUC
0.846282812
计算PA开始
PA time cost 0.3443124294281006 s
计算AUC
0.8266956385
计算RA开始
RA time cost 30.036466360092163 s
计算AUC
0.8457490095
计算Cja开始
CJA time cost 218.48960971832275 s
计算AUC
0.843160322
