In [598]:
import networkx as nx
import numpy as np
import pandas as pd
from scipy import sparse
from scipy.sparse import csr_matrix
from scipy.sparse import coo_matrix
from time import perf_counter
import threading
from multiprocessing import Process, Pool
import math

In [599]:
## filename = './datasets/facebook_combined.txt'
filename = './datasets/ca-CSphd.mtx'

In [600]:
t0 = perf_counter()
testgraph = nx.read_edgelist(filename, comments='#', delimiter=None, create_using=nx.Graph, nodetype=None, data=True, edgetype=None, encoding='utf-8')
t_load = perf_counter() - t0

In [601]:
## define the partition index, need to add partition function here
t_partition_start = perf_counter()
adj_matrix_dim = len(testgraph.nodes)
partition_num = 6 ## can be set a variable, equals to thread numbers.
partition_index = np.zeros(partition_num + 1, dtype=np.int32)
for i in range(partition_num):
    partition_index[i+1] = np.int32(adj_matrix_dim*(i+1)/partition_num)
    ## partition_index[i+1] = np.int32(adj_matrix_dim*(math.pow(0.717, partition_num-1-i)))
print(partition_index)

[   0  313  627  941 1254 1568 1882]


In [602]:
## use multi-thread method to implement graph partition, function defination
def partion_multi_process (processID, edge_list, index_list):
    graph = nx.DiGraph()
    for edge_in_list in edge_list:
        if ((int(edge_in_list[1]) >= index_list[processID]) and (int(edge_in_list[1]) < index_list[processID+1])):
            graph.add_edge(edge_in_list[0], edge_in_list[1])
    return graph

process_pool = Pool(partition_num)
result_pool = []
for i in range(partition_num):
    result_pool.append(process_pool.apply_async(func=partion_multi_process, args=(i, testgraph.edges, partition_index)))

In [603]:
process_pool.close()
process_pool.join()

G_part = []

for i in range(partition_num):
    G_part.append(result_pool[i].get())


In [604]:
## single thread to implement graph partition

# for edge_list in testgraph.edges:
#     for i in range(partition_num):
#         if ((int(edge_list[1]) >= partition_index[i]) and (int(edge_list[1]) < partition_index[i+1])):
#             G_part[i].add_edge(edge_list[0], edge_list[1])

# for i in range(partition_num):
#     print(G_part[i])

In [605]:
## triangle count in sub-graphs (need be parallel)
# triangle_count = 0
# # node_triangle = np.zeros(len(testgraph.nodes))
# for g_index in range(partition_num): ## could be executed parallelly in different machines
#     for edge_list in testgraph.edges: ## traverse all the edges in the original graph
#         if ((G_part[g_index].has_node(edge_list[0])) and (G_part[g_index].has_node(edge_list[1]))):
#             srcSet = G_part[g_index].adj[edge_list[0]]
#             dstSet = G_part[g_index].adj[edge_list[1]]
#             for node_obj in srcSet:
#                 if node_obj in dstSet:
#                     triangle_count += 1
#                     # node_triangle[int(edge_list[0])] += 1
#                     # node_triangle[int(edge_list[1])] += 1
#                     # node_triangle[int(node_obj)] += 1

# print (triangle_count)
## print (node_triangle)


In [606]:
## multi-processors processing
def intersection_multi_process (processID, graph_part, graph_whole):
    triangle_count = 0
    for edge_list in graph_whole.edges: ## traverse all the edges in the original graph
        if ((graph_part.has_node(edge_list[0])) and (graph_part.has_node(edge_list[1]))):
            srcSet = graph_part.adj[edge_list[0]]
            dstSet = graph_part.adj[edge_list[1]]
            for node_obj in srcSet:
                if node_obj in dstSet:
                    triangle_count += 1
    return triangle_count

intersection_pool = Pool(partition_num)
final_result_pool = []
for i in range(partition_num):
    final_result_pool.append(intersection_pool.apply_async(func=intersection_multi_process, args=(i, G_part[i], testgraph)))

intersection_pool.close()
intersection_pool.join()

result = 0
for i in range(partition_num):
    result += int(final_result_pool[i].get())

t_partition_end = perf_counter()
print(t_partition_end - t_partition_start)
print(result)

0.4876450151205063
8


In [607]:
testgraph_golden = nx.read_edgelist(filename, comments='#', delimiter=None, create_using=None, nodetype=None, data=True, edgetype=None, encoding='utf-8')

In [608]:
triangle_golden = nx.triangles(testgraph_golden)
result_golden = int(sum(triangle_golden.values())/3)
print (result_golden)
## print (triangle_golden)

8
