forked from sbarakat/dgs-graphstream
-
Notifications
You must be signed in to change notification settings - Fork 0
/
file_io.py
280 lines (238 loc) · 11.8 KB
/
file_io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
#!/usr/bin/env python3
import os
import logging
import networkx as nx
from collections import defaultdict
import utils
def read_metis(file):
logging.info("Reading METIS file %s", file)
G = nx.Graph() # create undirected graph
# add node weights from METIS file
with open(file, "r") as metis:
n = 0
first_line = None
has_edge_weights = False
has_node_weights = False
n_vertex_weights = 1
for i, line in enumerate(metis):
if line[0] == '%':
# ignore comments
continue
if not first_line:
# read meta data from first line
first_line = line.split()
m_nodes = int(first_line[0])
m_edges = int(first_line[1])
if len(first_line) > 2:
# FMT has the following meanings:
# 0 the graph has no weights (in this case, you can omit FMT)
# 1 the graph has edge weights
# 10 the graph has node weights
# 11 the graph has both edge and node weights
file_format = first_line[2]
if int(file_format) == 0:
pass
elif int(file_format) == 1:
has_edge_weights = True
elif int(file_format) == 10:
has_node_weights = True
elif int(file_format) == 11:
has_edge_weights = True
has_node_weights = True
else:
assert False, "File format not supported"
if len(first_line) > 3:
# NCON, it appears, is the number of vertex weights. Normally,
# if vertex weights are used, there is only one weight per vertex,
# and it is not even necessary to list NCON in the input. But if
# multiple values are associated with a vertex, NCON must be listed.
n_vertex_weights = int(first_line[3])
continue
# METIS starts node count from 1, here we start from 0 by
# subtracting 1 in the edge list and incrementing 'n' after
# processing the line.
if line.strip():
e = line.split()
if has_edge_weights and has_node_weights:
node_weights = e[0:n_vertex_weights]
if len(e) > n_vertex_weights:
# create weighted edge list:
# [(1, 2, {'weight':'2'}), (1, 3, {'weight':'8'})]
edges = e[n_vertex_weights:]
edges_split = list(zip(*[iter(edges)] * 2))
edge_list = [(n, int(v[0]) - 1, {'weight': int(v[1])}) for v in edges_split]
G.add_edges_from(edge_list)
G.node[n]['weight'] = int(node_weights[0]) # use 1st node weight
else:
# no edges
G.add_nodes_from([n], weight=int(node_weights[0])) # use 1st node weight
elif has_edge_weights and not has_node_weights:
n_vertex_weights = 0
if len(e) > 0:
# create weighted edge list:
# [(1, 2, {'weight':'2'}), (1, 3, {'weight':'8'})]
edges = e[n_vertex_weights:]
edges_split = list(zip(*[iter(edges)] * 2))
edge_list = [(n, int(v[0]) - 1, {'weight': int(v[1])}) for v in edges_split]
G.add_edges_from(edge_list)
G.node[n]['weight'] = 1.0
else:
# no edges
G.node[n]['weight'] = 1.0
elif not has_edge_weights and has_node_weights:
node_weights = e[0:n_vertex_weights]
if len(e) > n_vertex_weights:
edges = e[n_vertex_weights:]
edge_list = [(n, int(v) - 1, {'weight':1.0}) for v in edges]
G.add_edges_from(edge_list)
G.node[n]['weight'] = int(node_weights[0]) # use 1st node weight
else:
# no edges
G.add_nodes_from([n], weight=int(node_weights[0]))
pass
else:
edge_list = [(n, int(v) - 1, {'weight':1.0}) for v in e]
G.add_edges_from(edge_list)
G.node[n]['weight'] = 1.0
else:
# blank line indicates no node weight
G.add_nodes_from([n], weight=1.0)
n += 1
# sanity check
assert (m_nodes == G.number_of_nodes()), "Expected {} nodes, networkx graph contains {} nodes".format(m_nodes, G.number_of_nodes())
assert (m_edges == G.number_of_edges()), "Expected {} edges, networkx graph contains {} edges".format(m_edges, G.number_of_edges())
return G
def read_edgelist(file):
logging.info("Reading edgelist file %s", file)
return nx.read_edgelist(file)
def read_gml(file):
logging.info("Reading gml file %s", file)
return nx.read_gml(file, label='id')
def relabel_nodes(graph):
graph = nx.relabel_nodes(graph, {node:utils.to_int(node) for node in graph.nodes()}) # relabel nodes as integers
def read_graph_from_file(file, format):
graph = None
if format == 'metis':
graph = read_metis(file)
elif format == 'edgelist':
graph = read_edgelist(file)
relabel_nodes(graph)
elif format == 'gml':
graph = read_gml(file)
relabel_nodes(graph)
return graph
def read_assignments_file(file):
logging.info("Reading assignments file %s", file)
with open(file, 'r') as f:
return {i:int(l.strip()) for i,l in enumerate(f.readlines())}
def read_filter_file(file):
logging.info("Reading filter file %s", file)
with open(file, 'r') as f:
return {i:int(l.strip()) for i,l in enumerate(f.readlines())}
def read_order_file(file):
logging.info("Reading order file %s", file)
with open(file, 'r') as f:
return [int(l.strip()) for l in f.readlines()]
def read_oslom2_tp_file(filepath):
node_dict = defaultdict(list) # initialize modules per node dictionary
with open(filepath, 'r') as file:
line = next(file)
while line:
if line.startswith('#'): # module header line
module_id = int(line.split()[1]) + 1 # 1-based (needed by gvmap) module id
else: # nodes in the module
nodes = line.split()
for node_id in nodes:
node_dict[utils.to_int(node_id)].append(module_id) # add current module to node dictionary
line = next(file, None)
return node_dict
def read_infomap_tree_file(filepath, level):
node_dict = defaultdict(list) # initialize modules per node dictionary
module_ids = []
with open(filepath, 'r') as file:
line = next(file)
while line:
if not line.startswith('#'):
values = line.split()
module_id = ''.join(values[0].split(":")[0:level]) # concatenated module id at given level (1 to 3). 2:4:3 becomes 2 at level 1, 24 at level 2 and 243 at level 3
if not module_id in module_ids:
module_ids.append(module_id)
node_id = utils.to_int(values[2].strip('"')) # node id without quotes
one_based_module_id = module_ids.index(module_id) + 1 # current 1-based (needed by gvmap) module id
if not one_based_module_id in node_dict[node_id]:
node_dict[node_id].append(one_based_module_id) # add current module id to node dictionary
line = next(file, None)
return node_dict
def get_frame_start_and_count(full_graph, partition, trailing_frame_count):
''' Global frame start and count per node '''
sorted_nodes = sorted(full_graph.nodes(data=True), key=lambda node: node[1]['order'])
ordered_assignments = [node[1]['partition'] for node in sorted_nodes]
partition_frame_start = [i for i, a in enumerate(ordered_assignments) if a == partition] # assignment start indexes for given partition
partition_frame_start_extended = partition_frame_start + [len(sorted_nodes) + trailing_frame_count] # add last frame id with extra trailing frames to give highlighted nodes time to settle
partition_frame_count = [v2 - v1 for v1, v2 in zip(partition_frame_start_extended, partition_frame_start_extended[1:])] # subtract consecutive frame start values
return partition_frame_start, partition_frame_count
def write_dgs_file(output, graph, full_graph, label_type, colour_attr, trailing_frame_count):
partition = graph.graph['partition']
filename = os.path.join(output, 'partition_{}.dgs'.format(partition))
logging.info("Writing DGS file %s (partition %d)", filename, partition)
with open(filename, 'w') as outf:
outf.write("DGS004\n")
outf.write("partition_{} 0 0\n".format(partition))
# get partition start and count per node
partition_frame_start, partition_frame_count = get_frame_start_and_count(full_graph, partition, trailing_frame_count)
# sort nodes according to node_order
sorted_nodes = sorted(graph.nodes(data=True), key=lambda node: node[1]['order'])
i = 0
st = 1
nodes_added = []
edges_added = []
for index, n in enumerate(sorted_nodes):
node_id = n[0]
# Hidden
hidden = 1 if 'hidden' in n[1] else 0
# Color
color = 'black'
if colour_attr in n[1]:
color = n[1][colour_attr] # get color(s) from attributes
# Label
label = ''
if hidden:
label = '' # hide label for hidden nodes
elif label_type == 'id':
label = node_id
elif label_type == 'order':
label = n[1]['order']
# Size
node_size = n[1]['size']
outf.write("an {} c='{}' l='{}' s='{}' fs='{}' fc='{}' hidden='{}'\n".format(node_id, color, label, node_size, partition_frame_start[index], partition_frame_count[index], hidden))
nodes_added += [node_id]
for e in graph.edges(node_id):
edge1_id = e[1]
edge2_id = e[0]
if edge1_id in nodes_added and edge2_id in nodes_added and (edge1_id, edge2_id) not in edges_added:
outf.write("ae {} {} {}\n".format(i, edge1_id, edge2_id))
edges_added += [(edge1_id, edge2_id)]
i += 1
outf.write("st {}\n".format(st))
st += 1
return filename
def write_oslom_edge_file(output_path, data_filename, graph):
if not os.path.exists(output_path):
os.makedirs(output_path)
if not os.path.exists(os.path.join(output_path, 'oslom')):
os.makedirs(os.path.join(output_path, 'oslom'))
# write edge list in a format for OSLOM, tab delimited
edges_oslom_filename = os.path.join(output_path, 'oslom', data_filename + "-edges-oslom.txt")
with open(edges_oslom_filename, "w") as outf:
for e in graph.edges(data=True):
edge_weight = e[2]["weight"] if 'weight' in e[2] else 1.0
outf.write("{}\t{}\t{}\n".format(e[0], e[1], edge_weight))
return edges_oslom_filename
def write_pajek_file(output_path, data_filename, graph):
pajek_filepath = os.path.join(output_path, data_filename + ".net")
# write_pajek requires string attributes
weights = nx.get_node_attributes(graph, 'weight')
new_weights = {k:utils.to_str(v) for k,v in weights.items()}
nx.set_node_attributes(graph, name='weight', values=new_weights)
nx.write_pajek(graph, pajek_filepath)
return pajek_filepath