-
Notifications
You must be signed in to change notification settings - Fork 5
/
domirank.py
executable file
·258 lines (232 loc) · 12 KB
/
domirank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
########## Here are the associated DomiRank functions #############
import numpy as np
import scipy as sp
import scipy.sparse
import networkx as nx
########## Here are the general functions needed for efficient dismantling and testing of networks #############
def get_largest_component(G, strong = False):
'''
here we get the largest component of a graph, either from scipy.sparse or from networkX.Graph datatype.
1. The argument changes whether or not you want to find the strong or weak - connected components of the graph'''
if type(G) == nx.classes.graph.Graph: #check if it is a networkx Graph
if nx.is_directed(G) and strong == False:
GMask = max(nx.weakly_connected_components(G), key = len)
if nx.is_directed(G) and strong == True:
GMask = max(nx.strongly_connected_components(G), key = len)
else:
GMask = max(nx.connected_components(G), key = len)
G = G.subgraph(GMask)
else:
raise TypeError('You must input a networkx.Graph Data-Type')
return G
def relabel_nodes(G, yield_map = False):
'''relabels the nodes to be from 0, ... len(G).
1. Yield_map returns an extra output as a dict. in case you want to save the hash-map to retrieve node-id'''
if yield_map == True:
nodes = dict(zip(range(len(G)), G.nodes()))
G = nx.relabel_nodes(G, dict(zip(G.nodes(), range(len(G)))))
return G, nodes
else:
G = nx.relabel_nodes(G, dict(zip(G.nodes(), range(len(G)))))
return G
def get_component_size(G, strong = False):
'''
here we get the largest component of a graph, either from scipy.sparse or from networkX.Graph datatype.
1. The argument changes whether or not you want to find the strong or weak - connected components of the graph'''
if type(G) == nx.classes.graph.Graph: #check if it is a networkx Graph
if nx.is_directed(G) and strong == False:
GMask = max(nx.weakly_connected_components(G), key = len)
if nx.is_directed(G) and strong == True:
GMask = max(nx.strongly_connected_components(G), key = len)
else:
GMask = max(nx.connected_components(G), key = len)
G = G.subgraph(GMask)
return len(GMask)
elif type(G) == scipy.sparse._arrays.csr_array:
if strong == False:
connection_type = 'weak'
else:
connection_type = 'strong'
noComponent, lenComponent = sp.sparse.csgraph.connected_components(G, directed = True, connection = connection_type, return_labels = True)
return np.bincount(lenComponent).max()
else:
raise TypeError('You must input a networkx.Graph Data-Type or scipy.sparse.csr array')
def get_link_size(G):
if type(G) == nx.classes.graph.Graph: #check if it is a networkx Graph
links = len(G.edges()) #convert to scipy sparse if it is a graph
elif type(G) == scipy.sparse._arrays.csr_array:
links = G.sum()
else:
raise TypeError('You must input a networkx.Graph Data-Type')
return links
def remove_node(G, removedNode):
'''
removes the node from the graph by removing it from a networkx.Graph type, or zeroing the edges in array form.
'''
if type(G) == nx.classes.graph.Graph: #check if it is a networkx Graph
if type(removedNode) == int:
G.remove_node(removedNode)
else:
for node in removedNode:
G.remove_node(node) #remove node in graph form
return G
elif type(G) == scipy.sparse._arrays.csr_array:
diag = sp.sparse.csr_array(sp.sparse.eye(G.shape[0]))
diag[removedNode, removedNode] = 0 #set the rows and columns that are equal to zero in the sparse array
G = diag @ G
return G @ diag
def generate_attack(centrality, node_map = False):
'''we generate an attack based on a centrality measure -
you can possibly input the node_map to convert the attack to have the correct nodeID'''
if node_map == False:
node_map = range(len(centrality))
else:
node_map = list(node_map.values())
zipped = dict(zip(node_map, centrality))
attackStrategy = sorted(zipped, reverse = True, key = zipped.get)
return attackStrategy
def network_attack_sampled(G, attackStrategy, sampling = 0):
'''Attack a network in a sampled manner... recompute links and largest component after every xth node removal, according to some -
G: is the input graph, preferably as a sparse array.
inputed attack strategy
Note: if sampling is not set, it defaults to sampling every 1%, otherwise, sampling is an integer
that is equal to the number of nodes you want to skip every time you sample.
So for example sampling = int(len(G)/100) would sample every 1% of the nodes removed'''
if type(G) == nx.classes.graph.Graph: #check if it is a networkx Graph
GAdj = nx.to_scipy_sparse_array(G) #convert to scipy sparse if it is a graph
else:
GAdj = G.copy()
if sampling == 0:
sampling = int(GAdj.shape[0]/100)
N = GAdj.shape[0]
initialComponent = get_component_size(GAdj)
initialLinks = get_link_size(GAdj)
m = GAdj.sum()/N
componentEvolution = np.zeros(int(N/sampling))
linksEvolution = np.zeros(int(N/sampling))
j = 0
for i in range(N-1):
if i % sampling == 0:
if i == 0:
componentEvolution[j] = get_component_size(GAdj)/initialComponent
linksEvolution[j] = get_link_size(GAdj)/initialLinks
j+=1
else:
GAdj = remove_node(GAdj, attackStrategy[i-sampling:i])
componentEvolution[j] = get_component_size(GAdj)/initialComponent
linksEvolution[j] = get_link_size(GAdj)/initialLinks
j+=1
return componentEvolution, linksEvolution
######## Beginning of domirank stuff! ####################
def domirank(G, analytical = True, sigma = -1, dt = 0.1, epsilon = 1e-5, maxIter = 1000, checkStep = 10):
'''
G is the input graph as a (preferably) sparse array.
This solves the dynamical equation presented in the Paper: "DomiRank Centrality: revealing structural fragility of
complex networks via node dominance" and yields the following output: bool, DomiRankCentrality
Here, sigma needs to be chosen a priori.
dt determines the step size, usually, 0.1 is sufficiently fine for most networks (could cause issues for networks
with an extremely high degree, but has never failed me!)
maxIter is the depth that you are searching with in case you don't converge or diverge before that.
Checkstep is the amount of steps that you go before checking if you have converged or diverged.
This algorithm scales with O(m) where m is the links in your sparse array.
'''
if type(G) == nx.classes.graph.Graph: #check if it is a networkx Graph
G = nx.to_scipy_sparse_array(G) #convert to scipy sparse if it is a graph
else:
G = G.copy()
if analytical == False:
if sigma == -1:
sigma, _ = optimal_sigma(G, analytical = False, dt=dt, epsilon=epsilon, maxIter = maxIter, checkStep = checkStep)
pGAdj = sigma*G.astype(np.float32)
Psi = np.ones(pGAdj.shape[0]).astype(np.float32)/pGAdj.shape[0]
maxVals = np.zeros(int(maxIter/checkStep)).astype(np.float32)
dt = np.float32(dt)
j = 0
boundary = epsilon*pGAdj.shape[0]*dt
for i in range(maxIter):
tempVal = ((pGAdj @ (1-Psi)) - Psi)*dt
Psi += tempVal.real
if i% checkStep == 0:
if np.abs(tempVal).sum() < boundary:
break
maxVals[j] = tempVal.max()
if i == 0:
initialChange = maxVals[j]
if j > 0:
if maxVals[j] > maxVals[j-1] and maxVals[j-1] > maxVals[j-2]:
return False, Psi
j+=1
return True, Psi
else:
if sigma == -1:
sigma = optimal_sigma(G, analytical = True, dt=dt, epsilon=epsilon, maxIter = maxIter, checkStep = checkStep)
Psi = sp.sparse.linalg.spsolve(sigma*G + sp.sparse.identity(G.shape[0]), sigma*G.sum(axis=-1))
return True, Psi
def find_eigenvalue(G, minVal = 0, maxVal = 1, maxDepth = 100, dt = 0.1, epsilon = 1e-5, maxIter = 100, checkStep = 10):
'''
G: is the input graph as a sparse array.
Finds the largest negative eigenvalue of an adjacency matrix using the DomiRank algorithm.
Currently this function is only single-threaded, as the bisection algorithm only allows for single-threaded
exection. Note, that this algorithm is slightly different, as it uses the fact that DomiRank diverges
at values larger than -1/lambN to its benefit, and thus, it is not exactly bisection theorem. I haven't
tested in order to see which exact value is the fastest for execution, but that will be done soon!
Some notes:
Increase maxDepth for increased accuracy.
Increase maxIter if DomiRank doesn't start diverging within 100 iterations -- i.e. increase at the expense of
increased computational cost if you want potential increased accuracy.
Decrease checkstep for increased error-finding for the values of sigma that are too large, but higher compcost
if you are frequently less than the value (but negligible compcost).
'''
x = (minVal + maxVal)/G.sum(axis=-1).max()
minValStored = 0
for i in range(maxDepth):
if maxVal - minVal < epsilon:
break
if domirank(G, False, x, dt, epsilon, maxIter, checkStep)[0]:
minVal = x
x = (minVal + maxVal)/2
minValStored = minVal
else:
maxVal = (x + maxVal)/2
x = (minVal + maxVal)/2
if minVal == 0:
print(f'Current Interval : [-inf, -{1/maxVal}]')
else:
print(f'Current Interval : [-{1/minVal}, -{1/maxVal}]')
finalVal = (maxVal + minVal)/2
return -1/finalVal
############## This section is for finding the optimal sigma #######################
def process_iteration(q, i, analytical, sigma, spArray, maxIter, checkStep, dt, epsilon, sampling):
tf, domiDist = domirank(spArray, analytical = analytical, sigma = sigma, dt = dt, epsilon = epsilon, maxIter = maxIter, checkStep = checkStep)
domiAttack = generate_attack(domiDist)
ourTempAttack, __ = network_attack_sampled(spArray, domiAttack, sampling = sampling)
finalErrors = ourTempAttack.sum()
q.put(finalErrors)
def optimal_sigma(spArray, analytical = True, endVal = 0, startval = 0.000001, iterationNo = 100, dt = 0.1, epsilon = 1e-5, maxIter = 100, checkStep = 10, maxDepth = 100, sampling = 0):
''' This part finds the optimal sigma by searching the space, here are the novel parameters:
spArray: is the input sparse array/matrix for the network.
startVal: is the starting value of the space that you want to search.
endVal: is the ending value of the space that you want to search (normally it should be the eigenvalue)
iterationNo: the number of partitions of the space between lambN that you set
return : the function returns the value of sigma - the numerator of the fraction of (\sigma)/(-1*lambN)
'''
if endVal == 0:
endVal = find_eigenvalue(spArray, maxDepth = maxDepth, dt = dt, epsilon = epsilon, maxIter = maxIter, checkStep = checkStep)
import multiprocessing as mp
endval = -0.9999/endVal
tempRange = np.arange(startval, endval + (endval-startval)/iterationNo, (endval-startval)/iterationNo)
processes = []
q = mp.Queue()
for i, sigma in enumerate(tempRange):
p = mp.Process(target=process_iteration, args=(q, i, analytical, sigma, spArray, maxIter, checkStep, dt, epsilon, sampling))
p.start()
processes.append(p)
results = []
for p in processes:
p.join()
result = q.get()
results.append(result)
finalErrors = np.array(results)
minEig = np.where(finalErrors == finalErrors.min())[0][-1]
minEig = tempRange[minEig]
return minEig, finalErrors