/
treesim-forward.py
901 lines (842 loc) · 33.2 KB
/
treesim-forward.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
# Tree routing scheme (named Yggdrasil, after the world tree from Norse mythology)
# Steps:
# 1: Pick any node, here I'm using highest nodeID
# 2: Build spanning tree, each node stores path back to root
# Optionally with weights for each hop
# Ties broken by preferring a parent with higher degree
# 3: Distance metric: self->peer + (via tree) peer->dest
# 4: Perform (modified) greedy lookup via this metric for each direction (A->B and B->A)
# 5: Source-route traffic using the better of those two paths
# Note: This makes no attempt to simulate a dynamic network
# E.g. A node's peers cannot be disconnected
# TODO:
# Make better use of drop?
# In particular, we should be ignoring *all* recently dropped *paths* to the root
# To minimize route flapping
# Not really an issue in the sim, but probably needed for a real network
import array
import gc
import glob
import gzip
import heapq
import os
import random
import time
#############
# Constants #
#############
# Reminder of where link cost comes in
LINK_COST = 1
# Timeout before dropping something, in simulated seconds
TIMEOUT = 60
###########
# Classes #
###########
class PathInfo:
def __init__(self, nodeID):
self.nodeID = nodeID # e.g. IP
self.coords = [] # Position in tree
self.tstamp = 0 # Timestamp from sender, to keep track of old vs new info
self.degree = 0 # Number of peers the sender has, used to break ties
# The above should be signed
self.path = [nodeID] # Path to node (in path-vector route)
self.time = 0 # Time info was updated, to keep track of e.g. timeouts
self.treeID = nodeID # Hack, let tree use different ID than IP, used so we can dijkstra once and test many roots
def clone(self):
# Return a deep-enough copy of the path
clone = PathInfo(None)
clone.nodeID = self.nodeID
clone.coords = self.coords[:]
clone.tstamp = self.tstamp
clone.degree = self.degree
clone.path = self.path[:]
clone.time = self.time
clone.treeID = self.treeID
return clone
# End class PathInfo
class Node:
def __init__(self, nodeID):
self.info = PathInfo(nodeID) # Self NodeInfo
self.root = None # PathInfo to node at root of tree
self.drop = dict() # PathInfo to nodes from clus that have timed out
self.peers = dict() # PathInfo to peers
self.links = dict() # Links to peers (to pass messages)
self.msgs = [] # Said messages
self.table = dict() # Pre-computed lookup table of peer info
def tick(self):
# Do periodic maintenance stuff, including push updates
self.info.time += 1
if self.info.time > self.info.tstamp + TIMEOUT/4:
# Update timestamp at least once every 1/4 timeout period
# This should probably be randomized in a real implementation
self.info.tstamp = self.info.time
self.info.degree = 0# TODO decide if degree should be used, len(self.peers)
changed = False # Used to track when the network has converged
changed |= self.cleanRoot()
self.cleanDropped()
# Should probably send messages infrequently if there's nothing new to report
if self.info.tstamp == self.info.time:
msg = self.createMessage()
self.sendMessage(msg)
return changed
def cleanRoot(self):
changed = False
if self.root and self.info.time - self.root.time > TIMEOUT:
print "DEBUG: clean root,", self.root.path
self.drop[self.root.treeID] = self.root
self.root = None
changed = True
if not self.root or self.root.treeID < self.info.treeID:
# No need to drop someone who'se worse than us
self.info.coords = [self.info.nodeID]
self.root = self.info.clone()
changed = True
elif self.root.treeID == self.info.treeID:
self.root = self.info.clone()
return changed
def cleanDropped(self):
# May actually be a treeID... better to iterate over keys explicitly
nodeIDs = sorted(self.drop.keys())
for nodeID in nodeIDs:
node = self.drop[nodeID]
if self.info.time - node.time > 4*TIMEOUT:
del self.drop[nodeID]
return None
def createMessage(self):
# Message is just a tuple
# First element is the sender
# Second element is the root
# We will .clone() everything during the send operation
msg = (self.info, self.root)
return msg
def sendMessage(self, msg):
for link in self.links.values():
newMsg = (msg[0].clone(), msg[1].clone())
link.msgs.append(newMsg)
return None
def handleMessages(self):
changed = False
while self.msgs:
changed |= self.handleMessage(self.msgs.pop())
return changed
def handleMessage(self, msg):
changed = False
for node in msg:
# Update the path and timestamp for the sender and root info
node.path.append(self.info.nodeID)
node.time = self.info.time
# Update the sender's info in our list of peers
sender = msg[0]
self.peers[sender.nodeID] = sender
# Decide if we want to update the root
root = msg[1]
updateRoot = False
isSameParent = False
isBetterParent = False
if len(self.root.path) > 1 and len(root.path) > 1:
parent = self.peers[self.root.path[-2]]
if parent.nodeID == sender.nodeID: isSameParent = True
if sender.degree > parent.degree:
# This would also be where you check path uptime/reliability/whatever
# All else being equal, we prefer parents with high degree
# We are trusting peers to report degree correctly in this case
# So expect some performance reduction if your peers aren't trustworthy
# (Lies can increase average stretch by a few %)
isBetterParent = True
if self.info.nodeID in root.path[:-1]: pass # No loopy routes allowed
elif root.treeID in self.drop and self.drop[root.treeID].tstamp >= root.tstamp: pass
elif not self.root: updateRoot = True
elif self.root.treeID < root.treeID: updateRoot = True
elif self.root.treeID != root.treeID: pass
elif self.root.tstamp > root.tstamp: pass
elif len(root.path) < len(self.root.path): updateRoot = True
elif isBetterParent and len(root.path) == len(self.root.path): updateRoot = True
elif isSameParent and self.root.tstamp < root.tstamp: updateRoot = True
if updateRoot:
if not self.root or self.root.path != root.path: changed = True
self.root = root
self.info.coords = self.root.path
return changed
def lookup(self, dest):
# Note: Can loop in an unconverged network
# The person looking up the route is responsible for checking for loops
best = None
bestDist = 0
for node in self.peers.itervalues():
# dist = distance to node + dist (on tree) from node to dest
dist = len(node.path)-1 + treeDist(node.coords, dest.coords)
if not best or dist < bestDist:
best = node
bestDist = dist
if best:
next = best.path[-2]
assert next in self.peers
return next
else:
# We failed to look something up
# TODO some way to signal this which doesn't crash
assert False
def initTable(self):
# Pre-computes a lookup table for destination coords
# Insert parent first so you prefer them as a next-hop
self.table.clear()
parent = self.info.nodeID
if len(self.info.coords) >= 2: parent = self.info.coords[-2]
for peer in self.peers.itervalues():
current = self.table
for coord in peer.coords:
if coord not in current: current[coord] = (peer.nodeID, dict())
old = current[coord]
next = old[1]
oldPeer = self.peers[old[0]]
oldDist = len(oldPeer.coords)
oldDeg = oldPeer.degree
newDist = len(peer.coords)
newDeg = peer.degree
# Prefer parent
# Else prefer short distance from root
# If equal distance, prefer high degree
if peer.nodeID == parent: current[coord] = (peer.nodeID, next)
elif newDist < oldDist: current[coord] = (peer.nodeID, next)
elif newDist == oldDist and newDeg > oldDeg: current[coord] = (peer.nodeID, next)
current = next
return None
def lookup_new(self, dest):
# Use pre-computed lookup table to look up next hop for dest coords
assert self.table
if len(self.info.coords) >= 2: parent = self.info.coords[-2]
else: parent = None
current = (parent, self.table)
c = None
for coord in dest.coords:
c = coord
if coord not in current[1]: break
current = current[1][coord]
next = current[0]
if c in self.peers: next = c
if next not in self.peers:
assert next == None
# You're the root of a different connected component
# You'd drop the packet in this case
# To make the path cache not die, need to return a valid next hop...
# Returning self for that reason
next = self.info.nodeID
return next
# End class Node
####################
# Helper Functions #
####################
def getIndexOfLCA(source, dest):
# Return index of last common ancestor in source/dest coords
# -1 if no common ancestor (e.g. different roots)
lcaIdx = -1
minLen = min(len(source), len(dest))
for idx in xrange(minLen):
if source[idx] == dest[idx]: lcaIdx = idx
else: break
return lcaIdx
def treePath(source, dest):
# Return path with source at head and dest at tail
lastMatch = getIndexOfLCA(source, dest)
path = dest[-1:lastMatch:-1] + source[lastMatch:]
assert path[0] == dest[-1]
assert path[-1] == source[-1]
return path
def treeDist(source, dest):
dist = len(source) + len(dest)
lcaIdx = getIndexOfLCA(source, dest)
dist -= 2*(lcaIdx+1)
return dist
def dijkstra(nodestore, startingNodeID):
# Idea to use heapq and basic implementation taken from stackexchange post
# http://codereview.stackexchange.com/questions/79025/dijkstras-algorithm-in-python
nodeIDs = sorted(nodestore.keys())
nNodes = len(nodeIDs)
idxs = dict()
for nodeIdx in xrange(nNodes):
nodeID = nodeIDs[nodeIdx]
idxs[nodeID] = nodeIdx
dists = array.array("H", [0]*nNodes)
queue = [(0, startingNodeID)]
while queue:
dist, nodeID = heapq.heappop(queue)
idx = idxs[nodeID]
if not dists[idx]: # Unvisited, otherwise we skip it
dists[idx] = dist
for peer in nodestore[nodeID].links:
if not dists[idxs[peer]]:
# Peer is also unvisited, so add to queue
heapq.heappush(queue, (dist+LINK_COST, peer))
return dists
def dijkstrall(nodestore):
# Idea to use heapq and basic implementation taken from stackexchange post
# http://codereview.stackexchange.com/questions/79025/dijkstras-algorithm-in-python
nodeIDs = sorted(nodestore.keys())
nNodes = len(nodeIDs)
idxs = dict()
for nodeIdx in xrange(nNodes):
nodeID = nodeIDs[nodeIdx]
idxs[nodeID] = nodeIdx
dists = array.array("H", [0]*nNodes*nNodes) # use GetCacheIndex(nNodes, start, end)
for sourceIdx in xrange(nNodes):
print "Finding shortest paths for node {} / {} ({})".format(sourceIdx+1, nNodes, nodeIDs[sourceIdx])
queue = [(0, sourceIdx)]
while queue:
dist, nodeIdx = heapq.heappop(queue)
distIdx = getCacheIndex(nNodes, sourceIdx, nodeIdx)
if not dists[distIdx]: # Unvisited, otherwise we skip it
dists[distIdx] = dist
for peer in nodestore[nodeIDs[nodeIdx]].links:
pIdx = idxs[peer]
pdIdx = getCacheIndex(nNodes, sourceIdx, pIdx)
if not dists[pdIdx]:
# Peer is also unvisited, so add to queue
heapq.heappush(queue, (dist+LINK_COST, pIdx))
return dists
def linkNodes(node1, node2):
node1.links[node2.info.nodeID] = node2
node2.links[node1.info.nodeID] = node1
############################
# Store topology functions #
############################
def makeStoreSquareGrid(sideLength, randomize=True):
# Simple grid in a sideLength*sideLength square
# Just used to validate that the code runs
store = dict()
nodeIDs = list(range(sideLength*sideLength))
if randomize: random.shuffle(nodeIDs)
for nodeID in nodeIDs:
store[nodeID] = Node(nodeID)
for index in xrange(len(nodeIDs)):
if (index % sideLength != 0): linkNodes(store[nodeIDs[index]], store[nodeIDs[index-1]])
if (index >= sideLength): linkNodes(store[nodeIDs[index]], store[nodeIDs[index-sideLength]])
print "Grid store created, size {}".format(len(store))
return store
def makeStoreASRelGraph(pathToGraph):
#Existing network graphs, in caida.org's asrel format (ASx|ASy|z per line, z denotes relationship type)
with open(pathToGraph, "r") as f:
inData = f.readlines()
store = dict()
for line in inData:
if line.strip()[0] == "#": continue # Skip comment lines
line = line.replace('|'," ")
nodes = map(int, line.split()[0:2])
if nodes[0] not in store: store[nodes[0]] = Node(nodes[0])
if nodes[1] not in store: store[nodes[1]] = Node(nodes[1])
linkNodes(store[nodes[0]], store[nodes[1]])
print "CAIDA AS-relation graph successfully imported, size {}".format(len(store))
return store
def makeStoreASRelGraphMaxDeg(pathToGraph, degIdx=0):
with open(pathToGraph, "r") as f:
inData = f.readlines()
store = dict()
nodeDeg = dict()
for line in inData:
if line.strip()[0] == "#": continue # Skip comment lines
line = line.replace('|'," ")
nodes = map(int, line.split()[0:2])
if nodes[0] not in nodeDeg: nodeDeg[nodes[0]] = 0
if nodes[1] not in nodeDeg: nodeDeg[nodes[1]] = 0
nodeDeg[nodes[0]] += 1
nodeDeg[nodes[1]] += 1
sortedNodes = sorted(nodeDeg.keys(), \
key=lambda x: (nodeDeg[x], x), \
reverse=True)
maxDegNodeID = sortedNodes[degIdx]
return makeStoreASRelGraphFixedRoot(pathToGraph, maxDegNodeID)
def makeStoreASRelGraphFixedRoot(pathToGraph, rootNodeID):
with open(pathToGraph, "r") as f:
inData = f.readlines()
store = dict()
for line in inData:
if line.strip()[0] == "#": continue # Skip comment lines
line = line.replace('|'," ")
nodes = map(int, line.split()[0:2])
if nodes[0] not in store:
store[nodes[0]] = Node(nodes[0])
if nodes[0] == rootNodeID: store[nodes[0]].info.treeID += 1000000000
if nodes[1] not in store:
store[nodes[1]] = Node(nodes[1])
if nodes[1] == rootNodeID: store[nodes[1]].info.treeID += 1000000000
linkNodes(store[nodes[0]], store[nodes[1]])
print "CAIDA AS-relation graph successfully imported, size {}".format(len(store))
return store
def makeStoreDimesEdges(pathToGraph, rootNodeID=None):
# Read from a DIMES csv-formatted graph from a gzip file
store = dict()
with gzip.open(pathToGraph, "r") as f:
inData = f.readlines()
size = len(inData)
index = 0
for edge in inData:
if not index % 1000:
pct = 100.0*index/size
print "Processing edge {}, {:.2f}%".format(index, pct)
index += 1
dat = edge.rstrip().split(',')
node1 = "N" + str(dat[0].strip())
node2 = "N" + str(dat[1].strip())
if '?' in node1 or '?' in node2: continue #Unknown node
if node1 == rootNodeID: node1 = "R" + str(dat[0].strip())
if node2 == rootNodeID: node2 = "R" + str(dat[1].strip())
if node1 not in store: store[node1] = Node(node1)
if node2 not in store: store[node2] = Node(node2)
if node1 != node2: linkNodes(store[node1], store[node2])
print "DIMES graph successfully imported, size {}".format(len(store))
return store
def makeStoreGeneratedGraph(pathToGraph, root=None):
with open(pathToGraph, "r") as f:
inData = f.readlines()
store = dict()
for line in inData:
if line.strip()[0] == "#": continue # Skip comment lines
nodes = map(int, line.strip().split(' ')[0:2])
node1 = nodes[0]
node2 = nodes[1]
if node1 == root: node1 += 1000000
if node2 == root: node2 += 1000000
if node1 not in store: store[node1] = Node(node1)
if node2 not in store: store[node2] = Node(node2)
linkNodes(store[node1], store[node2])
print "Generated graph successfully imported, size {}".format(len(store))
return store
############################################
# Functions used as parts of network tests #
############################################
def idleUntilConverged(store):
nodeIDs = sorted(store.keys())
timeOfLastChange = 0
step = 0
# Idle until the network has converged
while step - timeOfLastChange < 4*TIMEOUT:
step += 1
print "Step: {}, last change: {}".format(step, timeOfLastChange)
changed = False
for nodeID in nodeIDs:
# Update node status, send messages
changed |= store[nodeID].tick()
for nodeID in nodeIDs:
# Process messages
changed |= store[nodeID].handleMessages()
if changed: timeOfLastChange = step
initTables(store)
return store
def getCacheIndex(nodes, sourceIndex, destIndex):
return sourceIndex*nodes + destIndex
def initTables(store):
nodeIDs = sorted(store.keys())
nNodes = len(nodeIDs)
print "Initializing routing tables for {} nodes".format(nNodes)
for idx in xrange(nNodes):
nodeID = nodeIDs[idx]
store[nodeID].initTable()
print "Routing tables initialized"
return None
def getCache(store):
nodeIDs = sorted(store.keys())
nNodes = len(nodeIDs)
nodeIdxs = dict()
for nodeIdx in xrange(nNodes):
nodeIdxs[nodeIDs[nodeIdx]] = nodeIdx
cache = array.array("H", [0]*nNodes*nNodes)
for sourceIdx in xrange(nNodes):
sourceID = nodeIDs[sourceIdx]
print "Building fast lookup table for node {} / {} ({})".format(sourceIdx+1, nNodes, sourceID)
for destIdx in xrange(nNodes):
destID = nodeIDs[destIdx]
if sourceID == destID: nextHop = destID # lookup would fail
else: nextHop = store[sourceID].lookup(store[destID].info)
nextHopIdx = nodeIdxs[nextHop]
cache[getCacheIndex(nNodes, sourceIdx, destIdx)] = nextHopIdx
return cache
def testPaths(store, dists):
cache = getCache(store)
nodeIDs = sorted(store.keys())
nNodes = len(nodeIDs)
idxs = dict()
for nodeIdx in xrange(nNodes):
nodeID = nodeIDs[nodeIdx]
idxs[nodeID] = nodeIdx
results = dict()
for sourceIdx in xrange(nNodes):
sourceID = nodeIDs[sourceIdx]
print "Testing paths from node {} / {} ({})".format(sourceIdx+1, len(nodeIDs), sourceID)
#dists = dijkstra(store, sourceID)
for destIdx in xrange(nNodes):
destID = nodeIDs[destIdx]
if destID == sourceID: continue # Skip self
distIdx = getCacheIndex(nNodes, sourceIdx, destIdx)
eHops = dists[distIdx]
if not eHops: continue # The network is split, no path exists
hops = 0
for pair in ((sourceIdx, destIdx),):
nHops = 0
locIdx = pair[0]
dIdx = pair[1]
while locIdx != dIdx:
locIdx = cache[getCacheIndex(nNodes, locIdx, dIdx)]
nHops += 1
if not hops or nHops < hops: hops = nHops
if eHops not in results: results[eHops] = dict()
if hops not in results[eHops]: results[eHops][hops] = 0
results[eHops][hops] += 1
return results
def getAvgStretch(pathMatrix):
avgStretch = 0.
checked = 0.
for eHops in sorted(pathMatrix.keys()):
for nHops in sorted(pathMatrix[eHops].keys()):
count = pathMatrix[eHops][nHops]
stretch = float(nHops)/float(max(1, eHops))
avgStretch += stretch*count
checked += count
avgStretch /= max(1, checked)
return avgStretch
def getMaxStretch(pathMatrix):
maxStretch = 0.
for eHops in sorted(pathMatrix.keys()):
for nHops in sorted(pathMatrix[eHops].keys()):
stretch = float(nHops)/float(max(1, eHops))
maxStretch = max(maxStretch, stretch)
return maxStretch
def getCertSizes(store):
# Returns nCerts frequency distribution
# De-duplicates common certs (for shared prefixes in the path)
sizes = dict()
for node in store.values():
certs = set()
for peer in node.peers.values():
pCerts = set()
assert len(peer.path) == 2
assert peer.coords[-1] == peer.path[0]
hops = peer.coords + peer.path[1:]
for hopIdx in xrange(len(hops)-1):
send = hops[hopIdx]
if send == node.info.nodeID: continue # We created it, already have it
path = hops[0:hopIdx+2]
# Each cert is signed by the sender
# Includes information about the path from the sender to the next hop
# Next hop is at hopIdx+1, so the path to next hop is hops[0:hopIdx+2]
cert = "{}:{}".format(send, path)
certs.add(cert)
size = len(certs)
if size not in sizes: sizes[size] = 0
sizes[size] += 1
return sizes
def getMinLinkCertSizes(store):
# Returns nCerts frequency distribution
# De-duplicates common certs (for shared prefixes in the path)
# Based on the minimum number of certs that must be traded through a particular link
# Handled per link
sizes = dict()
for node in store.values():
peerCerts = dict()
for peer in node.peers.values():
pCerts = set()
assert len(peer.path) == 2
assert peer.coords[-1] == peer.path[0]
hops = peer.coords + peer.path[1:]
for hopIdx in xrange(len(hops)-1):
send = hops[hopIdx]
if send == node.info.nodeID: continue # We created it, already have it
path = hops[0:hopIdx+2]
# Each cert is signed by the sender
# Includes information about the path from the sender to the next hop
# Next hop is at hopIdx+1, so the path to next hop is hops[0:hopIdx+2]
cert = "{}:{}".format(send, path)
pCerts.add(cert)
peerCerts[peer.nodeID] = pCerts
for peer in peerCerts:
size = 0
pCerts = peerCerts[peer]
for cert in pCerts:
required = True
for p2 in peerCerts:
if p2 == peer: continue
p2Certs = peerCerts[p2]
if cert in p2Certs: required = False
if required: size += 1
if size not in sizes: sizes[size] = 0
sizes[size] += 1
return sizes
def getPathSizes(store):
# Returns frequency distribution of the total number of hops the routing table
# I.e. a node with 3 peers, each with 5 hop coord+path, would count as 3x5=15
sizes = dict()
for node in store.values():
size = 0
for peer in node.peers.values():
assert len(peer.path) == 2
assert peer.coords[-1] == peer.path[0]
peerSize = len(peer.coords) + len(peer.path) - 1 # double-counts peer, -1
size += peerSize
if size not in sizes: sizes[size] = 0
sizes[size] += 1
return sizes
def getPeerSizes(store):
# Returns frequency distribution of the number of peers each node has
sizes = dict()
for node in store.values():
nPeers = len(node.peers)
if nPeers not in sizes: sizes[nPeers] = 0
sizes[nPeers] += 1
return sizes
def getAvgSize(sizes):
sumSizes = 0
nNodes = 0
for size in sizes:
count = sizes[size]
sumSizes += size*count
nNodes += count
avgSize = float(sumSizes)/max(1, nNodes)
return avgSize
def getMaxSize(sizes):
return max(sizes.keys())
def getMinSize(sizes):
return min(sizes.keys())
def getResults(pathMatrix):
results = []
for eHops in sorted(pathMatrix.keys()):
for nHops in sorted(pathMatrix[eHops].keys()):
count = pathMatrix[eHops][nHops]
results.append("{} {} {}".format(eHops, nHops, count))
return '\n'.join(results)
####################################
# Functions to run different tests #
####################################
def runTest(store):
# Runs the usual set of tests on the store
# Does not save results, so only meant for quick tests
# To e.g. check the code works, maybe warm up the pypy jit
for node in store.values():
node.info.time = random.randint(0, TIMEOUT)
node.info.tstamp = TIMEOUT
print "Begin testing network"
dists = None
if not dists: dists = dijkstrall(store)
idleUntilConverged(store)
pathMatrix = testPaths(store, dists)
avgStretch = getAvgStretch(pathMatrix)
maxStretch = getMaxStretch(pathMatrix)
peers = getPeerSizes(store)
certs = getCertSizes(store)
paths = getPathSizes(store)
linkCerts = getMinLinkCertSizes(store)
avgPeerSize = getAvgSize(peers)
maxPeerSize = getMaxSize(peers)
avgCertSize = getAvgSize(certs)
maxCertSize = getMaxSize(certs)
avgPathSize = getAvgSize(paths)
maxPathSize = getMaxSize(paths)
avgLinkCert = getAvgSize(linkCerts)
maxLinkCert = getMaxSize(linkCerts)
totalCerts = sum(map(lambda x: x*certs[x], certs.keys()))
totalLinks = sum(map(lambda x: x*peers[x], peers.keys())) # one-way links
avgCertsPerLink = float(totalCerts)/max(1, totalLinks)
print "Finished testing network"
print "Avg / Max stretch: {} / {}".format(avgStretch, maxStretch)
print "Avg / Max nPeers size: {} / {}".format(avgPeerSize, maxPeerSize)
print "Avg / Max nCerts size: {} / {}".format(avgCertSize, maxCertSize)
print "Avg / Max total hops in any node's routing table: {} / {}".format(avgPathSize, maxPathSize)
print "Avg / Max lower bound cert requests per link (one-way): {} / {}".format(avgLinkCert, maxLinkCert)
print "Avg certs per link (one-way): {}".format(avgCertsPerLink)
return # End of function
def rootNodeASTest(path, outDir="output-treesim-AS", dists=None, proc = 1):
# Checks performance for every possible choice of root node
# Saves output for each root node to a separate file on disk
# path = input path to some caida.org formatted AS-relationship graph
if not os.path.exists(outDir): os.makedirs(outDir)
assert os.path.exists(outDir)
store = makeStoreASRelGraph(path)
nodes = sorted(store.keys())
for nodeIdx in xrange(len(nodes)):
if nodeIdx % proc != 0: continue # Work belongs to someone else
rootNodeID = nodes[nodeIdx]
outpath = outDir+"/{}".format(rootNodeID)
if os.path.exists(outpath):
print "Skipping {}, already processed".format(rootNodeID)
continue
store = makeStoreASRelGraphFixedRoot(path, rootNodeID)
for node in store.values():
node.info.time = random.randint(0, TIMEOUT)
node.info.tstamp = TIMEOUT
print "Beginning {}, size {}".format(nodeIdx, len(store))
if not dists: dists = dijkstrall(store)
idleUntilConverged(store)
pathMatrix = testPaths(store, dists)
avgStretch = getAvgStretch(pathMatrix)
maxStretch = getMaxStretch(pathMatrix)
results = getResults(pathMatrix)
with open(outpath, "w") as f:
f.write(results)
print "Finished test for root AS {} ({} / {})".format(rootNodeID, nodeIdx+1, len(store))
print "Avg / Max stretch: {} / {}".format(avgStretch, maxStretch)
#break # Stop after 1, because they can take forever
return # End of function
def timelineASTest():
# Meant to study the performance of the network as a function of network size
# Loops over a set of AS-relationship graphs
# Runs a test on each graph, selecting highest-degree node as the root
# Saves results for each graph to a separate file on disk
outDir = "output-treesim-timeline-AS"
if not os.path.exists(outDir): os.makedirs(outDir)
assert os.path.exists(outDir)
paths = sorted(glob.glob("asrel/datasets/*"))
for path in paths:
date = os.path.basename(path).split(".")[0]
outpath = outDir+"/{}".format(date)
if os.path.exists(outpath):
print "Skipping {}, already processed".format(date)
continue
store = makeStoreASRelGraphMaxDeg(path)
dists = None
for node in store.values():
node.info.time = random.randint(0, TIMEOUT)
node.info.tstamp = TIMEOUT
print "Beginning {}, size {}".format(date, len(store))
if not dists: dists = dijkstrall(store)
idleUntilConverged(store)
pathMatrix = testPaths(store, dists)
avgStretch = getAvgStretch(pathMatrix)
maxStretch = getMaxStretch(pathMatrix)
results = getResults(pathMatrix)
with open(outpath, "w") as f:
f.write(results)
print "Finished {} with {} nodes".format(date, len(store))
print "Avg / Max stretch: {} / {}".format(avgStretch, maxStretch)
#break # Stop after 1, because they can take forever
return # End of function
def timelineDimesTest():
# Meant to study the performance of the network as a function of network size
# Loops over a set of AS-relationship graphs
# Runs a test on each graph, selecting highest-degree node as the root
# Saves results for each graph to a separate file on disk
outDir = "output-treesim-timeline-dimes"
if not os.path.exists(outDir): os.makedirs(outDir)
assert os.path.exists(outDir)
# Input files are named ASEdgesX_Y where X = month (no leading 0), Y = year
paths = sorted(glob.glob("DIMES/ASEdges/*.gz"))
exists = set(glob.glob(outDir+"/*"))
for path in paths:
date = os.path.basename(path).split(".")[0]
outpath = outDir+"/{}".format(date)
if outpath in exists:
print "Skipping {}, already processed".format(date)
continue
store = makeStoreDimesEdges(path)
# Get the highest degree node and make it root
# Sorted by nodeID just to make it stable in the event of a tie
nodeIDs = sorted(store.keys())
bestRoot = ""
bestDeg = 0
for nodeID in nodeIDs:
node = store[nodeID]
if len(node.links) > bestDeg:
bestRoot = nodeID
bestDeg = len(node.links)
assert bestRoot
store = makeStoreDimesEdges(path, bestRoot)
rootID = "R" + bestRoot[1:]
assert rootID in store
# Don't forget to set random seed before setitng times
# To make results reproducible
nodeIDs = sorted(store.keys())
random.seed(12345)
for nodeID in nodeIDs:
node = store[nodeID]
node.info.time = random.randint(0, TIMEOUT)
node.info.tstamp = TIMEOUT
print "Beginning {}, size {}".format(date, len(store))
if not dists: dists = dijkstrall(store)
idleUntilConverged(store)
pathMatrix = testPaths(store, dists)
avgStretch = getAvgStretch(pathMatrix)
maxStretch = getMaxStretch(pathMatrix)
results = getResults(pathMatrix)
with open(outpath, "w") as f:
f.write(results)
print "Finished {} with {} nodes".format(date, len(store))
print "Avg / Max stretch: {} / {}".format(avgStretch, maxStretch)
break # Stop after 1, because they can take forever
return # End of function
def scalingTest(maxTests=None, inputDir="graphs"):
# Meant to study the performance of the network as a function of network size
# Loops over a set of nodes in a previously generated graph
# Runs a test on each graph, testing each node as the root
# if maxTests is set, tests only that number of roots (highest degree first)
# Saves results for each graph to a separate file on disk
outDir = "output-treesim-{}".format(inputDir)
if not os.path.exists(outDir): os.makedirs(outDir)
assert os.path.exists(outDir)
paths = sorted(glob.glob("{}/*".format(inputDir)))
exists = set(glob.glob(outDir+"/*"))
for path in paths:
gc.collect() # pypy waits for gc to close files
graph = os.path.basename(path).split(".")[0]
store = makeStoreGeneratedGraph(path)
# Get the highest degree node and make it root
# Sorted by nodeID just to make it stable in the event of a tie
nodeIDs = sorted(store.keys(), key=lambda x: len(store[x].links), reverse=True)
dists = None
if maxTests: nodeIDs = nodeIDs[:maxTests]
for nodeID in nodeIDs:
nodeIDStr = str(nodeID).zfill(len(str(len(store)-1)))
outpath = outDir+"/{}-{}".format(graph, nodeIDStr)
if outpath in exists:
print "Skipping {}-{}, already processed".format(graph, nodeIDStr)
continue
store = makeStoreGeneratedGraph(path, nodeID)
# Don't forget to set random seed before setting times
random.seed(12345) # To make results reproducible
nIDs = sorted(store.keys())
for nID in nIDs:
node = store[nID]
node.info.time = random.randint(0, TIMEOUT)
node.info.tstamp = TIMEOUT
print "Beginning {}, size {}".format(graph, len(store))
if not dists: dists = dijkstrall(store)
idleUntilConverged(store)
pathMatrix = testPaths(store, dists)
avgStretch = getAvgStretch(pathMatrix)
maxStretch = getMaxStretch(pathMatrix)
results = getResults(pathMatrix)
with open(outpath, "w") as f:
f.write(results)
print "Finished {} with {} nodes for root {}".format(graph, len(store), nodeID)
print "Avg / Max stretch: {} / {}".format(avgStretch, maxStretch)
return # End of function
##################
# Main Execution #
##################
if __name__ == "__main__":
if True: # Run a quick test
random.seed(12345) # DEBUG
store = makeStoreSquareGrid(4)
runTest(store) # Quick test
store = None
# Do some real work
#runTest(makeStoreDimesEdges("DIMES/ASEdges/ASEdges1_2007.csv.gz"))
#timelineDimesTest()
#rootNodeASTest("asrel/datasets/19980101.as-rel.txt")
#timelineASTest()
#rootNodeASTest("hype-2016-09-19.list", "output-treesim-hype")
#scalingTest(None, "graphs-20") # First argument 1 to only test 1 root per graph
#store = makeStoreGeneratedGraph("bgp_tables")
#store = makeStoreGeneratedGraph("skitter")
#store = makeStoreASRelGraphMaxDeg("hype-2016-09-19.list") #http://hia.cjdns.ca/watchlist/c/walk.peers.20160919
#store = makeStoreGeneratedGraph("fc00-2017-08-12.txt")
if store: runTest(store)
#rootNodeASTest("skitter", "output-treesim-skitter", None, 0, 1)
#scalingTest(1, "graphs-20") # First argument 1 to only test 1 root per graph
#scalingTest(1, "graphs-21") # First argument 1 to only test 1 root per graph
#scalingTest(1, "graphs-22") # First argument 1 to only test 1 root per graph
#scalingTest(1, "graphs-23") # First argument 1 to only test 1 root per graph
if not store:
import sys
args = sys.argv
if len(args) == 2:
job_number = int(sys.argv[1])
rootNodeASTest("fc00-2017-08-12.txt", "fc00", None, job_number)
else:
print "Usage: {} job_number".format(args[0])
print "job_number = which job set to run on this node (1-indexed)"