# misc/sim/treesim-forward.py

# Tree routing scheme (named Yggdrasil, after the world tree from Norse mythology)
# Steps:
#   1: Pick any node, here I'm using highest nodeID
#   2: Build spanning tree, each node stores path back to root
#     Optionally with weights for each hop
#     Ties broken by preferring a parent with higher degree
#   3: Distance metric: self->peer + (via tree) peer->dest
#   4: Perform (modified) greedy lookup via this metric for each direction (A->B and B->A)
#   5: Source-route traffic using the better of those two paths

# Note: This makes no attempt to simulate a dynamic network
#   E.g. A node's peers cannot be disconnected

# TODO:
#   Make better use of drop?
#   In particular, we should be ignoring *all* recently dropped *paths* to the root
#     To minimize route flapping
#     Not really an issue in the sim, but probably needed for a real network

import array
import gc
import glob
import gzip
import heapq
import os
import random
import time

#############
# Constants #
#############

# Reminder of where link cost comes in
# Added once per hop by the shortest-path helpers (dijkstra / dijkstrall)
LINK_COST = 1

# Timeout before dropping something, in simulated seconds
# (Node.tick advances a node's clock by one each time it is called)
TIMEOUT = 60

###########
# Classes #
###########

class PathInfo:
  # Everything one node advertises (or remembers) about a node in the network:
  # who it is, where it sits in the tree, and the path the info travelled
  def __init__(self, nodeID):
    # Fields that the sender would sign
    self.nodeID = nodeID   # e.g. IP
    self.coords = []       # Position in tree
    self.tstamp = 0        # Sender's timestamp, tells old info from new
    self.degree = 0        # Sender's peer count, used to break ties
    # Fields maintained along the way / locally
    self.path   = [nodeID] # Path to node (in path-vector route)
    self.time   = 0        # Local time of last update, e.g. for timeouts
    self.treeID = nodeID   # Hack: tree may use a different ID than the IP, so we can dijkstra once and test many roots

  def clone(self):
    # Copy that shares no mutable state with the original
    # (the coords and path lists are duplicated, scalars are just copied)
    twin = PathInfo(self.nodeID)
    twin.treeID = self.treeID
    twin.tstamp = self.tstamp
    twin.degree = self.degree
    twin.time = self.time
    twin.coords = list(self.coords)
    twin.path = list(self.path)
    return twin
# End class PathInfo

class Node:
  # One simulated node: tracks its own PathInfo, its current idea of the tree
  # root, what it last heard from each peer, and direct references to the peer
  # Node objects that messages are delivered to
  def __init__(self, nodeID):
    self.info  = PathInfo(nodeID) # Self NodeInfo
    self.root  = None             # PathInfo to node at root of tree
    self.drop  = dict()           # PathInfo of timed out roots, keyed by treeID
    self.peers = dict()           # PathInfo to peers, keyed by nodeID
    self.links = dict()           # Links to peers (to pass messages)
    self.msgs  = []               # Said messages
    self.table = dict()           # Pre-computed lookup table of peer info

  def tick(self):
    # Do periodic maintenance stuff, including push updates
    # Returns True if our root changed (used to detect convergence)
    self.info.time += 1
    # Floor division keeps the refresh interval an integer on Python 2 and 3
    if self.info.time > self.info.tstamp + TIMEOUT // 4:
      # Update timestamp at least once every 1/4 timeout period
      # This should probably be randomized in a real implementation
      self.info.tstamp = self.info.time
      self.info.degree = 0 # TODO decide if degree should be used, len(self.peers)
    changed = self.cleanRoot()
    self.cleanDropped()
    # Should probably send messages infrequently if there's nothing new to report
    if self.info.tstamp == self.info.time:
      self.sendMessage(self.createMessage())
    return changed

  def cleanRoot(self):
    # Drop a root we have not heard about for TIMEOUT, and (re)elect
    # ourselves when there is no root or the root's treeID is below ours
    # Returns True if the root was dropped or replaced by ourselves
    changed = False
    if self.root and self.info.time - self.root.time > TIMEOUT:
      # Same output as the Python 2 statement: print "DEBUG: clean root,", path
      print("DEBUG: clean root, {}".format(self.root.path))
      self.drop[self.root.treeID] = self.root
      self.root = None
      changed = True
    if not self.root or self.root.treeID < self.info.treeID:
      # No need to drop someone who's worse than us
      self.info.coords = [self.info.nodeID]
      self.root = self.info.clone()
      changed = True
    elif self.root.treeID == self.info.treeID:
      # We are the root: refresh the stored copy, not counted as a change
      self.root = self.info.clone()
    return changed

  def cleanDropped(self):
    # Forget dropped roots once they are older than 4*TIMEOUT
    # Keys may actually be treeIDs; collect them first so the dict is not
    # modified while it is being iterated
    expired = [key for key in self.drop
               if self.info.time - self.drop[key].time > 4*TIMEOUT]
    for key in expired:
      del self.drop[key]
    return None

  def createMessage(self):
    # Message is just a tuple
    # First element is the sender
    # Second element is the root
    # We will .clone() everything during the send operation
    return (self.info, self.root)

  def sendMessage(self, msg):
    # Queue a private copy of msg on every linked peer
    for link in self.links.values():
      link.msgs.append((msg[0].clone(), msg[1].clone()))
    return None

  def handleMessages(self):
    # Drain the inbox (newest message first)
    # Returns True if any message changed our root
    changed = False
    while self.msgs:
      changed |= self.handleMessage(self.msgs.pop())
    return changed

  def handleMessage(self, msg):
    # Process one (sender, root) message
    # Returns True if it made us adopt a root with a different path
    for node in msg:
      # Update the path and timestamp for the sender and root info
      node.path.append(self.info.nodeID)
      node.time = self.info.time
    # Update the sender's info in our list of peers
    sender = msg[0]
    self.peers[sender.nodeID] = sender
    # Decide if we want to update the root
    root = msg[1]
    updateRoot = False
    isSameParent = False
    isBetterParent = False
    # Guard against having no root yet; the decision chain below already
    # treats that case as "take whatever root we are offered"
    if self.root and len(self.root.path) > 1 and len(root.path) > 1:
      parent = self.peers[self.root.path[-2]]
      if parent.nodeID == sender.nodeID: isSameParent = True
      if sender.degree > parent.degree:
        # This would also be where you check path uptime/reliability/whatever
        # All else being equal, we prefer parents with high degree
        # We are trusting peers to report degree correctly in this case
        # So expect some performance reduction if your peers aren't trustworthy
        # (Lies can increase average stretch by a few %)
        isBetterParent = True
    if self.info.nodeID in root.path[:-1]: pass # No loopy routes allowed
    elif root.treeID in self.drop and self.drop[root.treeID].tstamp >= root.tstamp: pass # Stale news about a dropped root
    elif not self.root: updateRoot = True
    elif self.root.treeID < root.treeID: updateRoot = True # Better root
    elif self.root.treeID != root.treeID: pass # Worse root
    elif self.root.tstamp > root.tstamp: pass # Older than what we have
    elif len(root.path) < len(self.root.path): updateRoot = True # Shorter path
    elif isBetterParent and len(root.path) == len(self.root.path): updateRoot = True
    elif isSameParent and self.root.tstamp < root.tstamp: updateRoot = True # Refresh via current parent
    if updateRoot:
      if not self.root or self.root.path != root.path: changed = True
      else: changed = False
      self.root = root
      # Copy, so our coords never share a list with the stored root info
      self.info.coords = self.root.path[:]
      return changed
    return False

  def lookup(self, dest):
    # Return the nodeID of the peer that is the best next hop towards dest
    # (a PathInfo), raising AssertionError if we have no peers at all
    # Note: Can loop in an unconverged network
    # The person looking up the route is responsible for checking for loops
    best = None
    bestDist = 0
    for peerID in self.peers:
      node = self.peers[peerID]
      # dist = distance to node + dist (on tree) from node to dest
      dist = len(node.path)-1 + treeDist(node.coords, dest.coords)
      if best is None or dist < bestDist:
        best = node
        bestDist = dist
    if best is None:
      # We failed to look something up
      # TODO some way to signal this which doesn't crash
      # Raised explicitly so the failure is not lost when asserts are disabled
      raise AssertionError("lookup failed: node {} has no peers".format(self.info.nodeID))
    nextHop = best.path[-2]
    assert nextHop in self.peers
    return nextHop

  def initTable(self):
    # Pre-computes a lookup table for destination coords
    # The table is a trie over coords: each entry maps a coord to a tuple of
    # (nodeID of the preferred peer for that prefix, sub-table for longer prefixes)
    self.table.clear()
    parent = self.info.nodeID
    if len(self.info.coords) >= 2: parent = self.info.coords[-2]
    for peerID in self.peers:
      peer = self.peers[peerID]
      newDist = len(peer.coords)
      newDeg = peer.degree
      level = self.table
      for coord in peer.coords:
        if coord not in level: level[coord] = (peer.nodeID, dict())
        oldID, subtable = level[coord]
        oldPeer = self.peers[oldID]
        oldDist = len(oldPeer.coords)
        oldDeg = oldPeer.degree
        # Prefer parent
        # Else prefer short distance from root
        # If equal distance, prefer high degree
        if peer.nodeID == parent: level[coord] = (peer.nodeID, subtable)
        elif newDist < oldDist: level[coord] = (peer.nodeID, subtable)
        elif newDist == oldDist and newDeg > oldDeg: level[coord] = (peer.nodeID, subtable)
        level = subtable
    return None

  def lookup_new(self, dest):
    # Use pre-computed lookup table to look up next hop for dest coords
    # Follows dest's coords down the table as far as they match; the peer
    # stored at the deepest match is the next hop (our parent if nothing matches)
    assert self.table
    if len(self.info.coords) >= 2: parent = self.info.coords[-2]
    else: parent = None
    nextHop = parent
    level = self.table
    lastCoord = None
    for coord in dest.coords:
      lastCoord = coord
      if coord not in level: break
      nextHop, level = level[coord]
    # If the last coord we looked at is itself a peer, go there directly
    if lastCoord in self.peers: nextHop = lastCoord
    if nextHop not in self.peers:
      assert nextHop is None
      # You're the root of a different connected component
      # You'd drop the packet in this case
      # To make the path cache not die, need to return a valid next hop...
      # Returning self for that reason
      nextHop = self.info.nodeID
    return nextHop
# End class Node

####################
# Helper Functions #
####################

def getIndexOfLCA(source, dest):
  # Return index of last common ancestor in source/dest coords
  # i.e. the last position of their shared prefix
  # -1 if no common ancestor (e.g. different roots, or either list is empty)
  lcaIdx = -1
  # zip stops at the shorter list, so no explicit length check is needed
  for ours, theirs in zip(source, dest):
    if ours != theirs: break
    lcaIdx += 1
  return lcaIdx

def treePath(source, dest):
  # Return the path through the tree between two sets of coords
  # The list starts at dest's own node (dest[-1]), climbs to the last common
  # ancestor, then descends to source's own node (source[-1])
  # Coords that share no common ancestor are not supported (the asserts trip)
  lca = getIndexOfLCA(source, dest)
  climb = dest[-1:lca:-1] # dest's node up to, but excluding, the common ancestor
  descend = source[lca:]  # common ancestor down to source's node
  path = climb + descend
  assert path[0] == dest[-1]
  assert path[-1] == source[-1]
  return path

def treeDist(source, dest):
  # Number of tree hops between two sets of coords:
  # whatever each side has below the shared prefix
  shared = getIndexOfLCA(source, dest) + 1
  return len(source) + len(dest) - 2*shared

def dijkstra(nodestore, startingNodeID):
  # Shortest-path distance from startingNodeID to every node in nodestore
  # Returns an array of unsigned shorts, indexed by each node's position in
  # sorted(nodestore.keys()); 0 means "the start node itself or unreachable"
  # Idea to use heapq and basic implementation taken from stackexchange post
  # http://codereview.stackexchange.com/questions/79025/dijkstras-algorithm-in-python
  nodeIDs = sorted(nodestore.keys())
  idxs = dict((nodeID, nodeIdx) for nodeIdx, nodeID in enumerate(nodeIDs))
  dists = array.array("H", [0]) * len(nodeIDs)
  queue = [(0, startingNodeID)]
  while queue:
    dist, nodeID = heapq.heappop(queue)
    idx = idxs[nodeID]
    if dists[idx]: continue # Already visited via a path at least as short
    dists[idx] = dist
    for peer in nodestore[nodeID].links:
      # 0 doubles as the "unvisited" marker, so the start node always looks
      # unvisited; never re-queue it or it would end up with distance 2
      if peer == startingNodeID: continue
      if not dists[idxs[peer]]:
        # Peer is also unvisited, so add to queue
        heapq.heappush(queue, (dist+LINK_COST, peer))
  return dists

def dijkstrall(nodestore):
  # All-pairs shortest-path distances
  # Returns a flat nNodes*nNodes array of unsigned shorts; the entry for
  # (source, dest) lives at getCacheIndex(nNodes, sourceIdx, destIdx), with
  # indexes taken from sorted(nodestore.keys())
  # 0 means "same node or unreachable"
  # Idea to use heapq and basic implementation taken from stackexchange post
  # http://codereview.stackexchange.com/questions/79025/dijkstras-algorithm-in-python
  nodeIDs = sorted(nodestore.keys())
  nNodes = len(nodeIDs)
  idxs = dict((nodeID, nodeIdx) for nodeIdx, nodeID in enumerate(nodeIDs))
  # Multiply the array rather than a list, to avoid a huge temporary list
  dists = array.array("H", [0]) * (nNodes*nNodes)
  for sourceIdx, sourceID in enumerate(nodeIDs):
    print("Finding shortest paths for node {} / {} ({})".format(sourceIdx+1, nNodes, sourceID))
    queue = [(0, sourceIdx)]
    while queue:
      dist, nodeIdx = heapq.heappop(queue)
      distIdx = getCacheIndex(nNodes, sourceIdx, nodeIdx)
      if dists[distIdx]: continue # Already visited via a path at least as short
      dists[distIdx] = dist
      for peer in nodestore[nodeIDs[nodeIdx]].links:
        pIdx = idxs[peer]
        # 0 doubles as the "unvisited" marker, so the source always looks
        # unvisited; never re-queue it or its own distance would become 2
        if pIdx == sourceIdx: continue
        if not dists[getCacheIndex(nNodes, sourceIdx, pIdx)]:
          # Peer is also unvisited, so add to queue
          heapq.heappush(queue, (dist+LINK_COST, pIdx))
  return dists

def linkNodes(node1, node2):
  # Connect two nodes in both directions
  # Each node's links dict maps the other node's ID to the other node
  for here, there in ((node1, node2), (node2, node1)):
    here.links[there.info.nodeID] = there

############################
# Store topology functions #
############################

def makeStoreSquareGrid(sideLength, randomize=True):
  # Simple grid in a sideLength*sideLength square
  # Just used to validate that the code runs
  # Grid positions are filled row by row; with randomize the nodeIDs are
  # shuffled first, so IDs do not correlate with position
  # Returns a dict of nodeID -> Node
  store = dict()
  nodeIDs = list(range(sideLength*sideLength))
  if randomize: random.shuffle(nodeIDs)
  for nodeID in nodeIDs:
    store[nodeID] = Node(nodeID)
  for index, nodeID in enumerate(nodeIDs):
    # Link to the left neighbour, unless we are in the first column
    if index % sideLength != 0: linkNodes(store[nodeID], store[nodeIDs[index-1]])
    # Link to the neighbour above, unless we are in the first row
    if index >= sideLength: linkNodes(store[nodeID], store[nodeIDs[index-sideLength]])
  print("Grid store created, size {}".format(len(store)))
  return store

def makeStoreASRelGraph(pathToGraph):
  # Existing network graphs, in caida.org's asrel format (ASx|ASy|z per line, z denotes relationship type)
  # Returns a dict of AS number -> Node, with one link per listed pair
  store = dict()
  with open(pathToGraph, "r") as f:
    for line in f:
      line = line.strip()
      if not line or line[0] == "#": continue # Skip blank and comment lines
      fields = line.replace('|', " ").split()
      asA, asB = int(fields[0]), int(fields[1])
      if asA not in store: store[asA] = Node(asA)
      if asB not in store: store[asB] = Node(asB)
      linkNodes(store[asA], store[asB])
  print("CAIDA AS-relation graph successfully imported, size {}".format(len(store)))
  return store

def makeStoreASRelGraphMaxDeg(pathToGraph, degIdx=0):
  # Import a caida.org asrel graph, using a high-degree node as the tree root
  # degIdx selects which one: 0 = highest degree, 1 = second highest, ...
  # Degree here is the number of lines a node appears on (both ends counted)
  # Ties are broken in favour of the higher nodeID
  nodeDeg = dict()
  with open(pathToGraph, "r") as f:
    for line in f:
      line = line.strip()
      if not line or line[0] == "#": continue # Skip blank and comment lines
      fields = line.replace('|', " ").split()
      for asn in (int(fields[0]), int(fields[1])):
        nodeDeg[asn] = nodeDeg.get(asn, 0) + 1
  sortedNodes = sorted(nodeDeg.keys(),
                       key=lambda x: (nodeDeg[x], x),
                       reverse=True)
  maxDegNodeID = sortedNodes[degIdx]
  return makeStoreASRelGraphFixedRoot(pathToGraph, maxDegNodeID)

def makeStoreASRelGraphFixedRoot(pathToGraph, rootNodeID):
  # Import a caida.org asrel graph (ASx|ASy|z per line)
  # The node whose ID is rootNodeID gets its treeID raised by 1000000000
  # (the highest treeID wins the root election); its nodeID is left alone
  # Returns a dict of AS number -> Node
  store = dict()
  with open(pathToGraph, "r") as f:
    for line in f:
      line = line.strip()
      if not line or line[0] == "#": continue # Skip blank and comment lines
      fields = line.replace('|', " ").split()
      asA, asB = int(fields[0]), int(fields[1])
      for asn in (asA, asB):
        if asn in store: continue
        node = Node(asn)
        if asn == rootNodeID: node.info.treeID += 1000000000
        store[asn] = node
      linkNodes(store[asA], store[asB])
  print("CAIDA AS-relation graph successfully imported, size {}".format(len(store)))
  return store

def makeStoreDimesEdges(pathToGraph, rootNodeID=None):
  # Read from a DIMES csv-formatted graph from a gzip file
  # The first two columns of each line are the ends of an edge
  # Node IDs are the column text prefixed with "N"; the node matching
  # rootNodeID (given in that "N..." form) is prefixed with "R" instead, so
  # that it sorts above every other node
  # Returns a dict of nodeID -> Node
  store = dict()
  with gzip.open(pathToGraph, "r") as f:
    inData = f.readlines()
  size = len(inData)
  for index, edge in enumerate(inData):
    if not index % 1000:
      pct = 100.0*index/size
      print("Processing edge {}, {:.2f}%".format(index, pct))
    # gzip hands back bytes on Python 3 (on Python 2 this is already str)
    if not isinstance(edge, str): edge = edge.decode("utf-8")
    dat = edge.rstrip().split(',')
    if len(dat) < 2: continue # Blank or truncated line
    raw1 = str(dat[0].strip())
    raw2 = str(dat[1].strip())
    node1 = "N" + raw1
    node2 = "N" + raw2
    if '?' in node1 or '?' in node2: continue #Unknown node
    if node1 == rootNodeID: node1 = "R" + raw1
    if node2 == rootNodeID: node2 = "R" + raw2
    if node1 not in store: store[node1] = Node(node1)
    if node2 not in store: store[node2] = Node(node2)
    # Self-edges still create the node, but are never linked
    if node1 != node2: linkNodes(store[node1], store[node2])
  print("DIMES graph successfully imported, size {}".format(len(store)))
  return store

def makeStoreGeneratedGraph(pathToGraph, root=None):
  # Import a graph stored as one whitespace-separated "node1 node2" pair of
  # integer IDs per line; lines starting with # are comments
  # The node whose ID equals root is renamed to ID + 1000000
  # Returns a dict of nodeID -> Node
  store = dict()
  with open(pathToGraph, "r") as f:
    for line in f:
      line = line.strip()
      if not line or line[0] == "#": continue # Skip blank and comment lines
      fields = line.split() # Tolerates tabs and repeated spaces
      node1, node2 = int(fields[0]), int(fields[1])
      if node1 == root: node1 += 1000000
      if node2 == root: node2 += 1000000
      if node1 not in store: store[node1] = Node(node1)
      if node2 not in store: store[node2] = Node(node2)
      linkNodes(store[node1], store[node2])
  print("Generated graph successfully imported, size {}".format(len(store)))
  return store


############################################
# Functions used as parts of network tests #
############################################

def idleUntilConverged(store):
  # Run the simulation until no node has reported a change for 4*TIMEOUT
  # consecutive steps, then build every node's lookup table
  # Returns the same store that was passed in
  nodeIDs = sorted(store.keys())
  timeOfLastChange = 0
  step = 0
  # Idle until the network has converged
  while step - timeOfLastChange < 4*TIMEOUT:
    step += 1
    print("Step: {}, last change: {}".format(step, timeOfLastChange))
    changed = False
    # |= (not "or") so that every node is ticked / drained on every step
    for nodeID in nodeIDs:
      # Update node status, send messages
      changed |= store[nodeID].tick()
    for nodeID in nodeIDs:
      # Process messages
      changed |= store[nodeID].handleMessages()
    if changed: timeOfLastChange = step
  initTables(store)
  return store

def getCacheIndex(nodes, sourceIndex, destIndex):
  # Position of the (source, dest) entry in a flat nodes*nodes array
  # laid out one source row after another
  rowStart = nodes*sourceIndex
  return rowStart + destIndex

def initTables(store):
  # Build the lookup table of every node in the store, in nodeID order
  nodeIDs = sorted(store.keys())
  print("Initializing routing tables for {} nodes".format(len(nodeIDs)))
  for nodeID in nodeIDs:
    store[nodeID].initTable()
  print("Routing tables initialized")
  return None

def getCache(store):
  # Pre-compute the next hop for every (source, dest) pair
  # Returns a flat nNodes*nNodes array of unsigned shorts, addressed with
  # getCacheIndex; each entry is the index (position in sorted nodeIDs) of
  # the next hop from source towards dest
  nodeIDs = sorted(store.keys())
  nNodes = len(nodeIDs)
  nodeIdxs = dict((nodeID, nodeIdx) for nodeIdx, nodeID in enumerate(nodeIDs))
  # Multiply the array rather than a list, to avoid a huge temporary list
  cache = array.array("H", [0]) * (nNodes*nNodes)
  for sourceIdx, sourceID in enumerate(nodeIDs):
    print("Building fast lookup table for node {} / {} ({})".format(sourceIdx+1, nNodes, sourceID))
    source = store[sourceID]
    for destIdx, destID in enumerate(nodeIDs):
      if sourceID == destID: nextHop = destID # lookup would fail
      else: nextHop = source.lookup(store[destID].info)
      cache[getCacheIndex(nNodes, sourceIdx, destIdx)] = nodeIdxs[nextHop]
  return cache

def testPaths(store, dists):
  # Route between every ordered pair of distinct nodes and compare the hop
  # count with the shortest possible one
  # dists = all-pairs shortest distances, laid out as by dijkstrall
  # Returns results[expected hops][hops actually taken] = number of pairs
  # Raises RuntimeError if the routed path loops instead of arriving
  cache = getCache(store)
  nodeIDs = sorted(store.keys())
  nNodes = len(nodeIDs)
  destIdxs = list(range(nNodes))
  results = dict()
  for sourceIdx, sourceID in enumerate(nodeIDs):
    print("Testing paths from node {} / {} ({})".format(sourceIdx+1, nNodes, sourceID))
    for destIdx in destIdxs:
      if destIdx == sourceIdx: continue # Skip self
      eHops = dists[getCacheIndex(nNodes, sourceIdx, destIdx)]
      if not eHops: continue # The network is split, no path exists
      hops = 0
      locIdx = sourceIdx
      while locIdx != destIdx:
        locIdx = cache[getCacheIndex(nNodes, locIdx, destIdx)]
        hops += 1
        # Lookups can loop in an unconverged network, and it is the caller's
        # job to notice: a loop-free path never needs more than nNodes-1 hops
        if hops > nNodes:
          raise RuntimeError("Routing loop between {} and {}".format(sourceID, nodeIDs[destIdx]))
      if eHops not in results: results[eHops] = dict()
      byHops = results[eHops]
      byHops[hops] = byHops.get(hops, 0) + 1
  return results

def getAvgStretch(pathMatrix):
  # Average of (hops taken / expected hops) over all counted paths
  # pathMatrix[expected hops][hops taken] = number of paths
  # Returns 0.0 when nothing was counted
  weightedSum = 0.
  nPaths = 0.
  for eHops in sorted(pathMatrix.keys()):
    row = pathMatrix[eHops]
    expected = float(max(1, eHops)) # Avoid dividing by zero
    for nHops in sorted(row.keys()):
      nTimes = row[nHops]
      weightedSum += (float(nHops)/expected)*nTimes
      nPaths += nTimes
  return weightedSum / max(1, nPaths)

def getMaxStretch(pathMatrix):
  # Largest (hops taken / expected hops) ratio found in the matrix
  # pathMatrix[expected hops][hops taken] = number of paths
  # Returns 0.0 for an empty matrix
  worst = 0.
  for eHops, row in pathMatrix.items():
    if not row: continue
    # Within one row the divisor is fixed, so the longest path is the worst
    candidate = float(max(row.keys()))/float(max(1, eHops))
    worst = max(worst, candidate)
  return worst

def getCertSizes(store):
  # Returns nCerts frequency distribution
  # i.e. a dict of (number of distinct certs a node holds) -> number of nodes
  # De-duplicates common certs (for shared prefixes in the path)
  sizes = dict()
  for node in store.values():
    certs = set()
    for peer in node.peers.values():
      assert len(peer.path) == 2
      assert peer.coords[-1] == peer.path[0]
      # Full chain of hops: root -> ... -> peer -> us
      hops = peer.coords + peer.path[1:]
      for hopIdx, send in enumerate(hops[:-1]):
        if send == node.info.nodeID: continue # We created it, already have it
        # Each cert is signed by the sender
        # Includes information about the path from the sender to the next hop
        # Next hop is at hopIdx+1, so the path to next hop is hops[0:hopIdx+2]
        certs.add("{}:{}".format(send, hops[0:hopIdx+2]))
    size = len(certs)
    sizes[size] = sizes.get(size, 0) + 1
  return sizes

def getMinLinkCertSizes(store):
  # Returns nCerts frequency distribution
  # De-duplicates common certs (for shared prefixes in the path)
  # Based on the minimum number of certs that must be traded through a particular link
  # Handled per link: a cert counts against a link only if no other peer of
  # the same node could also have supplied it
  sizes = dict()
  for node in store.values():
    peerCerts = dict()
    for peer in node.peers.values():
      pCerts = set()
      assert len(peer.path) == 2
      assert peer.coords[-1] == peer.path[0]
      # Full chain of hops: root -> ... -> peer -> us
      hops = peer.coords + peer.path[1:]
      for hopIdx, send in enumerate(hops[:-1]):
        if send == node.info.nodeID: continue # We created it, already have it
        # Each cert is signed by the sender
        # Includes information about the path from the sender to the next hop
        # Next hop is at hopIdx+1, so the path to next hop is hops[0:hopIdx+2]
        pCerts.add("{}:{}".format(send, hops[0:hopIdx+2]))
      peerCerts[peer.nodeID] = pCerts
    # Count how many peers offer each cert, once, instead of re-scanning
    # every other peer for every cert
    offeredBy = dict()
    for pCerts in peerCerts.values():
      for cert in pCerts:
        offeredBy[cert] = offeredBy.get(cert, 0) + 1
    for pCerts in peerCerts.values():
      size = sum(1 for cert in pCerts if offeredBy[cert] == 1)
      sizes[size] = sizes.get(size, 0) + 1
  return sizes

def getPathSizes(store):
  # Returns frequency distribution of the total number of hops in the routing table
  # I.e. a node with 3 peers, each with 5 hop coord+path, would count as 3x5=15
  # Result maps (total hops stored by a node) -> number of such nodes
  histogram = dict()
  for node in store.values():
    total = 0
    for peer in node.peers.values():
      assert len(peer.path) == 2
      assert peer.coords[-1] == peer.path[0]
      # The peer itself ends coords and starts path, so count it only once
      total += len(peer.coords) + len(peer.path) - 1
    histogram[total] = histogram.get(total, 0) + 1
  return histogram

def getPeerSizes(store):
  # Returns frequency distribution of the number of peers each node has
  # i.e. a dict of (peer count) -> number of nodes with that many peers
  histogram = dict()
  for node in store.values():
    nPeers = len(node.peers)
    histogram[nPeers] = histogram.get(nPeers, 0) + 1
  return histogram

def getAvgSize(sizes):
  # Mean of a frequency distribution given as a dict of size -> count
  # Returns 0.0 for an empty distribution
  weighted = sum(size*count for size, count in sizes.items())
  population = sum(sizes.values())
  return float(weighted)/max(1, population)

def getMaxSize(sizes):
  # Largest size present in a size -> count distribution
  return max(sizes)

def getMinSize(sizes):
  # Smallest size present in a size -> count distribution
  return min(sizes)

def getResults(pathMatrix):
  # Serialize the path matrix as text, one "eHops nHops count" line per
  # entry, ordered by eHops and then nHops
  return '\n'.join(
    "{} {} {}".format(eHops, nHops, pathMatrix[eHops][nHops])
    for eHops in sorted(pathMatrix.keys())
    for nHops in sorted(pathMatrix[eHops].keys())
  )

####################################
# Functions to run different tests #
####################################

def runTest(store):
  # Runs the usual set of tests on the store
  # Does not save results, so only meant for quick tests
  # To e.g. check the code works, maybe warm up the pypy jit
  for node in store.values():
    # Start every node's clock at a random offset
    node.info.time = random.randint(0, TIMEOUT)
    node.info.tstamp = TIMEOUT
  print("Begin testing network")
  dists = dijkstrall(store)
  idleUntilConverged(store)
  pathMatrix = testPaths(store, dists)
  avgStretch = getAvgStretch(pathMatrix)
  maxStretch = getMaxStretch(pathMatrix)
  peers = getPeerSizes(store)
  certs = getCertSizes(store)
  paths = getPathSizes(store)
  linkCerts = getMinLinkCertSizes(store)
  avgPeerSize = getAvgSize(peers)
  maxPeerSize = getMaxSize(peers)
  avgCertSize = getAvgSize(certs)
  maxCertSize = getMaxSize(certs)
  avgPathSize = getAvgSize(paths)
  maxPathSize = getMaxSize(paths)
  avgLinkCert = getAvgSize(linkCerts)
  maxLinkCert = getMaxSize(linkCerts)
  totalCerts = sum(size*count for size, count in certs.items())
  totalLinks = sum(size*count for size, count in peers.items()) # one-way links
  avgCertsPerLink = float(totalCerts)/max(1, totalLinks)
  print("Finished testing network")
  print("Avg / Max stretch: {} / {}".format(avgStretch, maxStretch))
  print("Avg / Max nPeers size: {} / {}".format(avgPeerSize, maxPeerSize))
  print("Avg / Max nCerts size: {} / {}".format(avgCertSize, maxCertSize))
  print("Avg / Max total hops in any node's routing table: {} / {}".format(avgPathSize, maxPathSize))
  print("Avg / Max lower bound cert requests per link (one-way): {} / {}".format(avgLinkCert, maxLinkCert))
  print("Avg certs per link (one-way): {}".format(avgCertsPerLink))
  return # End of function

def rootNodeASTest(path, outDir="output-treesim-AS", dists=None, proc = 1):
  # Checks performance for every possible choice of root node
  # Saves output for each root node to a separate file on disk
  # path = input path to some caida.org formatted AS-relationship graph
  # outDir = directory for the per-root result files (created if missing)
  # dists = optional pre-computed all-pairs distances (see dijkstrall)
  # proc = only nodes whose index is a multiple of proc are tested
  if not os.path.exists(outDir): os.makedirs(outDir)
  assert os.path.exists(outDir)
  # This first import is only used to enumerate the candidate roots
  store = makeStoreASRelGraph(path)
  nodes = sorted(store.keys())
  for nodeIdx, rootNodeID in enumerate(nodes):
    if nodeIdx % proc != 0: continue # Work belongs to someone else
    outpath = outDir+"/{}".format(rootNodeID)
    if os.path.exists(outpath):
      print("Skipping {}, already processed".format(rootNodeID))
      continue
    store = makeStoreASRelGraphFixedRoot(path, rootNodeID)
    for node in store.values():
      node.info.time = random.randint(0, TIMEOUT)
      node.info.tstamp = TIMEOUT
    print("Beginning {}, size {}".format(nodeIdx, len(store)))
    # Only the root's treeID differs between runs, the nodeIDs (and thus the
    # distance matrix layout) do not, so the matrix is computed just once
    if not dists: dists = dijkstrall(store)
    idleUntilConverged(store)
    pathMatrix = testPaths(store, dists)
    avgStretch = getAvgStretch(pathMatrix)
    maxStretch = getMaxStretch(pathMatrix)
    results = getResults(pathMatrix)
    with open(outpath, "w") as f:
      f.write(results)
    print("Finished test for root AS {} ({} / {})".format(rootNodeID, nodeIdx+1, len(store)))
    print("Avg / Max stretch: {} / {}".format(avgStretch, maxStretch))
    #break # Stop after 1, because they can take forever
  return # End of function

def timelineASTest():
  # Meant to study the performance of the network as a function of network size
  # Loops over a set of AS-relationship graphs
  # Runs a test on each graph, selecting highest-degree node as the root
  # Saves results for each graph to a separate file on disk
  outDir = "output-treesim-timeline-AS"
  if not os.path.exists(outDir): os.makedirs(outDir)
  assert os.path.exists(outDir)
  paths = sorted(glob.glob("asrel/datasets/*"))
  for path in paths:
    # Output file is named after the part of the file name before the first dot
    date = os.path.basename(path).split(".")[0]
    outpath = outDir+"/{}".format(date)
    if os.path.exists(outpath):
      print("Skipping {}, already processed".format(date))
      continue
    store = makeStoreASRelGraphMaxDeg(path)
    for node in store.values():
      node.info.time = random.randint(0, TIMEOUT)
      node.info.tstamp = TIMEOUT
    print("Beginning {}, size {}".format(date, len(store)))
    # Every graph is different, so distances are computed per graph
    dists = dijkstrall(store)
    idleUntilConverged(store)
    pathMatrix = testPaths(store, dists)
    avgStretch = getAvgStretch(pathMatrix)
    maxStretch = getMaxStretch(pathMatrix)
    results = getResults(pathMatrix)
    with open(outpath, "w") as f:
      f.write(results)
    print("Finished {} with {} nodes".format(date, len(store)))
    print("Avg / Max stretch: {} / {}".format(avgStretch, maxStretch))
    #break # Stop after 1, because they can take forever
  return # End of function

def timelineDimesTest():
  # Meant to study the performance of the network as a function of network size
  # Loops over a set of DIMES AS-edge graphs (gzipped csv)
  # Runs a test on each graph, selecting highest-degree node as the root
  # Saves results for each graph to a separate file on disk
  outDir = "output-treesim-timeline-dimes"
  if not os.path.exists(outDir): os.makedirs(outDir)
  assert os.path.exists(outDir)
  # Input files are named ASEdgesX_Y where X = month (no leading 0), Y = year
  paths = sorted(glob.glob("DIMES/ASEdges/*.gz"))
  exists = set(glob.glob(outDir+"/*"))
  for path in paths:
    date = os.path.basename(path).split(".")[0]
    outpath = outDir+"/{}".format(date)
    if outpath in exists:
      print("Skipping {}, already processed".format(date))
      continue
    store = makeStoreDimesEdges(path)
    # Get the highest degree node and make it root
    # Sorted by nodeID just to make it stable in the event of a tie
    nodeIDs = sorted(store.keys())
    bestRoot = ""
    bestDeg = 0
    for nodeID in nodeIDs:
      node = store[nodeID]
      if len(node.links) > bestDeg:
        bestRoot = nodeID
        bestDeg = len(node.links)
    assert bestRoot
    # Import again, this time with the chosen node renamed from N... to R...
    store = makeStoreDimesEdges(path, bestRoot)
    rootID = "R" + bestRoot[1:]
    assert rootID in store
    # Don't forget to set random seed before setting times
    # To make results reproducible
    nodeIDs = sorted(store.keys())
    random.seed(12345)
    for nodeID in nodeIDs:
      node = store[nodeID]
      node.info.time = random.randint(0, TIMEOUT)
      node.info.tstamp = TIMEOUT
    print("Beginning {}, size {}".format(date, len(store)))
    # Every graph is different, so distances are computed per graph
    # (this used to test an unassigned variable and crash right here)
    dists = dijkstrall(store)
    idleUntilConverged(store)
    pathMatrix = testPaths(store, dists)
    avgStretch = getAvgStretch(pathMatrix)
    maxStretch = getMaxStretch(pathMatrix)
    results = getResults(pathMatrix)
    with open(outpath, "w") as f:
      f.write(results)
    print("Finished {} with {} nodes".format(date, len(store)))
    print("Avg / Max stretch: {} / {}".format(avgStretch, maxStretch))
    break # Stop after 1, because they can take forever
  return # End of function

def scalingTest(maxTests=None, inputDir="graphs"):
  # Meant to study the performance of the network as a function of network size
  # Loops over a set of nodes in a previously generated graph
  # Runs a test on each graph, testing each node as the root
  # if maxTests is set, tests only that number of roots (highest degree first)
  # Saves results for each graph to a separate file on disk
  outDir = "output-treesim-{}".format(inputDir)
  if not os.path.exists(outDir): os.makedirs(outDir)
  assert os.path.exists(outDir)
  paths = sorted(glob.glob("{}/*".format(inputDir)))
  exists = set(glob.glob(outDir+"/*"))
  for path in paths:
    gc.collect() # pypy waits for gc to close files
    graph = os.path.basename(path).split(".")[0]
    store = makeStoreGeneratedGraph(path)
    # Candidate roots, highest degree first
    # Sorted by nodeID first, so that the (stable) degree sort keeps nodes of
    # equal degree in nodeID order
    nodeIDs = sorted(sorted(store.keys()), key=lambda x: len(store[x].links), reverse=True)
    if maxTests: nodeIDs = nodeIDs[:maxTests]
    for nodeID in nodeIDs:
      nodeIDStr = str(nodeID).zfill(len(str(len(store)-1)))
      outpath = outDir+"/{}-{}".format(graph, nodeIDStr)
      if outpath in exists:
        print("Skipping {}-{}, already processed".format(graph, nodeIDStr))
        continue
      store = makeStoreGeneratedGraph(path, nodeID)
      # Don't forget to set random seed before setting times
      random.seed(12345) # To make results reproducible
      nIDs = sorted(store.keys())
      for nID in nIDs:
        node = store[nID]
        node.info.time = random.randint(0, TIMEOUT)
        node.info.tstamp = TIMEOUT
      print("Beginning {}, size {}".format(graph, len(store)))
      # The root is stored under a different nodeID (ID + 1000000), which
      # changes the sorted order of the nodes and with it the layout of the
      # distance matrix, so distances cannot be reused from another root
      dists = dijkstrall(store)
      idleUntilConverged(store)
      pathMatrix = testPaths(store, dists)
      avgStretch = getAvgStretch(pathMatrix)
      maxStretch = getMaxStretch(pathMatrix)
      results = getResults(pathMatrix)
      with open(outpath, "w") as f:
        f.write(results)
      print("Finished {} with {} nodes for root {}".format(graph, len(store), nodeID))
      print("Avg / Max stretch: {} / {}".format(avgStretch, maxStretch))
  return # End of function

##################
# Main Execution #
##################

if __name__ == "__main__":
  # Always starts with a quick self-test on a small grid, then runs whichever
  # real workload is enabled below
  if True: # Run a quick test
    random.seed(12345) # DEBUG
    store = makeStoreSquareGrid(4)
    runTest(store) # Quick test
  store = None
  # Do some real work
  #runTest(makeStoreDimesEdges("DIMES/ASEdges/ASEdges1_2007.csv.gz"))
  #timelineDimesTest()
  #rootNodeASTest("asrel/datasets/19980101.as-rel.txt")
  #timelineASTest()
  #rootNodeASTest("hype-2016-09-19.list", "output-treesim-hype")
  #scalingTest(None, "graphs-20") # First argument 1 to only test 1 root per graph
  #store = makeStoreGeneratedGraph("bgp_tables")
  #store = makeStoreGeneratedGraph("skitter")
  #store = makeStoreASRelGraphMaxDeg("hype-2016-09-19.list") #http://hia.cjdns.ca/watchlist/c/walk.peers.20160919
  #store = makeStoreGeneratedGraph("fc00-2017-08-12.txt")
  if store: runTest(store)
  #rootNodeASTest("skitter", "output-treesim-skitter", None, 0, 1)
  #scalingTest(1, "graphs-20") # First argument 1 to only test 1 root per graph
  #scalingTest(1, "graphs-21") # First argument 1 to only test 1 root per graph
  #scalingTest(1, "graphs-22") # First argument 1 to only test 1 root per graph
  #scalingTest(1, "graphs-23") # First argument 1 to only test 1 root per graph
  if not store:
    # No store was selected above: run the per-root test on the fc00 graph,
    # taking the job number from the command line
    import sys
    args = sys.argv
    if len(args) == 2:
      job_number = int(sys.argv[1])
      rootNodeASTest("fc00-2017-08-12.txt", "fc00", None, job_number)
    else:
      print("Usage: {} job_number".format(args[0]))
      print("job_number = which job set to run on this node (1-indexed)")