Add a token generator to standard tools.

Patch by paul cannon, reviewed by brandonwilliams for CASSANDRA-3709
matsumotory · Jul 12, 2012 · 962b23b · 962b23b
1 parent 1e76044
commit 962b23b
Show file tree

Hide file tree

Showing 2 changed files with 318 additions and 0 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -10,6 +10,7 @@
  * (cql3) Support ORDER BY when IN condition is given in WHERE clause (CASSANDRA-4327)
  * (cql3) delete "component_index" column on DROP TABLE call (CASSANDRA-4420)
  * change nanoTime() to currentTimeInMillis() in schema related code (CASSANDRA-4432)
+ * add a token generation tool (CASSANDRA-3709)
 Merged from 1.0:
  * allow dropping columns shadowed by not-yet-expired supercolumn or row
    tombstones in PrecompactedRow (CASSANDRA-4396)

diff --git a/tools/bin/token-generator b/tools/bin/token-generator
@@ -0,0 +1,317 @@
+#!/usr/bin/env python
+
+from __future__ import with_statement
+
+import os
+import sys
+import math
+import optparse
+import webbrowser
+import urllib
+from time import sleep
+from itertools import cycle
+from tempfile import NamedTemporaryFile
+
+description = '''Given a list of numbers indicating the number of nodes
+in each separate datacenter, outputs a recommended list of tokens to use
+with RandomPartitioner: one for each node in each datacenter.
+'''
+
+usage = "%prog <nodes_in_dc1> [<nodes_in_dc2> [...]]"
+
+parser = optparse.OptionParser(description=description, usage=usage)
+parser.add_option('--ringrange', type='int',
+                  help='Specify a numeric maximum token value for your ring, '
+                       'different from the default value of 2^127.')
+parser.add_option('--graph', action='store_true',
+                  help='Show a rendering of the generated tokens as line '
+                       'segments in a circle, colored according to datacenter')
+parser.add_option('-n', '--nts', action='store_const', dest='strat', const='nts',
+                  help='Optimize multi-cluster distribution for '
+                       'NetworkTopologyStrategy [default]')
+parser.add_option('-o', '--onts', action='store_const', dest='strat', const='onts',
+                  help='Optimize multi-cluster distribution for '
+                       'OldNetworkTopologyStrategy')
+
+parser.add_option('--test', action='store_true',
+                  help='Run in test mode, outputting an HTML file to display '
+                       'various generated ring arrangements.')
+
+parser.add_option('--html-output', help=optparse.SUPPRESS_HELP)
+parser.add_option('--browser-wait-time', type='float', help=optparse.SUPPRESS_HELP)
+parser.add_option('--test-colors', help=optparse.SUPPRESS_HELP)
+parser.add_option('--test-graphsize', type='int', help=optparse.SUPPRESS_HELP)
+
+parser.set_defaults(
+    ringrange=(1<<127),
+
+    # whether to create (and try to display) graph output
+    graph=False,
+
+    # 'nts' or 'onts'; the replication strategy for which to optimize
+    strat='nts',
+
+    # durr
+    test=False,
+
+    # size of the generated graph
+    graphsize=600,
+
+    # where to write generated graph (HTML) output, or '*tmp*' to write a
+    # temporary file, and remove it after telling a browser to open it. '-'
+    # to write to stdout.
+    html_output='*tmp*',
+
+    # how long, in seconds, to wait before cleaning up a temporary html file
+    # after telling the browser to open it
+    browser_wait_time=5.0,
+
+    # comma-separated list of HTML color codes, used in order to represent
+    # respective datacenter nodes
+    test_colors='#000,#00F,#0F0,#F00,#0FF,#FF0,#F0F',
+
+    # size of the per-test graphs
+    test_graphsize=200,
+)
+
+class Ring:
+    MIN_DC_OFFSET_DIVIDER = 235
+    offset_spacer = 2
+
+    def __init__(self, dc_counts, ringrange, strategy='nts'):
+        self.ringrange = ringrange
+        self.dc_counts = dc_counts
+        self.calculate_offset_tokens = getattr(self, 'calc_offset_tokens_' + strategy)
+
+    def best_per_dc_offset(self):
+        """
+        Calculate a per-dc offset for NTS DC spacing, such that there is a little
+        bit of room between nodes which would otherwise have been at the same token;
+        (hopefully) large enough that the difference can show when --graph is used,
+        but small enough that it there's no chance of the relative ordering changing.
+        """
+        lowest_division = len(self.dc_counts) * max(self.dc_counts) * self.offset_spacer
+        division = max(lowest_division, self.MIN_DC_OFFSET_DIVIDER)
+        return -self.ringrange // division
+
+    def calc_offset_tokens_nts(self):
+        dc_offset = self.best_per_dc_offset()
+        dcs = []
+        for (dcnum, dccount) in enumerate(self.dc_counts):
+            offset = dcnum * dc_offset
+            arcsize = self.ringrange // (dccount or 1)
+            dcs.append([(n * arcsize + offset) % self.ringrange for n in xrange(dccount)])
+        return dcs
+
+    def calc_offset_tokens_onts(self):
+        dcs_by_count = sorted(enumerate(self.dc_counts), key=lambda d:d[1], reverse=True)
+        biggest = dcs_by_count[0][1]
+        nodes = [dcnum for (dcnum, dccount) in dcs_by_count for x in range(dccount)]
+        layout = [nodes[n] for i in range(biggest) for n in range(i, len(nodes), biggest)]
+
+        final = [[] for x in dcs_by_count]
+        for pos, dc in enumerate(layout):
+            final[dc].append(pos * self.ringrange // len(layout))
+        return final
+
+
+def print_tokens(tokens, tokenwidth, indent=0):
+    indentstr = ' ' * indent
+    for dcnum, toklist in enumerate(tokens):
+        print "%sDC #%d:" % (indentstr, dcnum + 1)
+        nwidth = len(str(len(toklist)))
+        for tnum, tok in enumerate(toklist):
+            print "%s  Node #%0*d: % *d" % (indentstr, nwidth, tnum + 1, tokenwidth, tok)
+
+def calculate_ideal_tokens(datacenters, ringrange, strategy):
+    return Ring(datacenters, ringrange, strategy).calculate_offset_tokens()
+
+def file_to_url(path):
+    path = os.path.abspath(path)
+    if os.name == 'nt':
+        host, path = os.path.splitunc(path)
+        drive, path = os.path.splitdrive(path)
+        path = (host or (drive + '|')) + path.replace(os.sep, '/')
+    return 'file://' + urllib.quote(path, safe='/')
+
+html_template = """<!DOCTYPE html>
+<html>
+<body>
+
+%(generated_body)s
+
+</body>
+</html>
+"""
+
+chart_template = """
+<canvas id="%(id)s" width="%(size)s" height="%(size)s" style="border:1px solid #c3c3c3;">
+    Your browser does not support the canvas element.
+</canvas>
+<script type="text/javascript">
+    var c=document.getElementById("%(id)s");
+    var ctx=c.getContext("2d");
+%(generated_script)s
+</script>
+"""
+
+chart_js_template = """
+    ctx.beginPath();
+    ctx.strokeStyle = "%(color)s";
+    ctx.moveTo(%(center)s,%(center)s);
+    ctx.lineTo(%(x)s,%(y)s);
+    ctx.stroke();
+    ctx.closePath();
+"""
+
+class RingRenderer:
+    border_fraction = 0.08
+
+    def __init__(self, ringrange, graphsize, colors):
+        self.ringrange = ringrange
+        self.graphsize = graphsize
+        self.colors = colors
+        self.anglefactor = 2 * math.pi / ringrange
+        self.linelength = graphsize * (1 - self.border_fraction) / 2
+        self.center = graphsize / 2
+
+    def calc_coords(self, tokens):
+        these_calcs = []
+
+        for toklist in tokens:
+            coordlist = []
+            for tok in toklist:
+                angle = tok * self.anglefactor
+                x2 = self.center + self.linelength * math.sin(angle)
+                y2 = self.center - self.linelength * math.cos(angle)
+                coordlist.append((x2, y2))
+            these_calcs.append(coordlist)
+
+        return these_calcs
+
+    def make_html(self, tokensets):
+        coordinate_sets = map(self.calc_coords, tokensets)
+        all_charts = []
+        for chart_index, chart_set in enumerate(coordinate_sets):
+            chart_code = []
+            for coordlist, color in zip(chart_set, cycle(self.colors)):
+                for x, y in coordlist:
+                    chart_code.append(chart_js_template
+                                          % dict(color=color, x=x, y=y,
+                                                 center=(self.graphsize / 2)))
+            this_chart = chart_template % dict(generated_script=''.join(chart_code),
+                                               id=chart_index, size=self.graphsize)
+            all_charts.append(this_chart)
+        return html_template % dict(generated_body=''.join(all_charts))
+
+# ===========================
+# Tests
+
+def run_tests(opts):
+    tests = [
+        [1],
+        [1, 1],
+        [2, 2],
+        [1, 2, 2],
+        [2, 2, 2],
+        [2, 0, 0],
+        [0, 2, 0],
+        [0, 0, 2],
+        [2, 2, 0],
+        [2, 0, 2],
+        [0, 2, 2],
+        [0, 0, 1, 1, 0, 1, 1],
+        [6],
+        [3, 3, 3],
+        [9],
+        [1,1,1,1],
+        [4],
+        [3,3,6,4,2]
+    ]
+
+    tokensets = []
+    for test in tests:
+        print "Test %r" % (test,)
+        tokens = calculate_ideal_tokens(test, opts.ringrange, opts.strat)
+        print_tokens(tokens, len(str(opts.ringrange)) + 1, indent=2)
+        tokensets.append(tokens)
+    return tokensets
+
+# ===========================
+
+def display_html(html, wait_time):
+    with NamedTemporaryFile(suffix='.html') as f:
+        f.write(html)
+        f.flush()
+        webbrowser.open(file_to_url(f.name), new=2)
+        # this is stupid. webbrowser.open really can't wait until the
+        # browser has said "yes I've got it"?
+        sleep(wait_time)
+
+def write_output(html, opts):
+    if opts.html_output == '-':
+        sys.stdout.write(html)
+    elif opts.html_output == '*tmp*':
+        display_html(html, opts.browser_wait_time)
+    else:
+        with open(opts.html_output, 'w') as f:
+            f.write(html)
+
+def readnum(prompt, min=None, max=None):
+    while True:
+        x = raw_input(prompt + ' ')
+        try:
+            val = int(x)
+        except ValueError:
+            print "Oops, %r is not an integer. Try again.\n" % (x,)
+            continue
+        if min is not None and val < min:
+            print "Oops, the answer must be at least %d. Try again.\n" % (min,)
+        elif max is not None and val > max:
+            print "Oops, the answer must be at most %d. Try again.\n" % (max,)
+        else:
+            return val
+
+def get_dc_sizes_interactive():
+    print "Token Generator Interactive Mode"
+    print "--------------------------------"
+    print
+    dcs = readnum(" How many datacenters will participate in this Cassandra cluster?", min=1)
+    sizes = []
+    for n in xrange(dcs):
+        sizes.append(readnum(" How many nodes are in datacenter #%d?" % (n + 1), min=0))
+    print
+    return sizes
+
+def main(opts, args):
+    opts.colorlist = [s.strip() for s in opts.test_colors.split(',')]
+    if opts.test:
+        opts.graph = True
+        tokensets = run_tests(opts)
+        renderer = RingRenderer(ringrange=opts.ringrange, graphsize=opts.test_graphsize,
+                                colors=opts.colorlist)
+    else:
+        if len(args) == 0:
+            args = get_dc_sizes_interactive()
+        try:
+            datacenters = map(int, args)
+        except ValueError, e:
+            parser.error('Arguments should be integers.')
+        renderer = RingRenderer(ringrange=opts.ringrange, graphsize=opts.graphsize,
+                                colors=opts.colorlist)
+        tokens = calculate_ideal_tokens(datacenters, opts.ringrange, opts.strat)
+        print_tokens(tokens, len(str(opts.ringrange)) + 1)
+        tokensets = [tokens]
+
+    if opts.graph:
+        html = renderer.make_html(tokensets)
+        write_output(html, opts)
+    return 0
+
+if __name__ == '__main__':
+    opts, args = parser.parse_args()
+    try:
+        res = main(opts, args)
+    except KeyboardInterrupt:
+        res = -128
+    sys.exit(res)