Skip to content

Commit

Permalink
Add a token generator to standard tools.
Browse files Browse the repository at this point in the history
Patch by paul cannon, reviewed by brandonwilliams for CASSANDRA-3709
  • Loading branch information
driftx committed Jul 12, 2012
1 parent 1e76044 commit 962b23b
Show file tree
Hide file tree
Showing 2 changed files with 318 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* (cql3) Support ORDER BY when IN condition is given in WHERE clause (CASSANDRA-4327)
* (cql3) delete "component_index" column on DROP TABLE call (CASSANDRA-4420)
* change nanoTime() to currentTimeInMillis() in schema related code (CASSANDRA-4432)
* add a token generation tool (CASSANDRA-3709)
Merged from 1.0:
* allow dropping columns shadowed by not-yet-expired supercolumn or row
tombstones in PrecompactedRow (CASSANDRA-4396)
Expand Down
317 changes: 317 additions & 0 deletions tools/bin/token-generator
Original file line number Diff line number Diff line change
@@ -0,0 +1,317 @@
#!/usr/bin/env python

from __future__ import with_statement

import os
import sys
import math
import optparse
import webbrowser
import urllib
from time import sleep
from itertools import cycle
from tempfile import NamedTemporaryFile

# Text shown at the top of --help output.
description = (
    'Given a list of numbers indicating the number of nodes\n'
    'in each separate datacenter, outputs a recommended list of tokens to use\n'
    'with RandomPartitioner: one for each node in each datacenter.\n'
)

usage = '%prog <nodes_in_dc1> [<nodes_in_dc2> [...]]'

parser = optparse.OptionParser(usage=usage, description=description)

# ---- options listed in --help ----
parser.add_option(
    '--ringrange',
    type='int',
    help=('Specify a numeric maximum token value for your ring, different '
          'from the default value of 2^127.'),
)
parser.add_option(
    '--graph',
    action='store_true',
    help=('Show a rendering of the generated tokens as line segments in a '
          'circle, colored according to datacenter'),
)
# -n and -o both store into opts.strat; whichever comes last on the command
# line wins.
parser.add_option(
    '-n', '--nts',
    action='store_const', dest='strat', const='nts',
    help=('Optimize multi-cluster distribution for NetworkTopologyStrategy '
          '[default]'),
)
parser.add_option(
    '-o', '--onts',
    action='store_const', dest='strat', const='onts',
    help=('Optimize multi-cluster distribution for '
          'OldNetworkTopologyStrategy'),
)
parser.add_option(
    '--test',
    action='store_true',
    help=('Run in test mode, outputting an HTML file to display various '
          'generated ring arrangements.'),
)

# ---- options accepted but hidden from --help ----
parser.add_option('--html-output', help=optparse.SUPPRESS_HELP)
parser.add_option('--browser-wait-time', type='float',
                  help=optparse.SUPPRESS_HELP)
parser.add_option('--test-colors', help=optparse.SUPPRESS_HELP)
parser.add_option('--test-graphsize', type='int', help=optparse.SUPPRESS_HELP)

# Token calculation defaults.
parser.set_defaults(
    # size of the token space (exclusive upper bound of generated tokens)
    ringrange=(1 << 127),
    # 'nts' or 'onts'; the replication strategy for which to optimize
    strat='nts',
    # when true, run the built-in sample layouts instead of using the
    # datacenter sizes given on the command line
    test=False,
)

# Graph (HTML) output defaults.
parser.set_defaults(
    # whether to create (and try to display) graph output
    graph=False,
    # size of the generated graph
    graphsize=600,
    # size of the per-test graphs
    test_graphsize=200,
    # comma-separated list of HTML color codes, used in order to represent
    # respective datacenter nodes
    test_colors='#000,#00F,#0F0,#F00,#0FF,#FF0,#F0F',
    # where to write generated graph (HTML) output, or '*tmp*' to write a
    # temporary file, and remove it after telling a browser to open it. '-'
    # to write to stdout.
    html_output='*tmp*',
    # how long, in seconds, to wait before cleaning up a temporary html file
    # after telling the browser to open it
    browser_wait_time=5.0,
)

class Ring:
    """Token layout calculator for a ring shared by several datacenters.

    dc_counts is a sequence holding the number of nodes in each datacenter,
    ringrange is the size of the token space, and strategy picks the layout
    method: 'nts' or 'onts' (looked up as calc_offset_tokens_<strategy>, so an
    unknown strategy raises AttributeError at construction time).

    Call calculate_offset_tokens() to obtain one list of tokens per
    datacenter, in the same order as dc_counts.
    """

    # The per-DC offset is never larger than ringrange / MIN_DC_OFFSET_DIVIDER.
    MIN_DC_OFFSET_DIVIDER = 235
    # Extra factor applied to the offset divisor: with N datacenters and a
    # largest datacenter of M nodes the offset is at most
    # ringrange / (N * M * offset_spacer), so even the last datacenter is
    # shifted by less than 1/offset_spacer of the largest datacenter's arc.
    offset_spacer = 2

    def __init__(self, dc_counts, ringrange, strategy='nts'):
        self.ringrange = ringrange
        self.dc_counts = dc_counts
        self.calculate_offset_tokens = getattr(self, 'calc_offset_tokens_' + strategy)

    def best_per_dc_offset(self):
        """
        Calculate a per-dc offset for NTS DC spacing, such that there is a little
        bit of room between nodes which would otherwise have been at the same token;
        (hopefully) large enough that the difference can show when --graph is used,
        but small enough that there's no chance of the relative ordering changing.

        The result is negative (or zero): each successive datacenter is shifted
        a bit further backwards around the ring.
        """
        # max() of an empty sequence raises; with no datacenters fall back to
        # the minimum divider.
        largest_dc = max(self.dc_counts) if self.dc_counts else 0
        lowest_division = len(self.dc_counts) * largest_dc * self.offset_spacer
        division = max(lowest_division, self.MIN_DC_OFFSET_DIVIDER)
        # Unary minus binds tighter than //, so this floors towards -infinity.
        return -self.ringrange // division

    def calc_offset_tokens_nts(self):
        """Lay out each datacenter evenly on its own, then shift it slightly.

        Every datacenter gets its nodes spaced ringrange // count apart
        starting at 0; datacenter number k is then moved by k times the
        per-DC offset and wrapped back into [0, ringrange).  A datacenter
        with zero nodes yields an empty list.
        """
        dc_offset = self.best_per_dc_offset()
        dcs = []
        for (dcnum, dccount) in enumerate(self.dc_counts):
            offset = dcnum * dc_offset
            # "or 1" only avoids dividing by zero; an empty DC produces no tokens.
            arcsize = self.ringrange // (dccount or 1)
            dcs.append([(n * arcsize + offset) % self.ringrange for n in range(dccount)])
        return dcs

    def calc_offset_tokens_onts(self):
        """Interleave the nodes of all datacenters around a single even ring.

        Nodes are listed datacenter by datacenter, largest datacenter first,
        then dealt out with a stride equal to the largest datacenter's size so
        that nodes of the same datacenter end up spread apart.  The resulting
        positions are spaced evenly over the ring.
        """
        if not self.dc_counts:
            # Nothing to lay out; indexing dcs_by_count[0] below would fail.
            return []
        dcs_by_count = sorted(enumerate(self.dc_counts), key=lambda d: d[1], reverse=True)
        biggest = dcs_by_count[0][1]
        nodes = [dcnum for (dcnum, dccount) in dcs_by_count for x in range(dccount)]
        layout = [nodes[n] for i in range(biggest) for n in range(i, len(nodes), biggest)]

        final = [[] for x in dcs_by_count]
        for pos, dc in enumerate(layout):
            final[dc].append(pos * self.ringrange // len(layout))
        return final


def print_tokens(tokens, tokenwidth, indent=0):
    """Print the tokens of every datacenter to stdout.

    tokens is a list with one list of tokens per datacenter.  Datacenters and
    nodes are numbered from 1; node numbers are zero-padded to the width of
    the datacenter's node count and tokens are right-aligned in a field of
    tokenwidth characters.  Every line is prefixed with indent spaces.
    """
    indentstr = ' ' * indent
    for dcnum, toklist in enumerate(tokens):
        # A single parenthesised argument prints identically whether print is
        # a statement (Python 2) or a function (Python 3).
        print("%sDC #%d:" % (indentstr, dcnum + 1))
        nwidth = len(str(len(toklist)))
        for tnum, tok in enumerate(toklist):
            print("%s Node #%0*d: % *d" % (indentstr, nwidth, tnum + 1, tokenwidth, tok))

def calculate_ideal_tokens(datacenters, ringrange, strategy):
    """Return one list of tokens per datacenter for the given strategy.

    Convenience wrapper: builds a Ring from the datacenter node counts and
    the ring size, then runs the layout method selected by strategy.
    """
    ring = Ring(datacenters, ringrange, strategy)
    return ring.calculate_offset_tokens()

def file_to_url(path):
    """Return a file:// URL for the given (possibly relative) filesystem path.

    The path is made absolute first and characters that are not URL-safe are
    percent-encoded.  On Windows a drive path becomes file:///C:/dir/file and
    a UNC path \\\\host\\share\\file becomes file://host/share/file.
    """
    # quote() moved between Python 2 and 3; import locally so the function
    # works on both without touching the module imports.
    try:
        from urllib.parse import quote  # Python 3
    except ImportError:
        from urllib import quote  # Python 2

    path = os.path.abspath(path)
    if os.name != 'nt':
        return 'file://' + quote(path, safe='/')

    drive, tail = os.path.splitdrive(path)
    tail = quote(tail.replace('\\', '/'), safe='/')
    if drive.startswith('\\\\'):
        # UNC share: the two leading slashes of //host/share introduce the
        # URL's host part.
        return 'file:' + quote(drive.replace('\\', '/'), safe='/') + tail
    # Local drive: empty host, then the drive letter kept unescaped
    # (quoting the ':' would break the URL).
    return 'file:///' + drive + tail

# Outer HTML page.  %(generated_body)s receives the concatenation of all
# rendered charts (see RingRenderer.make_html).
html_template = """<!DOCTYPE html>
<html>
<body>
%(generated_body)s
</body>
</html>
"""

# One chart: a square <canvas> of %(size)s pixels identified by %(id)s,
# followed by a script that fetches its 2D context as "ctx" and then runs
# %(generated_script)s (the concatenated line-drawing snippets below).
chart_template = """
<canvas id="%(id)s" width="%(size)s" height="%(size)s" style="border:1px solid #c3c3c3;">
Your browser does not support the canvas element.
</canvas>
<script type="text/javascript">
var c=document.getElementById("%(id)s");
var ctx=c.getContext("2d");
%(generated_script)s
</script>
"""

# One line segment (i.e. one token): drawn in %(color)s from the canvas
# center (%(center)s, %(center)s) to the point (%(x)s, %(y)s).  Relies on the
# "ctx" variable defined by chart_template.
chart_js_template = """
ctx.beginPath();
ctx.strokeStyle = "%(color)s";
ctx.moveTo(%(center)s,%(center)s);
ctx.lineTo(%(x)s,%(y)s);
ctx.stroke();
ctx.closePath();
"""

class RingRenderer:
    """Renders token sets as HTML canvas charts.

    Each token is drawn as a line from the center of a square canvas towards
    its edge; token 0 points straight up and the angle grows in proportion to
    the token, reaching a full turn at ringrange.  Datacenters are told apart
    by color, taking colors in order and starting over when they run out.
    """

    # Fraction of the canvas size left free around the ring (split over both
    # sides), so the lines stop short of the canvas edge.
    border_fraction = 0.08

    def __init__(self, ringrange, graphsize, colors):
        self.ringrange = ringrange
        self.graphsize = graphsize
        self.colors = colors
        # Geometry derived from the canvas size.
        self.center = graphsize / 2
        self.linelength = graphsize * (1 - self.border_fraction) / 2
        # Multiplying a token by this factor gives its angle in radians.
        self.anglefactor = 2 * math.pi / ringrange

    def calc_coords(self, tokens):
        """Map per-datacenter token lists to per-datacenter (x, y) endpoints.

        The returned list mirrors the shape of tokens: one list per
        datacenter holding one coordinate pair per token.
        """
        per_dc = []
        for dc_tokens in tokens:
            angles = [tok * self.anglefactor for tok in dc_tokens]
            per_dc.append([
                (self.center + self.linelength * math.sin(theta),
                 self.center - self.linelength * math.cos(theta))
                for theta in angles
            ])
        return per_dc

    def make_html(self, tokensets):
        """Return a complete HTML page with one chart per entry of tokensets.

        Every entry of tokensets is a list of per-datacenter token lists, as
        accepted by calc_coords.  Charts are numbered from 0 and that number
        is used as the canvas id.
        """
        midpoint = self.graphsize / 2
        charts = []
        for chart_id, tokens in enumerate(tokensets):
            # zip() stops at the last datacenter; cycle() just makes sure the
            # colors never run out first.
            segments = [
                chart_js_template % dict(color=color, x=x, y=y, center=midpoint)
                for dc_coords, color in zip(self.calc_coords(tokens), cycle(self.colors))
                for x, y in dc_coords
            ]
            charts.append(chart_template % dict(generated_script=''.join(segments),
                                                id=chart_id, size=self.graphsize))
        return html_template % dict(generated_body=''.join(charts))

# ===========================
# Tests

def run_tests(opts):
    """Compute and print token layouts for a fixed set of sample clusters.

    Each sample is a list of per-datacenter node counts.  The layouts are
    calculated with opts.ringrange and opts.strat, printed to stdout, and
    returned as a list of token sets (one per sample) for rendering.
    """
    tests = [
        [1],
        [1, 1],
        [2, 2],
        [1, 2, 2],
        [2, 2, 2],
        [2, 0, 0],
        [0, 2, 0],
        [0, 0, 2],
        [2, 2, 0],
        [2, 0, 2],
        [0, 2, 2],
        [0, 0, 1, 1, 0, 1, 1],
        [6],
        [3, 3, 3],
        [9],
        [1, 1, 1, 1],
        [4],
        [3, 3, 6, 4, 2],
    ]

    # One more than the digits of ringrange: room for the blank sign column
    # produced by the "% *d" format in print_tokens.
    tokenwidth = len(str(opts.ringrange)) + 1

    tokensets = []
    for test in tests:
        # A single parenthesised argument prints identically on Python 2 and 3.
        print("Test %r" % (test,))
        tokens = calculate_ideal_tokens(test, opts.ringrange, opts.strat)
        print_tokens(tokens, tokenwidth, indent=2)
        tokensets.append(tokens)
    return tokensets

# ===========================

def display_html(html, wait_time):
    """Show html in a web browser via a temporary .html file.

    The page is written to a temporary file, the default browser is asked to
    open it in a new tab, and after wait_time seconds the file is removed
    again.
    """
    # mode='w': html is text, and the default binary mode rejects str on
    # Python 3.  delete=False: the file has to be closed before the browser
    # opens it (an open NamedTemporaryFile cannot be opened a second time on
    # Windows), so removal is done by hand below.
    f = NamedTemporaryFile(mode='w', suffix='.html', delete=False)
    try:
        try:
            f.write(html)
        finally:
            f.close()
        webbrowser.open(file_to_url(f.name), new=2)
        # webbrowser.open gives no signal for "the browser has loaded the
        # page", so simply give it some time before removing the file.
        sleep(wait_time)
    finally:
        try:
            os.remove(f.name)
        except OSError:
            # Cleanup is best effort; a leftover temp file is not worth a
            # traceback after the tokens have already been printed.
            sys.stderr.write("Warning: could not remove temporary file %s\n" % (f.name,))

def write_output(html, opts):
    """Send the generated page to the destination named by opts.html_output.

    '*tmp*' shows the page in a browser through a temporary file (kept for
    opts.browser_wait_time seconds), '-' writes it to stdout, and anything
    else is taken as the path of a file to (over)write.
    """
    destination = opts.html_output

    if destination == '*tmp*':
        display_html(html, opts.browser_wait_time)
        return

    if destination == '-':
        sys.stdout.write(html)
        return

    with open(destination, 'w') as outfile:
        outfile.write(html)

def readnum(prompt, min=None, max=None):
    """Ask on the console for an integer until a valid one is entered.

    prompt is shown (followed by a space) before every attempt.  When min
    and/or max are given the answer must lie within those inclusive bounds.
    Invalid answers produce an explanatory message and another prompt.

    Returns the accepted integer.
    """
    # Python 2 calls the line reader raw_input; Python 3 renamed it to input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        x = read_line(prompt + ' ')
        try:
            val = int(x)
        except ValueError:
            # A single parenthesised argument prints identically on Python 2
            # and 3; the trailing "\n" leaves a blank line before the retry.
            print("Oops, %r is not an integer. Try again.\n" % (x,))
            continue
        if min is not None and val < min:
            print("Oops, the answer must be at least %d. Try again.\n" % (min,))
        elif max is not None and val > max:
            print("Oops, the answer must be at most %d. Try again.\n" % (max,))
        else:
            return val

def get_dc_sizes_interactive():
    """Ask on the console for the cluster layout.

    Prompts for the number of datacenters (at least 1) and then for the node
    count of each datacenter (0 or more).

    Returns the list of node counts, one integer per datacenter.
    """
    # Single parenthesised arguments print identically on Python 2 and 3.
    print("Token Generator Interactive Mode")
    print("--------------------------------")
    # print("") rather than a bare "print": the latter is a silent no-op on
    # Python 3 instead of emitting a blank line.
    print("")
    dcs = readnum(" How many datacenters will participate in this Cassandra cluster?", min=1)
    sizes = []
    for n in range(dcs):
        sizes.append(readnum(" How many nodes are in datacenter #%d?" % (n + 1), min=0))
    print("")
    return sizes

def main(opts, args):
    """Run the tool with parsed options and positional arguments.

    In test mode (opts.test) the built-in sample layouts are calculated and
    graphing is forced on.  Otherwise args holds the node count of each
    datacenter; when it is empty the counts are asked for interactively.
    The resulting tokens are printed and, if opts.graph is set, rendered to
    HTML and written according to opts.html_output.

    Sets opts.colorlist as a side effect.  Returns 0; non-integer arguments
    are reported through parser.error, which exits the program.
    """
    opts.colorlist = [s.strip() for s in opts.test_colors.split(',')]
    if opts.test:
        opts.graph = True
        tokensets = run_tests(opts)
        graphsize = opts.test_graphsize
    else:
        if not args:
            args = get_dc_sizes_interactive()
        try:
            # A list comprehension (not map) so the conversion, and thus any
            # ValueError, happens right here on Python 3 as well.
            datacenters = [int(arg) for arg in args]
        except ValueError:
            parser.error('Arguments should be integers.')
        tokens = calculate_ideal_tokens(datacenters, opts.ringrange, opts.strat)
        print_tokens(tokens, len(str(opts.ringrange)) + 1)
        tokensets = [tokens]
        graphsize = opts.graphsize

    if opts.graph:
        # The renderer is only needed (and only built) when graphing.
        renderer = RingRenderer(ringrange=opts.ringrange, graphsize=graphsize,
                                colors=opts.colorlist)
        html = renderer.make_html(tokensets)
        write_output(html, opts)
    return 0

# Script entry point: parse the command line, run main() and exit with its
# result.
if __name__ == '__main__':
    opts, args = parser.parse_args()
    # Exit code reported when the user interrupts the run (Ctrl-C); replaced
    # by main()'s return value on normal completion.
    res = -128
    try:
        res = main(opts, args)
    except KeyboardInterrupt:
        pass
    sys.exit(res)

0 comments on commit 962b23b

Please sign in to comment.