From a188a698b120bbe1d8d8405932876211a3caa6d2 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Mon, 18 Jun 2012 19:46:56 +0200 Subject: [PATCH 001/233] make prefixes for all metrics configurable --- backends/graphite.js | 24 ++++++++++++++++-------- exampleConfig.js | 4 ++++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 69692fb5..c2de4814 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -19,6 +19,10 @@ var debug; var flushInterval; var graphiteHost; var graphitePort; +var prefixPersecond; +var prefixCount; +var prefixTimer; +var prefixGauge; var graphiteStats = {}; @@ -59,8 +63,8 @@ var flush_stats = function graphite_flush(ts, metrics) { var value = counters[key]; var valuePerSecond = value / (flushInterval / 1000); // calculate "per second" rate - statString += 'stats.' + key + ' ' + valuePerSecond + ' ' + ts + "\n"; - statString += 'stats_counts.' + key + ' ' + value + ' ' + ts + "\n"; + statString += prefixPersecond + key + ' ' + valuePerSecond + ' ' + ts + "\n"; + statString += prefixCount + key + ' ' + value + ' ' + ts + "\n"; numStats += 1; } @@ -98,13 +102,13 @@ var flush_stats = function graphite_flush(ts, metrics) { var clean_pct = '' + pct; clean_pct.replace('.', '_'); - message += 'stats.timers.' + key + '.mean_' + clean_pct + ' ' + mean + ' ' + ts + "\n"; - message += 'stats.timers.' + key + '.upper_' + clean_pct + ' ' + maxAtThreshold + ' ' + ts + "\n"; + message += prefixTimer + key + '.mean_' + clean_pct + ' ' + mean + ' ' + ts + "\n"; + message += prefixTimer + key + '.upper_' + clean_pct + ' ' + maxAtThreshold + ' ' + ts + "\n"; } - message += 'stats.timers.' + key + '.upper ' + max + ' ' + ts + "\n"; - message += 'stats.timers.' + key + '.lower ' + min + ' ' + ts + "\n"; - message += 'stats.timers.' 
+ key + '.count ' + count + ' ' + ts + "\n"; + message += prefixTimer + key + '.upper ' + max + ' ' + ts + "\n"; + message += prefixTimer + key + '.lower ' + min + ' ' + ts + "\n"; + message += prefixTimer + key + '.count ' + count + ' ' + ts + "\n"; statString += message; numStats += 1; @@ -112,7 +116,7 @@ var flush_stats = function graphite_flush(ts, metrics) { } for (key in gauges) { - statString += 'stats.gauges.' + key + ' ' + gauges[key] + ' ' + ts + "\n"; + statString += prefixGauge + key + ' ' + gauges[key] + ' ' + ts + "\n"; numStats += 1; } @@ -130,6 +134,10 @@ exports.init = function graphite_init(startup_time, config, events) { debug = config.debug; graphiteHost = config.graphiteHost; graphitePort = config.graphitePort; + prefixPersecond = config.prefixPersecond || "statsd."; + prefixCount = config.prefixCount || "stats_counts."; + prefixTimer = config.prefixTimer || "stats.timers."; + prefixGauge = config.prefixGauge || "stats.gauges."; graphiteStats.last_flush = startup_time; graphiteStats.last_exception = startup_time; diff --git a/exampleConfig.js b/exampleConfig.js index 96750d6c..f0a5a92b 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -39,6 +39,10 @@ Optional Variables: prettyprint: whether to prettyprint the console backend output [true or false, default: true] + prefixPersecond: graphite prefix for counter per second metrics [default: "statsd."] + prefixCount: graphite prefix for count metrics [default: "stats_counts."] + prefixTimer: graphite prefix for timer metrics [default: "stats.timers."] + prefixGauge: graphite prefix for gauge metrics [default: "stats.gauges."] */ { graphitePort: 2003 From 545d7800c8b64af8c8ec8c87dd6cf5b3f25f66a7 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Tue, 7 Aug 2012 23:21:30 -0400 Subject: [PATCH 002/233] modify key namespacing to be easier to configure This makes some changes to what the default key namespaces are and how they are configured. 
Basically it puts the config under the graphite key and doesn't need the user to take care of where dots are set. --- backends/graphite.js | 77 ++++++++++++++++++++++++++++++++------------ exampleConfig.js | 9 +++--- 2 files changed, 61 insertions(+), 25 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index bd73c84b..420e5486 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -19,11 +19,20 @@ var debug; var flushInterval; var graphiteHost; var graphitePort; + +// prefix configuration +var globalPrefix; var prefixPersecond; -var prefixCount; +var prefixCounter; var prefixTimer; var prefixGauge; +// set up namespaces +var globalNamespace = []; +var counterNamespace = []; +var timerNamespace = []; +var gaugesNamespace = []; + var graphiteStats = {}; var post_stats = function graphite_post_stats(statString) { @@ -39,8 +48,9 @@ var post_stats = function graphite_post_stats(statString) { }); graphite.on('connect', function() { var ts = Math.round(new Date().getTime() / 1000); - statString += 'stats.statsd.graphiteStats.last_exception ' + last_exception + ' ' + ts + "\n"; - statString += 'stats.statsd.graphiteStats.last_flush ' + last_flush + ' ' + ts + "\n"; + var namespace = globalNamespace.concat('statsd'); + statString += namespace.join(".") + '.graphiteStats.last_exception ' + last_exception + ' ' + ts + "\n"; + statString += namespace.join(".") + '.graphiteStats.last_flush ' + last_flush + ' ' + ts + "\n"; this.write(statString); this.end(); graphiteStats.last_flush = Math.round(new Date().getTime() / 1000); @@ -66,11 +76,13 @@ var flush_stats = function graphite_flush(ts, metrics) { var pctThreshold = metrics.pctThreshold; for (key in counters) { + var namespace = counterNamespace.concat(key); + var namespace_num = counterNamespace.concat('stats_counts', key); var value = counters[key]; var valuePerSecond = value / (flushInterval / 1000); // calculate "per second" rate - statString += prefixPersecond + key + ' ' + valuePerSecond + 
' ' + ts + "\n"; - statString += prefixCount + key + ' ' + value + ' ' + ts + "\n"; + statString += namespace.join(".") + ' ' + valuePerSecond + ' ' + ts + "\n"; + statString += namespace_num.join(".") + ' ' + value + ' ' + ts + "\n"; numStats += 1; } @@ -82,6 +94,9 @@ var flush_stats = function graphite_flush(ts, metrics) { var min = values[0]; var max = values[count - 1]; + var namespace = timerNamespace.concat(key); + var the_key = namespace.join("."); + var cumulativeValues = [min]; for (var i = 1; i < count; i++) { cumulativeValues.push(values[i] + cumulativeValues[i-1]); @@ -108,9 +123,9 @@ var flush_stats = function graphite_flush(ts, metrics) { var clean_pct = '' + pct; clean_pct.replace('.', '_'); - message += prefixTimer + key + '.mean_' + clean_pct + ' ' + mean + ' ' + ts + "\n"; - message += prefixTimer + key + '.upper_' + clean_pct + ' ' + maxAtThreshold + ' ' + ts + "\n"; - message += prefixTimer + key + '.sum_' + clean_pct + ' ' + sum + ' ' + ts + "\n"; + message += the_key + '.mean_' + clean_pct + ' ' + mean + ' ' + ts + "\n"; + message += the_key + '.upper_' + clean_pct + ' ' + maxAtThreshold + ' ' + ts + "\n"; + message += the_key + '.sum_' + clean_pct + ' ' + sum + ' ' + ts + "\n"; } sum = cumulativeValues[count-1]; @@ -122,12 +137,12 @@ var flush_stats = function graphite_flush(ts, metrics) { } var stddev = Math.sqrt(sumOfDiffs / count); - message += prefixTimer + key + '.std ' + stddev + ' ' + ts + "\n"; - message += prefixTimer + key + '.upper ' + max + ' ' + ts + "\n"; - message += prefixTimer + key + '.lower ' + min + ' ' + ts + "\n"; - message += prefixTimer + key + '.count ' + count + ' ' + ts + "\n"; - message += prefixTimer + key + '.sum ' + sum + ' ' + ts + "\n"; - message += prefixTimer + key + '.mean ' + mean + ' ' + ts + "\n"; + message += the_key + '.std ' + stddev + ' ' + ts + "\n"; + message += the_key + '.upper ' + max + ' ' + ts + "\n"; + message += the_key + '.lower ' + min + ' ' + ts + "\n"; + message += the_key + '.count ' + 
count + ' ' + ts + "\n"; + message += the_key + '.sum ' + sum + ' ' + ts + "\n"; + message += the_key + '.mean ' + mean + ' ' + ts + "\n"; statString += message; @@ -136,12 +151,14 @@ var flush_stats = function graphite_flush(ts, metrics) { } for (key in gauges) { - statString += prefixGauge + key + ' ' + gauges[key] + ' ' + ts + "\n"; + var namespace = gaugesNamespace.concat(key); + statString += namespace.join(".") + ' ' + gauges[key] + ' ' + ts + "\n"; numStats += 1; } - statString += 'statsd.numStats ' + numStats + ' ' + ts + "\n"; - statString += 'stats.statsd.graphiteStats.calculationtime ' + (Date.now() - starttime) + ' ' + ts + "\n"; + var namespace = globalNamespace.concat('statsd'); + statString += namespace.join(".") + '.numStats ' + numStats + ' ' + ts + "\n"; + statString += namespace.join(".") + '.graphiteStats.calculationtime ' + (Date.now() - starttime) + ' ' + ts + "\n"; post_stats(statString); }; @@ -155,10 +172,28 @@ exports.init = function graphite_init(startup_time, config, events) { debug = config.debug; graphiteHost = config.graphiteHost; graphitePort = config.graphitePort; - prefixPersecond = config.prefixPersecond || "statsd."; - prefixCount = config.prefixCount || "stats_counts."; - prefixTimer = config.prefixTimer || "stats.timers."; - prefixGauge = config.prefixGauge || "stats.gauges."; + config.graphite = config.graphite || {}; + globalPrefix = config.graphite.globalPrefix || "stats"; + prefixCounter = config.graphite.prefixCounter || "counters"; + prefixTimer = config.graphite.prefixTimer || "timers"; + prefixGauge = config.graphite.prefixGauge || "gauges"; + + if (globalPrefix !== "") { + globalNamespace.push(globalPrefix); + counterNamespace.push(globalPrefix); + timerNamespace.push(globalPrefix); + gaugesNamespace.push(globalPrefix); + } + + if (prefixCounter !== "") { + counterNamespace.push(prefixCounter); + } + if (prefixTimer !== "") { + timerNamespace.push(prefixTimer); + } + if (prefixGauge !== "") { + 
gaugesNamespace.push(prefixGauge); + } graphiteStats.last_flush = startup_time; graphiteStats.last_exception = startup_time; diff --git a/exampleConfig.js b/exampleConfig.js index 41ec12a4..871e9a0f 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -44,10 +44,11 @@ Optional Variables: application: name of the application for syslog [string, default: statsd] level: log level for [node-]syslog [string, default: LOG_INFO] - prefixPersecond: graphite prefix for counter per second metrics [default: "statsd."] - prefixCount: graphite prefix for count metrics [default: "stats_counts."] - prefixTimer: graphite prefix for timer metrics [default: "stats.timers."] - prefixGauge: graphite prefix for gauge metrics [default: "stats.gauges."] + graphite: + globalPrefix: global prefix to use for sending stats to graphite [default: "stats"] + prefixCounter: graphite prefix for counter metrics [default: "counters"] + prefixTimer: graphite prefix for timer metrics [default: "timers"] + prefixGauge: graphite prefix for gauge metrics [default: "gauges"] */ { graphitePort: 2003 From 7247c70953f34445a857b8e83a8ca18a22ceb9a0 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Tue, 7 Aug 2012 23:48:37 -0400 Subject: [PATCH 003/233] fix the tests --- test/graphite_tests.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/graphite_tests.js b/test/graphite_tests.js index 8c1dcd4d..f8a98498 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -148,8 +148,8 @@ module.exports = { data[chunks[0]] = chunks[1]; return data; }); - test.ok(_.include(_.map(entries,function(x) { return _.keys(x)[0] }),'statsd.numStats'),'graphite output includes numStats'); - test.equal(_.find(entries, function(x) { return _.keys(x)[0] == 'statsd.numStats' })['statsd.numStats'],2); + test.ok(_.include(_.map(entries,function(x) { return _.keys(x)[0] }),'stats.statsd.numStats'),'graphite output includes numStats'); + test.equal(_.find(entries, function(x) { 
return _.keys(x)[0] == 'stats.statsd.numStats' })['stats.statsd.numStats'],2); test.done(); }); }); @@ -171,10 +171,10 @@ module.exports = { return data; }); var numstat_test = function(post){ - var mykey = 'statsd.numStats'; + var mykey = 'stats.statsd.numStats'; return _.include(_.keys(post),mykey) && (post[mykey] == 3); }; - test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); + test.ok(_.any(hashes,numstat_test), 'stats.statsd.numStats should be 1'); var testtimervalue_test = function(post){ var mykey = 'stats.timers.a_test_value.mean_90'; @@ -204,19 +204,19 @@ module.exports = { return data; }); var numstat_test = function(post){ - var mykey = 'statsd.numStats'; + var mykey = 'stats.statsd.numStats'; return _.include(_.keys(post),mykey) && (post[mykey] == 3); }; test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); var testavgvalue_test = function(post){ - var mykey = 'stats.a_test_value'; + var mykey = 'stats.counters.a_test_value'; return _.include(_.keys(post),mykey) && (post[mykey] == (testvalue/(me.myflush / 1000))); }; test.ok(_.any(hashes,testavgvalue_test), 'stats.a_test_value should be ' + (testvalue/(me.myflush / 1000))); var testcountvalue_test = function(post){ - var mykey = 'stats_counts.a_test_value'; + var mykey = 'stats.counters.stats_counts.a_test_value'; return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); }; test.ok(_.any(hashes,testcountvalue_test), 'stats_counts.a_test_value should be ' + testvalue); From 1c274895f1466cf220511a9a4652f909a443d90a Mon Sep 17 00:00:00 2001 From: Tim Blair Date: Wed, 8 Aug 2012 10:39:27 +0100 Subject: [PATCH 004/233] Make it clearer that sampling is only for counters. --- README.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7c9be9e2..9eb55500 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,11 @@ Counting This is a simple counter. Add 1 to the "gorets" bucket. 
It stays in memory until the flush interval `config.flushInterval`. +### Sampling + + gorets:1|c|@0.1 + +Tells StatsD that this counter is being sent sampled every 1/10th of the time. Timing ------ @@ -48,13 +53,6 @@ generate the following list of stats for each threshold: Where `$KEY` is the key you stats key you specify when sending to statsd, and `$PCT` is the percentile threshold. -Sampling -------- - - gorets:1|c|@0.1 - -Tells StatsD that this counter is being sent sampled every 1/10th of the time. - Gauges ------ StatsD now also supports gauges, arbitrary values, which can be recorded. From ea39ca624204a12ef5aa2e71d186c1dfb831cf5a Mon Sep 17 00:00:00 2001 From: Vivien Barousse Date: Wed, 8 Aug 2012 23:05:06 +0100 Subject: [PATCH 005/233] Add support for sets, counting unique events Sets are backed using a set data-structure, discarding duplicate values being inserted. This allows the backend to retrieve the number of unique events that happened since the last flush. Sets are all emptied after each flush. --- README.md | 7 +++++++ lib/set.js | 30 ++++++++++++++++++++++++++++++ stats.js | 14 ++++++++++++++ test/set_tests.js | 41 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+) create mode 100644 lib/set.js create mode 100644 test/set_tests.js diff --git a/README.md b/README.md index 7c9be9e2..78e13f59 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,13 @@ StatsD now also supports gauges, arbitrary values, which can be recorded. gaugor:333|g +Sets +---- +StatsD supports counting unique occurrences of events between flushes, +using a Set to store all occurring events. + + uniques:765|s + All metrics can also be batch send in a single UDP packet, separated by a newline character. 
diff --git a/lib/set.js b/lib/set.js new file mode 100644 index 00000000..8458b063 --- /dev/null +++ b/lib/set.js @@ -0,0 +1,30 @@ +var Set = function() { + this.store = {}; +} + +Set.prototype = { + has: function(value) { + if (value) { + return this.store.hasOwnProperty(value); + } else { + return false; + } + }, + insert: function(value) { + if (value) { + this.store[value] = value; + } + }, + clear: function() { + this.store = {}; + }, + values: function() { + var values = []; + for (value in this.store) { + values.push(value); + } + return values; + } +} + +exports.Set = Set; diff --git a/stats.js b/stats.js index 903d8208..7e1d40ea 100644 --- a/stats.js +++ b/stats.js @@ -5,6 +5,7 @@ var dgram = require('dgram') , fs = require('fs') , events = require('events') , logger = require('./lib/logger') + , set = require('./lib/set') // initialize data structures with defaults for statsd stats var keyCounter = {}; @@ -16,6 +17,8 @@ var timers = { "statsd.packet_process_time": [] }; var gauges = {}; +var sets = { +}; var pctThreshold = null; var debugInt, flushInterval, keyFlushInt, server, mgmtServer; var startup_time = Math.round(new Date().getTime() / 1000); @@ -44,6 +47,7 @@ function flushMetrics() { counters: counters, gauges: gauges, timers: timers, + sets: sets, pctThreshold: pctThreshold } @@ -58,6 +62,11 @@ function flushMetrics() { for (key in metrics.timers) { metrics.timers[key] = []; } + + // Clear the sets + for (key in metrics.sets) { + metrics.sets[key] = new set.Set(); + } }); // Flush metrics to each backend. @@ -139,6 +148,11 @@ config.configFile(process.argv[2], function (config, oldConfig) { timers[key].push(Number(fields[0] || 0)); } else if (fields[1].trim() == "g") { gauges[key] = Number(fields[0] || 0); + } else if (fields[1].trim() == "s") { + if (! 
sets[key]) { + sets[key] = new set.Set(); + } + sets[key].insert(fields[0] || '0'); } else { if (fields[2] && fields[2].match(/^@([\d\.]+)/)) { sampleRate = Number(fields[2].match(/^@([\d\.]+)/)[1]); diff --git a/test/set_tests.js b/test/set_tests.js new file mode 100644 index 00000000..47b645bc --- /dev/null +++ b/test/set_tests.js @@ -0,0 +1,41 @@ +var set = require('../lib/set') + +module.exports = { + has_returns_expected_values: function(test) { + test.expect(2); + var s = new set.Set(); + s.insert('a'); + test.ok(s.has('a')); + test.ok(!s.has('b')); + test.done(); + }, + clear_empties_the_set: function(test) { + test.expect(3); + var s = new set.Set(); + s.insert('a'); + test.equal(1, s.values().length); + s.clear(); + test.equal(0, s.values().length); + test.equal([], s.values().length); + test.done(); + }, + values_returns_values: function(test) { + test.expect(3); + var s = new set.Set(); + s.insert('a'); + s.insert('b'); + test.equal(2, s.values().length); + test.ok(s.values().indexOf('a') != -1); + test.ok(s.values().indexOf('b') != -1); + test.done(); + }, + values_are_unique: function(test) { + test.expect(1); + var s = new set.Set(); + s.insert('a'); + s.insert('a'); + s.insert('b'); + test.equal(2, s.values().length); + test.done(); + } +} From 159b4c264120313d922dfe9177f4653ea5b3ca4a Mon Sep 17 00:00:00 2001 From: Vivien Barousse Date: Wed, 8 Aug 2012 23:09:56 +0100 Subject: [PATCH 006/233] Add sets support in the console backend. The backend dumps all values stored in all the buckets being sets. This allows easy debugging by showing all values that are stored in each bucket. 
--- backends/console.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backends/console.js b/backends/console.js index b48d8e37..3fc72368 100644 --- a/backends/console.js +++ b/backends/console.js @@ -34,6 +34,13 @@ ConsoleBackend.prototype.flush = function(timestamp, metrics) { counter: this.statsCache.counters, timers: this.statsCache.timers, gauges: metrics.gauges, + sets: function (vals) { + var ret = {}; + for (val in vals) { + ret[val] = vals[val].values(); + } + return ret; + }(metrics.sets), pctThreshold: metrics.pctThreshold }; From a5f161cea567c055d055fd74222e10115d62b702 Mon Sep 17 00:00:00 2001 From: Vivien Barousse Date: Thu, 9 Aug 2012 14:16:47 +0100 Subject: [PATCH 007/233] Add sets support in the graphite backend The backend doesn't support sets of data, so the count of unique elements is sent instead --- backends/graphite.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/backends/graphite.js b/backends/graphite.js index d5b8e2a7..1b704c01 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -59,6 +59,7 @@ var flush_stats = function graphite_flush(ts, metrics) { var counters = metrics.counters; var gauges = metrics.gauges; var timers = metrics.timers; + var sets = metrics.sets; var pctThreshold = metrics.pctThreshold; for (key in counters) { @@ -135,6 +136,11 @@ var flush_stats = function graphite_flush(ts, metrics) { numStats += 1; } + for (key in sets) { + statString += 'stats.sets.' 
+ key + '.count ' + sets[key].values().length + ' ' + ts + "\n"; + numStats += 1; + } + statString += 'statsd.numStats ' + numStats + ' ' + ts + "\n"; statString += 'stats.statsd.graphiteStats.calculationtime ' + (Date.now() - starttime) + ' ' + ts + "\n"; post_stats(statString); From 19814bd69f37f38412e9f47ca321cc16faf19005 Mon Sep 17 00:00:00 2001 From: Diego Varese Date: Mon, 20 Aug 2012 10:19:36 -0300 Subject: [PATCH 008/233] Fix for config.js, use fs.watch instead of fs.watchFile --- config.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.js b/config.js index 695214e9..c03aa049 100644 --- a/config.js +++ b/config.js @@ -21,7 +21,7 @@ var Configurator = function (file) { this.updateConfig(); - fs.watchFile(file, function (curr, prev) { + fs.watch(file, function (curr, prev) { if (curr.ino != prev.ino) { self.updateConfig(); } }); }; From 6762a41a5de5bba5815912ecea2449a89b1ad0e2 Mon Sep 17 00:00:00 2001 From: Steve Reed Date: Mon, 27 Aug 2012 13:52:52 -0700 Subject: [PATCH 009/233] Increments bad_lines_seen when sampleRate bit does not match regex --- stats.js | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/stats.js b/stats.js index 924644ab..05e1566d 100644 --- a/stats.js +++ b/stats.js @@ -140,8 +140,15 @@ config.configFile(process.argv[2], function (config, oldConfig) { } else if (fields[1].trim() == "g") { gauges[key] = Number(fields[0] || 0); } else { - if (fields[2] && fields[2].match(/^@([\d\.]+)/)) { - sampleRate = Number(fields[2].match(/^@([\d\.]+)/)[1]); + if (fields[2]) { + if (fields[2].match(/^@([\d\.]+)/)) { + sampleRate = Number(fields[2].match(/^@([\d\.]+)/)[1]); + } else { + l.log('Bad line: ' + fields + ' in msg "' + metrics[midx] +'"; has invalid sample rate'); + counters["statsd.bad_lines_seen"]++; + stats['messages']['bad_lines_seen']++; + continue; + } } if (! 
counters[key]) { counters[key] = 0; From f43d8304b96e6021ff4d0be91f5b6c47a615719e Mon Sep 17 00:00:00 2001 From: Alfredo Deza Date: Thu, 30 Aug 2012 13:04:13 -0400 Subject: [PATCH 010/233] fixes undefined Error name --- examples/python_example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index c787fd97..7869ec28 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -63,7 +63,7 @@ def send(data, sample_rate=1): host = settings.statsd_host port = settings.statsd_port addr=(host, port) - except Error: + except: exit(1) sampled_data = {} @@ -88,4 +88,4 @@ def send(data, sample_rate=1): import sys from pprint import pprint print "Unexpected error:", pprint(sys.exc_info()) - pass # we don't care \ No newline at end of file + pass # we don't care From 7f38eee38cf52767198f27ca3263126971507cb7 Mon Sep 17 00:00:00 2001 From: Alfredo Deza Date: Thu, 30 Aug 2012 13:04:47 -0400 Subject: [PATCH 011/233] import * raises errors in Python 2.7 --- examples/python_example.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 7869ec28..b2f39c8e 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -2,15 +2,15 @@ # Steve Ivy # http://monkinetic.com - + # this file expects local_settings.py to be in the same dir, with statsd host and port information: -# +# # statsd_host = 'localhost' # statsd_port = 8125 # Sends statistics to the stats daemon over UDP class Statsd(object): - + @staticmethod def timing(stat, time, sample_rate=1): """ @@ -38,7 +38,7 @@ def decrement(stats, sample_rate=1): >>> Statsd.decrement('some.int') """ Statsd.update_stats(stats, -1, sample_rate) - + @staticmethod def update_stats(stats, delta=1, sampleRate=1): """ @@ -52,7 +52,7 @@ def update_stats(stats, delta=1, sampleRate=1): data[stat] = "%s|c" % delta Statsd.send(data, sampleRate) - + 
@staticmethod def send(data, sample_rate=1): """ @@ -65,9 +65,9 @@ def send(data, sample_rate=1): addr=(host, port) except: exit(1) - + sampled_data = {} - + if(sample_rate < 1): import random if random.random() <= sample_rate: @@ -76,8 +76,8 @@ def send(data, sample_rate=1): sampled_data[stat] = "%s|@%s" %(value, sample_rate) else: sampled_data=data - - from socket import * + + from socket import socket, AF_INET, SOCK_DGRAM udp_sock = socket(AF_INET, SOCK_DGRAM) try: for stat in sampled_data.keys(): From ee5f2bb34070dfc6594b6ae147fe2c91503469b8 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Thu, 6 Sep 2012 19:13:57 -0300 Subject: [PATCH 012/233] don't print trailing newline character --- examples/statsd-client.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/statsd-client.sh b/examples/statsd-client.sh index 1d091634..67cb2f43 100644 --- a/examples/statsd-client.sh +++ b/examples/statsd-client.sh @@ -18,7 +18,7 @@ fi exec 3<> /dev/udp/${STATSD}/${PORT} # Send data -echo "$1" >&3 +echo -n "$1" >&3 # Close UDP socket exec 3<&- From cafa5559bf6c57089caac27ff6c929c7794abc1f Mon Sep 17 00:00:00 2001 From: Theo Schlossnagle Date: Fri, 7 Sep 2012 14:05:59 -0400 Subject: [PATCH 013/233] implement repeater support into statsd --- backends/repeater.js | 29 +++++++++++++++++++++++++++++ exampleConfig.js | 7 +++++++ stats.js | 1 + 3 files changed, 37 insertions(+) create mode 100644 backends/repeater.js diff --git a/backends/repeater.js b/backends/repeater.js new file mode 100644 index 00000000..3befb508 --- /dev/null +++ b/backends/repeater.js @@ -0,0 +1,29 @@ +var util = require('util'), + dgram = require('dgram'); + +function RepeaterBackend(startupTime, config, emitter){ + var self = this; + this.config = config.repeater || []; + this.sock = dgram.createSocket('udp4'); + + // attach + emitter.on('packet', function(packet, rinfo) { self.process(packet, rinfo); }); +}; + +RepeaterBackend.prototype.process = function(packet, rinfo) { + 
var self = this; + hosts = self.config; + for(var i=0; i Date: Tue, 11 Sep 2012 01:10:34 -0400 Subject: [PATCH 014/233] use printf in shell example client this fixed the Bad line seen problem because of a trailing newline more reliably --- examples/statsd-client.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/statsd-client.sh b/examples/statsd-client.sh index 67cb2f43..b34d765a 100644 --- a/examples/statsd-client.sh +++ b/examples/statsd-client.sh @@ -18,7 +18,7 @@ fi exec 3<> /dev/udp/${STATSD}/${PORT} # Send data -echo -n "$1" >&3 +printf "$1" >&3 # Close UDP socket exec 3<&- From fc8e38bb3a209d8ec6cb3ff7ea8729fe2c1cc186 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Tue, 11 Sep 2012 01:20:58 -0400 Subject: [PATCH 015/233] make README formatting more consistent --- README.md | 97 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 66 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 78e13f59..5286103d 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,9 @@ Concepts -------- * *buckets* - Each stat is in its own "bucket". They are not predefined anywhere. Buckets can be named anything that will translate to Graphite (periods make folders, etc) + Each stat is in its own "bucket". They are not predefined anywhere. Buckets +can be named anything that will translate to Graphite (periods make folders, +etc) * *values* Each stat will have a value. How it is interpreted depends on modifiers. In @@ -28,7 +30,8 @@ Counting gorets:1|c -This is a simple counter. Add 1 to the "gorets" bucket. It stays in memory until the flush interval `config.flushInterval`. +This is a simple counter. Add 1 to the "gorets" bucket. It stays in memory +until the flush interval `config.flushInterval`. Timing @@ -114,34 +117,35 @@ giving the relative path (e.g. `./backends/graphite`). 
Graphite Schema --------------- -Graphite uses "schemas" to define the different round robin datasets it houses (analogous to RRAs in rrdtool). Here's an example for the stats databases: +Graphite uses "schemas" to define the different round robin datasets it houses +(analogous to RRAs in rrdtool). Here's an example for the stats databases: In conf/storage-schemas.conf: - + [stats] pattern = ^stats\..* retentions = 10:2160,60:10080,600:262974 -In conf/storage-aggregation.conf: +In conf/storage-aggregation.conf: [min] pattern = \.min$ - xFilesFactor = 0.1 - aggregationMethod = min + xFilesFactor = 0.1 + aggregationMethod = min [max] pattern = \.max$ - xFilesFactor = 0.1 - aggregationMethod = max + xFilesFactor = 0.1 + aggregationMethod = max [sum] pattern = \.count$ - xFilesFactor = 0 - aggregationMethod = sum + xFilesFactor = 0 + aggregationMethod = sum [default_average] pattern = .* - xFilesFactor = 0.3 + xFilesFactor = 0.3 aggregationMethod = average This translates to: @@ -149,22 +153,40 @@ This translates to: * 6 hours of 10 second data (what we consider "near-realtime") * 1 week of 1 minute data * 5 years of 10 minute data -* For databases with 'min' or 'max' in the name, keep only the minimum and maximum value when rolling up data and store a None if less than 10% of the datapoints were received -* For databases with 'count' in the name, add all the values together, and store only a None if none of the datapoints were received -* For all other databases, average the values (mean) when rolling up data, and store a None if less than 30% of the datapoints were received - -(Note: Newer versions of Graphite can take human readable time formats like 10s:6h,1min:7d,10min:5y) - -Retentions and aggregations are read from the file in order, the first pattern that matches is used. This is set when the database is first created, changing these config files will not change databases that have already been created. 
To view or alter the settings on existing files, use whisper-info.py and whisper-resize.py included with the Whisper package. - -These settings have been a good tradeoff so far between size-of-file (round robin databases are fixed size) and data we care about. Each "stats" database is about 3.2 megs with these retentions. - -Many users have been confused to see their hit counts averaged, missing when the data is intermittent, or never stored when statsd is sending at a different interval than graphite expects. Storage aggregation settings will help you control this and understand what Graphite is doing internally with your data. +* For databases with 'min' or 'max' in the name, keep only the minimum and + maximum value when rolling up data and store a None if less than 10% of the + datapoints were received +* For databases with 'count' in the name, add all the values together, and + store only a None if none of the datapoints were received +* For all other databases, average the values (mean) when rolling up data, and + store a None if less than 30% of the datapoints were received + +(Note: Newer versions of Graphite can take human readable time formats like +10s:6h,1min:7d,10min:5y) + +Retentions and aggregations are read from the file in order, the first pattern +that matches is used. This is set when the database is first created, changing +these config files will not change databases that have already been created. +To view or alter the settings on existing files, use whisper-info.py and +whisper-resize.py included with the Whisper package. + +These settings have been a good tradeoff so far between size-of-file (round +robin databases are fixed size) and data we care about. Each "stats" database +is about 3.2 megs with these retentions. + +Many users have been confused to see their hit counts averaged, missing when +the data is intermittent, or never stored when statsd is sending at a different +interval than graphite expects. 
Storage aggregation settings will help you +control this and understand what Graphite is doing internally with your data. TCP Stats Interface ------------------- -A really simple TCP management interface is available by default on port 8126 or overriden in the configuration file. Inspired by the memcache stats approach this can be used to monitor a live statsd server. You can interact with the management server by telnetting to port 8126, the following commands are available: +A really simple TCP management interface is available by default on port 8126 +or overriden in the configuration file. Inspired by the memcache stats approach +this can be used to monitor a live statsd server. You can interact with the +management server by telnetting to port 8126, the following commands are +available: * stats - some stats about the running server * counters - a dump of all the current counters @@ -173,7 +195,8 @@ A really simple TCP management interface is available by default on port 8126 or The stats output currently will give you: * uptime: the number of seconds elapsed since statsd started -* messages.last_msg_seen: the number of elapsed seconds since statsd received a message +* messages.last_msg_seen: the number of elapsed seconds since statsd received a + message * messages.bad_lines_seen: the number of bad lines seen since startup Each backend will also publish a set of statistics, prefixed by its @@ -181,10 +204,14 @@ module name. 
Graphite: -* graphite.last_flush: the number of seconds elapsed since the last successful flush to graphite -* graphite.last_exception: the number of seconds elapsed since the last exception thrown whilst flushing to graphite +* graphite.last_flush: the number of seconds elapsed since the last successful + flush to graphite +* graphite.last_exception: the number of seconds elapsed since the last + exception thrown whilst flushing to graphite -A simple nagios check can be found in the utils/ directory that can be used to check metric thresholds, for example the number of seconds since the last successful flush to graphite. +A simple nagios check can be found in the utils/ directory that can be used to +check metric thresholds, for example the number of seconds since the last +successful flush to graphite. Installation and Configuration ------------------------------ @@ -199,7 +226,12 @@ Installation and Configuration Tests ----- -A test framework has been added using node-unit and some custom code to start and manipulate statsd. Please add tests under test/ for any new features or bug fixes encountered. Testing a live server can be tricky, attempts were made to eliminate race conditions but it may be possible to encounter a stuck state. If doing dev work, a `killall node` will kill any stray test servers in the background (don't do this on a production machine!). +A test framework has been added using node-unit and some custom code to start +and manipulate statsd. Please add tests under test/ for any new features or bug +fixes encountered. Testing a live server can be tricky, attempts were made to +eliminate race conditions but it may be possible to encounter a stuck state. If +doing dev work, a `killall node` will kill any stray test servers in the +background (don't do this on a production machine!). Tests can be executd with `./run_tests.sh`. 
@@ -264,8 +296,11 @@ metrics: { Inspiration ----------- -StatsD was inspired (heavily) by the project (of the same name) at Flickr. Here's a post where Cal Henderson described it in depth: -[Counting and timing](http://code.flickr.com/blog/2008/10/27/counting-timing/). Cal re-released the code recently: [Perl StatsD](https://github.com/iamcal/Flickr-StatsD) +StatsD was inspired (heavily) by the project (of the same name) at Flickr. +Here's a post where Cal Henderson described it in depth: +[Counting and timing](http://code.flickr.com/blog/2008/10/27/counting-timing/). +Cal re-released the code recently: +[Perl StatsD](https://github.com/iamcal/Flickr-StatsD) Meta --------- From 96c488dc52a4f50fdf73343b8275e914be722655 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Tue, 11 Sep 2012 01:27:41 -0400 Subject: [PATCH 016/233] add README information about the repeater backend --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 5286103d..d53a63e5 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,8 @@ StatsD includes the following backends: web-browser interface. * Console (`console`): The console backend outputs the received metrics to stdout (e.g. for seeing what's going on during development). +* Repeater (`repeater`): The repeater backend utilizes the `packet` emit API to + forward raw packets retrieved by StatsD to multiple backend StatsD instances. By default, the `graphite` backend will be loaded automatically. To select which backends are loaded, set the `backends` configuration @@ -293,6 +295,14 @@ metrics: { the `backend_name`. The backend should set `error` to *null*, or, in the case of a failure, an appropriate error. +* Event: **'packet'** + + Parameters: `(packet, rinfo)` + + This is emitted for every incoming packet. The `packet` parameter contains + the raw received message string and the `rinfo` paramter contains remote + address information from the UDP socket. 
+ Inspiration ----------- From b35ab9273d73f47a2b657606e4eac794a11eb407 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Tue, 25 Sep 2012 00:21:20 -0400 Subject: [PATCH 017/233] add a flag for namespacing backwards compatibility This introduces the `graphite.legacyNamespace` boolean flag which can be used to maintain backwards compatibility to how stats are sent to graphite. --- backends/graphite.js | 88 ++++++++++++++++++++++++++++---------------- exampleConfig.js | 1 + 2 files changed, 58 insertions(+), 31 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 1b9156d3..3010200f 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -28,6 +28,7 @@ var prefixTimer; var prefixGauge; // set up namespaces +var legacyNamespace = true; var globalNamespace = []; var counterNamespace = []; var timerNamespace = []; @@ -78,12 +79,16 @@ var flush_stats = function graphite_flush(ts, metrics) { for (key in counters) { var namespace = counterNamespace.concat(key); - var namespace_num = counterNamespace.concat('stats_counts', key); var value = counters[key]; var valuePerSecond = value / (flushInterval / 1000); // calculate "per second" rate - statString += namespace.join(".") + ' ' + valuePerSecond + ' ' + ts + "\n"; - statString += namespace_num.join(".") + ' ' + value + ' ' + ts + "\n"; + if (legacyNamespace === true) { + statString += namespace.join(".") + ' ' + valuePerSecond + ' ' + ts + "\n"; + statString += 'stats_counts.' 
+ key + ' ' + value + ' ' + ts + "\n"; + } else { + statString += namespace.concat('rate').join(".") + ' ' + valuePerSecond + ' ' + ts + "\n"; + statString += namespace.concat('count').join(".") + ' ' + value + ' ' + ts + "\n"; + } numStats += 1; } @@ -126,7 +131,7 @@ var flush_stats = function graphite_flush(ts, metrics) { clean_pct.replace('.', '_'); message += the_key + '.mean_' + clean_pct + ' ' + mean + ' ' + ts + "\n"; message += the_key + '.upper_' + clean_pct + ' ' + maxAtThreshold + ' ' + ts + "\n"; - message += the_key + '.sum_' + clean_pct + ' ' + sum + ' ' + ts + "\n"; + message += the_key + '.sum_' + clean_pct + ' ' + sum + ' ' + ts + "\n"; } sum = cumulativeValues[count-1]; @@ -138,12 +143,12 @@ var flush_stats = function graphite_flush(ts, metrics) { } var stddev = Math.sqrt(sumOfDiffs / count); - message += the_key + '.std ' + stddev + ' ' + ts + "\n"; - message += the_key + '.upper ' + max + ' ' + ts + "\n"; - message += the_key + '.lower ' + min + ' ' + ts + "\n"; - message += the_key + '.count ' + count + ' ' + ts + "\n"; - message += the_key + '.sum ' + sum + ' ' + ts + "\n"; - message += the_key + '.mean ' + mean + ' ' + ts + "\n"; + message += the_key + '.std ' + stddev + ' ' + ts + "\n"; + message += the_key + '.upper ' + max + ' ' + ts + "\n"; + message += the_key + '.lower ' + min + ' ' + ts + "\n"; + message += the_key + '.count ' + count + ' ' + ts + "\n"; + message += the_key + '.sum ' + sum + ' ' + ts + "\n"; + message += the_key + '.mean ' + mean + ' ' + ts + "\n"; statString += message; @@ -163,8 +168,13 @@ var flush_stats = function graphite_flush(ts, metrics) { } var namespace = globalNamespace.concat('statsd'); - statString += namespace.join(".") + '.numStats ' + numStats + ' ' + ts + "\n"; - statString += namespace.join(".") + '.graphiteStats.calculationtime ' + (Date.now() - starttime) + ' ' + ts + "\n"; + if (legacyNamespace === true) { + statString += 'statsd.numStats ' + numStats + ' ' + ts + "\n"; + statString += 
'stats.statsd.graphiteStats.calculationtime ' + (Date.now() - starttime) + ' ' + ts + "\n"; + } else { + statString += namespace.join(".") + '.numStats ' + numStats + ' ' + ts + "\n"; + statString += namespace.join(".") + '.graphiteStats.calculationtime ' + (Date.now() - starttime) + ' ' + ts + "\n"; + } post_stats(statString); }; @@ -179,26 +189,42 @@ exports.init = function graphite_init(startup_time, config, events) { graphiteHost = config.graphiteHost; graphitePort = config.graphitePort; config.graphite = config.graphite || {}; - globalPrefix = config.graphite.globalPrefix || "stats"; - prefixCounter = config.graphite.prefixCounter || "counters"; - prefixTimer = config.graphite.prefixTimer || "timers"; - prefixGauge = config.graphite.prefixGauge || "gauges"; - - if (globalPrefix !== "") { - globalNamespace.push(globalPrefix); - counterNamespace.push(globalPrefix); - timerNamespace.push(globalPrefix); - gaugesNamespace.push(globalPrefix); - } + globalPrefix = config.graphite.globalPrefix; + prefixCounter = config.graphite.prefixCounter; + prefixTimer = config.graphite.prefixTimer; + prefixGauge = config.graphite.prefixGauge; + legacyNamespace = config.graphite.legacyNamespace; + + // set defaults for prefixes + globalPrefix = globalPrefix !== undefined ? globalPrefix : "stats"; + prefixCounter = prefixCounter !== undefined ? prefixCounter : "counters"; + prefixTimer = prefixTimer !== undefined ? prefixTimer : "timers"; + prefixGauge = prefixGauge !== undefined ? prefixGauge : "gauges"; + legacyNamespace = legacyNamespace !== undefined ? 
legacyNamespace : true; + + + if (legacyNamespace === false) { + if (globalPrefix !== "") { + globalNamespace.push(globalPrefix); + counterNamespace.push(globalPrefix); + timerNamespace.push(globalPrefix); + gaugesNamespace.push(globalPrefix); + } - if (prefixCounter !== "") { - counterNamespace.push(prefixCounter); - } - if (prefixTimer !== "") { - timerNamespace.push(prefixTimer); - } - if (prefixGauge !== "") { - gaugesNamespace.push(prefixGauge); + if (prefixCounter !== "") { + counterNamespace.push(prefixCounter); + } + if (prefixTimer !== "") { + timerNamespace.push(prefixTimer); + } + if (prefixGauge !== "") { + gaugesNamespace.push(prefixGauge); + } + } else { + globalNamespace = ['stats']; + counterNamespace = ['stats']; + timerNamespace = ['stats', 'timers']; + gaugesNamespace = ['stats', 'gauges']; } graphiteStats.last_flush = startup_time; diff --git a/exampleConfig.js b/exampleConfig.js index 32bb136a..80090f47 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -45,6 +45,7 @@ Optional Variables: level: log level for [node-]syslog [string, default: LOG_INFO] graphite: + legacyNamspace: use the legacy namespace [default: true] globalPrefix: global prefix to use for sending stats to graphite [default: "stats"] prefixCounter: graphite prefix for counter metrics [default: "counters"] prefixTimer: graphite prefix for timer metrics [default: "timers"] From 55126a23e4e994f14b6091c2a36e810245d13d67 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Tue, 25 Sep 2012 00:28:42 -0400 Subject: [PATCH 018/233] update tests to reflect non-legacy namespace --- test/graphite_tests.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/graphite_tests.js b/test/graphite_tests.js index f8a98498..32f1de84 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -79,6 +79,7 @@ module.exports = { , port: 8125\n\ , dumpMessages: false \n\ , debug: false\n\ + , graphite: { legacyNamespace: false }\n\ , graphitePort: " + 
this.testport + "\n\ , graphiteHost: \"127.0.0.1\"}"; @@ -210,16 +211,16 @@ module.exports = { test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); var testavgvalue_test = function(post){ - var mykey = 'stats.counters.a_test_value'; + var mykey = 'stats.counters.a_test_value.rate'; return _.include(_.keys(post),mykey) && (post[mykey] == (testvalue/(me.myflush / 1000))); }; - test.ok(_.any(hashes,testavgvalue_test), 'stats.a_test_value should be ' + (testvalue/(me.myflush / 1000))); + test.ok(_.any(hashes,testavgvalue_test), 'a_test_value.rate should be ' + (testvalue/(me.myflush / 1000))); var testcountvalue_test = function(post){ - var mykey = 'stats.counters.stats_counts.a_test_value'; + var mykey = 'stats.counters.a_test_value.count'; return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); }; - test.ok(_.any(hashes,testcountvalue_test), 'stats_counts.a_test_value should be ' + testvalue); + test.ok(_.any(hashes,testcountvalue_test), 'a_test_value.count should be ' + testvalue); test.done(); }); From 3d1540109128ba7c2e654ee7e99b59b91448f077 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Tue, 25 Sep 2012 00:43:17 -0400 Subject: [PATCH 019/233] add tests for graphite legacy namespace --- test/graphite_legacy_tests.js | 229 ++++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 test/graphite_legacy_tests.js diff --git a/test/graphite_legacy_tests.js b/test/graphite_legacy_tests.js new file mode 100644 index 00000000..8c1dcd4d --- /dev/null +++ b/test/graphite_legacy_tests.js @@ -0,0 +1,229 @@ +var fs = require('fs'), + net = require('net'), + temp = require('temp'), + spawn = require('child_process').spawn, + util = require('util'), + urlparse = require('url').parse, + _ = require('underscore'), + dgram = require('dgram'), + qsparse = require('querystring').parse, + http = require('http'); + + +var writeconfig = function(text,worker,cb,obj){ + temp.open({suffix: '-statsdconf.js'}, function(err, 
info) { + if (err) throw err; + fs.writeSync(info.fd, text); + fs.close(info.fd, function(err) { + if (err) throw err; + worker(info.path,cb,obj); + }); + }); +} + +var array_contents_are_equal = function(first,second){ + var intlen = _.intersection(first,second).length; + var unlen = _.union(first,second).length; + return (intlen == unlen) && (intlen == first.length); +} + +var statsd_send = function(data,sock,host,port,cb){ + send_data = new Buffer(data); + sock.send(send_data,0,send_data.length,port,host,function(err,bytes){ + if (err) { + throw err; + } + cb(); + }); +} + +// keep collecting data until a specified timeout period has elapsed +// this will let us capture all data chunks so we don't miss one +var collect_for = function(server,timeout,cb){ + var received = []; + var in_flight = 0; + var timed_out = false; + var collector = function(req,res){ + in_flight += 1; + var body = ''; + req.on('data',function(data){ body += data; }); + req.on('end',function(){ + received = received.concat(body.split("\n")); + in_flight -= 1; + if((in_flight < 1) && timed_out){ + server.removeListener('request',collector); + cb(received); + } + }); + } + + setTimeout(function (){ + timed_out = true; + if((in_flight < 1)) { + server.removeListener('connection',collector); + cb(received); + } + },timeout); + + server.on('connection',collector); +} + +module.exports = { + setUp: function (callback) { + this.testport = 31337; + this.myflush = 200; + var configfile = "{graphService: \"graphite\"\n\ + , batch: 200 \n\ + , flushInterval: " + this.myflush + " \n\ + , percentThreshold: 90\n\ + , port: 8125\n\ + , dumpMessages: false \n\ + , debug: false\n\ + , graphitePort: " + this.testport + "\n\ + , graphiteHost: \"127.0.0.1\"}"; + + this.acceptor = net.createServer(); + this.acceptor.listen(this.testport); + this.sock = dgram.createSocket('udp4'); + + this.server_up = true; + this.ok_to_die = false; + this.exit_callback_callback = process.exit; + + 
writeconfig(configfile,function(path,cb,obj){ + obj.path = path; + obj.server = spawn('node',['stats.js', path]); + obj.exit_callback = function (code) { + obj.server_up = false; + if(!obj.ok_to_die){ + console.log('node server unexpectedly quit with code: ' + code); + process.exit(1); + } + else { + obj.exit_callback_callback(); + } + }; + obj.server.on('exit', obj.exit_callback); + obj.server.stderr.on('data', function (data) { + console.log('stderr: ' + data.toString().replace(/\n$/,'')); + }); + /* + obj.server.stdout.on('data', function (data) { + console.log('stdout: ' + data.toString().replace(/\n$/,'')); + }); + */ + obj.server.stdout.on('data', function (data) { + // wait until server is up before we finish setUp + if (data.toString().match(/server is up/)) { + cb(); + } + }); + + },callback,this); + }, + tearDown: function (callback) { + this.sock.close(); + this.acceptor.close(); + this.ok_to_die = true; + if(this.server_up){ + this.exit_callback_callback = callback; + this.server.kill(); + } else { + callback(); + } + }, + + send_well_formed_posts: function (test) { + test.expect(2); + + // we should integrate a timeout into this + this.acceptor.once('connection',function(c){ + var body = ''; + c.on('data',function(d){ body += d; }); + c.on('end',function(){ + var rows = body.split("\n"); + var entries = _.map(rows, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + test.ok(_.include(_.map(entries,function(x) { return _.keys(x)[0] }),'statsd.numStats'),'graphite output includes numStats'); + test.equal(_.find(entries, function(x) { return _.keys(x)[0] == 'statsd.numStats' })['statsd.numStats'],2); + test.done(); + }); + }); + }, + + timers_are_valid: function (test) { + test.expect(3); + + var testvalue = 100; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_test_value:' + testvalue + '|ms',me.sock,'127.0.0.1',8125,function(){ + 
collect_for(me.acceptor,me.myflush*2,function(strings){ + test.ok(strings.length > 0,'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post){ + var mykey = 'statsd.numStats'; + return _.include(_.keys(post),mykey) && (post[mykey] == 3); + }; + test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); + + var testtimervalue_test = function(post){ + var mykey = 'stats.timers.a_test_value.mean_90'; + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); + }; + test.ok(_.any(hashes,testtimervalue_test), 'stats.timers.a_test_value.mean should be ' + testvalue); + + test.done(); + }); + }); + }); + }, + + counts_are_valid: function (test) { + test.expect(4); + + var testvalue = 100; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_test_value:' + testvalue + '|c',me.sock,'127.0.0.1',8125,function(){ + collect_for(me.acceptor,me.myflush*2,function(strings){ + test.ok(strings.length > 0,'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post){ + var mykey = 'statsd.numStats'; + return _.include(_.keys(post),mykey) && (post[mykey] == 3); + }; + test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); + + var testavgvalue_test = function(post){ + var mykey = 'stats.a_test_value'; + return _.include(_.keys(post),mykey) && (post[mykey] == (testvalue/(me.myflush / 1000))); + }; + test.ok(_.any(hashes,testavgvalue_test), 'stats.a_test_value should be ' + (testvalue/(me.myflush / 1000))); + + var testcountvalue_test = function(post){ + var mykey = 'stats_counts.a_test_value'; + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); + }; + test.ok(_.any(hashes,testcountvalue_test), 'stats_counts.a_test_value 
should be ' + testvalue); + + test.done(); + }); + }); + }); + } +} From 5f50a0654fd73b3b34f64a825c82ef25ee9f790b Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Tue, 25 Sep 2012 00:45:29 -0400 Subject: [PATCH 020/233] add set type to configurable namespace --- backends/graphite.js | 12 +++++++++++- exampleConfig.js | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/backends/graphite.js b/backends/graphite.js index 3010200f..01c2c6e0 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -26,6 +26,7 @@ var prefixPersecond; var prefixCounter; var prefixTimer; var prefixGauge; +var prefixSet; // set up namespaces var legacyNamespace = true; @@ -33,6 +34,7 @@ var globalNamespace = []; var counterNamespace = []; var timerNamespace = []; var gaugesNamespace = []; +var setsNamespace = []; var graphiteStats = {}; @@ -163,7 +165,8 @@ var flush_stats = function graphite_flush(ts, metrics) { } for (key in sets) { - statString += 'stats.sets.' + key + '.count ' + sets[key].values().length + ' ' + ts + "\n"; + var namespace = setsNamespace.concat(key); + statString += namespace.join(".") + '.count ' + sets[key].values().length + ' ' + ts + "\n"; numStats += 1; } @@ -193,6 +196,7 @@ exports.init = function graphite_init(startup_time, config, events) { prefixCounter = config.graphite.prefixCounter; prefixTimer = config.graphite.prefixTimer; prefixGauge = config.graphite.prefixGauge; + prefixSet = config.graphite.prefixSet; legacyNamespace = config.graphite.legacyNamespace; // set defaults for prefixes @@ -200,6 +204,7 @@ exports.init = function graphite_init(startup_time, config, events) { prefixCounter = prefixCounter !== undefined ? prefixCounter : "counters"; prefixTimer = prefixTimer !== undefined ? prefixTimer : "timers"; prefixGauge = prefixGauge !== undefined ? prefixGauge : "gauges"; + prefixSet = prefixSet !== undefined ? prefixSet : "sets"; legacyNamespace = legacyNamespace !== undefined ? 
legacyNamespace : true; @@ -209,6 +214,7 @@ exports.init = function graphite_init(startup_time, config, events) { counterNamespace.push(globalPrefix); timerNamespace.push(globalPrefix); gaugesNamespace.push(globalPrefix); + setsNamespace.push(globalPrefix); } if (prefixCounter !== "") { @@ -220,11 +226,15 @@ exports.init = function graphite_init(startup_time, config, events) { if (prefixGauge !== "") { gaugesNamespace.push(prefixGauge); } + if (prefixSet !== "") { + setsNamespace.push(prefixSet); + } } else { globalNamespace = ['stats']; counterNamespace = ['stats']; timerNamespace = ['stats', 'timers']; gaugesNamespace = ['stats', 'gauges']; + setsNamespace = ['stats', 'sets']; } graphiteStats.last_flush = startup_time; diff --git a/exampleConfig.js b/exampleConfig.js index 80090f47..a27e54b2 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -50,6 +50,7 @@ Optional Variables: prefixCounter: graphite prefix for counter metrics [default: "counters"] prefixTimer: graphite prefix for timer metrics [default: "timers"] prefixGauge: graphite prefix for gauge metrics [default: "gauges"] + prefixSet: graphite prefix for set metrics [default: "sets"] repeater: an array of hashes of the for host: and port: that details other statsd servers to which the received From 7e540549ac42f387fbd7ee4277bfaf0148203173 Mon Sep 17 00:00:00 2001 From: Martin Foot Date: Wed, 26 Sep 2012 14:55:18 +0100 Subject: [PATCH 021/233] Fix typo in examples/README.md. --- examples/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/README.md b/examples/README.md index 7b6ac138..582a561c 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,7 +1,7 @@ StatsD Example Clients ====================== -Here's a bunch of example code contributed by the communinty for interfacing with statsd in a variety of languages. +Here's a bunch of example code contributed by the community for interfacing with statsd in a variety of languages. 
Etsy/StatsD.pm - perl module perl-example.pl - perl using Etsy/StatsD module From b99c0d596c3862788a757cec509aec59201a1bcc Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Thu, 27 Sep 2012 14:04:24 -0400 Subject: [PATCH 022/233] allow overriding statsd host and port * allow overriding statsd host and port, by means of STATSD_HOST and STATSD_PORT environment variables * lowercase non-environment variables. that's a good convention * rename 'statsd' variable to 'host' --- examples/statsd-client.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/statsd-client.sh b/examples/statsd-client.sh index 1d091634..15153d22 100644 --- a/examples/statsd-client.sh +++ b/examples/statsd-client.sh @@ -5,8 +5,8 @@ # # Alexander Fortin # -STATSD="127.0.0.1" -PORT="8125" +host="${STATSD_HOST:-127.0.0.1}" +port="${STATSD_PORT:-8125}" if [ $# -ne 1 ] then @@ -15,7 +15,7 @@ then fi # Setup UDP socket with statsd server -exec 3<> /dev/udp/${STATSD}/${PORT} +exec 3<> /dev/udp/$host/$port # Send data echo "$1" >&3 From 514f0ad90d9646aa24cedbd07ffabf53b77768f0 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Thu, 27 Sep 2012 14:06:54 -0400 Subject: [PATCH 023/233] make statsd-client.sh script executable --- examples/statsd-client.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 examples/statsd-client.sh diff --git a/examples/statsd-client.sh b/examples/statsd-client.sh old mode 100644 new mode 100755 From 60cb07d677f1a87bad425de5ad8bed13f02588b3 Mon Sep 17 00:00:00 2001 From: Tommy George Date: Thu, 4 Oct 2012 15:51:52 -0500 Subject: [PATCH 024/233] Update README.md Fixed one super tiny grammatical error. =) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d53a63e5..df8a2462 100644 --- a/README.md +++ b/README.md @@ -235,7 +235,7 @@ eliminate race conditions but it may be possible to encounter a stuck state. 
If doing dev work, a `killall node` will kill any stray test servers in the background (don't do this on a production machine!). -Tests can be executd with `./run_tests.sh`. +Tests can be executed with `./run_tests.sh`. Backend Interface ----------------- From 4c2ee9df31b8ce9ca6a924099b745cb155d5caee Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Mon, 8 Oct 2012 15:14:17 -0400 Subject: [PATCH 025/233] support histograms in accordance with existing codebase, implement this in graphite backend --- README.md | 7 +++++++ backends/graphite.js | 16 +++++++++++++++- exampleConfig.js | 12 +++++++++--- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index df8a2462..78f34195 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,13 @@ generate the following list of stats for each threshold: Where `$KEY` is the key you stats key you specify when sending to statsd, and `$PCT` is the percentile threshold. +If `config.histogram` is set to a non-zero array, statsd will also +maintain frequencies for each bin as specified by the (non-inclusive) +upper limits in the array. (`'inf'` can be used to denote infinity, +which is highly recommended, as high outliers will not be accounted for if +your last upper limit is too low). +a lower limit of 0 is assumed. + Sampling -------- diff --git a/backends/graphite.js b/backends/graphite.js index 1b704c01..3ffe909f 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -15,6 +15,7 @@ var net = require('net'), util = require('util'); +var config; var debug; var flushInterval; var graphiteHost; @@ -125,6 +126,18 @@ var flush_stats = function graphite_flush(ts, metrics) { message += 'stats.timers.' + key + '.count ' + count + ' ' + ts + "\n"; message += 'stats.timers.' + key + '.sum ' + sum + ' ' + ts + "\n"; message += 'stats.timers.' 
+ key + '.mean ' + mean + ' ' + ts + "\n"; + + // note: values bigger than the upper limit of the last bin are ignored, by design + num_bins = (config.histogram || []).length + var i = 0; + for (var bin_i = 0; bin_i < num_bins; bin_i++) { + var freq = 0; + for (; i < count && (config.histogram[bin_i] == 'inf' || values[i] < config.histogram[bin_i]); i++) { + freq += 1; + } + message += 'stats.timers.' + key + '.bin_' + config.histogram[bin_i] + ' ' + freq + ' ' + ts + "\n"; + } + statString += message; numStats += 1; @@ -152,7 +165,8 @@ var backend_status = function graphite_status(writeCb) { } }; -exports.init = function graphite_init(startup_time, config, events) { +exports.init = function graphite_init(startup_time, conf, events) { + config = conf debug = config.debug; graphiteHost = config.graphiteHost; graphitePort = config.graphitePort; diff --git a/exampleConfig.js b/exampleConfig.js index b9dcbe9e..acafc77c 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -27,9 +27,6 @@ Optional Variables: debugInterval: interval to print debug information [ms, default: 10000] dumpMessages: log all incoming messages flushInterval: interval (in ms) to flush to Graphite - percentThreshold: for time information, calculate the Nth percentile(s) - (can be a single value or list of floating-point values) - [%, default: 90] keyFlush: log the most frequently sent keys [object, default: undefined] interval: how often to log frequent keys [ms, default: 0] percent: percentage of frequent keys to log [%, default: 100] @@ -49,6 +46,15 @@ Optional Variables: packets should be "repeated" (duplicated to). e.g. [ { host: '10.10.10.10', port: 8125 }, { host: 'observer', port: 88125 } ] + timer: + percentThreshold: calculate the Nth percentile(s) + (can be a single value or list of floating-point values) + [%, default: 90] + histogram: an array of ordered non-inclusive upper limits of bins for + histogram (in ms). 'inf' means infinity. 
(default: []) + if non-empty, histograms are enabled and frequencies + for each bin are written. + e.g. [ 25, 50, 100, 150, 200, 'inf' ] */ { graphitePort: 2003 From 6f51d04c028bcf1e644eed016537194598d5b958 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Mon, 8 Oct 2012 15:32:17 -0400 Subject: [PATCH 026/233] clarify bins can be arbitrarily wide --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 78f34195..8581c38f 100644 --- a/README.md +++ b/README.md @@ -55,8 +55,11 @@ If `config.histogram` is set to a non-zero array, statsd will also maintain frequencies for each bin as specified by the (non-inclusive) upper limits in the array. (`'inf'` can be used to denote infinity, which is highly recommended, as high outliers will not be accounted for if -your last upper limit is too low). -a lower limit of 0 is assumed. +your last upper limit is too low). A lower limit of 0 is assumed. +Note that this is actually more powerful than real histograms, as you can +make your bins arbitrarily wide if you want to. Though if you want to +view real histograms, you should make your bins equally wide +(equally sized class intervals). Sampling -------- From 1c10cfc0ac53f2c3ae291886def868a4025d75df Mon Sep 17 00:00:00 2001 From: Chris Stevens Date: Mon, 8 Oct 2012 15:05:52 -0500 Subject: [PATCH 027/233] Added support for sets and gauges, standardized on using updateStats internally, removed sampleRate argument for clarity from those methods that will never use it --- examples/php-example.php | 50 +++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/examples/php-example.php b/examples/php-example.php index dd195821..774a60c4 100644 --- a/examples/php-example.php +++ b/examples/php-example.php @@ -8,14 +8,41 @@ class StatsD { /** - * Log timing information + * Sets one or more timing values * - * @param string $stats The metric to in log timing info for. 
- * @param float $time The ellapsed time (ms) to log - * @param float|1 $sampleRate the rate (0-1) for sampling. + * @param string|array $stats The metric(s) to set. + * @param float $time The elapsed time (ms) to log + **/ + public static function timing($stats, $time) { + StatsD::updateStats($stats, $time, 1, 'ms'); + } + + /** + * Sets one or more gauges to a value + * + * @param string|array $stats The metric(s) to set. + * @param float $value The value for the stats. + **/ + public static function gauge($stats, $value) { + StatsD::updateStats($stats, $value, 1, 'g'); + } + + /** + * A "Set" is a count of unique events. + * This data type acts like a counter, but supports counting + * of unique occurences of values between flushes. The backend + * receives the number of unique events that happened since + * the last flush. + * + * The reference use case involved tracking the number of active + * and logged in users by sending the current userId of a user + * with each request with a key of "uniques" (or similar). + * + * @param string|array $stats The metric(s) to set. + * @param float $value The value for the stats. **/ - public static function timing($stat, $time, $sampleRate=1) { - StatsD::send(array($stat => "$time|ms"), $sampleRate); + public static function set($stats, $value) { + StatsD::updateStats($stats, $value, 1, 's'); } /** @@ -26,7 +53,7 @@ public static function timing($stat, $time, $sampleRate=1) { * @return boolean **/ public static function increment($stats, $sampleRate=1) { - StatsD::updateStats($stats, 1, $sampleRate); + StatsD::updateStats($stats, 1, $sampleRate, 'c'); } /** @@ -37,22 +64,23 @@ public static function increment($stats, $sampleRate=1) { * @return boolean **/ public static function decrement($stats, $sampleRate=1) { - StatsD::updateStats($stats, -1, $sampleRate); + StatsD::updateStats($stats, -1, $sampleRate, 'c'); } /** - * Updates one or more stats counters by arbitrary amounts. + * Updates one or more stats. 
* * @param string|array $stats The metric(s) to update. Should be either a string or array of metrics. * @param int|1 $delta The amount to increment/decrement each metric by. * @param float|1 $sampleRate the rate (0-1) for sampling. + * @param string|c $metric The metric type ("c" for count, "ms" for timing, "g" for gauge, "s" for set) * @return boolean **/ - public static function updateStats($stats, $delta=1, $sampleRate=1) { + public static function updateStats($stats, $delta=1, $sampleRate=1, $metric='c') { if (!is_array($stats)) { $stats = array($stats); } $data = array(); foreach($stats as $stat) { - $data[$stat] = "$delta|c"; + $data[$stat] = "$delta|$metric"; } StatsD::send($data, $sampleRate); From dd21c20903c29d3a9de74ecee692e95c47fc06da Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Wed, 10 Oct 2012 00:54:08 -0300 Subject: [PATCH 028/233] put all graphite keys under stats* into the same schema fixes #159 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index df8a2462..37a1144f 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,7 @@ Graphite uses "schemas" to define the different round robin datasets it houses In conf/storage-schemas.conf: [stats] - pattern = ^stats\..* + pattern = ^stats.* retentions = 10:2160,60:10080,600:262974 In conf/storage-aggregation.conf: From 008c7a77c5da4a6b2911221d1105c098bb055469 Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Wed, 10 Oct 2012 16:05:14 +0200 Subject: [PATCH 029/233] Fix logging of key frequencies to stdout. The current code writes by default to a file called "stdout" instead of the stdout filedescriptor. 
--- stats.js | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/stats.js b/stats.js index 8ba28266..42f12a88 100644 --- a/stats.js +++ b/stats.js @@ -301,7 +301,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { if (keyFlushInterval > 0) { var keyFlushPercent = Number((config.keyFlush && config.keyFlush.percent) || 100); - var keyFlushLog = (config.keyFlush && config.keyFlush.log) || "stdout"; + var keyFlushLog = config.keyFlush && config.keyFlush.log; keyFlushInt = setInterval(function () { var key; @@ -321,9 +321,13 @@ config.configFile(process.argv[2], function (config, oldConfig) { logMessage += timeString + " count=" + sortedKeys[i][1] + " key=" + sortedKeys[i][0] + "\n"; } - var logFile = fs.createWriteStream(keyFlushLog, {flags: 'a+'}); - logFile.write(logMessage); - logFile.end(); + if (keyFlushLog) { + var logFile = fs.createWriteStream(keyFlushLog, {flags: 'a+'}); + logFile.write(logMessage); + logFile.end(); + } else { + process.stdout.write(logMessage); + } // clear the counter keyCounter = {}; From 5516c1e7748ba228aac6e5c685702a01b1987578 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Sat, 13 Oct 2012 00:25:44 -0400 Subject: [PATCH 030/233] remove unused timer, remove newline from sets and add sets to README interface definition --- README.md | 1 + stats.js | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 37a1144f..d4e3fffc 100644 --- a/README.md +++ b/README.md @@ -271,6 +271,7 @@ metrics: { counters: counters, gauges: gauges, timers: timers, + sets: sets, pctThreshold: pctThreshold } ``` diff --git a/stats.js b/stats.js index 8ba28266..29b2d709 100644 --- a/stats.js +++ b/stats.js @@ -13,12 +13,9 @@ var counters = { "statsd.packets_received": 0, "statsd.bad_lines_seen": 0 }; -var timers = { - "statsd.packet_process_time": [] -}; +var timers = {}; var gauges = {}; -var sets = { -}; +var sets = {}; var pctThreshold = null; var debugInt, flushInterval, 
keyFlushInt, server, mgmtServer; var startup_time = Math.round(new Date().getTime() / 1000); From ec9cc251c6bd90f52aa232699d8103e87eb02f0e Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Sat, 13 Oct 2012 00:33:05 -0400 Subject: [PATCH 031/233] do timer and counter calculations prior to sending to backends --- README.md | 2 + backends/console.js | 2 + backends/graphite.js | 67 +++---------------- lib/processedmetrics.js | 88 +++++++++++++++++++++++++ stats.js | 7 ++ test/processedmetrics_tests.js | 116 +++++++++++++++++++++++++++++++++ 6 files changed, 224 insertions(+), 58 deletions(-) create mode 100644 lib/processedmetrics.js create mode 100644 test/processedmetrics_tests.js diff --git a/README.md b/README.md index d4e3fffc..e075852f 100644 --- a/README.md +++ b/README.md @@ -272,6 +272,8 @@ metrics: { gauges: gauges, timers: timers, sets: sets, + counter_rates: counter_rates, + timer_data: timer_data, pctThreshold: pctThreshold } ``` diff --git a/backends/console.js b/backends/console.js index 3fc72368..84942fb9 100644 --- a/backends/console.js +++ b/backends/console.js @@ -34,6 +34,8 @@ ConsoleBackend.prototype.flush = function(timestamp, metrics) { counter: this.statsCache.counters, timers: this.statsCache.timers, gauges: metrics.gauges, + timer_data: metrics.timer_data, + counter_rates: metrics.counter_rates, sets: function (vals) { var ret = {}; for (val in vals) { diff --git a/backends/graphite.js b/backends/graphite.js index 1b704c01..02d7f6d3 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -55,77 +55,26 @@ var flush_stats = function graphite_flush(ts, metrics) { var statString = ''; var numStats = 0; var key; - + var timer_data_key; var counters = metrics.counters; var gauges = metrics.gauges; var timers = metrics.timers; var sets = metrics.sets; - var pctThreshold = metrics.pctThreshold; + var counter_rates = metrics.counter_rates; + var timer_data = metrics.timer_data; for (key in counters) { - var value = counters[key]; - var 
valuePerSecond = value / (flushInterval / 1000); // calculate "per second" rate - - statString += 'stats.' + key + ' ' + valuePerSecond + ' ' + ts + "\n"; - statString += 'stats_counts.' + key + ' ' + value + ' ' + ts + "\n"; + statString += 'stats.' + key + ' ' + counter_rates[key] + ' ' + ts + "\n"; + statString += 'stats_counts.' + key + ' ' + counters[key] + ' ' + ts + "\n"; numStats += 1; } for (key in timers) { if (timers[key].length > 0) { - var values = timers[key].sort(function (a,b) { return a-b; }); - var count = values.length; - var min = values[0]; - var max = values[count - 1]; - - var cumulativeValues = [min]; - for (var i = 1; i < count; i++) { - cumulativeValues.push(values[i] + cumulativeValues[i-1]); - } - - var sum = min; - var mean = min; - var maxAtThreshold = max; - - var message = ""; - - var key2; - - for (key2 in pctThreshold) { - var pct = pctThreshold[key2]; - if (count > 1) { - var thresholdIndex = Math.round(((100 - pct) / 100) * count); - var numInThreshold = count - thresholdIndex; - - maxAtThreshold = values[numInThreshold - 1]; - sum = cumulativeValues[numInThreshold - 1]; - mean = sum / numInThreshold; - } - - var clean_pct = '' + pct; - clean_pct.replace('.', '_'); - message += 'stats.timers.' + key + '.mean_' + clean_pct + ' ' + mean + ' ' + ts + "\n"; - message += 'stats.timers.' + key + '.upper_' + clean_pct + ' ' + maxAtThreshold + ' ' + ts + "\n"; - message += 'stats.timers.' + key + '.sum_' + clean_pct + ' ' + sum + ' ' + ts + "\n"; - } - - sum = cumulativeValues[count-1]; - mean = sum / count; - - var sumOfDiffs = 0; - for (var i = 0; i < count; i++) { - sumOfDiffs += (values[i] - mean) * (values[i] - mean); + for (timer_data_key in timer_data[key]) { + statString += 'stats.timers.' + key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ' ' + ts + "\n"; } - var stddev = Math.sqrt(sumOfDiffs / count); - - message += 'stats.timers.' + key + '.std ' + stddev + ' ' + ts + "\n"; - message += 'stats.timers.' 
+ key + '.upper ' + max + ' ' + ts + "\n"; - message += 'stats.timers.' + key + '.lower ' + min + ' ' + ts + "\n"; - message += 'stats.timers.' + key + '.count ' + count + ' ' + ts + "\n"; - message += 'stats.timers.' + key + '.sum ' + sum + ' ' + ts + "\n"; - message += 'stats.timers.' + key + '.mean ' + mean + ' ' + ts + "\n"; - statString += message; numStats += 1; } @@ -133,11 +82,13 @@ var flush_stats = function graphite_flush(ts, metrics) { for (key in gauges) { statString += 'stats.gauges.' + key + ' ' + gauges[key] + ' ' + ts + "\n"; + numStats += 1; } for (key in sets) { statString += 'stats.sets.' + key + '.count ' + sets[key].values().length + ' ' + ts + "\n"; + numStats += 1; } diff --git a/lib/processedmetrics.js b/lib/processedmetrics.js new file mode 100644 index 00000000..38bdf7f4 --- /dev/null +++ b/lib/processedmetrics.js @@ -0,0 +1,88 @@ +var ProcessedMetrics = function (metrics, flushInterval) { + var starttime = Date.now(); + var key; + var counter_rates = {}; + var timer_data = {}; + + var counters = metrics.counters; + var timers = metrics.timers; + var pctThreshold = metrics.pctThreshold; + + for (key in counters) { + var value = counters[key]; + + // calculate "per second" rate + var valuePerSecond = value / (flushInterval / 1000); + counter_rates[key] = valuePerSecond; + } + + for (key in timers) { + if (timers[key].length > 0) { + timer_data[key] = {}; + var current_timer_data = {}; + + var values = timers[key].sort(function (a,b) { return a-b; }); + var count = values.length; + var min = values[0]; + var max = values[count - 1]; + + var cumulativeValues = [min]; + for (var i = 1; i < count; i++) { + cumulativeValues.push(values[i] + cumulativeValues[i-1]); + } + + var sum = min; + var mean = min; + var maxAtThreshold = max; + + var message = ""; + + var key2; + + for (key2 in pctThreshold) { + var pct = pctThreshold[key2]; + if (count > 1) { + var thresholdIndex = Math.round(((100 - pct) / 100) * count); + var numInThreshold = count - 
thresholdIndex; + + maxAtThreshold = values[numInThreshold - 1]; + sum = cumulativeValues[numInThreshold - 1]; + mean = sum / numInThreshold; + } + + var clean_pct = '' + pct; + clean_pct.replace('.', '_'); + current_timer_data["mean_" + clean_pct] = mean; + current_timer_data["upper_" + clean_pct] = maxAtThreshold; + current_timer_data["sum_" + clean_pct] = sum; + + } + + sum = cumulativeValues[count-1]; + mean = sum / count; + + var sumOfDiffs = 0; + for (var i = 0; i < count; i++) { + sumOfDiffs += (values[i] - mean) * (values[i] - mean); + } + var stddev = Math.sqrt(sumOfDiffs / count); + current_timer_data["std"] = stddev; + current_timer_data["upper"] = max; + current_timer_data["lower"] = min; + current_timer_data["count"] = count; + current_timer_data["sum"] = sum; + current_timer_data["mean"] = mean; + + timer_data[key] = current_timer_data; + + } + } + + //add processed metrics to the metrics_hash + metrics.counter_rates = counter_rates; + metrics.timer_data = timer_data; + + return metrics; + } + +exports.ProcessedMetrics = ProcessedMetrics diff --git a/stats.js b/stats.js index 29b2d709..40756735 100644 --- a/stats.js +++ b/stats.js @@ -6,6 +6,7 @@ var dgram = require('dgram') , events = require('events') , logger = require('./lib/logger') , set = require('./lib/set') + , pm = require('./lib/processedmetrics') // initialize data structures with defaults for statsd stats var keyCounter = {}; @@ -16,6 +17,8 @@ var counters = { var timers = {}; var gauges = {}; var sets = {}; +var counter_rates = {}; +var timer_data = {}; var pctThreshold = null; var debugInt, flushInterval, keyFlushInt, server, mgmtServer; var startup_time = Math.round(new Date().getTime() / 1000); @@ -45,6 +48,8 @@ function flushMetrics() { gauges: gauges, timers: timers, sets: sets, + counter_rates: counter_rates, + timer_data: timer_data, pctThreshold: pctThreshold } @@ -66,6 +71,8 @@ function flushMetrics() { } }); + metrics_hash = pm.ProcessedMetrics(metrics_hash, flushInterval) + // 
Flush metrics to each backend. backendEvents.emit('flush', time_stamp, metrics_hash); }; diff --git a/test/processedmetrics_tests.js b/test/processedmetrics_tests.js new file mode 100644 index 00000000..1b41385f --- /dev/null +++ b/test/processedmetrics_tests.js @@ -0,0 +1,116 @@ +var pm = require('../lib/processedmetrics') +var time_stamp = Math.round(new Date().getTime() / 1000); + +var counters = {}; +var gauges = {}; +var timers = {}; +var sets = {}; +var pctThreshold = null; + +var metrics = { + counters: counters, + gauges: gauges, + timers: timers, + sets: sets, + pctThreshold: pctThreshold +} + +module.exports = { + counters_has_stats_count: function(test) { + test.expect(1); + metrics.counters['a'] = 2; + var processed_metrics = new pm.ProcessedMetrics(metrics, 1000); + test.equal(2, processed_metrics.counters['a']); + test.done(); + }, + counters_has_correct_rate: function(test) { + test.expect(1); + metrics.counters['a'] = 2; + var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + test.equal(20, processed_metrics.counter_rates['a']); + test.done(); + }, + timers_handle_empty: function(test) { + test.expect(1); + metrics.timers['a'] = []; + var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + test.equal(20, processed_metrics.counter_rates['a']); + test.done(); + }, + timers_single_time: function(test) { + test.expect(6); + metrics.timers['a'] = [100]; + var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + timer_data = processed_metrics.timer_data['a']; + test.equal(0, timer_data.std); + test.equal(100, timer_data.upper); + test.equal(100, timer_data.lower); + test.equal(1, timer_data.count); + test.equal(100, timer_data.sum); + test.equal(100, timer_data.mean); + test.done(); + }, + timers_multiple_times: function(test) { + test.expect(6); + metrics.timers['a'] = [100, 200, 300]; + var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + timer_data = processed_metrics.timer_data['a']; + 
test.equal(81.64965809277261, timer_data.std); + test.equal(300, timer_data.upper); + test.equal(100, timer_data.lower); + test.equal(3, timer_data.count); + test.equal(600, timer_data.sum); + test.equal(200, timer_data.mean); + test.done(); + }, + timers_single_time_single_percentile: function(test) { + test.expect(3); + metrics.timers['a'] = [100]; + metrics.pctThreshold = [90]; + var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + timer_data = processed_metrics.timer_data['a']; + test.equal(100, timer_data.mean_90); + test.equal(100, timer_data.upper_90); + test.equal(100, timer_data.sum_90); + test.done(); + }, + timers_single_time_multiple_percentiles: function(test) { + test.expect(6); + metrics.timers['a'] = [100]; + metrics.pctThreshold = [90, 80]; + var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + timer_data = processed_metrics.timer_data['a']; + test.equal(100, timer_data.mean_90); + test.equal(100, timer_data.upper_90); + test.equal(100, timer_data.sum_90); + test.equal(100, timer_data.mean_80); + test.equal(100, timer_data.upper_80); + test.equal(100, timer_data.sum_80); + test.done(); + }, + timers_multiple_times_single_percentiles: function(test) { + test.expect(3); + metrics.timers['a'] = [100, 200, 300]; + metrics.pctThreshold = [90]; + var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + timer_data = processed_metrics.timer_data['a']; + test.equal(200, timer_data.mean_90); + test.equal(300, timer_data.upper_90); + test.equal(600, timer_data.sum_90); + test.done(); + }, + timers_multiple_times_multiple_percentiles: function(test) { + test.expect(6); + metrics.timers['a'] = [100, 200, 300]; + metrics.pctThreshold = [90, 80]; + var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + timer_data = processed_metrics.timer_data['a']; + test.equal(200, timer_data.mean_90); + test.equal(300, timer_data.upper_90); + test.equal(600, timer_data.sum_90); + test.equal(150, timer_data.mean_80); + test.equal(200, 
timer_data.upper_80); + test.equal(300, timer_data.sum_80); + test.done(); + } +} From c3d87eaa82724fece1a86ff273f28cffaeed91db Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Sat, 13 Oct 2012 02:01:03 -0400 Subject: [PATCH 032/233] Make small changes based on pull request feedback --- backends/graphite.js | 8 ++++---- ...processedmetrics.js => process_metrics.js} | 4 ++-- stats.js | 4 ++-- ...rics_tests.js => process_metrics_tests.js} | 20 +++++++++---------- 4 files changed, 18 insertions(+), 18 deletions(-) rename lib/{processedmetrics.js => process_metrics.js} (95%) rename test/{processedmetrics_tests.js => process_metrics_tests.js} (83%) diff --git a/backends/graphite.js b/backends/graphite.js index 02d7f6d3..3e1b756c 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -64,14 +64,14 @@ var flush_stats = function graphite_flush(ts, metrics) { var timer_data = metrics.timer_data; for (key in counters) { - statString += 'stats.' + key + ' ' + counter_rates[key] + ' ' + ts + "\n"; - statString += 'stats_counts.' + key + ' ' + counters[key] + ' ' + ts + "\n"; + statString += 'stats.' + key + ' ' + counter_rates[key] + ' ' + ts + "\n"; + statString += 'stats_counts.' + key + ' ' + counters[key] + ' ' + ts + "\n"; numStats += 1; } - for (key in timers) { - if (timers[key].length > 0) { + for (key in timer_data) { + if (Object.keys(timer_data).length > 0) { for (timer_data_key in timer_data[key]) { statString += 'stats.timers.' + key + '.' 
+ timer_data_key + ' ' + timer_data[key][timer_data_key] + ' ' + ts + "\n"; } diff --git a/lib/processedmetrics.js b/lib/process_metrics.js similarity index 95% rename from lib/processedmetrics.js rename to lib/process_metrics.js index 38bdf7f4..a89d35d6 100644 --- a/lib/processedmetrics.js +++ b/lib/process_metrics.js @@ -1,4 +1,4 @@ -var ProcessedMetrics = function (metrics, flushInterval) { +var process_metrics = function (metrics, flushInterval) { var starttime = Date.now(); var key; var counter_rates = {}; @@ -85,4 +85,4 @@ var ProcessedMetrics = function (metrics, flushInterval) { return metrics; } -exports.ProcessedMetrics = ProcessedMetrics +exports.process_metrics = process_metrics diff --git a/stats.js b/stats.js index 40756735..a647af23 100644 --- a/stats.js +++ b/stats.js @@ -6,7 +6,7 @@ var dgram = require('dgram') , events = require('events') , logger = require('./lib/logger') , set = require('./lib/set') - , pm = require('./lib/processedmetrics') + , pm = require('./lib/process_metrics') // initialize data structures with defaults for statsd stats var keyCounter = {}; @@ -71,7 +71,7 @@ function flushMetrics() { } }); - metrics_hash = pm.ProcessedMetrics(metrics_hash, flushInterval) + metrics_hash = pm.process_metrics(metrics_hash, flushInterval) // Flush metrics to each backend. 
backendEvents.emit('flush', time_stamp, metrics_hash); diff --git a/test/processedmetrics_tests.js b/test/process_metrics_tests.js similarity index 83% rename from test/processedmetrics_tests.js rename to test/process_metrics_tests.js index 1b41385f..b7782457 100644 --- a/test/processedmetrics_tests.js +++ b/test/process_metrics_tests.js @@ -1,4 +1,4 @@ -var pm = require('../lib/processedmetrics') +var pm = require('../lib/process_metrics') var time_stamp = Math.round(new Date().getTime() / 1000); var counters = {}; @@ -19,28 +19,28 @@ module.exports = { counters_has_stats_count: function(test) { test.expect(1); metrics.counters['a'] = 2; - var processed_metrics = new pm.ProcessedMetrics(metrics, 1000); + var processed_metrics = new pm.process_metrics(metrics, 1000); test.equal(2, processed_metrics.counters['a']); test.done(); }, counters_has_correct_rate: function(test) { test.expect(1); metrics.counters['a'] = 2; - var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + var processed_metrics = new pm.process_metrics(metrics, 100); test.equal(20, processed_metrics.counter_rates['a']); test.done(); }, timers_handle_empty: function(test) { test.expect(1); metrics.timers['a'] = []; - var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + var processed_metrics = new pm.process_metrics(metrics, 100); test.equal(20, processed_metrics.counter_rates['a']); test.done(); }, timers_single_time: function(test) { test.expect(6); metrics.timers['a'] = [100]; - var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + var processed_metrics = new pm.process_metrics(metrics, 100); timer_data = processed_metrics.timer_data['a']; test.equal(0, timer_data.std); test.equal(100, timer_data.upper); @@ -53,7 +53,7 @@ module.exports = { timers_multiple_times: function(test) { test.expect(6); metrics.timers['a'] = [100, 200, 300]; - var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + var processed_metrics = new pm.process_metrics(metrics, 100); 
timer_data = processed_metrics.timer_data['a']; test.equal(81.64965809277261, timer_data.std); test.equal(300, timer_data.upper); @@ -67,7 +67,7 @@ module.exports = { test.expect(3); metrics.timers['a'] = [100]; metrics.pctThreshold = [90]; - var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + var processed_metrics = new pm.process_metrics(metrics, 100); timer_data = processed_metrics.timer_data['a']; test.equal(100, timer_data.mean_90); test.equal(100, timer_data.upper_90); @@ -78,7 +78,7 @@ module.exports = { test.expect(6); metrics.timers['a'] = [100]; metrics.pctThreshold = [90, 80]; - var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + var processed_metrics = new pm.process_metrics(metrics, 100); timer_data = processed_metrics.timer_data['a']; test.equal(100, timer_data.mean_90); test.equal(100, timer_data.upper_90); @@ -92,7 +92,7 @@ module.exports = { test.expect(3); metrics.timers['a'] = [100, 200, 300]; metrics.pctThreshold = [90]; - var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + var processed_metrics = new pm.process_metrics(metrics, 100); timer_data = processed_metrics.timer_data['a']; test.equal(200, timer_data.mean_90); test.equal(300, timer_data.upper_90); @@ -103,7 +103,7 @@ module.exports = { test.expect(6); metrics.timers['a'] = [100, 200, 300]; metrics.pctThreshold = [90, 80]; - var processed_metrics = new pm.ProcessedMetrics(metrics, 100); + var processed_metrics = new pm.process_metrics(metrics, 100); timer_data = processed_metrics.timer_data['a']; test.equal(200, timer_data.mean_90); test.equal(300, timer_data.upper_90); From c341f969e34d664700c900d80e50ed959504dc05 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Mon, 15 Oct 2012 00:20:56 -0400 Subject: [PATCH 033/233] switch process_metrics to take a callback function --- lib/process_metrics.js | 5 ++--- stats.js | 7 ++++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/process_metrics.js b/lib/process_metrics.js index 
a89d35d6..b99a6374 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -1,4 +1,4 @@ -var process_metrics = function (metrics, flushInterval) { +var process_metrics = function (metrics, flushInterval, ts, flushCallback) { var starttime = Date.now(); var key; var counter_rates = {}; @@ -81,8 +81,7 @@ var process_metrics = function (metrics, flushInterval) { //add processed metrics to the metrics_hash metrics.counter_rates = counter_rates; metrics.timer_data = timer_data; - - return metrics; + flushCallback(ts, metrics); } exports.process_metrics = process_metrics diff --git a/stats.js b/stats.js index a647af23..19e6c937 100644 --- a/stats.js +++ b/stats.js @@ -71,10 +71,11 @@ function flushMetrics() { } }); - metrics_hash = pm.process_metrics(metrics_hash, flushInterval) + pm.process_metrics(metrics_hash, flushInterval, time_stamp, function emitFlush() { + // Flush metrics to each backend. + backendEvents.emit('flush', time_stamp, metrics_hash); + }); - // Flush metrics to each backend. - backendEvents.emit('flush', time_stamp, metrics_hash); }; var stats = { From 1cacf7b2a3933c399adada1453ee8b7a98e9fc3d Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Mon, 15 Oct 2012 00:21:52 -0400 Subject: [PATCH 034/233] counter to counters for consistency --- backends/console.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/console.js b/backends/console.js index 84942fb9..ca1eed4e 100644 --- a/backends/console.js +++ b/backends/console.js @@ -31,7 +31,7 @@ ConsoleBackend.prototype.flush = function(timestamp, metrics) { }); var out = { - counter: this.statsCache.counters, + counters: this.statsCache.counters, timers: this.statsCache.timers, gauges: metrics.gauges, timer_data: metrics.timer_data, From 39df6b489077abfc99b22a623013fb597608344e Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Mon, 15 Oct 2012 01:53:50 -0400 Subject: [PATCH 035/233] add setup function for tests and use new process_metrics signature. 
--- lib/process_metrics.js | 2 +- test/process_metrics_tests.js | 94 ++++++++++++++++++----------------- 2 files changed, 50 insertions(+), 46 deletions(-) diff --git a/lib/process_metrics.js b/lib/process_metrics.js index b99a6374..3e1e957b 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -81,7 +81,7 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { //add processed metrics to the metrics_hash metrics.counter_rates = counter_rates; metrics.timer_data = timer_data; - flushCallback(ts, metrics); + flushCallback(); } exports.process_metrics = process_metrics diff --git a/test/process_metrics_tests.js b/test/process_metrics_tests.js index b7782457..dc55babb 100644 --- a/test/process_metrics_tests.js +++ b/test/process_metrics_tests.js @@ -1,47 +1,51 @@ var pm = require('../lib/process_metrics') -var time_stamp = Math.round(new Date().getTime() / 1000); -var counters = {}; -var gauges = {}; -var timers = {}; -var sets = {}; -var pctThreshold = null; +module.exports = { + setUp: function (callback) { + this.time_stamp = Math.round(new Date().getTime() / 1000); -var metrics = { - counters: counters, - gauges: gauges, - timers: timers, - sets: sets, - pctThreshold: pctThreshold -} + var counters = {}; + var gauges = {}; + var timers = {}; + var sets = {}; + var pctThreshold = null; -module.exports = { + this.metrics = { + counters: counters, + gauges: gauges, + timers: timers, + sets: sets, + pctThreshold: pctThreshold + } + callback(); + }, counters_has_stats_count: function(test) { test.expect(1); - metrics.counters['a'] = 2; - var processed_metrics = new pm.process_metrics(metrics, 1000); - test.equal(2, processed_metrics.counters['a']); + this.metrics.counters['a'] = 2; + pm.process_metrics(this.metrics, 1000, this.time_stamp, function(){}); + test.equal(2, this.metrics.counters['a']); test.done(); }, counters_has_correct_rate: function(test) { test.expect(1); - metrics.counters['a'] = 2; - var processed_metrics = new 
pm.process_metrics(metrics, 100); - test.equal(20, processed_metrics.counter_rates['a']); + this.metrics.counters['a'] = 2; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + test.equal(20, this.metrics.counter_rates['a']); test.done(); }, timers_handle_empty: function(test) { test.expect(1); - metrics.timers['a'] = []; - var processed_metrics = new pm.process_metrics(metrics, 100); - test.equal(20, processed_metrics.counter_rates['a']); + this.metrics.timers['a'] = []; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + //potentially a cleaner way to check this + test.equal(undefined, this.metrics.counter_rates['a']); test.done(); }, timers_single_time: function(test) { test.expect(6); - metrics.timers['a'] = [100]; - var processed_metrics = new pm.process_metrics(metrics, 100); - timer_data = processed_metrics.timer_data['a']; + this.metrics.timers['a'] = [100]; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + timer_data = this.metrics.timer_data['a']; test.equal(0, timer_data.std); test.equal(100, timer_data.upper); test.equal(100, timer_data.lower); @@ -52,9 +56,9 @@ module.exports = { }, timers_multiple_times: function(test) { test.expect(6); - metrics.timers['a'] = [100, 200, 300]; - var processed_metrics = new pm.process_metrics(metrics, 100); - timer_data = processed_metrics.timer_data['a']; + this.metrics.timers['a'] = [100, 200, 300]; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + timer_data = this.metrics.timer_data['a']; test.equal(81.64965809277261, timer_data.std); test.equal(300, timer_data.upper); test.equal(100, timer_data.lower); @@ -65,10 +69,10 @@ module.exports = { }, timers_single_time_single_percentile: function(test) { test.expect(3); - metrics.timers['a'] = [100]; - metrics.pctThreshold = [90]; - var processed_metrics = new pm.process_metrics(metrics, 100); - timer_data = processed_metrics.timer_data['a']; + this.metrics.timers['a'] = [100]; + 
this.metrics.pctThreshold = [90]; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + timer_data = this.metrics.timer_data['a']; test.equal(100, timer_data.mean_90); test.equal(100, timer_data.upper_90); test.equal(100, timer_data.sum_90); @@ -76,10 +80,10 @@ module.exports = { }, timers_single_time_multiple_percentiles: function(test) { test.expect(6); - metrics.timers['a'] = [100]; - metrics.pctThreshold = [90, 80]; - var processed_metrics = new pm.process_metrics(metrics, 100); - timer_data = processed_metrics.timer_data['a']; + this.metrics.timers['a'] = [100]; + this.metrics.pctThreshold = [90, 80]; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + timer_data = this.metrics.timer_data['a']; test.equal(100, timer_data.mean_90); test.equal(100, timer_data.upper_90); test.equal(100, timer_data.sum_90); @@ -90,10 +94,10 @@ module.exports = { }, timers_multiple_times_single_percentiles: function(test) { test.expect(3); - metrics.timers['a'] = [100, 200, 300]; - metrics.pctThreshold = [90]; - var processed_metrics = new pm.process_metrics(metrics, 100); - timer_data = processed_metrics.timer_data['a']; + this.metrics.timers['a'] = [100, 200, 300]; + this.metrics.pctThreshold = [90]; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + timer_data = this.metrics.timer_data['a']; test.equal(200, timer_data.mean_90); test.equal(300, timer_data.upper_90); test.equal(600, timer_data.sum_90); @@ -101,10 +105,10 @@ module.exports = { }, timers_multiple_times_multiple_percentiles: function(test) { test.expect(6); - metrics.timers['a'] = [100, 200, 300]; - metrics.pctThreshold = [90, 80]; - var processed_metrics = new pm.process_metrics(metrics, 100); - timer_data = processed_metrics.timer_data['a']; + this.metrics.timers['a'] = [100, 200, 300]; + this.metrics.pctThreshold = [90, 80]; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + timer_data = this.metrics.timer_data['a']; 
test.equal(200, timer_data.mean_90); test.equal(300, timer_data.upper_90); test.equal(600, timer_data.sum_90); From 3620a3ab86e22c1ad67821f39bd1f1951d21de3a Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Wed, 17 Oct 2012 21:09:31 -0400 Subject: [PATCH 036/233] Add Call back parameters for passing metrics and error message --- lib/process_metrics.js | 10 +++++++--- stats.js | 8 ++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/process_metrics.js b/lib/process_metrics.js index 3e1e957b..536b929e 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -1,9 +1,8 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { - var starttime = Date.now(); var key; var counter_rates = {}; var timer_data = {}; - + var error = false; var counters = metrics.counters; var timers = metrics.timers; var pctThreshold = metrics.pctThreshold; @@ -81,7 +80,12 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { //add processed metrics to the metrics_hash metrics.counter_rates = counter_rates; metrics.timer_data = timer_data; - flushCallback(); + + if (Object.keys(counter_rates).length == 0 && Object.keys(timer_data).length == 0 ) { + error = "no metrics to calculate"; + } + + flushCallback(error, metrics); } exports.process_metrics = process_metrics diff --git a/stats.js b/stats.js index 19e6c937..932e1faf 100644 --- a/stats.js +++ b/stats.js @@ -8,6 +8,7 @@ var dgram = require('dgram') , set = require('./lib/set') , pm = require('./lib/process_metrics') + // initialize data structures with defaults for statsd stats var keyCounter = {}; var counters = { @@ -71,9 +72,12 @@ function flushMetrics() { } }); - pm.process_metrics(metrics_hash, flushInterval, time_stamp, function emitFlush() { + pm.process_metrics(metrics_hash, flushInterval, time_stamp, function emitFlush(err, metrics) { // Flush metrics to each backend. 
- backendEvents.emit('flush', time_stamp, metrics_hash); + if (err) { + l.log("Errored processing metrics with: " + err, 'debug'); + } + backendEvents.emit('flush', time_stamp, metrics); }); }; From 246fdaece68ae60c6f25fa59f27d9c4183371b9b Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Wed, 17 Oct 2012 21:35:28 -0400 Subject: [PATCH 037/233] Tweak log formating to append colon and space to stdout type. --- lib/logger.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/logger.js b/lib/logger.js index 402dabe0..f2264278 100644 --- a/lib/logger.js +++ b/lib/logger.js @@ -18,9 +18,9 @@ Logger.prototype = { log: function (msg, type) { if (this.backend == 'stdout') { if (!type) { - type = 'DEBUG: '; + type = 'DEBUG'; } - this.util.log(type + msg); + this.util.log(type + ": " + msg); } else { if (!type) { type = this.level From 0ce07fe21de1d6059b6a1e2c2d3cb5302b5e95b0 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Thu, 18 Oct 2012 00:24:12 -0400 Subject: [PATCH 038/233] Test sending bad packet and bad_lines_seen being incremented --- test/graphite_tests.js | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/test/graphite_tests.js b/test/graphite_tests.js index 8c1dcd4d..1f785424 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -155,6 +155,39 @@ module.exports = { }); }, + send_malformed_post: function (test) { + test.expect(3); + + var testvalue = 1; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_bad_test_value|z',me.sock,'127.0.0.1',8125,function(){ + collect_for(me.acceptor,me.myflush*2,function(strings){ + test.ok(strings.length > 0,'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post){ + var mykey = 'statsd.numStats'; + return _.include(_.keys(post),mykey) && (post[mykey] == 2); + }; + 
test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 0'); + + var bad_lines_seen_value_test = function(post){ + var mykey = 'stats_counts.statsd.bad_lines_seen'; + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); + }; + test.ok(_.any(hashes,bad_lines_seen_value_test), 'stats_counts.statsd.bad_lines_seen should be ' + testvalue); + + test.done(); + }); + }); + }); + }, + timers_are_valid: function (test) { test.expect(3); From c5209c2f4a3e5d99a46dc1598481da01f6973d8e Mon Sep 17 00:00:00 2001 From: Timon Date: Thu, 18 Oct 2012 19:18:40 +0600 Subject: [PATCH 039/233] fix timings on some locales when php use not common locale (RU for example) simple convertion float to string may look like 13,4343 21 1212,00 12'212'000.33 and udp request send strings like this 12,2323|ms and statsd can't correctly parse such strings --- examples/php-example.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/php-example.php b/examples/php-example.php index dd195821..82e4db62 100644 --- a/examples/php-example.php +++ b/examples/php-example.php @@ -15,6 +15,7 @@ class StatsD { * @param float|1 $sampleRate the rate (0-1) for sampling. 
**/ public static function timing($stat, $time, $sampleRate=1) { + $time = number_format($time, 4, '.', ''); StatsD::send(array($stat => "$time|ms"), $sampleRate); } @@ -137,4 +138,4 @@ public function getConfig($name) host = yourhost port = 8125 -*/ \ No newline at end of file +*/ From e0daf54adafdc0bc8c622957d56c7eb5467f2c6e Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Thu, 18 Oct 2012 20:43:55 -0400 Subject: [PATCH 040/233] Move config.js to lib folder --- config.js => lib/config.js | 0 stats.js | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename config.js => lib/config.js (100%) diff --git a/config.js b/lib/config.js similarity index 100% rename from config.js rename to lib/config.js diff --git a/stats.js b/stats.js index 8ba28266..3484351b 100644 --- a/stats.js +++ b/stats.js @@ -1,7 +1,7 @@ var dgram = require('dgram') , util = require('util') , net = require('net') - , config = require('./config') + , config = require('./lib/config') , fs = require('fs') , events = require('events') , logger = require('./lib/logger') From 922e9e58c57ae4e61268cbd6925c112f0e4e468c Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Thu, 18 Oct 2012 21:06:05 -0400 Subject: [PATCH 041/233] Simplify threshold calculation --- backends/graphite.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 1b704c01..87e708b3 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -95,8 +95,7 @@ var flush_stats = function graphite_flush(ts, metrics) { for (key2 in pctThreshold) { var pct = pctThreshold[key2]; if (count > 1) { - var thresholdIndex = Math.round(((100 - pct) / 100) * count); - var numInThreshold = count - thresholdIndex; + var numInThreshold = Math.round(pct / 100 * count); maxAtThreshold = values[numInThreshold - 1]; sum = cumulativeValues[numInThreshold - 1]; From ac0d55872d201bddbfda4522673722c3e3c995bd Mon Sep 17 00:00:00 2001 From: Thomas Merkel Date: Fri, 19 Oct 2012 17:51:26 +0300 Subject: 
[PATCH 042/233] enable ipv6 support --- backends/repeater.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/repeater.js b/backends/repeater.js index 3befb508..e52fb963 100644 --- a/backends/repeater.js +++ b/backends/repeater.js @@ -4,7 +4,7 @@ var util = require('util'), function RepeaterBackend(startupTime, config, emitter){ var self = this; this.config = config.repeater || []; - this.sock = dgram.createSocket('udp4'); + this.sock = dgram.createSocket('udp'); // attach emitter.on('packet', function(packet, rinfo) { self.process(packet, rinfo); }); From 86bbcba5e93778f855bd813a7638c4fa9d7c86cd Mon Sep 17 00:00:00 2001 From: Thomas Merkel Date: Fri, 19 Oct 2012 18:03:24 +0300 Subject: [PATCH 043/233] Fix ipv6 typo ... --- backends/repeater.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/repeater.js b/backends/repeater.js index e52fb963..66021f67 100644 --- a/backends/repeater.js +++ b/backends/repeater.js @@ -4,7 +4,7 @@ var util = require('util'), function RepeaterBackend(startupTime, config, emitter){ var self = this; this.config = config.repeater || []; - this.sock = dgram.createSocket('udp'); + this.sock = dgram.createSocket('udp6'); // attach emitter.on('packet', function(packet, rinfo) { self.process(packet, rinfo); }); From ecdc1cdb876deb7d1f4605ed9df42a8f85e2a4e2 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Fri, 19 Oct 2012 16:45:19 -0400 Subject: [PATCH 044/233] Simplify calculation --- lib/process_metrics.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/process_metrics.js b/lib/process_metrics.js index 536b929e..f6b4ae6d 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -41,8 +41,7 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { for (key2 in pctThreshold) { var pct = pctThreshold[key2]; if (count > 1) { - var thresholdIndex = Math.round(((100 - pct) / 100) * count); - var numInThreshold = count - thresholdIndex; + var 
numInThreshold = Math.round(pct / 100 * count); maxAtThreshold = values[numInThreshold - 1]; sum = cumulativeValues[numInThreshold - 1]; From ec563319da77455d1bb6b4b136371db44bc0bb80 Mon Sep 17 00:00:00 2001 From: Vadim Antonov Date: Sat, 20 Oct 2012 13:36:44 -0700 Subject: [PATCH 045/233] Modify debian/statsd.install after config.js file was moved to lib folder. --- debian/statsd.install | 1 - 1 file changed, 1 deletion(-) diff --git a/debian/statsd.install b/debian/statsd.install index 084d4ebd..76611abe 100644 --- a/debian/statsd.install +++ b/debian/statsd.install @@ -1,5 +1,4 @@ stats.js /usr/share/statsd -config.js /usr/share/statsd lib/*.js /usr/share/statsd/lib backends/*.js /usr/share/statsd/backends debian/localConfig.js /etc/statsd From 14a83e12a4681f1a217dd68e405c22a81396cd4c Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Sun, 21 Oct 2012 23:15:52 -0400 Subject: [PATCH 046/233] Revert "Tweak log formating to append colon and space to stdout type." This reverts commit 246fdaece68ae60c6f25fa59f27d9c4183371b9b. 
--- lib/logger.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/logger.js b/lib/logger.js index f2264278..402dabe0 100644 --- a/lib/logger.js +++ b/lib/logger.js @@ -18,9 +18,9 @@ Logger.prototype = { log: function (msg, type) { if (this.backend == 'stdout') { if (!type) { - type = 'DEBUG'; + type = 'DEBUG: '; } - this.util.log(type + ": " + msg); + this.util.log(type + msg); } else { if (!type) { type = this.level From 1fd07dda20e5758e79bbfdad455552ea3a68a39d Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Mon, 22 Oct 2012 00:05:02 -0400 Subject: [PATCH 047/233] Change Error handling Logic --- stats.js | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/stats.js b/stats.js index 932e1faf..0495fa7e 100644 --- a/stats.js +++ b/stats.js @@ -73,11 +73,18 @@ function flushMetrics() { }); pm.process_metrics(metrics_hash, flushInterval, time_stamp, function emitFlush(err, metrics) { - // Flush metrics to each backend. + // Flush metrics to each backend only if the metrics processing was sucessful. + // Add processing_errors counter to allow for monitoring if (err) { - l.log("Errored processing metrics with: " + err, 'debug'); + l.log("Exiting due to error processing metrics with: " + err); + // Send metrics to backends for any last minute processing + // and give backends a chance to cleanup before exiting. 
+ backendEvents.emit('error', time_stamp, metrics, err); + // Only needed if other backends override the standard stacktrace/exit functionality + process.exit(1); + } else { + backendEvents.emit('flush', time_stamp, metrics); } - backendEvents.emit('flush', time_stamp, metrics); }); }; From 34c43896fa91a3b4d90632d380a0d985ba7f8744 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Mon, 22 Oct 2012 00:31:36 -0400 Subject: [PATCH 048/233] Tweak logging to add space and colon between type and msg --- lib/logger.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/logger.js b/lib/logger.js index 402dabe0..f2264278 100644 --- a/lib/logger.js +++ b/lib/logger.js @@ -18,9 +18,9 @@ Logger.prototype = { log: function (msg, type) { if (this.backend == 'stdout') { if (!type) { - type = 'DEBUG: '; + type = 'DEBUG'; } - this.util.log(type + msg); + this.util.log(type + ": " + msg); } else { if (!type) { type = this.level From 01da23d0482287796fab2c0b1bf33d161670ba16 Mon Sep 17 00:00:00 2001 From: Patrick McCoy Date: Thu, 25 Oct 2012 08:36:06 -0600 Subject: [PATCH 049/233] Updated stats.js to delete counters Counters are now deleted after being used, not set to zero. This solves some issues with high load on Graphite and certain graphing functions which require null instead of 0 counters. 
--- stats.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stats.js b/stats.js index 80dda401..9d97830e 100644 --- a/stats.js +++ b/stats.js @@ -52,7 +52,7 @@ function flushMetrics() { backendEvents.once('flush', function clear_metrics(ts, metrics) { // Clear the counters for (key in metrics.counters) { - metrics.counters[key] = 0; + delete(metrics.counters[key]); } // Clear the timers From 2ef655a284de6645d1f0971abbcbb5a4c0bba0cf Mon Sep 17 00:00:00 2001 From: Anton Lebedevich Date: Tue, 30 Oct 2012 10:37:23 +0400 Subject: [PATCH 050/233] install lib files, set current dir for daemon to allow require('./lib/...') --- debian/statsd.init | 3 ++- debian/statsd.install | 1 + debian/statsd.upstart | 3 +-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/debian/statsd.init b/debian/statsd.init index 696715d3..1676e21f 100644 --- a/debian/statsd.init +++ b/debian/statsd.init @@ -25,6 +25,7 @@ DAEMON=$NODE_BIN DAEMON_ARGS="/usr/share/statsd/stats.js /etc/statsd/localConfig.js 2>&1 >> /var/log/statsd/statsd.log " PIDFILE=/var/run/$NAME.pid SCRIPTNAME=/etc/init.d/$NAME +CHDIR="/usr/share/statsd" # Exit if the package is not installed # [ -x "$DAEMON" ] || exit 0 @@ -50,7 +51,7 @@ do_start() # 2 if daemon could not be started start-stop-daemon --start --quiet -m --pidfile $PIDFILE --startas $DAEMON --background --test > /dev/null \ || return 1 - start-stop-daemon --start --quiet -m --pidfile $PIDFILE --startas $DAEMON --background -- \ + start-stop-daemon --start --quiet -m --pidfile $PIDFILE --startas $DAEMON --background --chdir $CHDIR -- \ $DAEMON_ARGS > /dev/null 2> /var/log/$NAME-stderr.log \ || return 2 # Add code here, if necessary, that waits for the process to be ready diff --git a/debian/statsd.install b/debian/statsd.install index 76611abe..bf7fa999 100644 --- a/debian/statsd.install +++ b/debian/statsd.install @@ -1,5 +1,6 @@ stats.js /usr/share/statsd lib/*.js /usr/share/statsd/lib backends/*.js /usr/share/statsd/backends 
+lib/*.js /usr/share/statsd/lib debian/localConfig.js /etc/statsd debian/scripts/start /usr/share/statsd/scripts diff --git a/debian/statsd.upstart b/debian/statsd.upstart index e6bb85cf..216620cb 100644 --- a/debian/statsd.upstart +++ b/debian/statsd.upstart @@ -5,8 +5,7 @@ start on startup stop on shutdown script - # We found $HOME is needed. Without it, we ran into problems - export HOME="/root" + chdir /usr/share/statsd exec sudo -u nobody /usr/share/statsd/scripts/start end script From e3f7924a4aedb0a97d9f7d7f3dcd4a4c52217219 Mon Sep 17 00:00:00 2001 From: Patrick McCoy Date: Tue, 30 Oct 2012 10:35:11 -0600 Subject: [PATCH 051/233] Update stats.js MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changed from delete to undefined, per some info on delete from Mozilla https://developer.mozilla.org/en-US/docs/JavaScript/Reference/Operators/delete This keeps the array element there, just sets it's value to undefined. --- stats.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stats.js b/stats.js index 9d97830e..3a56bbf3 100644 --- a/stats.js +++ b/stats.js @@ -52,7 +52,7 @@ function flushMetrics() { backendEvents.once('flush', function clear_metrics(ts, metrics) { // Clear the counters for (key in metrics.counters) { - delete(metrics.counters[key]); + metrics.counters[key] = undefined; } // Clear the timers From 71df1bdb6b8f0a926e3c04b07213f59f03d4e61e Mon Sep 17 00:00:00 2001 From: Patrick McCoy Date: Wed, 31 Oct 2012 15:37:43 -0600 Subject: [PATCH 052/233] Update test/graphite_tests.js Updating the test to work with new null count change. 
--- test/graphite_tests.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/graphite_tests.js b/test/graphite_tests.js index 1f785424..776e1235 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -178,7 +178,7 @@ module.exports = { var bad_lines_seen_value_test = function(post){ var mykey = 'stats_counts.statsd.bad_lines_seen'; - return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); + return _.include(_.keys(post),mykey) && isNaN(post[mykey]); }; test.ok(_.any(hashes,bad_lines_seen_value_test), 'stats_counts.statsd.bad_lines_seen should be ' + testvalue); From 6009b6c87cd2aaf9fd19db1f789825b0efbaf73a Mon Sep 17 00:00:00 2001 From: ciaranj Date: Thu, 1 Nov 2012 09:53:52 +0000 Subject: [PATCH 053/233] Adds Windows Service support This assumes that there is a local 'config.js' within the statsd folder. Signed-off-by: ciaranj --- package.json | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 3d65ec34..5129d49b 100644 --- a/package.json +++ b/package.json @@ -3,7 +3,10 @@ "description": "A simple, lightweight network daemon to collect metrics over UDP", "author": "Etsy", "scripts": { - "test": "./run_tests.sh" + "test": "./run_tests.sh", + "start": "node stats.js config.js", + "install-windows-service": "node_modules\\.bin\\winser -i", + "uninstall-windows-service": "node_modules\\.bin\\winser -r" }, "repository": { "type": "git", @@ -19,7 +22,8 @@ "temp": "0.4.x" }, "optionalDependencies": { - "node-syslog":"1.1.3" + "node-syslog":"1.1.3", + "winser": "=0.0.11" }, "engine": { "node" : ">=0.4" From 777ea8c957700afa3252e23f4c85c646bb4f5e09 Mon Sep 17 00:00:00 2001 From: Patrick McCoy Date: Thu, 1 Nov 2012 12:59:12 -0600 Subject: [PATCH 054/233] Update stats.js Added a config option delete_counters with a default: false to control the behavior of the counters metrics to delete. 
--- stats.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/stats.js b/stats.js index 3a56bbf3..738eaf0c 100644 --- a/stats.js +++ b/stats.js @@ -51,8 +51,13 @@ function flushMetrics() { // After all listeners, reset the stats backendEvents.once('flush', function clear_metrics(ts, metrics) { // Clear the counters + config.delete_counters = config.delete_counters || false; for (key in metrics.counters) { - metrics.counters[key] = undefined; + if (config.delete_counters) { + metrics.counters[key] = undefined; + } else { + metrics.counters[key] = 0; + } } // Clear the timers From dc8cf29abe3d8103167c85fb029263a13b84c1ad Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Thu, 1 Nov 2012 20:32:45 -0400 Subject: [PATCH 055/233] Don't kill the process, just log and increment a counter --- stats.js | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/stats.js b/stats.js index 0495fa7e..6c96f029 100644 --- a/stats.js +++ b/stats.js @@ -13,7 +13,8 @@ var dgram = require('dgram') var keyCounter = {}; var counters = { "statsd.packets_received": 0, - "statsd.bad_lines_seen": 0 + "statsd.bad_lines_seen": 0, + "statsd.calculation_error": 0 }; var timers = {}; var gauges = {}; @@ -73,18 +74,12 @@ function flushMetrics() { }); pm.process_metrics(metrics_hash, flushInterval, time_stamp, function emitFlush(err, metrics) { - // Flush metrics to each backend only if the metrics processing was sucessful. - // Add processing_errors counter to allow for monitoring if (err) { - l.log("Exiting due to error processing metrics with: " + err); - // Send metrics to backends for any last minute processing - // and give backends a chance to cleanup before exiting. 
- backendEvents.emit('error', time_stamp, metrics, err); - // Only needed if other backends override the standard stacktrace/exit functionality - process.exit(1); - } else { - backendEvents.emit('flush', time_stamp, metrics); + l.log("Calculation Error: " + err); + counters["statsd.calculation_error"]++; + stats['messages']['calculation_error']++; } + backendEvents.emit('flush', time_stamp, metrics); }); }; @@ -93,6 +88,7 @@ var stats = { messages: { last_msg_seen: startup_time, bad_lines_seen: 0, + calculation_error: 0 } }; From 4530718f40944a4e2bf55be01b547e37ffa6ca88 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Thu, 1 Nov 2012 20:37:46 -0400 Subject: [PATCH 056/233] fix two tests now that we added an additional default stat --- test/graphite_tests.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/graphite_tests.js b/test/graphite_tests.js index 8c1dcd4d..17f3c5b3 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -149,7 +149,7 @@ module.exports = { return data; }); test.ok(_.include(_.map(entries,function(x) { return _.keys(x)[0] }),'statsd.numStats'),'graphite output includes numStats'); - test.equal(_.find(entries, function(x) { return _.keys(x)[0] == 'statsd.numStats' })['statsd.numStats'],2); + test.equal(_.find(entries, function(x) { return _.keys(x)[0] == 'statsd.numStats' })['statsd.numStats'],3); test.done(); }); }); @@ -205,7 +205,7 @@ module.exports = { }); var numstat_test = function(post){ var mykey = 'statsd.numStats'; - return _.include(_.keys(post),mykey) && (post[mykey] == 3); + return _.include(_.keys(post),mykey) && (post[mykey] == 4); }; test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); From 00c0c1fa8cfa7a7354a74aaa77f65fc0f6e44ee5 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Thu, 1 Nov 2012 22:44:39 -0400 Subject: [PATCH 057/233] Remove Error handling stuff for now --- lib/process_metrics.js | 7 +------ stats.js | 13 +++---------- test/graphite_tests.js | 6 +++--- 3 files 
changed, 7 insertions(+), 19 deletions(-) diff --git a/lib/process_metrics.js b/lib/process_metrics.js index f6b4ae6d..f82616c3 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -2,7 +2,6 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { var key; var counter_rates = {}; var timer_data = {}; - var error = false; var counters = metrics.counters; var timers = metrics.timers; var pctThreshold = metrics.pctThreshold; @@ -80,11 +79,7 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { metrics.counter_rates = counter_rates; metrics.timer_data = timer_data; - if (Object.keys(counter_rates).length == 0 && Object.keys(timer_data).length == 0 ) { - error = "no metrics to calculate"; - } - - flushCallback(error, metrics); + flushCallback(metrics); } exports.process_metrics = process_metrics diff --git a/stats.js b/stats.js index 6c96f029..fe6ae8ca 100644 --- a/stats.js +++ b/stats.js @@ -13,8 +13,7 @@ var dgram = require('dgram') var keyCounter = {}; var counters = { "statsd.packets_received": 0, - "statsd.bad_lines_seen": 0, - "statsd.calculation_error": 0 + "statsd.bad_lines_seen": 0 }; var timers = {}; var gauges = {}; @@ -73,12 +72,7 @@ function flushMetrics() { } }); - pm.process_metrics(metrics_hash, flushInterval, time_stamp, function emitFlush(err, metrics) { - if (err) { - l.log("Calculation Error: " + err); - counters["statsd.calculation_error"]++; - stats['messages']['calculation_error']++; - } + pm.process_metrics(metrics_hash, flushInterval, time_stamp, function emitFlush(metrics) { backendEvents.emit('flush', time_stamp, metrics); }); @@ -87,8 +81,7 @@ function flushMetrics() { var stats = { messages: { last_msg_seen: startup_time, - bad_lines_seen: 0, - calculation_error: 0 + bad_lines_seen: 0 } }; diff --git a/test/graphite_tests.js b/test/graphite_tests.js index 17f3c5b3..0ff28fd0 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -149,7 +149,7 @@ module.exports = { 
return data; }); test.ok(_.include(_.map(entries,function(x) { return _.keys(x)[0] }),'statsd.numStats'),'graphite output includes numStats'); - test.equal(_.find(entries, function(x) { return _.keys(x)[0] == 'statsd.numStats' })['statsd.numStats'],3); + test.equal(_.find(entries, function(x) { return _.keys(x)[0] == 'statsd.numStats' })['statsd.numStats'],2); test.done(); }); }); @@ -205,9 +205,9 @@ module.exports = { }); var numstat_test = function(post){ var mykey = 'statsd.numStats'; - return _.include(_.keys(post),mykey) && (post[mykey] == 4); + return _.include(_.keys(post),mykey) && (post[mykey] == 3); }; - test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); + test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 3'); var testavgvalue_test = function(post){ var mykey = 'stats.a_test_value'; From ad97934cf3374dd82f7efbef37e43866a91c4510 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Thu, 1 Nov 2012 22:56:20 -0400 Subject: [PATCH 058/233] Merge upstream --- README.md | 12 ++++----- backends/repeater.js | 2 +- debian/statsd.init | 3 ++- debian/statsd.install | 2 +- debian/statsd.upstart | 3 +-- examples/php-example.php | 52 +++++++++++++++++++++++++++++--------- config.js => lib/config.js | 2 +- lib/logger.js | 4 +-- stats.js | 25 +++++++++++++----- test/graphite_tests.js | 33 ++++++++++++++++++++++++ 10 files changed, 104 insertions(+), 34 deletions(-) rename config.js => lib/config.js (94%) diff --git a/README.md b/README.md index e075852f..6dc52aa5 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,11 @@ Counting This is a simple counter. Add 1 to the "gorets" bucket. It stays in memory until the flush interval `config.flushInterval`. +### Sampling + + gorets:1|c|@0.1 + +Tells StatsD that this counter is being sent sampled every 1/10th of the time. 
Timing ------ @@ -51,13 +56,6 @@ generate the following list of stats for each threshold: Where `$KEY` is the key you stats key you specify when sending to statsd, and `$PCT` is the percentile threshold. -Sampling --------- - - gorets:1|c|@0.1 - -Tells StatsD that this counter is being sent sampled every 1/10th of the time. - Gauges ------ StatsD now also supports gauges, arbitrary values, which can be recorded. diff --git a/backends/repeater.js b/backends/repeater.js index 3befb508..66021f67 100644 --- a/backends/repeater.js +++ b/backends/repeater.js @@ -4,7 +4,7 @@ var util = require('util'), function RepeaterBackend(startupTime, config, emitter){ var self = this; this.config = config.repeater || []; - this.sock = dgram.createSocket('udp4'); + this.sock = dgram.createSocket('udp6'); // attach emitter.on('packet', function(packet, rinfo) { self.process(packet, rinfo); }); diff --git a/debian/statsd.init b/debian/statsd.init index 696715d3..1676e21f 100644 --- a/debian/statsd.init +++ b/debian/statsd.init @@ -25,6 +25,7 @@ DAEMON=$NODE_BIN DAEMON_ARGS="/usr/share/statsd/stats.js /etc/statsd/localConfig.js 2>&1 >> /var/log/statsd/statsd.log " PIDFILE=/var/run/$NAME.pid SCRIPTNAME=/etc/init.d/$NAME +CHDIR="/usr/share/statsd" # Exit if the package is not installed # [ -x "$DAEMON" ] || exit 0 @@ -50,7 +51,7 @@ do_start() # 2 if daemon could not be started start-stop-daemon --start --quiet -m --pidfile $PIDFILE --startas $DAEMON --background --test > /dev/null \ || return 1 - start-stop-daemon --start --quiet -m --pidfile $PIDFILE --startas $DAEMON --background -- \ + start-stop-daemon --start --quiet -m --pidfile $PIDFILE --startas $DAEMON --background --chdir $CHDIR -- \ $DAEMON_ARGS > /dev/null 2> /var/log/$NAME-stderr.log \ || return 2 # Add code here, if necessary, that waits for the process to be ready diff --git a/debian/statsd.install b/debian/statsd.install index 084d4ebd..bf7fa999 100644 --- a/debian/statsd.install +++ b/debian/statsd.install @@ -1,6 +1,6 @@ 
stats.js /usr/share/statsd -config.js /usr/share/statsd lib/*.js /usr/share/statsd/lib backends/*.js /usr/share/statsd/backends +lib/*.js /usr/share/statsd/lib debian/localConfig.js /etc/statsd debian/scripts/start /usr/share/statsd/scripts diff --git a/debian/statsd.upstart b/debian/statsd.upstart index e6bb85cf..216620cb 100644 --- a/debian/statsd.upstart +++ b/debian/statsd.upstart @@ -5,8 +5,7 @@ start on startup stop on shutdown script - # We found $HOME is needed. Without it, we ran into problems - export HOME="/root" + chdir /usr/share/statsd exec sudo -u nobody /usr/share/statsd/scripts/start end script diff --git a/examples/php-example.php b/examples/php-example.php index dd195821..08346e19 100644 --- a/examples/php-example.php +++ b/examples/php-example.php @@ -8,14 +8,41 @@ class StatsD { /** - * Log timing information + * Sets one or more timing values * - * @param string $stats The metric to in log timing info for. - * @param float $time The ellapsed time (ms) to log - * @param float|1 $sampleRate the rate (0-1) for sampling. + * @param string|array $stats The metric(s) to set. + * @param float $time The elapsed time (ms) to log + **/ + public static function timing($stats, $time) { + StatsD::updateStats($stats, $time, 1, 'ms'); + } + + /** + * Sets one or more gauges to a value + * + * @param string|array $stats The metric(s) to set. + * @param float $value The value for the stats. + **/ + public static function gauge($stats, $value) { + StatsD::updateStats($stats, $value, 1, 'g'); + } + + /** + * A "Set" is a count of unique events. + * This data type acts like a counter, but supports counting + * of unique occurences of values between flushes. The backend + * receives the number of unique events that happened since + * the last flush. + * + * The reference use case involved tracking the number of active + * and logged in users by sending the current userId of a user + * with each request with a key of "uniques" (or similar). 
+ * + * @param string|array $stats The metric(s) to set. + * @param float $value The value for the stats. **/ - public static function timing($stat, $time, $sampleRate=1) { - StatsD::send(array($stat => "$time|ms"), $sampleRate); + public static function set($stats, $value) { + StatsD::updateStats($stats, $value, 1, 's'); } /** @@ -26,7 +53,7 @@ public static function timing($stat, $time, $sampleRate=1) { * @return boolean **/ public static function increment($stats, $sampleRate=1) { - StatsD::updateStats($stats, 1, $sampleRate); + StatsD::updateStats($stats, 1, $sampleRate, 'c'); } /** @@ -37,22 +64,23 @@ public static function increment($stats, $sampleRate=1) { * @return boolean **/ public static function decrement($stats, $sampleRate=1) { - StatsD::updateStats($stats, -1, $sampleRate); + StatsD::updateStats($stats, -1, $sampleRate, 'c'); } /** - * Updates one or more stats counters by arbitrary amounts. + * Updates one or more stats. * * @param string|array $stats The metric(s) to update. Should be either a string or array of metrics. * @param int|1 $delta The amount to increment/decrement each metric by. * @param float|1 $sampleRate the rate (0-1) for sampling. 
+ * @param string|c $metric The metric type ("c" for count, "ms" for timing, "g" for gauge, "s" for set) * @return boolean **/ - public static function updateStats($stats, $delta=1, $sampleRate=1) { + public static function updateStats($stats, $delta=1, $sampleRate=1, $metric='c') { if (!is_array($stats)) { $stats = array($stats); } $data = array(); foreach($stats as $stat) { - $data[$stat] = "$delta|c"; + $data[$stat] = "$delta|$metric"; } StatsD::send($data, $sampleRate); @@ -137,4 +165,4 @@ public function getConfig($name) host = yourhost port = 8125 -*/ \ No newline at end of file +*/ diff --git a/config.js b/lib/config.js similarity index 94% rename from config.js rename to lib/config.js index 695214e9..c03aa049 100644 --- a/config.js +++ b/lib/config.js @@ -21,7 +21,7 @@ var Configurator = function (file) { this.updateConfig(); - fs.watchFile(file, function (curr, prev) { + fs.watch(file, function (curr, prev) { if (curr.ino != prev.ino) { self.updateConfig(); } }); }; diff --git a/lib/logger.js b/lib/logger.js index 402dabe0..f2264278 100644 --- a/lib/logger.js +++ b/lib/logger.js @@ -18,9 +18,9 @@ Logger.prototype = { log: function (msg, type) { if (this.backend == 'stdout') { if (!type) { - type = 'DEBUG: '; + type = 'DEBUG'; } - this.util.log(type + msg); + this.util.log(type + ": " + msg); } else { if (!type) { type = this.level diff --git a/stats.js b/stats.js index fe6ae8ca..ba7ed8cc 100644 --- a/stats.js +++ b/stats.js @@ -1,7 +1,7 @@ var dgram = require('dgram') , util = require('util') , net = require('net') - , config = require('./config') + , config = require('./lib/config') , fs = require('fs') , events = require('events') , logger = require('./lib/logger') @@ -160,8 +160,15 @@ config.configFile(process.argv[2], function (config, oldConfig) { } sets[key].insert(fields[0] || '0'); } else { - if (fields[2] && fields[2].match(/^@([\d\.]+)/)) { - sampleRate = Number(fields[2].match(/^@([\d\.]+)/)[1]); + if (fields[2]) { + if 
(fields[2].match(/^@([\d\.]+)/)) { + sampleRate = Number(fields[2].match(/^@([\d\.]+)/)[1]); + } else { + l.log('Bad line: ' + fields + ' in msg "' + metrics[midx] +'"; has invalid sample rate'); + counters["statsd.bad_lines_seen"]++; + stats['messages']['bad_lines_seen']++; + continue; + } } if (! counters[key]) { counters[key] = 0; @@ -306,7 +313,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { if (keyFlushInterval > 0) { var keyFlushPercent = Number((config.keyFlush && config.keyFlush.percent) || 100); - var keyFlushLog = (config.keyFlush && config.keyFlush.log) || "stdout"; + var keyFlushLog = config.keyFlush && config.keyFlush.log; keyFlushInt = setInterval(function () { var key; @@ -326,9 +333,13 @@ config.configFile(process.argv[2], function (config, oldConfig) { logMessage += timeString + " count=" + sortedKeys[i][1] + " key=" + sortedKeys[i][0] + "\n"; } - var logFile = fs.createWriteStream(keyFlushLog, {flags: 'a+'}); - logFile.write(logMessage); - logFile.end(); + if (keyFlushLog) { + var logFile = fs.createWriteStream(keyFlushLog, {flags: 'a+'}); + logFile.write(logMessage); + logFile.end(); + } else { + process.stdout.write(logMessage); + } // clear the counter keyCounter = {}; diff --git a/test/graphite_tests.js b/test/graphite_tests.js index 0ff28fd0..32ac9be1 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -155,6 +155,39 @@ module.exports = { }); }, + send_malformed_post: function (test) { + test.expect(3); + + var testvalue = 1; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_bad_test_value|z',me.sock,'127.0.0.1',8125,function(){ + collect_for(me.acceptor,me.myflush*2,function(strings){ + test.ok(strings.length > 0,'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post){ + var mykey = 'statsd.numStats'; + return 
_.include(_.keys(post),mykey) && (post[mykey] == 2); + }; + test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 0'); + + var bad_lines_seen_value_test = function(post){ + var mykey = 'stats_counts.statsd.bad_lines_seen'; + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); + }; + test.ok(_.any(hashes,bad_lines_seen_value_test), 'stats_counts.statsd.bad_lines_seen should be ' + testvalue); + + test.done(); + }); + }); + }); + }, + timers_are_valid: function (test) { test.expect(3); From 3b638b244656ee0b50f649e6b94c99158c491739 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Fri, 2 Nov 2012 00:12:15 -0300 Subject: [PATCH 059/233] set graphite back to default backend --- exampleConfig.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exampleConfig.js b/exampleConfig.js index b9dcbe9e..ae56fa6d 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -54,6 +54,6 @@ Optional Variables: graphitePort: 2003 , graphiteHost: "graphite.host.com" , port: 8125 -, backends: [ "./backends/repeater" ] +, backends: [ "./backends/graphite" ] , repeater: [ { host: "10.8.3.214", port: 8125 } ] } From e37c9526196e6fe17ffa97999569f6567a7941aa Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Sat, 3 Nov 2012 16:50:11 -0400 Subject: [PATCH 060/233] making the repeater backend ipv4 only again some people reported problems with it, so we have to look into a better configurable solution for this. 
--- backends/repeater.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/repeater.js b/backends/repeater.js index 66021f67..3befb508 100644 --- a/backends/repeater.js +++ b/backends/repeater.js @@ -4,7 +4,7 @@ var util = require('util'), function RepeaterBackend(startupTime, config, emitter){ var self = this; this.config = config.repeater || []; - this.sock = dgram.createSocket('udp6'); + this.sock = dgram.createSocket('udp4'); // attach emitter.on('packet', function(packet, rinfo) { self.process(packet, rinfo); }); From f1206e663254492d944a6ec480300505b18aeda8 Mon Sep 17 00:00:00 2001 From: Eric Saxby Date: Sat, 3 Nov 2012 15:25:41 -0700 Subject: [PATCH 061/233] Repeater can be configured for udp4 or udp6 --- backends/repeater.js | 4 +++- exampleConfig.js | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/backends/repeater.js b/backends/repeater.js index 3befb508..5388a722 100644 --- a/backends/repeater.js +++ b/backends/repeater.js @@ -4,7 +4,9 @@ var util = require('util'), function RepeaterBackend(startupTime, config, emitter){ var self = this; this.config = config.repeater || []; - this.sock = dgram.createSocket('udp4'); + this.sock = (config.repeaterProtocol == 'udp6') ? + dgram.createSocket('udp6') : + dgram.createSocket('udp4'); // attach emitter.on('packet', function(packet, rinfo) { self.process(packet, rinfo); }); diff --git a/exampleConfig.js b/exampleConfig.js index ae56fa6d..b5eff454 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -49,6 +49,9 @@ Optional Variables: packets should be "repeated" (duplicated to). e.g. [ { host: '10.10.10.10', port: 8125 }, { host: 'observer', port: 88125 } ] + + repeaterProtocol: whether to use udp4 or udp4 for repeaters. 
+ ["udp4" or "udp6", default: "udp4"] */ { graphitePort: 2003 @@ -56,4 +59,5 @@ Optional Variables: , port: 8125 , backends: [ "./backends/graphite" ] , repeater: [ { host: "10.8.3.214", port: 8125 } ] +, repeaterProtocol: "udp4" } From c9e09f8a4ba7b523429d5625120aefd9cc10cb29 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Sat, 3 Nov 2012 22:52:58 -0400 Subject: [PATCH 062/233] Quick note on counter_rates and timer_data --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6dc52aa5..4f5561e9 100644 --- a/README.md +++ b/README.md @@ -276,8 +276,9 @@ metrics: { } ``` - Each backend module is passed the same set of statistics, so a - backend module should treat the metrics as immutable + The counter_rates and timer_data are precalculated statistics to simplify + the creation of backends. Each backend module is passed the same set of + statistics, so a backend module should treat the metrics as immutable structures. StatsD will reset timers and counters after each listener has handled the event. 
From 94ed6a5bb52cde898ac373ae8d541f1c08079ed0 Mon Sep 17 00:00:00 2001 From: Paul Date: Sun, 4 Nov 2012 01:12:47 -0300 Subject: [PATCH 063/233] engine -> engines in package.json --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 5129d49b..ce0d063c 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,7 @@ "node-syslog":"1.1.3", "winser": "=0.0.11" }, - "engine": { + "engines": { "node" : ">=0.4" }, "bin": { "statsd": "./bin/statsd" } From 50dd2ae6844aa89d9d2782ff2fa0169a632004bf Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Mon, 5 Nov 2012 14:21:56 -0500 Subject: [PATCH 064/233] support different histogram settings for different metrics --- README.md | 32 +++++++++++++++++++++++--------- backends/graphite.js | 15 +++++++++++---- exampleConfig.js | 19 ++++++++++++++----- 3 files changed, 48 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 8581c38f..74e97d22 100644 --- a/README.md +++ b/README.md @@ -51,15 +51,29 @@ generate the following list of stats for each threshold: Where `$KEY` is the key you stats key you specify when sending to statsd, and `$PCT` is the percentile threshold. -If `config.histogram` is set to a non-zero array, statsd will also -maintain frequencies for each bin as specified by the (non-inclusive) -upper limits in the array. (`'inf'` can be used to denote infinity, -which is highly recommended, as high outliers will not be accounted for if -your last upper limit is too low). A lower limit of 0 is assumed. -Note that this is actually more powerful than real histograms, as you can -make your bins arbitrarily wide if you want to. Though if you want to -view real histograms, you should make your bins equally wide -(equally sized class intervals). +Use the `config.histogram` setting to instruct statsd to maintain histograms +over time. 
Specify which metrics to match and a corresponding list of +ordered non-inclusive upper limits of bins (class intervals). +(use `inf` to denote infinity; a lower limit of 0 is assumed) +Each `flushInterval`, statsd will store how many values (absolute frequency) +fall within each bin (class interval), for all matching metrics. +First match wins. examples: + +* no histograms for any timer (default): `[]` +* histogram to only track render durations, + with unequal class intervals and catchall for outliers: + + [ { metric: 'render', bins: [8, 25, 50, 100, 'inf'] } ] + +* histogram for all timers except 'foo' related, + with equal class interval and catchall for outliers: + + [ { metric: 'foo', bins: [] }, + { metric: '', bins: [ 50, 100, 150, 200, 'inf'] } ] + +Note that this is actually more powerful than what's strictly considered +histograms, as you can make each bin arbitrarily wide if you want to +(upto infinity), i.e. class intervals of different sizes. Sampling -------- diff --git a/backends/graphite.js b/backends/graphite.js index 3ffe909f..de6bd5b7 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -128,14 +128,21 @@ var flush_stats = function graphite_flush(ts, metrics) { message += 'stats.timers.' + key + '.mean ' + mean + ' ' + ts + "\n"; // note: values bigger than the upper limit of the last bin are ignored, by design - num_bins = (config.histogram || []).length + conf = config.histogram || []; + bins = []; + for (var i = 0; i < conf.length; i++) { + if (key.indexOf(conf[i].metric) > -1) { + bins = conf[i].bins; + break; + } + } var i = 0; - for (var bin_i = 0; bin_i < num_bins; bin_i++) { + for (var bin_i = 0; bin_i < bins.length; bin_i++) { var freq = 0; - for (; i < count && (config.histogram[bin_i] == 'inf' || values[i] < config.histogram[bin_i]); i++) { + for (; i < count && (bins[bin_i] == 'inf' || values[i] < bins[bin_i]); i++) { freq += 1; } - message += 'stats.timers.' 
+ key + '.bin_' + config.histogram[bin_i] + ' ' + freq + ' ' + ts + "\n"; + message += 'stats.timers.' + key + '.bin_' + bins[bin_i] + ' ' + freq + ' ' + ts + "\n"; } statString += message; diff --git a/exampleConfig.js b/exampleConfig.js index acafc77c..6b55dfb0 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -50,11 +50,20 @@ Optional Variables: percentThreshold: calculate the Nth percentile(s) (can be a single value or list of floating-point values) [%, default: 90] - histogram: an array of ordered non-inclusive upper limits of bins for - histogram (in ms). 'inf' means infinity. (default: []) - if non-empty, histograms are enabled and frequencies - for each bin are written. - e.g. [ 25, 50, 100, 150, 200, 'inf' ] + histogram: an array of mappings of strings (to match metrics) and + corresponding ordered non-inclusive upper limits of bins. + For all matching metrics, histograms are maintained over + time by writing the frequencies for all bins. + 'inf' means infinity. A lower limit of 0 is assumed. + default: [], meaning no histograms for any timer. + First match wins. 
examples: + * histogram to only track render durations, with unequal + class intervals and catchall for outliers: + [ { metric: 'render', bins: [8, 25, 50, 100, 'inf'] } ] + * histogram for all timers except 'foo' related, + equal class interval and catchall for outliers: + [ { metric: 'foo', bins: [] }, + { metric: '', bins: [ 50, 100, 150, 200, 'inf'] } ] */ { graphitePort: 2003 From 92c46c357ae5979cbd18498a6307b6263c8f3872 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Mon, 5 Nov 2012 16:03:16 -0500 Subject: [PATCH 065/233] bugfix: allow bin upper limits to contain decimals also modify an example to demonstrate --- README.md | 14 +++++++++----- backends/graphite.js | 3 ++- exampleConfig.js | 2 +- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 74e97d22..87259a44 100644 --- a/README.md +++ b/README.md @@ -57,13 +57,13 @@ ordered non-inclusive upper limits of bins (class intervals). (use `inf` to denote infinity; a lower limit of 0 is assumed) Each `flushInterval`, statsd will store how many values (absolute frequency) fall within each bin (class interval), for all matching metrics. -First match wins. examples: +Examples: * no histograms for any timer (default): `[]` * histogram to only track render durations, with unequal class intervals and catchall for outliers: - [ { metric: 'render', bins: [8, 25, 50, 100, 'inf'] } ] + [ { metric: 'render', bins: [ 0.01, 0.1, 1, 10, 'inf'] } ] * histogram for all timers except 'foo' related, with equal class interval and catchall for outliers: @@ -71,9 +71,13 @@ First match wins. examples: [ { metric: 'foo', bins: [] }, { metric: '', bins: [ 50, 100, 150, 200, 'inf'] } ] -Note that this is actually more powerful than what's strictly considered -histograms, as you can make each bin arbitrarily wide if you want to -(upto infinity), i.e. class intervals of different sizes. +Note: + +* first match for a metric wins. +* bin upper limits may contain decimals. 
+* this is actually more powerful than what's strictly considered +histograms, as you can make each bin arbitrarily wide, +i.e. class intervals of different sizes. Sampling -------- diff --git a/backends/graphite.js b/backends/graphite.js index de6bd5b7..435f27b5 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -142,7 +142,8 @@ var flush_stats = function graphite_flush(ts, metrics) { for (; i < count && (bins[bin_i] == 'inf' || values[i] < bins[bin_i]); i++) { freq += 1; } - message += 'stats.timers.' + key + '.bin_' + bins[bin_i] + ' ' + freq + ' ' + ts + "\n"; + bin_name = ('bin_' + bins[bin_i]).replace('.','_'); + message += 'stats.timers.' + key + '.' + bin_name + ' ' + freq + ' ' + ts + "\n"; } statString += message; diff --git a/exampleConfig.js b/exampleConfig.js index 6b55dfb0..1dacc007 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -59,7 +59,7 @@ Optional Variables: First match wins. examples: * histogram to only track render durations, with unequal class intervals and catchall for outliers: - [ { metric: 'render', bins: [8, 25, 50, 100, 'inf'] } ] + [ { metric: 'render', bins: [ 0.01, 0.1, 1, 10, 'inf'] } ] * histogram for all timers except 'foo' related, equal class interval and catchall for outliers: [ { metric: 'foo', bins: [] }, From 3caeea1846a5d8e3b9436e8c3efeac23244cbebe Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Tue, 6 Nov 2012 08:53:28 -0500 Subject: [PATCH 066/233] Small tweaks to the README --- README.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index aceea692..15375dbf 100644 --- a/README.md +++ b/README.md @@ -44,17 +44,19 @@ Timing glork:320|ms -The glork took 320ms to complete this time. StatsD figures out 90th percentile, -average (mean), lower and upper bounds for the flush interval. The percentile -threshold can be tweaked with `config.percentThreshold`. +The glork took 320ms to complete this time. 
StatsD figures out percentiles, +average (mean), standard deviation, sum, lower and upper bounds for the flush interval. +The percentile threshold can be tweaked with `config.percentThreshold`. The percentile threshold can be a single value, or a list of values, and will generate the following list of stats for each threshold: - stats.timers.$KEY.mean_$PCT stats.timers.$KEY.upper_$PCT + stats.timers.$KEY.mean_$PCT + stats.timers.$KEY.upper_$PCT + stats.timers.$KEY.sum_$PCT -Where `$KEY` is the key you stats key you specify when sending to statsd, and -`$PCT` is the percentile threshold. +Where `$KEY` is the stats key you specify when sending to statsd, and `$PCT` is +the percentile threshold. Gauges ------ From ad011afb0dc7503f9d729c30534fe270e7a81e54 Mon Sep 17 00:00:00 2001 From: ciaranj Date: Sun, 28 Oct 2012 12:37:10 +0000 Subject: [PATCH 067/233] Fixes Issues #182 - Broken config file watching. Implements fix as suggested by Peter Schuster and Daniel Schauenberg in comment thread. Signed-off-by: ciaranj --- lib/config.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/config.js b/lib/config.js index c03aa049..8d7e6b2e 100644 --- a/lib/config.js +++ b/lib/config.js @@ -21,8 +21,8 @@ var Configurator = function (file) { this.updateConfig(); - fs.watch(file, function (curr, prev) { - if (curr.ino != prev.ino) { self.updateConfig(); } + fs.watch(file, function (event, filename) { + if (event == 'change') { self.updateConfig(); } }); }; From 2546ee6263e30b616570c7af87a931e310a76c8a Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Sun, 11 Nov 2012 17:44:54 -0500 Subject: [PATCH 068/233] add a statsd metrics hash which is also passed to the backend this only holds the metrics processing time for now but can be used for other metrics which are not fit to be added to the preexisting statsd metric values. 
--- README.md | 8 +++++--- backends/graphite.js | 5 +++++ lib/process_metrics.js | 4 ++++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9a6b4c9e..3b63696f 100644 --- a/README.md +++ b/README.md @@ -51,11 +51,11 @@ The percentile threshold can be tweaked with `config.percentThreshold`. The percentile threshold can be a single value, or a list of values, and will generate the following list of stats for each threshold: - stats.timers.$KEY.mean_$PCT + stats.timers.$KEY.mean_$PCT stats.timers.$KEY.upper_$PCT stats.timers.$KEY.sum_$PCT -Where `$KEY` is the stats key you specify when sending to statsd, and `$PCT` is +Where `$KEY` is the stats key you specify when sending to statsd, and `$PCT` is the percentile threshold. Gauges @@ -274,12 +274,14 @@ metrics: { sets: sets, counter_rates: counter_rates, timer_data: timer_data, + statsd_metrics: statsd_metrics, pctThreshold: pctThreshold } ``` The counter_rates and timer_data are precalculated statistics to simplify - the creation of backends. Each backend module is passed the same set of + the creation of backends, the statsd_metrics hash contains metrics generated + by statsd itself. Each backend module is passed the same set of statistics, so a backend module should treat the metrics as immutable structures. StatsD will reset timers and counters after each listener has handled the event. diff --git a/backends/graphite.js b/backends/graphite.js index 3e1b756c..2793939b 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -62,6 +62,7 @@ var flush_stats = function graphite_flush(ts, metrics) { var sets = metrics.sets; var counter_rates = metrics.counter_rates; var timer_data = metrics.timer_data; + var statsd_metrics = metrics.statsd_metrics; for (key in counters) { statString += 'stats.' 
+ key + ' ' + counter_rates[key] + ' ' + ts + "\n"; @@ -92,6 +93,10 @@ var flush_stats = function graphite_flush(ts, metrics) { numStats += 1; } + for (key in statsd_metrics) { + statString += 'stats.statsd.' + key + ' ' + statsd_metrics[key] + ' ' + ts + "\n"; + } + statString += 'statsd.numStats ' + numStats + ' ' + ts + "\n"; statString += 'stats.statsd.graphiteStats.calculationtime ' + (Date.now() - starttime) + ' ' + ts + "\n"; post_stats(statString); diff --git a/lib/process_metrics.js b/lib/process_metrics.js index f82616c3..44ad55eb 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -1,7 +1,9 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { + var starttime = Date.now(); var key; var counter_rates = {}; var timer_data = {}; + var statsd_metrics = {}; var counters = metrics.counters; var timers = metrics.timers; var pctThreshold = metrics.pctThreshold; @@ -75,9 +77,11 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { } } + statsd_metrics["processing_time"] = (Date.now() - starttime); //add processed metrics to the metrics_hash metrics.counter_rates = counter_rates; metrics.timer_data = timer_data; + metrics.statsd_metrics = statsd_metrics; flushCallback(metrics); } From 4cf1e6d206bc66995cb0d64326ebc1c395c53164 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Sun, 11 Nov 2012 17:51:24 -0500 Subject: [PATCH 069/233] add test for statsd_metrics --- test/process_metrics_tests.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/process_metrics_tests.js b/test/process_metrics_tests.js index dc55babb..6252c4b3 100644 --- a/test/process_metrics_tests.js +++ b/test/process_metrics_tests.js @@ -116,5 +116,12 @@ module.exports = { test.equal(200, timer_data.upper_80); test.equal(300, timer_data.sum_80); test.done(); + }, + statsd_metrics_exist: function(test) { + test.expect(1); + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + statsd_metrics = 
this.metrics.statsd_metrics; + test.notEqual(undefined, statsd_metrics["processing_time"]); + test.done(); } } From 962704be9c5881ddcb2f36281ba4a61ac58f2ba3 Mon Sep 17 00:00:00 2001 From: Matt Sanders Date: Mon, 12 Nov 2012 09:50:40 -0600 Subject: [PATCH 070/233] Tweak backend documentation in README --- README.md | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 3b63696f..5fa08e21 100644 --- a/README.md +++ b/README.md @@ -88,27 +88,29 @@ For more information, check the `exampleConfig.js`. Supported Backends ------------------ -StatsD supports multiple, pluggable, backend modules that can publish +StatsD supports pluggable backend modules that can publish statistics from the local StatsD daemon to a backend service or data -store. Backend services can retain statistics for -longer durations in a time series data store, visualize statistics in -graphs or tables, or generate alerts based on defined thresholds. A -backend can also correlate statistics sent from StatsD daemons running -across multiple hosts in an infrastructure. - -StatsD includes the following backends: - -* [Graphite][graphite] (`graphite`): Graphite is an open-source - time-series data store that provides visualization through a - web-browser interface. -* Console (`console`): The console backend outputs the received - metrics to stdout (e.g. for seeing what's going on during development). -* Repeater (`repeater`): The repeater backend utilizes the `packet` emit API to +store. Backend services can retain statistics in a time series data store, +visualize statistics in graphs or tables, or generate alerts based on +defined thresholds. A backend can also correlate statistics sent from StatsD +daemons running across multiple hosts in an infrastructure. 
+ +StatsD includes the following built-in backends: + +* [Graphite][graphite] (`graphite`): An open-source + time-series data store that provides visualization through a web-browser. +* Console (`console`): Outputs the received + metrics to stdout (see what's going on during development). +* Repeater (`repeater`): Utilizes the `packet` emit API to forward raw packets retrieved by StatsD to multiple backend StatsD instances. -By default, the `graphite` backend will be loaded automatically. To -select which backends are loaded, set the `backends` configuration -variable to the list of backend modules to load. +A robust set of [other backends](https://github.com/etsy/statsd/wiki/Backends) +are also available as plugins to allow easy reporting into databases, queues +and third-party services. + +By default, the `graphite` backend will be loaded automatically. Multiple +backends can be run at once. To select which backends are loaded, set +the `backends` configuration variable to the list of backend modules to load. Backends are just npm modules which implement the interface described in section *Backend Interface*. In order to be able to load the backend, add the From d9ecb0a4c41c84539dc987d38caa406b034b0f53 Mon Sep 17 00:00:00 2001 From: Steve Ivy Date: Mon, 12 Nov 2012 09:20:43 -0700 Subject: [PATCH 071/233] Update examples/python_example.py Give sample client a proper initializer, and give it a basic example metric to run if the script is called itself. 
--- examples/python_example.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/examples/python_example.py b/examples/python_example.py index b2f39c8e..94283eed 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -11,6 +11,17 @@ # Sends statistics to the stats daemon over UDP class Statsd(object): + def __init__(self, host=localhost, port=8125): + self.host = host + self.port = port + try: + import local_settings as settings + self.host = settings.statsd_host + self.port = settings.statsd_port + except: + pass + self.addr=(host, port) + @staticmethod def timing(stat, time, sample_rate=1): """ @@ -83,9 +94,14 @@ def send(data, sample_rate=1): for stat in sampled_data.keys(): value = sampled_data[stat] send_data = "%s:%s" % (stat, value) - udp_sock.sendto(send_data, addr) + udp_sock.sendto(send_data, self.addr) except: import sys from pprint import pprint print "Unexpected error:", pprint(sys.exc_info()) pass # we don't care + + +if __name__=="__main__": + c = StatsdClient() + c.increment('example.python') From 6061c3b23e7412138c2419146da5e77a6ab42060 Mon Sep 17 00:00:00 2001 From: Steve Ivy Date: Mon, 12 Nov 2012 09:23:56 -0700 Subject: [PATCH 072/233] Update examples/python_example.py Fix host parameter value. --- examples/python_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python_example.py b/examples/python_example.py index 94283eed..eaf0f393 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -11,7 +11,7 @@ # Sends statistics to the stats daemon over UDP class Statsd(object): - def __init__(self, host=localhost, port=8125): + def __init__(self, host='localhost', port=8125): self.host = host self.port = port try: From 5953c665b2fa37fc43daa99b68dd43de4aa0922c Mon Sep 17 00:00:00 2001 From: Steve Ivy Date: Mon, 12 Nov 2012 09:42:47 -0700 Subject: [PATCH 073/233] Update examples/python_example.py call the client a Client. 
--- examples/python_example.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index eaf0f393..6a231f47 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -9,8 +9,7 @@ # statsd_port = 8125 # Sends statistics to the stats daemon over UDP -class Statsd(object): - +class StatsdClient(object): def __init__(self, host='localhost', port=8125): self.host = host self.port = port From 6582ea204fb6097986fc34bd4ba8bee8b458b450 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Mon, 12 Nov 2012 16:07:36 -0500 Subject: [PATCH 074/233] clarify histogram/bin categorisation algorithm --- backends/graphite.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backends/graphite.js b/backends/graphite.js index 435f27b5..75bf6b1b 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -136,6 +136,9 @@ var flush_stats = function graphite_flush(ts, metrics) { break; } } + // the outer loop iterates bins, the inner loop iterates timer values; + // within each run of the inner loop we should only consider the timer value range that's within the scope of the current bin + // so we leverage the fact that the values are already sorted to end up with only full 1 iteration of the entire values range var i = 0; for (var bin_i = 0; bin_i < bins.length; bin_i++) { var freq = 0; From 1e66b4e95f9098f6e34f027c4e58ffa93658add9 Mon Sep 17 00:00:00 2001 From: Patrick McCoy Date: Tue, 13 Nov 2012 09:59:37 -0700 Subject: [PATCH 075/233] Tests for Delete Counters Config Added tests for the delete counters config change. 
--- stats.js | 8 +- test/graphite_delete_counters_tests.js | 263 +++++++++++++++++++++++++ test/graphite_tests.js | 2 +- 3 files changed, 270 insertions(+), 3 deletions(-) create mode 100644 test/graphite_delete_counters_tests.js diff --git a/stats.js b/stats.js index 738eaf0c..4a647691 100644 --- a/stats.js +++ b/stats.js @@ -36,6 +36,9 @@ function loadBackend(config, name) { } }; +// global for conf +var conf; + // Flush metrics to each backend. function flushMetrics() { var time_stamp = Math.round(new Date().getTime() / 1000); @@ -51,9 +54,9 @@ function flushMetrics() { // After all listeners, reset the stats backendEvents.once('flush', function clear_metrics(ts, metrics) { // Clear the counters - config.delete_counters = config.delete_counters || false; + conf.deleteCounters = conf.deleteCounters || false; for (key in metrics.counters) { - if (config.delete_counters) { + if (conf.deleteCounters) { metrics.counters[key] = undefined; } else { metrics.counters[key] = 0; @@ -86,6 +89,7 @@ var stats = { var l; config.configFile(process.argv[2], function (config, oldConfig) { + conf = config; if (! 
config.debug && debugInt) { clearInterval(debugInt); debugInt = false; diff --git a/test/graphite_delete_counters_tests.js b/test/graphite_delete_counters_tests.js new file mode 100644 index 00000000..7738fcc5 --- /dev/null +++ b/test/graphite_delete_counters_tests.js @@ -0,0 +1,263 @@ +var fs = require('fs'), + net = require('net'), + temp = require('temp'), + spawn = require('child_process').spawn, + util = require('util'), + urlparse = require('url').parse, + _ = require('underscore'), + dgram = require('dgram'), + qsparse = require('querystring').parse, + http = require('http'); + + +var writeconfig = function(text,worker,cb,obj){ + temp.open({suffix: '-statsdconf.js'}, function(err, info) { + if (err) throw err; + fs.writeSync(info.fd, text); + fs.close(info.fd, function(err) { + if (err) throw err; + worker(info.path,cb,obj); + }); + }); +} + +var array_contents_are_equal = function(first,second){ + var intlen = _.intersection(first,second).length; + var unlen = _.union(first,second).length; + return (intlen == unlen) && (intlen == first.length); +} + +var statsd_send = function(data,sock,host,port,cb){ + send_data = new Buffer(data); + sock.send(send_data,0,send_data.length,port,host,function(err,bytes){ + if (err) { + throw err; + } + cb(); + }); +} + +// keep collecting data until a specified timeout period has elapsed +// this will let us capture all data chunks so we don't miss one +var collect_for = function(server,timeout,cb){ + var received = []; + var in_flight = 0; + var timed_out = false; + var collector = function(req,res){ + in_flight += 1; + var body = ''; + req.on('data',function(data){ body += data; }); + req.on('end',function(){ + received = received.concat(body.split("\n")); + in_flight -= 1; + if((in_flight < 1) && timed_out){ + server.removeListener('request',collector); + cb(received); + } + }); + } + + setTimeout(function (){ + timed_out = true; + if((in_flight < 1)) { + server.removeListener('connection',collector); + cb(received); + } 
+ },timeout); + + server.on('connection',collector); +} + +module.exports = { + setUp: function (callback) { + this.testport = 31337; + this.myflush = 200; + var configfile = "{graphService: \"graphite\"\n\ + , batch: 200 \n\ + , flushInterval: " + this.myflush + " \n\ + , percentThreshold: 90\n\ + , port: 8125\n\ + , dumpMessages: false \n\ + , debug: false\n\ + , deleteCounters: true\n\ + , graphitePort: " + this.testport + "\n\ + , graphiteHost: \"127.0.0.1\"}"; + + this.acceptor = net.createServer(); + this.acceptor.listen(this.testport); + this.sock = dgram.createSocket('udp4'); + + this.server_up = true; + this.ok_to_die = false; + this.exit_callback_callback = process.exit; + + writeconfig(configfile,function(path,cb,obj){ + obj.path = path; + obj.server = spawn('node',['stats.js', path]); + obj.exit_callback = function (code) { + obj.server_up = false; + if(!obj.ok_to_die){ + console.log('node server unexpectedly quit with code: ' + code); + process.exit(1); + } + else { + obj.exit_callback_callback(); + } + }; + obj.server.on('exit', obj.exit_callback); + obj.server.stderr.on('data', function (data) { + console.log('stderr: ' + data.toString().replace(/\n$/,'')); + }); + /* + obj.server.stdout.on('data', function (data) { + console.log('stdout: ' + data.toString().replace(/\n$/,'')); + }); + */ + obj.server.stdout.on('data', function (data) { + // wait until server is up before we finish setUp + if (data.toString().match(/server is up/)) { + cb(); + } + }); + + },callback,this); + }, + tearDown: function (callback) { + this.sock.close(); + this.acceptor.close(); + this.ok_to_die = true; + if(this.server_up){ + this.exit_callback_callback = callback; + this.server.kill(); + } else { + callback(); + } + }, + + send_well_formed_posts: function (test) { + test.expect(2); + + // we should integrate a timeout into this + this.acceptor.once('connection',function(c){ + var body = ''; + c.on('data',function(d){ body += d; }); + c.on('end',function(){ + var rows = 
body.split("\n"); + var entries = _.map(rows, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + test.ok(_.include(_.map(entries,function(x) { return _.keys(x)[0] }),'statsd.numStats'),'graphite output includes numStats'); + test.equal(_.find(entries, function(x) { return _.keys(x)[0] == 'statsd.numStats' })['statsd.numStats'],2); + test.done(); + }); + }); + }, + + send_malformed_post: function (test) { + test.expect(3); + + var testvalue = 1; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_bad_test_value|z',me.sock,'127.0.0.1',8125,function(){ + collect_for(me.acceptor,me.myflush*2,function(strings){ + test.ok(strings.length > 0,'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post){ + var mykey = 'statsd.numStats'; + return _.include(_.keys(post),mykey) && (post[mykey] == 2); + }; + test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 0'); + + var bad_lines_seen_value_test = function(post){ + var mykey = 'stats_counts.statsd.bad_lines_seen'; + return _.include(_.keys(post),mykey) && isNaN(post[mykey]); + }; + test.ok(_.any(hashes,bad_lines_seen_value_test), 'stats_counts.statsd.bad_lines_seen should be ' + testvalue); + + test.done(); + }); + }); + }); + }, + + timers_are_valid: function (test) { + test.expect(3); + + var testvalue = 100; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_test_value:' + testvalue + '|ms',me.sock,'127.0.0.1',8125,function(){ + collect_for(me.acceptor,me.myflush*2,function(strings){ + test.ok(strings.length > 0,'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post){ + var mykey = 'statsd.numStats'; 
+ return _.include(_.keys(post),mykey) && (post[mykey] == 3); + }; + test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); + + var testtimervalue_test = function(post){ + var mykey = 'stats.timers.a_test_value.mean_90'; + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); + }; + test.ok(_.any(hashes,testtimervalue_test), 'stats.timers.a_test_value.mean should be ' + testvalue); + + test.done(); + }); + }); + }); + }, + + counts_are_valid: function (test) { + test.expect(4); + + var testvalue = 100; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_test_value:' + testvalue + '|c',me.sock,'127.0.0.1',8125,function(){ + collect_for(me.acceptor,me.myflush*2,function(strings){ + test.ok(strings.length > 0,'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post){ + var mykey = 'statsd.numStats'; + return _.include(_.keys(post),mykey) && (post[mykey] == 3); + }; + test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); + + var testavgvalue_test = function(post){ + var mykey = 'stats.a_test_value'; + return _.include(_.keys(post),mykey) && (post[mykey] == (testvalue/(me.myflush / 1000))); + }; + test.ok(_.any(hashes,testavgvalue_test), 'stats.a_test_value should be ' + (testvalue/(me.myflush / 1000))); + + var testcountvalue_test = function(post){ + var mykey = 'stats_counts.a_test_value'; + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); + }; + test.ok(_.any(hashes,testcountvalue_test), 'stats_counts.a_test_value should be ' + testvalue); + + test.done(); + }); + }); + }); + } +} diff --git a/test/graphite_tests.js b/test/graphite_tests.js index 776e1235..1f785424 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -178,7 +178,7 @@ module.exports = { var bad_lines_seen_value_test = function(post){ var mykey = 
'stats_counts.statsd.bad_lines_seen'; - return _.include(_.keys(post),mykey) && isNaN(post[mykey]); + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); }; test.ok(_.any(hashes,bad_lines_seen_value_test), 'stats_counts.statsd.bad_lines_seen should be ' + testvalue); From 24390eef6e015bafb2c6e081687679f1bb4f3b42 Mon Sep 17 00:00:00 2001 From: Patrick McCoy Date: Wed, 14 Nov 2012 10:59:31 -0700 Subject: [PATCH 076/233] Updated the example config docs with the new option --- exampleConfig.js | 1 + 1 file changed, 1 insertion(+) diff --git a/exampleConfig.js b/exampleConfig.js index b9dcbe9e..19fbe11c 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -34,6 +34,7 @@ Optional Variables: interval: how often to log frequent keys [ms, default: 0] percent: percentage of frequent keys to log [%, default: 100] log: location of log file for frequent keys [default: STDOUT] + deleteCounters: when flushing to graphite, send null instead of 0 [default: false] console: prettyprint: whether to prettyprint the console backend From eca4f06a527682f644fc16ee2bc8597ebb858a7a Mon Sep 17 00:00:00 2001 From: Suvash Thapaliya Date: Thu, 22 Nov 2012 22:55:49 +0100 Subject: [PATCH 077/233] Adds sets for Ruby example + Whitespace cleanup --- examples/ruby_example2.rb | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/ruby_example2.rb b/examples/ruby_example2.rb index b7ee33c7..0281a99f 100644 --- a/examples/ruby_example2.rb +++ b/examples/ruby_example2.rb @@ -5,8 +5,8 @@ # Ian Sefferman # http://www.iseff.com - -# If this is running in a Rails environment, will pick up config/statsd.yml. + +# If this is running in a Rails environment, will pick up config/statsd.yml. 
# config/statsd.yml should look like: # production: # host: statsd.domain.com @@ -24,7 +24,7 @@ # Sends statistics to the stats daemon over UDP class Statsd - + def self.timing(stats, time, sample_rate=1) Statsd.update_stats(stats, time, sample_rate, 'ms') end @@ -41,6 +41,10 @@ def self.gauges(stats, value, sample_rate=1) Statsd.update_stats(stats, value, sample_rate, 'g') end + def self.sets(stats, value, sample_rate=1) + Statsd.update_stats(stats, value, sample_rate, 's') + end + def self.update_stats(stats, delta=1, sample_rate=1, metric='c') stats = [stats].flatten @@ -56,7 +60,7 @@ def self.send(data, sample_rate=1) begin host = config["host"] || "localhost" port = config["port"] || "8125" - + sampled_data = {} if sample_rate < 1 if rand <= sample_rate @@ -67,7 +71,7 @@ def self.send(data, sample_rate=1) else sampled_data = data end - + udp = UDPSocket.new sampled_data.each_pair do |stat, val| send_data = "%s:%s" % [stat, val] @@ -80,7 +84,7 @@ def self.send(data, sample_rate=1) def self.config return @@config if self.class_variable_defined?(:@@config) - begin + begin config_path = File.join(File.dirname(__FILE__), "statsd.yml") # for Rails environments, check Rails.root/config/statsd.yml if defined? Rails From d119c57788ca62b5ac4d6dfbaffec1a3f8aceb58 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Sat, 24 Nov 2012 23:01:09 -0500 Subject: [PATCH 078/233] update the README with a quick namespace overview --- README.md | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5fa08e21..45c793fa 100644 --- a/README.md +++ b/README.md @@ -90,8 +90,8 @@ Supported Backends StatsD supports pluggable backend modules that can publish statistics from the local StatsD daemon to a backend service or data -store. Backend services can retain statistics in a time series data store, -visualize statistics in graphs or tables, or generate alerts based on +store. 
Backend services can retain statistics in a time series data store, +visualize statistics in graphs or tables, or generate alerts based on defined thresholds. A backend can also correlate statistics sent from StatsD daemons running across multiple hosts in an infrastructure. @@ -104,12 +104,12 @@ StatsD includes the following built-in backends: * Repeater (`repeater`): Utilizes the `packet` emit API to forward raw packets retrieved by StatsD to multiple backend StatsD instances. -A robust set of [other backends](https://github.com/etsy/statsd/wiki/Backends) -are also available as plugins to allow easy reporting into databases, queues +A robust set of [other backends](https://github.com/etsy/statsd/wiki/Backends) +are also available as plugins to allow easy reporting into databases, queues and third-party services. By default, the `graphite` backend will be loaded automatically. Multiple -backends can be run at once. To select which backends are loaded, set +backends can be run at once. To select which backends are loaded, set the `backends` configuration variable to the list of backend modules to load. Backends are just npm modules which implement the interface described in @@ -311,6 +311,25 @@ metrics: { the raw received message string and the `rinfo` paramter contains remote address information from the UDP socket. + +Metric namespacing +------------------- +The metric namespacing in the Graphite backend is configurable with regard of +the prefixes. Per default all stats are put under `stats` in Graphite, which +makes it easier to consolidate them all under one schema. However it is +possible to change these namespaces in the backend configuration options. 
+The available configuration options (living under the `graphite` key) are: + +``` +legacyNamspace: use the legacy namespace [default: true] +globalPrefix: global prefix to use for sending stats to graphite [default: "stats"] +prefixCounter: graphite prefix for counter metrics [default: "counters"] +prefixTimer: graphite prefix for timer metrics [default: "timers"] +prefixGauge: graphite prefix for gauge metrics [default: "gauges"] +prefixSet: graphite prefix for set metrics [default: "sets"] +``` + + Inspiration ----------- From 20d2198d4209cca356af4dfff5a38f0be9424cb1 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Sat, 24 Nov 2012 23:10:33 -0500 Subject: [PATCH 079/233] make counter namespace changes more clear in the README --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index 45c793fa..1c1c5dd8 100644 --- a/README.md +++ b/README.md @@ -329,6 +329,16 @@ prefixGauge: graphite prefix for gauge metrics [default: "gauges"] prefixSet: graphite prefix for set metrics [default: "sets"] ``` +If you decide to not use the legacy namespacing, besides the obvious changes +in the prefixing, there will also be a breaking change in the way counters are +submitted. So far counters didn't live under any namespace and were also a bit +confusing due to the way they record rate and absolute counts. In the legacy +setting rates were recorded under `stats.counter_name` directly, whereas the +absolute count could be found under `stats_count.counter_name`. With disabling +the legacy namespacing those values can be found (with default prefixing) +under `stats.counters.counter_name.rate` and +`stats.counters.counter_name.count` now. 
+ Inspiration ----------- From 8f3bfb06dbf4b4b7a66cbd7a7629195945019a9c Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Sat, 24 Nov 2012 23:14:59 -0500 Subject: [PATCH 080/233] fix typos in README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1c1c5dd8..ea642afe 100644 --- a/README.md +++ b/README.md @@ -314,7 +314,7 @@ metrics: { Metric namespacing ------------------- -The metric namespacing in the Graphite backend is configurable with regard of +The metric namespacing in the Graphite backend is configurable with regard to the prefixes. Per default all stats are put under `stats` in Graphite, which makes it easier to consolidate them all under one schema. However it is possible to change these namespaces in the backend configuration options. @@ -329,7 +329,7 @@ prefixGauge: graphite prefix for gauge metrics [default: "gauges"] prefixSet: graphite prefix for set metrics [default: "sets"] ``` -If you decide to not use the legacy namespacing, besides the obvious changes +If you decide not to use the legacy namespacing, besides the obvious changes in the prefixing, there will also be a breaking change in the way counters are submitted. So far counters didn't live under any namespace and were also a bit confusing due to the way they record rate and absolute counts. In the legacy From dbd05971640cd30fca7360d84ced524c71719c61 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Sun, 25 Nov 2012 14:30:22 -0500 Subject: [PATCH 081/233] spell legacyNamespace correctly --- README.md | 2 +- exampleConfig.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ea642afe..6735e5f7 100644 --- a/README.md +++ b/README.md @@ -321,7 +321,7 @@ possible to change these namespaces in the backend configuration options. 
The available configuration options (living under the `graphite` key) are: ``` -legacyNamspace: use the legacy namespace [default: true] +legacyNamespace: use the legacy namespace [default: true] globalPrefix: global prefix to use for sending stats to graphite [default: "stats"] prefixCounter: graphite prefix for counter metrics [default: "counters"] prefixTimer: graphite prefix for timer metrics [default: "timers"] diff --git a/exampleConfig.js b/exampleConfig.js index 8dcb1829..456d4a21 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -45,7 +45,7 @@ Optional Variables: level: log level for [node-]syslog [string, default: LOG_INFO] graphite: - legacyNamspace: use the legacy namespace [default: true] + legacyNamespace: use the legacy namespace [default: true] globalPrefix: global prefix to use for sending stats to graphite [default: "stats"] prefixCounter: graphite prefix for counter metrics [default: "counters"] prefixTimer: graphite prefix for timer metrics [default: "timers"] From c03f33910ef9a894baf1fdfcb6799ccd07e91201 Mon Sep 17 00:00:00 2001 From: Patrick McCoy Date: Mon, 26 Nov 2012 12:56:29 -0700 Subject: [PATCH 082/233] Changed stats.js to use delete Changed stats.js to use delete instead of setting the values to undefined in order to save some memory. 
--- stats.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stats.js b/stats.js index 4a647691..207130f5 100644 --- a/stats.js +++ b/stats.js @@ -57,7 +57,7 @@ function flushMetrics() { conf.deleteCounters = conf.deleteCounters || false; for (key in metrics.counters) { if (conf.deleteCounters) { - metrics.counters[key] = undefined; + delete(metrics.counters[key]); } else { metrics.counters[key] = 0; } From d0901b8ed05e53cd8bf2055b9621e45af20a8328 Mon Sep 17 00:00:00 2001 From: Patrick McCoy Date: Mon, 26 Nov 2012 13:08:18 -0700 Subject: [PATCH 083/233] Fixed the tests Fixed the tests that were broken when changing to delete --- test/graphite_delete_counters_tests.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/graphite_delete_counters_tests.js b/test/graphite_delete_counters_tests.js index 7738fcc5..938f45c0 100644 --- a/test/graphite_delete_counters_tests.js +++ b/test/graphite_delete_counters_tests.js @@ -206,7 +206,7 @@ module.exports = { }); var numstat_test = function(post){ var mykey = 'statsd.numStats'; - return _.include(_.keys(post),mykey) && (post[mykey] == 3); + return _.include(_.keys(post),mykey) && (post[mykey] == 2); }; test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); @@ -239,7 +239,7 @@ module.exports = { }); var numstat_test = function(post){ var mykey = 'statsd.numStats'; - return _.include(_.keys(post),mykey) && (post[mykey] == 3); + return _.include(_.keys(post),mykey) && (post[mykey] == 2); }; test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); From c87f4386557ffafb04a77af18386fa8c5b691a52 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Thu, 29 Nov 2012 12:34:53 -0500 Subject: [PATCH 084/233] fix deleteCounters wording --- exampleConfig.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exampleConfig.js b/exampleConfig.js index 1699df57..90584ae8 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -34,7 +34,7 @@ Optional Variables: 
interval: how often to log frequent keys [ms, default: 0] percent: percentage of frequent keys to log [%, default: 100] log: location of log file for frequent keys [default: STDOUT] - deleteCounters: when flushing to graphite, send null instead of 0 [default: false] + deleteCounters: don't send values to graphite for inactive counters, as opposed to sending 0 [default: false] console: prettyprint: whether to prettyprint the console backend From 124c87f4f4bb8eb7f79db47c87d0f0ec791076d7 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Thu, 29 Nov 2012 12:51:45 -0500 Subject: [PATCH 085/233] document reset-to-0 behavior for counters also: * document config.deleteCounters in readme * move the config.flushInterval bit up to the part about flushing --- README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 6735e5f7..1f85ad16 100644 --- a/README.md +++ b/README.md @@ -22,16 +22,19 @@ etc) general values should be integer. * *flush* - After the flush interval timeout (default 10 seconds), stats are - aggregated and sent to an upstream backend service. + After the flush interval timeout (defined by `config.flushInterval`, + default 10 seconds), stats are aggregated and sent to an upstream backend service. Counting -------- gorets:1|c -This is a simple counter. Add 1 to the "gorets" bucket. It stays in memory -until the flush interval `config.flushInterval`. +This is a simple counter. Add 1 to the "gorets" bucket. +At each flush the current count is sent and reset to 0. 
+If the count at flush is 0 then you can opt to send no metric at all for +this counter, by setting `config.deleteCounters` (applies only to graphite +backend) ### Sampling From f3d58af35d8e554154d2e0177e7e7c1cf6b32f3c Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Thu, 29 Nov 2012 15:49:21 -0500 Subject: [PATCH 086/233] clarify how statsd submits counters --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1f85ad16..26896658 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ This is a simple counter. Add 1 to the "gorets" bucket. At each flush the current count is sent and reset to 0. If the count at flush is 0 then you can opt to send no metric at all for this counter, by setting `config.deleteCounters` (applies only to graphite -backend) +backend). Statsd will send both the rate as well as the count at each flush. ### Sampling From e0629d48e1aa9ca26656294dbb6f037b3fe01f54 Mon Sep 17 00:00:00 2001 From: goir Date: Wed, 5 Dec 2012 22:11:41 +0100 Subject: [PATCH 087/233] Update lib/process_metrics.js fixed pctThreshold replace if you specify floating point values. 
--- lib/process_metrics.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/process_metrics.js b/lib/process_metrics.js index 44ad55eb..bcccfae8 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -50,7 +50,7 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { } var clean_pct = '' + pct; - clean_pct.replace('.', '_'); + clean_pct = clean_pct.replace('.', '_'); current_timer_data["mean_" + clean_pct] = mean; current_timer_data["upper_" + clean_pct] = maxAtThreshold; current_timer_data["sum_" + clean_pct] = sum; From 3fe3d43ae943f34fdce627792b11b215d402837a Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Sun, 9 Dec 2012 15:24:57 -0500 Subject: [PATCH 088/233] add tests for histograms --- test/process_metrics_tests.js | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/test/process_metrics_tests.js b/test/process_metrics_tests.js index 6252c4b3..8eb2ab1d 100644 --- a/test/process_metrics_tests.js +++ b/test/process_metrics_tests.js @@ -115,6 +115,59 @@ module.exports = { test.equal(150, timer_data.mean_80); test.equal(200, timer_data.upper_80); test.equal(300, timer_data.sum_80); + test.done(); + }, // check if the correct settings are being applied. 
as well as actual counts + timers_histogram: function (test) { + test.expect(45); + this.metrics.timers['a'] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + this.metrics.timers['abc'] = [0.1234, 2.89, 4, 6, 8]; + this.metrics.timers['foo'] = [0, 2, 4, 6, 8]; + this.metrics.timers['barbazfoobar'] = [0, 2, 4, 6, 8]; + this.metrics.timers['bar.bazfoobar.abc'] = [0, 2, 4, 6, 8]; + this.metrics.timers['xyz'] = [0, 2, 4, 6, 8]; + this.metrics.histogram = [ { metric: 'foo', bins: [] }, + { metric: 'abcd', bins: [ 1, 5, 'inf'] }, + { metric: 'abc', bins: [ 1, 2.21, 'inf'] }, + { metric: 'a', bins: [ 1, 2] } ]; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + timer_data = this.metrics.timer_data; + // nothing matches the 'abcd' config, so nothing has bin_5 + test.equal(undefined, timer_data['a']['bin_5']); + test.equal(undefined, timer_data['abc']['bin_5']); + test.equal(undefined, timer_data['foo']['bin_5']); + test.equal(undefined, timer_data['barbazfoobar']['bin_5']); + test.equal(undefined, timer_data['bar.bazfoobar.abc']['bin_5']); + test.equal(undefined, timer_data['xyz']['bin_5']); + + // check that 'a' got the right config and numbers + test.equal(0, timer_data['a']['bin_1']); + test.equal(1, timer_data['a']['bin_2']); + test.equal(undefined, timer_data['a']['bin_inf']); + + // only 'abc' should have a bin_inf; also check all its counts, + // and make sure it has no other bins + // amount of non-bin_ keys: std, upper, lower, count, sum, mean -> 6 + test.equal(1, timer_data['abc']['bin_1']); + test.equal(0, timer_data['abc']['bin_2_21']); + test.equal(4, timer_data['abc']['bin_inf']); + for (key in timer_data['abc']) { + test.ok(key.indexOf('bin_') < 0 || key == 'bin_1' || key == 'bin_2_21' || key == 'bin_inf'); + } + + // 'foo', 'barbazfoobar' and 'bar.bazfoobar.meh' and 'xyz' should not have any bin + for (key in timer_data['foo']) { + test.ok(key.indexOf('bin_') < 0); + } + for (key in timer_data['barbazfoobar']) { + test.ok(key.indexOf('bin_') < 0); 
+ } + for (key in timer_data['bar.bazfoobar.abc']) { + test.ok(key.indexOf('bin_') < 0); + } + for (key in timer_data['xyz']) { + test.ok(key.indexOf('bin_') < 0); + } + test.done(); }, statsd_metrics_exist: function(test) { From e223710dadaa0c5f9bbb736601e2d440e17da2d3 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 04:19:42 +0800 Subject: [PATCH 089/233] examples: python: unbreak example After Statsd -> StatsdClient renaming example is borken. --- examples/python_example.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 6a231f47..9272f6e7 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -30,7 +30,7 @@ def timing(stat, time, sample_rate=1): """ stats = {} stats[stat] = "%d|ms" % time - Statsd.send(stats, sample_rate) + StatsdClient.send(stats, sample_rate) @staticmethod def increment(stats, sample_rate=1): @@ -39,7 +39,7 @@ def increment(stats, sample_rate=1): >>> Statsd.increment('some.int') >>> Statsd.increment('some.int',0.5) """ - Statsd.update_stats(stats, 1, sample_rate) + StatsdClient.update_stats(stats, 1, sample_rate) @staticmethod def decrement(stats, sample_rate=1): @@ -47,7 +47,7 @@ def decrement(stats, sample_rate=1): Decrements one or more stats counters >>> Statsd.decrement('some.int') """ - Statsd.update_stats(stats, -1, sample_rate) + StatsdClient.update_stats(stats, -1, sample_rate) @staticmethod def update_stats(stats, delta=1, sampleRate=1): @@ -61,7 +61,7 @@ def update_stats(stats, delta=1, sampleRate=1): for stat in stats: data[stat] = "%s|c" % delta - Statsd.send(data, sampleRate) + StatsdClient.send(data, sampleRate) @staticmethod def send(data, sample_rate=1): From a40eb4a9e334000f32ddfda2a87ad5c09ce99ca1 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 04:30:51 +0800 Subject: [PATCH 090/233] examples: python: unbreak StatsdClient.send() send() is defined as staticmethod, so it' does not 
have self. Fixing this also requires all other methods convertion to non-static which is logical assuming the should all use self.addr set by __init__(). --- examples/python_example.py | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 9272f6e7..4b82675c 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -21,8 +21,7 @@ def __init__(self, host='localhost', port=8125): pass self.addr=(host, port) - @staticmethod - def timing(stat, time, sample_rate=1): + def timing(self, stat, time, sample_rate=1): """ Log timing information >>> from python_example import Statsd @@ -30,27 +29,24 @@ def timing(stat, time, sample_rate=1): """ stats = {} stats[stat] = "%d|ms" % time - StatsdClient.send(stats, sample_rate) + self.send(stats, sample_rate) - @staticmethod - def increment(stats, sample_rate=1): + def increment(self, stats, sample_rate=1): """ Increments one or more stats counters >>> Statsd.increment('some.int') >>> Statsd.increment('some.int',0.5) """ - StatsdClient.update_stats(stats, 1, sample_rate) + self.update_stats(stats, 1, sample_rate) - @staticmethod - def decrement(stats, sample_rate=1): + def decrement(self, stats, sample_rate=1): """ Decrements one or more stats counters >>> Statsd.decrement('some.int') """ - StatsdClient.update_stats(stats, -1, sample_rate) + self.update_stats(stats, -1, sample_rate) - @staticmethod - def update_stats(stats, delta=1, sampleRate=1): + def update_stats(self, stats, delta=1, sampleRate=1): """ Updates one or more stats counters by arbitrary amounts >>> Statsd.update_stats('some.int',10) @@ -61,21 +57,12 @@ def update_stats(stats, delta=1, sampleRate=1): for stat in stats: data[stat] = "%s|c" % delta - StatsdClient.send(data, sampleRate) + self.send(data, sampleRate) - @staticmethod - def send(data, sample_rate=1): + def send(self, data, sample_rate=1): """ Squirt the metrics over UDP """ - try: - 
import local_settings as settings - host = settings.statsd_host - port = settings.statsd_port - addr=(host, port) - except: - exit(1) - sampled_data = {} if(sample_rate < 1): From ef03f8f054589aab5793d342294e5f61066fba20 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 04:35:31 +0800 Subject: [PATCH 091/233] examples: python: remove local_settings.py overrides It's rather strange to override parameters passed to __init__ via custom settings file. --- examples/python_example.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 4b82675c..5a089624 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -3,22 +3,11 @@ # Steve Ivy # http://monkinetic.com -# this file expects local_settings.py to be in the same dir, with statsd host and port information: -# -# statsd_host = 'localhost' -# statsd_port = 8125 - # Sends statistics to the stats daemon over UDP class StatsdClient(object): def __init__(self, host='localhost', port=8125): self.host = host self.port = port - try: - import local_settings as settings - self.host = settings.statsd_host - self.port = settings.statsd_port - except: - pass self.addr=(host, port) def timing(self, stat, time, sample_rate=1): From 798a7f72a75628104f7fdb96694c34f8bd20248d Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 04:37:17 +0800 Subject: [PATCH 092/233] examples: python: host and port are not used --- examples/python_example.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 5a089624..1e43defb 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -6,8 +6,6 @@ # Sends statistics to the stats daemon over UDP class StatsdClient(object): def __init__(self, host='localhost', port=8125): - self.host = host - self.port = port self.addr=(host, port) def timing(self, stat, time, sample_rate=1): From 
2c09c3b8ea556d6eb9ec4a8addbb827d80879715 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 04:39:56 +0800 Subject: [PATCH 093/233] examples: python: print exceptio to stderr --- examples/python_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python_example.py b/examples/python_example.py index 1e43defb..f4f562de 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -71,7 +71,7 @@ def send(self, data, sample_rate=1): except: import sys from pprint import pprint - print "Unexpected error:", pprint(sys.exc_info()) + print >>sys.stderr, "Unexpected error:", pprint(sys.exc_info()) pass # we don't care From b861a9e0e4287e55b70e907f536ee464419d7427 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 04:42:43 +0800 Subject: [PATCH 094/233] examples: python: do not catch BaseExceptions --- examples/python_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python_example.py b/examples/python_example.py index f4f562de..14480ea0 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -68,7 +68,7 @@ def send(self, data, sample_rate=1): value = sampled_data[stat] send_data = "%s:%s" % (stat, value) udp_sock.sendto(send_data, self.addr) - except: + except Exception: import sys from pprint import pprint print >>sys.stderr, "Unexpected error:", pprint(sys.exc_info()) From 9b5fd681d4302b1631093c193510f36ea5adb0e0 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 04:44:48 +0800 Subject: [PATCH 095/233] examples: python: more useful errors --- examples/python_example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 14480ea0..29a8230c 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -70,8 +70,8 @@ def send(self, data, sample_rate=1): udp_sock.sendto(send_data, self.addr) except Exception: import sys - from 
pprint import pprint - print >>sys.stderr, "Unexpected error:", pprint(sys.exc_info()) + import traceback + print >>sys.stderr, "Unexpected error: ", traceback.format_exc() pass # we don't care From 30292229fedd43a0823fc84549215d86b72a2c26 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 04:47:10 +0800 Subject: [PATCH 096/233] examples: python: StatsdClient.send() returns outcome It's useful but unused. --- examples/python_example.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/python_example.py b/examples/python_example.py index 29a8230c..6309acb9 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -72,7 +72,8 @@ def send(self, data, sample_rate=1): import sys import traceback print >>sys.stderr, "Unexpected error: ", traceback.format_exc() - pass # we don't care + return False + return True if __name__=="__main__": From a5c6a7824454ec627ff3c258af4e15e61dd1d675 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 04:49:34 +0800 Subject: [PATCH 097/233] examples: python: simplifications --- examples/python_example.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 6309acb9..e1d0aa73 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -55,8 +55,7 @@ def send(self, data, sample_rate=1): if(sample_rate < 1): import random if random.random() <= sample_rate: - for stat in data.keys(): - value = data[stat] + for stat, value in data.items(): sampled_data[stat] = "%s|@%s" %(value, sample_rate) else: sampled_data=data @@ -64,8 +63,7 @@ def send(self, data, sample_rate=1): from socket import socket, AF_INET, SOCK_DGRAM udp_sock = socket(AF_INET, SOCK_DGRAM) try: - for stat in sampled_data.keys(): - value = sampled_data[stat] + for stat, value in sampled_data.items(): send_data = "%s:%s" % (stat, value) udp_sock.sendto(send_data, self.addr) except Exception: From 
0191e95a43ea0e93af672c0421ccda0095b6534a Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 04:55:22 +0800 Subject: [PATCH 098/233] examples: python: use isinstance() to suport inheritance --- examples/python_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python_example.py b/examples/python_example.py index e1d0aa73..5fa81aae 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -38,7 +38,7 @@ def update_stats(self, stats, delta=1, sampleRate=1): Updates one or more stats counters by arbitrary amounts >>> Statsd.update_stats('some.int',10) """ - if (type(stats) is not list): + if isinstance(stats, list): stats = [stats] data = {} for stat in stats: From 121625e3d7388467efe26b19f52affdf6d1945e9 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 04:57:57 +0800 Subject: [PATCH 099/233] exmaples: python: unbreak doctests --- examples/python_example.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 5fa81aae..f431c6b5 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -11,8 +11,9 @@ def __init__(self, host='localhost', port=8125): def timing(self, stat, time, sample_rate=1): """ Log timing information - >>> from python_example import Statsd - >>> Statsd.timing('some.time', 500) + >>> from python_example import StatsdClient + >>> client = StatsdClient() + >>> client.timing('some.time', 500) """ stats = {} stats[stat] = "%d|ms" % time @@ -21,22 +22,25 @@ def timing(self, stat, time, sample_rate=1): def increment(self, stats, sample_rate=1): """ Increments one or more stats counters - >>> Statsd.increment('some.int') - >>> Statsd.increment('some.int',0.5) + >>> client = StatsdClient() + >>> client.increment('some.int') + >>> client.increment('some.int',0.5) """ self.update_stats(stats, 1, sample_rate) def decrement(self, stats, sample_rate=1): """ Decrements 
one or more stats counters - >>> Statsd.decrement('some.int') + >>> client = StatsdClient() + >>> client.decrement('some.int') """ self.update_stats(stats, -1, sample_rate) def update_stats(self, stats, delta=1, sampleRate=1): """ Updates one or more stats counters by arbitrary amounts - >>> Statsd.update_stats('some.int',10) + >>> client = StatsdClient() + >>> client.update_stats('some.int',10) """ if isinstance(stats, list): stats = [stats] From f509f1131182a157d7362b944335a0df0b08f9ce Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 05:00:36 +0800 Subject: [PATCH 100/233] examples: python: whitespacing --- examples/python_example.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index f431c6b5..3a52912e 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -6,7 +6,7 @@ # Sends statistics to the stats daemon over UDP class StatsdClient(object): def __init__(self, host='localhost', port=8125): - self.addr=(host, port) + self.addr = (host, port) def timing(self, stat, time, sample_rate=1): """ @@ -24,7 +24,7 @@ def increment(self, stats, sample_rate=1): Increments one or more stats counters >>> client = StatsdClient() >>> client.increment('some.int') - >>> client.increment('some.int',0.5) + >>> client.increment('some.int', 0.5) """ self.update_stats(stats, 1, sample_rate) @@ -40,14 +40,13 @@ def update_stats(self, stats, delta=1, sampleRate=1): """ Updates one or more stats counters by arbitrary amounts >>> client = StatsdClient() - >>> client.update_stats('some.int',10) + >>> client.update_stats('some.int', 10) """ if isinstance(stats, list): stats = [stats] data = {} for stat in stats: data[stat] = "%s|c" % delta - self.send(data, sampleRate) def send(self, data, sample_rate=1): @@ -56,13 +55,13 @@ def send(self, data, sample_rate=1): """ sampled_data = {} - if(sample_rate < 1): + if (sample_rate < 1): import random if random.random() <= 
sample_rate: for stat, value in data.items(): sampled_data[stat] = "%s|@%s" %(value, sample_rate) else: - sampled_data=data + sampled_data = data from socket import socket, AF_INET, SOCK_DGRAM udp_sock = socket(AF_INET, SOCK_DGRAM) From ee129e79f6b87419c43e262b8f85c12e5b3343e3 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 10 Dec 2012 05:03:12 +0800 Subject: [PATCH 101/233] examples: python: move frequently used imports to the top --- examples/python_example.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 3a52912e..d381f981 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -3,6 +3,9 @@ # Steve Ivy # http://monkinetic.com +from random import random +from socket import socket, AF_INET, SOCK_DGRAM + # Sends statistics to the stats daemon over UDP class StatsdClient(object): def __init__(self, host='localhost', port=8125): @@ -56,14 +59,12 @@ def send(self, data, sample_rate=1): sampled_data = {} if (sample_rate < 1): - import random - if random.random() <= sample_rate: + if random() <= sample_rate: for stat, value in data.items(): sampled_data[stat] = "%s|@%s" %(value, sample_rate) else: sampled_data = data - from socket import socket, AF_INET, SOCK_DGRAM udp_sock = socket(AF_INET, SOCK_DGRAM) try: for stat, value in sampled_data.items(): From 7a9d9f3644a0c58742b1b465257f40fb7c9d62f3 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Sun, 9 Dec 2012 16:59:00 -0500 Subject: [PATCH 102/233] put all histogram bins in a "histogram" sub-hierarchy --- lib/process_metrics.js | 5 +++- test/process_metrics_tests.js | 48 +++++++++++++---------------------- 2 files changed, 21 insertions(+), 32 deletions(-) diff --git a/lib/process_metrics.js b/lib/process_metrics.js index a6b5e624..2f388170 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -82,6 +82,9 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { 
break; } } + if(bins.length) { + current_timer_data['histogram'] = {}; + } // the outer loop iterates bins, the inner loop iterates timer values; // within each run of the inner loop we should only consider the timer value range that's within the scope of the current bin // so we leverage the fact that the values are already sorted to end up with only full 1 iteration of the entire values range @@ -92,7 +95,7 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { freq += 1; } bin_name = ('bin_' + bins[bin_i]).replace('.','_'); - current_timer_data[bin_name] = freq; + current_timer_data['histogram'][bin_name] = freq; } timer_data[key] = current_timer_data; diff --git a/test/process_metrics_tests.js b/test/process_metrics_tests.js index 8eb2ab1d..0adfb4c9 100644 --- a/test/process_metrics_tests.js +++ b/test/process_metrics_tests.js @@ -1,4 +1,5 @@ -var pm = require('../lib/process_metrics') +var pm = require('../lib/process_metrics'), + _ = require('underscore'); module.exports = { setUp: function (callback) { @@ -118,7 +119,7 @@ module.exports = { test.done(); }, // check if the correct settings are being applied. 
as well as actual counts timers_histogram: function (test) { - test.expect(45); + test.expect(13); this.metrics.timers['a'] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; this.metrics.timers['abc'] = [0.1234, 2.89, 4, 6, 8]; this.metrics.timers['foo'] = [0, 2, 4, 6, 8]; @@ -132,41 +133,26 @@ module.exports = { pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); timer_data = this.metrics.timer_data; // nothing matches the 'abcd' config, so nothing has bin_5 - test.equal(undefined, timer_data['a']['bin_5']); - test.equal(undefined, timer_data['abc']['bin_5']); - test.equal(undefined, timer_data['foo']['bin_5']); - test.equal(undefined, timer_data['barbazfoobar']['bin_5']); - test.equal(undefined, timer_data['bar.bazfoobar.abc']['bin_5']); - test.equal(undefined, timer_data['xyz']['bin_5']); + test.equal(undefined, timer_data['a']['histogram']['bin_5']); + test.equal(undefined, timer_data['abc']['histogram']['bin_5']); // check that 'a' got the right config and numbers - test.equal(0, timer_data['a']['bin_1']); - test.equal(1, timer_data['a']['bin_2']); - test.equal(undefined, timer_data['a']['bin_inf']); + test.equal(0, timer_data['a']['histogram']['bin_1']); + test.equal(1, timer_data['a']['histogram']['bin_2']); + test.equal(undefined, timer_data['a']['histogram']['bin_inf']); // only 'abc' should have a bin_inf; also check all its counts, // and make sure it has no other bins - // amount of non-bin_ keys: std, upper, lower, count, sum, mean -> 6 - test.equal(1, timer_data['abc']['bin_1']); - test.equal(0, timer_data['abc']['bin_2_21']); - test.equal(4, timer_data['abc']['bin_inf']); - for (key in timer_data['abc']) { - test.ok(key.indexOf('bin_') < 0 || key == 'bin_1' || key == 'bin_2_21' || key == 'bin_inf'); - } + test.equal(1, timer_data['abc']['histogram']['bin_1']); + test.equal(0, timer_data['abc']['histogram']['bin_2_21']); + test.equal(4, timer_data['abc']['histogram']['bin_inf']); + test.equal(3, _.size(timer_data['abc']['histogram'])); - // 'foo', 
'barbazfoobar' and 'bar.bazfoobar.meh' and 'xyz' should not have any bin - for (key in timer_data['foo']) { - test.ok(key.indexOf('bin_') < 0); - } - for (key in timer_data['barbazfoobar']) { - test.ok(key.indexOf('bin_') < 0); - } - for (key in timer_data['bar.bazfoobar.abc']) { - test.ok(key.indexOf('bin_') < 0); - } - for (key in timer_data['xyz']) { - test.ok(key.indexOf('bin_') < 0); - } + // these all have histograms disabled ('foo' explicitly, rest implicitly) + test.equal(undefined, timer_data['foo']['histogram']); + test.equal(undefined, timer_data['barbazfoobar']['histogram']); + test.equal(undefined, timer_data['bar.bazfoobar.abc']['histogram']); + test.equal(undefined, timer_data['xyz']['histogram']); test.done(); }, From a73a459c594de72556a15e2295f8efe5f82e1534 Mon Sep 17 00:00:00 2001 From: Tim Bunce Date: Tue, 11 Dec 2012 05:58:11 -0600 Subject: [PATCH 103/233] Avoid many redundant string concats in graphite backend --- backends/graphite.js | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 1208433b..cb5e77ba 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -68,6 +68,7 @@ var post_stats = function graphite_post_stats(statString) { } var flush_stats = function graphite_flush(ts, metrics) { + var ts_suffix = ' ' + ts + "\n"; var starttime = Date.now(); var statString = ''; var numStats = 0; @@ -87,11 +88,11 @@ var flush_stats = function graphite_flush(ts, metrics) { var valuePerSecond = value / (flushInterval / 1000); // calculate "per second" rate if (legacyNamespace === true) { - statString += namespace.join(".") + ' ' + valuePerSecond + ' ' + ts + "\n"; - statString += 'stats_counts.' + key + ' ' + value + ' ' + ts + "\n"; + statString += namespace.join(".") + ' ' + valuePerSecond + ts_suffix; + statString += 'stats_counts.' 
+ key + ' ' + value + ts_suffix; } else { - statString += namespace.concat('rate').join(".") + ' ' + valuePerSecond + ' ' + ts + "\n"; - statString += namespace.concat('count').join(".") + ' ' + value + ' ' + ts + "\n"; + statString += namespace.concat('rate').join(".") + ' ' + valuePerSecond + ts_suffix; + statString += namespace.concat('count').join(".") + ' ' + value + ts_suffix; } numStats += 1; @@ -102,7 +103,7 @@ var flush_stats = function graphite_flush(ts, metrics) { for (timer_data_key in timer_data[key]) { var namespace = timerNamespace.concat(key); var the_key = namespace.join("."); - statString += the_key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ' ' + ts + "\n"; + statString += the_key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ts_suffix; } numStats += 1; @@ -111,29 +112,29 @@ var flush_stats = function graphite_flush(ts, metrics) { for (key in gauges) { var namespace = gaugesNamespace.concat(key); - statString += namespace.join(".") + ' ' + gauges[key] + ' ' + ts + "\n"; + statString += namespace.join(".") + ' ' + gauges[key] + ts_suffix; numStats += 1; } for (key in sets) { var namespace = setsNamespace.concat(key); - statString += namespace.join(".") + '.count ' + sets[key].values().length + ' ' + ts + "\n"; + statString += namespace.join(".") + '.count ' + sets[key].values().length + ts_suffix; numStats += 1; } var namespace = globalNamespace.concat('statsd'); if (legacyNamespace === true) { - statString += 'statsd.numStats ' + numStats + ' ' + ts + "\n"; - statString += 'stats.statsd.graphiteStats.calculationtime ' + (Date.now() - starttime) + ' ' + ts + "\n"; + statString += 'statsd.numStats ' + numStats + ts_suffix; + statString += 'stats.statsd.graphiteStats.calculationtime ' + (Date.now() - starttime) + ts_suffix; for (key in statsd_metrics) { - statString += 'stats.statsd.' + key + ' ' + statsd_metrics[key] + ' ' + ts + "\n"; + statString += 'stats.statsd.' 
+ key + ' ' + statsd_metrics[key] + ts_suffix; } } else { - statString += namespace.join(".") + '.numStats ' + numStats + ' ' + ts + "\n"; - statString += namespace.join(".") + '.graphiteStats.calculationtime ' + (Date.now() - starttime) + ' ' + ts + "\n"; + statString += namespace.join(".") + '.numStats ' + numStats + ts_suffix; + statString += namespace.join(".") + '.graphiteStats.calculationtime ' + (Date.now() - starttime) + ts_suffix; for (key in statsd_metrics) { var the_key = namespace.concat(key); - statString += the_key.join(".") + ' ' + statsd_metrics[key] + ' ' + ts + "\n"; + statString += the_key.join(".") + ' ' + statsd_metrics[key] + ts_suffix; } } From d727054d0413e16b1e709d5b86cf592ca39586fb Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Tue, 11 Dec 2012 22:15:13 -0500 Subject: [PATCH 104/233] Use counter_rates in graphite backent --- backends/graphite.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/graphite.js b/backends/graphite.js index cb5e77ba..3fe47b2c 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -85,7 +85,7 @@ var flush_stats = function graphite_flush(ts, metrics) { for (key in counters) { var namespace = counterNamespace.concat(key); var value = counters[key]; - var valuePerSecond = value / (flushInterval / 1000); // calculate "per second" rate + var valuePerSecond = counter_rates[key]; // pre-calculated "per second" rate if (legacyNamespace === true) { statString += namespace.join(".") + ' ' + valuePerSecond + ts_suffix; From a73d5fabde1875aec7f874c8ccd0ff24e9750119 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Tue, 11 Dec 2012 22:23:08 -0500 Subject: [PATCH 105/233] skip setting intermediate variable --- lib/process_metrics.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/process_metrics.js b/lib/process_metrics.js index bcccfae8..ae2f77ca 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -12,8 +12,7 @@ var process_metrics = function (metrics, 
flushInterval, ts, flushCallback) { var value = counters[key]; // calculate "per second" rate - var valuePerSecond = value / (flushInterval / 1000); - counter_rates[key] = valuePerSecond; + counter_rates[key] = value / (flushInterval / 1000); } for (key in timers) { From 4f7d31346e0bdd8ca080d6f0a9cd1dc8a94c2924 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Tue, 11 Dec 2012 22:44:42 -0500 Subject: [PATCH 106/233] Add two addtional graphite backend metrics --- backends/graphite.js | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index cb5e77ba..5076a859 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -41,6 +41,8 @@ var graphiteStats = {}; var post_stats = function graphite_post_stats(statString) { var last_flush = graphiteStats.last_flush || 0; var last_exception = graphiteStats.last_exception || 0; + var last_flush_time = graphiteStats.last_flush_time || 0; + var last_flush_length = graphiteStats.last_flush_length || 0; if (graphiteHost) { try { var graphite = net.createConnection(graphitePort, graphiteHost); @@ -51,12 +53,18 @@ var post_stats = function graphite_post_stats(statString) { }); graphite.on('connect', function() { var ts = Math.round(new Date().getTime() / 1000); + var ts_suffix = ' ' + ts + "\n"; var namespace = globalNamespace.concat('statsd'); - statString += namespace.join(".") + '.graphiteStats.last_exception ' + last_exception + ' ' + ts + "\n"; - statString += namespace.join(".") + '.graphiteStats.last_flush ' + last_flush + ' ' + ts + "\n"; + statString += namespace.join(".") + '.graphiteStats.last_exception ' + last_exception + ts_suffix; + statString += namespace.join(".") + '.graphiteStats.last_flush ' + last_flush + ts_suffix; + statString += namespace.join(".") + '.graphiteStats.last_flush_time ' + last_flush_time + ts_suffix; + statString += namespace.join(".") + '.graphiteStats.last_flush_length ' + last_flush_length + ts_suffix; + var starttime = 
Date.now(); this.write(statString); this.end(); + graphiteStats.last_flush_time = (Date.now() - starttime); graphiteStats.last_flush = Math.round(new Date().getTime() / 1000); + graphiteStats.last_flush_length = statString.length; }); } catch(e){ if (debug) { From fa0b5cf0861b1838eba2c77ae0c8b8e69fafeb23 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Tue, 11 Dec 2012 22:48:43 -0500 Subject: [PATCH 107/233] Line up concats --- backends/graphite.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 5076a859..800a621c 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -55,9 +55,9 @@ var post_stats = function graphite_post_stats(statString) { var ts = Math.round(new Date().getTime() / 1000); var ts_suffix = ' ' + ts + "\n"; var namespace = globalNamespace.concat('statsd'); - statString += namespace.join(".") + '.graphiteStats.last_exception ' + last_exception + ts_suffix; - statString += namespace.join(".") + '.graphiteStats.last_flush ' + last_flush + ts_suffix; - statString += namespace.join(".") + '.graphiteStats.last_flush_time ' + last_flush_time + ts_suffix; + statString += namespace.join(".") + '.graphiteStats.last_exception ' + last_exception + ts_suffix; + statString += namespace.join(".") + '.graphiteStats.last_flush ' + last_flush + ts_suffix; + statString += namespace.join(".") + '.graphiteStats.last_flush_time ' + last_flush_time + ts_suffix; statString += namespace.join(".") + '.graphiteStats.last_flush_length ' + last_flush_length + ts_suffix; var starttime = Date.now(); this.write(statString); From f9e591bcf575c9a92c4ac8b84574cbc4f9d8b6ba Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Wed, 12 Dec 2012 08:09:25 -0500 Subject: [PATCH 108/233] remove last_ prefix to avoid admin 'stats' match --- README.md | 2 ++ backends/graphite.js | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 26896658..dace9c4f 100644 --- 
a/README.md +++ b/README.md @@ -215,6 +215,8 @@ Graphite: flush to graphite * graphite.last_exception: the number of seconds elapsed since the last exception thrown whilst flushing to graphite +* graphite.flush_length: the length of the string sent to graphite +* graphite.flush_time: the time it took to send the data to graphite A simple nagios check can be found in the utils/ directory that can be used to check metric thresholds, for example the number of seconds since the last diff --git a/backends/graphite.js b/backends/graphite.js index 800a621c..2f3e6c1a 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -41,8 +41,8 @@ var graphiteStats = {}; var post_stats = function graphite_post_stats(statString) { var last_flush = graphiteStats.last_flush || 0; var last_exception = graphiteStats.last_exception || 0; - var last_flush_time = graphiteStats.last_flush_time || 0; - var last_flush_length = graphiteStats.last_flush_length || 0; + var flush_time = graphiteStats.flush_time || 0; + var flush_length = graphiteStats.flush_length || 0; if (graphiteHost) { try { var graphite = net.createConnection(graphitePort, graphiteHost); @@ -55,16 +55,16 @@ var post_stats = function graphite_post_stats(statString) { var ts = Math.round(new Date().getTime() / 1000); var ts_suffix = ' ' + ts + "\n"; var namespace = globalNamespace.concat('statsd'); - statString += namespace.join(".") + '.graphiteStats.last_exception ' + last_exception + ts_suffix; - statString += namespace.join(".") + '.graphiteStats.last_flush ' + last_flush + ts_suffix; - statString += namespace.join(".") + '.graphiteStats.last_flush_time ' + last_flush_time + ts_suffix; - statString += namespace.join(".") + '.graphiteStats.last_flush_length ' + last_flush_length + ts_suffix; + statString += namespace.join(".") + '.graphiteStats.last_exception ' + last_exception + ts_suffix; + statString += namespace.join(".") + '.graphiteStats.last_flush ' + last_flush + ts_suffix; + statString += namespace.join(".") 
+ '.graphiteStats.flush_time ' + flush_time + ts_suffix; + statString += namespace.join(".") + '.graphiteStats.flush_length ' + flush_length + ts_suffix; var starttime = Date.now(); this.write(statString); this.end(); - graphiteStats.last_flush_time = (Date.now() - starttime); + graphiteStats.flush_time = (Date.now() - starttime); + graphiteStats.flush_length = statString.length; graphiteStats.last_flush = Math.round(new Date().getTime() / 1000); - graphiteStats.last_flush_length = statString.length; }); } catch(e){ if (debug) { @@ -207,6 +207,8 @@ exports.init = function graphite_init(startup_time, config, events) { graphiteStats.last_flush = startup_time; graphiteStats.last_exception = startup_time; + graphiteStats.flush_time = 0; + graphiteStats.flush_length = 0; flushInterval = config.flushInterval; From 4877c918dba34daf33ccb5e8a33a627a823b5f71 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Fri, 14 Dec 2012 11:17:26 -0500 Subject: [PATCH 109/233] add helper script to decide which timers to sample down --- utils/statsd-timer-metric-counts.sh | 33 +++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100755 utils/statsd-timer-metric-counts.sh diff --git a/utils/statsd-timer-metric-counts.sh b/utils/statsd-timer-metric-counts.sh new file mode 100755 index 00000000..fd748c8c --- /dev/null +++ b/utils/statsd-timer-metric-counts.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# is your statsd machine maxing out cpu? ... unable to pull udp packets out of the buffer +# at a fast enough rate? (see `netstat -su` ) timer metrics are by far the most cpu intensive +# and tuning the sampling of those is key to keeping cpu load under control. +# this tool (to be run on your graphite server) shows for all your timing metric keys how many packets +# it accepted for a "typical" flushInterval (averaged out). using this information you can make informed decisions as +# to which keys to sample and how much. 
+# note that in some bad cases you might see no effect after increasing your amount of sampling, the explanation is +# that you were first sending so many packets of which only a fraction were being processed and shown in these counts, +# that even after sampling more statsd still can't process them all and your count stays in the same range. + +graphite_url=http://dfvimeographite2.df.vimeows.com +whisper_dir=/var/lib/carbon/whisper +timers_subdir=stats/timers + +# you may want to adjust this function according to the characteristics of your environment +# I wish whisper-fetch.py supported the same function API as the http endpoint does, then I could avoid http here. +function get_indicative_count () { + metric=$1 + url=$graphite_url'/render/?from=-1h&target=summarize('$metric',%221hour%22,%22sum%22)&format=csv' + wget -q "$url" -O - | tail -n -1 | sed 's#.*,##' # yields a number ending with .0 or whitespace if values were None +} + +function list_timer_count_files () { + find "$whisper_dir/$timers_subdir" -name 'count.wsp' | sed -e "s#$whisper_dir/\($timers_subdir/.*/count\).wsp#\1#" -e 's#/#.#g' +} + +function list_timer_counts () { + for metric in $(list_timer_count_files); do + echo "$metric $(get_indicative_count $metric)" + done +} +list_timer_counts | grep 'count .*\.0' | sort -n -k2 From 0f3d3c09e6ea717f9e0ccebfb3faec161fb658c5 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Fri, 14 Dec 2012 16:12:55 -0500 Subject: [PATCH 110/233] description fix --- utils/statsd-timer-metric-counts.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/statsd-timer-metric-counts.sh b/utils/statsd-timer-metric-counts.sh index fd748c8c..d36967f6 100755 --- a/utils/statsd-timer-metric-counts.sh +++ b/utils/statsd-timer-metric-counts.sh @@ -3,7 +3,7 @@ # at a fast enough rate? (see `netstat -su` ) timer metrics are by far the most cpu intensive # and tuning the sampling of those is key to keeping cpu load under control. 
# this tool (to be run on your graphite server) shows for all your timing metric keys how many packets -# it accepted for a "typical" flushInterval (averaged out). using this information you can make informed decisions as +# it accepted in a given interval like 1hour. using this information you can make informed decisions as # to which keys to sample and how much. # note that in some bad cases you might see no effect after increasing your amount of sampling, the explanation is # that you were first sending so many packets of which only a fraction were being processed and shown in these counts, From 371e3516deeae76a5a5f53016241a59efd61d783 Mon Sep 17 00:00:00 2001 From: Nacho Soto Date: Sat, 15 Dec 2012 18:17:44 -0800 Subject: [PATCH 111/233] Fixing global leaks --- stats.js | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/stats.js b/stats.js index 91897df1..ff942563 100644 --- a/stats.js +++ b/stats.js @@ -61,7 +61,7 @@ function flushMetrics() { backendEvents.once('flush', function clear_metrics(ts, metrics) { // Clear the counters conf.deleteCounters = conf.deleteCounters || false; - for (key in metrics.counters) { + for (var key in metrics.counters) { if (conf.deleteCounters) { delete(metrics.counters[key]); } else { @@ -70,12 +70,12 @@ function flushMetrics() { } // Clear the timers - for (key in metrics.timers) { + for (var key in metrics.timers) { metrics.timers[key] = []; } // Clear the sets - for (key in metrics.sets) { + for (var key in metrics.sets) { metrics.sets[key] = new set.Set(); } }); @@ -126,7 +126,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { counters["statsd.packets_received"]++; var metrics = msg.toString().split("\n"); - for (midx in metrics) { + for (var midx in metrics) { if (config.dumpMessages) { l.log(metrics[midx].toString()); } @@ -222,8 +222,8 @@ config.configFile(process.argv[2], function (config, oldConfig) { }; // Loop through the base stats - for (group in stats) { - 
for (metric in stats[group]) { + for (var group in stats) { + for (var metric in stats[group]) { stat_writer(group, metric, stats[group][metric]); } } @@ -260,7 +260,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { break; case "delcounters": - for (index in cmdline) { + for (var index in cmdline) { delete counters[cmdline[index]]; stream.write("deleted: " + cmdline[index] + "\n"); } @@ -268,7 +268,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { break; case "deltimers": - for (index in cmdline) { + for (var index in cmdline) { delete timers[cmdline[index]]; stream.write("deleted: " + cmdline[index] + "\n"); } @@ -276,7 +276,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { break; case "delgauges": - for (index in cmdline) { + for (var index in cmdline) { delete gauges[cmdline[index]]; stream.write("deleted: " + cmdline[index] + "\n"); } @@ -328,7 +328,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { var key; var sortedKeys = []; - for (key in keyCounter) { + for (var key in keyCounter) { sortedKeys.push([key, keyCounter[key]]); } From be15f48a2d09f27fc452ebc5c7ee54e2a1579723 Mon Sep 17 00:00:00 2001 From: NachoSoto Date: Sun, 16 Dec 2012 23:37:36 +0000 Subject: [PATCH 112/233] Another global leak --- backends/graphite.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/graphite.js b/backends/graphite.js index 3fe47b2c..685fb869 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -142,7 +142,7 @@ var flush_stats = function graphite_flush(ts, metrics) { }; var backend_status = function graphite_status(writeCb) { - for (stat in graphiteStats) { + for (var stat in graphiteStats) { writeCb(null, 'graphite', stat, graphiteStats[stat]); } }; From 3ca9a03922de19237b5acfb0fdeb8b266ff026e2 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Tue, 18 Dec 2012 08:39:25 -0500 Subject: [PATCH 113/233] remove orphaned variable --- lib/process_metrics.js | 2 -- 1 
file changed, 2 deletions(-) diff --git a/lib/process_metrics.js b/lib/process_metrics.js index ae2f77ca..0bc78201 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -34,8 +34,6 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { var mean = min; var maxAtThreshold = max; - var message = ""; - var key2; for (key2 in pctThreshold) { From 220f898c57263dfe60a393478842a2a83252b45d Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Sun, 23 Dec 2012 18:36:55 +0800 Subject: [PATCH 114/233] examples: python: added python3 support --- examples/python_example.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index d381f981..dde5c617 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -3,6 +3,7 @@ # Steve Ivy # http://monkinetic.com +from __future__ import print_function from random import random from socket import socket, AF_INET, SOCK_DGRAM @@ -69,11 +70,11 @@ def send(self, data, sample_rate=1): try: for stat, value in sampled_data.items(): send_data = "%s:%s" % (stat, value) - udp_sock.sendto(send_data, self.addr) + udp_sock.sendto(send_data.encode('utf-8'), self.addr) except Exception: import sys import traceback - print >>sys.stderr, "Unexpected error: ", traceback.format_exc() + print("Unexpected error: ", traceback.format_exc(), file=sys.stderr) return False return True From d67b1c0abe4795e0010749fb92d68026a91d7c9a Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Sun, 23 Dec 2012 18:40:39 +0800 Subject: [PATCH 115/233] examples: python: fixed batch update_stats() This fixes nasty bug introduced by: examples: python: use isinstance() to suport inheritance Pointy hat to: me TODO: proper tests for update_stats --- examples/python_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python_example.py b/examples/python_example.py index dde5c617..e8cd89af 100644 --- a/examples/python_example.py 
+++ b/examples/python_example.py @@ -46,7 +46,7 @@ def update_stats(self, stats, delta=1, sampleRate=1): >>> client = StatsdClient() >>> client.update_stats('some.int', 10) """ - if isinstance(stats, list): + if not isinstance(stats, list): stats = [stats] data = {} for stat in stats: From eb9e279ebdecc8216777dd197978aaeadb8af1f0 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Sun, 23 Dec 2012 19:13:19 +0800 Subject: [PATCH 116/233] examples: python: use .format --- examples/python_example.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index e8cd89af..4cb8cafd 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -20,7 +20,7 @@ def timing(self, stat, time, sample_rate=1): >>> client.timing('some.time', 500) """ stats = {} - stats[stat] = "%d|ms" % time + stats[stat] = "{0}|ms".format(time) self.send(stats, sample_rate) def increment(self, stats, sample_rate=1): @@ -50,7 +50,7 @@ def update_stats(self, stats, delta=1, sampleRate=1): stats = [stats] data = {} for stat in stats: - data[stat] = "%s|c" % delta + data[stat] = "{0}|c".format(delta) self.send(data, sampleRate) def send(self, data, sample_rate=1): @@ -62,14 +62,14 @@ def send(self, data, sample_rate=1): if (sample_rate < 1): if random() <= sample_rate: for stat, value in data.items(): - sampled_data[stat] = "%s|@%s" %(value, sample_rate) + sampled_data[stat] = "{0}|@{1}".format(value, sample_rate) else: sampled_data = data udp_sock = socket(AF_INET, SOCK_DGRAM) try: for stat, value in sampled_data.items(): - send_data = "%s:%s" % (stat, value) + send_data = "{0}:{1}".format(stat, value) udp_sock.sendto(send_data.encode('utf-8'), self.addr) except Exception: import sys From e20b1a08aca4e7a5c2b3246427b463b5370ee1d5 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Sun, 23 Dec 2012 23:01:21 +0800 Subject: [PATCH 117/233] examples: python: split sample and send This will simplify testing --- 
examples/python_example.py | 52 +++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 4cb8cafd..c3e21d56 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -21,7 +21,7 @@ def timing(self, stat, time, sample_rate=1): """ stats = {} stats[stat] = "{0}|ms".format(time) - self.send(stats, sample_rate) + self.sample_send(stats, sample_rate) def increment(self, stats, sample_rate=1): """ @@ -51,26 +51,60 @@ def update_stats(self, stats, delta=1, sampleRate=1): data = {} for stat in stats: data[stat] = "{0}|c".format(delta) - self.send(data, sampleRate) + self.sample_send(data, sampleRate) - def send(self, data, sample_rate=1): + def sample_send(self, data, sample_rate=1): """ - Squirt the metrics over UDP + Sample and squirt the metrics over UDP + + >>> client = StatsdClient() + >>> client.sample_send({"example.sample_send": "13|c"}, 1) + True """ - sampled_data = {} + return self.send(self.sample(data, sample_rate), self.addr) + + @staticmethod + def sample(data, sample_rate=1): + """ + Sample data dict + TODO(rbtz@): Convert to generator - if (sample_rate < 1): + >>> StatsdClient.sample({"example.sample2": "2"}, 1) + {'example.sample2': '2'} + >>> StatsdClient.sample({"example.sample3": "3"}, 0) + {} + >>> from random import seed + >>> seed(1) + >>> StatsdClient.sample({"example.sample5": "5", "example.sample7": "7"}, 0.99) + {'example.sample5': '5|@0.99', 'example.sample7': '7|@0.99'} + >>> StatsdClient.sample({"example.sample5": "5", "example.sample7": "7"}, 0.01) + {} + """ + sampled_data = {} + if 0 < sample_rate < 1: if random() <= sample_rate: for stat, value in data.items(): sampled_data[stat] = "{0}|@{1}".format(value, sample_rate) + elif sample_rate == 0: + sampled_data = {} else: sampled_data = data + return sampled_data + @staticmethod + def send(_dict, addr): + """ + Sends key/value pairs via UDP. 
+ + >>> StatsdClient.send({"example.send":"11|c"}, ("127.0.0.1", 8125)) + True + """ + # TODO(rbtz@): IPv6 support udp_sock = socket(AF_INET, SOCK_DGRAM) try: - for stat, value in sampled_data.items(): - send_data = "{0}:{1}".format(stat, value) - udp_sock.sendto(send_data.encode('utf-8'), self.addr) + # TODO(rbtz@): Add batch support + for item in _dict.items(): + udp_sock.sendto(":".join(item).encode('utf-8'), addr) except Exception: import sys import traceback From 3fb77ec33daa0cf1c8b42dcd0c697f2229ad3a57 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Sun, 23 Dec 2012 23:15:43 +0800 Subject: [PATCH 118/233] examples: python: prettify doctests --- examples/python_example.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index c3e21d56..3ac37468 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -7,17 +7,21 @@ from random import random from socket import socket, AF_INET, SOCK_DGRAM -# Sends statistics to the stats daemon over UDP class StatsdClient(object): def __init__(self, host='localhost', port=8125): + """ + Sends statistics to the stats daemon over UDP + + >>> from python_example import StatsdClient + """ self.addr = (host, port) def timing(self, stat, time, sample_rate=1): """ Log timing information - >>> from python_example import StatsdClient + >>> client = StatsdClient() - >>> client.timing('some.time', 500) + >>> client.timing('example.timing', 500) """ stats = {} stats[stat] = "{0}|ms".format(time) @@ -26,25 +30,28 @@ def timing(self, stat, time, sample_rate=1): def increment(self, stats, sample_rate=1): """ Increments one or more stats counters + >>> client = StatsdClient() - >>> client.increment('some.int') - >>> client.increment('some.int', 0.5) + >>> client.increment('example.increment') + >>> client.increment('example.increment', 0.5) """ self.update_stats(stats, 1, sample_rate) def decrement(self, stats, sample_rate=1): 
""" Decrements one or more stats counters + >>> client = StatsdClient() - >>> client.decrement('some.int') + >>> client.decrement('example.decrement') """ self.update_stats(stats, -1, sample_rate) def update_stats(self, stats, delta=1, sampleRate=1): """ Updates one or more stats counters by arbitrary amounts + >>> client = StatsdClient() - >>> client.update_stats('some.int', 10) + >>> client.update_stats('example.update_stats', 17) """ if not isinstance(stats, list): stats = [stats] From e0f32bffa2199780e8efd1137d586ecade6ba1d8 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Sun, 23 Dec 2012 23:18:34 +0800 Subject: [PATCH 119/233] examples: python: sampling used only in counters --- examples/python_example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 3ac37468..89a9b679 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -16,7 +16,7 @@ def __init__(self, host='localhost', port=8125): """ self.addr = (host, port) - def timing(self, stat, time, sample_rate=1): + def timing(self, stat, time): """ Log timing information @@ -25,7 +25,7 @@ def timing(self, stat, time, sample_rate=1): """ stats = {} stats[stat] = "{0}|ms".format(time) - self.sample_send(stats, sample_rate) + self.send(stats, self.addr) def increment(self, stats, sample_rate=1): """ From f145ca15e28df779d44cc0d495421f4cdb6bd943 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Sun, 23 Dec 2012 23:22:22 +0800 Subject: [PATCH 120/233] examples: python: renamed update_stats to counter --- examples/python_example.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 89a9b679..92c1c3e9 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -35,7 +35,7 @@ def increment(self, stats, sample_rate=1): >>> client.increment('example.increment') >>> client.increment('example.increment', 0.5) """ - 
self.update_stats(stats, 1, sample_rate) + self.count(stats, 1, sample_rate) def decrement(self, stats, sample_rate=1): """ @@ -44,14 +44,14 @@ def decrement(self, stats, sample_rate=1): >>> client = StatsdClient() >>> client.decrement('example.decrement') """ - self.update_stats(stats, -1, sample_rate) + self.count(stats, -1, sample_rate) - def update_stats(self, stats, delta=1, sampleRate=1): + def count(self, stats, delta=1, sampleRate=1): """ Updates one or more stats counters by arbitrary amounts >>> client = StatsdClient() - >>> client.update_stats('example.update_stats', 17) + >>> client.count('example.counter', 17) """ if not isinstance(stats, list): stats = [stats] From 386089964bf9d1052dea7cf7c1bbec773cc28259 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Sun, 23 Dec 2012 23:24:54 +0800 Subject: [PATCH 121/233] examples: python: renamed sampleRate to sample_rate and make it mandatory --- examples/python_example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 92c1c3e9..98a95b07 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -46,7 +46,7 @@ def decrement(self, stats, sample_rate=1): """ self.count(stats, -1, sample_rate) - def count(self, stats, delta=1, sampleRate=1): + def count(self, stats, delta, sample_rate=1): """ Updates one or more stats counters by arbitrary amounts @@ -58,7 +58,7 @@ def count(self, stats, delta=1, sampleRate=1): data = {} for stat in stats: data[stat] = "{0}|c".format(delta) - self.sample_send(data, sampleRate) + self.sample_send(data, sample_rate) def sample_send(self, data, sample_rate=1): """ From 0a2ccff0644b8b949d90f47acfc44bcd252b3976 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Sun, 23 Dec 2012 23:32:13 +0800 Subject: [PATCH 122/233] examples: python: inline sample_send --- examples/python_example.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git 
a/examples/python_example.py b/examples/python_example.py index 98a95b07..6dffa5b3 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -58,20 +58,10 @@ def count(self, stats, delta, sample_rate=1): data = {} for stat in stats: data[stat] = "{0}|c".format(delta) - self.sample_send(data, sample_rate) - - def sample_send(self, data, sample_rate=1): - """ - Sample and squirt the metrics over UDP - - >>> client = StatsdClient() - >>> client.sample_send({"example.sample_send": "13|c"}, 1) - True - """ - return self.send(self.sample(data, sample_rate), self.addr) + self.send(self.sample(data, sample_rate), self.addr) @staticmethod - def sample(data, sample_rate=1): + def sample(data, sample_rate): """ Sample data dict TODO(rbtz@): Convert to generator From 77a84486981a12dc11a2defd864dcd5d21e48572 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Sun, 23 Dec 2012 23:33:15 +0800 Subject: [PATCH 123/233] examples: python: removed __main__ case We have doctests for that --- examples/python_example.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 6dffa5b3..69a7cc61 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -108,8 +108,3 @@ def send(_dict, addr): print("Unexpected error: ", traceback.format_exc(), file=sys.stderr) return False return True - - -if __name__=="__main__": - c = StatsdClient() - c.increment('example.python') From 83bdf8ddb7c64bcc248d5f034940ebb19bf9dad0 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Sun, 23 Dec 2012 23:38:47 +0800 Subject: [PATCH 124/233] examples: python: removed useless comment --- examples/python_example.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 69a7cc61..822181d5 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -1,5 +1,3 @@ -# python_example.py - # Steve Ivy # http://monkinetic.com From 
259754443cbb41f9c7a205e1a14308f7c62fdabd Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Sun, 23 Dec 2012 23:40:02 +0800 Subject: [PATCH 125/233] examples: python: library should not print anything Acctually raising an exception in case of an error is very pythonish. --- examples/python_example.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 822181d5..5965fc9e 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -1,7 +1,6 @@ # Steve Ivy # http://monkinetic.com -from __future__ import print_function from random import random from socket import socket, AF_INET, SOCK_DGRAM @@ -92,17 +91,9 @@ def send(_dict, addr): Sends key/value pairs via UDP. >>> StatsdClient.send({"example.send":"11|c"}, ("127.0.0.1", 8125)) - True """ # TODO(rbtz@): IPv6 support udp_sock = socket(AF_INET, SOCK_DGRAM) - try: - # TODO(rbtz@): Add batch support - for item in _dict.items(): - udp_sock.sendto(":".join(item).encode('utf-8'), addr) - except Exception: - import sys - import traceback - print("Unexpected error: ", traceback.format_exc(), file=sys.stderr) - return False - return True + # TODO(rbtz@): Add batch support + for item in _dict.items(): + udp_sock.sendto(":".join(item).encode('utf-8'), addr) From 422df23bc7de4e4bd2509763dd5551969ff0752a Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 24 Dec 2012 00:09:58 +0800 Subject: [PATCH 126/233] examples: python: general format function --- examples/python_example.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 5965fc9e..af0763ca 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -5,6 +5,11 @@ from socket import socket, AF_INET, SOCK_DGRAM class StatsdClient(object): + SC_TIMING = "ms" + SC_COUNT = "c" + SC_GAUGE = "g" + SC_SET = "s" + def __init__(self, 
host='localhost', port=8125): """ Sends statistics to the stats daemon over UDP @@ -13,15 +18,16 @@ def __init__(self, host='localhost', port=8125): """ self.addr = (host, port) - def timing(self, stat, time): + def timing(self, stats, time): """ Log timing information >>> client = StatsdClient() >>> client.timing('example.timing', 500) + >>> client.timing(('example.timing23', 'example.timing29'), 500) + """ - stats = {} - stats[stat] = "{0}|ms".format(time) + stats = self.format(stats, time, self.SC_TIMING) self.send(stats, self.addr) def increment(self, stats, sample_rate=1): @@ -50,12 +56,27 @@ def count(self, stats, delta, sample_rate=1): >>> client = StatsdClient() >>> client.count('example.counter', 17) """ - if not isinstance(stats, list): - stats = [stats] + stats = self.format(stats, delta, self.SC_COUNT) + self.send(self.sample(stats, sample_rate), self.addr) + + @staticmethod + def format(keys, value, _type): + """ + General format function. + + >>> StatsdClient.format("example.format", 2, "T") + {'example.format': '2|T'} + >>> StatsdClient.format(("example.format31", "example.format37"), "2", "T") + {'example.format31': '2|T', 'example.format37': '2|T'} + """ data = {} - for stat in stats: - data[stat] = "{0}|c".format(delta) - self.send(self.sample(data, sample_rate), self.addr) + value = "{0}|{1}".format(value, _type) + # TODO: Allow any iterable except strings + if not isinstance(keys, (list, tuple)): + keys = [keys] + for key in keys: + data[key] = value + return data @staticmethod def sample(data, sample_rate): From 18c67ab3f18a9d19c8dfa671da474377927970fa Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 24 Dec 2012 01:16:25 +0800 Subject: [PATCH 127/233] exmaples: python: add gauges support --- examples/python_example.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/examples/python_example.py b/examples/python_example.py index af0763ca..259182ab 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ 
-30,6 +30,17 @@ def timing(self, stats, time): stats = self.format(stats, time, self.SC_TIMING) self.send(stats, self.addr) + def gauge(self, stats, value): + """ + Log gauges + + >>> client = StatsdClient() + >>> client.gauge('example.gauge', 47) + >>> client.gauge(('example.gauge41', 'example.gauge43'), 47) + """ + stats = self.format(stats, value, self.SC_GAUGE) + self.send(stats, self.addr) + def increment(self, stats, sample_rate=1): """ Increments one or more stats counters From 1857aeed9352c4794d235207365b4a95d682a125 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 24 Dec 2012 01:33:58 +0800 Subject: [PATCH 128/233] examples: python: added sets support --- examples/python_example.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/examples/python_example.py b/examples/python_example.py index 259182ab..d6835c1c 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -25,7 +25,6 @@ def timing(self, stats, time): >>> client = StatsdClient() >>> client.timing('example.timing', 500) >>> client.timing(('example.timing23', 'example.timing29'), 500) - """ stats = self.format(stats, time, self.SC_TIMING) self.send(stats, self.addr) @@ -41,6 +40,17 @@ def gauge(self, stats, value): stats = self.format(stats, value, self.SC_GAUGE) self.send(stats, self.addr) + def set(self, stats, value): + """ + Log set + + >>> client = StatsdClient() + >>> client.set('example.set', "set") + >>> client.set(('example.set61', 'example.set67'), "2701") + """ + stats = self.format(stats, value, self.SC_SET) + self.send(stats, self.addr) + def increment(self, stats, sample_rate=1): """ Increments one or more stats counters From d49b7d9a6c8d5e744462402ba8e0ebee9f63bc57 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 24 Dec 2012 01:36:24 +0800 Subject: [PATCH 129/233] examples: python: unify parameter names --- examples/python_example.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/examples/python_example.py b/examples/python_example.py index d6835c1c..9deba160 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -18,7 +18,7 @@ def __init__(self, host='localhost', port=8125): """ self.addr = (host, port) - def timing(self, stats, time): + def timing(self, stats, value): """ Log timing information @@ -26,7 +26,7 @@ def timing(self, stats, time): >>> client.timing('example.timing', 500) >>> client.timing(('example.timing23', 'example.timing29'), 500) """ - stats = self.format(stats, time, self.SC_TIMING) + stats = self.format(stats, value, self.SC_TIMING) self.send(stats, self.addr) def gauge(self, stats, value): @@ -70,14 +70,14 @@ def decrement(self, stats, sample_rate=1): """ self.count(stats, -1, sample_rate) - def count(self, stats, delta, sample_rate=1): + def count(self, stats, value, sample_rate=1): """ Updates one or more stats counters by arbitrary amounts >>> client = StatsdClient() >>> client.count('example.counter', 17) """ - stats = self.format(stats, delta, self.SC_COUNT) + stats = self.format(stats, value, self.SC_COUNT) self.send(self.sample(stats, sample_rate), self.addr) @staticmethod From 7adbbc05872a36bb72c166f9d7495e682d6e1271 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 24 Dec 2012 01:41:45 +0800 Subject: [PATCH 130/233] examples: python: simplify code by (re)introducing update_stats --- examples/python_example.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index 9deba160..ce0d5d0b 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -26,8 +26,7 @@ def timing(self, stats, value): >>> client.timing('example.timing', 500) >>> client.timing(('example.timing23', 'example.timing29'), 500) """ - stats = self.format(stats, value, self.SC_TIMING) - self.send(stats, self.addr) + self.update_stats(stats, value, self.SC_TIMING) def gauge(self, stats, value): """ @@ -37,8 
+36,7 @@ def gauge(self, stats, value): >>> client.gauge('example.gauge', 47) >>> client.gauge(('example.gauge41', 'example.gauge43'), 47) """ - stats = self.format(stats, value, self.SC_GAUGE) - self.send(stats, self.addr) + self.update_stats(stats, value, self.SC_GAUGE) def set(self, stats, value): """ @@ -48,8 +46,7 @@ def set(self, stats, value): >>> client.set('example.set', "set") >>> client.set(('example.set61', 'example.set67'), "2701") """ - stats = self.format(stats, value, self.SC_SET) - self.send(stats, self.addr) + self.update_stats(stats, value, self.SC_SET) def increment(self, stats, sample_rate=1): """ @@ -72,12 +69,21 @@ def decrement(self, stats, sample_rate=1): def count(self, stats, value, sample_rate=1): """ - Updates one or more stats counters by arbitrary amounts + Updates one or more stats counters by arbitrary value >>> client = StatsdClient() >>> client.count('example.counter', 17) """ - stats = self.format(stats, value, self.SC_COUNT) + self.update_stats(stats, value, self.SC_COUNT, sample_rate) + + def update_stats(self, stats, value, _type, sample_rate=1): + """ + Pipeline function that formats data, samples it and passes to send() + + >>> client = StatsdClient() + >>> client.update_stats('example.update_stats', 73, "c", 0.9) + """ + stats = self.format(stats, value, _type) self.send(self.sample(stats, sample_rate), self.addr) @staticmethod From ca067355e8489238ad49b22793a992da0f16dc43 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Mon, 24 Dec 2012 01:45:42 +0800 Subject: [PATCH 131/233] examples: python: simplify fastpath --- examples/python_example.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/python_example.py b/examples/python_example.py index ce0d5d0b..8b4311e2 100644 --- a/examples/python_example.py +++ b/examples/python_example.py @@ -122,16 +122,15 @@ def sample(data, sample_rate): >>> StatsdClient.sample({"example.sample5": "5", "example.sample7": "7"}, 0.01) {} """ - 
sampled_data = {} - if 0 < sample_rate < 1: + if sample_rate >= 1: + return data + elif sample_rate < 1: if random() <= sample_rate: + sampled_data = {} for stat, value in data.items(): sampled_data[stat] = "{0}|@{1}".format(value, sample_rate) - elif sample_rate == 0: - sampled_data = {} - else: - sampled_data = data - return sampled_data + return sampled_data + return {} @staticmethod def send(_dict, addr): From fe61c955a6ef950a44b5c706a7e831241f49f778 Mon Sep 17 00:00:00 2001 From: Drew Fradette Date: Mon, 24 Dec 2012 09:14:41 -0500 Subject: [PATCH 132/233] Added status to debian init.d script --- debian/statsd.init | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/debian/statsd.init b/debian/statsd.init index 1676e21f..5d8ff4b8 100644 --- a/debian/statsd.init +++ b/debian/statsd.init @@ -146,9 +146,11 @@ case "$1" in ;; esac ;; + status) + status_of_proc $DAEMON "$NAME" + ;; *) - #echo "Usage: $SCRIPTNAME {start|stop|restart|reload|force-reload}" >&2 - echo "Usage: $SCRIPTNAME {start|stop|restart|force-reload}" >&2 + echo "Usage: $SCRIPTNAME {start|stop|restart|status|force-reload}" >&2 exit 3 ;; esac From 00b1d10b34225194c51b469762aaa3ca9295669b Mon Sep 17 00:00:00 2001 From: Charles Daniel Date: Mon, 24 Dec 2012 19:36:24 -0500 Subject: [PATCH 133/233] Java client adding support for multi_metrics + timer flush --- examples/StatsdClient.java | 109 ++++++++++++++++++++++++++++++++++--- 1 file changed, 100 insertions(+), 9 deletions(-) diff --git a/examples/StatsdClient.java b/examples/StatsdClient.java index 24ac2b5e..3b31ff67 100644 --- a/examples/StatsdClient.java +++ b/examples/StatsdClient.java @@ -22,6 +22,14 @@ * // multiple keys with a sample rate * client.increment(10, .1, "foo.bar.baz", "foo.bar.boo", "foo.baz.bar"); * + * // To enable multi metrics (aka more than 1 metric in a UDP packet) (disabled by default) + * client.enableMultiMetrics(true); //disable by passing in false + * // To fine-tune udp packet buffer size 
(default=1500) + * client.setBufferSize((short) 1500); + * // To force flush the buffer out (good idea to add to your shutdown path) + * client.flush(); + * + * * Note: For best results, and greater availability, you'll probably want to * create a wrapper class which creates a static client and proxies to it. * @@ -36,10 +44,16 @@ import java.nio.channels.DatagramChannel; import java.util.Locale; import java.util.Random; +import java.util.Timer; +import java.util.TimerTask; import org.apache.log4j.Logger; -public class StatsdClient { +public class StatsdClient extends TimerTask { + private ByteBuffer sendBuffer; + private Timer flushTimer; + private boolean multi_metrics = false; + private static final Random RNG = new Random(); private static final Logger log = Logger.getLogger(StatsdClient.class.getName()); @@ -53,8 +67,49 @@ public StatsdClient(String host, int port) throws UnknownHostException, IOExcept public StatsdClient(InetAddress host, int port) throws IOException { _address = new InetSocketAddress(host, port); _channel = DatagramChannel.open(); + setBufferSize((short) 1500); } + protected void finalize() { + flush(); + } + + public synchronized void setBufferSize(short packetBufferSize) { + if(sendBuffer != null) { + flush(); + } + sendBuffer = ByteBuffer.allocate(packetBufferSize); + } + + public synchronized void enableMultiMetrics(boolean enable) { + multi_metrics = enable; + } + + public synchronized boolean startFlushTimer(long period) { + if(flushTimer == null) { + // period is in msecs + if(period <= 0) { period = 2000; } + flushTimer = new Timer(); + + // We pass this object in as the TimerTask (which calls run()) + flushTimer.schedule((TimerTask)this, period, period); + return true; + } + return false; + } + + public synchronized void stopFlushTimer() { + if(flushTimer != null) { + flushTimer.cancel(); + flushTimer = null; + } + } + + public void run() { // used by Timer, we're a Runnable TimerTask + flush(); + } + + public boolean timing(String 
key, int value) { return timing(key, value, 1.0); } @@ -143,24 +198,60 @@ private boolean send(double sampleRate, String... stats) { return retval; } - private boolean doSend(final String stat) { + private synchronized boolean doSend(String stat) { + try { + final byte[] data = stat.getBytes("utf-8"); + + // If we're going to go past the threshold of the buffer then flush. + // the +1 is for the potential '\n' in multi_metrics below + if(sendBuffer.remaining() < (data.length + 1)) { + flush(); + } + + if(sendBuffer.position() > 0) { // multiple metrics are separated by '\n' + sendBuffer.put( (byte) '\n'); + } + + sendBuffer.put(data); // append the data + + if(! multi_metrics) { + flush(); + } + + return true; + + } catch (IOException e) { + log.error( + String.format("Could not send stat %s to host %s:%d", sendBuffer.toString(), _address.getHostName(), + _address.getPort()), e); + return false; + } + } + + public synchronized boolean flush() { try { - final byte[] data = stat.getBytes("utf-8"); - final ByteBuffer buff = ByteBuffer.wrap(data); - final int nbSentBytes = _channel.send(buff, _address); + final int sizeOfBuffer = sendBuffer.position(); + + if(sizeOfBuffer <= 0) { return false; } // empty buffer + + // send and reset the buffer + sendBuffer.flip(); + final int nbSentBytes = _channel.send(sendBuffer, _address); + sendBuffer.limit(sendBuffer.capacity()); + sendBuffer.rewind(); - if (data.length == nbSentBytes) { + if (sizeOfBuffer == nbSentBytes) { return true; } else { log.error(String.format( - "Could not send entirely stat %s to host %s:%d. Only sent %d bytes out of %d bytes", stat, - _address.getHostName(), _address.getPort(), nbSentBytes, data.length)); + "Could not send entirely stat %s to host %s:%d. 
Only sent %d bytes out of %d bytes", sendBuffer.toString(), + _address.getHostName(), _address.getPort(), nbSentBytes, sizeOfBuffer)); return false; } } catch (IOException e) { log.error( - String.format("Could not send stat %s to host %s:%d", stat, _address.getHostName(), + String.format("Could not send stat %s to host %s:%d", sendBuffer.toString(), _address.getHostName(), _address.getPort()), e); return false; } From 91131dc4712a6f0d8e51cfeef1afc48a4c4efa12 Mon Sep 17 00:00:00 2001 From: Phil Sorber Date: Mon, 24 Dec 2012 21:50:40 -0500 Subject: [PATCH 134/233] Update README with more information about multi-metric packets. --- README.md | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 26896658..7cbad3d2 100644 --- a/README.md +++ b/README.md @@ -74,8 +74,25 @@ using a Set to store all occuring events. uniques:765|s -All metrics can also be batch send in a single UDP packet, separated by a -newline character. +Multi-Metric Packets +-------------------- +StatsD supports receiving multiple metrics in a single packet by separating them +with a newline. + + gorets:1|c\nglork:320|ms\ngaugor:333|g\nuniques:765|s + +Be careful to keep the total length of the payload within your network's MTU. There +is no single good value to use, but here are some guidelines for common network +scenarios: + +* Fast Ethernet (1432) - This is most likely for Intranets. +* Gigabit Ethernet (8932) - Jumbo frames can make use of this feature much more + efficient. +* Commodity Internet (512) - If you are routing over the internet a value in this + range will be reasonable. You might be able to go higher, but you are at the mercy + of all the hops in your route. 
+ +*(These payload numbers take into account the maximum IP + UDP header sizes)* Debugging --------- From 65da1a2d30db4b92c01fbb87a4029a06a1be7a81 Mon Sep 17 00:00:00 2001 From: Rafael Garcia-Suarez Date: Wed, 2 Jan 2013 10:31:38 +0100 Subject: [PATCH 135/233] Minor README additions --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 7cbad3d2..bd931f20 100644 --- a/README.md +++ b/README.md @@ -233,6 +233,10 @@ Graphite: * graphite.last_exception: the number of seconds elapsed since the last exception thrown whilst flushing to graphite +Those statistics will also be sent to graphite under the namespaces +`stats.statsd.graphiteStats.last_exception` and +`stats.statsd.graphiteStats.last_flush`. + A simple nagios check can be found in the utils/ directory that can be used to check metric thresholds, for example the number of seconds since the last successful flush to graphite. @@ -359,6 +363,8 @@ the legacy namespacing those values can be found (with default prefixing) under `stats.counters.counter_name.rate` and `stats.counters.counter_name.count` now. +The number of elements in sets will be recorded under the metric +`stats.sets.set_name.count` (where "sets" is the prefixSet). Inspiration ----------- From 324267c527133b97f8902f4479af676bc0d7ce58 Mon Sep 17 00:00:00 2001 From: Devdas Bhagat Date: Wed, 2 Jan 2013 14:58:28 +0100 Subject: [PATCH 136/233] This resolves an issue where a random TCP client goes away before a response is sent from the server. The resulting exception was not handled, and would cause node.js to crash. 
The log file would show this message as a result: node.js:201 throw e; // process.nextTick error, or 'error' event on first tick ^ Error: write ECONNRESET at errnoException (net.js:642:11) at Object.afterWrite [as oncomplete] (net.js:480:18) --- stats.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stats.js b/stats.js index 91897df1..2f9c1cef 100644 --- a/stats.js +++ b/stats.js @@ -193,6 +193,10 @@ config.configFile(process.argv[2], function (config, oldConfig) { mgmtServer = net.createServer(function(stream) { stream.setEncoding('ascii'); + stream.on('error', function(err) { + l.log('Caught ' + err +', Moving on') + }); + stream.on('data', function(data) { var cmdline = data.trim().split(" "); var cmd = cmdline.shift(); From 53a23c931df7857ba5f886458e2f3da5edbb5dac Mon Sep 17 00:00:00 2001 From: david raistrick Date: Fri, 14 Dec 2012 17:58:46 -0500 Subject: [PATCH 137/233] comment out repeater config (not required to run), change host.com to example.com (see rfc2606) in example config --- exampleConfig.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exampleConfig.js b/exampleConfig.js index 90584ae8..b3d6dac8 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -64,9 +64,9 @@ Optional Variables: */ { graphitePort: 2003 -, graphiteHost: "graphite.host.com" +, graphiteHost: "graphite.example.com" , port: 8125 , backends: [ "./backends/graphite" ] -, repeater: [ { host: "10.8.3.214", port: 8125 } ] -, repeaterProtocol: "udp4" +//, repeater: [ { host: "graphiterepeater.example.com", port: 8125 } ] +//, repeaterProtocol: "udp4" } From 2a2b956580402d55a202c8812e9e296609b9cf2f Mon Sep 17 00:00:00 2001 From: david raistrick Date: Wed, 2 Jan 2013 11:18:37 -0500 Subject: [PATCH 138/233] remove commented examples --- exampleConfig.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/exampleConfig.js b/exampleConfig.js index b3d6dac8..b8150a13 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -67,6 +67,4 @@ Optional Variables: , 
graphiteHost: "graphite.example.com" , port: 8125 , backends: [ "./backends/graphite" ] -//, repeater: [ { host: "graphiterepeater.example.com", port: 8125 } ] -//, repeaterProtocol: "udp4" } From 6ec854c63a7ecd734a4a620e847892bca7eab868 Mon Sep 17 00:00:00 2001 From: david raistrick Date: Wed, 2 Jan 2013 12:18:35 -0500 Subject: [PATCH 139/233] rework statsd's internal statistics to have a configurable prefix for each running copy of statsd - to prevent conflicts when multiple statsd send data to the same graphite - see https://github.com/etsy/statsd/issues/216 --- backends/graphite.js | 10 +- exampleConfig.js | 2 + stats.js | 22 +- test/graphite_legacy_tests_statsprefix.js | 230 +++++++++++++++++++ test/graphite_tests_statsprefix.js | 264 ++++++++++++++++++++++ 5 files changed, 516 insertions(+), 12 deletions(-) create mode 100644 test/graphite_legacy_tests_statsprefix.js create mode 100644 test/graphite_tests_statsprefix.js diff --git a/backends/graphite.js b/backends/graphite.js index 3fe47b2c..a1f4dda2 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -51,7 +51,7 @@ var post_stats = function graphite_post_stats(statString) { }); graphite.on('connect', function() { var ts = Math.round(new Date().getTime() / 1000); - var namespace = globalNamespace.concat('statsd'); + var namespace = globalNamespace.concat(prefixStats); statString += namespace.join(".") + '.graphiteStats.last_exception ' + last_exception + ' ' + ts + "\n"; statString += namespace.join(".") + '.graphiteStats.last_flush ' + last_flush + ' ' + ts + "\n"; this.write(statString); @@ -122,12 +122,12 @@ var flush_stats = function graphite_flush(ts, metrics) { numStats += 1; } - var namespace = globalNamespace.concat('statsd'); + var namespace = globalNamespace.concat(prefixStats); if (legacyNamespace === true) { - statString += 'statsd.numStats ' + numStats + ts_suffix; - statString += 'stats.statsd.graphiteStats.calculationtime ' + (Date.now() - starttime) + ts_suffix; + statString += 
prefixStats + '.numStats ' + numStats + ts_suffix; + statString += 'stats.' + prefixStats + '.graphiteStats.calculationtime ' + (Date.now() - starttime) + ts_suffix; for (key in statsd_metrics) { - statString += 'stats.statsd.' + key + ' ' + statsd_metrics[key] + ts_suffix; + statString += 'stats.' + prefixStats + '.' + key + ' ' + statsd_metrics[key] + ts_suffix; } } else { statString += namespace.join(".") + '.numStats ' + numStats + ts_suffix; diff --git a/exampleConfig.js b/exampleConfig.js index 90584ae8..c4fc4212 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -35,6 +35,8 @@ Optional Variables: percent: percentage of frequent keys to log [%, default: 100] log: location of log file for frequent keys [default: STDOUT] deleteCounters: don't send values to graphite for inactive counters, as opposed to sending 0 [default: false] + prefixStats: prefix to use for the statsd statistics data for this running instance of statsd [default: statsd] + applies to both legacy and new namespacing console: prettyprint: whether to prettyprint the console backend diff --git a/stats.js b/stats.js index 91897df1..4855d135 100644 --- a/stats.js +++ b/stats.js @@ -11,10 +11,7 @@ var dgram = require('dgram') // initialize data structures with defaults for statsd stats var keyCounter = {}; -var counters = { - "statsd.packets_received": 0, - "statsd.bad_lines_seen": 0 -}; +var counters = {}; var timers = {}; var gauges = {}; var sets = {}; @@ -116,6 +113,17 @@ config.configFile(process.argv[2], function (config, oldConfig) { }, config.debugInterval || 10000); } + // setup config for stats prefix + prefixStats = config.prefixStats; + prefixStats = prefixStats !== undefined ? 
prefixStats : "statsd"; + //setup the names for the stats stored in counters{} + bad_lines_seen = prefixStats + ".bad_lines_seen"; + packets_received = prefixStats + ".packets_received"; + + //now set to zero so we can increment them + counters[bad_lines_seen] = 0; + counters[packets_received] = 0; + if (server === undefined) { // key counting @@ -123,7 +131,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { server = dgram.createSocket('udp4', function (msg, rinfo) { backendEvents.emit('packet', msg, rinfo); - counters["statsd.packets_received"]++; + counters[packets_received]++; var metrics = msg.toString().split("\n"); for (midx in metrics) { @@ -152,7 +160,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { var fields = bits[i].split("|"); if (fields[1] === undefined) { l.log('Bad line: ' + fields + ' in msg "' + metrics[midx] +'"'); - counters["statsd.bad_lines_seen"]++; + counters[bad_lines_seen]++; stats['messages']['bad_lines_seen']++; continue; } @@ -174,7 +182,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { sampleRate = Number(fields[2].match(/^@([\d\.]+)/)[1]); } else { l.log('Bad line: ' + fields + ' in msg "' + metrics[midx] +'"; has invalid sample rate'); - counters["statsd.bad_lines_seen"]++; + counters[bad_lines_seen]++; stats['messages']['bad_lines_seen']++; continue; } diff --git a/test/graphite_legacy_tests_statsprefix.js b/test/graphite_legacy_tests_statsprefix.js new file mode 100644 index 00000000..4dcf1986 --- /dev/null +++ b/test/graphite_legacy_tests_statsprefix.js @@ -0,0 +1,230 @@ +var fs = require('fs'), + net = require('net'), + temp = require('temp'), + spawn = require('child_process').spawn, + util = require('util'), + urlparse = require('url').parse, + _ = require('underscore'), + dgram = require('dgram'), + qsparse = require('querystring').parse, + http = require('http'); + + +var writeconfig = function(text,worker,cb,obj){ + temp.open({suffix: '-statsdconf.js'}, 
function(err, info) { + if (err) throw err; + fs.writeSync(info.fd, text); + fs.close(info.fd, function(err) { + if (err) throw err; + worker(info.path,cb,obj); + }); + }); +} + +var array_contents_are_equal = function(first,second){ + var intlen = _.intersection(first,second).length; + var unlen = _.union(first,second).length; + return (intlen == unlen) && (intlen == first.length); +} + +var statsd_send = function(data,sock,host,port,cb){ + send_data = new Buffer(data); + sock.send(send_data,0,send_data.length,port,host,function(err,bytes){ + if (err) { + throw err; + } + cb(); + }); +} + +// keep collecting data until a specified timeout period has elapsed +// this will let us capture all data chunks so we don't miss one +var collect_for = function(server,timeout,cb){ + var received = []; + var in_flight = 0; + var timed_out = false; + var collector = function(req,res){ + in_flight += 1; + var body = ''; + req.on('data',function(data){ body += data; }); + req.on('end',function(){ + received = received.concat(body.split("\n")); + in_flight -= 1; + if((in_flight < 1) && timed_out){ + server.removeListener('request',collector); + cb(received); + } + }); + } + + setTimeout(function (){ + timed_out = true; + if((in_flight < 1)) { + server.removeListener('connection',collector); + cb(received); + } + },timeout); + + server.on('connection',collector); +} + +module.exports = { + setUp: function (callback) { + this.testport = 31337; + this.myflush = 200; + var configfile = "{graphService: \"graphite\"\n\ + , batch: 200 \n\ + , flushInterval: " + this.myflush + " \n\ + , percentThreshold: 90\n\ + , port: 8125\n\ + , dumpMessages: false \n\ + , debug: false\n\ + , prefixStats: \"statsprefix\"\n\ + , graphitePort: " + this.testport + "\n\ + , graphiteHost: \"127.0.0.1\"}"; + + this.acceptor = net.createServer(); + this.acceptor.listen(this.testport); + this.sock = dgram.createSocket('udp4'); + + this.server_up = true; + this.ok_to_die = false; + this.exit_callback_callback = 
process.exit; + + writeconfig(configfile,function(path,cb,obj){ + obj.path = path; + obj.server = spawn('node',['stats.js', path]); + obj.exit_callback = function (code) { + obj.server_up = false; + if(!obj.ok_to_die){ + console.log('node server unexpectedly quit with code: ' + code); + process.exit(1); + } + else { + obj.exit_callback_callback(); + } + }; + obj.server.on('exit', obj.exit_callback); + obj.server.stderr.on('data', function (data) { + console.log('stderr: ' + data.toString().replace(/\n$/,'')); + }); + /* + obj.server.stdout.on('data', function (data) { + console.log('stdout: ' + data.toString().replace(/\n$/,'')); + }); + */ + obj.server.stdout.on('data', function (data) { + // wait until server is up before we finish setUp + if (data.toString().match(/server is up/)) { + cb(); + } + }); + + },callback,this); + }, + tearDown: function (callback) { + this.sock.close(); + this.acceptor.close(); + this.ok_to_die = true; + if(this.server_up){ + this.exit_callback_callback = callback; + this.server.kill(); + } else { + callback(); + } + }, + + send_well_formed_posts: function (test) { + test.expect(2); + + // we should integrate a timeout into this + this.acceptor.once('connection',function(c){ + var body = ''; + c.on('data',function(d){ body += d; }); + c.on('end',function(){ + var rows = body.split("\n"); + var entries = _.map(rows, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + test.ok(_.include(_.map(entries,function(x) { return _.keys(x)[0] }),'statsprefix.numStats'),'graphite output includes numStats'); + test.equal(_.find(entries, function(x) { return _.keys(x)[0] == 'statsprefix.numStats' })['statsprefix.numStats'],2); + test.done(); + }); + }); + }, + + timers_are_valid: function (test) { + test.expect(3); + + var testvalue = 100; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_test_value:' + testvalue + '|ms',me.sock,'127.0.0.1',8125,function(){ 
+ collect_for(me.acceptor,me.myflush*2,function(strings){ + test.ok(strings.length > 0,'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post){ + var mykey = 'statsprefix.numStats'; + return _.include(_.keys(post),mykey) && (post[mykey] == 3); + }; + test.ok(_.any(hashes,numstat_test), 'statsprefix.numStats should be 1'); + + var testtimervalue_test = function(post){ + var mykey = 'stats.timers.a_test_value.mean_90'; + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); + }; + test.ok(_.any(hashes,testtimervalue_test), 'stats.timers.a_test_value.mean should be ' + testvalue); + + test.done(); + }); + }); + }); + }, + + counts_are_valid: function (test) { + test.expect(4); + + var testvalue = 100; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_test_value:' + testvalue + '|c',me.sock,'127.0.0.1',8125,function(){ + collect_for(me.acceptor,me.myflush*2,function(strings){ + test.ok(strings.length > 0,'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post){ + var mykey = 'statsprefix.numStats'; + return _.include(_.keys(post),mykey) && (post[mykey] == 3); + }; + test.ok(_.any(hashes,numstat_test), 'statsprefix.numStats should be 1'); + + var testavgvalue_test = function(post){ + var mykey = 'stats.a_test_value'; + return _.include(_.keys(post),mykey) && (post[mykey] == (testvalue/(me.myflush / 1000))); + }; + test.ok(_.any(hashes,testavgvalue_test), 'stats.a_test_value should be ' + (testvalue/(me.myflush / 1000))); + + var testcountvalue_test = function(post){ + var mykey = 'stats_counts.a_test_value'; + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); + }; + test.ok(_.any(hashes,testcountvalue_test), 
'stats_counts.a_test_value should be ' + testvalue); + + test.done(); + }); + }); + }); + } +} diff --git a/test/graphite_tests_statsprefix.js b/test/graphite_tests_statsprefix.js new file mode 100644 index 00000000..762bc44a --- /dev/null +++ b/test/graphite_tests_statsprefix.js @@ -0,0 +1,264 @@ +var fs = require('fs'), + net = require('net'), + temp = require('temp'), + spawn = require('child_process').spawn, + util = require('util'), + urlparse = require('url').parse, + _ = require('underscore'), + dgram = require('dgram'), + qsparse = require('querystring').parse, + http = require('http'); + + +var writeconfig = function(text,worker,cb,obj){ + temp.open({suffix: '-statsdconf.js'}, function(err, info) { + if (err) throw err; + fs.writeSync(info.fd, text); + fs.close(info.fd, function(err) { + if (err) throw err; + worker(info.path,cb,obj); + }); + }); +} + +var array_contents_are_equal = function(first,second){ + var intlen = _.intersection(first,second).length; + var unlen = _.union(first,second).length; + return (intlen == unlen) && (intlen == first.length); +} + +var statsd_send = function(data,sock,host,port,cb){ + send_data = new Buffer(data); + sock.send(send_data,0,send_data.length,port,host,function(err,bytes){ + if (err) { + throw err; + } + cb(); + }); +} + +// keep collecting data until a specified timeout period has elapsed +// this will let us capture all data chunks so we don't miss one +var collect_for = function(server,timeout,cb){ + var received = []; + var in_flight = 0; + var timed_out = false; + var collector = function(req,res){ + in_flight += 1; + var body = ''; + req.on('data',function(data){ body += data; }); + req.on('end',function(){ + received = received.concat(body.split("\n")); + in_flight -= 1; + if((in_flight < 1) && timed_out){ + server.removeListener('request',collector); + cb(received); + } + }); + } + + setTimeout(function (){ + timed_out = true; + if((in_flight < 1)) { + server.removeListener('connection',collector); + 
cb(received); + } + },timeout); + + server.on('connection',collector); +} + +module.exports = { + setUp: function (callback) { + this.testport = 31337; + this.myflush = 200; + var configfile = "{graphService: \"graphite\"\n\ + , batch: 200 \n\ + , flushInterval: " + this.myflush + " \n\ + , percentThreshold: 90\n\ + , port: 8125\n\ + , dumpMessages: false \n\ + , debug: false\n\ + , prefixStats: \"statsprefix\"\n\ + , graphite: { legacyNamespace: false }\n\ + , graphitePort: " + this.testport + "\n\ + , graphiteHost: \"127.0.0.1\"}"; + + this.acceptor = net.createServer(); + this.acceptor.listen(this.testport); + this.sock = dgram.createSocket('udp4'); + + this.server_up = true; + this.ok_to_die = false; + this.exit_callback_callback = process.exit; + + writeconfig(configfile,function(path,cb,obj){ + obj.path = path; + obj.server = spawn('node',['stats.js', path]); + obj.exit_callback = function (code) { + obj.server_up = false; + if(!obj.ok_to_die){ + console.log('node server unexpectedly quit with code: ' + code); + process.exit(1); + } + else { + obj.exit_callback_callback(); + } + }; + obj.server.on('exit', obj.exit_callback); + obj.server.stderr.on('data', function (data) { + console.log('stderr: ' + data.toString().replace(/\n$/,'')); + }); + /* + obj.server.stdout.on('data', function (data) { + console.log('stdout: ' + data.toString().replace(/\n$/,'')); + }); + */ + obj.server.stdout.on('data', function (data) { + // wait until server is up before we finish setUp + if (data.toString().match(/server is up/)) { + cb(); + } + }); + + },callback,this); + }, + tearDown: function (callback) { + this.sock.close(); + this.acceptor.close(); + this.ok_to_die = true; + if(this.server_up){ + this.exit_callback_callback = callback; + this.server.kill(); + } else { + callback(); + } + }, + + send_well_formed_posts: function (test) { + test.expect(2); + + // we should integrate a timeout into this + this.acceptor.once('connection',function(c){ + var body = ''; + 
c.on('data',function(d){ body += d; }); + c.on('end',function(){ + var rows = body.split("\n"); + var entries = _.map(rows, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + test.ok(_.include(_.map(entries,function(x) { return _.keys(x)[0] }),'stats.statsprefix.numStats'),'graphite output includes numStats'); + test.equal(_.find(entries, function(x) { return _.keys(x)[0] == 'stats.statsprefix.numStats' })['stats.statsprefix.numStats'],2); + test.done(); + }); + }); + }, + + send_malformed_post: function (test) { + test.expect(3); + + var testvalue = 1; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_bad_test_value|z',me.sock,'127.0.0.1',8125,function(){ + collect_for(me.acceptor,me.myflush*2,function(strings){ + test.ok(strings.length > 0,'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post){ + var mykey = 'stats.statsprefix.numStats'; + return _.include(_.keys(post),mykey) && (post[mykey] == 2); + }; + test.ok(_.any(hashes,numstat_test), 'statsprefix.numStats should be 0'); + + var bad_lines_seen_value_test = function(post){ + var mykey = 'stats.counters.statsprefix.bad_lines_seen.count'; + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); + }; + test.ok(_.any(hashes,bad_lines_seen_value_test), 'stats.counters.statsprefix.bad_lines_seen.count should be ' + testvalue); + + test.done(); + }); + }); + }); + }, + + timers_are_valid: function (test) { + test.expect(3); + + var testvalue = 100; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_test_value:' + testvalue + '|ms',me.sock,'127.0.0.1',8125,function(){ + collect_for(me.acceptor,me.myflush*2,function(strings){ + test.ok(strings.length > 0,'should receive some data'); + var hashes = _.map(strings, function(x) { + var 
chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post){ + var mykey = 'stats.statsprefix.numStats'; + return _.include(_.keys(post),mykey) && (post[mykey] == 3); + }; + test.ok(_.any(hashes,numstat_test), 'stats.statsprefix.numStats should be 1'); + + var testtimervalue_test = function(post){ + var mykey = 'stats.timers.a_test_value.mean_90'; + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); + }; + test.ok(_.any(hashes,testtimervalue_test), 'stats.timers.a_test_value.mean should be ' + testvalue); + + test.done(); + }); + }); + }); + }, + + counts_are_valid: function (test) { + test.expect(4); + + var testvalue = 100; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_test_value:' + testvalue + '|c',me.sock,'127.0.0.1',8125,function(){ + collect_for(me.acceptor,me.myflush*2,function(strings){ + test.ok(strings.length > 0,'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post){ + var mykey = 'stats.statsprefix.numStats'; + return _.include(_.keys(post),mykey) && (post[mykey] == 3); + }; + test.ok(_.any(hashes,numstat_test), 'statsprefix.numStats should be 3'); + + var testavgvalue_test = function(post){ + var mykey = 'stats.counters.a_test_value.rate'; + return _.include(_.keys(post),mykey) && (post[mykey] == (testvalue/(me.myflush / 1000))); + }; + test.ok(_.any(hashes,testavgvalue_test), 'a_test_value.rate should be ' + (testvalue/(me.myflush / 1000))); + + var testcountvalue_test = function(post){ + var mykey = 'stats.counters.a_test_value.count'; + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); + }; + test.ok(_.any(hashes,testcountvalue_test), 'a_test_value.count should be ' + testvalue); + + test.done(); + }); + }); + }); + } +} From 
3f1a116cff9fda7b0333e380505f02eb43d9d7b9 Mon Sep 17 00:00:00 2001 From: david raistrick Date: Fri, 4 Jan 2013 16:48:00 -0500 Subject: [PATCH 140/233] fix the deleteCounters bug where packets_received and bad_lines_seen are deleted - cannot ++ an undefined value.. adjust how we check for the stats counters to support the prefixStats patch - should apply safe with and without the patch correct delete_counters unit test to not look for bad_lines_seen of NaN instead look for it set to 1 --- stats.js | 22 +++++++++++++++++++++- test/graphite_delete_counters_tests.js | 2 +- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/stats.js b/stats.js index 2f9c1cef..3e59fa95 100644 --- a/stats.js +++ b/stats.js @@ -61,9 +61,29 @@ function flushMetrics() { backendEvents.once('flush', function clear_metrics(ts, metrics) { // Clear the counters conf.deleteCounters = conf.deleteCounters || false; + + // handle the case where these vars are not setup - for this patch to work w/o requiring + // the statsPrefix patch + var prefixStats; + prefixStats = conf.prefixStats; + prefixStats = prefixStats !== undefined ? 
prefixStats : "statsd"; + //setup the names for the stats stored in counters{} + bad_lines_seen = prefixStats + ".bad_lines_seen"; + packets_received = prefixStats + ".packets_received"; + for (key in metrics.counters) { if (conf.deleteCounters) { - delete(metrics.counters[key]); + if (key == packets_received || key == bad_lines_seen) { + // if (conf.debug) { + // l.log("resetting stats key: " + key); + // } + metrics.counters[key] = 0; + } else { + //if (conf.debug) { + // l.log("deleting key: " + key); + //} + delete(metrics.counters[key]); + } } else { metrics.counters[key] = 0; } diff --git a/test/graphite_delete_counters_tests.js b/test/graphite_delete_counters_tests.js index 938f45c0..04308f65 100644 --- a/test/graphite_delete_counters_tests.js +++ b/test/graphite_delete_counters_tests.js @@ -179,7 +179,7 @@ module.exports = { var bad_lines_seen_value_test = function(post){ var mykey = 'stats_counts.statsd.bad_lines_seen'; - return _.include(_.keys(post),mykey) && isNaN(post[mykey]); + return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); }; test.ok(_.any(hashes,bad_lines_seen_value_test), 'stats_counts.statsd.bad_lines_seen should be ' + testvalue); From 1a755c4bfd4c4b5becba3763f8422b3c76119878 Mon Sep 17 00:00:00 2001 From: david raistrick Date: Fri, 4 Jan 2013 18:24:51 -0500 Subject: [PATCH 141/233] add config (deleteIdleStats) to delete idle sets, gauges, timers - and a overall flag to set them all but allow individual overrides update delete_counters unit test to test deleteIdleStats which covers all stat types --- stats.js | 33 ++++++++++++++++++++++++-- test/graphite_delete_counters_tests.js | 9 ++++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/stats.js b/stats.js index 2f9c1cef..f84b16a8 100644 --- a/stats.js +++ b/stats.js @@ -59,6 +59,17 @@ function flushMetrics() { // After all listeners, reset the stats backendEvents.once('flush', function clear_metrics(ts, metrics) { + // TODO: a lot of this should be moved up 
into an init/constructor so we don't have to do it every + // single flushInterval.... + // allows us to flag all of these on with a single config but still override them individually + conf.deleteIdleStats = conf.deleteIdleStats !== undefined ? conf.deleteIdleStats : false; + if (conf.deleteIdleStats) { + conf.deleteCounters = conf.deleteCounters !== undefined ? conf.deleteCounters : true; + conf.deleteTimers = conf.deleteTimers !== undefined ? conf.deleteTimers : true; + conf.deleteSets = conf.deleteSets !== undefined ? conf.deleteSets : true; + conf.deleteGauges = conf.deleteGauges !== undefined ? conf.deleteGauges : true; + } + // Clear the counters conf.deleteCounters = conf.deleteCounters || false; for (key in metrics.counters) { @@ -70,13 +81,31 @@ function flushMetrics() { } // Clear the timers + conf.deleteTimers = conf.deleteTimers || false; for (key in metrics.timers) { - metrics.timers[key] = []; + if (conf.deleteTimers) { + delete(metrics.timers[key]); + } else { + metrics.timers[key] = []; + } } // Clear the sets + conf.deleteSets = conf.deleteSets || false; for (key in metrics.sets) { - metrics.sets[key] = new set.Set(); + if (conf.deleteSets) { + delete(metrics.sets[key]); + } else { + metrics.sets[key] = new set.Set(); + } + } + + // normally gauges are not reset. 
so if we don't delete them, continue to persist previous value + conf.deleteGauges = conf.deleteGauges || false; + if (conf.deleteGauges) { + for (key in metrics.gauges) { + delete(metrics.gauges[key]); + } } }); diff --git a/test/graphite_delete_counters_tests.js b/test/graphite_delete_counters_tests.js index 938f45c0..54b85a5c 100644 --- a/test/graphite_delete_counters_tests.js +++ b/test/graphite_delete_counters_tests.js @@ -1,3 +1,10 @@ +// this unit test, for deleteCounters and other stats related to deleteIdleStats +// should probably be reviewed for sanity - I'm not sure it really tests appropriately +// for example, it should test that data is written the first time +// then test that the counter/etc is actually removed when it doesn't get data.. +// - keen99 + + var fs = require('fs'), net = require('net'), temp = require('temp'), @@ -79,7 +86,7 @@ module.exports = { , port: 8125\n\ , dumpMessages: false \n\ , debug: false\n\ - , deleteCounters: true\n\ + , deleteIdleStats: true\n\ , graphitePort: " + this.testport + "\n\ , graphiteHost: \"127.0.0.1\"}"; From e4a6cd2dc1cdae3b40d1bcd034d1f45992a02cd9 Mon Sep 17 00:00:00 2001 From: david raistrick Date: Fri, 4 Jan 2013 18:33:37 -0500 Subject: [PATCH 142/233] update readme and example for deleteIdleStats and friends --- README.md | 9 +++++++++ exampleConfig.js | 7 ++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bd931f20..5322c471 100644 --- a/README.md +++ b/README.md @@ -61,11 +61,17 @@ generate the following list of stats for each threshold: Where `$KEY` is the stats key you specify when sending to statsd, and `$PCT` is the percentile threshold. +If the count at flush is 0 then you can opt to send no metric at all for this timer, +by setting `config.deleteTimers` (applies only to graphite backend). + Gauges ------ StatsD now also supports gauges, arbitrary values, which can be recorded. 
gaugor:333|g + +If the gauge is not updated at the next flush, it will send the previous value. You can opt to send +no metric at all for this gauge, by setting `config.deleteGauge` (applies only to graphite backend). Sets ---- @@ -74,6 +80,9 @@ using a Set to store all occuring events. uniques:765|s +If the count at flush is 0 then you can opt to send no metric at all for this set, by +setting `config.deleteSets` (applies only to graphite backend). + Multi-Metric Packets -------------------- StatsD supports receiving multiple metrics in a single packet by separating them diff --git a/exampleConfig.js b/exampleConfig.js index 90584ae8..fb50be4e 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -34,8 +34,13 @@ Optional Variables: interval: how often to log frequent keys [ms, default: 0] percent: percentage of frequent keys to log [%, default: 100] log: location of log file for frequent keys [default: STDOUT] + deleteIdleStats: don't send values to graphite for inactive counters, sets, gauges, or timeers + as opposed to sending 0. For gauges, this unsets the gauge (instead of sending + the previous value). Can be indivdually overriden. 
[default: false] deleteCounters: don't send values to graphite for inactive counters, as opposed to sending 0 [default: false] - + deleteGauges : don't send values to graphite for inactive gauges, as opposed to sending the previous value [default: false] + deleteTimers: don't send values to graphite for inactive timers, as opposed to sending 0 [default: false] + deleteSets: don't send values to graphite for inactive sets, as opposed to sending 0 [default: false] console: prettyprint: whether to prettyprint the console backend output [true or false, default: true] From 6ed4e1a2902e4d0ac6321b0e3e727af26e96d0ef Mon Sep 17 00:00:00 2001 From: david raistrick Date: Fri, 4 Jan 2013 18:40:35 -0500 Subject: [PATCH 143/233] reorder example for easier merge --- exampleConfig.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exampleConfig.js b/exampleConfig.js index fb50be4e..96fd4b58 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -37,10 +37,10 @@ Optional Variables: deleteIdleStats: don't send values to graphite for inactive counters, sets, gauges, or timeers as opposed to sending 0. For gauges, this unsets the gauge (instead of sending the previous value). Can be indivdually overriden. 
[default: false] - deleteCounters: don't send values to graphite for inactive counters, as opposed to sending 0 [default: false] deleteGauges : don't send values to graphite for inactive gauges, as opposed to sending the previous value [default: false] deleteTimers: don't send values to graphite for inactive timers, as opposed to sending 0 [default: false] deleteSets: don't send values to graphite for inactive sets, as opposed to sending 0 [default: false] + deleteCounters: don't send values to graphite for inactive counters, as opposed to sending 0 [default: false] console: prettyprint: whether to prettyprint the console backend output [true or false, default: true] From f21e6d02dad0d7b92efed54430c940f1aabfe9d8 Mon Sep 17 00:00:00 2001 From: david raistrick Date: Fri, 4 Jan 2013 18:41:32 -0500 Subject: [PATCH 144/233] reorder example for easier merge --- exampleConfig.js | 1 + 1 file changed, 1 insertion(+) diff --git a/exampleConfig.js b/exampleConfig.js index 96fd4b58..915de2b0 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -41,6 +41,7 @@ Optional Variables: deleteTimers: don't send values to graphite for inactive timers, as opposed to sending 0 [default: false] deleteSets: don't send values to graphite for inactive sets, as opposed to sending 0 [default: false] deleteCounters: don't send values to graphite for inactive counters, as opposed to sending 0 [default: false] + console: prettyprint: whether to prettyprint the console backend output [true or false, default: true] From 9fef3161f4bf6c7c33add9ab3d7bbd6f8221d7f5 Mon Sep 17 00:00:00 2001 From: david raistrick Date: Fri, 4 Jan 2013 19:04:07 -0500 Subject: [PATCH 145/233] add a bit more debug output - sets, numStats --- backends/graphite.js | 5 ++++- stats.js | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 3fe47b2c..8dab60a2 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -137,8 +137,11 @@ var flush_stats = 
function graphite_flush(ts, metrics) { statString += the_key.join(".") + ' ' + statsd_metrics[key] + ts_suffix; } } - post_stats(statString); + // lets debug log a few of our metrics + if (debug) { + util.log("DEBUG: numStats: " + numStats); + } }; var backend_status = function graphite_status(writeCb) { diff --git a/stats.js b/stats.js index 2f9c1cef..081a4f8a 100644 --- a/stats.js +++ b/stats.js @@ -110,8 +110,9 @@ config.configFile(process.argv[2], function (config, oldConfig) { clearInterval(debugInt); } debugInt = setInterval(function () { - l.log("Counters:\n" + util.inspect(counters) + + l.log("\nCounters:\n" + util.inspect(counters) + "\nTimers:\n" + util.inspect(timers) + + "\nSets:\n" + util.inspect(sets) + "\nGauges:\n" + util.inspect(gauges), 'debug'); }, config.debugInterval || 10000); } From 14f45a34917065e93bc26e0bf3d2cdea327bfa47 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Mon, 14 Jan 2013 13:41:31 +0100 Subject: [PATCH 146/233] fix README.md with new travis CI build status img --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bd931f20..bc151752 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -StatsD [![Build Status](https://secure.travis-ci.org/etsy/statsd.png)](http://travis-ci.org/etsy/statsd) +StatsD [![Build Status](https://travis-ci.org/etsy/statsd.png?branch=backends-as-packages)](https://travis-ci.org/etsy/statsd) ====== A network daemon that runs on the [Node.js][node] platform and From 338fbd5d47267b4c675fa7c0476e4f33cc1604da Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Mon, 14 Jan 2013 10:41:20 -0500 Subject: [PATCH 147/233] remove metric name sanitisation. 
this is a task for the backends --- lib/process_metrics.js | 2 +- test/process_metrics_tests.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/process_metrics.js b/lib/process_metrics.js index 2f388170..b95371e8 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -94,7 +94,7 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { for (; i < count && (bins[bin_i] == 'inf' || values[i] < bins[bin_i]); i++) { freq += 1; } - bin_name = ('bin_' + bins[bin_i]).replace('.','_'); + bin_name = 'bin_' + bins[bin_i]; current_timer_data['histogram'][bin_name] = freq; } diff --git a/test/process_metrics_tests.js b/test/process_metrics_tests.js index 0adfb4c9..675c4a7e 100644 --- a/test/process_metrics_tests.js +++ b/test/process_metrics_tests.js @@ -144,7 +144,7 @@ module.exports = { // only 'abc' should have a bin_inf; also check all its counts, // and make sure it has no other bins test.equal(1, timer_data['abc']['histogram']['bin_1']); - test.equal(0, timer_data['abc']['histogram']['bin_2_21']); + test.equal(0, timer_data['abc']['histogram']['bin_2.21']); test.equal(4, timer_data['abc']['histogram']['bin_inf']); test.equal(3, _.size(timer_data['abc']['histogram'])); From 8ef73bc501c713eb502a916c8915b4abf605029a Mon Sep 17 00:00:00 2001 From: david raistrick Date: Mon, 14 Jan 2013 13:05:06 -0500 Subject: [PATCH 148/233] remove commented lines --- stats.js | 6 ------ 1 file changed, 6 deletions(-) diff --git a/stats.js b/stats.js index 3e59fa95..c1b039dd 100644 --- a/stats.js +++ b/stats.js @@ -74,14 +74,8 @@ function flushMetrics() { for (key in metrics.counters) { if (conf.deleteCounters) { if (key == packets_received || key == bad_lines_seen) { - // if (conf.debug) { - // l.log("resetting stats key: " + key); - // } metrics.counters[key] = 0; } else { - //if (conf.debug) { - // l.log("deleting key: " + key); - //} delete(metrics.counters[key]); } } else { From b947540e14679dba40100c87d192514f3a7dc3e3 Mon Sep 
17 00:00:00 2001 From: Paul Dixon Date: Wed, 16 Jan 2013 08:37:57 +0000 Subject: [PATCH 149/233] fixing a typo --- exampleConfig.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exampleConfig.js b/exampleConfig.js index eb67aa16..c0b0316d 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -61,7 +61,7 @@ Optional Variables: e.g. [ { host: '10.10.10.10', port: 8125 }, { host: 'observer', port: 88125 } ] - repeaterProtocol: whether to use udp4 or udp4 for repeaters. + repeaterProtocol: whether to use udp4 or udp6 for repeaters. ["udp4" or "udp6", default: "udp4"] */ { From 4379e46a5b1eb9088b102be490cd5ef44e815b0e Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Thu, 17 Jan 2013 14:32:08 +0100 Subject: [PATCH 150/233] use the logger module in the graphite backend --- backends/graphite.js | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index d72d477c..7a4d83e6 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -13,7 +13,10 @@ */ var net = require('net'), - util = require('util'); + logger = require('../lib/logger'); + +// this will be instantiated to the logger +var l; var debug; var flushInterval; @@ -46,7 +49,7 @@ var post_stats = function graphite_post_stats(statString) { var graphite = net.createConnection(graphitePort, graphiteHost); graphite.addListener('error', function(connectionException){ if (debug) { - util.log(connectionException); + l.log(connectionException); } }); graphite.on('connect', function() { @@ -60,7 +63,7 @@ var post_stats = function graphite_post_stats(statString) { }); } catch(e){ if (debug) { - util.log(e); + l.log(e); } graphiteStats.last_exception = Math.round(new Date().getTime() / 1000); } @@ -148,6 +151,7 @@ var backend_status = function graphite_status(writeCb) { }; exports.init = function graphite_init(startup_time, config, events) { + l = new logger.Logger(config.log || {}); debug = config.debug; graphiteHost = 
config.graphiteHost; graphitePort = config.graphitePort; From 82bf4456277460331f963e5e4c2cd717718316b3 Mon Sep 17 00:00:00 2001 From: david raistrick Date: Thu, 17 Jan 2013 12:31:31 -0500 Subject: [PATCH 151/233] update debug log to new logger, make all debug prints use consistent DEBUG: prefix by removing the extra arg --- backends/graphite.js | 3 +-- stats.js | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 1ce92c17..8a6229d0 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -141,9 +141,8 @@ var flush_stats = function graphite_flush(ts, metrics) { } } post_stats(statString); - // lets debug log a few of our metrics if (debug) { - util.log("DEBUG: numStats: " + numStats); + l.log("numStats: " + numStats); } }; diff --git a/stats.js b/stats.js index 721a634d..45778aa2 100644 --- a/stats.js +++ b/stats.js @@ -27,7 +27,7 @@ function loadBackend(config, name) { var backendmod = require(name); if (config.debug) { - l.log("Loading backend: " + name, 'debug'); + l.log("Loading backend: " + name, 'DEBUG'); } var ret = backendmod.init(startup_time, config, backendEvents); @@ -110,7 +110,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { l.log("\nCounters:\n" + util.inspect(counters) + "\nTimers:\n" + util.inspect(timers) + "\nSets:\n" + util.inspect(sets) + - "\nGauges:\n" + util.inspect(gauges), 'debug'); + "\nGauges:\n" + util.inspect(gauges), 'DEBUG'); }, config.debugInterval || 10000); } From fd2a367e3c2ef74c024cb3f7352a85e9b7a6bfe7 Mon Sep 17 00:00:00 2001 From: Jehiah Czebotar Date: Wed, 23 Jan 2013 09:32:04 -0500 Subject: [PATCH 152/233] fix typo 'stats_count.' 
-> 'stats_counts' and reword for clarity --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bc151752..64f91fe3 100644 --- a/README.md +++ b/README.md @@ -358,8 +358,8 @@ in the prefixing, there will also be a breaking change in the way counters are submitted. So far counters didn't live under any namespace and were also a bit confusing due to the way they record rate and absolute counts. In the legacy setting rates were recorded under `stats.counter_name` directly, whereas the -absolute count could be found under `stats_count.counter_name`. With disabling -the legacy namespacing those values can be found (with default prefixing) +absolute count could be found under `stats_counts.counter_name`. When legacy namespacing +is disabled those values can be found (with default prefixing) under `stats.counters.counter_name.rate` and `stats.counters.counter_name.count` now. From cc12534e3b3bf5137a263e4e065876329e1a719c Mon Sep 17 00:00:00 2001 From: Ian Malpass Date: Fri, 1 Feb 2013 10:43:50 -0600 Subject: [PATCH 153/233] Adding support for sampling timers. 
--- lib/process_metrics.js | 5 ++++- stats.js | 25 +++++++++++++++---------- test/graphite_tests.js | 35 ++++++++++++++++++++++++++++++++++- test/process_metrics_tests.js | 32 ++++++++++++++++++++++++++++++-- 4 files changed, 83 insertions(+), 14 deletions(-) diff --git a/lib/process_metrics.js b/lib/process_metrics.js index 0bc78201..2d26e08a 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -6,6 +6,7 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { var statsd_metrics = {}; var counters = metrics.counters; var timers = metrics.timers; + var timer_counters = metrics.timer_counters; var pctThreshold = metrics.pctThreshold; for (key in counters) { @@ -61,11 +62,13 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { for (var i = 0; i < count; i++) { sumOfDiffs += (values[i] - mean) * (values[i] - mean); } + var stddev = Math.sqrt(sumOfDiffs / count); current_timer_data["std"] = stddev; current_timer_data["upper"] = max; current_timer_data["lower"] = min; - current_timer_data["count"] = count; + current_timer_data["count"] = timer_counters[key]; + current_timer_data["count_ps"] = timer_counters[key] / (flushInterval / 1000); current_timer_data["sum"] = sum; current_timer_data["mean"] = mean; diff --git a/stats.js b/stats.js index fe4def16..8c6d9eff 100644 --- a/stats.js +++ b/stats.js @@ -13,6 +13,7 @@ var dgram = require('dgram') var keyCounter = {}; var counters = {}; var timers = {}; +var timer_counters = {}; var gauges = {}; var sets = {}; var counter_rates = {}; @@ -48,6 +49,7 @@ function flushMetrics() { counters: counters, gauges: gauges, timers: timers, + timer_counters: timer_counters, sets: sets, counter_rates: counter_rates, timer_data: timer_data, @@ -69,6 +71,7 @@ function flushMetrics() { // Clear the timers for (var key in metrics.timers) { metrics.timers[key] = []; + metrics.timer_counters[key] = 0; } // Clear the sets @@ -158,6 +161,16 @@ config.configFile(process.argv[2], 
function (config, oldConfig) { for (var i = 0; i < bits.length; i++) { var sampleRate = 1; var fields = bits[i].split("|"); + if (fields[2]) { + if (fields[2].match(/^@([\d\.]+)/)) { + sampleRate = Number(fields[2].match(/^@([\d\.]+)/)[1]); + } else { + l.log('Bad line: ' + fields + ' in msg "' + metrics[midx] +'"; has invalid sample rate'); + counters[bad_lines_seen]++; + stats['messages']['bad_lines_seen']++; + continue; + } + } if (fields[1] === undefined) { l.log('Bad line: ' + fields + ' in msg "' + metrics[midx] +'"'); counters[bad_lines_seen]++; @@ -167,8 +180,10 @@ config.configFile(process.argv[2], function (config, oldConfig) { if (fields[1].trim() == "ms") { if (! timers[key]) { timers[key] = []; + timer_counters[key] = 0; } timers[key].push(Number(fields[0] || 0)); + timer_counters[key] += (1 / sampleRate); } else if (fields[1].trim() == "g") { gauges[key] = Number(fields[0] || 0); } else if (fields[1].trim() == "s") { @@ -177,16 +192,6 @@ config.configFile(process.argv[2], function (config, oldConfig) { } sets[key].insert(fields[0] || '0'); } else { - if (fields[2]) { - if (fields[2].match(/^@([\d\.]+)/)) { - sampleRate = Number(fields[2].match(/^@([\d\.]+)/)[1]); - } else { - l.log('Bad line: ' + fields + ' in msg "' + metrics[midx] +'"; has invalid sample rate'); - counters[bad_lines_seen]++; - stats['messages']['bad_lines_seen']++; - continue; - } - } if (! counters[key]) { counters[key] = 0; } diff --git a/test/graphite_tests.js b/test/graphite_tests.js index c0a9b05b..ed09f9c0 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -190,7 +190,7 @@ module.exports = { }, timers_are_valid: function (test) { - test.expect(3); + test.expect(5); var testvalue = 100; var me = this; @@ -216,6 +216,39 @@ module.exports = { }; test.ok(_.any(hashes,testtimervalue_test), 'stats.timers.a_test_value.mean should be ' + testvalue); + var count_test = function(post, metric){ + var mykey = 'stats.timers.a_test_value.' 
+ metric; + return _.first(_.filter(_.pluck(post, mykey), function (e) { return e })); + }; + test.equals(count_test(hashes, 'count_ps'), 5, 'count_ps should be 5'); + test.equals(count_test(hashes, 'count'), 1, 'count should be 1'); + + test.done(); + }); + }); + }); + }, + + sampled_timers_are_valid: function (test) { + test.expect(2); + + var testvalue = 100; + var me = this; + this.acceptor.once('connection',function(c){ + statsd_send('a_test_value:' + testvalue + '|ms|@0.1',me.sock,'127.0.0.1',8125,function(){ + collect_for(me.acceptor,me.myflush*2,function(strings){ + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var count_test = function(post, metric){ + var mykey = 'stats.timers.a_test_value.' + metric; + return _.first(_.filter(_.pluck(post, mykey), function (e) { return e })); + }; + test.equals(count_test(hashes, 'count_ps'), 50, 'count_ps should be 50'); + test.equals(count_test(hashes, 'count'), 10, 'count should be 10'); test.done(); }); }); diff --git a/test/process_metrics_tests.js b/test/process_metrics_tests.js index 6252c4b3..0b9d904e 100644 --- a/test/process_metrics_tests.js +++ b/test/process_metrics_tests.js @@ -7,6 +7,7 @@ module.exports = { var counters = {}; var gauges = {}; var timers = {}; + var timer_counters = {}; var sets = {}; var pctThreshold = null; @@ -14,6 +15,7 @@ module.exports = { counters: counters, gauges: gauges, timers: timers, + timer_counters: timer_counters, sets: sets, pctThreshold: pctThreshold } @@ -36,33 +38,38 @@ module.exports = { timers_handle_empty: function(test) { test.expect(1); this.metrics.timers['a'] = []; + this.metrics.timer_counters['a'] = 0; pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); //potentially a cleaner way to check this test.equal(undefined, this.metrics.counter_rates['a']); test.done(); }, timers_single_time: function(test) { - test.expect(6); + test.expect(7); 
this.metrics.timers['a'] = [100]; + this.metrics.timer_counters['a'] = 1; pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); timer_data = this.metrics.timer_data['a']; test.equal(0, timer_data.std); test.equal(100, timer_data.upper); test.equal(100, timer_data.lower); test.equal(1, timer_data.count); + test.equal(10, timer_data.count_ps); test.equal(100, timer_data.sum); test.equal(100, timer_data.mean); test.done(); }, timers_multiple_times: function(test) { - test.expect(6); + test.expect(7); this.metrics.timers['a'] = [100, 200, 300]; + this.metrics.timer_counters['a'] = 3; pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); timer_data = this.metrics.timer_data['a']; test.equal(81.64965809277261, timer_data.std); test.equal(300, timer_data.upper); test.equal(100, timer_data.lower); test.equal(3, timer_data.count); + test.equal(30, timer_data.count_ps); test.equal(600, timer_data.sum); test.equal(200, timer_data.mean); test.done(); @@ -70,6 +77,7 @@ module.exports = { timers_single_time_single_percentile: function(test) { test.expect(3); this.metrics.timers['a'] = [100]; + this.metrics.timer_counters['a'] = 1; this.metrics.pctThreshold = [90]; pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); timer_data = this.metrics.timer_data['a']; @@ -81,6 +89,7 @@ module.exports = { timers_single_time_multiple_percentiles: function(test) { test.expect(6); this.metrics.timers['a'] = [100]; + this.metrics.timer_counters['a'] = 1; this.metrics.pctThreshold = [90, 80]; pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); timer_data = this.metrics.timer_data['a']; @@ -95,6 +104,7 @@ module.exports = { timers_multiple_times_single_percentiles: function(test) { test.expect(3); this.metrics.timers['a'] = [100, 200, 300]; + this.metrics.timer_counters['a'] = 3; this.metrics.pctThreshold = [90]; pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); timer_data = this.metrics.timer_data['a']; @@ 
-106,9 +116,27 @@ module.exports = { timers_multiple_times_multiple_percentiles: function(test) { test.expect(6); this.metrics.timers['a'] = [100, 200, 300]; + this.metrics.timer_counters['a'] = 3; + this.metrics.pctThreshold = [90, 80]; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + timer_data = this.metrics.timer_data['a']; + test.equal(200, timer_data.mean_90); + test.equal(300, timer_data.upper_90); + test.equal(600, timer_data.sum_90); + test.equal(150, timer_data.mean_80); + test.equal(200, timer_data.upper_80); + test.equal(300, timer_data.sum_80); + test.done(); + }, + timers_sampled_times: function(test) { + test.expect(8); + this.metrics.timers['a'] = [100, 200, 300]; + this.metrics.timer_counters['a'] = 50; this.metrics.pctThreshold = [90, 80]; pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); timer_data = this.metrics.timer_data['a']; + test.equal(50, timer_data.count); + test.equal(500, timer_data.count_ps); test.equal(200, timer_data.mean_90); test.equal(300, timer_data.upper_90); test.equal(600, timer_data.sum_90); From a636fc2383b50fceb7354370c66a236394784676 Mon Sep 17 00:00:00 2001 From: Yarek T Date: Wed, 6 Feb 2013 13:40:18 +0000 Subject: [PATCH 154/233] Catch DNS errors from UDP socket If repeater is used with hostnames rather than IPs a DNS resolution error crashes statsd server. If you have a chain of statsd servers configured to repeat data down the line this has the potential to bring down the entire chain. Attaching an error handler stops dgram socket from throwing an exception and instead logs to console --- backends/repeater.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backends/repeater.js b/backends/repeater.js index 5388a722..94e3f8ba 100644 --- a/backends/repeater.js +++ b/backends/repeater.js @@ -7,7 +7,10 @@ function RepeaterBackend(startupTime, config, emitter){ this.sock = (config.repeaterProtocol == 'udp6') ? 
dgram.createSocket('udp6') : dgram.createSocket('udp4'); - + // Attach DNS error handler + sock.on('error', function (err) { + console.log('Repeater error: ' + err); + }); // attach emitter.on('packet', function(packet, rinfo) { self.process(packet, rinfo); }); }; From d374de667d35c9cd21cd380a5c0d7887864dbce0 Mon Sep 17 00:00:00 2001 From: Yarek T Date: Thu, 7 Feb 2013 23:12:24 +0000 Subject: [PATCH 155/233] Update backends/repeater.js --- backends/repeater.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/repeater.js b/backends/repeater.js index 94e3f8ba..6c917d2d 100644 --- a/backends/repeater.js +++ b/backends/repeater.js @@ -8,7 +8,7 @@ function RepeaterBackend(startupTime, config, emitter){ dgram.createSocket('udp6') : dgram.createSocket('udp4'); // Attach DNS error handler - sock.on('error', function (err) { + this.sock.on('error', function (err) { console.log('Repeater error: ' + err); }); // attach From 54086c61c2c8eb0bc37b32982a7e3d59bf9781e1 Mon Sep 17 00:00:00 2001 From: Yarek T Date: Fri, 8 Feb 2013 23:26:11 +0000 Subject: [PATCH 156/233] Use Logger for repeater and don't scream if debug is off --- backends/repeater.js | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/backends/repeater.js b/backends/repeater.js index 6c917d2d..2da129ae 100644 --- a/backends/repeater.js +++ b/backends/repeater.js @@ -1,6 +1,8 @@ var util = require('util'), - dgram = require('dgram'); - + dgram = require('dgram'), + logger = require('../lib/logger'); +var l; +var debug; function RepeaterBackend(startupTime, config, emitter){ var self = this; this.config = config.repeater || []; @@ -9,7 +11,9 @@ function RepeaterBackend(startupTime, config, emitter){ dgram.createSocket('udp4'); // Attach DNS error handler this.sock.on('error', function (err) { - console.log('Repeater error: ' + err); + if (debug) { + l.log('Repeater error: ' + err); + } }); // attach emitter.on('packet', function(packet, rinfo) { 
self.process(packet, rinfo); }); @@ -21,8 +25,8 @@ RepeaterBackend.prototype.process = function(packet, rinfo) { for(var i=0; i Date: Fri, 1 Feb 2013 23:43:38 -0800 Subject: [PATCH 157/233] Readme example to delete a counter from shell --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 64f91fe3..cb404dd9 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,11 @@ The stats output currently will give you: Each backend will also publish a set of statistics, prefixed by its module name. +You can use this to delete buckets no longer in use. For example, if you were hosting statsd at 10.10.10.10: + + #to delete counter sandbox.test.temporary + echo "delcounters sandbox.test.temporary" | nc 10.10.10.10 8126 + Graphite: * graphite.last_flush: the number of seconds elapsed since the last successful From 7447b557241017adb8b5033220ed7dcffc92a69e Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Mon, 11 Feb 2013 18:22:40 -0500 Subject: [PATCH 158/233] bugfix: make sure histogram metric path is correct also slight optimisation in the metric setting loop for timers --- backends/graphite.js | 13 ++++++++++--- test/graphite_tests.js | 2 ++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 1208433b..32207394 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -99,10 +99,17 @@ var flush_stats = function graphite_flush(ts, metrics) { for (key in timer_data) { if (Object.keys(timer_data).length > 0) { + var namespace = timerNamespace.concat(key); + var the_key = namespace.join("."); for (timer_data_key in timer_data[key]) { - var namespace = timerNamespace.concat(key); - var the_key = namespace.join("."); - statString += the_key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ' ' + ts + "\n"; + if (typeof(timer_data_key) === 'string') { + statString += the_key + '.' 
+ timer_data_key + ' ' + timer_data[key][timer_data_key] + ' ' + ts + "\n"; + } else { + for (timer_data_sub_key in timer_data[key][timer_data_key]) { + statString += the_key + '.' + timer_data_key + '.' + timer_data_sub_key + ' ' + + timer_data[key][timer_data_key][timer_data_sub_key] + ' ' + ts + "\n"; + } + } } numStats += 1; diff --git a/test/graphite_tests.js b/test/graphite_tests.js index c0a9b05b..4cb1dab6 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -76,6 +76,7 @@ module.exports = { , batch: 200 \n\ , flushInterval: " + this.myflush + " \n\ , percentThreshold: 90\n\ + , timer: {histogram: [ { metric: \"a_test_value\", bins: [10] } ]}\n\ , port: 8125\n\ , dumpMessages: false \n\ , debug: false\n\ @@ -214,6 +215,7 @@ module.exports = { var mykey = 'stats.timers.a_test_value.mean_90'; return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); }; + //TODO: test here that one of the histogram datapoints is also correct test.ok(_.any(hashes,testtimervalue_test), 'stats.timers.a_test_value.mean should be ' + testvalue); test.done(); From f0a9361c2de72ecd11444e11c5c2c34e5fd7f936 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Tue, 19 Feb 2013 15:22:14 -0500 Subject: [PATCH 159/233] make histograms work with the graphite backend --- backends/graphite.js | 3 ++- stats.js | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 40677dfc..973b66b5 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -117,10 +117,11 @@ var flush_stats = function graphite_flush(ts, metrics) { var namespace = timerNamespace.concat(key); var the_key = namespace.join("."); - if (typeof(timer_data_key) === 'string') { + if (typeof(timer_data[key][timer_data_key]) === 'number') { statString += the_key + '.' 
+ timer_data_key + ' ' + timer_data[key][timer_data_key] + ts_suffix; } else { for (timer_data_sub_key in timer_data[key][timer_data_key]) { + l.log(timer_data[key][timer_data_key][timer_data_sub_key].toString()); statString += the_key + '.' + timer_data_key + '.' + timer_data_sub_key + ' ' + timer_data[key][timer_data_key][timer_data_sub_key] + ts_suffix; } diff --git a/stats.js b/stats.js index 1f727569..e18a08e9 100644 --- a/stats.js +++ b/stats.js @@ -54,7 +54,7 @@ function flushMetrics() { counter_rates: counter_rates, timer_data: timer_data, pctThreshold: pctThreshold, - histogram: config.histogram + histogram: conf.histogram } // After all listeners, reset the stats @@ -124,7 +124,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { bad_lines_seen = prefixStats + ".bad_lines_seen"; packets_received = prefixStats + ".packets_received"; - //now set to zero so we can increment them + //now set to zero so we can increment them counters[bad_lines_seen] = 0; counters[packets_received] = 0; From d246dab8117d3741997e0cf1879dd90d17e6071e Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Tue, 19 Feb 2013 18:13:04 -0500 Subject: [PATCH 160/233] move percentThreshold in global namespace for now --- exampleConfig.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/exampleConfig.js b/exampleConfig.js index 31b90b66..f6e2f976 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -27,6 +27,9 @@ Optional Variables: debugInterval: interval to print debug information [ms, default: 10000] dumpMessages: log all incoming messages flushInterval: interval (in ms) to flush to Graphite + percentThreshold: for time information, calculate the Nth percentile(s) + (can be a single value or list of floating-point values) + [%, default: 90] keyFlush: log the most frequently sent keys [object, default: undefined] interval: how often to log frequent keys [ms, default: 0] percent: percentage of frequent keys to log [%, default: 100] @@ -62,9 
+65,6 @@ Optional Variables: ["udp4" or "udp6", default: "udp4"] timer: - percentThreshold: calculate the Nth percentile(s) - (can be a single value or list of floating-point values) - [%, default: 90] histogram: an array of mappings of strings (to match metrics) and corresponding ordered non-inclusive upper limits of bins. For all matching metrics, histograms are maintained over From 2ca3d215f66b348f8c0c311765f83b58fbff986b Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Tue, 19 Feb 2013 18:18:55 -0500 Subject: [PATCH 161/233] implement graphite test for histograms --- test/graphite_tests.js | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/graphite_tests.js b/test/graphite_tests.js index fd67cadc..8d066589 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -76,7 +76,7 @@ module.exports = { , batch: 200 \n\ , flushInterval: " + this.myflush + " \n\ , percentThreshold: 90\n\ - , timer: {histogram: [ { metric: \"a_test_value\", bins: [10] } ]}\n\ + , histogram: [ { metric: \"a_test_value\", bins: [10] } ]\n\ , port: 8125\n\ , dumpMessages: false \n\ , debug: false\n\ @@ -191,7 +191,7 @@ module.exports = { }, timers_are_valid: function (test) { - test.expect(5); + test.expect(6); var testvalue = 100; var me = this; @@ -215,7 +215,11 @@ module.exports = { var mykey = 'stats.timers.a_test_value.mean_90'; return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); }; - //TODO: test here that one of the histogram datapoints is also correct + var testtimerhistogramvalue_test = function(post){ + var mykey = 'stats.timers.a_test_value.histogram.bin_10'; + return _.include(_.keys(post),mykey) && (post[mykey] == 0); + }; + test.ok(_.any(hashes,testtimerhistogramvalue_test), 'stats.timers.a_test_value.mean should be ' + 0); test.ok(_.any(hashes,testtimervalue_test), 'stats.timers.a_test_value.mean should be ' + testvalue); var count_test = function(post, metric){ From 08eb71ac60c25f8755a600794be8c04195e4331e Mon 
Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Tue, 19 Feb 2013 18:23:46 -0500 Subject: [PATCH 162/233] remove timer subsection that was never used --- exampleConfig.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/exampleConfig.js b/exampleConfig.js index f6e2f976..b042342a 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -64,8 +64,7 @@ Optional Variables: repeaterProtocol: whether to use udp4 or udp6 for repeaters. ["udp4" or "udp6", default: "udp4"] - timer: - histogram: an array of mappings of strings (to match metrics) and + histogram: for timers, an array of mappings of strings (to match metrics) and corresponding ordered non-inclusive upper limits of bins. For all matching metrics, histograms are maintained over time by writing the frequencies for all bins. From 2d256e279bb2d624a0af4af3496d61a70160be54 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Tue, 19 Feb 2013 18:40:10 -0500 Subject: [PATCH 163/233] clarify histogram graphite test: use a bin that actually includes the value --- test/graphite_tests.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/graphite_tests.js b/test/graphite_tests.js index 8d066589..bf1886a8 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -76,7 +76,7 @@ module.exports = { , batch: 200 \n\ , flushInterval: " + this.myflush + " \n\ , percentThreshold: 90\n\ - , histogram: [ { metric: \"a_test_value\", bins: [10] } ]\n\ + , histogram: [ { metric: \"a_test_value\", bins: [1000] } ]\n\ , port: 8125\n\ , dumpMessages: false \n\ , debug: false\n\ @@ -216,10 +216,10 @@ module.exports = { return _.include(_.keys(post),mykey) && (post[mykey] == testvalue); }; var testtimerhistogramvalue_test = function(post){ - var mykey = 'stats.timers.a_test_value.histogram.bin_10'; - return _.include(_.keys(post),mykey) && (post[mykey] == 0); + var mykey = 'stats.timers.a_test_value.histogram.bin_1000'; + return _.include(_.keys(post),mykey) && (post[mykey] == 1); 
}; - test.ok(_.any(hashes,testtimerhistogramvalue_test), 'stats.timers.a_test_value.mean should be ' + 0); + test.ok(_.any(hashes,testtimerhistogramvalue_test), 'stats.timers.a_test_value.mean should be ' + 1); test.ok(_.any(hashes,testtimervalue_test), 'stats.timers.a_test_value.mean should be ' + testvalue); var count_test = function(post, metric){ From 68082eaf32ea6c2a05ffb9cea1a155ffa76e50f8 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Tue, 19 Feb 2013 19:02:37 -0500 Subject: [PATCH 164/233] clearer sample graphite url --- utils/statsd-timer-metric-counts.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/statsd-timer-metric-counts.sh b/utils/statsd-timer-metric-counts.sh index d36967f6..944820b0 100755 --- a/utils/statsd-timer-metric-counts.sh +++ b/utils/statsd-timer-metric-counts.sh @@ -9,7 +9,7 @@ # that you were first sending so many packets of which only a fraction were being processed and shown in these counts, # that even after sampling more statsd still can't process them all and your count stays in the same range. 
-graphite_url=http://dfvimeographite2.df.vimeows.com +graphite_url=http:// whisper_dir=/var/lib/carbon/whisper timers_subdir=stats/timers From 51e30b6c9288d80294da95f3256a1921bc51ee1c Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Tue, 19 Feb 2013 23:22:54 -0500 Subject: [PATCH 165/233] fix indent --- stats.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stats.js b/stats.js index d3be3886..6ca4656b 100644 --- a/stats.js +++ b/stats.js @@ -62,10 +62,10 @@ function flushMetrics() { conf.deleteCounters = conf.deleteCounters || false; for (key in metrics.counters) { if (conf.deleteCounters) { - if ((key.indexOf("packets_received") != -1) || (key.indexOf("bad_lines_seen") != -1) { + if ((key.indexOf("packets_received") != -1) || (key.indexOf("bad_lines_seen") != -1) { metrics.counters[key] = 0; } else { - delete(metrics.counters[key]); + delete(metrics.counters[key]); } } else { metrics.counters[key] = 0; From a99c8bd0d68764472ae08c6929e4adf0fdada2d6 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Tue, 19 Feb 2013 23:24:18 -0500 Subject: [PATCH 166/233] fix syntax --- stats.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stats.js b/stats.js index 6ca4656b..3b3acb0a 100644 --- a/stats.js +++ b/stats.js @@ -62,7 +62,7 @@ function flushMetrics() { conf.deleteCounters = conf.deleteCounters || false; for (key in metrics.counters) { if (conf.deleteCounters) { - if ((key.indexOf("packets_received") != -1) || (key.indexOf("bad_lines_seen") != -1) { + if ((key.indexOf("packets_received") != -1) || (key.indexOf("bad_lines_seen") != -1)) { metrics.counters[key] = 0; } else { delete(metrics.counters[key]); From ec4056e507fa7719630e37f04ee77ccb99a451b6 Mon Sep 17 00:00:00 2001 From: draco2003 Date: Thu, 28 Feb 2013 20:57:23 +0000 Subject: [PATCH 167/233] Remove extra Object.keys check in the timer_data loop --- backends/graphite.js | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff 
--git a/backends/graphite.js b/backends/graphite.js index 0a57ce3d..79aca15d 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -76,7 +76,7 @@ var post_stats = function graphite_post_stats(statString) { graphiteStats.last_exception = Math.round(new Date().getTime() / 1000); } } -} +}; var flush_stats = function graphite_flush(ts, metrics) { var ts_suffix = ' ' + ts + "\n"; @@ -110,26 +110,23 @@ var flush_stats = function graphite_flush(ts, metrics) { } for (key in timer_data) { - if (Object.keys(timer_data).length > 0) { + var namespace = timerNamespace.concat(key); + var the_key = namespace.join("."); + for (timer_data_key in timer_data[key]) { var namespace = timerNamespace.concat(key); var the_key = namespace.join("."); - for (timer_data_key in timer_data[key]) { - var namespace = timerNamespace.concat(key); - var the_key = namespace.join("."); - - if (typeof(timer_data[key][timer_data_key]) === 'number') { - statString += the_key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ts_suffix; - } else { - for (timer_data_sub_key in timer_data[key][timer_data_key]) { - l.log(timer_data[key][timer_data_key][timer_data_sub_key].toString()); - statString += the_key + '.' + timer_data_key + '.' + timer_data_sub_key + ' ' + - timer_data[key][timer_data_key][timer_data_sub_key] + ts_suffix; - } + + if (typeof(timer_data[key][timer_data_key]) === 'number') { + statString += the_key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ts_suffix; + } else { + for (timer_data_sub_key in timer_data[key][timer_data_key]) { + l.log(timer_data[key][timer_data_key][timer_data_sub_key].toString()); + statString += the_key + '.' + timer_data_key + '.' 
+ timer_data_sub_key + ' ' + + timer_data[key][timer_data_key][timer_data_sub_key] + ts_suffix; } } - - numStats += 1; } + numStats += 1; } for (key in gauges) { From 3eecd18d3c68828e3754882d0d6ab8b34be0ab31 Mon Sep 17 00:00:00 2001 From: James Socol Date: Mon, 9 Apr 2012 14:45:27 -0400 Subject: [PATCH 168/233] Add support for gauge deltas. * Add tests for gauges and new gauge deltas. * Add support for +/- prefix for gauge values to support modifying gauge values in-place. --- stats.js | 6 +++- test/graphite_tests.js | 70 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/stats.js b/stats.js index aac60fd8..d1e7b8f1 100644 --- a/stats.js +++ b/stats.js @@ -191,7 +191,11 @@ config.configFile(process.argv[2], function (config, oldConfig) { timers[key].push(Number(fields[0] || 0)); timer_counters[key] += (1 / sampleRate); } else if (fields[1].trim() == "g") { - gauges[key] = Number(fields[0] || 0); + if (gauges[key] && fields[0].match(/^[-+]/)) { + gauges[key] += Number(fields[0] || 0); + } else { + gauges[key] = Number(fields[0] || 0); + } } else if (fields[1].trim() == "s") { if (! 
sets[key]) { sets[key] = new set.Set(); diff --git a/test/graphite_tests.js b/test/graphite_tests.js index bf1886a8..08ddfe1b 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -298,5 +298,75 @@ module.exports = { }); }); }); + }, + + gauges_are_valid: function(test) { + test.expect(3); + + var testvalue = 70; + var me = this; + this.acceptor.once('connection', function(c) { + statsd_send('a_test_value:' + testvalue + '|g', me.sock, '127.0.0.1', 8125, function() { + collect_for(me.acceptor, me.myflush*2, function(strings) { + test.ok(strings.length > 0, 'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post) { + var mykey = 'statsd.numStats'; + return _.include(_.keys(post), mykey) && (post[mykey] == 1); + }; + test.ok(_.any(hashes, numstat_test), 'statsd.numStats should be 1'); + + var gaugevalue_test = function(post) { + var mykey = 'stats.gauges.a_test_value'; + return _.include(_.keys(post), mykey) && (post[mykey] == testvalue); + }; + test.ok(_.any(hashes, gaugevalue_test), 'stats.gauges.a_test_value should be ' + testvalue); + + test.done(); + }); + }); + }); + }, + + gauge_modifications_are_valid: function(test) { + test.expect(3); + + var teststartvalue = 50; + var testdeltavalue = '-3'; + var testresult = teststartvalue + Number(testdeltavalue); + var me = this; + this.acceptor.once('connection', function(c) { + statsd_send('test_value:' + teststartvalue + '|g', me.sock, '127.0.0.1', 8125, function() { + statsd_send('test_value:' + testdeltavalue + '|g', me.sock, '127.0.0.1', 8125, function() { + collect_for(me.acceptor, me.myflush * 2, function(strings) { + test.ok(strings.length > 0, 'should receive some data'); + var hashes = _.map(strings, function(x) { + var chunks = x.split(' '); + var data = {}; + data[chunks[0]] = chunks[1]; + return data; + }); + var numstat_test = function(post) { 
+ var mykey = 'statsd.numStats'; + return _.include(_.keys(post), mykey) && (post[mykey] == 1); + }; + test.ok(_.any(hashes, numstat_test), 'statsd.numStats should be 1'); + + var gaugevalue_test = function(post) { + var mykey = 'stats.gauges.test_value'; + return _.include(_.keys(post), mykey) && (post[mykey] == testresult); + }; + test.ok(_.any(hashes, gaugevalue_test), 'stats.gauges.test_value should be ' + testresult); + + test.done(); + }); + }); + }); + }); } } From c583fc3c8710dd55ab5ebd40f99fd4ebc53ddbaa Mon Sep 17 00:00:00 2001 From: James Socol Date: Fri, 1 Mar 2013 11:30:52 -0500 Subject: [PATCH 169/233] Remove numStats asserts. --- test/graphite_tests.js | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/test/graphite_tests.js b/test/graphite_tests.js index 08ddfe1b..b93a6aec 100644 --- a/test/graphite_tests.js +++ b/test/graphite_tests.js @@ -301,7 +301,7 @@ module.exports = { }, gauges_are_valid: function(test) { - test.expect(3); + test.expect(2); var testvalue = 70; var me = this; @@ -315,11 +315,6 @@ module.exports = { data[chunks[0]] = chunks[1]; return data; }); - var numstat_test = function(post) { - var mykey = 'statsd.numStats'; - return _.include(_.keys(post), mykey) && (post[mykey] == 1); - }; - test.ok(_.any(hashes, numstat_test), 'statsd.numStats should be 1'); var gaugevalue_test = function(post) { var mykey = 'stats.gauges.a_test_value'; @@ -334,7 +329,7 @@ module.exports = { }, gauge_modifications_are_valid: function(test) { - test.expect(3); + test.expect(2); var teststartvalue = 50; var testdeltavalue = '-3'; @@ -351,11 +346,6 @@ module.exports = { data[chunks[0]] = chunks[1]; return data; }); - var numstat_test = function(post) { - var mykey = 'statsd.numStats'; - return _.include(_.keys(post), mykey) && (post[mykey] == 1); - }; - test.ok(_.any(hashes, numstat_test), 'statsd.numStats should be 1'); var gaugevalue_test = function(post) { var mykey = 'stats.gauges.test_value'; From 
1170392de1cc3ed3aacdd97d2b326d3710743607 Mon Sep 17 00:00:00 2001 From: James Socol Date: Fri, 1 Mar 2013 13:13:46 -0500 Subject: [PATCH 170/233] Update readme with gauge delta info. --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 3356890c..d7a4b0dd 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,19 @@ StatsD now also supports gauges, arbitrary values, which can be recorded. gaugor:333|g +Adding a sign to the gauge value will change the value, rather than setting it. + + gaugor:-10|g + gaugor:+4|g + +So if `gaugor` was `333`, those commands would set it to `333 - 10 + 4`, or +`327`. + +Note: + +This implies you can't explicitly set a gauge to a negative number +without first setting it to zero. + Sets ---- StatsD supports counting unique occurences of events between flushes, From dcf0620da1ba3492a4b7decd063fcf31ce365cb9 Mon Sep 17 00:00:00 2001 From: draco2003 Date: Sat, 2 Mar 2013 00:19:33 +0000 Subject: [PATCH 171/233] remove duplicate code, and fix global leak --- backends/graphite.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 79aca15d..46cacd79 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -110,8 +110,6 @@ var flush_stats = function graphite_flush(ts, metrics) { } for (key in timer_data) { - var namespace = timerNamespace.concat(key); - var the_key = namespace.join("."); for (timer_data_key in timer_data[key]) { var namespace = timerNamespace.concat(key); var the_key = namespace.join("."); @@ -119,7 +117,7 @@ var flush_stats = function graphite_flush(ts, metrics) { if (typeof(timer_data[key][timer_data_key]) === 'number') { statString += the_key + '.' 
+ timer_data_key + ' ' + timer_data[key][timer_data_key] + ts_suffix; } else { - for (timer_data_sub_key in timer_data[key][timer_data_key]) { + for (var timer_data_sub_key in timer_data[key][timer_data_key]) { l.log(timer_data[key][timer_data_key][timer_data_sub_key].toString()); statString += the_key + '.' + timer_data_key + '.' + timer_data_sub_key + ' ' + timer_data[key][timer_data_key][timer_data_sub_key] + ts_suffix; From 046634974785fe34616710b29462478f3eb14db2 Mon Sep 17 00:00:00 2001 From: draco2003 Date: Sat, 2 Mar 2013 00:28:19 +0000 Subject: [PATCH 172/233] pulled out the wrong set --- backends/graphite.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 46cacd79..9e59fc09 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -110,10 +110,9 @@ var flush_stats = function graphite_flush(ts, metrics) { } for (key in timer_data) { + var namespace = timerNamespace.concat(key); + var the_key = namespace.join("."); for (timer_data_key in timer_data[key]) { - var namespace = timerNamespace.concat(key); - var the_key = namespace.join("."); - if (typeof(timer_data[key][timer_data_key]) === 'number') { statString += the_key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ts_suffix; } else { From e36bc535548eb519032d0f3f117d6fe313d615c6 Mon Sep 17 00:00:00 2001 From: draco2003 Date: Sat, 2 Mar 2013 03:44:40 +0000 Subject: [PATCH 173/233] Just join once. 
--- backends/graphite.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 9e59fc09..7ccd7318 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -57,11 +57,11 @@ var post_stats = function graphite_post_stats(statString) { graphite.on('connect', function() { var ts = Math.round(new Date().getTime() / 1000); var ts_suffix = ' ' + ts + "\n"; - var namespace = globalNamespace.concat(prefixStats); - statString += namespace.join(".") + '.graphiteStats.last_exception ' + last_exception + ts_suffix; - statString += namespace.join(".") + '.graphiteStats.last_flush ' + last_flush + ts_suffix; - statString += namespace.join(".") + '.graphiteStats.flush_time ' + flush_time + ts_suffix; - statString += namespace.join(".") + '.graphiteStats.flush_length ' + flush_length + ts_suffix; + var namespace = globalNamespace.concat(prefixStats).join("."); + statString += namespace + '.graphiteStats.last_exception ' + last_exception + ts_suffix; + statString += namespace + '.graphiteStats.last_flush ' + last_flush + ts_suffix; + statString += namespace + '.graphiteStats.flush_time ' + flush_time + ts_suffix; + statString += namespace + '.graphiteStats.flush_length ' + flush_length + ts_suffix; var starttime = Date.now(); this.write(statString); this.end(); From f968b2347e4ed0aaef55b4fbf3a8a990dbe9d215 Mon Sep 17 00:00:00 2001 From: draco2003 Date: Sat, 2 Mar 2013 03:45:36 +0000 Subject: [PATCH 174/233] OCD --- backends/graphite.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/graphite.js b/backends/graphite.js index 7ccd7318..55206316 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -32,12 +32,12 @@ var prefixGauge; var prefixSet; // set up namespaces -var legacyNamespace = true; +var legacyNamespace = true; var globalNamespace = []; var counterNamespace = []; var timerNamespace = []; var gaugesNamespace = []; -var setsNamespace = []; +var setsNamespace 
= []; var graphiteStats = {}; From 54ab8a5c43289d27642a19da6f9c0a5cd970d8d9 Mon Sep 17 00:00:00 2001 From: draco2003 Date: Sat, 2 Mar 2013 03:51:27 +0000 Subject: [PATCH 175/233] Logging is for debug mode --- backends/graphite.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backends/graphite.js b/backends/graphite.js index 55206316..ab6d27b1 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -117,7 +117,9 @@ var flush_stats = function graphite_flush(ts, metrics) { statString += the_key + '.' + timer_data_key + ' ' + timer_data[key][timer_data_key] + ts_suffix; } else { for (var timer_data_sub_key in timer_data[key][timer_data_key]) { - l.log(timer_data[key][timer_data_key][timer_data_sub_key].toString()); + if (debug) { + l.log(timer_data[key][timer_data_key][timer_data_sub_key].toString()); + } statString += the_key + '.' + timer_data_key + '.' + timer_data_sub_key + ' ' + timer_data[key][timer_data_key][timer_data_sub_key] + ts_suffix; } From 69d4f6af3ca0a4a779e03b1a0a3e1183767db493 Mon Sep 17 00:00:00 2001 From: draco2003 Date: Sat, 2 Mar 2013 04:52:28 +0000 Subject: [PATCH 176/233] only split the packet if we need to --- stats.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/stats.js b/stats.js index d1e7b8f1..738e5da4 100644 --- a/stats.js +++ b/stats.js @@ -141,7 +141,12 @@ config.configFile(process.argv[2], function (config, oldConfig) { server = dgram.createSocket('udp4', function (msg, rinfo) { backendEvents.emit('packet', msg, rinfo); counters[packets_received]++; - var metrics = msg.toString().split("\n"); + var packet_data = msg.toString(); + if (packet_data.indexOf("\n") > -1) { + var metrics = packet_data.split("\n"); + } else { + var metrics = [ packet_data ] ; + } for (var midx in metrics) { if (config.dumpMessages) { From eab7dd6e0dd8a3b77770d20f70bcbe3c1abff2f5 Mon Sep 17 00:00:00 2001 From: draco2003 Date: Mon, 4 Mar 2013 22:57:40 +0000 Subject: [PATCH 177/233] a little cleanup and 
reduce the trim calls --- stats.js | 68 ++++++++++++++++++++++++++------------------------------ 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/stats.js b/stats.js index d1e7b8f1..9008d107 100644 --- a/stats.js +++ b/stats.js @@ -36,7 +36,7 @@ function loadBackend(config, name) { l.log("Failed to load backend: " + name); process.exit(1); } -}; +} // global for conf var conf; @@ -55,33 +55,33 @@ function flushMetrics() { timer_data: timer_data, pctThreshold: pctThreshold, histogram: conf.histogram - } + }; // After all listeners, reset the stats backendEvents.once('flush', function clear_metrics(ts, metrics) { // Clear the counters conf.deleteCounters = conf.deleteCounters || false; - for (key in metrics.counters) { + for (var counter_key in metrics.counters) { if (conf.deleteCounters) { - if ((key.indexOf("packets_received") != -1) || (key.indexOf("bad_lines_seen") != -1)) { - metrics.counters[key] = 0; + if ((counter_key.indexOf("packets_received") != -1) || (counter_key.indexOf("bad_lines_seen") != -1)) { + metrics.counters[counter_key] = 0; } else { - delete(metrics.counters[key]); + delete(metrics.counters[counter_key]); } } else { - metrics.counters[key] = 0; + metrics.counters[counter_key] = 0; } } // Clear the timers - for (var key in metrics.timers) { - metrics.timers[key] = []; - metrics.timer_counters[key] = 0; + for (var timer_key in metrics.timers) { + metrics.timers[timer_key] = []; + metrics.timer_counters[timer_key] = 0; } // Clear the sets - for (var key in metrics.sets) { - metrics.sets[key] = new set.Set(); + for (var set_key in metrics.sets) { + metrics.sets[set_key] = new set.Set(); } }); @@ -89,7 +89,7 @@ function flushMetrics() { backendEvents.emit('flush', time_stamp, metrics); }); -}; +} var stats = { messages: { @@ -160,7 +160,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { keyCounter[key] += 1; } - if (bits.length == 0) { + if (bits.length === 0) { bits.push("1"); } @@ -173,30 +173,31 @@ 
config.configFile(process.argv[2], function (config, oldConfig) { } else { l.log('Bad line: ' + fields + ' in msg "' + metrics[midx] +'"; has invalid sample rate'); counters[bad_lines_seen]++; - stats['messages']['bad_lines_seen']++; + stats.messages.bad_lines_seen++; continue; } } if (fields[1] === undefined) { l.log('Bad line: ' + fields + ' in msg "' + metrics[midx] +'"'); counters[bad_lines_seen]++; - stats['messages']['bad_lines_seen']++; + stats.messages.bad_lines_seen++; continue; } - if (fields[1].trim() == "ms") { + var metric_type = fields[1].trim(); + if (metric_type === "ms") { if (! timers[key]) { timers[key] = []; timer_counters[key] = 0; } timers[key].push(Number(fields[0] || 0)); timer_counters[key] += (1 / sampleRate); - } else if (fields[1].trim() == "g") { + } else if (metric_type === "g") { if (gauges[key] && fields[0].match(/^[-+]/)) { gauges[key] += Number(fields[0] || 0); } else { gauges[key] = Number(fields[0] || 0); } - } else if (fields[1].trim() == "s") { + } else if (metric_type === "s") { if (! 
sets[key]) { sets[key] = new set.Set(); } @@ -210,14 +211,14 @@ config.configFile(process.argv[2], function (config, oldConfig) { } } - stats['messages']['last_msg_seen'] = Math.round(new Date().getTime() / 1000); + stats.messages.last_msg_seen = Math.round(new Date().getTime() / 1000); }); mgmtServer = net.createServer(function(stream) { stream.setEncoding('ascii'); stream.on('error', function(err) { - l.log('Caught ' + err +', Moving on') + l.log('Caught ' + err +', Moving on'); }); stream.on('data', function(data) { @@ -287,25 +288,25 @@ config.configFile(process.argv[2], function (config, oldConfig) { break; case "delcounters": - for (var index in cmdline) { - delete counters[cmdline[index]]; - stream.write("deleted: " + cmdline[index] + "\n"); + for (var counter_index in cmdline) { + delete counters[cmdline[counter_index]]; + stream.write("deleted: " + cmdline[counter_index] + "\n"); } stream.write("END\n\n"); break; case "deltimers": - for (var index in cmdline) { - delete timers[cmdline[index]]; - stream.write("deleted: " + cmdline[index] + "\n"); + for (var timer_index in cmdline) { + delete timers[cmdline[timer_index]]; + stream.write("deleted: " + cmdline[timer_index] + "\n"); } stream.write("END\n\n"); break; case "delgauges": - for (var index in cmdline) { - delete gauges[cmdline[index]]; - stream.write("deleted: " + cmdline[index] + "\n"); + for (var gauge_index in cmdline) { + delete gauges[cmdline[gauge_index]]; + stream.write("deleted: " + cmdline[gauge_index] + "\n"); } stream.write("END\n\n"); break; @@ -352,7 +353,6 @@ config.configFile(process.argv[2], function (config, oldConfig) { var keyFlushLog = config.keyFlush && config.keyFlush.log; keyFlushInt = setInterval(function () { - var key; var sortedKeys = []; for (var key in keyCounter) { @@ -381,9 +381,5 @@ config.configFile(process.argv[2], function (config, oldConfig) { keyCounter = {}; }, keyFlushInterval); } - - - ; - } -}) +}); From 1ea68365f8b5d1f010bf28c3b2364c3fdffde3ab Mon Sep 17 
00:00:00 2001 From: draco2003 Date: Thu, 7 Mar 2013 02:58:55 +0000 Subject: [PATCH 178/233] Initial Doc Split starting point --- README.md | 398 +++----------------------------------- docs/admin_interface.md | 46 +++++ docs/backend.md | 46 +++++ docs/backend_interface.md | 73 +++++++ docs/graphite.md | 66 +++++++ docs/metric_types.md | 115 +++++++++++ docs/namespacing.md | 30 +++ 7 files changed, 403 insertions(+), 371 deletions(-) create mode 100644 docs/admin_interface.md create mode 100644 docs/backend.md create mode 100644 docs/backend_interface.md create mode 100644 docs/graphite.md create mode 100644 docs/metric_types.md create mode 100644 docs/namespacing.md diff --git a/README.md b/README.md index d7a4b0dd..49f50aa0 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,16 @@ and sends aggregates to one or more pluggable backend services (e.g., We ([Etsy][etsy]) [blogged][blog post] about how it works and why we created it. +Inspiration +----------- -Concepts +StatsD was inspired (heavily) by the project (of the same name) at Flickr. +Here's a post where Cal Henderson described it in depth: +[Counting and timing](http://code.flickr.com/blog/2008/10/27/counting-timing/). +Cal re-released the code recently: +[Perl StatsD](https://github.com/iamcal/Flickr-StatsD) + +Key Concepts -------- * *buckets* @@ -25,115 +33,26 @@ general values should be integer. After the flush interval timeout (defined by `config.flushInterval`, default 10 seconds), stats are aggregated and sent to an upstream backend service. -Counting --------- - - gorets:1|c - -This is a simple counter. Add 1 to the "gorets" bucket. -At each flush the current count is sent and reset to 0. -If the count at flush is 0 then you can opt to send no metric at all for -this counter, by setting `config.deleteCounters` (applies only to graphite -backend). Statsd will send both the rate as well as the count at each flush. 
- -### Sampling - - gorets:1|c|@0.1 - -Tells StatsD that this counter is being sent sampled every 1/10th of the time. - -Timing ------- - - glork:320|ms - -The glork took 320ms to complete this time. StatsD figures out percentiles, -average (mean), standard deviation, sum, lower and upper bounds for the flush interval. -The percentile threshold can be tweaked with `config.percentThreshold`. - -The percentile threshold can be a single value, or a list of values, and will -generate the following list of stats for each threshold: - - stats.timers.$KEY.mean_$PCT - stats.timers.$KEY.upper_$PCT - stats.timers.$KEY.sum_$PCT - -Where `$KEY` is the stats key you specify when sending to statsd, and `$PCT` is -the percentile threshold. - -Use the `config.histogram` setting to instruct statsd to maintain histograms -over time. Specify which metrics to match and a corresponding list of -ordered non-inclusive upper limits of bins (class intervals). -(use `inf` to denote infinity; a lower limit of 0 is assumed) -Each `flushInterval`, statsd will store how many values (absolute frequency) -fall within each bin (class interval), for all matching metrics. -Examples: - -* no histograms for any timer (default): `[]` -* histogram to only track render durations, - with unequal class intervals and catchall for outliers: - - [ { metric: 'render', bins: [ 0.01, 0.1, 1, 10, 'inf'] } ] - -* histogram for all timers except 'foo' related, - with equal class interval and catchall for outliers: - - [ { metric: 'foo', bins: [] }, - { metric: '', bins: [ 50, 100, 150, 200, 'inf'] } ] - -Note: - -* first match for a metric wins. -* bin upper limits may contain decimals. -* this is actually more powerful than what's strictly considered -histograms, as you can make each bin arbitrarily wide, -i.e. class intervals of different sizes. - -Gauges ------- -StatsD now also supports gauges, arbitrary values, which can be recorded. 
- - gaugor:333|g - -Adding a sign to the gauge value will change the value, rather than setting it. - - gaugor:-10|g - gaugor:+4|g - -So if `gaugor` was `333`, those commands would set it to `333 - 10 + 4`, or -`327`. - -Note: - -This implies you can't explicitly set a gauge to a negative number -without first setting it to zero. - -Sets ----- -StatsD supports counting unique occurences of events between flushes, -using a Set to store all occuring events. - - uniques:765|s -Multi-Metric Packets --------------------- -StatsD supports receiving multiple metrics in a single packet by separating them -with a newline. +Installation and Configuration +------------------------------ - gorets:1|c\nglork:320|ms\ngaugor:333|g\nuniques:765|s + * Install node.js + * Clone the project + * Create a config file from exampleConfig.js and put it somewhere + * Start the Daemon: -Be careful to keep the total length of the payload within your network's MTU. There -is no single good value to use, but here are some guidelines for common network -scenarios: + node stats.js /path/to/config -* Fast Ethernet (1432) - This is most likely for Intranets. -* Gigabit Ethernet (8932) - Jumbo frames can make use of this feature much more - efficient. -* Commodity Internet (512) - If you are routing over the internet a value in this - range will be reasonable. You might be able to go higher, but you are at the mercy - of all the hops in your route. +More Specific Topics +-------- +[Metric Types](./docs/metric_types.md) +[Graphite Integration](./docs/graphite.md) +[Supported Backends](./docs/backends.md) +[Admin TCP Interface](./docs/admin_interface.md) +[Backend Interface](./docs/backend_interface.md) +[Metric Namespacing](./docs/namespacing.md) -*(These payload numbers take into account the maximum IP + UDP header sizes)* Debugging --------- @@ -146,158 +65,6 @@ There are additional config variables available for debugging: For more information, check the `exampleConfig.js`. 
-Supported Backends ------------------- - -StatsD supports pluggable backend modules that can publish -statistics from the local StatsD daemon to a backend service or data -store. Backend services can retain statistics in a time series data store, -visualize statistics in graphs or tables, or generate alerts based on -defined thresholds. A backend can also correlate statistics sent from StatsD -daemons running across multiple hosts in an infrastructure. - -StatsD includes the following built-in backends: - -* [Graphite][graphite] (`graphite`): An open-source - time-series data store that provides visualization through a web-browser. -* Console (`console`): Outputs the received - metrics to stdout (see what's going on during development). -* Repeater (`repeater`): Utilizes the `packet` emit API to - forward raw packets retrieved by StatsD to multiple backend StatsD instances. - -A robust set of [other backends](https://github.com/etsy/statsd/wiki/Backends) -are also available as plugins to allow easy reporting into databases, queues -and third-party services. - -By default, the `graphite` backend will be loaded automatically. Multiple -backends can be run at once. To select which backends are loaded, set -the `backends` configuration variable to the list of backend modules to load. - -Backends are just npm modules which implement the interface described in -section *Backend Interface*. In order to be able to load the backend, add the -module name into the `backends` variable in your config. As the name is also -used in the `require` directive, you can load one of the provided backends by -giving the relative path (e.g. `./backends/graphite`). - -Graphite Schema ---------------- - -Graphite uses "schemas" to define the different round robin datasets it houses -(analogous to RRAs in rrdtool). 
Here's an example for the stats databases: - -In conf/storage-schemas.conf: - - [stats] - pattern = ^stats.* - retentions = 10:2160,60:10080,600:262974 - -In conf/storage-aggregation.conf: - - [min] - pattern = \.min$ - xFilesFactor = 0.1 - aggregationMethod = min - - [max] - pattern = \.max$ - xFilesFactor = 0.1 - aggregationMethod = max - - [sum] - pattern = \.count$ - xFilesFactor = 0 - aggregationMethod = sum - - [default_average] - pattern = .* - xFilesFactor = 0.3 - aggregationMethod = average - -This translates to: - -* 6 hours of 10 second data (what we consider "near-realtime") -* 1 week of 1 minute data -* 5 years of 10 minute data -* For databases with 'min' or 'max' in the name, keep only the minimum and - maximum value when rolling up data and store a None if less than 10% of the - datapoints were received -* For databases with 'count' in the name, add all the values together, and - store only a None if none of the datapoints were received -* For all other databases, average the values (mean) when rolling up data, and - store a None if less than 30% of the datapoints were received - -(Note: Newer versions of Graphite can take human readable time formats like -10s:6h,1min:7d,10min:5y) - -Retentions and aggregations are read from the file in order, the first pattern -that matches is used. This is set when the database is first created, changing -these config files will not change databases that have already been created. -To view or alter the settings on existing files, use whisper-info.py and -whisper-resize.py included with the Whisper package. - -These settings have been a good tradeoff so far between size-of-file (round -robin databases are fixed size) and data we care about. Each "stats" database -is about 3.2 megs with these retentions. - -Many users have been confused to see their hit counts averaged, missing when -the data is intermittent, or never stored when statsd is sending at a different -interval than graphite expects. 
Storage aggregation settings will help you -control this and understand what Graphite is doing internally with your data. - -TCP Stats Interface -------------------- - -A really simple TCP management interface is available by default on port 8126 -or overriden in the configuration file. Inspired by the memcache stats approach -this can be used to monitor a live statsd server. You can interact with the -management server by telnetting to port 8126, the following commands are -available: - -* stats - some stats about the running server -* counters - a dump of all the current counters -* timers - a dump of the current timers - -The stats output currently will give you: - -* uptime: the number of seconds elapsed since statsd started -* messages.last_msg_seen: the number of elapsed seconds since statsd received a - message -* messages.bad_lines_seen: the number of bad lines seen since startup - -Each backend will also publish a set of statistics, prefixed by its -module name. - -You can use this to delete buckets no longer in use. For example, if you were hosting statsd at 10.10.10.10: - - #to delete counter sandbox.test.temporary - echo "delcounters sandbox.test.temporary" | nc 10.10.10.10 8126 - -Graphite: - -* graphite.last_flush: the number of seconds elapsed since the last successful - flush to graphite -* graphite.last_exception: the number of seconds elapsed since the last - exception thrown whilst flushing to graphite -* graphite.flush_length: the length of the string sent to graphite -* graphite.flush_time: the time it took to send the data to graphite - -Those statistics will also be sent to graphite under the namespaces -`stats.statsd.graphiteStats.last_exception` and -`stats.statsd.graphiteStats.last_flush`. - -A simple nagios check can be found in the utils/ directory that can be used to -check metric thresholds, for example the number of seconds since the last -successful flush to graphite. 
- -Installation and Configuration ------------------------------- - - * Install node.js - * Clone the project - * Create a config file from exampleConfig.js and put it somewhere - * Start the Daemon: - - node stats.js /path/to/config Tests ----- @@ -311,117 +78,6 @@ background (don't do this on a production machine!). Tests can be executed with `./run_tests.sh`. -Backend Interface ------------------ - -Backend modules are Node.js [modules][nodemods] that listen for a -number of events emitted from StatsD. Each backend module should -export the following initialization function: - -* `init(startup_time, config, events)`: This method is invoked from StatsD to - initialize the backend module. It accepts three parameters: - `startup_time` is the startup time of StatsD in epoch seconds, - `config` is the parsed config file hash, and `events` is the event - emitter that backends can use to listen for events. - - The backend module should return `true` from init() to indicate - success. A return of `false` indicates a failure to load the module - (missing configuration?) and will cause StatsD to exit. - -Backends can listen for the following events emitted by StatsD from -the `events` object: - -* Event: **'flush'** - - Parameters: `(time_stamp, metrics)` - - Emitted on each flush interval so that backends can push aggregate - metrics to their respective backend services. The event is passed - two parameters: `time_stamp` is the current time in epoch seconds - and `metrics` is a hash representing the StatsD statistics: - - ``` -metrics: { - counters: counters, - gauges: gauges, - timers: timers, - sets: sets, - counter_rates: counter_rates, - timer_data: timer_data, - statsd_metrics: statsd_metrics, - pctThreshold: pctThreshold -} - ``` - - The counter_rates and timer_data are precalculated statistics to simplify - the creation of backends, the statsd_metrics hash contains metrics generated - by statsd itself. 
Each backend module is passed the same set of - statistics, so a backend module should treat the metrics as immutable - structures. StatsD will reset timers and counters after each - listener has handled the event. - -* Event: **'status'** - - Parameters: `(writeCb)` - - Emitted when a user invokes a *stats* command on the management - server port. It allows each backend module to dump backend-specific - status statistics to the management port. - - The `writeCb` callback function has a signature of `f(error, - backend_name, stat_name, stat_value)`. The backend module should - invoke this method with each stat_name and stat_value that should be - sent to the management port. StatsD will prefix each stat name with - the `backend_name`. The backend should set `error` to *null*, or, in - the case of a failure, an appropriate error. - -* Event: **'packet'** - - Parameters: `(packet, rinfo)` - - This is emitted for every incoming packet. The `packet` parameter contains - the raw received message string and the `rinfo` paramter contains remote - address information from the UDP socket. - - -Metric namespacing -------------------- -The metric namespacing in the Graphite backend is configurable with regard to -the prefixes. Per default all stats are put under `stats` in Graphite, which -makes it easier to consolidate them all under one schema. However it is -possible to change these namespaces in the backend configuration options. 
-The available configuration options (living under the `graphite` key) are: - -``` -legacyNamespace: use the legacy namespace [default: true] -globalPrefix: global prefix to use for sending stats to graphite [default: "stats"] -prefixCounter: graphite prefix for counter metrics [default: "counters"] -prefixTimer: graphite prefix for timer metrics [default: "timers"] -prefixGauge: graphite prefix for gauge metrics [default: "gauges"] -prefixSet: graphite prefix for set metrics [default: "sets"] -``` - -If you decide not to use the legacy namespacing, besides the obvious changes -in the prefixing, there will also be a breaking change in the way counters are -submitted. So far counters didn't live under any namespace and were also a bit -confusing due to the way they record rate and absolute counts. In the legacy -setting rates were recorded under `stats.counter_name` directly, whereas the -absolute count could be found under `stats_counts.counter_name`. When legacy namespacing -is disabled those values can be found (with default prefixing) -under `stats.counters.counter_name.rate` and -`stats.counters.counter_name.count` now. - -The number of elements in sets will be recorded under the metric -`stats.sets.set_name.count` (where "sets" is the prefixSet). - -Inspiration ------------ - -StatsD was inspired (heavily) by the project (of the same name) at Flickr. -Here's a post where Cal Henderson described it in depth: -[Counting and timing](http://code.flickr.com/blog/2008/10/27/counting-timing/). -Cal re-released the code recently: -[Perl StatsD](https://github.com/iamcal/Flickr-StatsD) Meta --------- @@ -440,12 +96,12 @@ fork StatsD from here: http://github.com/etsy/statsd 2. Hack away 3. If you are adding new functionality, document it in the README 4. If necessary, rebase your commits into logical chunks, without errors -5. Push the branch up to GitHub -6. Send a pull request to the etsy/statsd project. +5. 
Verify your code by running the test suite, and adding additional tests if able. +6. Push the branch up to GitHub +7. Send a pull request to the etsy/statsd project. We'll do our best to get your changes in! - [graphite]: http://graphite.wikidot.com [etsy]: http://www.etsy.com [blog post]: http://codeascraft.etsy.com/2011/02/15/measure-anything-measure-everything/ diff --git a/docs/admin_interface.md b/docs/admin_interface.md new file mode 100644 index 00000000..d1df04e1 --- /dev/null +++ b/docs/admin_interface.md @@ -0,0 +1,46 @@ +TCP Stats Interface +------------------- + +A really simple TCP management interface is available by default on port 8126 +or overridden in the configuration file. Inspired by the memcache stats approach +this can be used to monitor a live statsd server. You can interact with the +management server by telnetting to port 8126, the following commands are +available: + +* stats - some stats about the running server +* counters - a dump of all the current counters +* timers - a dump of the current timers + +The stats output currently will give you: + +* uptime: the number of seconds elapsed since statsd started +* messages.last_msg_seen: the number of elapsed seconds since statsd received a + message +* messages.bad_lines_seen: the number of bad lines seen since startup + +Each backend will also publish a set of statistics, prefixed by its +module name. + +You can use this to delete buckets no longer in use. 
For example, if you were hosting statsd at 10.10.10.10: + + #to delete counter sandbox.test.temporary + echo "delcounters sandbox.test.temporary" | nc 10.10.10.10 8126 + +Graphite: + +* graphite.last_flush: the number of seconds elapsed since the last successful + flush to graphite +* graphite.last_exception: the number of seconds elapsed since the last + exception thrown whilst flushing to graphite +* graphite.flush_length: the length of the string sent to graphite +* graphite.flush_time: the time it took to send the data to graphite + +Those statistics will also be sent to graphite under the namespaces +`stats.statsd.graphiteStats.last_exception` and +`stats.statsd.graphiteStats.last_flush`. + +A simple nagios check can be found in the utils/ directory that can be used to +check metric thresholds, for example the number of seconds since the last +successful flush to graphite. + + diff --git a/docs/backend.md b/docs/backend.md new file mode 100644 index 00000000..e554104c --- /dev/null +++ b/docs/backend.md @@ -0,0 +1,46 @@ +Supported Backends +------------------ + +StatsD supports pluggable backend modules that can publish +statistics from the local StatsD daemon to a backend service or data +store. Backend services can retain statistics in a time series data store, +visualize statistics in graphs or tables, or generate alerts based on +defined thresholds. A backend can also correlate statistics sent from StatsD +daemons running across multiple hosts in an infrastructure. + +StatsD includes the following built-in backends: + +* [Graphite][graphite] (`graphite`): An open-source + time-series data store that provides visualization through a web-browser. +* Console (`console`): Outputs the received + metrics to stdout (see what's going on during development). +* Repeater (`repeater`): Utilizes the `packet` emit API to + forward raw packets retrieved by StatsD to multiple backend StatsD instances. + +By default, the `graphite` backend will be loaded automatically. 
Multiple +backends can be run at once. To select which backends are loaded, set +the `backends` configuration variable to the list of backend modules to load. + +Backends are just npm modules which implement the interface described in +section [Backend Interface](./backend_interface.md). In order to be able to load the backend, add the +module name into the `backends` variable in your config. As the name is also +used in the `require` directive, you can load one of the provided backends by +giving the relative path (e.g. `./backends/graphite`). + +A robust set of backends are also available as plugins to allow easy reporting into databases, +queues and third-party services. + +## Available Third-party backends +- [amqp-backend](https://github.com/mrtazz/statsd-amqp-backend) +- [ganglia-backend](https://github.com/jbuchbinder/statsd-ganglia-backend) +- [librato-backend](https://github.com/librato/statsd-librato-backend) +- [socket.io-backend](https://github.com/Chatham/statsd-socket.io) +- [statsd-backend](https://github.com/dynmeth/statsd-backend) +- [mongo-backend](https://github.com/dynmeth/mongo-statsd-backend) +- [datadog-backend](https://github.com/datadog/statsd) +- [monitis backend](https://github.com/jeremiahshirk/statsd-monitis-backend) +- [instrumental backend](https://github.com/collectiveidea/statsd-instrumental-backend) +- [hosted graphite backend](https://github.com/hostedgraphite/statsdplugin) +- [statsd aggregation backend](https://github.com/wanelo/gossip_girl) +- [zabbix-backend](https://github.com/parkerd/statsd-zabbix-backend) +- [opentsdb backend](https://github.com/emurphy/statsd-opentsdb-backend) diff --git a/docs/backend_interface.md b/docs/backend_interface.md new file mode 100644 index 00000000..a5afc0b5 --- /dev/null +++ b/docs/backend_interface.md @@ -0,0 +1,73 @@ +Backend Interface +----------------- + +Backend modules are Node.js [modules][nodemods] that listen for a +number of events emitted from StatsD. 
Each backend module should +export the following initialization function: + +* `init(startup_time, config, events)`: This method is invoked from StatsD to + initialize the backend module. It accepts three parameters: + `startup_time` is the startup time of StatsD in epoch seconds, + `config` is the parsed config file hash, and `events` is the event + emitter that backends can use to listen for events. + + The backend module should return `true` from init() to indicate + success. A return of `false` indicates a failure to load the module + (missing configuration?) and will cause StatsD to exit. + +Backends can listen for the following events emitted by StatsD from +the `events` object: + +* Event: **'flush'** + + Parameters: `(time_stamp, metrics)` + + Emitted on each flush interval so that backends can push aggregate + metrics to their respective backend services. The event is passed + two parameters: `time_stamp` is the current time in epoch seconds + and `metrics` is a hash representing the StatsD statistics: + + ``` +metrics: { + counters: counters, + gauges: gauges, + timers: timers, + sets: sets, + counter_rates: counter_rates, + timer_data: timer_data, + statsd_metrics: statsd_metrics, + pctThreshold: pctThreshold +} + ``` + + The counter_rates and timer_data are precalculated statistics to simplify + the creation of backends, the statsd_metrics hash contains metrics generated + by statsd itself. Each backend module is passed the same set of + statistics, so a backend module should treat the metrics as immutable + structures. StatsD will reset timers and counters after each + listener has handled the event. + +* Event: **'status'** + + Parameters: `(writeCb)` + + Emitted when a user invokes a *stats* command on the management + server port. It allows each backend module to dump backend-specific + status statistics to the management port. + + The `writeCb` callback function has a signature of `f(error, + backend_name, stat_name, stat_value)`. 
The backend module should + invoke this method with each stat_name and stat_value that should be + sent to the management port. StatsD will prefix each stat name with + the `backend_name`. The backend should set `error` to *null*, or, in + the case of a failure, an appropriate error. + +* Event: **'packet'** + + Parameters: `(packet, rinfo)` + + This is emitted for every incoming packet. The `packet` parameter contains + the raw received message string and the `rinfo` paramter contains remote + address information from the UDP socket. + + diff --git a/docs/graphite.md b/docs/graphite.md new file mode 100644 index 00000000..bcaeb86b --- /dev/null +++ b/docs/graphite.md @@ -0,0 +1,66 @@ +Graphite Schema +--------------- + +Graphite uses "schemas" to define the different round robin datasets it houses +(analogous to RRAs in rrdtool). Here's an example for the stats databases: + +In conf/storage-schemas.conf: + + [stats] + pattern = ^stats.* + retentions = 10:2160,60:10080,600:262974 + +In conf/storage-aggregation.conf: + + [min] + pattern = \.min$ + xFilesFactor = 0.1 + aggregationMethod = min + + [max] + pattern = \.max$ + xFilesFactor = 0.1 + aggregationMethod = max + + [sum] + pattern = \.count$ + xFilesFactor = 0 + aggregationMethod = sum + + [default_average] + pattern = .* + xFilesFactor = 0.3 + aggregationMethod = average + +This translates to: + +* 6 hours of 10 second data (what we consider "near-realtime") +* 1 week of 1 minute data +* 5 years of 10 minute data +* For databases with 'min' or 'max' in the name, keep only the minimum and + maximum value when rolling up data and store a None if less than 10% of the + datapoints were received +* For databases with 'count' in the name, add all the values together, and + store only a None if none of the datapoints were received +* For all other databases, average the values (mean) when rolling up data, and + store a None if less than 30% of the datapoints were received + +(Note: Newer versions of Graphite can take 
human readable time formats like +10s:6h,1min:7d,10min:5y) + +Retentions and aggregations are read from the file in order, the first pattern +that matches is used. This is set when the database is first created, changing +these config files will not change databases that have already been created. +To view or alter the settings on existing files, use whisper-info.py and +whisper-resize.py included with the Whisper package. + +These settings have been a good tradeoff so far between size-of-file (round +robin databases are fixed size) and data we care about. Each "stats" database +is about 3.2 megs with these retentions. + +Many users have been confused to see their hit counts averaged, missing when +the data is intermittent, or never stored when statsd is sending at a different +interval than graphite expects. Storage aggregation settings will help you +control this and understand what Graphite is doing internally with your data. + + diff --git a/docs/metric_types.md b/docs/metric_types.md new file mode 100644 index 00000000..bee62585 --- /dev/null +++ b/docs/metric_types.md @@ -0,0 +1,115 @@ +StatsD Metric Types +================== + + +Counting +-------- + + gorets:1|c + +This is a simple counter. Add 1 to the "gorets" bucket. +At each flush the current count is sent and reset to 0. +If the count at flush is 0 then you can opt to send no metric at all for +this counter, by setting `config.deleteCounters` (applies only to graphite +backend). Statsd will send both the rate as well as the count at each flush. + +### Sampling + + gorets:1|c|@0.1 + +Tells StatsD that this counter is being sent sampled every 1/10th of the time. + +Timing +------ + + glork:320|ms + +The glork took 320ms to complete this time. StatsD figures out percentiles, +average (mean), standard deviation, sum, lower and upper bounds for the flush interval. +The percentile threshold can be tweaked with `config.percentThreshold`. 
+ +The percentile threshold can be a single value, or a list of values, and will +generate the following list of stats for each threshold: + + stats.timers.$KEY.mean_$PCT + stats.timers.$KEY.upper_$PCT + stats.timers.$KEY.sum_$PCT + +Where `$KEY` is the stats key you specify when sending to statsd, and `$PCT` is +the percentile threshold. + +Use the `config.histogram` setting to instruct statsd to maintain histograms +over time. Specify which metrics to match and a corresponding list of +ordered non-inclusive upper limits of bins (class intervals). +(use `inf` to denote infinity; a lower limit of 0 is assumed) +Each `flushInterval`, statsd will store how many values (absolute frequency) +fall within each bin (class interval), for all matching metrics. +Examples: + +* no histograms for any timer (default): `[]` +* histogram to only track render durations, + with unequal class intervals and catchall for outliers: + + [ { metric: 'render', bins: [ 0.01, 0.1, 1, 10, 'inf'] } ] + +* histogram for all timers except 'foo' related, + with equal class interval and catchall for outliers: + + [ { metric: 'foo', bins: [] }, + { metric: '', bins: [ 50, 100, 150, 200, 'inf'] } ] + +Note: + +* first match for a metric wins. +* bin upper limits may contain decimals. +* this is actually more powerful than what's strictly considered +histograms, as you can make each bin arbitrarily wide, +i.e. class intervals of different sizes. + +Gauges +------ +StatsD now also supports gauges, arbitrary values, which can be recorded. + + gaugor:333|g + +Adding a sign to the gauge value will change the value, rather than setting it. + + gaugor:-10|g + gaugor:+4|g + +So if `gaugor` was `333`, those commands would set it to `333 - 10 + 4`, or +`327`. + +Note: + +This implies you can't explicitly set a gauge to a negative number +without first setting it to zero. + +Sets +---- +StatsD supports counting unique occurences of events between flushes, +using a Set to store all occuring events. 
+ + uniques:765|s + +Multi-Metric Packets +-------------------- +StatsD supports receiving multiple metrics in a single packet by separating them +with a newline. + + gorets:1|c\nglork:320|ms\ngaugor:333|g\nuniques:765|s + +Be careful to keep the total length of the payload within your network's MTU. There +is no single good value to use, but here are some guidelines for common network +scenarios: + +* Fast Ethernet (1432) - This is most likely for Intranets. +* Gigabit Ethernet (8932) - Jumbo frames can make use of this feature much more + efficient. +* Commodity Internet (512) - If you are routing over the internet a value in this + range will be reasonable. You might be able to go higher, but you are at the mercy + of all the hops in your route. + +*(These payload numbers take into account the maximum IP + UDP header sizes)* + + diff --git a/docs/namespacing.md b/docs/namespacing.md new file mode 100644 index 00000000..cba54762 --- /dev/null +++ b/docs/namespacing.md @@ -0,0 +1,30 @@ +Metric namespacing +------------------- +The metric namespacing in the Graphite backend is configurable with regard to +the prefixes. Per default all stats are put under `stats` in Graphite, which +makes it easier to consolidate them all under one schema. However it is +possible to change these namespaces in the backend configuration options. 
+The available configuration options (living under the `graphite` key) are: + +``` +legacyNamespace: use the legacy namespace [default: true] +globalPrefix: global prefix to use for sending stats to graphite [default: "stats"] +prefixCounter: graphite prefix for counter metrics [default: "counters"] +prefixTimer: graphite prefix for timer metrics [default: "timers"] +prefixGauge: graphite prefix for gauge metrics [default: "gauges"] +prefixSet: graphite prefix for set metrics [default: "sets"] +``` + +If you decide not to use the legacy namespacing, besides the obvious changes +in the prefixing, there will also be a breaking change in the way counters are +submitted. So far counters didn't live under any namespace and were also a bit +confusing due to the way they record rate and absolute counts. In the legacy +setting rates were recorded under `stats.counter_name` directly, whereas the +absolute count could be found under `stats_counts.counter_name`. When legacy namespacing +is disabled those values can be found (with default prefixing) +under `stats.counters.counter_name.rate` and +`stats.counters.counter_name.count` now. + +The number of elements in sets will be recorded under the metric +`stats.sets.set_name.count` (where "sets" is the prefixSet). 
+ From 740c29ebd140e3613ff78429cfe8dbc1e2832c4c Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Wed, 6 Mar 2013 22:04:22 -0500 Subject: [PATCH 179/233] Fix link layout slightly --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 49f50aa0..cee4b91f 100644 --- a/README.md +++ b/README.md @@ -46,12 +46,12 @@ Installation and Configuration More Specific Topics -------- -[Metric Types](./docs/metric_types.md) -[Graphite Integration](./docs/graphite.md) -[Supported Backends](./docs/backends.md) -[Admin TCP Interface](./docs/admin_interface.md) -[Backend Interface](./docs/backend_interface.md) -[Metric Namespacing](./docs/namespacing.md) +* [Metric Types](./docs/metric_types.md) +* [Graphite Integration](./docs/graphite.md) +* [Supported Backends](./docs/backends.md) +* [Admin TCP Interface](./docs/admin_interface.md) +* [Backend Interface](./docs/backend_interface.md) +* [Metric Namespacing](./docs/namespacing.md) Debugging From ec0f46888e6b2023dae98aea04337f9dd344dca3 Mon Sep 17 00:00:00 2001 From: draco2003 Date: Thu, 7 Mar 2013 03:55:30 +0000 Subject: [PATCH 180/233] Remove debugInterval in favor of console backend --- README.md | 5 ++--- exampleConfig.js | 1 - stats.js | 27 +++++---------------------- 3 files changed, 7 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index d7a4b0dd..22cc996e 100644 --- a/README.md +++ b/README.md @@ -140,8 +140,7 @@ Debugging There are additional config variables available for debugging: -* `debug` - log exceptions and periodically print out information on counters and timers -* `debugInterval` - interval for printing out information on counters and timers +* `debug` - log exceptions and print out more diagnostic info * `dumpMessages` - print debug info on incoming messages For more information, check the `exampleConfig.js`. 
@@ -161,7 +160,7 @@ StatsD includes the following built-in backends: * [Graphite][graphite] (`graphite`): An open-source time-series data store that provides visualization through a web-browser. * Console (`console`): Outputs the received - metrics to stdout (see what's going on during development). + metrics to stdout (see what's going on during development or debugging). * Repeater (`repeater`): Utilizes the `packet` emit API to forward raw packets retrieved by StatsD to multiple backend StatsD instances. diff --git a/exampleConfig.js b/exampleConfig.js index b042342a..4b877af3 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -24,7 +24,6 @@ Optional Variables: mgmt_address: address to run the management TCP interface on [default: 0.0.0.0] mgmt_port: port to run the management TCP interface on [default: 8126] - debugInterval: interval to print debug information [ms, default: 10000] dumpMessages: log all incoming messages flushInterval: interval (in ms) to flush to Graphite percentThreshold: for time information, calculate the Nth percentile(s) diff --git a/stats.js b/stats.js index 9008d107..7a9d56ba 100644 --- a/stats.js +++ b/stats.js @@ -19,7 +19,7 @@ var sets = {}; var counter_rates = {}; var timer_data = {}; var pctThreshold = null; -var debugInt, flushInterval, keyFlushInt, server, mgmtServer; +var flushInterval, keyFlushInt, server, mgmtServer; var startup_time = Math.round(new Date().getTime() / 1000); var backendEvents = new events.EventEmitter(); @@ -103,34 +103,17 @@ var l; config.configFile(process.argv[2], function (config, oldConfig) { conf = config; - if (! 
config.debug && debugInt) { - clearInterval(debugInt); - debugInt = false; - } - l = new logger.Logger(config.log || {}); - if (config.debug) { - if (debugInt !== undefined) { - clearInterval(debugInt); - } - debugInt = setInterval(function () { - l.log("\nCounters:\n" + util.inspect(counters) + - "\nTimers:\n" + util.inspect(timers) + - "\nSets:\n" + util.inspect(sets) + - "\nGauges:\n" + util.inspect(gauges), 'DEBUG'); - }, config.debugInterval || 10000); - } - // setup config for stats prefix - prefixStats = config.prefixStats; - prefixStats = prefixStats !== undefined ? prefixStats : "statsd"; + prefixStats = config.prefixStats; + prefixStats = prefixStats !== undefined ? prefixStats : "statsd"; //setup the names for the stats stored in counters{} - bad_lines_seen = prefixStats + ".bad_lines_seen"; + bad_lines_seen = prefixStats + ".bad_lines_seen"; packets_received = prefixStats + ".packets_received"; //now set to zero so we can increment them - counters[bad_lines_seen] = 0; + counters[bad_lines_seen] = 0; counters[packets_received] = 0; if (server === undefined) { From 943ebdbbd314fb94543beee7014a992d38997320 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Thu, 7 Mar 2013 09:17:22 -0500 Subject: [PATCH 181/233] Test testing on 9.0 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index f8fa5d1a..91dc8ca8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,7 @@ language: node_js node_js: - 0.6 - 0.8 + - 0.9 script: "./run_tests.sh" notifications: email: false From 47373a87fcc0c5e7258862d6641dcaa3eef33f83 Mon Sep 17 00:00:00 2001 From: Elad Rosenheim Date: Thu, 7 Mar 2013 23:04:59 +0200 Subject: [PATCH 182/233] Update graphite doc for common pitfalls --- docs/graphite.md | 104 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 32 deletions(-) diff --git a/docs/graphite.md b/docs/graphite.md index bcaeb86b..e019be8b 100644 --- a/docs/graphite.md +++ b/docs/graphite.md @@ -1,66 +1,106 @@ 
-Graphite Schema ---------------- +Configuring Graphite for StatsD +------------------------------- -Graphite uses "schemas" to define the different round robin datasets it houses -(analogous to RRAs in rrdtool). Here's an example for the stats databases: +Many users have been confused to see their hit counts averaged, gone missing when +the data is intermittent, or never stored when statsd is sending at a different +interval than graphite expects. Careful setup of Graphite as suggested below should help to alleviate all these issues. When configuring Graphite, two main factors you need to consider are: + +1. What is the highest resolution of data points kept by Graphite, and at which points in time is data downsampled to lower resolutions. This decision is by nature directly related to your functional requirements: how far back should you keep data? what is the data resolution you actually need? However, the retention rules you set must also be in sync with statsd. + +2. How should data be aggregated when downsampled, in order to correctly preserve its meaning? Graphite of course knows nothing of the 'meaning' of your data, so let's explore the correct setup for the various metrics sent by statsd. -In conf/storage-schemas.conf: +### Storage Schemas + +To define retention and downsampling which match your needs, edit Graphite's conf/storage-schemas.conf file. Here is a simple example file that would handle all metrics sent by statsd: [stats] pattern = ^stats.* - retentions = 10:2160,60:10080,600:262974 + retentions = 10s:6h,1min:7d,10min:5y + +This translates to: for all metrics starting with 'stats' (i.e. all metrics sent by statsd), capture: + +* 6 hours of 10 second data (what we consider "near-realtime") +* 1 week of 1 minute data +* 5 years of 10 minute data + +These settings have been a good tradeoff so far between size-of-file (database files are fixed size) and data we care about. Each "stats" database file is about 3.2 megs with these retentions. 
+ +Retentions are read from the file in order and the first pattern that matches is used. +Graphite stores each metric in its own database file, and the retentions take effect when a metric file is first created. This means that changing this config file would not affect any files already created. To view or alter the settings on existing files, use whisper-info.py and whisper-resize.py included with the Whisper package. + +#### Correlation with statsd's flush interval: + +In the case of the above example, what would happen if you flush from statsd any faster then every 10 seconds? in that case, multiple values for the same metric may reach Graphite at any given 10-second timespan, and only the last value would take hold and be persisted - so your data would immediately be partially lost. + +To fix that, simply ensure your flush interval is at least as long as the highest-resolution retention. However, a long interval may cause other unfortunate mishaps, so keep reading - it pays to understand what's really going on. + +(Note: Older versions of Graphite do not support the human-readable time format shown above) + +### Storage Aggregation + +The next step is ensuring your data isn't corrupted or discarded when downsampled. Continuing with the example above, take for instance the downsampling of .mean values calculated for all stastd timers: -In conf/storage-aggregation.conf: +Graphite should downsample up to 6 samples representing 10-second mean values into a single value signfying the mean for a 1-minute timespan. This is simple: just average all samples to get the new value, and this is exactly the default method applied by Graphite. However, what about the .count metric also sent for timers? Each sample contains the count of occurences per flush interval, so you want these samples summed-up, not averaged! 
+ +You would not even notice any problem till you look at a graph for data older than 6 hours ago, since Graphite would need only the high-res 10-second samples to render the first 6 hours, but would have to switch to lower resolution data for rendering a longer timespan. + +Two other metric kinds also deserve a note: + +* Counts which are normalized by statsd to signify a per-second count should not be summed, since their meaning does not change when downsampling. + +* Metrics for minimum/maximum values should not be averaged but rather preserve the lowest/highest point, respectively. + +Let's see now how to configure downsampling in Graphite's conf/storage-aggregation.conf: [min] - pattern = \.min$ + pattern = \.lower$ xFilesFactor = 0.1 aggregationMethod = min [max] - pattern = \.max$ + pattern = \.upper$ xFilesFactor = 0.1 aggregationMethod = max [sum] + pattern = \.sum$ + xFilesFactor = 0 + aggregationMethod = sum + + [count] pattern = \.count$ xFilesFactor = 0 aggregationMethod = sum + [count_legacy] + pattern = ^stats_counts.* + xFilesFactor = 0 + aggregationMethod = sum + [default_average] pattern = .* xFilesFactor = 0.3 aggregationMethod = average -This translates to: +This means: -* 6 hours of 10 second data (what we consider "near-realtime") -* 1 week of 1 minute data -* 5 years of 10 minute data -* For databases with 'min' or 'max' in the name, keep only the minimum and - maximum value when rolling up data and store a None if less than 10% of the - datapoints were received -* For databases with 'count' in the name, add all the values together, and - store only a None if none of the datapoints were received +* For metrics ending with '.lower' or '.upper' (these are sent for all timers), keep only the minimum and maximum value when rolling up data and store a None if less than 10% of the datapoints were received. 
+* For metrics ending with 'count' or 'sum' in the name, or those under 'stats_counts', add all the values together, and store a None only if none of the datapoints were received. This would capture all non-normalized counters, but ignore the per-second ones. * For all other databases, average the values (mean) when rolling up data, and store a None if less than 30% of the datapoints were received -(Note: Newer versions of Graphite can take human readable time formats like -10s:6h,1min:7d,10min:5y) +Pay close attention to xFilesFactor: if your flush interval is long enough so there are not enough samples to satisfy this minimum factor, your data would simply be lost in the first downsampling cycle. However, setting a very low factor would also produce a misleading result, since you would probably agree that if you only have a single 10-second mean value sample reported in a 10-minute timeframe, this single sample alone should not normally be downsampled into a 10-minute mean value. For counts, however, every count should count ;-), hence the zero factor. -Retentions and aggregations are read from the file in order, the first pattern -that matches is used. This is set when the database is first created, changing -these config files will not change databases that have already been created. -To view or alter the settings on existing files, use whisper-info.py and -whisper-resize.py included with the Whisper package. +**Notes:** -These settings have been a good tradeoff so far between size-of-file (round -robin databases are fixed size) and data we care about. Each "stats" database -is about 3.2 megs with these retentions. +1. a '.count' metric is calculated for all timers, but up to and including v0.5.0, non-normalized counters are written under stats_counts - not under stats.counters as you might expect. 
Post-0.5.0, if you set legacyNamespace=false in the config then counters would indeed be written under stats.counters, in two variations: per-second counts under stats.counters.\.*rate*, and non-normalized per-flush counts under stats.counters.\.*count*. Hence, the rules above would handle counts for both timers and legacy/non-legacy counters. -Many users have been confused to see their hit counts averaged, missing when -the data is intermittent, or never stored when statsd is sending at a different -interval than graphite expects. Storage aggregation settings will help you -control this and understand what Graphite is doing internally with your data. +2. upper and lower values are also calculated for the n-percentile value defined for timers. The above example does not include rules for these, for brevity and performance. + +Similar to retentions, the aggregations in effect for any metric are set once the metric is first received, so a change to these settings would not affect existing metrics. + +### In conclusion +Graphite's handling of your statsd metrics should be verified at least once: is data mysteriously lost at any point? is data downsampled properly? are you defining graphs for counter metrics without knowing what timespan does each y-value actually represent? (admittedly, in some cases you may not even care about the y-values in the graph, as only the trend is of any interest. The coolest graphs seem to always lack y-values...) 
+For more information, see: http://graphite.readthedocs.org/en/latest/config-carbon.html From d0cf5d21e026f051f7c08c3f86b61eb053fb8545 Mon Sep 17 00:00:00 2001 From: Alex Williams Date: Sun, 10 Mar 2013 15:43:07 +0000 Subject: [PATCH 183/233] Add configuration option to listen on an IPv6 address --- README.md | 14 ++++++++++++++ exampleConfig.js | 1 + stats.js | 3 ++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 22cc996e..df784583 100644 --- a/README.md +++ b/README.md @@ -413,6 +413,20 @@ under `stats.counters.counter_name.rate` and The number of elements in sets will be recorded under the metric `stats.sets.set_name.count` (where "sets" is the prefixSet). +IPv6 +---- + +Here's an example on how to configure the UDP listener to listen on an IPv6 address: + +``` +{ +, address: "::1" +, address_ipv6: true +} +``` + +By default, if the `address_ipv6` configuration option is not set (or set to `false`), the socket will use IPv4. + Inspiration ----------- diff --git a/exampleConfig.js b/exampleConfig.js index 4b877af3..c86670d8 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -20,6 +20,7 @@ Optional Variables: the default graphite backend will be loaded. debug: debug flag [default: false] address: address to listen on over UDP [default: 0.0.0.0] + address_ipv6: defines if the address is an IPv4 or IPv6 address [true or false, default: false] port: port to listen for messages on over UDP [default: 8125] mgmt_address: address to run the management TCP interface on [default: 0.0.0.0] diff --git a/stats.js b/stats.js index deff4012..e579c910 100644 --- a/stats.js +++ b/stats.js @@ -121,7 +121,8 @@ config.configFile(process.argv[2], function (config, oldConfig) { // key counting var keyFlushInterval = Number((config.keyFlush && config.keyFlush.interval) || 0); - server = dgram.createSocket('udp4', function (msg, rinfo) { + var udp_version = config.address_ipv6 ? 
'udp6' : 'udp4' + server = dgram.createSocket(udp_version, function (msg, rinfo) { backendEvents.emit('packet', msg, rinfo); counters[packets_received]++; var packet_data = msg.toString(); From 0fe6a83313596aa59d612d76b513bd0c9eee2b36 Mon Sep 17 00:00:00 2001 From: "P. Mark Anderson" Date: Mon, 11 Mar 2013 17:46:23 -0700 Subject: [PATCH 184/233] Fixed link to supported backends --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 475afea2..12e0bcb9 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ More Specific Topics -------- * [Metric Types](./docs/metric_types.md) * [Graphite Integration](./docs/graphite.md) -* [Supported Backends](./docs/backends.md) +* [Supported Backends](./docs/backend.md) * [Admin TCP Interface](./docs/admin_interface.md) * [Backend Interface](./docs/backend_interface.md) * [Metric Namespacing](./docs/namespacing.md) From 7bad61e2c4f151ef7007b8aa8681833e77b2ca5c Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Tue, 12 Mar 2013 20:07:46 -0300 Subject: [PATCH 185/233] Testing 0.10 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 91dc8ca8..5ec61f77 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ node_js: - 0.6 - 0.8 - 0.9 + - "0.10" script: "./run_tests.sh" notifications: email: false From 41ea5885ccf75c20f8729f714f920fd6c82ff145 Mon Sep 17 00:00:00 2001 From: Joshua Garnett Date: Thu, 14 Mar 2013 10:51:36 -0400 Subject: [PATCH 186/233] Adding Scala Example --- examples/README.md | 1 + examples/StatsD.scala | 232 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 233 insertions(+) create mode 100644 examples/StatsD.scala diff --git a/examples/README.md b/examples/README.md index 582a561c..3b7547c4 100644 --- a/examples/README.md +++ b/examples/README.md @@ -12,6 +12,7 @@ Here's a bunch of example code contributed by the community for interfacing with ruby_example.rb - Ruby statsd.erl - Erlang statsd-client.sh - Bash + 
StatsD.scala - Scala Third Party StatsD Libraries ============================ diff --git a/examples/StatsD.scala b/examples/StatsD.scala new file mode 100644 index 00000000..7830b92d --- /dev/null +++ b/examples/StatsD.scala @@ -0,0 +1,232 @@ +/* + +Scala implementation of Andrew Gwozdziewycz's StatsdClient.java + +Copyright (c) 2013 Joshua Garnett + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +package com.statsd + +import java.io.IOException +import java.net._ +import java.nio.ByteBuffer +import java.nio.channels.DatagramChannel +import java.util.Random +import org.slf4j.LoggerFactory +import akka.actor._ + +/** + * Client for sending stats to StatsD uses Akka to manage concurrency + * + * @param context The Akka ActorContext + * @param host The statsd host + * @param port The statsd port + * @param multiMetrics If true, multiple stats will be sent in a single UDP packet + * @param packetBufferSize If multiMetrics is true, this is the max buffer size before sending the UDP packet + */ +class StatsD(context: ActorContext, + host: String, + port: Int, + multiMetrics: Boolean = true, + packetBufferSize: Int = 1024) { + + private val rand = new Random() + + private val actorRef = context.actorOf(Props(new StatsDActor(host, port, multiMetrics, packetBufferSize))) + + /** + * Sends timing stats in milliseconds to StatsD + * + * @param key name of the stat + * @param value time in milliseconds + */ + def timing(key: String, value: Int, sampleRate: Double = 1.0) = { + send(key, value.toString, StatsDProtocol.TIMING_METRIC, sampleRate) + } + + /** + * Decrement StatsD counter + * + * @param key name of the stat + * @param magnitude how much to decrement + */ + def decrement(key: String, magnitude: Int = -1, sampleRate: Double = 1.0) = { + increment(key, magnitude, sampleRate) + } + + /** + * Increment StatsD counter + * + * @param key name of the stat + * @param magnitude how much to increment + */ + def increment(key: String, magnitude: Int = 1, sampleRate: Double = 1.0) = { + send(key, magnitude.toString, StatsDProtocol.COUNTER_METRIC, sampleRate) + } + + /** + * StatsD now also supports gauges, arbitrary values, which can be recorded. 
+ * + * @param key name of the stat + * @param value Can be a fixed value or increase or decrease (Ex: "10" "-1" "+5") + */ + def gauge(key: String, value: String = "1", sampleRate: Double = 1.0) = { + send(key, value, StatsDProtocol.GAUGE_METRIC, sampleRate) + } + + /** + * StatsD supports counting unique occurrences of events between flushes, using a Set to store all occurring events. + * + * @param key name of the stat + * @param value value of the set + */ + def set(key: String, value: Int, sampleRate: Double = 1.0) = { + send(key, value.toString, StatsDProtocol.SET_METRIC, sampleRate) + } + + /** + * Checks the sample rate and sends the stat to the actor if it passes + */ + private def send(key: String, value: String, metric: String, sampleRate: Double): Boolean = { + if (sampleRate >= 1 || rand.nextDouble <= sampleRate) { + actorRef ! SendStat(StatsDProtocol.stat(key, value, metric, sampleRate)) + true + } + else { + false + } + } +} + +object StatsDProtocol { + val TIMING_METRIC = "ms" + val COUNTER_METRIC = "c" + val GAUGE_METRIC = "g" + val SET_METRIC = "s" + + /** + * @return Returns a string that conforms to the StatsD protocol: + * KEY:VALUE|METRIC or KEY:VALUE|METRIC|@SAMPLE_RATE + */ + def stat(key: String, value: String, metric: String, sampleRate: Double) = { + val sampleRateString = if (sampleRate < 1) "|@" + sampleRate else "" + key + ":" + value + "|" + metric + sampleRateString + } +} + +/** + * Message for the StatsDActor + */ +private case class SendStat(stat: String) + +/** + * @param host The statsd host + * @param port The statsd port + * @param multiMetrics If true, multiple stats will be sent in a single UDP packet + * @param packetBufferSize If multiMetrics is true, this is the max buffer size before sending the UDP packet + */ +private class StatsDActor(host: String, + port: Int, + multiMetrics: Boolean, + packetBufferSize: Int) extends Actor { + + private val log = LoggerFactory.getLogger(getClass()) + + private val sendBuffer = 
ByteBuffer.allocate(packetBufferSize) + + private val address = new InetSocketAddress(InetAddress.getByName(host), port) + private val channel = DatagramChannel.open() + + def receive = { + case msg: SendStat => doSend(msg.stat) + case _ => log.error("Unknown message") + } + + override def postStop() = { + //save any remaining data to StatsD + flush + + //Close the channel + if (channel.isOpen()) { + channel.close() + } + + sendBuffer.clear() + } + + private def doSend(stat: String) = { + try { + val data = stat.getBytes("utf-8") + + // If we're going to go past the threshold of the buffer then flush. + // the +1 is for the potential '\n' in multi_metrics below + if (sendBuffer.remaining() < (data.length + 1)) { + flush + } + + // multiple metrics are separated by '\n' + if (sendBuffer.position() > 0) { + sendBuffer.put('\n'.asInstanceOf[Byte]) + } + + // append the data + sendBuffer.put(data) + + if (!multiMetrics) { + flush + } + + } + catch { + case e: IOException => { + log.error("Could not send stat {} to host {}:{}", sendBuffer.toString, address.getHostName(), address.getPort().toString, e) + } + } + } + + private def flush(): Unit = { + try { + val sizeOfBuffer = sendBuffer.position() + + if (sizeOfBuffer <= 0) { + // empty buffer + return + } + + // send and reset the buffer + sendBuffer.flip() + val nbSentBytes = channel.send(sendBuffer, address) + sendBuffer.limit(sendBuffer.capacity()) + sendBuffer.rewind() + + if (sizeOfBuffer != nbSentBytes) { + log.error("Could not send entirely stat {} to host {}:{}. 
Only sent {} bytes out of {} bytes", sendBuffer.toString(), + address.getHostName(), address.getPort().toString, nbSentBytes.toString, sizeOfBuffer.toString) + } + + } + catch { + case e: IOException => { + log.error("Could not send stat {} to host {}:{}", sendBuffer.toString, address.getHostName(), address.getPort().toString, e) + } + } + } +} \ No newline at end of file From 366faeffd15e20051fa17837865001e960b2cbb7 Mon Sep 17 00:00:00 2001 From: sreuter Date: Fri, 15 Mar 2013 12:31:12 +0100 Subject: [PATCH 187/233] Added Leftronic-Backend to docs --- docs/backend.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/backend.md b/docs/backend.md index e554104c..68c01664 100644 --- a/docs/backend.md +++ b/docs/backend.md @@ -44,3 +44,4 @@ queues and third-party services. - [statsd aggregation backend](https://github.com/wanelo/gossip_girl) - [zabbix-backend](https://github.com/parkerd/statsd-zabbix-backend) - [opentsdb backend](https://github.com/emurphy/statsd-opentsdb-backend) +- [leftronic backend](https://github.com/sreuter/statsd-leftronic-backend) From 8cd74ba5068566e261a82ca0b3611421ab964e03 Mon Sep 17 00:00:00 2001 From: sreuter Date: Fri, 15 Mar 2013 16:35:27 +0100 Subject: [PATCH 188/233] Update admin_interface.md Added gauges command to admin_interface.md documentation file --- docs/admin_interface.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/admin_interface.md b/docs/admin_interface.md index d1df04e1..a11abd2f 100644 --- a/docs/admin_interface.md +++ b/docs/admin_interface.md @@ -9,6 +9,7 @@ available: * stats - some stats about the running server * counters - a dump of all the current counters +* gauges - a dump of all the current gauges * timers - a dump of the current timers The stats output currently will give you: From 17b926b0f850761766dbdd903ae638fd13d7b10f Mon Sep 17 00:00:00 2001 From: draco2003 Date: Fri, 15 Mar 2013 16:46:41 +0000 Subject: [PATCH 189/233] Update to fully qualified links --- README.md | 12 ++++++------ 1 file 
changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 12e0bcb9..275eda5b 100644 --- a/README.md +++ b/README.md @@ -46,12 +46,12 @@ Installation and Configuration More Specific Topics -------- -* [Metric Types](./docs/metric_types.md) -* [Graphite Integration](./docs/graphite.md) -* [Supported Backends](./docs/backend.md) -* [Admin TCP Interface](./docs/admin_interface.md) -* [Backend Interface](./docs/backend_interface.md) -* [Metric Namespacing](./docs/namespacing.md) +* [Metric Types](https://github.com/etsy/statsd/blob/master/docs/metric_types.md) +* [Graphite Integration](https://github.com/etsy/statsd/blob/master/docs/graphite.md) +* [Supported Backends](https://github.com/etsy/statsd/blob/master/docs/backend.md) +* [Admin TCP Interface](https://github.com/etsy/statsd/blob/master/docs/admin_interface.md) +* [Backend Interface](https://github.com/etsy/statsd/blob/master/docs/backend_interface.md) +* [Metric Namespacing](https://github.com/etsy/statsd/blob/master/docs/namespacing.md) Debugging From c1c8090a6d458b11571646d73ead078281ca6446 Mon Sep 17 00:00:00 2001 From: draco2003 Date: Fri, 15 Mar 2013 16:49:08 +0000 Subject: [PATCH 190/233] Update Changelog with changes since 0.5.0 --- Changelog.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Changelog.md b/Changelog.md index 045ec378..05218b68 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,5 +1,27 @@ # Changelog +## v0.6.0 (03/15/2013) +- added new metric types : sets, guage deltas, histograms +- added ability to delete idle stats +- added support for configurable namespacing +- added standard Deviation to timers stats (.std) +- added last_flush_time and last_flush_length metrics to graphite backend +- added ipv6 support +- added Statsd repeater backend +- added helper script to decide which timers to sample down +- added Windows service support +- added Scala example +- added support for sampling timers. 
+- added build testing on node 0.8, 0.9, and 0.10 +- fixed broken config file watching. +- fixed for DNS errors from UDP socket +- fixed for TCP client goes away crash. +- removed debugInterval in favor of Console backend debugging +- updated and reorganized Docs +- updated examples scripts +- improved the quality of randomness used for sampling. +- moved config.js to /lib folder to avoid confusion + ## v0.5.0 (07/20/2012) - add support for logging to syslog - add basic metrics gathering for StatsD and Graphite backend itself From 2ccde8266bbe941ac5f79efe39103b99e1196d92 Mon Sep 17 00:00:00 2001 From: draco2003 Date: Fri, 15 Mar 2013 16:50:02 +0000 Subject: [PATCH 191/233] Update package.json to 0.6.0 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index ce0d063c..64530569 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,7 @@ "type": "git", "url": "https://github.com/etsy/statsd.git" }, - "version": "0.5.0", + "version": "0.6.0", "dependencies": { }, "devDependencies": { From f369dfade2882d41896a7f2e6c364188a9b3bef8 Mon Sep 17 00:00:00 2001 From: Zach Wily Date: Tue, 4 Dec 2012 20:53:21 -0700 Subject: [PATCH 192/233] Support gathering "top" percentile statistics Adds support for collecting statistics on top percentiles, instead of the default bottom percentiles. You specify a top percentile by using a negative number - so -10 will collect the top 10% of data. It will emit: mean_top10, lower_top10, and sum_top10. Using a negative number may seem hacky, but it's convenient and there is a precedent - referencing an array from the end in some languages can be done with negative indexes. 
--- exampleConfig.js | 1 + lib/process_metrics.js | 20 ++++++++++++++------ test/process_metrics_tests.js | 22 ++++++++++++++++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/exampleConfig.js b/exampleConfig.js index b03fcec7..6d3655e8 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -29,6 +29,7 @@ Optional Variables: flushInterval: interval (in ms) to flush to Graphite percentThreshold: for time information, calculate the Nth percentile(s) (can be a single value or list of floating-point values) + negative values mean to use "top" Nth percentile(s) values [%, default: 90] keyFlush: log the most frequently sent keys [object, default: undefined] interval: how often to log frequent keys [ms, default: 0] diff --git a/lib/process_metrics.js b/lib/process_metrics.js index 3b1bd365..f63bdb8d 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -34,24 +34,32 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { var sum = min; var mean = min; - var maxAtThreshold = max; + var thresholdBoundary = max; var key2; for (key2 in pctThreshold) { var pct = pctThreshold[key2]; if (count > 1) { - var numInThreshold = Math.round(pct / 100 * count); + var numInThreshold = Math.round(Math.abs(pct) / 100 * count); + if (numInThreshold === 0) { + continue; + } - maxAtThreshold = values[numInThreshold - 1]; - sum = cumulativeValues[numInThreshold - 1]; + if (pct > 0) { + thresholdBoundary = values[numInThreshold - 1]; + sum = cumulativeValues[numInThreshold - 1]; + } else { + thresholdBoundary = values[count - numInThreshold]; + sum = cumulativeValues[count - 1] - cumulativeValues[count - numInThreshold - 1]; + } mean = sum / numInThreshold; } var clean_pct = '' + pct; - clean_pct = clean_pct.replace('.', '_'); + clean_pct = clean_pct.replace('.', '_').replace('-', 'top'); current_timer_data["mean_" + clean_pct] = mean; - current_timer_data["upper_" + clean_pct] = maxAtThreshold; + current_timer_data[(pct > 0 ? 
"upper_" : "lower_") + clean_pct] = thresholdBoundary; current_timer_data["sum_" + clean_pct] = sum; } diff --git a/test/process_metrics_tests.js b/test/process_metrics_tests.js index 78630c05..3fd1e0eb 100644 --- a/test/process_metrics_tests.js +++ b/test/process_metrics_tests.js @@ -182,6 +182,28 @@ module.exports = { test.equal(undefined, timer_data['bar.bazfoobar.abc']['histogram']); test.equal(undefined, timer_data['xyz']['histogram']); + test.done(); + }, + timers_single_time_single_top_percentile: function(test) { + test.expect(3); + this.metrics.timers['a'] = [100]; + this.metrics.pctThreshold = [-10]; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + timer_data = this.metrics.timer_data['a']; + test.equal(100, timer_data.mean_top10); + test.equal(100, timer_data.lower_top10); + test.equal(100, timer_data.sum_top10); + test.done(); + }, + timers_multiple_times_single_top_percentile: function(test) { + test.expect(3); + this.metrics.timers['a'] = [10, 10, 10, 10, 10, 10, 10, 10, 100, 200]; + this.metrics.pctThreshold = [-20]; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + timer_data = this.metrics.timer_data['a']; + test.equal(150, timer_data.mean_top20); + test.equal(100, timer_data.lower_top20); + test.equal(300, timer_data.sum_top20); test.done(); }, statsd_metrics_exist: function(test) { From 49c6d107ac5be4d7f808e5c448f6c2e337d03acd Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Mon, 18 Mar 2013 15:47:37 -0300 Subject: [PATCH 193/233] Add New logo to readme Added the new logo to the Readme. Also standardized the link format to be Reference based. 
--- README.md | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 275eda5b..71d7412d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ -StatsD [![Build Status](https://travis-ci.org/etsy/statsd.png?branch=backends-as-packages)](https://travis-ci.org/etsy/statsd) +![Statsd Logo][statsd_logo] +StatsD [![Build Status][travis-ci_status_img]][travis-ci_statsd] ====== A network daemon that runs on the [Node.js][node] platform and @@ -13,9 +14,9 @@ Inspiration StatsD was inspired (heavily) by the project (of the same name) at Flickr. Here's a post where Cal Henderson described it in depth: -[Counting and timing](http://code.flickr.com/blog/2008/10/27/counting-timing/). +[Counting and timing][counting-timing] Cal re-released the code recently: -[Perl StatsD](https://github.com/iamcal/Flickr-StatsD) +[Perl StatsD][Flicker-StatsD] Key Concepts -------- @@ -46,12 +47,12 @@ Installation and Configuration More Specific Topics -------- -* [Metric Types](https://github.com/etsy/statsd/blob/master/docs/metric_types.md) -* [Graphite Integration](https://github.com/etsy/statsd/blob/master/docs/graphite.md) -* [Supported Backends](https://github.com/etsy/statsd/blob/master/docs/backend.md) -* [Admin TCP Interface](https://github.com/etsy/statsd/blob/master/docs/admin_interface.md) -* [Backend Interface](https://github.com/etsy/statsd/blob/master/docs/backend_interface.md) -* [Metric Namespacing](https://github.com/etsy/statsd/blob/master/docs/namespacing.md) +* [Metric Types][docs_metric_types] +* [Graphite Integration][docs_graphite] +* [Supported Backends][docs_backend] +* [Admin TCP Interface][docs_admin_interface] +* [Backend Interface][docs_backend_interface] +* [Metric Namespacing][docs_namespacing] Debugging @@ -101,16 +102,26 @@ fork StatsD from here: http://github.com/etsy/statsd We'll do our best to get your changes in! 
+Contributors +----------------- + +In lieu of a list of contributors, check out the commit history for the project: +https://github.com/etsy/statsd/graphs/contributors + [graphite]: http://graphite.wikidot.com [etsy]: http://www.etsy.com [blog post]: http://codeascraft.etsy.com/2011/02/15/measure-anything-measure-everything/ [node]: http://nodejs.org [nodemods]: http://nodejs.org/api/modules.html +[counting-timing]: http://code.flickr.com/blog/2008/10/27/counting-timing/ +[Flicker-StatsD]: https://github.com/iamcal/Flickr-StatsD [udp]: http://en.wikipedia.org/wiki/User_Datagram_Protocol - - -Contributors ------------------ - -In lieu of a list of contributors, check out the commit history for the project: -https://github.com/etsy/statsd/graphs/contributors +[statsd_logo]: https://www.etsy.com/images/github/github-statsd.png +[docs_metric_types]: https://github.com/etsy/statsd/blob/master/docs/metric_types.md +[docs_graphite]: https://github.com/etsy/statsd/blob/master/docs/graphite.md +[docs_backend]: https://github.com/etsy/statsd/blob/master/docs/backend.md +[docs_admin_interface]: https://github.com/etsy/statsd/blob/master/docs/admin_interface.md +[docs_backend_interface]: https://github.com/etsy/statsd/blob/master/docs/backend_interface.md +[docs_namespacing]: https://github.com/etsy/statsd/blob/master/docs/namespacing.md +[travis-ci_status_img]: https://travis-ci.org/etsy/statsd.png?branch=backends-as-packages +[travis-ci_statsd]: https://travis-ci.org/etsy/statsd From 34c171ad119d31c30f927ea430df6ec69f752e9b Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Mon, 18 Mar 2013 16:15:10 -0300 Subject: [PATCH 194/233] Remove logo for now. 
Needs better formatting --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 71d7412d..c81d2633 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,3 @@ -![Statsd Logo][statsd_logo] StatsD [![Build Status][travis-ci_status_img]][travis-ci_statsd] ====== From 0d03a7c14e2f81a9a97cfa970769c986e7396f55 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Mon, 18 Mar 2013 16:23:44 -0300 Subject: [PATCH 195/233] Remove logo ref if we aren't using it yet. --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index c81d2633..684464b0 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,6 @@ https://github.com/etsy/statsd/graphs/contributors [counting-timing]: http://code.flickr.com/blog/2008/10/27/counting-timing/ [Flicker-StatsD]: https://github.com/iamcal/Flickr-StatsD [udp]: http://en.wikipedia.org/wiki/User_Datagram_Protocol -[statsd_logo]: https://www.etsy.com/images/github/github-statsd.png [docs_metric_types]: https://github.com/etsy/statsd/blob/master/docs/metric_types.md [docs_graphite]: https://github.com/etsy/statsd/blob/master/docs/graphite.md [docs_backend]: https://github.com/etsy/statsd/blob/master/docs/backend.md From 6daa91d7a71d465d3e085228f2eea4b9050de304 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Tue, 19 Mar 2013 11:20:58 -0300 Subject: [PATCH 196/233] Bump node-syslog and engine required version --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 64530569..b8e918c7 100644 --- a/package.json +++ b/package.json @@ -22,11 +22,11 @@ "temp": "0.4.x" }, "optionalDependencies": { - "node-syslog":"1.1.3", + "node-syslog":"1.1.7", "winser": "=0.0.11" }, "engines": { - "node" : ">=0.4" + "node" : ">=0.6" }, "bin": { "statsd": "./bin/statsd" } } From bad995b7f5c4632f925e4055f73d789a1b8779db Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Tue, 19 Mar 2013 11:42:39 -0300 Subject: [PATCH 197/233] Test Updating nodeunit --- package.json | 
3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/package.json b/package.json index b8e918c7..f9a35a57 100644 --- a/package.json +++ b/package.json @@ -16,8 +16,7 @@ "dependencies": { }, "devDependencies": { - "nodeunit": "0.6.x", - "async": "0.1.x", + "nodeunit": "0.7.x", "underscore": "1.2.x", "temp": "0.4.x" }, From 763bece7ebb2ef495831aa792ad194304abe0b9a Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Tue, 19 Mar 2013 11:50:19 -0300 Subject: [PATCH 198/233] update underscore and temp --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index f9a35a57..614987c3 100644 --- a/package.json +++ b/package.json @@ -17,8 +17,8 @@ }, "devDependencies": { "nodeunit": "0.7.x", - "underscore": "1.2.x", - "temp": "0.4.x" + "underscore": "1.4.x", + "temp": "0.5.x" }, "optionalDependencies": { "node-syslog":"1.1.7", From 1f6afa0fa468bd2a9d8a27b9af7ae6f9f55bf715 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Tue, 19 Mar 2013 11:53:48 -0300 Subject: [PATCH 199/233] Revert temp version bump to keep v0.6.0 support --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 614987c3..b6e929dd 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,7 @@ "devDependencies": { "nodeunit": "0.7.x", "underscore": "1.4.x", - "temp": "0.5.x" + "temp": "0.4.x" }, "optionalDependencies": { "node-syslog":"1.1.7", From 5c0ce673b6eb9ab90591124e7475ebc13ccca80b Mon Sep 17 00:00:00 2001 From: David Howell Date: Fri, 22 Mar 2013 15:40:26 +1030 Subject: [PATCH 200/233] Added median calculation to timer_data Added median calculation to timer_data and associated test cases --- lib/process_metrics.js | 4 ++++ test/process_metrics_tests.js | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/process_metrics.js b/lib/process_metrics.js index 3b1bd365..7008a854 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -64,6 
+64,9 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { sumOfDiffs += (values[i] - mean) * (values[i] - mean); } + var mid = Math.floor(count/2); + var median = (count % 2) ? values[mid] : (values[mid-1] + values[mid])/2; + var stddev = Math.sqrt(sumOfDiffs / count); current_timer_data["std"] = stddev; current_timer_data["upper"] = max; @@ -72,6 +75,7 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { current_timer_data["count_ps"] = timer_counters[key] / (flushInterval / 1000); current_timer_data["sum"] = sum; current_timer_data["mean"] = mean; + current_timer_data["median"] = median; // note: values bigger than the upper limit of the last bin are ignored, by design conf = histogram || []; diff --git a/test/process_metrics_tests.js b/test/process_metrics_tests.js index 78630c05..2b03b71e 100644 --- a/test/process_metrics_tests.js +++ b/test/process_metrics_tests.js @@ -46,7 +46,7 @@ module.exports = { test.done(); }, timers_single_time: function(test) { - test.expect(7); + test.expect(8); this.metrics.timers['a'] = [100]; this.metrics.timer_counters['a'] = 1; pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); @@ -58,10 +58,11 @@ module.exports = { test.equal(10, timer_data.count_ps); test.equal(100, timer_data.sum); test.equal(100, timer_data.mean); + test.equal(100, timer_data.median); test.done(); }, timers_multiple_times: function(test) { - test.expect(7); + test.expect(8); this.metrics.timers['a'] = [100, 200, 300]; this.metrics.timer_counters['a'] = 3; pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); @@ -73,6 +74,7 @@ module.exports = { test.equal(30, timer_data.count_ps); test.equal(600, timer_data.sum); test.equal(200, timer_data.mean); + test.equal(200, timer_data.median); test.done(); }, timers_single_time_single_percentile: function(test) { @@ -190,5 +192,13 @@ module.exports = { statsd_metrics = this.metrics.statsd_metrics; test.notEqual(undefined, 
statsd_metrics["processing_time"]); test.done(); + }, + timers_multiple_times_even: function(test) { + test.expect(1); + this.metrics.timers['a'] = [100, 200, 300, 400]; + pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); + timer_data = this.metrics.timer_data['a']; + test.equal(250, timer_data.median); + test.done(); } } From 76c13f03fade92c9f103bcc613efb87d4aa736da Mon Sep 17 00:00:00 2001 From: David Howell Date: Fri, 22 Mar 2013 15:58:32 +1030 Subject: [PATCH 201/233] Changed test case to ensure sorting Changed the test data in timers_multiple_times_even so make sure the metrics are sorted before getting the median value. If the data was not sorted, the median value would be 300 instead of 250. --- test/process_metrics_tests.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/process_metrics_tests.js b/test/process_metrics_tests.js index 2b03b71e..f564abcc 100644 --- a/test/process_metrics_tests.js +++ b/test/process_metrics_tests.js @@ -195,7 +195,7 @@ module.exports = { }, timers_multiple_times_even: function(test) { test.expect(1); - this.metrics.timers['a'] = [100, 200, 300, 400]; + this.metrics.timers['a'] = [300, 200, 400, 100]; pm.process_metrics(this.metrics, 100, this.time_stamp, function(){}); timer_data = this.metrics.timer_data['a']; test.equal(250, timer_data.median); From 3852334c074f6c65ba9de5127ce30a53673cd9cb Mon Sep 17 00:00:00 2001 From: Jason Carver Date: Sat, 9 Feb 2013 00:26:00 -0800 Subject: [PATCH 202/233] refactor stat deletion code, no functional changes --- stats.js | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/stats.js b/stats.js index e5b4c645..4758e863 100644 --- a/stats.js +++ b/stats.js @@ -131,6 +131,16 @@ var stats = { // Global for the logger var l; +function delete_stats(stats_type, cmdline, stream) { + + //for each metric requested on the command line + for (var index in cmdline) { + delete stats_type[cmdline[index]]; + 
stream.write("deleted: " + metric + "\n"); + } + stream.write("END\n\n"); +} + config.configFile(process.argv[2], function (config, oldConfig) { conf = config; l = new logger.Logger(config.log || {}); @@ -307,27 +317,15 @@ config.configFile(process.argv[2], function (config, oldConfig) { break; case "delcounters": - for (var counter_index in cmdline) { - delete counters[cmdline[counter_index]]; - stream.write("deleted: " + cmdline[counter_index] + "\n"); - } - stream.write("END\n\n"); + delete_stats(counters, cmdline, stream); break; case "deltimers": - for (var timer_index in cmdline) { - delete timers[cmdline[timer_index]]; - stream.write("deleted: " + cmdline[timer_index] + "\n"); - } - stream.write("END\n\n"); + delete_stats(timers, cmdline, stream); break; case "delgauges": - for (var gauge_index in cmdline) { - delete gauges[cmdline[gauge_index]]; - stream.write("deleted: " + cmdline[gauge_index] + "\n"); - } - stream.write("END\n\n"); + delete_stats(gauges, cmdline, stream); break; case "quit": From d07b90ed67fe45032860ff8923f9cd56b45206d4 Mon Sep 17 00:00:00 2001 From: Jason Carver Date: Sat, 9 Feb 2013 00:33:49 -0800 Subject: [PATCH 203/233] delete all metrics in a folder using a command like: delcounters path.to.parent.* --- stats.js | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/stats.js b/stats.js index 4758e863..919ccc93 100644 --- a/stats.js +++ b/stats.js @@ -135,12 +135,47 @@ function delete_stats(stats_type, cmdline, stream) { //for each metric requested on the command line for (var index in cmdline) { - delete stats_type[cmdline[index]]; - stream.write("deleted: " + metric + "\n"); + + //get a list of deletable metrics that match the request + deletable = stats_to_delete(stats_type, cmdline[index]); + + //warn if no matches + if (deletable.length == 0) { + stream.write("metric " + cmdline[index] + " not found\n"); + } + + //delete all requested metrics + for (var del_idx in deletable) { + 
delete stats_type[deletable[del_idx]]; + stream.write("deleted: " + deletable[del_idx] + "\n"); + } } stream.write("END\n\n"); } +function stats_to_delete(stats_type, bucket){ + deletable = [] + + //typical case: one-off deletion + if (bucket in stats_type) { + deletable.push(bucket); + } + + //special case: delete a whole 'folder' (and subfolders) of stats + if (bucket.slice(-2) == ".*") { + var folder = bucket.slice(0,-1); + + for (var name in stats_type) { + //check if stat is in bucket, ie~ name starts with folder + if (name.substring(0, folder.length) == folder) { + deletable.push(name); + } + } + } + + return deletable; +} + config.configFile(process.argv[2], function (config, oldConfig) { conf = config; l = new logger.Logger(config.log || {}); From 2d9b0de787c4ec7ce85ff9f36e5b52be8327ec50 Mon Sep 17 00:00:00 2001 From: Jason Carver Date: Mon, 25 Feb 2013 22:09:26 -0800 Subject: [PATCH 204/233] delete stats via management console: move to lib/, docs, generalize, tests --- lib/mgmt_console.js | 68 ++++++++++++++++++++++++++++++++++++++ stats.js | 52 +++-------------------------- test/mgmt_console_tests.js | 65 ++++++++++++++++++++++++++++++++++++ 3 files changed, 137 insertions(+), 48 deletions(-) create mode 100644 lib/mgmt_console.js create mode 100644 test/mgmt_console_tests.js diff --git a/lib/mgmt_console.js b/lib/mgmt_console.js new file mode 100644 index 00000000..daac059d --- /dev/null +++ b/lib/mgmt_console.js @@ -0,0 +1,68 @@ + +/** + * delete_stats - delete all matching statistics + * + * Side effect notes: this function works by altering stats_type in place, + * and calls stream.write(str) to display user feedback. 
+ * + * @param stats_type array of all statistics of this type (eg~ timers) to delete from + * @param cmdline array of all requested deletions, which can be fully qualified, + * or end in a .* to delete a folder, like stats.temp.* + * @param stream buffer output for for all outgoing user feedback + */ +exports.delete_stats = function(stats_type, cmdline, stream) { + + //for each metric requested on the command line + for (var index in cmdline) { + + //get a list of deletable metrics that match the request + deletable = existing_stats(stats_type, cmdline[index]); + + //warn if no matches + if (deletable.length == 0) { + stream.write("metric " + cmdline[index] + " not found\n"); + } + + //delete all requested metrics + for (var del_idx in deletable) { + delete stats_type[deletable[del_idx]]; + stream.write("deleted: " + deletable[del_idx] + "\n"); + } + } + stream.write("END\n\n"); +} + +/** + * existing_stats - find fully qualified matches for the requested stats bucket + * + * @param stats_type array of all statistics of this type (eg~ timers) to match + * @param bucket string to search on, which can be fully qualified, + * or end in a .* to search for a folder, like stats.temp.* + * + * @return array of fully qualified stats that match the specified bucket. 
if + * no matches, an empty array is a valid response + */ +function existing_stats(stats_type, bucket){ + matches = [] + + //typical case: one-off, fully qualified + if (bucket in stats_type) { + matches.push(bucket); + } + + //special case: match a whole 'folder' (and subfolders) of stats + if (bucket.slice(-2) == ".*") { + var folder = bucket.slice(0,-1); + + for (var name in stats_type) { + //check if stat is in bucket, ie~ name starts with folder + if (name.substring(0, folder.length) == folder) { + matches.push(name); + } + } + } + + return matches; +} + +exports.existing_stats = existing_stats; diff --git a/stats.js b/stats.js index 919ccc93..04174c72 100644 --- a/stats.js +++ b/stats.js @@ -7,6 +7,7 @@ var dgram = require('dgram') , logger = require('./lib/logger') , set = require('./lib/set') , pm = require('./lib/process_metrics') + , mgmt = require('./lib/mgmt_console') // initialize data structures with defaults for statsd stats @@ -131,51 +132,6 @@ var stats = { // Global for the logger var l; -function delete_stats(stats_type, cmdline, stream) { - - //for each metric requested on the command line - for (var index in cmdline) { - - //get a list of deletable metrics that match the request - deletable = stats_to_delete(stats_type, cmdline[index]); - - //warn if no matches - if (deletable.length == 0) { - stream.write("metric " + cmdline[index] + " not found\n"); - } - - //delete all requested metrics - for (var del_idx in deletable) { - delete stats_type[deletable[del_idx]]; - stream.write("deleted: " + deletable[del_idx] + "\n"); - } - } - stream.write("END\n\n"); -} - -function stats_to_delete(stats_type, bucket){ - deletable = [] - - //typical case: one-off deletion - if (bucket in stats_type) { - deletable.push(bucket); - } - - //special case: delete a whole 'folder' (and subfolders) of stats - if (bucket.slice(-2) == ".*") { - var folder = bucket.slice(0,-1); - - for (var name in stats_type) { - //check if stat is in bucket, ie~ name starts with 
folder - if (name.substring(0, folder.length) == folder) { - deletable.push(name); - } - } - } - - return deletable; -} - config.configFile(process.argv[2], function (config, oldConfig) { conf = config; l = new logger.Logger(config.log || {}); @@ -352,15 +308,15 @@ config.configFile(process.argv[2], function (config, oldConfig) { break; case "delcounters": - delete_stats(counters, cmdline, stream); + mgmt.delete_stats(counters, cmdline, stream); break; case "deltimers": - delete_stats(timers, cmdline, stream); + mgmt.delete_stats(timers, cmdline, stream); break; case "delgauges": - delete_stats(gauges, cmdline, stream); + mgmt.delete_stats(gauges, cmdline, stream); break; case "quit": diff --git a/test/mgmt_console_tests.js b/test/mgmt_console_tests.js new file mode 100644 index 00000000..40ec3563 --- /dev/null +++ b/test/mgmt_console_tests.js @@ -0,0 +1,65 @@ +var mgmt = require('../lib/mgmt_console'); + +module.exports = { + stat_matches: function(test) { + test.expect(8); + stat_vertical = {'a.b':1,'a.c':1,'c':1}; + + //test function + f = function (bucket) { return mgmt.existing_stats(stat_vertical, bucket) } + + //empties + test.deepEqual(f('d'), []); + test.deepEqual(f('a'), []); + test.deepEqual(f('c.a'), []); + test.deepEqual(f('c.*'), []); + test.deepEqual(f(''), []); + + //single matches + test.deepEqual(f('a.b'), ['a.b']); + test.deepEqual(f('c'), ['c']); + + //multiple matches + test.deepEqual(f('a.*'), ['a.b', 'a.c']); + + test.done(); + }, + + stat_deletes: function(test) { + test.expect(6); + + var stream = { + buffer : '', + clear : function() { this.buffer = '' }, + write : function(to_write) { this.buffer += to_write }, + }; + + stats_fixture = + + //delete missing + stat_vertical = {'a.b':1,'a.c':1,'d':1}; + stream.clear(); + mgmt.delete_stats(stat_vertical, ['e'], stream); + + test.deepEqual(stat_vertical, stats_fixture); + test.equal(stream.buffer, 'metric e not found\nEND\n\n'); + + //delete fully qualified + stat_vertical = 
{'a.b':1,'a.c':1,'d':1}; + stream.clear(); + mgmt.delete_stats(stat_vertical, ['a.b'], stream); + + test.deepEqual(stat_vertical, {'a.c':1,'d':1}); + test.equal(stream.buffer, 'deleted: a.b\nEND\n\n'); + + //delete folder + stat_vertical = {'a.b':1,'a.c':1,'d':1}; + stream.clear(); + mgmt.delete_stats(stat_vertical, ['a.*'], stream); + + test.deepEqual(stat_vertical, {'d':1}); + test.equal(stream.buffer, 'deleted: a.b\ndeleted: a.c\nEND\n\n'); + + test.done(); + }, +} From 8dadf9f232cc0c01ce049e921f5b74a5e60f1673 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Thu, 28 Mar 2013 13:41:06 -0400 Subject: [PATCH 205/233] Add health status functionality --- docs/admin_interface.md | 5 +++++ exampleConfig.js | 1 + stats.js | 15 ++++++++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/docs/admin_interface.md b/docs/admin_interface.md index a11abd2f..a9e786f1 100644 --- a/docs/admin_interface.md +++ b/docs/admin_interface.md @@ -11,6 +11,7 @@ available: * counters - a dump of all the current counters * gauges - a dump of all the current gauges * timers - a dump of the current timers +* health - a way to set the health status of statsd The stats output currently will give you: @@ -44,4 +45,8 @@ A simple nagios check can be found in the utils/ directory that can be used to check metric thresholds, for example the number of seconds since the last successful flush to graphite. +The health output: +* the health command alone allows you to see the current health status. +* using health up or health down, you can change the current health status. +* the healthStatus configuration option allows you to set the default health status at start. 
diff --git a/exampleConfig.js b/exampleConfig.js index 6d3655e8..58609055 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -25,6 +25,7 @@ Optional Variables: mgmt_address: address to run the management TCP interface on [default: 0.0.0.0] mgmt_port: port to run the management TCP interface on [default: 8126] + healthStatus: default health status to be returned and statsd process starts ['up' or 'down', default: 'up'] dumpMessages: log all incoming messages flushInterval: interval (in ms) to flush to Graphite percentThreshold: for time information, calculate the Nth percentile(s) diff --git a/stats.js b/stats.js index e5b4c645..afe274a1 100644 --- a/stats.js +++ b/stats.js @@ -234,6 +234,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { }); mgmtServer = net.createServer(function(stream) { + var healthStatus = config.healthStatus || 'up'; stream.setEncoding('ascii'); stream.on('error', function(err) { @@ -246,7 +247,19 @@ config.configFile(process.argv[2], function (config, oldConfig) { switch(cmd) { case "help": - stream.write("Commands: stats, counters, timers, gauges, delcounters, deltimers, delgauges, quit\n\n"); + stream.write("Commands: stats, counters, timers, gauges, delcounters, deltimers, delgauges, health, quit\n\n"); + break; + + case "health": + if (cmdline.length > 0) { + var cmdaction = cmdline[0].toLowerCase(); + if (cmdaction === 'up') { + healthStatus = 'up'; + } else if (cmdaction === 'down') { + healthStatus = 'down'; + } + } + stream.write("health: " + healthStatus + "\n"); break; case "stats": From d6a8a3b8a289bb74e38505963dc1b1713c5c13bc Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Thu, 28 Mar 2013 16:13:21 -0400 Subject: [PATCH 206/233] adding SIGTERM handling --- stats.js | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/stats.js b/stats.js index afe274a1..49d2264d 100644 --- a/stats.js +++ b/stats.js @@ -22,6 +22,7 @@ var pctThreshold = null; var flushInterval, keyFlushInt, server, 
mgmtServer; var startup_time = Math.round(new Date().getTime() / 1000); var backendEvents = new events.EventEmitter(); +var healthStatus = config.healthStatus || 'up'; // Load and init the backend from the backends/ directory. function loadBackend(config, name) { @@ -234,7 +235,6 @@ config.configFile(process.argv[2], function (config, oldConfig) { }); mgmtServer = net.createServer(function(stream) { - var healthStatus = config.healthStatus || 'up'; stream.setEncoding('ascii'); stream.on('error', function(err) { @@ -415,3 +415,13 @@ config.configFile(process.argv[2], function (config, oldConfig) { } } }); + +process.on('SIGTERM', function() { + if (config.debug) { + l.log('Going Down in ' + flushInterval + 'ms'); + } + healthStatus = 'down'; + setTimeout(function() { + process.exit(); + }, flushInterval); +}); From c947f66a047a6d0eadd77acd6fc455f666c9149a Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Sat, 30 Mar 2013 19:05:17 -0300 Subject: [PATCH 207/233] Fix link breakage from initial doc split --- docs/backend.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/backend.md b/docs/backend.md index 68c01664..2a92f9b7 100644 --- a/docs/backend.md +++ b/docs/backend.md @@ -45,3 +45,6 @@ queues and third-party services. 
- [zabbix-backend](https://github.com/parkerd/statsd-zabbix-backend) - [opentsdb backend](https://github.com/emurphy/statsd-opentsdb-backend) - [leftronic backend](https://github.com/sreuter/statsd-leftronic-backend) + + +[graphite]: http://graphite.wikidot.com From 2c6503400f52866d04c2ecdbb6c8b766e82f4054 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Sat, 30 Mar 2013 18:45:33 -0400 Subject: [PATCH 208/233] don't show Bad Line warning for empty strings --- stats.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stats.js b/stats.js index e5b4c645..64e31a70 100644 --- a/stats.js +++ b/stats.js @@ -163,6 +163,9 @@ config.configFile(process.argv[2], function (config, oldConfig) { } for (var midx in metrics) { + if (metrics[midx].length == 0) { + continue; + } if (config.dumpMessages) { l.log(metrics[midx].toString()); } From 1c32a6a673e890b87be1620cc30cb2ea501f64a6 Mon Sep 17 00:00:00 2001 From: Daniel Schauenberg Date: Sat, 30 Mar 2013 20:28:29 -0400 Subject: [PATCH 209/233] use strict comparison for message length --- stats.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stats.js b/stats.js index 64e31a70..1a4690b0 100644 --- a/stats.js +++ b/stats.js @@ -163,7 +163,7 @@ config.configFile(process.argv[2], function (config, oldConfig) { } for (var midx in metrics) { - if (metrics[midx].length == 0) { + if (metrics[midx].length === 0) { continue; } if (config.dumpMessages) { From d48f2e8f7c1a2d3f002dbc2478ffec052b71e4d9 Mon Sep 17 00:00:00 2001 From: Chenjun Shen Date: Sun, 31 Mar 2013 20:48:30 +0200 Subject: [PATCH 210/233] typo --- Changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Changelog.md b/Changelog.md index 05218b68..42585268 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,7 +1,7 @@ # Changelog ## v0.6.0 (03/15/2013) -- added new metric types : sets, guage deltas, histograms +- added new metric types : sets, gauge deltas, histograms - added ability to delete idle stats - added support for 
configurable namespacing - added standard Deviation to timers stats (.std) From ee5ec77ca8196d0123469a153321ab505c98d3a2 Mon Sep 17 00:00:00 2001 From: Matthijs van der Vleuten Date: Thu, 4 Apr 2013 18:00:19 +0300 Subject: [PATCH 211/233] Fix typo --- docs/graphite.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/graphite.md b/docs/graphite.md index e019be8b..568438f6 100644 --- a/docs/graphite.md +++ b/docs/graphite.md @@ -38,7 +38,7 @@ To fix that, simply ensure your flush interval is at least as long as the highes ### Storage Aggregation -The next step is ensuring your data isn't corrupted or discarded when downsampled. Continuing with the example above, take for instance the downsampling of .mean values calculated for all stastd timers: +The next step is ensuring your data isn't corrupted or discarded when downsampled. Continuing with the example above, take for instance the downsampling of .mean values calculated for all statsd timers: Graphite should downsample up to 6 samples representing 10-second mean values into a single value signfying the mean for a 1-minute timespan. This is simple: just average all samples to get the new value, and this is exactly the default method applied by Graphite. However, what about the .count metric also sent for timers? Each sample contains the count of occurences per flush interval, so you want these samples summed-up, not averaged! From bae19124f21f7eed88e1c31afc7b8dcac913fa11 Mon Sep 17 00:00:00 2001 From: Dieter Plaetinck Date: Thu, 4 Apr 2013 15:39:34 -0400 Subject: [PATCH 212/233] fix description of graphite.{last_flush,last_exception} --- docs/admin_interface.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/admin_interface.md b/docs/admin_interface.md index a11abd2f..db6f73ae 100644 --- a/docs/admin_interface.md +++ b/docs/admin_interface.md @@ -29,10 +29,8 @@ You can use this to delete buckets no longer in use. 
For example, if you were ho Graphite: -* graphite.last_flush: the number of seconds elapsed since the last successful - flush to graphite -* graphite.last_exception: the number of seconds elapsed since the last - exception thrown whilst flushing to graphite +* graphite.last_flush: unix timestamp of last successful flush to graphite +* graphite.last_exception: unix timestamp of last exception thrown whilst flushing to graphite * graphite.flush_length: the length of the string sent to graphite * graphite.flush_time: the time it took to send the data to graphite From 4cd8167d968924e00c7f4fa42aa9fe48284447f3 Mon Sep 17 00:00:00 2001 From: draco2003 Date: Mon, 8 Apr 2013 15:43:33 +0000 Subject: [PATCH 213/233] Cleanup shutdown process --- stats.js | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/stats.js b/stats.js index 49d2264d..6a45bd76 100644 --- a/stats.js +++ b/stats.js @@ -417,11 +417,13 @@ config.configFile(process.argv[2], function (config, oldConfig) { }); process.on('SIGTERM', function() { - if (config.debug) { - l.log('Going Down in ' + flushInterval + 'ms'); + if (conf.debug) { + util.log('Starting Final Flush'); } healthStatus = 'down'; - setTimeout(function() { - process.exit(); - }, flushInterval); + process.exit(); +}); + +process.on('exit', function () { + flushMetrics(); }); From d1cb58ed1c3906f3f2faa831fcd03d0db7817163 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Mon, 8 Apr 2013 13:24:10 -0300 Subject: [PATCH 214/233] Don't need to test on .9 now that .10 is out. We'll stick to supporting stable branches of node for now, and test on future unstable when they are close to being released as stable. 
--- .travis.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5ec61f77..693dc4d7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,7 @@ language: node_js node_js: - - 0.6 - - 0.8 - - 0.9 + - "0.6" + - "0.8" - "0.10" script: "./run_tests.sh" notifications: From 61fb8a096b2c26fcec40bf4feed77ae8f9817703 Mon Sep 17 00:00:00 2001 From: Brian Hatfield Date: Wed, 10 Apr 2013 23:06:05 -0400 Subject: [PATCH 215/233] Add local config to git ignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 3c3629e6..a43ccabd 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ node_modules + +config.js + From bc6146fc08089d6936b2335b4f0de8c171f2ad0d Mon Sep 17 00:00:00 2001 From: Brian Hatfield Date: Wed, 10 Apr 2013 23:48:22 -0400 Subject: [PATCH 216/233] Remove statsCached from console backend --- backends/console.js | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/backends/console.js b/backends/console.js index ca1eed4e..2e5ffef6 100644 --- a/backends/console.js +++ b/backends/console.js @@ -6,33 +6,17 @@ function ConsoleBackend(startupTime, config, emitter){ this.lastException = startupTime; this.config = config.console || {}; - this.statsCache = { - counters: {}, - timers: {} - }; - // attach emitter.on('flush', function(timestamp, metrics) { self.flush(timestamp, metrics); }); emitter.on('status', function(callback) { self.status(callback); }); }; ConsoleBackend.prototype.flush = function(timestamp, metrics) { - var self = this; console.log('Flushing stats at', new Date(timestamp * 1000).toString()); - // merge with previously sent values - Object.keys(self.statsCache).forEach(function(type) { - if(!metrics[type]) return; - Object.keys(metrics[type]).forEach(function(name) { - var value = metrics[type][name]; - self.statsCache[type][name] || (self.statsCache[type][name] = 0); - self.statsCache[type][name] += value; - }); - }); - 
var out = { - counters: this.statsCache.counters, - timers: this.statsCache.timers, + counters: metrics.counters, + timers: metrics.timers, gauges: metrics.gauges, timer_data: metrics.timer_data, counter_rates: metrics.counter_rates, From e55b095b106e9e0567d47b847b1bc88062cd8e07 Mon Sep 17 00:00:00 2001 From: Brian Hatfield Date: Thu, 11 Apr 2013 11:51:58 -0400 Subject: [PATCH 217/233] Revert "Add local config to git ignore" This reverts commit 61fb8a096b2c26fcec40bf4feed77ae8f9817703. --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index a43ccabd..3c3629e6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1 @@ node_modules - -config.js - From d8ab2fd37e466094aae5073273d3c4ef587f4fb5 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Thu, 11 Apr 2013 18:56:37 -0300 Subject: [PATCH 218/233] Update docs to include del* commands Updated command list to show all the delete commands, as well as adding an example to show how to delete a folder of counters. 
--- docs/admin_interface.md | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/docs/admin_interface.md b/docs/admin_interface.md index e51811c1..853d5a00 100644 --- a/docs/admin_interface.md +++ b/docs/admin_interface.md @@ -11,22 +11,29 @@ available: * counters - a dump of all the current counters * gauges - a dump of all the current gauges * timers - a dump of the current timers +* delcounters - delete a counter or folder of counters +* delgauges - delete a gauge or folder of gauges +* deltimers - delete a timer or folder of timers * health - a way to set the health status of statsd The stats output currently will give you: * uptime: the number of seconds elapsed since statsd started -* messages.last_msg_seen: the number of elapsed seconds since statsd received a - message +* messages.last_msg_seen: the number of elapsed seconds since statsd received a message * messages.bad_lines_seen: the number of bad lines seen since startup -Each backend will also publish a set of statistics, prefixed by its -module name. - -You can use this to delete buckets no longer in use. For example, if you were hosting statsd at 10.10.10.10: +You can use the del commands to delete an individual metric like this : #to delete counter sandbox.test.temporary - echo "delcounters sandbox.test.temporary" | nc 10.10.10.10 8126 + echo "delcounters sandbox.test.temporary" | nc 127.0.0.1 8126 + +Or you can use the del command to delete a folder of metrics like this : + + #to delete counters sandbox.test.* + echo "delcounters sandbox.test.*" | nc 127.0.0.1 8126 + + +Each backend will also publish a set of statistics, prefixed by its module name. 
Graphite: From 67968aacf84837afd4086b8f082cb9faa6344fe3 Mon Sep 17 00:00:00 2001 From: Brian Hatfield Date: Tue, 16 Apr 2013 13:30:39 -0400 Subject: [PATCH 219/233] Add link to statsd-http-backend --- docs/backend.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/backend.md b/docs/backend.md index 2a92f9b7..818c02d5 100644 --- a/docs/backend.md +++ b/docs/backend.md @@ -41,6 +41,7 @@ queues and third-party services. - [monitis backend](https://github.com/jeremiahshirk/statsd-monitis-backend) - [instrumental backend](https://github.com/collectiveidea/statsd-instrumental-backend) - [hosted graphite backend](https://github.com/hostedgraphite/statsdplugin) +- [graphite http backend](https://github.com/bmhatfield/statsd-http-backend) - [statsd aggregation backend](https://github.com/wanelo/gossip_girl) - [zabbix-backend](https://github.com/parkerd/statsd-zabbix-backend) - [opentsdb backend](https://github.com/emurphy/statsd-opentsdb-backend) From 0db7e7fdf64fcfddcd8be650e13e55bb6f355cff Mon Sep 17 00:00:00 2001 From: draco2003 Date: Tue, 30 Apr 2013 14:53:55 +0000 Subject: [PATCH 220/233] cleanup whitespace and small tweaks --- lib/mgmt_console.js | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/mgmt_console.js b/lib/mgmt_console.js index daac059d..3c497c79 100644 --- a/lib/mgmt_console.js +++ b/lib/mgmt_console.js @@ -1,10 +1,10 @@ /** * delete_stats - delete all matching statistics - * + * * Side effect notes: this function works by altering stats_type in place, * and calls stream.write(str) to display user feedback. 
- * + * * @param stats_type array of all statistics of this type (eg~ timers) to delete from * @param cmdline array of all requested deletions, which can be fully qualified, * or end in a .* to delete a folder, like stats.temp.* @@ -14,15 +14,15 @@ exports.delete_stats = function(stats_type, cmdline, stream) { //for each metric requested on the command line for (var index in cmdline) { - + //get a list of deletable metrics that match the request deletable = existing_stats(stats_type, cmdline[index]); - + //warn if no matches - if (deletable.length == 0) { + if (deletable.length === 0) { stream.write("metric " + cmdline[index] + " not found\n"); } - + //delete all requested metrics for (var del_idx in deletable) { delete stats_type[deletable[del_idx]]; @@ -30,20 +30,20 @@ exports.delete_stats = function(stats_type, cmdline, stream) { } } stream.write("END\n\n"); -} +}; /** * existing_stats - find fully qualified matches for the requested stats bucket - * + * * @param stats_type array of all statistics of this type (eg~ timers) to match * @param bucket string to search on, which can be fully qualified, * or end in a .* to search for a folder, like stats.temp.* - * + * * @return array of fully qualified stats that match the specified bucket. 
if * no matches, an empty array is a valid response */ function existing_stats(stats_type, bucket){ - matches = [] + matches = []; //typical case: one-off, fully qualified if (bucket in stats_type) { @@ -53,7 +53,7 @@ function existing_stats(stats_type, bucket){ //special case: match a whole 'folder' (and subfolders) of stats if (bucket.slice(-2) == ".*") { var folder = bucket.slice(0,-1); - + for (var name in stats_type) { //check if stat is in bucket, ie~ name starts with folder if (name.substring(0, folder.length) == folder) { @@ -61,7 +61,7 @@ function existing_stats(stats_type, bucket){ } } } - + return matches; } From 297b9074d298ba81234c4f5faf5d4f27390622f8 Mon Sep 17 00:00:00 2001 From: Marc Fournier Date: Tue, 14 May 2013 21:13:06 +0200 Subject: [PATCH 221/233] debian: avoid running service as root/nobody Create a new _statsd system user on package installation (and remove him when package is purged), and modify upstrart and init-script to have this user run the daemon. --- debian/dirs | 3 ++- debian/postinst | 21 +++++++++++++++++---- debian/postrm | 21 +++++++++++++++++++++ debian/statsd.init | 7 ++++--- debian/statsd.upstart | 2 +- 5 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 debian/postrm diff --git a/debian/dirs b/debian/dirs index 4778ce6a..dc8b5569 100644 --- a/debian/dirs +++ b/debian/dirs @@ -1 +1,2 @@ -var/log/statsd \ No newline at end of file +var/log/statsd +var/run/statsd diff --git a/debian/postinst b/debian/postinst index dc2c0615..797e7755 100755 --- a/debian/postinst +++ b/debian/postinst @@ -1,6 +1,19 @@ #!/bin/sh -if [ "`stat -c %G /var/log/statsd`" != 'nogroup' ]; then - chown root.nogroup /var/log/statsd - chmod 770 /var/log/statsd -fi \ No newline at end of file +set -e + +if [ "$1" = configure ]; then + + if ! getent passwd _statsd > /dev/null; then + adduser --system --quiet --home /nonexistent --no-create-home \ + --shell /bin/false --force-badname --group --gecos "StatsD User" _statsd + fi + + if ! 
dpkg-statoverride --list /var/run/statsd >/dev/null 2>&1; then + dpkg-statoverride --update --add _statsd _statsd 0755 /var/run/statsd + fi + + if ! dpkg-statoverride --list /var/log/statsd >/dev/null 2>&1; then + dpkg-statoverride --update --add _statsd _statsd 0755 /var/log/statsd + fi +fi diff --git a/debian/postrm b/debian/postrm new file mode 100644 index 00000000..bcf023b4 --- /dev/null +++ b/debian/postrm @@ -0,0 +1,21 @@ +#!/bin/sh + +set -e + +if [ "$1" = purge ]; then + + update-rc.d statsd remove >/dev/null || true + + rm -rf /var/log/statsd /var/run/statsd + + if dpkg-statoverride --list /var/log/statsd >/dev/null 2>&1; then + dpkg-statoverride --remove /var/log/statsd + fi + + if dpkg-statoverride --list /var/run/statsd >/dev/null 2>&1; then + dpkg-statoverride --remove /var/run/statsd + fi + + deluser --system --quiet _statsd || true + delgroup --system --quiet _statsd || true +fi diff --git a/debian/statsd.init b/debian/statsd.init index 5d8ff4b8..d9348541 100644 --- a/debian/statsd.init +++ b/debian/statsd.init @@ -21,9 +21,10 @@ fi PATH=/sbin:/usr/sbin:/bin:/usr/bin DESC="StatsD" NAME=statsd +USER=_statsd DAEMON=$NODE_BIN DAEMON_ARGS="/usr/share/statsd/stats.js /etc/statsd/localConfig.js 2>&1 >> /var/log/statsd/statsd.log " -PIDFILE=/var/run/$NAME.pid +PIDFILE=/var/run/$NAME/$NAME.pid SCRIPTNAME=/etc/init.d/$NAME CHDIR="/usr/share/statsd" @@ -49,9 +50,9 @@ do_start() # 0 if daemon has been started # 1 if daemon was already running # 2 if daemon could not be started - start-stop-daemon --start --quiet -m --pidfile $PIDFILE --startas $DAEMON --background --test > /dev/null \ + start-stop-daemon --start --quiet -m --pidfile $PIDFILE --startas $DAEMON --chuid $USER:$USER --background --test > /dev/null \ || return 1 - start-stop-daemon --start --quiet -m --pidfile $PIDFILE --startas $DAEMON --background --chdir $CHDIR -- \ + start-stop-daemon --start --quiet -m --pidfile $PIDFILE --startas $DAEMON --chuid $USER:$USER --background --chdir $CHDIR -- \ 
$DAEMON_ARGS > /dev/null 2> /var/log/$NAME-stderr.log \ || return 2 # Add code here, if necessary, that waits for the process to be ready diff --git a/debian/statsd.upstart b/debian/statsd.upstart index 216620cb..4421f075 100644 --- a/debian/statsd.upstart +++ b/debian/statsd.upstart @@ -7,5 +7,5 @@ stop on shutdown script chdir /usr/share/statsd - exec sudo -u nobody /usr/share/statsd/scripts/start + exec sudo -u _statsd /usr/share/statsd/scripts/start end script From 35adfdd62bec6d2e8bb60223643d7cbc173d86db Mon Sep 17 00:00:00 2001 From: Marc Fournier Date: Tue, 14 May 2013 21:22:36 +0200 Subject: [PATCH 222/233] debian: include docs and examples in package --- debian/docs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/debian/docs b/debian/docs index b43bf86b..ec110902 100644 --- a/debian/docs +++ b/debian/docs @@ -1 +1,3 @@ README.md +docs/ +examples/ From f3720a49da5cfc5fb61449f1b5b42e8ebaae70bb Mon Sep 17 00:00:00 2001 From: Marc Fournier Date: Tue, 14 May 2013 21:24:10 +0200 Subject: [PATCH 223/233] debian: bump package release number --- debian/changelog | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/debian/changelog b/debian/changelog index 9844a41b..194f92e5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +statsd (0.0.3-1) unstable; urgency=low + + * avoid running daemon as root + * install docs and examples + + -- Marc Fournier Tue, 14 May 2013 21:22:45 +0200 + statsd (0.0.3) unstable; urgency=low * removed rdio references from init scripts and config From 0e19c41f53019fb68454eb7368fd4f9df5c77ff4 Mon Sep 17 00:00:00 2001 From: drowe Date: Wed, 15 May 2013 21:44:22 +0000 Subject: [PATCH 224/233] add jshint comment and do a little cleanup --- backends/console.js | 6 ++++-- backends/graphite.js | 2 ++ backends/repeater.js | 12 ++++++++---- lib/config.js | 4 +++- lib/logger.js | 16 +++++++++------- lib/mgmt_console.js | 1 + lib/process_metrics.js | 6 ++++-- lib/set.js | 8 +++++--- stats.js | 4 +++- 9 files changed, 39 
insertions(+), 20 deletions(-) diff --git a/backends/console.js b/backends/console.js index 2e5ffef6..90da4384 100644 --- a/backends/console.js +++ b/backends/console.js @@ -1,3 +1,5 @@ +/*jshint node:true, laxcomma:true */ + var util = require('util'); function ConsoleBackend(startupTime, config, emitter){ @@ -9,7 +11,7 @@ function ConsoleBackend(startupTime, config, emitter){ // attach emitter.on('flush', function(timestamp, metrics) { self.flush(timestamp, metrics); }); emitter.on('status', function(callback) { self.status(callback); }); -}; +} ConsoleBackend.prototype.flush = function(timestamp, metrics) { console.log('Flushing stats at', new Date(timestamp * 1000).toString()); @@ -22,7 +24,7 @@ ConsoleBackend.prototype.flush = function(timestamp, metrics) { counter_rates: metrics.counter_rates, sets: function (vals) { var ret = {}; - for (val in vals) { + for (var val in vals) { ret[val] = vals[val].values(); } return ret; diff --git a/backends/graphite.js b/backends/graphite.js index ab6d27b1..6e37f59b 100644 --- a/backends/graphite.js +++ b/backends/graphite.js @@ -1,3 +1,5 @@ +/*jshint node:true, laxcomma:true */ + /* * Flush stats to graphite (http://graphite.wikidot.com/). 
* diff --git a/backends/repeater.js b/backends/repeater.js index 2da129ae..a6079aab 100644 --- a/backends/repeater.js +++ b/backends/repeater.js @@ -1,8 +1,12 @@ -var util = require('util'), - dgram = require('dgram'), - logger = require('../lib/logger'); +/*jshint node:true, laxcomma:true */ + +var util = require('util') + , dgram = require('dgram') + , logger = require('../lib/logger'); + var l; var debug; + function RepeaterBackend(startupTime, config, emitter){ var self = this; this.config = config.repeater || []; @@ -17,7 +21,7 @@ function RepeaterBackend(startupTime, config, emitter){ }); // attach emitter.on('packet', function(packet, rinfo) { self.process(packet, rinfo); }); -}; +} RepeaterBackend.prototype.process = function(packet, rinfo) { var self = this; diff --git a/lib/config.js b/lib/config.js index 8d7e6b2e..8d8cf31a 100644 --- a/lib/config.js +++ b/lib/config.js @@ -1,5 +1,7 @@ +/*jshint node:true, laxcomma:true */ + var fs = require('fs') - , util = require('util') + , util = require('util'); var Configurator = function (file) { diff --git a/lib/logger.js b/lib/logger.js index f2264278..fea607b9 100644 --- a/lib/logger.js +++ b/lib/logger.js @@ -1,7 +1,9 @@ +/*jshint node:true, laxcomma:true */ + var Logger = function (config) { this.config = config; - this.backend = this.config.backend || 'stdout' - this.level = this.config.level || "LOG_INFO" + this.backend = this.config.backend || 'stdout'; + this.level = this.config.level || "LOG_INFO"; if (this.backend == 'stdout') { this.util = require('util'); } else { @@ -9,10 +11,10 @@ var Logger = function (config) { this.util = require('node-syslog'); this.util.init(config.application || 'statsd', this.util.LOG_PID | this.util.LOG_ODELAY, this.util.LOG_LOCAL0); } else { - throw "Logger: Should be 'stdout' or 'syslog'." 
+ throw "Logger: Should be 'stdout' or 'syslog'."; } } -} +}; Logger.prototype = { log: function (msg, type) { @@ -23,7 +25,7 @@ Logger.prototype = { this.util.log(type + ": " + msg); } else { if (!type) { - type = this.level + type = this.level; if (!this.util[type]) { throw "Undefined log level: " + type; } @@ -33,6 +35,6 @@ Logger.prototype = { this.util.log(this.util[type], msg); } } -} +}; -exports.Logger = Logger +exports.Logger = Logger; diff --git a/lib/mgmt_console.js b/lib/mgmt_console.js index 3c497c79..e9431c5f 100644 --- a/lib/mgmt_console.js +++ b/lib/mgmt_console.js @@ -1,3 +1,4 @@ +/*jshint node:true, laxcomma:true */ /** * delete_stats - delete all matching statistics diff --git a/lib/process_metrics.js b/lib/process_metrics.js index b6be6f53..bfaa7b2c 100644 --- a/lib/process_metrics.js +++ b/lib/process_metrics.js @@ -1,3 +1,5 @@ +/*jshint node:true, laxcomma:true */ + var process_metrics = function (metrics, flushInterval, ts, flushCallback) { var starttime = Date.now(); var key; @@ -122,6 +124,6 @@ var process_metrics = function (metrics, flushInterval, ts, flushCallback) { metrics.statsd_metrics = statsd_metrics; flushCallback(metrics); - } + }; -exports.process_metrics = process_metrics +exports.process_metrics = process_metrics; diff --git a/lib/set.js b/lib/set.js index 8458b063..b48a7a7f 100644 --- a/lib/set.js +++ b/lib/set.js @@ -1,6 +1,8 @@ +/*jshint node:true, laxcomma:true */ + var Set = function() { this.store = {}; -} +}; Set.prototype = { has: function(value) { @@ -20,11 +22,11 @@ Set.prototype = { }, values: function() { var values = []; - for (value in this.store) { + for (var value in this.store) { values.push(value); } return values; } -} +}; exports.Set = Set; diff --git a/stats.js b/stats.js index d3f3fc9b..e9a20dbb 100644 --- a/stats.js +++ b/stats.js @@ -1,3 +1,5 @@ +/*jshint node:true, laxcomma:true */ + var dgram = require('dgram') , util = require('util') , net = require('net') @@ -7,7 +9,7 @@ var dgram = 
require('dgram') , logger = require('./lib/logger') , set = require('./lib/set') , pm = require('./lib/process_metrics') - , mgmt = require('./lib/mgmt_console') + , mgmt = require('./lib/mgmt_console'); // initialize data structures with defaults for statsd stats From 182d5583394d15bec0e7a10542bc8b9e40811b59 Mon Sep 17 00:00:00 2001 From: Alex Whitman Date: Mon, 20 May 2013 11:33:40 +0100 Subject: [PATCH 225/233] Set the process title to 'statsd' Makes it easier to see which node processes are statsd when looking at a process list. --- README.md | 2 +- stats.js | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 684464b0..dbf521ff 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ A test framework has been added using node-unit and some custom code to start and manipulate statsd. Please add tests under test/ for any new features or bug fixes encountered. Testing a live server can be tricky, attempts were made to eliminate race conditions but it may be possible to encounter a stuck state. If -doing dev work, a `killall node` will kill any stray test servers in the +doing dev work, a `killall statsd` will kill any stray test servers in the background (don't do this on a production machine!). Tests can be executed with `./run_tests.sh`. 
diff --git a/stats.js b/stats.js index e9a20dbb..b2e2cca0 100644 --- a/stats.js +++ b/stats.js @@ -410,6 +410,8 @@ config.configFile(process.argv[2], function (config, oldConfig) { } }); +process.title = 'statsd'; + process.on('SIGTERM', function() { if (conf.debug) { util.log('Starting Final Flush'); From 8cb9a6368fbb30684d0de1bf84d7a37e9858e2f9 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Mon, 20 May 2013 11:14:46 -0300 Subject: [PATCH 226/233] fix histogram indent in exampleConfig.js --- exampleConfig.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exampleConfig.js b/exampleConfig.js index 58609055..1070aec1 100644 --- a/exampleConfig.js +++ b/exampleConfig.js @@ -72,7 +72,7 @@ Optional Variables: repeaterProtocol: whether to use udp4 or udp6 for repeaters. ["udp4" or "udp6", default: "udp4"] - histogram: for timers, an array of mappings of strings (to match metrics) and + histogram: for timers, an array of mappings of strings (to match metrics) and corresponding ordered non-inclusive upper limits of bins. For all matching metrics, histograms are maintained over time by writing the frequencies for all bins. 
From 4d9bed141ee9edbc094a8c6476a3cda0da2cf3cb Mon Sep 17 00:00:00 2001 From: Kiall Mac Innes Date: Thu, 27 Jun 2013 19:38:19 +0100 Subject: [PATCH 227/233] Update debian packaging for 0.0.6 --- debian/changelog | 7 +++++++ debian/control | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/debian/changelog b/debian/changelog index 194f92e5..1c49d2ae 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +statsd (0.0.6-1) unstable; urgency=low + + * Update packaging for 0.0.6 + * Bump nodejs dependancy to 0.6 per pcakage.json + + -- Kiall Mac Innes Thu, 27 Jun 2013 19:17:00 +0100 + statsd (0.0.3-1) unstable; urgency=low * avoid running daemon as root diff --git a/debian/control b/debian/control index bf73377d..a1731e39 100644 --- a/debian/control +++ b/debian/control @@ -7,7 +7,7 @@ Build-Depends: debhelper (>= 8.0.0) Package: statsd Architecture: all -Depends: ${misc:Depends}, nodejs (>= 0.4.7) +Depends: ${misc:Depends}, nodejs (>= 0.6) Description: Stats aggregation daemon A network daemon for aggregating statistics (counters and timers), rolling them up, then sending them to graphite. From 4ec3a03ab818d7a4d9a059652a1f61e10e45461b Mon Sep 17 00:00:00 2001 From: Ole Michaelis Date: Fri, 28 Jun 2013 13:03:38 +0200 Subject: [PATCH 228/233] Order backends alphabetically So its easier to find the one you are looking for --- docs/backend.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/backend.md b/docs/backend.md index 818c02d5..32839726 100644 --- a/docs/backend.md +++ b/docs/backend.md @@ -32,20 +32,19 @@ queues and third-party services. 
## Available Third-party backends - [amqp-backend](https://github.com/mrtazz/statsd-amqp-backend) +- [datadog-backend](https://github.com/datadog/statsd) - [ganglia-backend](https://github.com/jbuchbinder/statsd-ganglia-backend) +- [hosted graphite backend](https://github.com/hostedgraphite/statsdplugin) +- [instrumental backend](https://github.com/collectiveidea/statsd-instrumental-backend) +- [leftronic backend](https://github.com/sreuter/statsd-leftronic-backend) - [librato-backend](https://github.com/librato/statsd-librato-backend) -- [socket.io-backend](https://github.com/Chatham/statsd-socket.io) -- [statsd-backend](https://github.com/dynmeth/statsd-backend) - [mongo-backend](https://github.com/dynmeth/mongo-statsd-backend) -- [datadog-backend](https://github.com/datadog/statsd) - [monitis backend](https://github.com/jeremiahshirk/statsd-monitis-backend) -- [instrumental backend](https://github.com/collectiveidea/statsd-instrumental-backend) -- [hosted graphite backend](https://github.com/hostedgraphite/statsdplugin) -- [graphite http backend](https://github.com/bmhatfield/statsd-http-backend) +- [opentsdb backend](https://github.com/emurphy/statsd-opentsdb-backend) +- [socket.io-backend](https://github.com/Chatham/statsd-socket.io) +- [statsd-backend](https://github.com/dynmeth/statsd-backend) +- [statsd http backend](https://github.com/bmhatfield/statsd-http-backend) - [statsd aggregation backend](https://github.com/wanelo/gossip_girl) - [zabbix-backend](https://github.com/parkerd/statsd-zabbix-backend) -- [opentsdb backend](https://github.com/emurphy/statsd-opentsdb-backend) -- [leftronic backend](https://github.com/sreuter/statsd-leftronic-backend) - [graphite]: http://graphite.wikidot.com From 50d8525009cd5dd69d5e5a0fe426a4c7ebfebb1e Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Fri, 28 Jun 2013 17:21:09 -0400 Subject: [PATCH 229/233] cleanup two failing tests --- test/graphite_delete_counters_tests.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/test/graphite_delete_counters_tests.js b/test/graphite_delete_counters_tests.js index df8c3846..00c70aa7 100644 --- a/test/graphite_delete_counters_tests.js +++ b/test/graphite_delete_counters_tests.js @@ -213,9 +213,9 @@ module.exports = { }); var numstat_test = function(post){ var mykey = 'statsd.numStats'; - return _.include(_.keys(post),mykey) && (post[mykey] == 2); + return _.include(_.keys(post),mykey) && (post[mykey] == 3); }; - test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); + test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 3'); var testtimervalue_test = function(post){ var mykey = 'stats.timers.a_test_value.mean_90'; @@ -246,9 +246,9 @@ module.exports = { }); var numstat_test = function(post){ var mykey = 'statsd.numStats'; - return _.include(_.keys(post),mykey) && (post[mykey] == 2); + return _.include(_.keys(post),mykey) && (post[mykey] == 3); }; - test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 1'); + test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 3'); var testavgvalue_test = function(post){ var mykey = 'stats.a_test_value'; From 87f4858a8b1c6683d55987f5a7aa964f1903d5b7 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Wed, 3 Jul 2013 16:33:00 -0400 Subject: [PATCH 230/233] don't change value we are testing for, increase the time we wait to look for it --- test/graphite_delete_counters_tests.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/graphite_delete_counters_tests.js b/test/graphite_delete_counters_tests.js index 00c70aa7..a90ea53b 100644 --- a/test/graphite_delete_counters_tests.js +++ b/test/graphite_delete_counters_tests.js @@ -170,7 +170,7 @@ module.exports = { var me = this; this.acceptor.once('connection',function(c){ statsd_send('a_bad_test_value|z',me.sock,'127.0.0.1',8125,function(){ - collect_for(me.acceptor,me.myflush*2,function(strings){ + collect_for(me.acceptor,me.myflush*3,function(strings){ 
test.ok(strings.length > 0,'should receive some data'); var hashes = _.map(strings, function(x) { var chunks = x.split(' '); @@ -203,7 +203,7 @@ module.exports = { var me = this; this.acceptor.once('connection',function(c){ statsd_send('a_test_value:' + testvalue + '|ms',me.sock,'127.0.0.1',8125,function(){ - collect_for(me.acceptor,me.myflush*2,function(strings){ + collect_for(me.acceptor,me.myflush*3,function(strings){ test.ok(strings.length > 0,'should receive some data'); var hashes = _.map(strings, function(x) { var chunks = x.split(' '); @@ -213,9 +213,9 @@ module.exports = { }); var numstat_test = function(post){ var mykey = 'statsd.numStats'; - return _.include(_.keys(post),mykey) && (post[mykey] == 3); + return _.include(_.keys(post),mykey) && (post[mykey] == 2); }; - test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 3'); + test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 2'); var testtimervalue_test = function(post){ var mykey = 'stats.timers.a_test_value.mean_90'; @@ -236,7 +236,7 @@ module.exports = { var me = this; this.acceptor.once('connection',function(c){ statsd_send('a_test_value:' + testvalue + '|c',me.sock,'127.0.0.1',8125,function(){ - collect_for(me.acceptor,me.myflush*2,function(strings){ + collect_for(me.acceptor,me.myflush*3,function(strings){ test.ok(strings.length > 0,'should receive some data'); var hashes = _.map(strings, function(x) { var chunks = x.split(' '); @@ -246,9 +246,9 @@ module.exports = { }); var numstat_test = function(post){ var mykey = 'statsd.numStats'; - return _.include(_.keys(post),mykey) && (post[mykey] == 3); + return _.include(_.keys(post),mykey) && (post[mykey] == 2); }; - test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 3'); + test.ok(_.any(hashes,numstat_test), 'statsd.numStats should be 2'); var testavgvalue_test = function(post){ var mykey = 'stats.a_test_value'; From 841145029620f9fabd2f5478add2ea4080cc057b Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Wed, 3 Jul 2013 
16:37:31 -0400 Subject: [PATCH 231/233] Drop v0.6 support --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index b6e929dd..c5a8a82f 100644 --- a/package.json +++ b/package.json @@ -25,7 +25,7 @@ "winser": "=0.0.11" }, "engines": { - "node" : ">=0.6" + "node" : ">=0.8" }, "bin": { "statsd": "./bin/statsd" } } From 665453f8b25c6e88eb418a008dabe203ad76c2a8 Mon Sep 17 00:00:00 2001 From: Dan Rowe Date: Wed, 3 Jul 2013 16:38:14 -0400 Subject: [PATCH 232/233] Remove v0.6 from node versions we test against --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 693dc4d7..0a14a565 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ language: node_js node_js: - - "0.6" - "0.8" - "0.10" script: "./run_tests.sh" From 10fb3e64331a4205f6b5a4fdcaa83eda9ad265b8 Mon Sep 17 00:00:00 2001 From: Steve Sanbeg Date: Wed, 17 Jul 2013 16:02:58 -0400 Subject: [PATCH 233/233] sync up with CPAN version - use send instead of print to avoid weird buffering issue --- examples/Etsy/StatsD.pm | 67 ++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/examples/Etsy/StatsD.pm b/examples/Etsy/StatsD.pm index 0654d5c1..c31eb3cf 100644 --- a/examples/Etsy/StatsD.pm +++ b/examples/Etsy/StatsD.pm @@ -4,9 +4,11 @@ use warnings; use IO::Socket; use Carp; +our $VERSION = 1.000002; + =head1 NAME -Etsy::StatsD +Etsy::StatsD - Object-Oriented Client for Etsy's StatsD Server =head1 DESCRIPTION @@ -21,17 +23,17 @@ Create a new instance. 
=cut sub new { - my ($class, $host, $port, $sample_rate) = @_; + my ( $class, $host, $port, $sample_rate ) = @_; $host = 'localhost' unless defined $host; - $port = 8125 unless defined $port; + $port = 8125 unless defined $port; my $sock = new IO::Socket::INET( - PeerAddr => $host, - PeerPort => $port, - Proto => 'udp', + PeerAddr => $host, + PeerPort => $port, + Proto => 'udp', ) or croak "Failed to initialize socket: $!"; - bless {socket=>$sock, sample_rate=>$sample_rate}, $class; + bless { socket => $sock, sample_rate => $sample_rate }, $class; } =item timing(STAT, TIME, SAMPLE_RATE) @@ -41,8 +43,8 @@ Log timing information =cut sub timing { - my ($self, $stat, $time, $sample_rate) = @_; - $self->send({$stat => "$time|ms"}, $sample_rate); + my ( $self, $stat, $time, $sample_rate ) = @_; + $self->send( { $stat => "$time|ms" }, $sample_rate ); } =item increment(STATS, SAMPLE_RATE) @@ -52,8 +54,8 @@ Increment one of more stats counters. =cut sub increment { - my ($self, $stats, $sample_rate) = @_; - $self->update($stats, 1, $sample_rate); + my ( $self, $stats, $sample_rate ) = @_; + $self->update( $stats, 1, $sample_rate ); } =item decrement(STATS, SAMPLE_RATE) @@ -63,8 +65,8 @@ Decrement one of more stats counters. =cut sub decrement { - my ($self, $stats, $sample_rate) = @_; - $self->update($stats, -1, $sample_rate); + my ( $self, $stats, $sample_rate ) = @_; + $self->update( $stats, -1, $sample_rate ); } =item update(STATS, DELTA, SAMPLE_RATE) @@ -74,15 +76,16 @@ Update one of more stats counters by arbitrary amounts. 
=cut sub update { - my ($self, $stats, $delta, $sample_rate) = @_; + my ( $self, $stats, $delta, $sample_rate ) = @_; $delta = 1 unless defined $delta; my %data; - if (ref($stats) eq 'ARRAY') { - %data = map {$_ => "$delta|c"} @$stats; - } else { - %data = ($stats => "$delta|c"); + if ( ref($stats) eq 'ARRAY' ) { + %data = map { $_ => "$delta|c" } @$stats; + } + else { + %data = ( $stats => "$delta|c" ); } - $self->send(\%data, $sample_rate); + $self->send( \%data, $sample_rate ); } =item send(DATA, SAMPLE_RATE) @@ -94,30 +97,40 @@ Sending logging data; implicitly called by most of the other methods. =cut sub send { - my ($self, $data, $sample_rate) = @_; + my ( $self, $data, $sample_rate ) = @_; $sample_rate = $self->{sample_rate} unless defined $sample_rate; my $sampled_data; - if ( defined($sample_rate) and $sample_rate < 1 ){ - while (my($stat,$value) = each %$data) { + if ( defined($sample_rate) and $sample_rate < 1 ) { + while ( my ( $stat, $value ) = each %$data ) { $sampled_data->{$stat} = "$value|\@$sample_rate" if rand() <= $sample_rate; } - } else { + } + else { $sampled_data = $data; } - + return '0 but true' unless keys %$sampled_data; #failures in any of this can be silently ignored - my $count=0; + my $count = 0; my $socket = $self->{socket}; - while (my($stat,$value) = each %$sampled_data) { - print $socket "$stat:$value\n"; + while ( my ( $stat, $value ) = each %$sampled_data ) { + _send_to_sock($socket, "$stat:$value\n"); ++$count; } return $count; } +sub _send_to_sock( $$ ) { + my ($sock,$msg) = @_; + CORE::send( $sock, $msg, 0 ); +} + +=head1 SEE ALSO + +L + =head1 AUTHOR Steve Sanbeg L