Remove size limit for per-metric lookup tables
Size-based cleanup was added to avoid unbounded growth of lookup tables
as metrics get deleted/reset. This should no longer be necessary now that
lookup tables are reset on metric deletion.

This reverts eb1876d
knyar committed Apr 17, 2024
1 parent 49c8087 commit bee53e4
Showing 3 changed files with 2 additions and 67 deletions.
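
A minimal, self-contained sketch of the idea behind the revert (illustrative names only, not the library's real internals): each metric keeps a cache of full metric names, and clearing that cache on reset/deletion removes the need for the old size cap.

```lua
-- Illustrative sketch only (not the library's actual API): a per-metric
-- cache of full metric names that is cleared on reset, which is why a
-- size-based cleanup is no longer needed.
local metric = {
  name = "http_count",
  lookup = {},  -- label value -> full name, e.g. ["200"] = 'http_count{status="200"}'
}

local function full_name(m, status)
  local cached = m.lookup[status]
  if cached then
    return cached
  end
  local name = string.format('%s{status="%s"}', m.name, status)
  m.lookup[status] = name
  return name
end

local function reset(m)
  -- Clearing the cache on reset/deletion is what keeps it from growing
  -- without bound, making the old lookup_max_size cap unnecessary.
  m.lookup = {}
end

print(full_name(metric, "200"))   --> http_count{status="200"}
reset(metric)
assert(next(metric.lookup) == nil)  -- cache is empty again
```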
6 changes: 0 additions & 6 deletions README.md
@@ -106,12 +106,6 @@ section of nginx configuration.
* `sync_interval` (number): sets the sync interval for per-worker counters and
key index (in seconds). This sets the boundary on eventual consistency of
counter metric increments, and metric resets/deletions. Defaults to 1.
* `lookup_max_size` (number): maximum size of a per-metric lookup table
maintained by each worker to cache full metric names. Defaults to 1000.
If you have metrics with extremely high cardinality and lots of available
RAM, you might want to increase this to avoid cache getting flushed too
often. Decreasing this makes sense if you have a very large number of
metrics or need to minimize memory usage of this library.

Returns a `prometheus` object that should be used to register metrics.

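
For reference, the remaining option above is passed to `init()` as a table; a usage sketch based on the `Prometheus.init(dict_name, options_or_prefix)` signature shown in the diff below (the dictionary name `prometheus_metrics` and the surrounding setup are examples, not part of this commit):

```lua
-- Assumes the usual init_worker_by_lua_block setup described earlier
-- in the README; "prometheus_metrics" is an example lua_shared_dict name.
prometheus = require("prometheus").init("prometheus_metrics", {
  sync_interval = 1,  -- per-worker counter / key index sync interval (seconds)
  -- lookup_max_size is removed by this commit and is now ignored
})
```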
25 changes: 2 additions & 23 deletions prometheus.lua
@@ -92,9 +92,6 @@ local DEFAULT_ERROR_METRIC_NAME = "nginx_metric_errors_total"
-- Default value for per-worker counter sync interval (seconds).
local DEFAULT_SYNC_INTERVAL = 1

-- Default max size of lookup table
local DEFAULT_LOOKUP_MAX_SIZE = 1000

-- Default set of latency buckets, 5ms to 10s:
local DEFAULT_BUCKETS = {0.005, 0.01, 0.02, 0.03, 0.05, 0.075, 0.1, 0.2, 0.3,
0.4, 0.5, 0.75, 1, 1.5, 2, 3, 4, 5, 10}
@@ -383,10 +380,6 @@ local function lookup_or_create(self, label_values)
self.name, self.label_count, cnt, table_to_string(label_values))
end

if self.lookup_size >= self.lookup_max_size then
self:reset_lookup()
end

local t = self.lookup
if label_values then
-- Don't use ipairs here to avoid inner loop generates trace first
@@ -444,8 +437,6 @@ local function lookup_or_create(self, label_values)
end
t[LEAF_KEY] = full_name

self.lookup_size = self.lookup_size + 1

local err = self._key_index:add(full_name, ERR_MSG_LRU_EVICTION)
if err then
return nil, err
@@ -625,12 +616,6 @@ local function observe(self, value, label_values)
end
end

-- Reset the metric name lookup table for a given metric.
local function reset_lookup(self)
self.lookup_size = 0
self.lookup = {}
end

-- Delete all metrics for a given gauge, counter or a histogram.
--
-- This is like `del`, but will delete all time series for all previously
@@ -699,7 +684,7 @@ local function reset(self)
end

-- Clean up the full metric name lookup table as well.
self:reset_lookup()
self.lookup = {}
end

-- Initialize the module.
@@ -736,13 +721,10 @@ function Prometheus.init(dict_name, options_or_prefix)
DEFAULT_ERROR_METRIC_NAME
self.sync_interval = options_or_prefix.sync_interval or
DEFAULT_SYNC_INTERVAL
self.lookup_max_size = options_or_prefix.lookup_max_size or
DEFAULT_LOOKUP_MAX_SIZE
else
self.prefix = options_or_prefix or ''
self.error_metric_name = DEFAULT_ERROR_METRIC_NAME
self.sync_interval = DEFAULT_SYNC_INTERVAL
self.lookup_max_size = DEFAULT_LOOKUP_MAX_SIZE
end

self.registry = {}
@@ -759,7 +741,7 @@ function Prometheus.init(dict_name, options_or_prefix)
metric_name = ngx_re_gsub(metric_name, "_sum$", "", "jo")
local m = self.registry[metric_name]
if m and m.typ == TYPE_HISTOGRAM then
m:reset_lookup()
m.lookup = {}
end
end)

@@ -869,9 +851,6 @@ local function register(self, name, help, label_names, buckets, typ)
-- ['my.net']['200'][LEAF_KEY] = 'http_count{host="my.net",status="200"}'
-- ['my.net']['500'][LEAF_KEY] = 'http_count{host="my.net",status="500"}'
lookup = {},
lookup_size = 0,
lookup_max_size = self.lookup_max_size,
reset_lookup = reset_lookup,
parent = self,
-- Store a reference for logging functions for faster lookup.
_log_error = function(...) self:log_error(...) end,
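
The `lookup` table kept per metric is a nested table keyed by label values, with the full metric name cached under a sentinel `LEAF_KEY` (see the comment in `register()` above). A hypothetical, simplified standalone sketch of that traversal, not the library's actual `lookup_or_create()`:

```lua
-- Hypothetical sketch; the real LEAF_KEY and lookup_or_create() differ in detail.
local LEAF_KEY = {}  -- sentinel key marking the leaf of the nested table

local function lookup_or_create(metric, label_values, full_name)
  local t = metric.lookup
  -- Walk (and extend) one nesting level per label value.
  for i = 1, #label_values do
    local v = label_values[i]
    local child = t[v]
    if not child then
      child = {}
      t[v] = child
    end
    t = child
  end
  -- Cache the full metric name at the leaf.
  if not t[LEAF_KEY] then
    t[LEAF_KEY] = full_name
  end
  return t[LEAF_KEY]
end

local m = { lookup = {} }
print(lookup_or_create(m, {"my.net", "200"},
  'http_count{host="my.net",status="200"}'))
-- Subsequent calls with the same label values return the cached entry.
```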
38 changes: 0 additions & 38 deletions prometheus_test.lua
@@ -849,44 +849,6 @@ function TestKeyIndex:testSync()
luaunit.assertEquals(self.last_deleted_key, "key2")
end

function TestPrometheus:testLookupMaxSize()
local c = self.p:counter("testc", "Counter", {"tag"})
local g = self.p:gauge("testg", "Gauge", {"tag"})
local h = self.p:histogram("testh", "Histogram", {"tag"})

luaunit.assertEquals(c.lookup_size, 0)
luaunit.assertEquals(g.lookup_size, 0)
luaunit.assertEquals(h.lookup_size, 0)
luaunit.assertEquals(c.lookup_max_size, 1000)
luaunit.assertEquals(g.lookup_max_size, 1000)
luaunit.assertEquals(h.lookup_max_size, 1000)

for _, max_size in ipairs({10, 17, 101}) do
local p1 = require('prometheus').init("metrics",
{lookup_max_size=max_size})
local c1 = p1:counter("testc1", "Counter", {"tag"})
local g1 = p1:gauge("testg1", "Gauge", {"tag"})
local h1 = p1:histogram("testh1", "Histogram", {"tag"})

luaunit.assertEquals(p1.lookup_max_size, max_size)
luaunit.assertEquals(c1.lookup_max_size, max_size)
luaunit.assertEquals(g1.lookup_max_size, max_size)
luaunit.assertEquals(h1.lookup_max_size, max_size)

for i = 0, max_size * 2.1, 1 do
c1:inc(1, {tostring(i)})
g1:set(3, {tostring(i)})
h1:observe(50, {tostring(i)})

luaunit.assertEquals(c1.lookup_size, (i % max_size)+1)
luaunit.assertEquals(g1.lookup_size, (i % max_size)+1)
luaunit.assertEquals(h1.lookup_size, (i % max_size)+1)
end
end

luaunit.assertEquals(self.dict:get("nginx_metric_errors_total"), 0)
end

function TestPrometheus.testPrintfTable()
local p = require('prometheus')
luaunit.assertEquals(p._table_to_string(nil), "nil")
