Skip to content

Commit

Permalink
cartridge: expose operation last error to issues
Browse files Browse the repository at this point in the history
Expose last operation error to Cartridge issues.

The original issue was about exposing migrations inconsistency from
new `migrations` tab to Cartridge issues as well. But using
straightforward approach is rather bad: checking inconsistency is
a full cluster map-reduce operation, and, if exposed to `get_issues`,
it will omit N^2 network requests since issues are collected from each
instance, there is no way to check whether migrations are consistent
without cluster map-reduce and there is no distinct migrator provider
-- any instance is migration provider. And, since `get_issues` may
trigger rather often, having such a feature may make cluster
unhealthy (we already had similar things with metrics [1]). Last error
is reset on each operation call.

1. tarantool/metrics#243

Closes #73
  • Loading branch information
DifferentialOrange committed Apr 12, 2024
1 parent f0198dc commit 7ad494d
Show file tree
Hide file tree
Showing 5 changed files with 245 additions and 9 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## Unreleased

## Added:
- Expose last operation error to Cartridge issues (gh-73)

## [1.0.0]

### Fixed:
Expand Down
53 changes: 46 additions & 7 deletions migrator/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ local migrator_error = require('errors').new_class(module_name)
local utils = require('migrator.utils')
vars:new('loader', require('migrator.directory-loader').new())
vars:new('use_cartridge_ddl', true)
vars:new('last_operation_error', nil)


local function get_diff(applied)
Expand Down Expand Up @@ -63,26 +64,39 @@ local function get_storage_timeout()
end

-- Makes sure that the passed migrations match the list from the local reader.
local function check_migrations_consistency(migrations_per_instance)
local function check_migrations_consistency(migrations_per_instance, opts)
opts = opts or {}

local names = fun.iter(vars.loader:list()):map(function(m) return m.name end):totable()
for host, applied in pairs(migrations_per_instance) do
if utils.compare(names, applied) == false then
local err_msg = string.format('Inconsistent migrations in cluster: ' ..
'expected: %s, applied on %s: %s', json.encode(names), host, json.encode(applied))

if opts.store_last_error then
vars.last_operation_error = err_msg
end
log.error(err_msg)
error(err_msg)
end
end
end

-- Makes sure there is no migrations list in cluster config.
local function check_no_migrations_in_config()
local function check_no_migrations_in_config(opts)
opts = opts or {}

local config = confapplier.get_readonly('migrations')
if config ~= nil and config.applied ~= nil and #config.applied > 0 then
error('Cannot perform an upgrade. A list of applied migrations is found in cluster ' ..
local err_msg = 'Cannot perform an upgrade. A list of applied migrations is found in cluster ' ..
'config. Current migrator version works only with local list of applied migrations. ' ..
'Run "move_migrations_state" to move cluster-wide migrations state to local ' ..
'storage before up invocation.')
'storage before up invocation.'

if opts.store_last_error then
vars.last_operation_error = err_msg
end
error(err_msg)
end
end

Expand All @@ -106,7 +120,9 @@ end
-- }
-- If no migrations applied, the table is empty.
local function up()
check_no_migrations_in_config()
vars.last_operation_error = nil

check_no_migrations_in_config{store_last_error = true}

local result = {}
local all_migrations = {}
Expand Down Expand Up @@ -144,12 +160,14 @@ local function up()
end
if #errors > 0 then
local err_msg = string.format('Errors happened during migrations: %s', json.encode(errors))

vars.last_operation_error = err_msg
log.error(err_msg)
error(err_msg)
end

log.verbose('All fibers joined, results are: %s', json.encode(result))
check_migrations_consistency(all_migrations)
check_migrations_consistency(all_migrations, {store_last_error = true})

local patch = {
['schema.yml'] = fetch_schema()
Expand All @@ -159,6 +177,7 @@ local function up()

local _, err = cartridge.config_patch_clusterwide(patch)
if err ~= nil then
vars.last_operation_error = err
log.error(err)
error(err)
end
Expand Down Expand Up @@ -239,6 +258,8 @@ end
-- @function move_migrations_state
-- @return table of instance uris with migration names copied.
local function move_migrations_state(current_server_only)
vars.last_operation_error = nil

local config = confapplier.get_readonly('migrations')

if config == nil or config.applied == nil or #config.applied == 0 then
Expand All @@ -263,7 +284,11 @@ local function move_migrations_state(current_server_only)
log.error('Failed to copy migrations state from cluster config on %s: %s',
uri, json.encode(err))
end
error("Failed to copy migrations state: " .. json.encode(errmap))

local err_msg = "Failed to copy migrations state: " .. json.encode(errmap)

vars.last_operation_error = err_msg
error(err_msg)
end

-- Remove applied migrations from cluster-wide configuration.
Expand All @@ -277,6 +302,7 @@ local function move_migrations_state(current_server_only)

local _, err = cartridge.config_patch_clusterwide(patch)
if err ~= nil then
vars.last_operation_error = err
log.error(err)
error(err)
end
Expand Down Expand Up @@ -381,11 +407,24 @@ local function validate_config(conf_new)
return true
end

local function get_issues()
if vars.last_operation_error ~= nil then
return {{
level = 'warning',
message = tostring(vars.last_operation_error),
}}
end

return {}
end

return {
init = init,

validate_config = validate_config,

get_issues = get_issues,

permanent = true,

upgrade = upgrade,
Expand Down
51 changes: 51 additions & 0 deletions test/helper/utils.lua
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
local checks = require('checks')

local t = require('luatest')
local luatest_utils = require('luatest.utils')

Expand Down Expand Up @@ -123,8 +125,57 @@ local function downgrade_ddl_schema_if_required(ddl_schema)
return ddl_schema
end

local function get_cluster_migrator_issues(main_server)
local cluster_issues = main_server:exec(function()
return require('cartridge.issues').list_on_cluster()
end)

local migrator_issues = {}
for _, issue in ipairs(cluster_issues) do
if issue.topic == 'migrator' then
table.insert(migrator_issues, issue)
end
end

return migrator_issues
end

local function assert_cluster_has_migrator_issue(opts)
checks({
main_server = 'table',
server_with_issue = 'table',
level = 'string',
content_fragments = 'table',
})

local migrator_issues = get_cluster_migrator_issues(opts.main_server)
t.assert_not_equals(migrator_issues, {}, 'issues found')
t.assert_equals(#migrator_issues, 1, ('only one issue expected, got %s'):format(migrator_issues))

local issue = migrator_issues[1]
t.assert_equals(issue.level, opts.level)
t.assert_equals(issue.replicaset_uuid, opts.server_with_issue.replicaset_uuid)
t.assert_equals(issue.instance_uuid, opts.server_with_issue.instance_uuid)
t.assert_equals(issue.topic, 'migrator')

for _, fragment in ipairs(opts.content_fragments) do
t.assert_str_contains(issue.message, fragment)
end
end

local function assert_cluster_has_no_migrator_issues(opts)
checks({
main_server = 'table',
})

local migrator_issues = get_cluster_migrator_issues(opts.main_server)
t.assert_equals(migrator_issues, {}, 'issues not found')
end

return {
set_sections = set_sections,
cleanup = cleanup,
downgrade_ddl_schema_if_required = downgrade_ddl_schema_if_required,
assert_cluster_has_migrator_issue = assert_cluster_has_migrator_issue,
assert_cluster_has_no_migrator_issues = assert_cluster_has_no_migrator_issues,
}
77 changes: 77 additions & 0 deletions test/integration/fockups_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,16 @@ g.test_error_in_migrations = function()
t.assert_equals(status, false)
t.assert_str_contains(tostring(resp), 'Oops')
t.assert_str_contains(tostring(resp), 'Errors happened during migrations')

utils.assert_cluster_has_migrator_issue{
main_server = g.cluster.main_server,
server_with_issue = g.cluster.main_server,
level = 'warning',
content_fragments = {
'Errors happened during migrations',
'Oops',
},
}
end

g.test_inconsistent_migrations = function()
Expand Down Expand Up @@ -155,6 +165,15 @@ g.test_inconsistent_migrations = function()
t.assert_equals(status, false)
t.assert_str_contains(tostring(resp), 'Inconsistent migrations in cluster: '
.. 'expected: [\"102_local\"],')

utils.assert_cluster_has_migrator_issue{
main_server = g.cluster.main_server,
server_with_issue = g.cluster.main_server,
level = 'warning',
content_fragments = {
'Inconsistent migrations in cluster: expected: [\"102_local\"],',
},
}
end

g.test_reload = function()
Expand Down Expand Up @@ -240,6 +259,64 @@ g.test_up_clusterwide_applied_migrations_exist = function(cg)
local status, resp = main:eval([[ return pcall(require('migrator').up) ]])
t.assert_not(status)
t.assert_str_contains(tostring(resp), 'A list of applied migrations is found in cluster config')

utils.assert_cluster_has_migrator_issue{
main_server = g.cluster.main_server,
server_with_issue = g.cluster.main_server,
level = 'warning',
content_fragments = {
'A list of applied migrations is found in cluster config',
},
}
end

g.test_inconsistent_migration_cartridge_issue_cleaned_after_fix = function()
local no_migrations_eval = [[
require('migrator').set_loader({
list = function() return {} end
})
]]
local one_migration_eval = [[
require('migrator').set_loader({
list = function(_)
return {
{
name = '102_local',
up = function() return true end
},
}
end
})
]]

for _, server in pairs(g.cluster.servers) do
server.net_box:eval(no_migrations_eval)
end
g.cluster.main_server.net_box:eval(one_migration_eval)

local status, resp = g.cluster.main_server:eval("return pcall(require('migrator').up)")
t.assert_equals(status, false)
t.assert_str_contains(
tostring(resp),
'Inconsistent migrations in cluster: expected: [\"102_local\"],'
)

utils.assert_cluster_has_migrator_issue{
main_server = g.cluster.main_server,
server_with_issue = g.cluster.main_server,
level = 'warning',
content_fragments = {
'Inconsistent migrations in cluster: expected: [\"102_local\"],',
},
}

for _, server in pairs(g.cluster.servers) do
server.net_box:eval(one_migration_eval)
end
status, resp = g.cluster.main_server:eval("return pcall(require('migrator').up)")
t.assert_equals(status, true, resp)

utils.assert_cluster_has_no_migrator_issues{main_server = g.cluster.main_server}
end

g.after_each(function()
Expand Down

0 comments on commit 7ad494d

Please sign in to comment.