Skip to content

Commit

Permalink
[test] Ensure no undue retry counts reset for all instances except th…
Browse files Browse the repository at this point in the history
…e one updating its count

--- with patch
    @ n:node-1

    COMMENT: Prepare monitor in idle status
      COMMENT: hack rejoin_grace_period to 0.001

    COMMENT: monitor.init()
    @ n:node-1 c:monitor
      monitor started
      boot id 1634301625.590444, last None
      detect mock service_command(('cluster', ['status', '--parallel', '--refresh']), {'local': False})
      .../test_monitor_ensure_restart_ri0/var/nodes_info.json updated

    COMMENT: monitor.do()
      COMMENT: proc ps count = 0

    COMMENT: create ccfg/cluster status.json

    COMMENT: monitor.do()
      node monitor status change: init => rejoin
      end of rejoin grace period: single node cluster
      node monitor status change: rejoin => idle
      COMMENT: proc ps count = 0
      COMMENT: create service restart-multiple-3.conf config
          [DEFAULT]
          id = 91d8e2b4-cdf8-4f52-9830-3214ec475c0f
          nodes = *
          orchestrate = ha
          [fs#1]
          type = flag
          restart = 3
          [fs#2]
          type = flag
          restart = 5
          [fs#3]
          type = flag
          restart = 2

    COMMENT: create restart-multiple-3 status.json, with {
      "avail": "up",
      "overall": "up",
      "topology": "failover",
      "frozen": 0,
      "monitor": {
        "status": "up",
        "overall": "up",
        "status_updated": 1634301625.7396312
      },
      "updated": 1634301625.7396312,
      "resources": {
        "fs#1": {
          "status": "down",
          "type": "fs.flag",
          "label": "fs.flag",
          "provisioned": {
            "state": true,
            "mtime": 1634301625.739407
          }
        },
        "fs#2": {
          "status": "down",
          "type": "fs.flag",
          "label": "fs.flag",
          "provisioned": {
            "state": true,
            "mtime": 1634301625.739413
          }
        },
        "fs#3": {
          "status": "up",
          "type": "fs.flag",
          "label": "fs.flag",
          "provisioned": {
            "state": true,
            "mtime": 1634301625.739414
          }
        }
      }
    }

    COMMENT: monitor.do()
      restart-multiple-3 local expect set: started
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 1/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 1/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#1 start returned success but resource is still not up
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 2/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 2/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#1 start returned success but resource is still not up
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 3/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 3/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#1 start returned success but resource is still not up
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 max restart (3) reached for resource fs#1 (fs.flag)
      service restart-multiple-3 resource fs#1 (fs.flag) degraded to down None
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 4/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 5/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 max restart (5) reached for resource fs#2 (fs.flag)
      service restart-multiple-3 resource fs#2 (fs.flag) degraded to down None
      COMMENT: proc ps count = 0

    COMMENT: monitor.do()
      COMMENT: proc ps count = 0

    COMMENT: monitor.do()
      COMMENT: proc ps count = 0

    COMMENT: monitor.do()
      COMMENT: proc ps count = 0

    COMMENT: monitor.do()
      COMMENT: proc ps count = 0
      COMMENT: ensure retries are {'fs#1': 4, 'fs#2': 6, 'fs#3': 0}
      COMMENT: ASSERT call('restart-multiple-3', ['start']) has not been called
      COMMENT: ASSERT call('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2,fs#3']) has not been called
      COMMENT: ASSERT call('restart-multiple-3', ['start', '--rid', 'fs#3']) has not been called
      COMMENT: ASSERT call('restart-multiple-3', ['start', '--rid', 'fs#1']) has not been called
      COMMENT: ASSERT call('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']) has been called 3 times
      COMMENT: ASSERT call('restart-multiple-3', ['start', '--rid', 'fs#2']) has been called 2 times

--- without patch
    @ n:node-1

    COMMENT: Prepare monitor in idle status
      COMMENT: hack rejoin_grace_period to 0.001

    COMMENT: monitor.init()
    @ n:node-1 c:monitor
      monitor started
      boot id 1634301694.791345, last None
      detect mock service_command(('cluster', ['status', '--parallel', '--refresh']), {'local': False})
      .../test_monitor_ensure_restart_ri0/var/nodes_info.json updated

    COMMENT: monitor.do()
      COMMENT: proc ps count = 0

    COMMENT: create ccfg/cluster status.json

    COMMENT: monitor.do()
      node monitor status change: init => rejoin
      end of rejoin grace period: single node cluster
      node monitor status change: rejoin => idle
      COMMENT: proc ps count = 0
      COMMENT: create service restart-multiple-3.conf config
          [DEFAULT]
          id = 878dfa49-f3e2-416d-b3f5-dd8bb09f6a66
          nodes = *
          orchestrate = ha
          [fs#1]
          type = flag
          restart = 3
          [fs#2]
          type = flag
          restart = 5
          [fs#3]
          type = flag
          restart = 2

    COMMENT: create restart-multiple-3 status.json, with {
      "avail": "up",
      "overall": "up",
      "topology": "failover",
      "frozen": 0,
      "monitor": {
        "status": "up",
        "overall": "up",
        "status_updated": 1634301694.951895
      },
      "updated": 1634301694.951895,
      "resources": {
        "fs#1": {
          "status": "down",
          "type": "fs.flag",
          "label": "fs.flag",
          "provisioned": {
            "state": true,
            "mtime": 1634301694.951589
          }
        },
        "fs#2": {
          "status": "down",
          "type": "fs.flag",
          "label": "fs.flag",
          "provisioned": {
            "state": true,
            "mtime": 1634301694.951595
          }
        },
        "fs#3": {
          "status": "up",
          "type": "fs.flag",
          "label": "fs.flag",
          "provisioned": {
            "state": true,
            "mtime": 1634301694.951597
          }
        }
      }
    }

    COMMENT: monitor.do()
      restart-multiple-3 local expect set: started
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 1/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 1/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#1 start returned success but resource is still not up
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 1/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 1/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#1 start returned success but resource is still not up
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 1/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 1/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#1 start returned success but resource is still not up
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 1/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 1/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#1 start returned success but resource is still not up
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 1/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 1/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#1 start returned success but resource is still not up
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 1/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 1/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#1 start returned success but resource is still not up
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 1/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 1/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#1 start returned success but resource is still not up
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 1/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 1/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#1 start returned success but resource is still not up
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 1/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 1/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1

    COMMENT: monitor.do()
      restart-multiple-3 monitor status change: restarting => idle
    E fs#1 start returned success but resource is still not up
    E fs#2 start returned success but resource is still not up
      service restart-multiple-3 restart resource fs#1 (fs.flag) down None, try 1/3
      service restart-multiple-3 restart resource fs#2 (fs.flag) down None, try 1/5
      restart-multiple-3 monitor status change: idle => restarting
      detect mock service_command(('restart-multiple-3', ['start', '--rid', 'fs#1,fs#2']), {})
      COMMENT: proc ps count = 1
      COMMENT: ensure retries are {'fs#1': 4, 'fs#2': 6, 'fs#3': 0}

    AssertionError: expected retries mismatch
    {'fs#1': 0, 'fs#2': 1, 'fs#3': 0} != {'fs#1': 4, 'fs#2': 6, 'fs#3': 0}

    Expected :{'fs#1': 4, 'fs#2': 6, 'fs#3': 0}
    Actual   :{'fs#1': 0, 'fs#2': 1, 'fs#3': 0}
    <Click to see difference>

    mocker = <pytest_mock.plugin.MockerFixture object at 0x103e5dcf8>, restart = 3

        @staticmethod
        @pytest.mark.parametrize("restart", [3])
        def test_monitor_ensure_restart_rids_that_needs_restart(
                mocker,
                restart
        ):
            monitor_test = MonitorTest(mocker=mocker, cluster_nodes=[env.Env.nodename])
            monitor_test.service_command_factory()
            monitor_test.prepare_monitor_idle()
            monitor_test.monitor._lazy_ready_period = 0.001
            svc = "restart-multiple-%s" % restart
            monitor_test.create_svc_config(svc)
            resources = _resources("down")
            resources.update(_resources("down", rid="fs#2"))
            resources.update(_resources("up", rid="fs#3"))
            monitor_test.create_service_status(svc, status="up", overall="up", resources=resources)
            for _ in range(10):
                monitor_test.do()

            smon_retries = {
                "fs#1": monitor_test.monitor.get_smon_retries(svc, "fs#1"),
                "fs#2": monitor_test.monitor.get_smon_retries(svc, "fs#2"),
                "fs#3": monitor_test.monitor.get_smon_retries(svc, "fs#3"),
            }
    >       assert smon_retries == {
                "fs#1": restart + 1,
                "fs#2": 6,
                "fs#3": 0,
            }, "expected retries mismatch"
    E       AssertionError: expected retries mismatch
    E       assert {'fs#1': 0, 'fs#2': 1, 'fs#3': 0} == {'fs#1': 4, 'fs#2': 6, 'fs#3': 0}
  • Loading branch information
cgalibern committed Oct 15, 2021
1 parent 5cc6af8 commit 32b23dd
Showing 1 changed file with 51 additions and 4 deletions.
55 changes: 51 additions & 4 deletions opensvc/tests/daemon/monitor/test_orchestrator_start.py
Expand Up @@ -93,6 +93,13 @@ def str_uuid():
"restart-3-stdby": "\n".join(["[DEFAULT]", "id = %s" % str_uuid(), "nodes = *", "orchestrate = ha",
"[fs#1]", "type = flag", "restart = 3", "standby = True"]),

"restart-multiple-3": "\n".join([
"[DEFAULT]", "id = %s" % str_uuid(), "nodes = *", "orchestrate = ha",
"[fs#1]", "type = flag", "restart = 3",
"[fs#2]", "type = flag", "restart = 5",
"[fs#3]", "type = flag", "restart = 2",
]),

"restart-delay": "\n".join(["[DEFAULT]", "id = %s" % str_uuid(), "nodes = *", "orchestrate = ha",
"[fs#1]", "type = flag", "restart = 3", "restart_delay = 1s"]),
"restart-delay-stdby": "\n".join(["[DEFAULT]", "id = %s" % str_uuid(), "nodes = *", "orchestrate = ha",
Expand Down Expand Up @@ -748,13 +755,13 @@ def test_monitor_respect_priority_and_max_parallel(
assert monitor_test.service_command.call_count == count


def _resources(status, standby=False, monitor=False):
def _resources(status, standby=False, monitor=False, rid="fs#1"):
"""
test helper that prepare resources data for fs#1 with standby and monitor
for create_service_status()
"""
resources = {
"fs#1": {
rid: {
"status": "stdby %s" % status if standby else status,
"type": "fs.flag",
"label": "fs.flag",
Expand All @@ -765,9 +772,9 @@ def _resources(status, standby=False, monitor=False):
}
}
if standby:
resources["fs#1"]["standby"] = True
resources[rid]["standby"] = True
if monitor:
resources["fs#1"]["monitor"] = True
resources[rid]["monitor"] = True
return resources


Expand Down Expand Up @@ -948,3 +955,43 @@ def create_service_status(status):
monitor_test.do()
monitor_test.assert_a_command_has_been_launched_x_times(call(svc, ['toc']), 1)
assert monitor_test.monitor.node_data.get(["services", "status", svc, "monitor", "status"]) == "idle"

@staticmethod
@pytest.mark.parametrize("restart", [3])
def test_monitor_ensure_restart_retries_are_correct_when_multiple_rids_have_retries(
        mocker,
        restart
):
    """
    Check the per-rid restart retry counters after several monitor loops.

    The service status is created with fs#1 and fs#2 down and fs#3 up,
    the monitor loop is run ten times, then the retry counter of every rid
    and the launched 'start --rid ...' commands are verified.
    """
    tester = MonitorTest(mocker=mocker, cluster_nodes=[env.Env.nodename])
    tester.service_command_factory()
    tester.prepare_monitor_idle()
    tester.monitor._lazy_ready_period = 0.001
    svcname = "restart-multiple-%s" % restart
    tester.create_svc_config(svcname)

    # one resources entry per rid, merged into a single status payload
    resources = {}
    for rid, rid_status in (("fs#1", "down"), ("fs#2", "down"), ("fs#3", "up")):
        resources.update(_resources(rid_status, rid=rid))
    tester.create_service_status(svcname, status="up", overall="up", resources=resources)

    for _ in range(10):
        tester.do()

    expected_retries = {"fs#1": restart + 1, "fs#2": 6, "fs#3": 0}
    tester.log('COMMENT: ASSERT retries are %s' % expected_retries)
    actual_retries = {
        rid: tester.monitor.get_smon_retries(svcname, rid)
        for rid in ("fs#1", "fs#2", "fs#3")
    }
    assert actual_retries == expected_retries, "expected retries mismatch"

    # start commands that must never have been issued
    never_launched = [call(svcname, ["start"])]
    for rids in ("fs#1,fs#2,fs#3", "fs#3", "fs#1"):
        never_launched.append(call(svcname, ["start", "--rid", rids]))
    tester.assert_command_has_not_been_launched(never_launched)

    # start commands that must have been issued, with their expected count
    for rids, count in (("fs#1,fs#2", restart), ("fs#2", 5 - restart)):
        tester.assert_a_command_has_been_launched_x_times(
            call(svcname, ['start', '--rid', rids]), count)

0 comments on commit 32b23dd

Please sign in to comment.