Skip to content

TestTestgresCommon::test_restart[local2] is unstable #323

@dmitry-lipetsk

Description

@dmitry-lipetsk

https://github.com/postgrespro/testgres/actions/runs/21259348791/job/61182602347

___________________ TestTestgresCommon.test_restart[local2] ____________________
[gw3] linux -- Python 3.12.12 /tmp/testgres_venv/bin/python

self = PostgresNode(name='testgres-b7f9893f-d3f0-47de-ad1b-00b41a81123d', port=None, base_dir='/tmp/tgsn_33ib9nn4')
params = []

    def restart(self, params=[]):
        """
        Restart this node using pg_ctl.
    
        Args:
            params: additional arguments for pg_ctl.
    
        Returns:
            This instance of :class:`.PostgresNode`.
        """
    
        _params = [
            self._get_bin_path("pg_ctl"),
            "-D", self.data_dir,
            "-l", self.pg_log_file,
            "-w",  # wait
            "restart"
        ] + params  # yapf: disable
    
        try:
>           error_code, out, error = execute_utility2(self.os_ops, _params, self.utils_log_file, verbose=True)
                                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

_params    = ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9nn4/data', '-l', '/tmp/tgsn_33ib9nn4/logs/postgresql.log', '-w', ...]
files      = [('/tmp/tgsn_33ib9nn4/data/postgresql.conf', b"\nfsync = off\nmax_worker_processes = 10\nlog_statement = 'all'\nlisten...TAL:  could not create any TCP/IP sockets\n2026-01-22 18:23:43.372 GMT [173252] LOG:  database system is shut down\n')]
msg        = 'Cannot restart node'
params     = []
self       = PostgresNode(name='testgres-b7f9893f-d3f0-47de-ad1b-00b41a81123d', port=None, base_dir='/tmp/tgsn_33ib9nn4')

src/node.py:1208: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
src/utils.py:101: in execute_utility2
    exit_status, out, error = os_ops.exec_command(
        args       = ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9nn4/data', '-l', '/tmp/tgsn_33ib9nn4/logs/postgresql.log', '-w', ...]
        exec_env   = None
        ignore_errors = False
        logfile    = '/tmp/tgsn_33ib9nn4/logs/utils.log'
        os_ops     = <testgres.operations.local_ops.LocalOperations object at 0x7ffa8106e9c0>
        verbose    = True
/tmp/testgres_venv/lib/python3.12/site-packages/testgres/operations/local_ops.py:266: in exec_command
    RaiseError.UtilityExitedWithNonZeroCode(
        cmd        = ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9nn4/data', '-l', '/tmp/tgsn_33ib9nn4/logs/postgresql.log', '-w', ...]
        cwd        = None
        encoding   = 'UTF-8'
        error      = 'pg_ctl: could not start server\nExamine the log output.\n'
        exec_env   = None
        expect_error = False
        get_process = False
        ignore_errors = False
        input      = None
        output     = 'waiting for server to shut down.... done\nserver stopped\nwaiting for server to start.... stopped waiting\n'
        process    = <Popen: returncode: 1 args: ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9...>
        self       = <testgres.operations.local_ops.LocalOperations object at 0x7ffa8106e9c0>
        shell      = False
        stderr     = None
        stdin      = None
        stdout     = None
        text       = False
        timeout    = None
        verbose    = True
        wait_exit  = False
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

cmd = ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9nn4/data', '-l', '/tmp/tgsn_33ib9nn4/logs/postgresql.log', '-w', ...]
exit_code = 1
msg_arg = 'pg_ctl: could not start server\nExamine the log output.\n'
error = 'pg_ctl: could not start server\nExamine the log output.\n'
out = 'waiting for server to shut down.... done\nserver stopped\nwaiting for server to start.... stopped waiting\n'

    @staticmethod
    def UtilityExitedWithNonZeroCode(cmd, exit_code, msg_arg, error, out):
        assert type(exit_code) == int  # noqa: E721
    
        msg_arg_s = __class__._TranslateDataIntoString(msg_arg)
        assert type(msg_arg_s) == str  # noqa: E721
    
        msg_arg_s = msg_arg_s.strip()
        if msg_arg_s == "":
            msg_arg_s = "#no_error_message"
    
        message = "Utility exited with non-zero code (" + str(exit_code) + "). Error: `" + msg_arg_s + "`"
>       raise ExecUtilException(
            message=message,
            command=cmd,
            exit_code=exit_code,
            out=out,
            error=error)
E       testgres.operations.exceptions.ExecUtilException: Utility exited with non-zero code (1). Error: `pg_ctl: could not start server
E       Examine the log output.`
E       Command: /usr/local/bin/pg_ctl -D /tmp/tgsn_33ib9nn4/data -l /tmp/tgsn_33ib9nn4/logs/postgresql.log -w restart
E       Exit code: 1
E       ---- Error:
E       pg_ctl: could not start server
E       Examine the log output.
E       
E       ---- Out:
E       waiting for server to shut down.... done
E       server stopped
E       waiting for server to start.... stopped waiting

__class__  = <class 'testgres.operations.raise_error.RaiseError'>
cmd        = ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9nn4/data', '-l', '/tmp/tgsn_33ib9nn4/logs/postgresql.log', '-w', ...]
error      = 'pg_ctl: could not start server\nExamine the log output.\n'
exit_code  = 1
message    = 'Utility exited with non-zero code (1). Error: `pg_ctl: could not start server\nExamine the log output.`'
msg_arg    = 'pg_ctl: could not start server\nExamine the log output.\n'
msg_arg_s  = 'pg_ctl: could not start server\nExamine the log output.'
out        = 'waiting for server to shut down.... done\nserver stopped\nwaiting for server to start.... stopped waiting\n'

/tmp/testgres_venv/lib/python3.12/site-packages/testgres/operations/raise_error.py:18: ExecUtilException

The above exception was the direct cause of the following exception:

self = <tests.test_testgres_common.TestTestgresCommon object at 0x7ffa7e1ce330>
node_svc = <tests.helpers.global_data.PostgresNodeService object at 0x7ffa7eb55c70>

    def test_restart(self, node_svc: PostgresNodeService):
        assert isinstance(node_svc, PostgresNodeService)
    
        with __class__.helper__get_node(node_svc) as node:
            node.init().start()
    
            # restart, ok
            res = node.execute('select 1')
            assert (res == [(1,)])
>           node.restart()

__class__  = <class 'tests.test_testgres_common.TestTestgresCommon'>
node       = PostgresNode(name='testgres-b7f9893f-d3f0-47de-ad1b-00b41a81123d', port=None, base_dir='/tmp/tgsn_33ib9nn4')
node_svc   = <tests.helpers.global_data.PostgresNodeService object at 0x7ffa7eb55c70>
res        = [(1,)]
self       = <tests.test_testgres_common.TestTestgresCommon object at 0x7ffa7e1ce330>

tests/test_testgres_common.py:324: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
src/node.py:1214: in restart
    raise_from(StartNodeException(msg, files), e)
        _params    = ['/usr/local/bin/pg_ctl', '-D', '/tmp/tgsn_33ib9nn4/data', '-l', '/tmp/tgsn_33ib9nn4/logs/postgresql.log', '-w', ...]
        files      = [('/tmp/tgsn_33ib9nn4/data/postgresql.conf', b"\nfsync = off\nmax_worker_processes = 10\nlog_statement = 'all'\nlisten...TAL:  could not create any TCP/IP sockets\n2026-01-22 18:23:43.372 GMT [173252] LOG:  database system is shut down\n')]
        msg        = 'Cannot restart node'
        params     = []
        self       = PostgresNode(name='testgres-b7f9893f-d3f0-47de-ad1b-00b41a81123d', port=None, base_dir='/tmp/tgsn_33ib9nn4')
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

value = None
from_value = ExecUtilException(message='Utility exited with non-zero code (1). Error: `pg_ctl: could not start server\nExamine the ...nwaiting for server to start.... stopped waiting\n', error='pg_ctl: could not start server\nExamine the log output.\n')

>   ???
E   src.exceptions.StartNodeException: Cannot restart node
E   /tmp/tgsn_33ib9nn4/data/postgresql.conf
E   ----
E   b"\nfsync = off\nmax_worker_processes = 10\nlog_statement = 'all'\nlisten_addresses = '127.0.0.1'\nport = 48670\n\nmax_replication_slots = 10\nmax_wal_senders = 10\n\nhot_standby = on\nwal_keep_size = 320\nwal_level = 'replica'\n"
E   
E   /tmp/tgsn_33ib9nn4/data/postgresql.auto.conf
E   ----
E   b'# Do not edit this file manually!\n# It will be overwritten by the ALTER SYSTEM command.\n'
E   
E   /tmp/tgsn_33ib9nn4/data/pg_hba.conf
E   ----
E   b'local   all             all                                     trust\nhost    all             all             127.0.0.1/32            trust\nhost    all             all             ::1/128                 trust\nlocal   replication     all                                     trust\nhost    replication     all             127.0.0.1/32            trust\nhost    replication     all             ::1/128                 trust\nlocal\treplication\tall\t\t\ttrust\nhost\treplication\tall\t127.0.0.1/32\ttrust\nhost\treplication\tall\t::1/128\t\ttrust\nhost\treplication\tall\t127.0.0.0/24\t\ttrust\nhost\tall\tall\t127.0.0.0/24\t\ttrust\nhost\tall\tall\tall\ttrust\nhost\treplication\tall\tall\ttrust\n'
E   
E   /tmp/tgsn_33ib9nn4/logs/postgresql.log
E   ----
E   b'2026-01-22 18:23:43.153 GMT [173201] LOG:  starting PostgreSQL 18.1 on x86_64-pc-linux-musl, compiled by gcc (Alpine 15.2.0) 15.2.0, 64-bit\n2026-01-22 18:23:43.153 GMT [173201] LOG:  listening on IPv4 address "127.0.0.1", port 48670\n2026-01-22 18:23:43.153 GMT [173201] LOG:  listening on Unix socket "/var/run/postgresql/.s.PGSQL.48670"\n2026-01-22 18:23:43.156 GMT [173208] LOG:  database system was shut down at 2026-01-22 18:23:43 GMT\n2026-01-22 18:23:43.159 GMT [173201] LOG:  database system is ready to accept connections\n2026-01-22 18:23:43.247 GMT [173229] LOG:  statement: select 1\n2026-01-22 18:23:43.248 GMT [173201] LOG:  received fast shutdown request\n2026-01-22 18:23:43.248 GMT [173201] LOG:  aborting any active transactions\n2026-01-22 18:23:43.251 GMT [173201] LOG:  background worker "logical replication launcher" (PID 173211) exited with exit code 1\n2026-01-22 18:23:43.252 GMT [173206] LOG:  shutting down\n2026-01-22 18:23:43.252 GMT [173206] LOG:  checkpoint starting: shutdown immediate\n2026-01-22 18:23:43.253 GMT [173206] LOG:  checkpoint complete: wrote 0 buffers (0.0%), wrote 3 SLRU buffers; 0 WAL file(s) added, 0 removed, 0 recycled; write=0.001 s, sync=0.001 s, total=0.001 s; sync files=0, longest=0.000 s, average=0.000 s; distance=0 kB, estimate=0 kB; lsn=0/175F068, redo lsn=0/175F068\n2026-01-22 18:23:43.262 GMT [173201] LOG:  database system is shut down\n2026-01-22 18:23:43.369 GMT [173252] LOG:  starting PostgreSQL 18.1 on x86_64-pc-linux-musl, compiled by gcc (Alpine 15.2.0) 15.2.0, 64-bit\n2026-01-22 18:23:43.369 GMT [173252] LOG:  could not bind IPv4 address "127.0.0.1": Address in use\n2026-01-22 18:23:43.369 GMT [173252] HINT:  Is another postmaster already running on port 48670? If not, wait a few seconds and retry.\n2026-01-22 18:23:43.369 GMT [173252] WARNING:  could not create listen socket for "127.0.0.1"\n2026-01-22 18:23:43.369 GMT [173252] FATAL:  could not create any TCP/IP sockets\n2026-01-22 18:23:43.372 GMT [173252] LOG:  database system is shut down\n'

from_value = ExecUtilException(message='Utility exited with non-zero code (1). Error: `pg_ctl: could not start server\nExamine the ...nwaiting for server to start.... stopped waiting\n', error='pg_ctl: could not start server\nExamine the log output.\n')
value      = None

<string>:3: StartNodeException
------------------------------ Captured log call -------------------------------
DEBUG    root:port_manager__generic.py:97 [port manager] Port 48670 is reserved.
DEBUG    root:port_manager__generic.py:97 [port manager] Port 48670 is released.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions