From e32e1b834e6bd4d938200fccfe17579f7f775043 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Mon, 1 Jun 2015 11:35:05 +0200 Subject: [PATCH 01/60] Add support for streaming replication protocol Introduce ReplicationConnection and ReplicationCursor classes, that encapsulate initiation of special type of PostgreSQL connection and handling of special replication commands only available in this special connection mode. The handling of stream of replication data from the server is modelled largely after the existing support for "COPY table TO file" command and pg_recvlogical tool supplied with PostgreSQL (though, it can also be used for physical replication.) --- doc/src/extras.rst | 122 +++++++++++++++++ lib/extras.py | 138 +++++++++++++++++++ psycopg/cursor.h | 6 + psycopg/cursor_type.c | 39 ++++++ psycopg/pqpath.c | 312 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 617 insertions(+) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index 36ef01320..9bc302e26 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -141,6 +141,128 @@ Logging cursor .. autoclass:: MinTimeLoggingCursor +Replication cursor +^^^^^^^^^^^^^^^^^^ + +.. autoclass:: ReplicationConnection + + This connection factory class can be used to open a special type of + connection that is used for streaming replication. + + Example:: + + from psycopg2.extras import ReplicationConnection, REPLICATION_PHYSICAL, REPLICATION_LOGICAL + conn = psycopg2.connect(dsn, connection_factory=ReplicationConnection) + cur = conn.cursor() + +.. seealso:: + + - PostgreSQL `Replication protocol`__ + + .. __: http://www.postgresql.org/docs/current/static/protocol-replication.html + +.. autoclass:: ReplicationCursor + + .. method:: identify_system() + + Get information about the cluster status in form of a dict with + ``systemid``, ``timeline``, ``xlogpos`` and ``dbname`` as keys. 
+ + Example:: + + >>> print cur.identify_system() + {'timeline': 1, 'systemid': '1234567890123456789', 'dbname': 'test', 'xlogpos': '0/1ABCDEF'} + + .. method:: create_replication_slot(slot_type, slot_name, output_plugin=None) + + Create streaming replication slot. + + :param slot_type: type of replication: either `REPLICATION_PHYSICAL` or + `REPLICATION_LOGICAL` + :param slot_name: name of the replication slot to be created + :param output_plugin: name of the logical decoding output plugin to use + (logical replication only) + + Example:: + + cur.create_replication_slot(REPLICATION_LOGICAL, "testslot", "test_decoding") + + .. method:: drop_replication_slot(slot_name) + + Drop streaming replication slot. + + :param slot_name: name of the replication slot to drop + + Example:: + + cur.drop_replication_slot("testslot") + + .. method:: start_replication(file, slot_type, slot_name=None, start_lsn=None, timeline=0, keepalive_interval=10, options=None) + + Start and consume replication stream. + + :param file: a file-like object to write replication stream messages to + :param slot_type: type of replication: either `REPLICATION_PHYSICAL` or + `REPLICATION_LOGICAL` + :param slot_name: name of the replication slot to use (required for + logical replication) + :param start_lsn: the point in replication stream (WAL position) to start + from, in the form ``XXX/XXX`` (forward-slash separated + pair of hexadecimals) + :param timeline: WAL history timeline to start streaming from (optional, + can only be used with physical replication) + :param keepalive_interval: interval (in seconds) to send keepalive + messages to the server, in case there was no + communication during that period of time + :param options: a dictionary of options to pass to logical replication + slot + + The ``keepalive_interval`` must be greater than zero. 
+ + This method never returns unless an error message is sent from the + server, or the server closes the connection, or there is an exception in the + ``write()`` method of the ``file`` object. + + One can even use ``sys.stdout`` as the destination (this is only good for + testing purposes, however):: + + >>> cur.start_replication(sys.stdout, REPLICATION_LOGICAL, "testslot") + ... + + This method acts much like the `~cursor.copy_to()` with an important + distinction that ``write()`` method return value is driving the + server-side replication cursor. In order to report to the server that + all the messages up to the current one have been stored reliably, one + should return a true value (i.e. something that satisfies ``if retval:`` + condition) from the ``write`` callback:: + + class ReplicationStreamWriter(object): + def write(self, msg): + if store_message_reliably(msg): + return True + + cur.start_replication(writer, REPLICATION_LOGICAL, "testslot") + ... + + .. note:: + + One needs to be aware that failure to update the server-side cursor + on any one replication slot properly by constantly consuming and + reporting success to the server can eventually lead to "disk full" + condition on the server, because the server retains all the WAL + segments that might be needed to stream the changes via currently + open replication slots. + + Drop any open replication slots that are no longer being used. The + list of open slots can be obtained by running a query like ``SELECT * + FROM pg_replication_slots``. + +.. data:: REPLICATION_PHYSICAL + +.. data:: REPLICATION_LOGICAL + +.. index:: + pair: Cursor; Replication .. 
index:: diff --git a/lib/extras.py b/lib/extras.py index c9f1cbcd2..4d92e6fa3 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -437,6 +437,144 @@ def callproc(self, procname, vars=None): return LoggingCursor.callproc(self, procname, vars) +class ReplicationConnection(_connection): + """A connection that uses `ReplicationCursor` automatically.""" + + def __init__(self, *args, **kwargs): + """Initializes a replication connection, by adding appropriate replication parameter to the provided dsn arguments.""" + + if len(args): + dsn = args[0] + + # FIXME: could really use parse_dsn here + + if dsn.startswith('postgres://') or dsn.startswith('postgresql://'): + # poor man's url parsing + if dsn.rfind('?') > 0: + if not dsn.endswith('?'): + dsn += '&' + else: + dsn += '?' + else: + dsn += ' ' + dsn += 'replication=database' + args = [dsn] + list(args[1:]) + else: + dbname = kwargs.get('dbname', None) + if dbname is None: + kwargs['dbname'] = 'replication' + + if kwargs.get('replication', None) is None: + kwargs['replication'] = 'database' if dbname else 'true' + + super(ReplicationConnection, self).__init__(*args, **kwargs) + + # prevent auto-issued BEGIN statements + self.autocommit = True + + def cursor(self, *args, **kwargs): + kwargs.setdefault('cursor_factory', ReplicationCursor) + return super(ReplicationConnection, self).cursor(*args, **kwargs) + + +"""Streamging replication types.""" +REPLICATION_PHYSICAL = 0 +REPLICATION_LOGICAL = 1 + +class ReplicationCursor(_cursor): + """A cursor used for replication commands.""" + + def identify_system(self): + """Get information about the cluster status.""" + + self.execute("IDENTIFY_SYSTEM") + return dict(zip(['systemid', 'timeline', 'xlogpos', 'dbname'], + self.fetchall()[0])) + + def quote_ident(self, ident): + # FIXME: use PQescapeIdentifier or psycopg_escape_identifier_easy, somehow + return '"%s"' % ident.replace('"', '""') + + def create_replication_slot(self, slot_type, slot_name, output_plugin=None): + """Create 
streaming replication slot.""" + + command = "CREATE_REPLICATION_SLOT %s " % self.quote_ident(slot_name) + + if slot_type == REPLICATION_LOGICAL: + if output_plugin is None: + raise RuntimeError("output_plugin is required for logical replication slot") + + command += "LOGICAL %s" % self.quote_ident(output_plugin) + + elif slot_type == REPLICATION_PHYSICAL: + if output_plugin is not None: + raise RuntimeError("output_plugin is not applicable to physical replication") + + command += "PHYSICAL" + + else: + raise RuntimeError("unrecognized replication slot type") + + return self.execute(command) + + def drop_replication_slot(self, slot_name): + """Drop streaming replication slot.""" + + command = "DROP_REPLICATION_SLOT %s" % self.quote_ident(slot_name) + return self.execute(command) + + def start_replication(self, o, slot_type, slot_name=None, start_lsn=None, + timeline=0, keepalive_interval=10, options=None): + """Start and consume replication stream.""" + + if keepalive_interval <= 0: + raise RuntimeError("keepalive_interval must be > 0: %d" % keepalive_interval) + + command = "START_REPLICATION " + + if slot_type == REPLICATION_LOGICAL and slot_name is None: + raise RuntimeError("slot_name is required for logical replication slot") + + if slot_name: + command += "SLOT %s " % self.quote_ident(slot_name) + + if slot_type == REPLICATION_LOGICAL: + command += "LOGICAL " + elif slot_type == REPLICATION_PHYSICAL: + command += "PHYSICAL " + else: + raise RuntimeError("unrecognized replication slot type") + + if start_lsn is None: + start_lsn = '0/0' + + # reparse lsn to catch possible garbage + lsn = start_lsn.split('/') + command += "%X/%X" % (int(lsn[0], 16), int(lsn[1], 16)) + + if timeline != 0: + if slot_type == REPLICATION_LOGICAL: + raise RuntimeError("cannot specify timeline for logical replication") + + if timeline < 0: + raise RuntimeError("timeline must be >= 0: %d" % timeline) + + command += " TIMELINE %d" % timeline + + if options: + if slot_type == 
REPLICATION_PHYSICAL: + raise RuntimeError("cannot specify plugin options for physical replication") + + command += " (" + for k,v in options.iteritems(): + if not command.endswith('('): + command += ", " + command += "%s %s" % (self.quote_ident(k), _A(str(v)).getquoted()) + command += ")" + + return self.start_replication_expert(o, command, keepalive_interval) + + # a dbtype and adapter for Python UUID type class UUID_adapter(object): diff --git a/psycopg/cursor.h b/psycopg/cursor.h index e291d45f6..93b697b2d 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -72,6 +72,8 @@ struct cursorObject { #define DEFAULT_COPYSIZE 16384 #define DEFAULT_COPYBUFF 8192 + int keepalive_interval; /* interval for keepalive messages in replication mode */ + PyObject *tuple_factory; /* factory for result tuples */ PyObject *tzinfo_factory; /* factory for tzinfo objects */ @@ -88,6 +90,10 @@ struct cursorObject { }; +/* streaming replication modes */ +#define CURSOR_REPLICATION_PHYSICAL 0 +#define CURSOR_REPLICATION_LOGICAL 1 + /* C-callable functions in cursor_int.c and cursor_type.c */ BORROWED HIDDEN PyObject *curs_get_cast(cursorObject *self, PyObject *oid); diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index cd8d5ca3f..954e764de 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1579,6 +1579,43 @@ psyco_curs_copy_expert(cursorObject *self, PyObject *args, PyObject *kwargs) return res; } +#define psyco_curs_start_replication_expert_doc \ +"start_replication_expert(file, command, keepalive_interval) -- Start and consume replication stream with direct command." 
+ +static PyObject * +psyco_curs_start_replication_expert(cursorObject *self, PyObject *args) +{ + PyObject *file, *res = NULL; + char *command; + int keepalive_interval; + + if (!PyArg_ParseTuple(args, "O&si", + _psyco_curs_has_write_check, &file, + &command, &keepalive_interval)) { + return NULL; + } + + EXC_IF_CURS_CLOSED(self); + EXC_IF_CURS_ASYNC(self, start_replication_expert); + EXC_IF_GREEN(start_replication_expert); + EXC_IF_TPC_PREPARED(self->conn, start_replication_expert); + + Dprintf("psyco_curs_start_replication_expert: command = %s", command); + + self->copysize = 0; + Py_INCREF(file); + self->copyfile = file; + self->keepalive_interval = keepalive_interval; + + if (pq_execute(self, command, 0, 1 /* no_result */, 1 /* no_begin */) >= 0) { + res = Py_None; + Py_INCREF(Py_None); + } + Py_CLEAR(self->copyfile); + + return res; +} + /* extension: closed - return true if cursor is closed */ #define psyco_curs_closed_doc \ @@ -1753,6 +1790,8 @@ static struct PyMethodDef cursorObject_methods[] = { METH_VARARGS|METH_KEYWORDS, psyco_curs_copy_to_doc}, {"copy_expert", (PyCFunction)psyco_curs_copy_expert, METH_VARARGS|METH_KEYWORDS, psyco_curs_copy_expert_doc}, + {"start_replication_expert", (PyCFunction)psyco_curs_start_replication_expert, + METH_VARARGS, psyco_curs_start_replication_expert_doc}, {NULL} }; diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 5e1974beb..55025d823 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -40,7 +40,14 @@ #include "psycopg/pgtypes.h" #include "psycopg/error.h" +#include "postgres_fe.h" +#include "access/xlog_internal.h" +#include "common/fe_memutils.h" +#include "libpq-fe.h" + #include +#include +#include extern HIDDEN PyObject *psyco_DescriptionType; @@ -1514,6 +1521,302 @@ _pq_copy_out_v3(cursorObject *curs) return ret; } +/* support routines taken from pg_basebackup/streamutil.c */ +/* + * Frontend version of GetCurrentTimestamp(), since we are not linked with + * backend code. 
The protocol always uses integer timestamps, regardless of + * server setting. + */ +static int64 +feGetCurrentTimestamp(void) +{ + int64 result; + struct timeval tp; + + gettimeofday(&tp, NULL); + + result = (int64) tp.tv_sec - + ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY); + + result = (result * USECS_PER_SEC) + tp.tv_usec; + + return result; +} + +/* + * Converts an int64 to network byte order. + */ +static void +fe_sendint64(int64 i, char *buf) +{ + uint32 n32; + + /* High order half first, since we're doing MSB-first */ + n32 = (uint32) (i >> 32); + n32 = htonl(n32); + memcpy(&buf[0], &n32, 4); + + /* Now the low order half */ + n32 = (uint32) i; + n32 = htonl(n32); + memcpy(&buf[4], &n32, 4); +} + +/* + * Converts an int64 from network byte order to native format. + */ +static int64 +fe_recvint64(char *buf) +{ + int64 result; + uint32 h32; + uint32 l32; + + memcpy(&h32, buf, 4); + memcpy(&l32, buf + 4, 4); + h32 = ntohl(h32); + l32 = ntohl(l32); + + result = h32; + result <<= 32; + result |= l32; + + return result; +} + +static int +sendFeedback(PGconn *conn, XLogRecPtr written_lsn, XLogRecPtr fsync_lsn, + int replyRequested) +{ + char replybuf[1 + 8 + 8 + 8 + 8 + 1]; + int len = 0; + + Dprintf("_pq_copy_both_v3: confirming write up to %X/%X, flush to %X/%X\n", + (uint32) (written_lsn >> 32), (uint32) written_lsn, + (uint32) (fsync_lsn >> 32), (uint32) fsync_lsn); + + replybuf[len] = 'r'; + len += 1; + fe_sendint64(written_lsn, &replybuf[len]); /* write */ + len += 8; + fe_sendint64(fsync_lsn, &replybuf[len]); /* flush */ + len += 8; + fe_sendint64(InvalidXLogRecPtr, &replybuf[len]); /* apply */ + len += 8; + fe_sendint64(feGetCurrentTimestamp(), &replybuf[len]); /* sendTime */ + len += 8; + replybuf[len] = replyRequested ? 
1 : 0; /* replyRequested */ + len += 1; + + if (PQputCopyData(conn, replybuf, len) <= 0 || PQflush(conn)) { + return 0; + } + + return 1; +} + +/* used for streaming replication only */ +static int +_pq_copy_both_v3(cursorObject *curs) +{ + PyObject *tmp = NULL; + PyObject *write_func = NULL; + PyObject *obj = NULL; + int ret = -1; + int is_text; + + PGconn *conn; + char *buffer = NULL; + fd_set fds; + struct timeval last_comm, curr_time, ping_time, time_diff; + int len, hdr, reply, sel; + + XLogRecPtr written_lsn = InvalidXLogRecPtr; + XLogRecPtr fsync_lsn = InvalidXLogRecPtr; + XLogRecPtr wal_end = InvalidXLogRecPtr; + + if (!curs->copyfile) { + PyErr_SetString(ProgrammingError, + "can't execute START_REPLICATION: use the start_replication() method instead"); + goto exit; + } + + if (curs->keepalive_interval <= 0) { + PyErr_Format(PyExc_RuntimeError, "keepalive_interval must be > 0: %d", + curs->keepalive_interval); + goto exit; + } + + if (!(write_func = PyObject_GetAttrString(curs->copyfile, "write"))) { + Dprintf("_pq_copy_both_v3: can't get o.write"); + goto exit; + } + + /* if the file is text we must pass it unicode. */ + if (-1 == (is_text = psycopg_is_text_file(curs->copyfile))) { + goto exit; + } + + CLEARPGRES(curs->pgres); + + /* timestamp of last communication with the server */ + gettimeofday(&last_comm, NULL); + + conn = curs->conn->pgconn; + + while (1) { + len = PQgetCopyData(conn, &buffer, 1 /* async! 
*/); + if (len < 0) { + break; + } + if (len == 0) { + FD_ZERO(&fds); + FD_SET(PQsocket(conn), &fds); + + /* set up timeout according to keepalive_interval, but no less than 1 second */ + gettimeofday(&curr_time, NULL); + + ping_time = last_comm; + ping_time.tv_sec += curs->keepalive_interval; + + if (timercmp(&ping_time, &curr_time, >)) { + timersub(&ping_time, &curr_time, &time_diff); + + Py_BEGIN_ALLOW_THREADS; + sel = select(PQsocket(conn) + 1, &fds, NULL, NULL, &time_diff); + Py_END_ALLOW_THREADS; + } + else { + sel = 0; /* pretend select() timed out */ + } + + if (sel < 0) { + if (errno != EINTR) { + PyErr_SetFromErrno(PyExc_OSError); + goto exit; + } + if (PyErr_CheckSignals()) { + goto exit; + } + continue; + } + + if (sel > 0) { + if (!PQconsumeInput(conn)) { + Dprintf("_pq_copy_both_v3: PQconsumeInput failed"); + pq_raise(curs->conn, curs, NULL); + goto exit; + } + } + else { /* timeout */ + if (!sendFeedback(conn, written_lsn, fsync_lsn, false)) { + pq_raise(curs->conn, curs, NULL); + goto exit; + } + } + gettimeofday(&last_comm, NULL); + continue; + } + if (len > 0 && buffer) { + gettimeofday(&last_comm, NULL); + + Dprintf("_pq_copy_both_v3: msg=%c, len=%d", buffer[0], len); + if (buffer[0] == 'w') { + /* msgtype(1), dataStart(8), walEnd(8), sendTime(8) */ + hdr = 1 + 8 + 8 + 8; + if (len < hdr + 1) { + PyErr_Format(PyExc_RuntimeError, + "streaming header too small in data message: %d", len); + goto exit; + } + + wal_end = fe_recvint64(buffer + 1 + 8); + + if (is_text) { + obj = PyUnicode_Decode(buffer + hdr, len - hdr, curs->conn->codec, NULL); + } + else { + obj = Bytes_FromStringAndSize(buffer + hdr, len - hdr); + } + if (!obj) { goto exit; } + + tmp = PyObject_CallFunctionObjArgs(write_func, obj, NULL); + Py_DECREF(obj); + + if (tmp == NULL) { + Dprintf("_pq_copy_both_v3: write_func returned NULL"); + goto exit; + } + + written_lsn = Max(wal_end, written_lsn); + + /* if write() returned true-ish, we confirm LSN with the server */ + if 
(PyObject_IsTrue(tmp)) { + fsync_lsn = written_lsn; + + if (!sendFeedback(conn, written_lsn, fsync_lsn, false)) { + pq_raise(curs->conn, curs, NULL); + goto exit; + } + gettimeofday(&last_comm, NULL); + } + Py_DECREF(tmp); + + } + else if (buffer[0] == 'k') { + /* msgtype(1), walEnd(8), sendTime(8), reply(1) */ + hdr = 1 + 8 + 8; + if (len < hdr + 1) { + PyErr_Format(PyExc_RuntimeError, + "streaming header too small in keepalive message: %d", len); + goto exit; + } + + reply = buffer[hdr]; + if (reply) { + if (!sendFeedback(conn, written_lsn, fsync_lsn, false)) { + pq_raise(curs->conn, curs, NULL); + goto exit; + } + gettimeofday(&last_comm, NULL); + } + } + else { + PyErr_Format(PyExc_RuntimeError, + "unrecognized streaming message type: \"%c\"", buffer[0]); + goto exit; + } + + /* buffer is allocated on every PQgetCopyData() call */ + PQfreemem(buffer); + buffer = NULL; + } + } + + if (len == -2) { + pq_raise(curs->conn, curs, NULL); + goto exit; + } + if (len == -1) { + curs->pgres = PQgetResult(curs->conn->pgconn); + + if (curs->pgres && PQresultStatus(curs->pgres) == PGRES_FATAL_ERROR) + pq_raise(curs->conn, curs, NULL); + + CLEARPGRES(curs->pgres); + } + + ret = 1; + +exit: + if (buffer) { + PQfreemem(buffer); + } + + Py_XDECREF(write_func); + return ret; +} + int pq_fetch(cursorObject *curs, int no_result) { @@ -1573,6 +1876,15 @@ pq_fetch(cursorObject *curs, int no_result) CLEARPGRES(curs->pgres); break; + case PGRES_COPY_BOTH: + Dprintf("pq_fetch: data from a streaming replication slot (no tuples)"); + curs->rowcount = -1; + ex = _pq_copy_both_v3(curs); + /* error caught by out glorious notice handler */ + if (PyErr_Occurred()) ex = -1; + CLEARPGRES(curs->pgres); + break; + case PGRES_TUPLES_OK: if (!no_result) { Dprintf("pq_fetch: got tuples"); From 80da76d43fb23e5ba915eac499927a12d4526496 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 2 Jun 2015 11:42:56 +0200 Subject: [PATCH 02/60] Get rid of postgres internal includes; check for Win32 for 
htonl() --- psycopg/pqpath.c | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 55025d823..9e4424a84 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -40,15 +40,14 @@ #include "psycopg/pgtypes.h" #include "psycopg/error.h" -#include "postgres_fe.h" -#include "access/xlog_internal.h" -#include "common/fe_memutils.h" #include "libpq-fe.h" -#include -#include +/* htonl, ntohl */ +#ifdef _WIN32 +#include +#else #include - +#endif extern HIDDEN PyObject *psyco_DescriptionType; @@ -1522,20 +1521,33 @@ _pq_copy_out_v3(cursorObject *curs) } /* support routines taken from pg_basebackup/streamutil.c */ +/* type and constant definitions from internal postgres includes */ +typedef unsigned int uint32; +typedef unsigned PG_INT64_TYPE XLogRecPtr; + +#define InvalidXLogRecPtr ((XLogRecPtr) 0) + +/* Julian-date equivalents of Day 0 in Unix and Postgres reckoning */ +#define UNIX_EPOCH_JDATE 2440588 /* == date2j(1970, 1, 1) */ +#define POSTGRES_EPOCH_JDATE 2451545 /* == date2j(2000, 1, 1) */ + +#define SECS_PER_DAY 86400 +#define USECS_PER_SEC 1000000LL + /* * Frontend version of GetCurrentTimestamp(), since we are not linked with * backend code. The protocol always uses integer timestamps, regardless of * server setting. */ -static int64 +static pg_int64 feGetCurrentTimestamp(void) { - int64 result; + pg_int64 result; struct timeval tp; gettimeofday(&tp, NULL); - result = (int64) tp.tv_sec - + result = (pg_int64) tp.tv_sec - ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY); result = (result * USECS_PER_SEC) + tp.tv_usec; @@ -1547,7 +1559,7 @@ feGetCurrentTimestamp(void) * Converts an int64 to network byte order. */ static void -fe_sendint64(int64 i, char *buf) +fe_sendint64(pg_int64 i, char *buf) { uint32 n32; @@ -1565,10 +1577,10 @@ fe_sendint64(int64 i, char *buf) /* * Converts an int64 from network byte order to native format. 
*/ -static int64 +static pg_int64 fe_recvint64(char *buf) { - int64 result; + pg_int64 result; uint32 h32; uint32 l32; @@ -1709,7 +1721,7 @@ _pq_copy_both_v3(cursorObject *curs) } } else { /* timeout */ - if (!sendFeedback(conn, written_lsn, fsync_lsn, false)) { + if (!sendFeedback(conn, written_lsn, fsync_lsn, 0)) { pq_raise(curs->conn, curs, NULL); goto exit; } @@ -1748,13 +1760,15 @@ _pq_copy_both_v3(cursorObject *curs) goto exit; } - written_lsn = Max(wal_end, written_lsn); + /* update the LSN position we've written up to */ + if (written_lsn < wal_end) + written_lsn = wal_end; /* if write() returned true-ish, we confirm LSN with the server */ if (PyObject_IsTrue(tmp)) { fsync_lsn = written_lsn; - if (!sendFeedback(conn, written_lsn, fsync_lsn, false)) { + if (!sendFeedback(conn, written_lsn, fsync_lsn, 0)) { pq_raise(curs->conn, curs, NULL); goto exit; } @@ -1774,7 +1788,7 @@ _pq_copy_both_v3(cursorObject *curs) reply = buffer[hdr]; if (reply) { - if (!sendFeedback(conn, written_lsn, fsync_lsn, false)) { + if (!sendFeedback(conn, written_lsn, fsync_lsn, 0)) { pq_raise(curs->conn, curs, NULL); goto exit; } From 44b705f88f45211a778c07f909b2aa1d91da05e6 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 2 Jun 2015 16:52:48 +0200 Subject: [PATCH 03/60] Improve identify_system: don't hardcode column names --- lib/extras.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/extras.py b/lib/extras.py index 4d92e6fa3..92dd81926 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -488,7 +488,7 @@ def identify_system(self): """Get information about the cluster status.""" self.execute("IDENTIFY_SYSTEM") - return dict(zip(['systemid', 'timeline', 'xlogpos', 'dbname'], + return dict(zip([_.name for _ in self.description], self.fetchall()[0])) def quote_ident(self, ident): From f14521f8cb567c98814d392ba7ec196b4a7df77c Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 3 Jun 2015 14:10:20 +0200 Subject: [PATCH 04/60] Add libpq_support.c and 
win32_support.c Move libpq-specific code for streaming replication support into a separate file. Also provide gettimeofday() on Win32, implementation copied from Postgres core. --- psycopg/libpq_support.c | 111 ++++++++++++++++++++++++++++++++++++++++ psycopg/libpq_support.h | 40 +++++++++++++++ psycopg/pqpath.c | 83 ++---------------------------- psycopg/win32_support.c | 61 ++++++++++++++++++++++ psycopg/win32_support.h | 36 +++++++++++++ setup.py | 2 + 6 files changed, 254 insertions(+), 79 deletions(-) create mode 100644 psycopg/libpq_support.c create mode 100644 psycopg/libpq_support.h create mode 100644 psycopg/win32_support.c create mode 100644 psycopg/win32_support.h diff --git a/psycopg/libpq_support.c b/psycopg/libpq_support.c new file mode 100644 index 000000000..95a3ebc61 --- /dev/null +++ b/psycopg/libpq_support.c @@ -0,0 +1,111 @@ +/* libpq_support.c - functions not provided by libpq, but which are + * required for advanced communication with the server, such as + * streaming replication + * + * Copyright (C) 2003-2015 Federico Di Gregorio + * + * This file is part of psycopg. + * + * psycopg2 is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * In addition, as a special exception, the copyright holders give + * permission to link this program with the OpenSSL library (or with + * modified versions of OpenSSL that use the same license as OpenSSL), + * and distribute linked combinations including the two. + * + * You must obey the GNU Lesser General Public License in all respects for + * all of the code used other than OpenSSL. + * + * psycopg2 is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public + * License for more details. + */ + +#define PSYCOPG_MODULE +#include "psycopg/psycopg.h" + +#include "psycopg/libpq_support.h" + +/* htonl(), ntohl() */ +#ifdef _WIN32 +#include +/* gettimeofday() */ +#include "psycopg/win32_support.h" +#else +#include +#endif + +/* support routines taken from pg_basebackup/streamutil.c */ + +/* Julian-date equivalents of Day 0 in Unix and Postgres reckoning */ +#define UNIX_EPOCH_JDATE 2440588 /* == date2j(1970, 1, 1) */ +#define POSTGRES_EPOCH_JDATE 2451545 /* == date2j(2000, 1, 1) */ + +#define SECS_PER_DAY 86400 +#define USECS_PER_SEC 1000000LL + +/* + * Frontend version of GetCurrentTimestamp(), since we are not linked with + * backend code. The protocol always uses integer timestamps, regardless of + * server setting. + */ +pg_int64 +feGetCurrentTimestamp(void) +{ + pg_int64 result; + struct timeval tp; + + gettimeofday(&tp, NULL); + + result = (pg_int64) tp.tv_sec - + ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY); + + result = (result * USECS_PER_SEC) + tp.tv_usec; + + return result; +} + +/* + * Converts an int64 to network byte order. + */ +void +fe_sendint64(pg_int64 i, char *buf) +{ + uint32 n32; + + /* High order half first, since we're doing MSB-first */ + n32 = (uint32) (i >> 32); + n32 = htonl(n32); + memcpy(&buf[0], &n32, 4); + + /* Now the low order half */ + n32 = (uint32) i; + n32 = htonl(n32); + memcpy(&buf[4], &n32, 4); +} + +/* + * Converts an int64 from network byte order to native format. 
+ */ +pg_int64 +fe_recvint64(char *buf) +{ + pg_int64 result; + uint32 h32; + uint32 l32; + + memcpy(&h32, buf, 4); + memcpy(&l32, buf + 4, 4); + h32 = ntohl(h32); + l32 = ntohl(l32); + + result = h32; + result <<= 32; + result |= l32; + + return result; +} diff --git a/psycopg/libpq_support.h b/psycopg/libpq_support.h new file mode 100644 index 000000000..007f5e183 --- /dev/null +++ b/psycopg/libpq_support.h @@ -0,0 +1,40 @@ +/* libpq_support.h - definitions for libpq_support.c + * + * Copyright (C) 2003-2015 Federico Di Gregorio + * + * This file is part of psycopg. + * + * psycopg2 is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * In addition, as a special exception, the copyright holders give + * permission to link this program with the OpenSSL library (or with + * modified versions of OpenSSL that use the same license as OpenSSL), + * and distribute linked combinations including the two. + * + * You must obey the GNU Lesser General Public License in all respects for + * all of the code used other than OpenSSL. + * + * psycopg2 is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ */ +#ifndef PSYCOPG_LIBPQ_SUPPORT_H +#define PSYCOPG_LIBPQ_SUPPORT_H 1 + +#include "psycopg/config.h" + +/* type and constant definitions from internal postgres includes */ +typedef unsigned int uint32; +typedef unsigned PG_INT64_TYPE XLogRecPtr; + +#define InvalidXLogRecPtr ((XLogRecPtr) 0) + +HIDDEN pg_int64 feGetCurrentTimestamp(void); +HIDDEN void fe_sendint64(pg_int64 i, char *buf); +HIDDEN pg_int64 fe_recvint64(char *buf); + +#endif /* !defined(PSYCOPG_LIBPQ_SUPPORT_H) */ diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 9e4424a84..4fb4771ee 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -40,13 +40,14 @@ #include "psycopg/pgtypes.h" #include "psycopg/error.h" +#include "psycopg/libpq_support.h" #include "libpq-fe.h" -/* htonl, ntohl */ #ifdef _WIN32 +/* select() */ #include -#else -#include +/* gettimeofday() */ +#include "win32_support.h" #endif extern HIDDEN PyObject *psyco_DescriptionType; @@ -1520,82 +1521,6 @@ _pq_copy_out_v3(cursorObject *curs) return ret; } -/* support routines taken from pg_basebackup/streamutil.c */ -/* type and constant definitions from internal postgres includes */ -typedef unsigned int uint32; -typedef unsigned PG_INT64_TYPE XLogRecPtr; - -#define InvalidXLogRecPtr ((XLogRecPtr) 0) - -/* Julian-date equivalents of Day 0 in Unix and Postgres reckoning */ -#define UNIX_EPOCH_JDATE 2440588 /* == date2j(1970, 1, 1) */ -#define POSTGRES_EPOCH_JDATE 2451545 /* == date2j(2000, 1, 1) */ - -#define SECS_PER_DAY 86400 -#define USECS_PER_SEC 1000000LL - -/* - * Frontend version of GetCurrentTimestamp(), since we are not linked with - * backend code. The protocol always uses integer timestamps, regardless of - * server setting. 
- */ -static pg_int64 -feGetCurrentTimestamp(void) -{ - pg_int64 result; - struct timeval tp; - - gettimeofday(&tp, NULL); - - result = (pg_int64) tp.tv_sec - - ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY); - - result = (result * USECS_PER_SEC) + tp.tv_usec; - - return result; -} - -/* - * Converts an int64 to network byte order. - */ -static void -fe_sendint64(pg_int64 i, char *buf) -{ - uint32 n32; - - /* High order half first, since we're doing MSB-first */ - n32 = (uint32) (i >> 32); - n32 = htonl(n32); - memcpy(&buf[0], &n32, 4); - - /* Now the low order half */ - n32 = (uint32) i; - n32 = htonl(n32); - memcpy(&buf[4], &n32, 4); -} - -/* - * Converts an int64 from network byte order to native format. - */ -static pg_int64 -fe_recvint64(char *buf) -{ - pg_int64 result; - uint32 h32; - uint32 l32; - - memcpy(&h32, buf, 4); - memcpy(&l32, buf + 4, 4); - h32 = ntohl(h32); - l32 = ntohl(l32); - - result = h32; - result <<= 32; - result |= l32; - - return result; -} - static int sendFeedback(PGconn *conn, XLogRecPtr written_lsn, XLogRecPtr fsync_lsn, int replyRequested) diff --git a/psycopg/win32_support.c b/psycopg/win32_support.c new file mode 100644 index 000000000..8a760b9f8 --- /dev/null +++ b/psycopg/win32_support.c @@ -0,0 +1,61 @@ +/* win32_support.c - emulate some functions missing on Win32 + * + * Copyright (C) 2003-2015 Federico Di Gregorio + * + * This file is part of psycopg. + * + * psycopg2 is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * In addition, as a special exception, the copyright holders give + * permission to link this program with the OpenSSL library (or with + * modified versions of OpenSSL that use the same license as OpenSSL), + * and distribute linked combinations including the two. 
+ * + * You must obey the GNU Lesser General Public License in all respects for + * all of the code used other than OpenSSL. + * + * psycopg2 is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + */ + +#define PSYCOPG_MODULE +#include "psycopg/psycopg.h" + +#include "psycopg/win32_support.h" + +#ifdef _WIN32 +/* millisecond-precision port of gettimeofday for Win32, taken from + src/port/gettimeofday.c in PostgreSQL core */ + +/* FILETIME of Jan 1 1970 00:00:00. */ +static const unsigned __int64 epoch = 116444736000000000ULL; + +/* + * timezone information is stored outside the kernel so tzp isn't used anymore. + * + * Note: this function is not for Win32 high precision timing purpose. See + * elapsed_time(). + */ +int +gettimeofday(struct timeval * tp, struct timezone * tzp) +{ + FILETIME file_time; + SYSTEMTIME system_time; + ULARGE_INTEGER ularge; + + GetSystemTime(&system_time); + SystemTimeToFileTime(&system_time, &file_time); + ularge.LowPart = file_time.dwLowDateTime; + ularge.HighPart = file_time.dwHighDateTime; + + tp->tv_sec = (long) ((ularge.QuadPart - epoch) / 10000000L); + tp->tv_usec = (long) (system_time.wMilliseconds * 1000); + + return 0; +} +#endif /* _WIN32 */ diff --git a/psycopg/win32_support.h b/psycopg/win32_support.h new file mode 100644 index 000000000..c65773170 --- /dev/null +++ b/psycopg/win32_support.h @@ -0,0 +1,36 @@ +/* win32_support.h - definitions for win32_support.c + * + * Copyright (C) 2003-2015 Federico Di Gregorio + * + * This file is part of psycopg. + * + * psycopg2 is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * In addition, as a special exception, the copyright holders give + * permission to link this program with the OpenSSL library (or with + * modified versions of OpenSSL that use the same license as OpenSSL), + * and distribute linked combinations including the two. + * + * You must obey the GNU Lesser General Public License in all respects for + * all of the code used other than OpenSSL. + * + * psycopg2 is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + */ +#ifndef PSYCOPG_WIN32_SUPPORT_H +#define PSYCOPG_WIN32_SUPPORT_H 1 + +#include "psycopg/config.h" + +#include + +#ifdef _WIN32 +HIDDEN int gettimeofday(struct timeval * tp, struct timezone * tzp); +#endif + +#endif /* !defined(PSYCOPG_WIN32_SUPPORT_H) */ diff --git a/setup.py b/setup.py index fc4f17112..1f87520e3 100644 --- a/setup.py +++ b/setup.py @@ -462,6 +462,7 @@ def is_py_64(): sources = [ 'psycopgmodule.c', 'green.c', 'pqpath.c', 'utils.c', 'bytes_format.c', + 'libpq_support.c', 'win32_support.c', 'connection_int.c', 'connection_type.c', 'cursor_int.c', 'cursor_type.c', @@ -481,6 +482,7 @@ def is_py_64(): 'config.h', 'pgtypes.h', 'psycopg.h', 'python.h', 'connection.h', 'cursor.h', 'diagnostics.h', 'error.h', 'green.h', 'lobject.h', 'notify.h', 'pqpath.h', 'xid.h', + 'libpq_support.h', 'win32_support.h', 'adapter_asis.h', 'adapter_binary.h', 'adapter_datetime.h', 'adapter_list.h', 'adapter_pboolean.h', 'adapter_pdecimal.h', From 50df864f8c63144bad281a1de1d6d38d4a06d4aa Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 4 Jun 2015 11:00:08 +0200 Subject: [PATCH 05/60] Add timersub for Win32. Fix gettimeofday on MinGW. 
--- psycopg/pqpath.c | 7 +++---- psycopg/win32_support.c | 17 ++++++++++++++++- psycopg/win32_support.h | 4 ++++ 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 4fb4771ee..e87befaec 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1616,15 +1616,14 @@ _pq_copy_both_v3(cursorObject *curs) ping_time = last_comm; ping_time.tv_sec += curs->keepalive_interval; - if (timercmp(&ping_time, &curr_time, >)) { - timersub(&ping_time, &curr_time, &time_diff); - + timersub(&ping_time, &curr_time, &time_diff); + if (time_diff.tv_sec > 0) { Py_BEGIN_ALLOW_THREADS; sel = select(PQsocket(conn) + 1, &fds, NULL, NULL, &time_diff); Py_END_ALLOW_THREADS; } else { - sel = 0; /* pretend select() timed out */ + sel = 0; /* we're past target time, pretend select() timed out */ } if (sel < 0) { diff --git a/psycopg/win32_support.c b/psycopg/win32_support.c index 8a760b9f8..d508b220b 100644 --- a/psycopg/win32_support.c +++ b/psycopg/win32_support.c @@ -29,6 +29,8 @@ #include "psycopg/win32_support.h" #ifdef _WIN32 + +#ifndef __MINGW32__ /* millisecond-precision port of gettimeofday for Win32, taken from src/port/gettimeofday.c in PostgreSQL core */ @@ -58,4 +60,17 @@ gettimeofday(struct timeval * tp, struct timezone * tzp) return 0; } -#endif /* _WIN32 */ +#endif /* !defined(__MINGW32__) */ + +/* timersub is missing on mingw */ +void +timersub(struct timeval *a, struct timeval *b, struct timeval *c) +{ + c->tv_sec = a->tv_sec - b->tv_sec; + c->tv_usec = a->tv_usec - b->tv_usec; + if (tv_usec < 0) { + c->tv_usec += 1000000; + c->tv_sec -= 1; + } +} +#endif /* defined(_WIN32) */ diff --git a/psycopg/win32_support.h b/psycopg/win32_support.h index c65773170..be963df5a 100644 --- a/psycopg/win32_support.h +++ b/psycopg/win32_support.h @@ -30,7 +30,11 @@ #include #ifdef _WIN32 +#ifndef __MINGW32__ HIDDEN int gettimeofday(struct timeval * tp, struct timezone * tzp); #endif +HIDDEN void timersub(struct timeval *a, struct timeval *b, 
struct timeval *c); +#endif + #endif /* !defined(PSYCOPG_WIN32_SUPPORT_H) */ From f7b84ce843d3df9b95e5485fb881c13709e5c781 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 4 Jun 2015 11:01:09 +0200 Subject: [PATCH 06/60] Add {libpq,win32}_support.* to the .cproj file --- psycopg2.cproj | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/psycopg2.cproj b/psycopg2.cproj index 7755b9618..18b9727f1 100644 --- a/psycopg2.cproj +++ b/psycopg2.cproj @@ -85,6 +85,7 @@ + @@ -93,6 +94,7 @@ + @@ -217,6 +219,7 @@ + @@ -229,6 +232,7 @@ + @@ -251,4 +255,4 @@ - \ No newline at end of file + From 453830f80c111280e090eb35db4494db33ff5e16 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Fri, 5 Jun 2015 17:44:09 +0200 Subject: [PATCH 07/60] Add ReplicationMessage object --- lib/extras.py | 4 + psycopg/cursor.h | 3 + psycopg/cursor_type.c | 58 +++++++++++++ psycopg/libpq_support.h | 4 + psycopg/pqpath.c | 52 +++++++++--- psycopg/psycopg.h | 1 + psycopg/psycopgmodule.c | 4 + psycopg/replication_message.h | 52 ++++++++++++ psycopg/replication_message_type.c | 127 +++++++++++++++++++++++++++++ setup.py | 2 + 10 files changed, 295 insertions(+), 12 deletions(-) create mode 100644 psycopg/replication_message.h create mode 100644 psycopg/replication_message_type.c diff --git a/lib/extras.py b/lib/extras.py index 92dd81926..8118e1346 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -574,6 +574,10 @@ def start_replication(self, o, slot_type, slot_name=None, start_lsn=None, return self.start_replication_expert(o, command, keepalive_interval) + # thin wrapper + def sync_server(self, msg): + return self.replication_sync_server(msg) + # a dbtype and adapter for Python UUID type diff --git a/psycopg/cursor.h b/psycopg/cursor.h index 93b697b2d..78ee21c47 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -72,7 +72,10 @@ struct cursorObject { #define DEFAULT_COPYSIZE 16384 #define DEFAULT_COPYBUFF 8192 + int in_replication; /* we're in streaming replication 
loop */ + int stop_replication; /* client requested to stop replication */ int keepalive_interval; /* interval for keepalive messages in replication mode */ + replicationMessageObject *repl_sync_msg; /* set when the client asks us to sync the server */ PyObject *tuple_factory; /* factory for result tuples */ PyObject *tzinfo_factory; /* factory for tzinfo objects */ diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index 954e764de..1ea922bbc 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -28,6 +28,7 @@ #include "psycopg/cursor.h" #include "psycopg/connection.h" +#include "psycopg/replication_message.h" #include "psycopg/green.h" #include "psycopg/pqpath.h" #include "psycopg/typecast.h" @@ -1605,17 +1606,68 @@ psyco_curs_start_replication_expert(cursorObject *self, PyObject *args) self->copysize = 0; Py_INCREF(file); self->copyfile = file; + self->in_replication = 1; self->keepalive_interval = keepalive_interval; + self->stop_replication = 0; + self->repl_sync_msg = NULL; if (pq_execute(self, command, 0, 1 /* no_result */, 1 /* no_begin */) >= 0) { res = Py_None; Py_INCREF(Py_None); } + + Py_CLEAR(self->repl_sync_msg); Py_CLEAR(self->copyfile); + self->in_replication = 0; return res; } +#define psyco_curs_stop_replication_doc \ +"start_replication() -- Set flag to break out of endless loop in start_replication()." + +static PyObject * +psyco_curs_stop_replication(cursorObject *self) +{ + EXC_IF_CURS_CLOSED(self); + + if (!self->in_replication) { + PyErr_SetString(ProgrammingError, + "stop_replication() called when not in streaming replication loop"); + } else { + self->stop_replication = 1; + } + + Py_RETURN_NONE; +} + +#define psyco_curs_replication_sync_server_doc \ +"replication_sync_server(msg) -- Set flag to sync the server up to this replication message." 
+ +static PyObject * +psyco_curs_replication_sync_server(cursorObject *self, PyObject *args) +{ + replicationMessageObject *msg; + + EXC_IF_CURS_CLOSED(self); + + if (!PyArg_ParseTuple(args, "O!", &replicationMessageType, &msg)) { + return NULL; + } + + if (!self->in_replication) { + PyErr_SetString(ProgrammingError, + "replication_sync_server() called when not in streaming replication loop"); + } else { + Py_CLEAR(self->repl_sync_msg); + + self->repl_sync_msg = msg; + Py_XINCREF(self->repl_sync_msg); + } + + Py_RETURN_NONE; +} + /* extension: closed - return true if cursor is closed */ #define psyco_curs_closed_doc \ @@ -1792,6 +1844,10 @@ static struct PyMethodDef cursorObject_methods[] = { METH_VARARGS|METH_KEYWORDS, psyco_curs_copy_expert_doc}, {"start_replication_expert", (PyCFunction)psyco_curs_start_replication_expert, METH_VARARGS, psyco_curs_start_replication_expert_doc}, + {"stop_replication", (PyCFunction)psyco_curs_stop_replication, + METH_NOARGS, psyco_curs_stop_replication_doc}, + {"replication_sync_server", (PyCFunction)psyco_curs_replication_sync_server, + METH_VARARGS, psyco_curs_replication_sync_server_doc}, {NULL} }; @@ -1908,6 +1964,7 @@ cursor_clear(cursorObject *self) Py_CLEAR(self->casts); Py_CLEAR(self->caster); Py_CLEAR(self->copyfile); + Py_CLEAR(self->repl_sync_msg); Py_CLEAR(self->tuple_factory); Py_CLEAR(self->tzinfo_factory); Py_CLEAR(self->query); @@ -1997,6 +2054,7 @@ cursor_traverse(cursorObject *self, visitproc visit, void *arg) Py_VISIT(self->casts); Py_VISIT(self->caster); Py_VISIT(self->copyfile); + Py_VISIT(self->repl_sync_msg); Py_VISIT(self->tuple_factory); Py_VISIT(self->tzinfo_factory); Py_VISIT(self->query); diff --git a/psycopg/libpq_support.h b/psycopg/libpq_support.h index 007f5e183..e597d24c3 100644 --- a/psycopg/libpq_support.h +++ b/psycopg/libpq_support.h @@ -33,6 +33,10 @@ typedef unsigned PG_INT64_TYPE XLogRecPtr; #define InvalidXLogRecPtr ((XLogRecPtr) 0) +/* have to use lowercase %x, as PyString_FromFormat can't 
do %X */ +#define XLOGFMTSTR "%x/%x" +#define XLOGFMTARGS(x) ((uint32)((x) >> 32)), ((uint32)((x) & 0xFFFFFFFF)) + HIDDEN pg_int64 feGetCurrentTimestamp(void); HIDDEN void fe_sendint64(pg_int64 i, char *buf); HIDDEN pg_int64 fe_recvint64(char *buf); diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index e87befaec..4ae62971a 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -35,6 +35,7 @@ #include "psycopg/pqpath.h" #include "psycopg/connection.h" #include "psycopg/cursor.h" +#include "psycopg/replication_message.h" #include "psycopg/green.h" #include "psycopg/typecast.h" #include "psycopg/pgtypes.h" @@ -1528,9 +1529,8 @@ sendFeedback(PGconn *conn, XLogRecPtr written_lsn, XLogRecPtr fsync_lsn, char replybuf[1 + 8 + 8 + 8 + 8 + 1]; int len = 0; - Dprintf("_pq_copy_both_v3: confirming write up to %X/%X, flush to %X/%X\n", - (uint32) (written_lsn >> 32), (uint32) written_lsn, - (uint32) (fsync_lsn >> 32), (uint32) fsync_lsn); + Dprintf("_pq_copy_both_v3: confirming write up to "XLOGFMTSTR", flush to "XLOGFMTSTR, + XLOGFMTARGS(written_lsn), XLOGFMTARGS(fsync_lsn)); replybuf[len] = 'r'; len += 1; @@ -1559,6 +1559,7 @@ _pq_copy_both_v3(cursorObject *curs) PyObject *tmp = NULL; PyObject *write_func = NULL; PyObject *obj = NULL; + replicationMessageObject *msg = NULL; int ret = -1; int is_text; @@ -1568,9 +1569,9 @@ _pq_copy_both_v3(cursorObject *curs) struct timeval last_comm, curr_time, ping_time, time_diff; int len, hdr, reply, sel; - XLogRecPtr written_lsn = InvalidXLogRecPtr; - XLogRecPtr fsync_lsn = InvalidXLogRecPtr; - XLogRecPtr wal_end = InvalidXLogRecPtr; + XLogRecPtr written_lsn = InvalidXLogRecPtr, + fsync_lsn = InvalidXLogRecPtr, + data_start, wal_end; if (!curs->copyfile) { PyErr_SetString(ProgrammingError, @@ -1666,7 +1667,12 @@ _pq_copy_both_v3(cursorObject *curs) goto exit; } - wal_end = fe_recvint64(buffer + 1 + 8); + data_start = fe_recvint64(buffer + 1); + wal_end = fe_recvint64(buffer + 1 + 8); + /*send_time = fe_recvint64(buffer + 1 + 8 + 8);*/ + 
+ Dprintf("_pq_copy_both_v3: data_start="XLOGFMTSTR", wal_end="XLOGFMTSTR, + XLOGFMTARGS(data_start), XLOGFMTARGS(wal_end)); if (is_text) { obj = PyUnicode_Decode(buffer + hdr, len - hdr, curs->conn->codec, NULL); @@ -1676,21 +1682,36 @@ _pq_copy_both_v3(cursorObject *curs) } if (!obj) { goto exit; } - tmp = PyObject_CallFunctionObjArgs(write_func, obj, NULL); + msg = (replicationMessageObject *) + PyObject_CallFunctionObjArgs((PyObject *)&replicationMessageType, + obj, NULL); Py_DECREF(obj); + if (!msg) { goto exit; } + + msg->data_start = data_start; + msg->wal_end = wal_end; + + tmp = PyObject_CallFunctionObjArgs(write_func, msg, NULL); if (tmp == NULL) { Dprintf("_pq_copy_both_v3: write_func returned NULL"); goto exit; } + Py_DECREF(tmp); /* update the LSN position we've written up to */ if (written_lsn < wal_end) written_lsn = wal_end; - /* if write() returned true-ish, we confirm LSN with the server */ - if (PyObject_IsTrue(tmp)) { - fsync_lsn = written_lsn; + /* if requested by sync_server(msg), we confirm LSN with the server */ + if (curs->repl_sync_msg) { + Dprintf("_pq_copy_both_v3: server sync requested at "XLOGFMTSTR, + XLOGFMTARGS(curs->repl_sync_msg->wal_end)); + + if (fsync_lsn < curs->repl_sync_msg->wal_end) + fsync_lsn = curs->repl_sync_msg->wal_end; + + Py_CLEAR(curs->repl_sync_msg); if (!sendFeedback(conn, written_lsn, fsync_lsn, 0)) { pq_raise(curs->conn, curs, NULL); @@ -1698,8 +1719,14 @@ _pq_copy_both_v3(cursorObject *curs) } gettimeofday(&last_comm, NULL); } - Py_DECREF(tmp); + if (curs->stop_replication) { + Dprintf("_pq_copy_both_v3: stop_replication flag set by write_func"); + break; + } + + Py_DECREF(msg); + msg = NULL; } else if (buffer[0] == 'k') { /* msgtype(1), walEnd(8), sendTime(8), reply(1) */ @@ -1751,6 +1778,7 @@ _pq_copy_both_v3(cursorObject *curs) PQfreemem(buffer); } + Py_XDECREF(msg); Py_XDECREF(write_func); return ret; } diff --git a/psycopg/psycopg.h b/psycopg/psycopg.h index eb406fd2b..adda12d93 100644 --- 
a/psycopg/psycopg.h +++ b/psycopg/psycopg.h @@ -117,6 +117,7 @@ HIDDEN PyObject *psyco_GetDecimalType(void); /* forward declarations */ typedef struct cursorObject cursorObject; typedef struct connectionObject connectionObject; +typedef struct replicationMessageObject replicationMessageObject; /* some utility functions */ RAISES HIDDEN PyObject *psyco_set_error(PyObject *exc, cursorObject *curs, const char *msg); diff --git a/psycopg/psycopgmodule.c b/psycopg/psycopgmodule.c index 61e2de579..67393c371 100644 --- a/psycopg/psycopgmodule.c +++ b/psycopg/psycopgmodule.c @@ -28,6 +28,7 @@ #include "psycopg/connection.h" #include "psycopg/cursor.h" +#include "psycopg/replication_message.h" #include "psycopg/green.h" #include "psycopg/lobject.h" #include "psycopg/notify.h" @@ -785,6 +786,9 @@ INIT_MODULE(_psycopg)(void) Py_TYPE(&cursorType) = &PyType_Type; if (PyType_Ready(&cursorType) == -1) goto exit; + Py_TYPE(&replicationMessageType) = &PyType_Type; + if (PyType_Ready(&replicationMessageType) == -1) goto exit; + Py_TYPE(&typecastType) = &PyType_Type; if (PyType_Ready(&typecastType) == -1) goto exit; diff --git a/psycopg/replication_message.h b/psycopg/replication_message.h new file mode 100644 index 000000000..bf2b5f169 --- /dev/null +++ b/psycopg/replication_message.h @@ -0,0 +1,52 @@ +/* replication_message.h - definition for the psycopg ReplicationMessage type + * + * Copyright (C) 2003-2015 Federico Di Gregorio + * + * This file is part of psycopg. + * + * psycopg2 is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * In addition, as a special exception, the copyright holders give + * permission to link this program with the OpenSSL library (or with + * modified versions of OpenSSL that use the same license as OpenSSL), + * and distribute linked combinations including the two. + * + * You must obey the GNU Lesser General Public License in all respects for + * all of the code used other than OpenSSL. + * + * psycopg2 is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + */ + +#ifndef PSYCOPG_REPLICATION_MESSAGE_H +#define PSYCOPG_REPLICATION_MESSAGE_H 1 + +#include "libpq_support.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern HIDDEN PyTypeObject replicationMessageType; + +/* the typedef is forward-declared in psycopg.h */ +struct replicationMessageObject { + PyObject_HEAD + + PyObject *payload; + + XLogRecPtr data_start; + XLogRecPtr wal_end; + /* send_time */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* !defined(PSYCOPG_REPLICATION_MESSAGE_H) */ diff --git a/psycopg/replication_message_type.c b/psycopg/replication_message_type.c new file mode 100644 index 000000000..6968955e3 --- /dev/null +++ b/psycopg/replication_message_type.c @@ -0,0 +1,127 @@ +/* replication_message_type.c - python interface to ReplcationMessage objects + * + * Copyright (C) 2003-2015 Federico Di Gregorio + * + * This file is part of psycopg. + * + * psycopg2 is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * In addition, as a special exception, the copyright holders give + * permission to link this program with the OpenSSL library (or with + * modified versions of OpenSSL that use the same license as OpenSSL), + * and distribute linked combinations including the two. + * + * You must obey the GNU Lesser General Public License in all respects for + * all of the code used other than OpenSSL. + * + * psycopg2 is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + */ + +#define PSYCOPG_MODULE +#include "psycopg/psycopg.h" + +#include "psycopg/replication_message.h" + + +static PyObject * +replmsg_repr(replicationMessageObject *self) +{ + return PyString_FromFormat( + "", + self, XLOGFMTARGS(self->data_start), XLOGFMTARGS(self->wal_end)); +} + +static int +replmsg_init(PyObject *obj, PyObject *args, PyObject *kwargs) +{ + replicationMessageObject *self = (replicationMessageObject*) obj; + + if (!PyArg_ParseTuple(args, "O", &self->payload)) + return -1; + Py_XINCREF(self->payload); + + self->data_start = 0; + self->wal_end = 0; + + return 0; +} + +static int +replmsg_clear(PyObject *self) +{ + Py_CLEAR(((replicationMessageObject*) self)->payload); + return 0; +} + +static void +replmsg_dealloc(PyObject* obj) +{ + replmsg_clear(obj); +} + + +#define OFFSETOF(x) offsetof(replicationMessageObject, x) + +/* object member list */ + +static struct PyMemberDef replicationMessageObject_members[] = { + {"payload", T_OBJECT, OFFSETOF(payload), READONLY, + "TODO"}, + {"data_start", T_ULONGLONG, OFFSETOF(data_start), READONLY, + "TODO"}, + {"wal_end", T_ULONGLONG, OFFSETOF(wal_end), READONLY, + "TODO"}, + {NULL} +}; + +/* object type */ + +#define replicationMessageType_doc \ +"A database replication message." 
+ +PyTypeObject replicationMessageType = { + PyVarObject_HEAD_INIT(NULL, 0) + "psycopg2.extensions.ReplicationMessage", + sizeof(replicationMessageObject), 0, + replmsg_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + (reprfunc)replmsg_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + /*tp_flags*/ + replicationMessageType_doc, /*tp_doc*/ + 0, /*tp_traverse*/ + replmsg_clear, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + 0, /*tp_methods*/ + replicationMessageObject_members, /*tp_members*/ + 0, /*tp_getset*/ + 0, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + replmsg_init, /*tp_init*/ + 0, /*tp_alloc*/ + PyType_GenericNew, /*tp_new*/ +}; diff --git a/setup.py b/setup.py index 1f87520e3..7c1a479f7 100644 --- a/setup.py +++ b/setup.py @@ -466,6 +466,7 @@ def is_py_64(): 'connection_int.c', 'connection_type.c', 'cursor_int.c', 'cursor_type.c', + 'replication_message_type.c', 'diagnostics_type.c', 'error_type.c', 'lobject_int.c', 'lobject_type.c', 'notify_type.c', 'xid_type.c', @@ -481,6 +482,7 @@ def is_py_64(): # headers 'config.h', 'pgtypes.h', 'psycopg.h', 'python.h', 'connection.h', 'cursor.h', 'diagnostics.h', 'error.h', 'green.h', 'lobject.h', + 'replication_message.h', 'notify.h', 'pqpath.h', 'xid.h', 'libpq_support.h', 'win32_support.h', From 1ac385d1fb4328ba2220943741e4049fe472495b Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 10 Jun 2015 13:39:35 +0200 Subject: [PATCH 08/60] Fix logical decoding plugin options adaptation on python3 --- lib/extras.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/extras.py b/lib/extras.py index 8118e1346..7de48d789 100644 --- 
a/lib/extras.py +++ b/lib/extras.py @@ -569,7 +569,7 @@ def start_replication(self, o, slot_type, slot_name=None, start_lsn=None, for k,v in options.iteritems(): if not command.endswith('('): command += ", " - command += "%s %s" % (self.quote_ident(k), _A(str(v)).getquoted()) + command += "%s %s" % (self.quote_ident(k), _A(str(v))) command += ")" return self.start_replication_expert(o, command, keepalive_interval) From 9fc5bf44368eb381955c8bd164ccac145363e950 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 10 Jun 2015 18:21:06 +0200 Subject: [PATCH 09/60] Add handling of send_time field in replmsg --- psycopg/libpq_support.c | 7 ----- psycopg/libpq_support.h | 7 +++++ psycopg/pqpath.c | 8 +++-- psycopg/psycopgmodule.c | 1 + psycopg/replication_message.h | 4 ++- psycopg/replication_message_type.c | 49 ++++++++++++++++++++++++++++-- 6 files changed, 62 insertions(+), 14 deletions(-) diff --git a/psycopg/libpq_support.c b/psycopg/libpq_support.c index 95a3ebc61..160c84913 100644 --- a/psycopg/libpq_support.c +++ b/psycopg/libpq_support.c @@ -41,13 +41,6 @@ /* support routines taken from pg_basebackup/streamutil.c */ -/* Julian-date equivalents of Day 0 in Unix and Postgres reckoning */ -#define UNIX_EPOCH_JDATE 2440588 /* == date2j(1970, 1, 1) */ -#define POSTGRES_EPOCH_JDATE 2451545 /* == date2j(2000, 1, 1) */ - -#define SECS_PER_DAY 86400 -#define USECS_PER_SEC 1000000LL - /* * Frontend version of GetCurrentTimestamp(), since we are not linked with * backend code. 
The protocol always uses integer timestamps, regardless of diff --git a/psycopg/libpq_support.h b/psycopg/libpq_support.h index e597d24c3..ab35fef50 100644 --- a/psycopg/libpq_support.h +++ b/psycopg/libpq_support.h @@ -37,6 +37,13 @@ typedef unsigned PG_INT64_TYPE XLogRecPtr; #define XLOGFMTSTR "%x/%x" #define XLOGFMTARGS(x) ((uint32)((x) >> 32)), ((uint32)((x) & 0xFFFFFFFF)) +/* Julian-date equivalents of Day 0 in Unix and Postgres reckoning */ +#define UNIX_EPOCH_JDATE 2440588 /* == date2j(1970, 1, 1) */ +#define POSTGRES_EPOCH_JDATE 2451545 /* == date2j(2000, 1, 1) */ + +#define SECS_PER_DAY 86400 +#define USECS_PER_SEC 1000000LL + HIDDEN pg_int64 feGetCurrentTimestamp(void); HIDDEN void fe_sendint64(pg_int64 i, char *buf); HIDDEN pg_int64 fe_recvint64(char *buf); diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 4ae62971a..7a3ec19e0 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1572,6 +1572,7 @@ _pq_copy_both_v3(cursorObject *curs) XLogRecPtr written_lsn = InvalidXLogRecPtr, fsync_lsn = InvalidXLogRecPtr, data_start, wal_end; + pg_int64 send_time; if (!curs->copyfile) { PyErr_SetString(ProgrammingError, @@ -1669,10 +1670,10 @@ _pq_copy_both_v3(cursorObject *curs) data_start = fe_recvint64(buffer + 1); wal_end = fe_recvint64(buffer + 1 + 8); - /*send_time = fe_recvint64(buffer + 1 + 8 + 8);*/ + send_time = fe_recvint64(buffer + 1 + 8 + 8); - Dprintf("_pq_copy_both_v3: data_start="XLOGFMTSTR", wal_end="XLOGFMTSTR, - XLOGFMTARGS(data_start), XLOGFMTARGS(wal_end)); + Dprintf("_pq_copy_both_v3: data_start="XLOGFMTSTR", wal_end="XLOGFMTSTR", send_time=%lld", + XLOGFMTARGS(data_start), XLOGFMTARGS(wal_end), send_time); if (is_text) { obj = PyUnicode_Decode(buffer + hdr, len - hdr, curs->conn->codec, NULL); @@ -1690,6 +1691,7 @@ _pq_copy_both_v3(cursorObject *curs) msg->data_start = data_start; msg->wal_end = wal_end; + msg->send_time = send_time; tmp = PyObject_CallFunctionObjArgs(write_func, msg, NULL); diff --git a/psycopg/psycopgmodule.c 
b/psycopg/psycopgmodule.c index 67393c371..27af21129 100644 --- a/psycopg/psycopgmodule.c +++ b/psycopg/psycopgmodule.c @@ -869,6 +869,7 @@ INIT_MODULE(_psycopg)(void) /* Initialize the PyDateTimeAPI everywhere is used */ PyDateTime_IMPORT; if (psyco_adapter_datetime_init()) { goto exit; } + if (psyco_replmsg_datetime_init()) { goto exit; } Py_TYPE(&pydatetimeType) = &PyType_Type; if (PyType_Ready(&pydatetimeType) == -1) goto exit; diff --git a/psycopg/replication_message.h b/psycopg/replication_message.h index bf2b5f169..b03d1c4f3 100644 --- a/psycopg/replication_message.h +++ b/psycopg/replication_message.h @@ -42,9 +42,11 @@ struct replicationMessageObject { XLogRecPtr data_start; XLogRecPtr wal_end; - /* send_time */ + pg_int64 send_time; }; +RAISES_NEG int psyco_replmsg_datetime_init(void); + #ifdef __cplusplus } #endif diff --git a/psycopg/replication_message_type.c b/psycopg/replication_message_type.c index 6968955e3..5d15ca617 100644 --- a/psycopg/replication_message_type.c +++ b/psycopg/replication_message_type.c @@ -27,14 +27,31 @@ #include "psycopg/psycopg.h" #include "psycopg/replication_message.h" +#include "psycopg/libpq_support.h" + +#include "datetime.h" + +RAISES_NEG int +psyco_replmsg_datetime_init(void) +{ + Dprintf("psyco_replmsg_datetime_init: datetime init"); + + PyDateTime_IMPORT; + + if (!PyDateTimeAPI) { + PyErr_SetString(PyExc_ImportError, "datetime initialization failed"); + return -1; + } + return 0; +} static PyObject * replmsg_repr(replicationMessageObject *self) { return PyString_FromFormat( - "", - self, XLOGFMTARGS(self->data_start), XLOGFMTARGS(self->wal_end)); + "", + self, XLOGFMTARGS(self->data_start), XLOGFMTARGS(self->wal_end), self->send_time); } static int @@ -65,6 +82,26 @@ replmsg_dealloc(PyObject* obj) replmsg_clear(obj); } +#define psyco_replmsg_send_time_doc \ +"send_time - Timestamp of the replication message departure from the server." 
+ +static PyObject * +psyco_replmsg_get_send_time(replicationMessageObject *self) +{ + PyObject *tval, *res = NULL; + double t; + + t = (double)self->send_time / USECS_PER_SEC + + ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY); + + tval = Py_BuildValue("(d)", t); + if (tval) { + res = PyDateTime_FromTimestamp(tval); + Py_DECREF(tval); + } + + return res; +} #define OFFSETOF(x) offsetof(replicationMessageObject, x) @@ -80,6 +117,12 @@ static struct PyMemberDef replicationMessageObject_members[] = { {NULL} }; +static struct PyGetSetDef replicationMessageObject_getsets[] = { + { "send_time", (getter)psyco_replmsg_get_send_time, NULL, + psyco_replmsg_send_time_doc, NULL }, + {NULL} +}; + /* object type */ #define replicationMessageType_doc \ @@ -115,7 +158,7 @@ PyTypeObject replicationMessageType = { 0, /*tp_iternext*/ 0, /*tp_methods*/ replicationMessageObject_members, /*tp_members*/ - 0, /*tp_getset*/ + replicationMessageObject_getsets, /*tp_getset*/ 0, /*tp_base*/ 0, /*tp_dict*/ 0, /*tp_descr_get*/ From 35a3262fe345b12fbc1cc7f89c2e0d35631811f7 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 11 Jun 2015 12:20:52 +0200 Subject: [PATCH 10/60] Expose ReplicationMessage type in extras --- lib/extensions.py | 2 +- lib/extras.py | 5 +++-- psycopg/psycopgmodule.c | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/extensions.py b/lib/extensions.py index 216d8ad20..faa8b1dea 100644 --- a/lib/extensions.py +++ b/lib/extensions.py @@ -56,7 +56,7 @@ except ImportError: pass -from psycopg2._psycopg import adapt, adapters, encodings, connection, cursor, lobject, Xid +from psycopg2._psycopg import adapt, adapters, encodings, connection, cursor, replicationMessage, lobject, Xid from psycopg2._psycopg import string_types, binary_types, new_type, new_array_type, register_type from psycopg2._psycopg import ISQLQuote, Notify, Diagnostics, Column diff --git a/lib/extras.py b/lib/extras.py index 7de48d789..2f32bf123 100644 --- a/lib/extras.py 
+++ b/lib/extras.py @@ -39,6 +39,7 @@ from psycopg2 import extensions as _ext from psycopg2.extensions import cursor as _cursor from psycopg2.extensions import connection as _connection +from psycopg2.extensions import replicationMessage as ReplicationMessage from psycopg2.extensions import adapt as _A from psycopg2.extensions import b @@ -515,13 +516,13 @@ def create_replication_slot(self, slot_type, slot_name, output_plugin=None): else: raise RuntimeError("unrecognized replication slot type") - return self.execute(command) + self.execute(command) def drop_replication_slot(self, slot_name): """Drop streaming replication slot.""" command = "DROP_REPLICATION_SLOT %s" % self.quote_ident(slot_name) - return self.execute(command) + self.execute(command) def start_replication(self, o, slot_type, slot_name=None, start_lsn=None, timeline=0, keepalive_interval=10, options=None): diff --git a/psycopg/psycopgmodule.c b/psycopg/psycopgmodule.c index 27af21129..d44a4b681 100644 --- a/psycopg/psycopgmodule.c +++ b/psycopg/psycopgmodule.c @@ -911,6 +911,7 @@ INIT_MODULE(_psycopg)(void) /* put new types in module dictionary */ PyModule_AddObject(module, "connection", (PyObject*)&connectionType); PyModule_AddObject(module, "cursor", (PyObject*)&cursorType); + PyModule_AddObject(module, "replicationMessage", (PyObject*)&replicationMessageType); PyModule_AddObject(module, "ISQLQuote", (PyObject*)&isqlquoteType); PyModule_AddObject(module, "Notify", (PyObject*)¬ifyType); PyModule_AddObject(module, "Xid", (PyObject*)&xidType); From 9ed90b1216828351ccbd9e9e28951bf7933fb1b3 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 11 Jun 2015 14:52:01 +0200 Subject: [PATCH 11/60] Refer cursor from ReplicationMessage object. At the same time, for the sync use LSN instead of msg reference in cursor. 
--- psycopg/cursor.h | 3 ++- psycopg/cursor_type.c | 19 ++----------------- psycopg/pqpath.c | 12 ++++++------ psycopg/replication_message.h | 2 ++ psycopg/replication_message_type.c | 15 +++++++++------ 5 files changed, 21 insertions(+), 30 deletions(-) diff --git a/psycopg/cursor.h b/psycopg/cursor.h index 78ee21c47..1a630553d 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -27,6 +27,7 @@ #define PSYCOPG_CURSOR_H 1 #include "psycopg/connection.h" +#include "libpq_support.h" #ifdef __cplusplus extern "C" { @@ -75,7 +76,7 @@ struct cursorObject { int in_replication; /* we're in streaming replication loop */ int stop_replication; /* client requested to stop replication */ int keepalive_interval; /* interval for keepalive messages in replication mode */ - replicationMessageObject *repl_sync_msg; /* set when the client asks us to sync the server */ + XLogRecPtr repl_sync_lsn; /* set when the client asks us to sync the server */ PyObject *tuple_factory; /* factory for result tuples */ PyObject *tzinfo_factory; /* factory for tzinfo objects */ diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index 1ea922bbc..19f82c600 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1609,14 +1609,13 @@ psyco_curs_start_replication_expert(cursorObject *self, PyObject *args) self->in_replication = 1; self->keepalive_interval = keepalive_interval; self->stop_replication = 0; - self->repl_sync_msg = NULL; + self->repl_sync_lsn = InvalidXLogRecPtr; if (pq_execute(self, command, 0, 1 /* no_result */, 1 /* no_begin */) >= 0) { res = Py_None; Py_INCREF(Py_None); } - Py_CLEAR(self->repl_sync_msg); Py_CLEAR(self->copyfile); self->in_replication = 0; @@ -1647,24 +1646,12 @@ psyco_curs_stop_replication(cursorObject *self) static PyObject * psyco_curs_replication_sync_server(cursorObject *self, PyObject *args) { - replicationMessageObject *msg; - EXC_IF_CURS_CLOSED(self); - if (!PyArg_ParseTuple(args, "O!", &replicationMessageType, &msg)) { + if 
(!PyArg_ParseTuple(args, "K", &self->repl_sync_lsn)) { return NULL; } - if (!self->in_replication) { - PyErr_SetString(ProgrammingError, - "replication_sync_server() called when not in streaming replication loop"); - } else { - Py_CLEAR(self->repl_sync_msg); - - self->repl_sync_msg = msg; - Py_XINCREF(self->repl_sync_msg); - } - Py_RETURN_NONE; } @@ -1964,7 +1951,6 @@ cursor_clear(cursorObject *self) Py_CLEAR(self->casts); Py_CLEAR(self->caster); Py_CLEAR(self->copyfile); - Py_CLEAR(self->repl_sync_msg); Py_CLEAR(self->tuple_factory); Py_CLEAR(self->tzinfo_factory); Py_CLEAR(self->query); @@ -2054,7 +2040,6 @@ cursor_traverse(cursorObject *self, visitproc visit, void *arg) Py_VISIT(self->casts); Py_VISIT(self->caster); Py_VISIT(self->copyfile); - Py_VISIT(self->repl_sync_msg); Py_VISIT(self->tuple_factory); Py_VISIT(self->tzinfo_factory); Py_VISIT(self->query); diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 7a3ec19e0..7ce06a862 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1685,7 +1685,7 @@ _pq_copy_both_v3(cursorObject *curs) msg = (replicationMessageObject *) PyObject_CallFunctionObjArgs((PyObject *)&replicationMessageType, - obj, NULL); + curs, obj, NULL); Py_DECREF(obj); if (!msg) { goto exit; } @@ -1706,14 +1706,14 @@ _pq_copy_both_v3(cursorObject *curs) written_lsn = wal_end; /* if requested by sync_server(msg), we confirm LSN with the server */ - if (curs->repl_sync_msg) { + if (curs->repl_sync_lsn != InvalidXLogRecPtr) { Dprintf("_pq_copy_both_v3: server sync requested at "XLOGFMTSTR, - XLOGFMTARGS(curs->repl_sync_msg->wal_end)); + XLOGFMTARGS(curs->repl_sync_lsn)); - if (fsync_lsn < curs->repl_sync_msg->wal_end) - fsync_lsn = curs->repl_sync_msg->wal_end; + if (fsync_lsn < curs->repl_sync_lsn) + fsync_lsn = curs->repl_sync_lsn; - Py_CLEAR(curs->repl_sync_msg); + curs->repl_sync_lsn = InvalidXLogRecPtr; if (!sendFeedback(conn, written_lsn, fsync_lsn, 0)) { pq_raise(curs->conn, curs, NULL); diff --git a/psycopg/replication_message.h 
b/psycopg/replication_message.h index b03d1c4f3..a7567a1da 100644 --- a/psycopg/replication_message.h +++ b/psycopg/replication_message.h @@ -26,6 +26,7 @@ #ifndef PSYCOPG_REPLICATION_MESSAGE_H #define PSYCOPG_REPLICATION_MESSAGE_H 1 +#include "cursor.h" #include "libpq_support.h" #ifdef __cplusplus @@ -38,6 +39,7 @@ extern HIDDEN PyTypeObject replicationMessageType; struct replicationMessageObject { PyObject_HEAD + cursorObject *cursor; PyObject *payload; XLogRecPtr data_start; diff --git a/psycopg/replication_message_type.c b/psycopg/replication_message_type.c index 5d15ca617..27a9c9163 100644 --- a/psycopg/replication_message_type.c +++ b/psycopg/replication_message_type.c @@ -27,7 +27,6 @@ #include "psycopg/psycopg.h" #include "psycopg/replication_message.h" -#include "psycopg/libpq_support.h" #include "datetime.h" @@ -59,8 +58,9 @@ replmsg_init(PyObject *obj, PyObject *args, PyObject *kwargs) { replicationMessageObject *self = (replicationMessageObject*) obj; - if (!PyArg_ParseTuple(args, "O", &self->payload)) + if (!PyArg_ParseTuple(args, "O!O", &cursorType, &self->cursor, &self->payload)) return -1; + Py_XINCREF(self->cursor); Py_XINCREF(self->payload); self->data_start = 0; @@ -70,16 +70,17 @@ replmsg_init(PyObject *obj, PyObject *args, PyObject *kwargs) } static int -replmsg_clear(PyObject *self) +replmsg_clear(replicationMessageObject *self) { - Py_CLEAR(((replicationMessageObject*) self)->payload); + Py_CLEAR(self->cursor); + Py_CLEAR(self->payload); return 0; } static void replmsg_dealloc(PyObject* obj) { - replmsg_clear(obj); + replmsg_clear((replicationMessageObject*) obj); } #define psyco_replmsg_send_time_doc \ @@ -108,6 +109,8 @@ psyco_replmsg_get_send_time(replicationMessageObject *self) /* object member list */ static struct PyMemberDef replicationMessageObject_members[] = { + {"cursor", T_OBJECT, OFFSETOF(cursor), READONLY, + "TODO"}, {"payload", T_OBJECT, OFFSETOF(payload), READONLY, "TODO"}, {"data_start", T_ULONGLONG, OFFSETOF(data_start), 
READONLY, @@ -151,7 +154,7 @@ PyTypeObject replicationMessageType = { /*tp_flags*/ replicationMessageType_doc, /*tp_doc*/ 0, /*tp_traverse*/ - replmsg_clear, /*tp_clear*/ + (inquiry)replmsg_clear, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ 0, /*tp_iter*/ From 61e52ce8793472ff1348ab93ccdeb682a1e7b3df Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 10 Jun 2015 09:06:08 +0200 Subject: [PATCH 12/60] Rework replication protocol This change exposes lower level functions for operating the (logical) replication protocol, while keeping the high-level start_replication function that does all the job for you in case of a synchronous connection. A number of other changes and fixes are put into this commit. --- lib/extras.py | 36 +++-- psycopg/cursor.h | 11 +- psycopg/cursor_type.c | 177 +++++++++++++++++---- psycopg/pqpath.c | 362 +++++++++++++++++++++--------------------- psycopg/pqpath.h | 3 + psycopg2.cproj | 2 + 6 files changed, 355 insertions(+), 236 deletions(-) diff --git a/lib/extras.py b/lib/extras.py index 2f32bf123..85debc68a 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -471,7 +471,8 @@ def __init__(self, *args, **kwargs): super(ReplicationConnection, self).__init__(*args, **kwargs) # prevent auto-issued BEGIN statements - self.autocommit = True + if not self.async: + self.autocommit = True def cursor(self, *args, **kwargs): kwargs.setdefault('cursor_factory', ReplicationCursor) @@ -503,18 +504,18 @@ def create_replication_slot(self, slot_type, slot_name, output_plugin=None): if slot_type == REPLICATION_LOGICAL: if output_plugin is None: - raise RuntimeError("output_plugin is required for logical replication slot") + raise psycopg2.ProgrammingError("output_plugin is required for logical replication slot") command += "LOGICAL %s" % self.quote_ident(output_plugin) elif slot_type == REPLICATION_PHYSICAL: if output_plugin is not None: - raise RuntimeError("output_plugin is not applicable to physical replication") + raise 
psycopg2.ProgrammingError("output_plugin is not applicable to physical replication") command += "PHYSICAL" else: - raise RuntimeError("unrecognized replication slot type") + raise psycopg2.ProgrammingError("unrecognized replication slot type") self.execute(command) @@ -524,17 +525,14 @@ def drop_replication_slot(self, slot_name): command = "DROP_REPLICATION_SLOT %s" % self.quote_ident(slot_name) self.execute(command) - def start_replication(self, o, slot_type, slot_name=None, start_lsn=None, + def start_replication(self, slot_type, slot_name=None, writer=None, start_lsn=None, timeline=0, keepalive_interval=10, options=None): """Start and consume replication stream.""" - if keepalive_interval <= 0: - raise RuntimeError("keepalive_interval must be > 0: %d" % keepalive_interval) - command = "START_REPLICATION " if slot_type == REPLICATION_LOGICAL and slot_name is None: - raise RuntimeError("slot_name is required for logical replication slot") + raise psycopg2.ProgrammingError("slot_name is required for logical replication slot") if slot_name: command += "SLOT %s " % self.quote_ident(slot_name) @@ -544,7 +542,7 @@ def start_replication(self, o, slot_type, slot_name=None, start_lsn=None, elif slot_type == REPLICATION_PHYSICAL: command += "PHYSICAL " else: - raise RuntimeError("unrecognized replication slot type") + raise psycopg2.ProgrammingError("unrecognized replication slot type") if start_lsn is None: start_lsn = '0/0' @@ -555,16 +553,16 @@ def start_replication(self, o, slot_type, slot_name=None, start_lsn=None, if timeline != 0: if slot_type == REPLICATION_LOGICAL: - raise RuntimeError("cannot specify timeline for logical replication") + raise psycopg2.ProgrammingError("cannot specify timeline for logical replication") if timeline < 0: - raise RuntimeError("timeline must be >= 0: %d" % timeline) + raise psycopg2.ProgrammingError("timeline must be >= 0: %d" % timeline) command += " TIMELINE %d" % timeline if options: if slot_type == REPLICATION_PHYSICAL: - raise 
RuntimeError("cannot specify plugin options for physical replication") + raise psycopg2.ProgrammingError("cannot specify plugin options for physical replication") command += " (" for k,v in options.iteritems(): @@ -573,11 +571,15 @@ def start_replication(self, o, slot_type, slot_name=None, start_lsn=None, command += "%s %s" % (self.quote_ident(k), _A(str(v))) command += ")" - return self.start_replication_expert(o, command, keepalive_interval) + return self.start_replication_expert(command, writer=writer, + keepalive_interval=keepalive_interval) + + def send_feedback_message(self, written_lsn=0, sync_lsn=0, apply_lsn=0, reply_requested=False): + return self.send_replication_feedback(written_lsn, sync_lsn, apply_lsn, reply_requested) - # thin wrapper - def sync_server(self, msg): - return self.replication_sync_server(msg) + # allows replication cursors to be used in select.select() directly + def fileno(self): + return self.connection.fileno() # a dbtype and adapter for Python UUID type diff --git a/psycopg/cursor.h b/psycopg/cursor.h index 1a630553d..380abbf4a 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -73,10 +73,13 @@ struct cursorObject { #define DEFAULT_COPYSIZE 16384 #define DEFAULT_COPYBUFF 8192 - int in_replication; /* we're in streaming replication loop */ - int stop_replication; /* client requested to stop replication */ - int keepalive_interval; /* interval for keepalive messages in replication mode */ - XLogRecPtr repl_sync_lsn; /* set when the client asks us to sync the server */ + int repl_stop; /* if client requested to stop replication */ + struct timeval repl_keepalive_interval; /* interval for keepalive messages in replication mode */ + XLogRecPtr repl_write_lsn; /* LSN stats for replication feedback messages */ + XLogRecPtr repl_flush_lsn; + XLogRecPtr repl_apply_lsn; + int repl_feedback_pending; /* flag set when we couldn't send the feedback to the server */ + struct timeval repl_last_io; /* timestamp of the last exchange with the 
server */ PyObject *tuple_factory; /* factory for result tuples */ PyObject *tzinfo_factory; /* factory for tzinfo objects */ diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index 19f82c600..9de5b0855 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -36,9 +36,11 @@ #include "psycopg/microprotocols_proto.h" #include - #include +/* python */ +#include "datetime.h" + /** DBAPI methods **/ @@ -1581,78 +1583,182 @@ psyco_curs_copy_expert(cursorObject *self, PyObject *args, PyObject *kwargs) } #define psyco_curs_start_replication_expert_doc \ -"start_replication_expert(file, command, keepalive_interval) -- Start and consume replication stream with direct command." +"start_replication_expert(command, writer=None, keepalive_interval=10) -- Start and consume replication stream with direct command." static PyObject * -psyco_curs_start_replication_expert(cursorObject *self, PyObject *args) +psyco_curs_start_replication_expert(cursorObject *self, PyObject *args, PyObject *kwargs) { - PyObject *file, *res = NULL; + PyObject *writer = NULL, *res = NULL; char *command; - int keepalive_interval; + double keepalive_interval = 10; + static char *kwlist[] = {"command", "writer", "keepalive_interval", NULL}; - if (!PyArg_ParseTuple(args, "O&si", - _psyco_curs_has_write_check, &file, - &command, &keepalive_interval)) { + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|Od", kwlist, + &command, &writer, &keepalive_interval)) { return NULL; } EXC_IF_CURS_CLOSED(self); - EXC_IF_CURS_ASYNC(self, start_replication_expert); EXC_IF_GREEN(start_replication_expert); EXC_IF_TPC_PREPARED(self->conn, start_replication_expert); Dprintf("psyco_curs_start_replication_expert: command = %s", command); + if (keepalive_interval < 1.0) { + psyco_set_error(ProgrammingError, self, "keepalive_interval must be >= 1sec"); + return NULL; + } + self->copysize = 0; - Py_INCREF(file); - self->copyfile = file; - self->in_replication = 1; - self->keepalive_interval = keepalive_interval; 
- self->stop_replication = 0; - self->repl_sync_lsn = InvalidXLogRecPtr; + Py_XINCREF(writer); + self->copyfile = writer; + + self->repl_stop = 0; + self->repl_keepalive_interval.tv_sec = (int)keepalive_interval; + self->repl_keepalive_interval.tv_usec = + (keepalive_interval - (int)keepalive_interval)*1.0e6; - if (pq_execute(self, command, 0, 1 /* no_result */, 1 /* no_begin */) >= 0) { + self->repl_write_lsn = InvalidXLogRecPtr; + self->repl_flush_lsn = InvalidXLogRecPtr; + self->repl_apply_lsn = InvalidXLogRecPtr; + self->repl_feedback_pending = 0; + + gettimeofday(&self->repl_last_io, NULL); + + if (pq_execute(self, command, self->conn->async, + 1 /* no_result */, 1 /* no_begin */) >= 0) { res = Py_None; - Py_INCREF(Py_None); + Py_INCREF(res); } Py_CLEAR(self->copyfile); - self->in_replication = 0; return res; } #define psyco_curs_stop_replication_doc \ -"start_replication() -- Set flag to break out of endless loop in start_replication()." +"stop_replication() -- Set flag to break out of endless loop in start_replication() on sync connection." static PyObject * psyco_curs_stop_replication(cursorObject *self) { EXC_IF_CURS_CLOSED(self); - if (!self->in_replication) { - PyErr_SetString(ProgrammingError, - "stop_replication() called when not in streaming replication loop"); + self->repl_stop = 1; + + Py_RETURN_NONE; +} + +#define psyco_curs_read_replication_message_doc \ +"read_replication_message(decode=True) -- Try reading a replication message from the server (non-blocking)." 
+ +static PyObject * +psyco_curs_read_replication_message(cursorObject *self, PyObject *args, PyObject *kwargs) +{ + int decode = 1; + static char *kwlist[] = {"decode", NULL}; + + EXC_IF_CURS_CLOSED(self); + EXC_IF_GREEN(read_replication_message); + EXC_IF_TPC_PREPARED(self->conn, read_replication_message); + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, + &decode)) { + return NULL; + } + + return pq_read_replication_message(self, decode); +} + +static PyObject * +curs_flush_replication_feedback(cursorObject *self, int reply) +{ + if (!self->repl_feedback_pending) + Py_RETURN_FALSE; + + if (pq_send_replication_feedback(self, reply)) { + self->repl_feedback_pending = 0; + Py_RETURN_TRUE; } else { - self->stop_replication = 1; + self->repl_feedback_pending = 1; + Py_RETURN_FALSE; } +} - Py_RETURN_NONE; +#define psyco_curs_send_replication_feedback_doc \ +"send_replication_feedback(write_lsn=0, flush_lsn=0, apply_lsn=0, reply=False) -- Try sending a replication feedback message to the server and optionally request a reply." 
+ +static PyObject * +psyco_curs_send_replication_feedback(cursorObject *self, PyObject *args, PyObject *kwargs) +{ + XLogRecPtr write_lsn = InvalidXLogRecPtr, + flush_lsn = InvalidXLogRecPtr, + apply_lsn = InvalidXLogRecPtr; + int reply = 0; + static char* kwlist[] = {"write_lsn", "flush_lsn", "apply_lsn", "reply", NULL}; + + EXC_IF_CURS_CLOSED(self); + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|KKKi", kwlist, + &write_lsn, &flush_lsn, &apply_lsn, &reply)) { + return NULL; + } + + if (write_lsn > self->repl_write_lsn) + self->repl_write_lsn = write_lsn; + + if (flush_lsn > self->repl_flush_lsn) + self->repl_flush_lsn = flush_lsn; + + if (apply_lsn > self->repl_apply_lsn) + self->repl_apply_lsn = apply_lsn; + + self->repl_feedback_pending = 1; + + return curs_flush_replication_feedback(self, reply); } -#define psyco_curs_replication_sync_server_doc \ -"replication_sync_server(msg) -- Set flag to sync the server up to this replication message." +#define psyco_curs_flush_replication_feedback_doc \ +"flush_replication_feedback(reply=False) -- Try flushing the latest pending replication feedback message to the server and optionally request a reply." 
static PyObject * -psyco_curs_replication_sync_server(cursorObject *self, PyObject *args) +psyco_curs_flush_replication_feedback(cursorObject *self, PyObject *args, PyObject *kwargs) { + int reply = 0; + static char *kwlist[] = {"reply", NULL}; + EXC_IF_CURS_CLOSED(self); - if (!PyArg_ParseTuple(args, "K", &self->repl_sync_lsn)) { + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, + &reply)) { return NULL; } - Py_RETURN_NONE; + return curs_flush_replication_feedback(self, reply); +} + +#define psyco_curs_replication_io_timestamp_doc \ +"replication_io_timestamp -- the timestamp of latest IO with the server" + +static PyObject * +psyco_curs_get_replication_io_timestamp(cursorObject *self) +{ + PyObject *tval, *res = NULL; + double seconds; + + EXC_IF_CURS_CLOSED(self); + + // TODO: move to a one-call init function + PyDateTime_IMPORT; + + seconds = self->repl_last_io.tv_sec + self->repl_last_io.tv_usec / 1.0e6; + + tval = Py_BuildValue("(d)", seconds); + if (tval) { + res = PyDateTime_FromTimestamp(tval); + Py_DECREF(tval); + } + return res; } /* extension: closed - return true if cursor is closed */ @@ -1830,11 +1936,15 @@ static struct PyMethodDef cursorObject_methods[] = { {"copy_expert", (PyCFunction)psyco_curs_copy_expert, METH_VARARGS|METH_KEYWORDS, psyco_curs_copy_expert_doc}, {"start_replication_expert", (PyCFunction)psyco_curs_start_replication_expert, - METH_VARARGS, psyco_curs_start_replication_expert_doc}, + METH_VARARGS|METH_KEYWORDS, psyco_curs_start_replication_expert_doc}, {"stop_replication", (PyCFunction)psyco_curs_stop_replication, METH_NOARGS, psyco_curs_stop_replication_doc}, - {"replication_sync_server", (PyCFunction)psyco_curs_replication_sync_server, - METH_VARARGS, psyco_curs_replication_sync_server_doc}, + {"read_replication_message", (PyCFunction)psyco_curs_read_replication_message, + METH_VARARGS|METH_KEYWORDS, psyco_curs_read_replication_message_doc}, + {"send_replication_feedback", 
(PyCFunction)psyco_curs_send_replication_feedback, + METH_VARARGS|METH_KEYWORDS, psyco_curs_send_replication_feedback_doc}, + {"flush_replication_feedback", (PyCFunction)psyco_curs_flush_replication_feedback, + METH_VARARGS|METH_KEYWORDS, psyco_curs_flush_replication_feedback_doc}, {NULL} }; @@ -1885,6 +1995,9 @@ static struct PyGetSetDef cursorObject_getsets[] = { (getter)psyco_curs_scrollable_get, (setter)psyco_curs_scrollable_set, psyco_curs_scrollable_doc, NULL }, + { "replication_io_timestamp", + (getter)psyco_curs_get_replication_io_timestamp, NULL, + psyco_curs_replication_io_timestamp_doc, NULL }, {NULL} }; diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 7ce06a862..03d928cff 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1062,6 +1062,9 @@ pq_get_last_result(connectionObject *conn) PQclear(result); } result = res; + if (PQresultStatus(result) == PGRES_COPY_BOTH) { + break; + } } return result; @@ -1522,32 +1525,151 @@ _pq_copy_out_v3(cursorObject *curs) return ret; } -static int -sendFeedback(PGconn *conn, XLogRecPtr written_lsn, XLogRecPtr fsync_lsn, - int replyRequested) +/* ignores keepalive messages */ +PyObject * +pq_read_replication_message(cursorObject *curs, int decode) +{ + char *buffer = NULL; + int len, hdr, reply; + XLogRecPtr data_start, wal_end; + pg_int64 send_time; + PyObject *str = NULL, *msg = NULL; + + Dprintf("pq_read_replication_message(decode=%d)", decode); + +retry: + if (!PQconsumeInput(curs->conn->pgconn)) { + goto none; + } + + Py_BEGIN_ALLOW_THREADS; + len = PQgetCopyData(curs->conn->pgconn, &buffer, 1 /* async */); + Py_END_ALLOW_THREADS; + if (len == 0) { + goto none; + } + + if (len == -2) { + pq_raise(curs->conn, curs, NULL); + goto exit; + } + if (len == -1) { + curs->pgres = PQgetResult(curs->conn->pgconn); + + if (curs->pgres && PQresultStatus(curs->pgres) == PGRES_FATAL_ERROR) { + pq_raise(curs->conn, curs, NULL); + goto exit; + } + + CLEARPGRES(curs->pgres); + goto none; + } + + /* ok, we did really read 
something: update the io timestamp */ + gettimeofday(&curs->repl_last_io, NULL); + + Dprintf("pq_read_replication_message: msg=%c, len=%d", buffer[0], len); + if (buffer[0] == 'w') { + /* msgtype(1), dataStart(8), walEnd(8), sendTime(8) */ + hdr = 1 + 8 + 8 + 8; + if (len < hdr + 1) { + psyco_set_error(OperationalError, curs, "data message header too small"); + goto exit; + } + + data_start = fe_recvint64(buffer + 1); + wal_end = fe_recvint64(buffer + 1 + 8); + send_time = fe_recvint64(buffer + 1 + 8 + 8); + + Dprintf("pq_read_replication_message: data_start="XLOGFMTSTR", wal_end="XLOGFMTSTR, + XLOGFMTARGS(data_start), XLOGFMTARGS(wal_end)); + + Dprintf("pq_read_replication_message: >>%.*s<<", len - hdr, buffer + hdr); + + if (decode) { + str = PyUnicode_Decode(buffer + hdr, len - hdr, curs->conn->codec, NULL); + } else { + str = Bytes_FromStringAndSize(buffer + hdr, len - hdr); + } + if (!str) { goto exit; } + + msg = PyObject_CallFunctionObjArgs((PyObject *)&replicationMessageType, + curs, str, NULL); + Py_DECREF(str); + if (!msg) { goto exit; } + + ((replicationMessageObject *)msg)->data_start = data_start; + ((replicationMessageObject *)msg)->wal_end = wal_end; + ((replicationMessageObject *)msg)->send_time = send_time; + } + else if (buffer[0] == 'k') { + /* msgtype(1), walEnd(8), sendTime(8), reply(1) */ + hdr = 1 + 8 + 8; + if (len < hdr + 1) { + psyco_set_error(OperationalError, curs, "keepalive message header too small"); + goto exit; + } + + reply = buffer[hdr]; + if (reply) { + if (!pq_send_replication_feedback(curs, 0)) { + if (curs->conn->async) { + curs->repl_feedback_pending = 1; + } else { + pq_raise(curs->conn, curs, NULL); + goto exit; + } + } + else { + gettimeofday(&curs->repl_last_io, NULL); + } + } + + PQfreemem(buffer); + buffer = NULL; + goto retry; + } + else { + psyco_set_error(OperationalError, curs, "unrecognized replication message type"); + goto exit; + } + +exit: + if (buffer) { + PQfreemem(buffer); + } + + return msg; + +none: + msg 
= Py_None; + Py_INCREF(msg); + goto exit; +} + +int +pq_send_replication_feedback(cursorObject* curs, int reply_requested) { char replybuf[1 + 8 + 8 + 8 + 8 + 1]; int len = 0; - Dprintf("_pq_copy_both_v3: confirming write up to "XLOGFMTSTR", flush to "XLOGFMTSTR, - XLOGFMTARGS(written_lsn), XLOGFMTARGS(fsync_lsn)); - - replybuf[len] = 'r'; - len += 1; - fe_sendint64(written_lsn, &replybuf[len]); /* write */ - len += 8; - fe_sendint64(fsync_lsn, &replybuf[len]); /* flush */ - len += 8; - fe_sendint64(InvalidXLogRecPtr, &replybuf[len]); /* apply */ - len += 8; - fe_sendint64(feGetCurrentTimestamp(), &replybuf[len]); /* sendTime */ - len += 8; - replybuf[len] = replyRequested ? 1 : 0; /* replyRequested */ - len += 1; - - if (PQputCopyData(conn, replybuf, len) <= 0 || PQflush(conn)) { + Dprintf("pq_send_replication_feedback: write="XLOGFMTSTR", flush="XLOGFMTSTR", apply="XLOGFMTSTR, + XLOGFMTARGS(curs->repl_write_lsn), + XLOGFMTARGS(curs->repl_flush_lsn), + XLOGFMTARGS(curs->repl_apply_lsn)); + + replybuf[len] = 'r'; len += 1; + fe_sendint64(curs->repl_write_lsn, &replybuf[len]); len += 8; + fe_sendint64(curs->repl_flush_lsn, &replybuf[len]); len += 8; + fe_sendint64(curs->repl_apply_lsn, &replybuf[len]); len += 8; + fe_sendint64(feGetCurrentTimestamp(), &replybuf[len]); len += 8; + replybuf[len] = reply_requested ? 
1 : 0; len += 1; + + if (PQputCopyData(curs->conn->pgconn, replybuf, len) <= 0 || + PQflush(curs->conn->pgconn) != 0) { return 0; } + gettimeofday(&curs->repl_last_io, NULL); return 1; } @@ -1556,33 +1678,19 @@ sendFeedback(PGconn *conn, XLogRecPtr written_lsn, XLogRecPtr fsync_lsn, static int _pq_copy_both_v3(cursorObject *curs) { - PyObject *tmp = NULL; + PyObject *msg, *tmp = NULL; PyObject *write_func = NULL; - PyObject *obj = NULL; - replicationMessageObject *msg = NULL; int ret = -1; int is_text; - PGconn *conn; - char *buffer = NULL; + PGconn *pgconn; fd_set fds; - struct timeval last_comm, curr_time, ping_time, time_diff; - int len, hdr, reply, sel; - - XLogRecPtr written_lsn = InvalidXLogRecPtr, - fsync_lsn = InvalidXLogRecPtr, - data_start, wal_end; - pg_int64 send_time; + struct timeval curr_time, ping_time, time_diff; + int sel; if (!curs->copyfile) { - PyErr_SetString(ProgrammingError, - "can't execute START_REPLICATION: use the start_replication() method instead"); - goto exit; - } - - if (curs->keepalive_interval <= 0) { - PyErr_Format(PyExc_RuntimeError, "keepalive_interval must be > 0: %d", - curs->keepalive_interval); + psyco_set_error(ProgrammingError, curs, + "can't execute START_REPLICATION directly: use the start_replication() method instead"); goto exit; } @@ -1597,31 +1705,29 @@ _pq_copy_both_v3(cursorObject *curs) } CLEARPGRES(curs->pgres); - - /* timestamp of last communication with the server */ - gettimeofday(&last_comm, NULL); - - conn = curs->conn->pgconn; + pgconn = curs->conn->pgconn; while (1) { - len = PQgetCopyData(conn, &buffer, 1 /* async! 
*/); - if (len < 0) { - break; + msg = pq_read_replication_message(curs, is_text); + if (!msg) { + goto exit; } - if (len == 0) { + else if (msg == Py_None) { + Py_DECREF(msg); + FD_ZERO(&fds); - FD_SET(PQsocket(conn), &fds); + FD_SET(PQsocket(pgconn), &fds); - /* set up timeout according to keepalive_interval, but no less than 1 second */ gettimeofday(&curr_time, NULL); - ping_time = last_comm; - ping_time.tv_sec += curs->keepalive_interval; + ping_time = curs->repl_last_io; + ping_time.tv_sec += curs->repl_keepalive_interval.tv_sec; + ping_time.tv_usec += curs->repl_keepalive_interval.tv_usec; timersub(&ping_time, &curr_time, &time_diff); if (time_diff.tv_sec > 0) { Py_BEGIN_ALLOW_THREADS; - sel = select(PQsocket(conn) + 1, &fds, NULL, NULL, &time_diff); + sel = select(PQsocket(pgconn) + 1, &fds, NULL, NULL, &time_diff); Py_END_ALLOW_THREADS; } else { @@ -1639,148 +1745,34 @@ _pq_copy_both_v3(cursorObject *curs) continue; } - if (sel > 0) { - if (!PQconsumeInput(conn)) { - Dprintf("_pq_copy_both_v3: PQconsumeInput failed"); + if (sel == 0) { + if (!pq_send_replication_feedback(curs, 0)) { pq_raise(curs->conn, curs, NULL); goto exit; } } - else { /* timeout */ - if (!sendFeedback(conn, written_lsn, fsync_lsn, 0)) { - pq_raise(curs->conn, curs, NULL); - goto exit; - } - } - gettimeofday(&last_comm, NULL); continue; } - if (len > 0 && buffer) { - gettimeofday(&last_comm, NULL); - - Dprintf("_pq_copy_both_v3: msg=%c, len=%d", buffer[0], len); - if (buffer[0] == 'w') { - /* msgtype(1), dataStart(8), walEnd(8), sendTime(8) */ - hdr = 1 + 8 + 8 + 8; - if (len < hdr + 1) { - PyErr_Format(PyExc_RuntimeError, - "streaming header too small in data message: %d", len); - goto exit; - } - - data_start = fe_recvint64(buffer + 1); - wal_end = fe_recvint64(buffer + 1 + 8); - send_time = fe_recvint64(buffer + 1 + 8 + 8); - - Dprintf("_pq_copy_both_v3: data_start="XLOGFMTSTR", wal_end="XLOGFMTSTR", send_time=%lld", - XLOGFMTARGS(data_start), XLOGFMTARGS(wal_end), send_time); - - if 
(is_text) { - obj = PyUnicode_Decode(buffer + hdr, len - hdr, curs->conn->codec, NULL); - } - else { - obj = Bytes_FromStringAndSize(buffer + hdr, len - hdr); - } - if (!obj) { goto exit; } - - msg = (replicationMessageObject *) - PyObject_CallFunctionObjArgs((PyObject *)&replicationMessageType, - curs, obj, NULL); - Py_DECREF(obj); - if (!msg) { goto exit; } - - msg->data_start = data_start; - msg->wal_end = wal_end; - msg->send_time = send_time; - - tmp = PyObject_CallFunctionObjArgs(write_func, msg, NULL); - - if (tmp == NULL) { - Dprintf("_pq_copy_both_v3: write_func returned NULL"); - goto exit; - } - Py_DECREF(tmp); - - /* update the LSN position we've written up to */ - if (written_lsn < wal_end) - written_lsn = wal_end; - - /* if requested by sync_server(msg), we confirm LSN with the server */ - if (curs->repl_sync_lsn != InvalidXLogRecPtr) { - Dprintf("_pq_copy_both_v3: server sync requested at "XLOGFMTSTR, - XLOGFMTARGS(curs->repl_sync_lsn)); - - if (fsync_lsn < curs->repl_sync_lsn) - fsync_lsn = curs->repl_sync_lsn; - - curs->repl_sync_lsn = InvalidXLogRecPtr; - - if (!sendFeedback(conn, written_lsn, fsync_lsn, 0)) { - pq_raise(curs->conn, curs, NULL); - goto exit; - } - gettimeofday(&last_comm, NULL); - } - - if (curs->stop_replication) { - Dprintf("_pq_copy_both_v3: stop_replication flag set by write_func"); - break; - } - - Py_DECREF(msg); - msg = NULL; - } - else if (buffer[0] == 'k') { - /* msgtype(1), walEnd(8), sendTime(8), reply(1) */ - hdr = 1 + 8 + 8; - if (len < hdr + 1) { - PyErr_Format(PyExc_RuntimeError, - "streaming header too small in keepalive message: %d", len); - goto exit; - } + else { + tmp = PyObject_CallFunctionObjArgs(write_func, msg, NULL); + Py_DECREF(msg); - reply = buffer[hdr]; - if (reply) { - if (!sendFeedback(conn, written_lsn, fsync_lsn, 0)) { - pq_raise(curs->conn, curs, NULL); - goto exit; - } - gettimeofday(&last_comm, NULL); - } - } - else { - PyErr_Format(PyExc_RuntimeError, - "unrecognized streaming message type: 
\"%c\"", buffer[0]); + if (tmp == NULL) { + Dprintf("_pq_copy_both_v3: write_func returned NULL"); goto exit; } + Py_DECREF(tmp); - /* buffer is allocated on every PQgetCopyData() call */ - PQfreemem(buffer); - buffer = NULL; + if (curs->repl_stop) { + Dprintf("_pq_copy_both_v3: repl_stop flag set by write_func"); + break; + } } } - if (len == -2) { - pq_raise(curs->conn, curs, NULL); - goto exit; - } - if (len == -1) { - curs->pgres = PQgetResult(curs->conn->pgconn); - - if (curs->pgres && PQresultStatus(curs->pgres) == PGRES_FATAL_ERROR) - pq_raise(curs->conn, curs, NULL); - - CLEARPGRES(curs->pgres); - } - ret = 1; exit: - if (buffer) { - PQfreemem(buffer); - } - - Py_XDECREF(msg); Py_XDECREF(write_func); return ret; } @@ -1847,9 +1839,13 @@ pq_fetch(cursorObject *curs, int no_result) case PGRES_COPY_BOTH: Dprintf("pq_fetch: data from a streaming replication slot (no tuples)"); curs->rowcount = -1; - ex = _pq_copy_both_v3(curs); - /* error caught by out glorious notice handler */ - if (PyErr_Occurred()) ex = -1; + if (curs->conn->async) { + ex = 0; + } else { + ex = _pq_copy_both_v3(curs); + /* error caught by out glorious notice handler */ + if (PyErr_Occurred()) ex = -1; + } CLEARPGRES(curs->pgres); break; diff --git a/psycopg/pqpath.h b/psycopg/pqpath.h index bd3293f86..9a348bc26 100644 --- a/psycopg/pqpath.h +++ b/psycopg/pqpath.h @@ -72,4 +72,7 @@ HIDDEN int pq_execute_command_locked(connectionObject *conn, RAISES HIDDEN void pq_complete_error(connectionObject *conn, PGresult **pgres, char **error); +HIDDEN PyObject *pq_read_replication_message(cursorObject *curs, int decode); +HIDDEN int pq_send_replication_feedback(cursorObject *curs, int reply_requested); + #endif /* !defined(PSYCOPG_PQPATH_H) */ diff --git a/psycopg2.cproj b/psycopg2.cproj index 18b9727f1..386287c13 100644 --- a/psycopg2.cproj +++ b/psycopg2.cproj @@ -92,6 +92,7 @@ + @@ -224,6 +225,7 @@ + From 318706f28c07444c1a73a3022eab2018ec73817c Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin 
Date: Tue, 30 Jun 2015 16:17:31 +0200 Subject: [PATCH 13/60] Update docs for Replication protocol --- doc/src/extras.rst | 199 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 165 insertions(+), 34 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index 9bc302e26..7cca84002 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -165,8 +165,8 @@ Replication cursor .. method:: identify_system() - Get information about the cluster status in form of a dict with - ``systemid``, ``timeline``, ``xlogpos`` and ``dbname`` as keys. + This method executes ``IDENTIFY_SYSTEM`` command of the streaming + replication protocol and returns a result as a dictionary. Example:: @@ -197,65 +197,196 @@ Replication cursor cur.drop_replication_slot("testslot") - .. method:: start_replication(file, slot_type, slot_name=None, start_lsn=None, timeline=0, keepalive_interval=10, options=None) + .. method:: start_replication(slot_type, slot_name=None, writer=None, start_lsn=None, timeline=0, keepalive_interval=10, options=None) Start and consume replication stream. - :param file: a file-like object to write replication stream messages to :param slot_type: type of replication: either `REPLICATION_PHYSICAL` or `REPLICATION_LOGICAL` :param slot_name: name of the replication slot to use (required for logical replication) + :param writer: a file-like object to write replication messages to :param start_lsn: the point in replication stream (WAL position) to start from, in the form ``XXX/XXX`` (forward-slash separated pair of hexadecimals) :param timeline: WAL history timeline to start streaming from (optional, can only be used with physical replication) :param keepalive_interval: interval (in seconds) to send keepalive - messages to the server, in case there was no - communication during that period of time + messages to the server :param options: an dictionary of options to pass to logical replication slot - The ``keepalive_interval`` must be greater than zero. 
+ With non-asynchronous connection, this method enters an endless loop, + reading messages from the server and passing them to ``write()`` method + of the *writer* object. This is similar to operation of the + `~cursor.copy_to()` method. It also sends keepalive messages to the + server, in case there were no new data from it for the duration of + *keepalive_interval* seconds (this parameter must be greater than 1 + second, but it can have a fractional part). - This method never returns unless an error message is sent from the - server, or the server closes connection, or there is an exception in the - ``write()`` method of the ``file`` object. + With asynchronous connection, this method returns immediately and the + calling code can start reading the replication messages in a loop. - One can even use ``sys.stdout`` as the destination (this is only good for - testing purposes, however):: + A sketch implementation of the *writer* object might look similar to + the following:: - >>> cur.start_replication(sys.stdout, "testslot") - ... + from io import TextIOBase - This method acts much like the `~cursor.copy_to()` with an important - distinction that ``write()`` method return value is dirving the - server-side replication cursor. In order to report to the server that - the all the messages up to the current one have been stored reliably, one - should return true value (i.e. something that satisfies ``if retval:`` - conidtion) from the ``write`` callback:: + class ReplicationStreamWriter(TextIOBase): - class ReplicationStreamWriter(object): def write(self, msg): - if store_message_reliably(msg): - return True + self.store_data_reliably(msg) - cur.start_replication(writer, "testslot") - ... + if self.should_report_to_the_server(msg): + msg.cursor.send_replication_feedback(flush_lsn=msg.wal_end) + + def store_data_reliably(self, msg): + ... + + def shoud_report_to_the_server(self, msg): + ... 
+ + First, like with the `~cursor.copy_to()` method, the code that is + calling the provided write method checks if the *writer* object is + inherited from `~io.TextIOBase`. If that is the case, the message + payload to be passed is converted to unicode using the connection's + encoding information. Otherwise, the message is passed as is. + + The *msg* object being passed is an instance of `~ReplicationMessage` + class. + + After storing the data passed in the message object, the writer object + should consider sending a confirmation message to the server. This is + done by calling `~send_replication_feedback()` method on the + corresponding replication cursor. A reference to the cursor producing + a given message is provided in the `~ReplicationMessage` as an + attribute. .. note:: - One needs to be aware that failure to update the server-side cursor - on any one replication slot properly by constantly consuming and - reporting success to the server can eventually lead to "disk full" - condition on the server, because the server retains all the WAL - segments that might be needed to stream the changes via currently - open replication slots. + One needs to be aware that failure to properly notify the server on + any one replication slot by constantly consuming and reporting + success to the server at appropriate times can eventually lead to + "disk full" condition on the server, because the server retains all + the WAL segments that might be needed to stream the changes via + currently open replication slots. + + .. method:: stop_replication() + + In non-asynchronous connection, when called from the ``write()`` + method tells the code in `~start_replication` to break out of the + endless loop and return. + + .. 
method:: send_replication_feedback(write_lsn=0, flush_lsn=0, apply_lsn=0, reply=False) + + :param write_lsn: a LSN position up to which the client has written the data locally + :param flush_lsn: a LSN position up to which the client has stored the + data reliably (the server is allowed to discard all + and every data that predates this LSN) + :param apply_lsn: a LSN position up to which the warm standby server + has applied the changes (physical replication + master-slave protocol only) + :param reply: request the server to send back a keepalive message immediately + + Use this method to report to the server that all messages up to a + certain LSN position have been stored and may be discarded. + + This method can also be called with default parameters to send a + keepalive message to the server. + + In case the message cannot be sent at the moment, remembers the + positions for a later successful call or call to + `~flush_replication_feedback()`. + + .. method:: flush_replication_feedback(reply=False) + + :param reply: request the server to send back a keepalive message immediately + + This method tries to flush the latest replication feedback message + that `~send_replication_feedback()` was trying to send, if any. + + Low-level methods for asynchronous connection operation. + + While with the non-asynchronous connection, a single call to + `~start_replication()` handles all the complexity, at times it might be + beneficial to use low-level interface for better control, in particular to + `~select.select()` on multiple sockets. The following methods are + provided for asynchronous operation: + + .. method:: read_replication_message(decode=True) + + :param decode: a flag indicating that unicode conversion should be + performed on the data received from the server + + This method should be used in a loop with asynchronous connections + after calling `~start_replication()`. 
+ + It tries to read the next message from the server, without blocking + and returns an instance of `~ReplicationMessage` or *None*, in case + there are no more data messages from the server at the moment. After + receiving a *None* value from this method, one should use a + `~select.select()` or `~select.poll()` on the corresponding connection + to block the process until there is more data from the server. + + The server can send keepalive messages to the client periodically. + Such messages are silently consumed by this method and are never + reported to the caller. + + .. method:: fileno() + + Calls the corresponding connection's `~connection.fileno()` method + and returns the result. + + This is a convenience method which allows replication cursor to be + used directly in `~select.select()` or `~select.poll()` calls. + + .. attribute:: replication_io_timestamp + + A `~datetime` object representing the timestamp at the moment of last + communication with the server (a data or keepalive message in either + direction). + + An actual example of asynchronous operation might look like this:: + + keepalive_interval = 10.0 + while True: + if (datetime.now() - cur.replication_io_timestamp).total_seconds() >= keepalive_interval: + cur.send_replication_feedback() + + while True: + msg = cur.read_replication_message() + if not msg: + break + writer.write(msg) + + timeout = keepalive_interval - (datetime.now() - cur.replication_io_timestamp).total_seconds() + if timeout > 0: + select.select([cur], [], [], timeout) + +.. autoclass:: ReplicationMessage + + .. attribute:: payload + + The actual data received from the server. An instance of either + ``str`` or ``unicode``. + + .. attribute:: data_start + + LSN position of the start of the message. + + .. attribute:: wal_end + + LSN position of the end of the message. + + .. attribute:: send_time + + A `~datetime` object representing the server timestamp at the moment + when the message was sent. + + .. 
attribute:: cursor + + A reference to the corresponding `~ReplicationCursor` object. - Drop any open replication slots that are no longer being used. The - list of open slots can be obtained by running a query like ``SELECT * - FROM pg_replication_slots``. .. data:: REPLICATION_PHYSICAL From 0d731aa12e6d9a59e61cebe9c0a7d71025f000f8 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 30 Jun 2015 16:34:17 +0200 Subject: [PATCH 14/60] Comment on special handling of PGRES_COPY_BOTH --- psycopg/pqpath.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 03d928cff..04789d354 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1062,6 +1062,10 @@ pq_get_last_result(connectionObject *conn) PQclear(result); } result = res; + + /* After entering copy both mode, libpq will make a phony + * PGresult for us every time we query for it, so we need to + * break out of this endless loop. */ if (PQresultStatus(result) == PGRES_COPY_BOTH) { break; } From 9386653d721229eae3f9e691a93d711575d2e5c6 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 1 Jul 2015 14:08:32 +0200 Subject: [PATCH 15/60] Update docs on ReplicationCursor --- doc/src/extras.rst | 117 +++++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 57 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index 7cca84002..19c81523a 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -165,12 +165,12 @@ Replication cursor .. method:: identify_system() - This method executes ``IDENTIFY_SYSTEM`` command of the streaming - replication protocol and returns a result as a dictionary. + Execute ``IDENTIFY_SYSTEM`` command of the streaming replication + protocol and return the result as a dictionary. Example:: - >>> print cur.identify_system() + >>> cur.identify_system() {'timeline': 1, 'systemid': '1234567890123456789', 'dbname': 'test', 'xlogpos': '0/1ABCDEF'} .. 
method:: create_replication_slot(slot_type, slot_name, output_plugin=None) @@ -199,82 +199,81 @@ Replication cursor .. method:: start_replication(slot_type, slot_name=None, writer=None, start_lsn=None, timeline=0, keepalive_interval=10, options=None) - Start and consume replication stream. + Start a replication stream. On non-asynchronous connection, also + consume the stream messages. :param slot_type: type of replication: either `REPLICATION_PHYSICAL` or `REPLICATION_LOGICAL` :param slot_name: name of the replication slot to use (required for logical replication) :param writer: a file-like object to write replication messages to - :param start_lsn: the point in replication stream (WAL position) to start - from, in the form ``XXX/XXX`` (forward-slash separated - pair of hexadecimals) + :param start_lsn: the LSN position to start from, in the form + ``XXX/XXX`` (forward-slash separated pair of + hexadecimals) :param timeline: WAL history timeline to start streaming from (optional, can only be used with physical replication) :param keepalive_interval: interval (in seconds) to send keepalive messages to the server - :param options: an dictionary of options to pass to logical replication + :param options: a dictionary of options to pass to logical replication slot - With non-asynchronous connection, this method enters an endless loop, - reading messages from the server and passing them to ``write()`` method - of the *writer* object. This is similar to operation of the + When used on non-asynchronous connection this method enters an endless + loop, reading messages from the server and passing them to ``write()`` + method of the *writer* object. This is similar to operation of the `~cursor.copy_to()` method. It also sends keepalive messages to the server, in case there were no new data from it for the duration of - *keepalive_interval* seconds (this parameter must be greater than 1 - second, but it can have a fractional part). 
+ *keepalive_interval* seconds (this parameter's value must be equal to + at least than 1 second, but it can have a fractional part). With asynchronous connection, this method returns immediately and the calling code can start reading the replication messages in a loop. - A sketch implementation of the *writer* object might look similar to - the following:: + A sketch implementation of the *writer* object for logical replication + might look similar to the following:: from io import TextIOBase - class ReplicationStreamWriter(TextIOBase): + class LogicalStreamWriter(TextIOBase): def write(self, msg): - self.store_data_reliably(msg) + self.store_message_data(msg.payload) - if self.should_report_to_the_server(msg): + if self.should_report_to_the_server_now(msg): msg.cursor.send_replication_feedback(flush_lsn=msg.wal_end) - def store_data_reliably(self, msg): - ... - - def shoud_report_to_the_server(self, msg): - ... - - First, like with the `~cursor.copy_to()` method, the code that is - calling the provided write method checks if the *writer* object is + First, like with the `~cursor.copy_to()` method, the code that calls + the provided ``write()`` method checks if the *writer* object is inherited from `~io.TextIOBase`. If that is the case, the message payload to be passed is converted to unicode using the connection's - encoding information. Otherwise, the message is passed as is. + `~connection.encoding` information. Otherwise, the message is passed + as is. The *msg* object being passed is an instance of `~ReplicationMessage` class. - After storing the data passed in the message object, the writer object - should consider sending a confirmation message to the server. This is - done by calling `~send_replication_feedback()` method on the - corresponding replication cursor. A reference to the cursor producing - a given message is provided in the `~ReplicationMessage` as an - attribute. 
+ After storing certain amount of messages' data reliably, the client + should send a confirmation message to the server. This should be done + by calling `~send_replication_feedback()` method on the corresponding + replication cursor. A reference to the cursor is provided in the + `~ReplicationMessage` as an attribute. + + .. warning:: - .. note:: + Failure to properly notify the server by constantly consuming and + reporting success at appropriate times can eventually lead to "disk + full" condition on the server, because the server retains all the + WAL segments that might be needed to stream the changes via all of + the currently open replication slots. - One needs to be aware that failure to properly notify the server on - any one replication slot by constantly consuming and reporting - success to the server at appropriate times can eventually lead to - "disk full" condition on the server, because the server retains all - the WAL segments that might be needed to stream the changes via - currently open replication slots. + On the other hand, it is not recommended to send a confirmation + after every processed message, since that will put an unnecessary + load on network and the server. A possible strategy is to confirm + after every COMMIT message. .. method:: stop_replication() In non-asynchronous connection, when called from the ``write()`` - method tells the code in `~start_replication` to break out of the + method, tell the code in `~start_replication` to break out of the endless loop and return. .. method:: send_replication_feedback(write_lsn=0, flush_lsn=0, apply_lsn=0, reply=False) @@ -291,12 +290,12 @@ Replication cursor Use this method to report to the server that all messages up to a certain LSN position have been stored and may be discarded. - This method can also be called with default parameters to send a - keepalive message to the server. 
+ This method can also be called with all default parameters' values to + send a keepalive message to the server. - In case the message cannot be sent at the moment, remembers the - positions for a later successful call or call to - `~flush_replication_feedback()`. + In case of asynchronous connection, if the feedback message cannot be + sent at the moment, remembers the passed LSN positions for a later + hopefully successful call or call to `~flush_replication_feedback()`. .. method:: flush_replication_feedback(reply=False) @@ -307,10 +306,10 @@ Replication cursor Low-level methods for asynchronous connection operation. - While with the non-asynchronous connection, a single call to - `~start_replication()` handles all the complexity, at times it might be - beneficial to use low-level interface for better control, in particular to - `~select.select()` on multiple sockets. The following methods are + With the non-asynchronous connection, a single call to + `~start_replication()` handles all the complexity, but at times it might + be beneficial to use low-level interface for better control, in particular + to `~select.select()` on multiple sockets. The following methods are provided for asynchronous operation: .. method:: read_replication_message(decode=True) @@ -319,14 +318,18 @@ Replication cursor performed on the data received from the server This method should be used in a loop with asynchronous connections - after calling `~start_replication()`. + after calling `~start_replication()` once. It tries to read the next message from the server, without blocking and returns an instance of `~ReplicationMessage` or *None*, in case - there are no more data messages from the server at the moment. After - receiving a *None* value from this method, one should use a - `~select.select()` or `~select.poll()` on the corresponding connection - to block the process until there is more data from the server. + there are no more data messages from the server at the moment. 
+ + It is expected that the calling code will call this method repeatedly + in order to consume all of the messages that might have been buffered, + until *None* is returned. After receiving a *None* value from this + method, one might use `~select.select()` or `~select.poll()` on the + corresponding connection to block the process until there is more data + from the server. The server can send keepalive messages to the client periodically. Such messages are silently consumed by this method and are never @@ -334,8 +337,8 @@ Replication cursor .. method:: fileno() - Calls the corresponding connection's `~connection.fileno()` method - and returns the result. + Call the corresponding connection's `~connection.fileno()` method and + return the result. This is a convenience method which allows replication cursor to be used directly in `~select.select()` or `~select.poll()` calls. From dab41c699a3e20a3577ad52529d879741185df13 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 2 Jul 2015 14:34:09 +0200 Subject: [PATCH 16/60] Fix PQconsumeInput usage. Only call when no data is available in the internal buffer. --- psycopg/pqpath.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 04789d354..ed8b37f36 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1542,14 +1542,19 @@ pq_read_replication_message(cursorObject *curs, int decode) Dprintf("pq_read_replication_message(decode=%d)", decode); retry: - if (!PQconsumeInput(curs->conn->pgconn)) { - goto none; - } - Py_BEGIN_ALLOW_THREADS; len = PQgetCopyData(curs->conn->pgconn, &buffer, 1 /* async */); Py_END_ALLOW_THREADS; + if (len == 0) { + /* We should only try reading more data into the internal buffer when + * there is nothing available at the moment. 
Otherwise, with a really + * highly loaded server we might be reading a number of messages for + * every single one we process, thus overgrowing the internal buffer + * until the system runs out of memory. */ + if (PQconsumeInput(curs->conn->pgconn)) { + goto retry; + } goto none; } From 9c1f2acf3e3608ba0d13b0b3c3d01b68f2a29d90 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 2 Jul 2015 14:39:51 +0200 Subject: [PATCH 17/60] Check return value of PQsocket When connection is closed by the server, we might get -1 there. --- psycopg/pqpath.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index ed8b37f36..e550d796b 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1689,13 +1689,10 @@ _pq_copy_both_v3(cursorObject *curs) { PyObject *msg, *tmp = NULL; PyObject *write_func = NULL; - int ret = -1; - int is_text; - + int is_text, fd, sel, ret = -1; PGconn *pgconn; fd_set fds; struct timeval curr_time, ping_time, time_diff; - int sel; if (!curs->copyfile) { psyco_set_error(ProgrammingError, curs, @@ -1724,8 +1721,14 @@ _pq_copy_both_v3(cursorObject *curs) else if (msg == Py_None) { Py_DECREF(msg); + fd = PQsocket(pgconn); + if (fd < 0) { + pq_raise(curs->conn, curs, NULL); + goto exit; + } + FD_ZERO(&fds); - FD_SET(PQsocket(pgconn), &fds); + FD_SET(fd, &fds); gettimeofday(&curr_time, NULL); @@ -1736,7 +1739,7 @@ _pq_copy_both_v3(cursorObject *curs) timersub(&ping_time, &curr_time, &time_diff); if (time_diff.tv_sec > 0) { Py_BEGIN_ALLOW_THREADS; - sel = select(PQsocket(pgconn) + 1, &fds, NULL, NULL, &time_diff); + sel = select(fd + 1, &fds, NULL, NULL, &time_diff); Py_END_ALLOW_THREADS; } else { From 06f18237f7932aab066cae2c09b6e335af5225f2 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Fri, 3 Jul 2015 11:40:00 +0200 Subject: [PATCH 18/60] Fix missing free in replmsg_dealloc --- psycopg/replication_message_type.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/psycopg/replication_message_type.c b/psycopg/replication_message_type.c index 27a9c9163..e52b32eef 100644 --- a/psycopg/replication_message_type.c +++ b/psycopg/replication_message_type.c @@ -80,7 +80,11 @@ replmsg_clear(replicationMessageObject *self) static void replmsg_dealloc(PyObject* obj) { + PyObject_GC_UnTrack(obj); + replmsg_clear((replicationMessageObject*) obj); + + Py_TYPE(obj)->tp_free(obj); } #define psyco_replmsg_send_time_doc \ From eac16d048ac597e3602e7ebddb3ea191e0537cff Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Fri, 3 Jul 2015 15:44:45 +0200 Subject: [PATCH 19/60] Fix missing GC flag in ReplicationMessage type --- psycopg/replication_message_type.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/psycopg/replication_message_type.c b/psycopg/replication_message_type.c index e52b32eef..edfe6c16d 100644 --- a/psycopg/replication_message_type.c +++ b/psycopg/replication_message_type.c @@ -69,6 +69,14 @@ replmsg_init(PyObject *obj, PyObject *args, PyObject *kwargs) return 0; } +static int +replmsg_traverse(replicationMessageObject *self, visitproc visit, void *arg) +{ + Py_VISIT((PyObject* )self->cursor); + Py_VISIT(self->payload); + return 0; +} + static int replmsg_clear(replicationMessageObject *self) { @@ -154,10 +162,10 @@ PyTypeObject replicationMessageType = { 0, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - /*tp_flags*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | + Py_TPFLAGS_HAVE_GC, /*tp_flags*/ replicationMessageType_doc, /*tp_doc*/ - 0, /*tp_traverse*/ + (traverseproc)replmsg_traverse, /*tp_traverse*/ (inquiry)replmsg_clear, /*tp_clear*/ 0, /*tp_richcompare*/ 0, /*tp_weaklistoffset*/ From 26fe1f230fb073033d3279eb054bccfe4aecee99 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 7 Jul 2015 19:04:32 +0200 Subject: [PATCH 20/60] Fix use of PQconsumeInput() in pq_read_replication_message() The libpq's PQconsumeInput() returns 0 in 
case of an error only, but we need to know if it was able to actually read something. Work around this by setting an internal flag before retry. --- psycopg/pqpath.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index e550d796b..edfdcd3a3 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1534,7 +1534,7 @@ PyObject * pq_read_replication_message(cursorObject *curs, int decode) { char *buffer = NULL; - int len, hdr, reply; + int len, consumed = 0, hdr, reply; XLogRecPtr data_start, wal_end; pg_int64 send_time; PyObject *str = NULL, *msg = NULL; @@ -1542,20 +1542,29 @@ pq_read_replication_message(cursorObject *curs, int decode) Dprintf("pq_read_replication_message(decode=%d)", decode); retry: - Py_BEGIN_ALLOW_THREADS; len = PQgetCopyData(curs->conn->pgconn, &buffer, 1 /* async */); - Py_END_ALLOW_THREADS; if (len == 0) { - /* We should only try reading more data into the internal buffer when - * there is nothing available at the moment. Otherwise, with a really - * highly loaded server we might be reading a number of messages for - * every single one we process, thus overgrowing the internal buffer - * until the system runs out of memory. */ - if (PQconsumeInput(curs->conn->pgconn)) { - goto retry; + /* If we've tried reading some data, but there was none, bail out. */ + if (consumed) { + goto none; } - goto none; + /* We should only try reading more data when there is nothing + available at the moment. Otherwise, with a really highly loaded + server we might be reading a number of messages for every single + one we process, thus overgrowing the internal buffer until the + client system runs out of memory. 
*/ + if (!PQconsumeInput(curs->conn->pgconn)) { + pq_raise(curs->conn, curs, NULL); + goto exit; + } + /* But PQconsumeInput() doesn't tell us if it has actually read + anything into the internal buffer and there is no (supported) way + to ask libpq about this directly. The way we check is setting the + flag and re-trying PQgetCopyData(): if that returns 0 again, + there's no more data available in the buffer, so we return None. */ + consumed = 1; + goto retry; } if (len == -2) { @@ -1574,6 +1583,11 @@ pq_read_replication_message(cursorObject *curs, int decode) goto none; } + /* It also makes sense to set this flag here to make us return early in + case of retry due to keepalive message. Any pending data on the socket + will trigger read condition in select() in the calling code anyway. */ + consumed = 1; + /* ok, we did really read something: update the io timestamp */ gettimeofday(&curs->repl_last_io, NULL); From f872a2aabbf69bc7f16a4c25f226d634f9d019c9 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 30 Sep 2015 14:34:45 +0200 Subject: [PATCH 21/60] Remove typedef for uint32, include internal/c.h --- psycopg/libpq_support.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/psycopg/libpq_support.h b/psycopg/libpq_support.h index ab35fef50..c71394632 100644 --- a/psycopg/libpq_support.h +++ b/psycopg/libpq_support.h @@ -26,9 +26,9 @@ #define PSYCOPG_LIBPQ_SUPPORT_H 1 #include "psycopg/config.h" +#include "internal/c.h" -/* type and constant definitions from internal postgres includes */ -typedef unsigned int uint32; +/* type and constant definitions from internal postgres includes not available otherwise */ typedef unsigned PG_INT64_TYPE XLogRecPtr; #define InvalidXLogRecPtr ((XLogRecPtr) 0) From 937a7a90246916bff0e956947b1bab6058c72d08 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 1 Oct 2015 11:08:56 +0200 Subject: [PATCH 22/60] Cleanup start replication wrt. slot type a bit. 
--- doc/src/extras.rst | 11 +++++----- lib/extras.py | 51 +++++++++++++++++++++++----------------------- psycopg/cursor.h | 4 ---- 3 files changed, 32 insertions(+), 34 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index 19c81523a..1da983a4f 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -197,7 +197,7 @@ Replication cursor cur.drop_replication_slot("testslot") - .. method:: start_replication(slot_type, slot_name=None, writer=None, start_lsn=None, timeline=0, keepalive_interval=10, options=None) + .. method:: start_replication(slot_type, slot_name=None, writer=None, start_lsn=0, timeline=0, keepalive_interval=10, options=None) Start a replication stream. On non-asynchronous connection, also consume the stream messages. @@ -207,15 +207,16 @@ Replication cursor :param slot_name: name of the replication slot to use (required for logical replication) :param writer: a file-like object to write replication messages to - :param start_lsn: the LSN position to start from, in the form - ``XXX/XXX`` (forward-slash separated pair of - hexadecimals) + :param start_lsn: the optional LSN position to start replicating from, + can be an integer or a string of hexadecimal digits + in the form ``XXX/XXX`` :param timeline: WAL history timeline to start streaming from (optional, can only be used with physical replication) :param keepalive_interval: interval (in seconds) to send keepalive messages to the server :param options: a dictionary of options to pass to logical replication - slot + slot (not allowed with physical replication, use + *None*) When used on non-asynchronous connection this method enters an endless loop, reading messages from the server and passing them to ``write()`` diff --git a/lib/extras.py b/lib/extras.py index 85debc68a..36138c630 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -480,8 +480,8 @@ def cursor(self, *args, **kwargs): """Streamging replication types.""" -REPLICATION_PHYSICAL = 0 -REPLICATION_LOGICAL = 1 
+REPLICATION_LOGICAL = "LOGICAL" +REPLICATION_PHYSICAL = "PHYSICAL" class ReplicationCursor(_cursor): """A cursor used for replication commands.""" @@ -504,18 +504,18 @@ def create_replication_slot(self, slot_type, slot_name, output_plugin=None): if slot_type == REPLICATION_LOGICAL: if output_plugin is None: - raise psycopg2.ProgrammingError("output_plugin is required for logical replication slot") + raise psycopg2.ProgrammingError("output plugin name is required for logical replication slot") - command += "LOGICAL %s" % self.quote_ident(output_plugin) + command += "%s %s" % (slot_type, self.quote_ident(output_plugin)) elif slot_type == REPLICATION_PHYSICAL: if output_plugin is not None: - raise psycopg2.ProgrammingError("output_plugin is not applicable to physical replication") + raise psycopg2.ProgrammingError("cannot specify output plugin name for physical replication slot") - command += "PHYSICAL" + command += slot_type else: - raise psycopg2.ProgrammingError("unrecognized replication slot type") + raise psycopg2.ProgrammingError("unrecognized replication slot type: %s" % slot_type) self.execute(command) @@ -525,44 +525,45 @@ def drop_replication_slot(self, slot_name): command = "DROP_REPLICATION_SLOT %s" % self.quote_ident(slot_name) self.execute(command) - def start_replication(self, slot_type, slot_name=None, writer=None, start_lsn=None, + def start_replication(self, slot_type, slot_name=None, writer=None, start_lsn=0, timeline=0, keepalive_interval=10, options=None): """Start and consume replication stream.""" command = "START_REPLICATION " - if slot_type == REPLICATION_LOGICAL and slot_name is None: - raise psycopg2.ProgrammingError("slot_name is required for logical replication slot") + if slot_type == REPLICATION_LOGICAL: + if slot_name: + command += "SLOT %s " % self.quote_ident(slot_name) + else: + raise psycopg2.ProgrammingError("slot name is required for logical replication") - if slot_name: - command += "SLOT %s " % self.quote_ident(slot_name) + 
command += "%s " % slot_type - if slot_type == REPLICATION_LOGICAL: - command += "LOGICAL " elif slot_type == REPLICATION_PHYSICAL: - command += "PHYSICAL " + if slot_name: + command += "SLOT %s " % self.quote_ident(slot_name) + + # don't add "PHYSICAL", before 9.4 it was just START_REPLICATION XXX/XXX else: - raise psycopg2.ProgrammingError("unrecognized replication slot type") + raise psycopg2.ProgrammingError("unrecognized replication slot type: %s" % slot_type) - if start_lsn is None: - start_lsn = '0/0' + if type(start_lsn) is str: + lsn = start_lsn.split('/') + lsn = "%X/%08X" % (int(lsn[0], 16), int(lsn[1], 16)) + else: + lsn = "%X/%08X" % ((start_lsn >> 32) & 0xFFFFFFFF, start_lsn & 0xFFFFFFFF) - # reparse lsn to catch possible garbage - lsn = start_lsn.split('/') - command += "%X/%X" % (int(lsn[0], 16), int(lsn[1], 16)) + command += lsn if timeline != 0: if slot_type == REPLICATION_LOGICAL: raise psycopg2.ProgrammingError("cannot specify timeline for logical replication") - if timeline < 0: - raise psycopg2.ProgrammingError("timeline must be >= 0: %d" % timeline) - command += " TIMELINE %d" % timeline if options: if slot_type == REPLICATION_PHYSICAL: - raise psycopg2.ProgrammingError("cannot specify plugin options for physical replication") + raise psycopg2.ProgrammingError("cannot specify output plugin options for physical replication") command += " (" for k,v in options.iteritems(): diff --git a/psycopg/cursor.h b/psycopg/cursor.h index 380abbf4a..dd07243f5 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -97,10 +97,6 @@ struct cursorObject { }; -/* streaming replication modes */ -#define CURSOR_REPLICATION_PHYSICAL 0 -#define CURSOR_REPLICATION_LOGICAL 1 - /* C-callable functions in cursor_int.c and cursor_type.c */ BORROWED HIDDEN PyObject *curs_get_cast(cursorObject *self, PyObject *oid); From 95ee218c6d1e3ee5d7339c1980f7c4c410c8d827 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 1 Oct 2015 15:34:51 +0200 Subject: [PATCH 23/60] 
Update replication connection/cursor interface and docs. --- doc/src/extras.rst | 101 ++++++++++++++++++++++++++++--------- lib/extras.py | 121 +++++++++++++++++++++++++++------------------ 2 files changed, 151 insertions(+), 71 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index 1da983a4f..de94e6d08 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -144,20 +144,36 @@ Logging cursor Replication cursor ^^^^^^^^^^^^^^^^^^ -.. autoclass:: ReplicationConnection +.. autoclass:: LogicalReplicationConnection This connection factory class can be used to open a special type of - connection that is used for streaming replication. + connection that is used for logical replication. Example:: - from psycopg2.extras import ReplicationConnection, REPLICATION_PHYSICAL, REPLICATION_LOGICAL - conn = psycopg2.connect(dsn, connection_factory=ReplicationConnection) - cur = conn.cursor() + from psycopg2.extras import LogicalReplicationConnection + log_conn = psycopg2.connect(dsn, connection_factory=LogicalReplicationConnection) + log_cur = log_conn.cursor() + + +.. autoclass:: PhysicalReplicationConnection + + This connection factory class can be used to open a special type of + connection that is used for physical replication. + + Example:: + + from psycopg2.extras import PhysicalReplicationConnection + phys_conn = psycopg2.connect(dsn, connection_factory=PhysicalReplicationConnection) + phys_cur = phys_conn.cursor() + + + Both `LogicalReplicationConnection` and `PhysicalReplicationConnection` use + `ReplicationCursor` for actual communication on the connection. .. seealso:: - - PostgreSQL `Replication protocol`__ + - PostgreSQL `Streaming Replication Protocol`__ .. __: http://www.postgresql.org/docs/current/static/protocol-replication.html @@ -173,19 +189,38 @@ Replication cursor >>> cur.identify_system() {'timeline': 1, 'systemid': '1234567890123456789', 'dbname': 'test', 'xlogpos': '0/1ABCDEF'} - .. 
method:: create_replication_slot(slot_type, slot_name, output_plugin=None) + .. method:: create_replication_slot(slot_name, output_plugin=None) Create streaming replication slot. - :param slot_type: type of replication: either `REPLICATION_PHYSICAL` or - `REPLICATION_LOGICAL` :param slot_name: name of the replication slot to be created - :param output_plugin: name of the logical decoding output plugin to use - (logical replication only) + :param slot_type: type of replication: should be either + `REPLICATION_LOGICAL` or `REPLICATION_PHYSICAL` + :param output_plugin: name of the logical decoding output plugin to be + used by the slot; required for logical + replication connections, disallowed for physical Example:: - cur.create_replication_slot(REPLICATION_LOGICAL, "testslot", "test_decoding") + log_cur.create_replication_slot("logical1", "test_decoding") + phys_cur.create_replication_slot("physical1") + + # either logical or physical replication connection + cur.create_replication_slot("slot1", slot_type=REPLICATION_LOGICAL) + + When creating a slot on a logical replication connection, a logical + replication slot is created by default. Logical replication requires + name of the logical decoding output plugin to be specified. + + When creating a slot on a physical replication connection, a physical + replication slot is created by default. No output plugin parameter is + required or allowed when creating a physical replication slot. + + In either case, the type of slot being created can be specified + explicitly using *slot_type* parameter. + + Replication slots are a feature of PostgreSQL server starting with + version 9.4. .. method:: drop_replication_slot(slot_name) @@ -195,18 +230,24 @@ Replication cursor Example:: - cur.drop_replication_slot("testslot") + # either logical or physical replication connection + cur.drop_replication_slot("slot1") - .. 
method:: start_replication(slot_type, slot_name=None, writer=None, start_lsn=0, timeline=0, keepalive_interval=10, options=None) + This + + Replication slots are a feature of PostgreSQL server starting with + version 9.4. - Start a replication stream. On non-asynchronous connection, also - consume the stream messages. + .. method:: start_replication(slot_name=None, writer=None, slot_type=None, start_lsn=0, timeline=0, keepalive_interval=10, options=None) - :param slot_type: type of replication: either `REPLICATION_PHYSICAL` or - `REPLICATION_LOGICAL` - :param slot_name: name of the replication slot to use (required for - logical replication) + Start replication on the connection. + + :param slot_name: name of the replication slot to use; required for + logical replication, physical replication can work + with or without a slot :param writer: a file-like object to write replication messages to + :param slot_type: type of replication: should be either + `REPLICATION_LOGICAL` or `REPLICATION_PHYSICAL` :param start_lsn: the optional LSN position to start replicating from, can be an integer or a string of hexadecimal digits in the form ``XXX/XXX`` @@ -215,9 +256,23 @@ Replication cursor :param keepalive_interval: interval (in seconds) to send keepalive messages to the server :param options: a dictionary of options to pass to logical replication - slot (not allowed with physical replication, use + slot (not allowed with physical replication, set to *None*) + If not specified using *slot_type* parameter, the type of replication + to be started is defined by the type of replication connection. + Logical replication is only allowed on logical replication connection, + but physical replication can be used with both types of connection. + + On the other hand, physical replication doesn't require a named + replication slot to be used, only logical one does. In any case, + logical replication and replication slots are a feature of PostgreSQL + server starting with version 9.4. 
Physical replication can be used + starting with 9.0. + + If a *slot_name* is specified, the slot must exist on the server and + its type must match the replication type used. + When used on non-asynchronous connection this method enters an endless loop, reading messages from the server and passing them to ``write()`` method of the *writer* object. This is similar to operation of the @@ -391,10 +446,8 @@ Replication cursor A reference to the corresponding `~ReplicationCursor` object. - -.. data:: REPLICATION_PHYSICAL - .. data:: REPLICATION_LOGICAL +.. data:: REPLICATION_PHYSICAL .. index:: pair: Cursor; Replication diff --git a/lib/extras.py b/lib/extras.py index 36138c630..4587afeaa 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -438,53 +438,78 @@ def callproc(self, procname, vars=None): return LoggingCursor.callproc(self, procname, vars) -class ReplicationConnection(_connection): - """A connection that uses `ReplicationCursor` automatically.""" +"""Replication connection types.""" +REPLICATION_LOGICAL = "LOGICAL" +REPLICATION_PHYSICAL = "PHYSICAL" + + +class ReplicationConnectionBase(_connection): + """ + Base class for Logical and Physical replication connection + classes. Uses `ReplicationCursor` automatically. + """ def __init__(self, *args, **kwargs): - """Initializes a replication connection, by adding appropriate replication parameter to the provided dsn arguments.""" + """ + Initializes a replication connection by adding appropriate + parameters to the provided DSN and tweaking the connection + attributes. 
+ """ - if len(args): - dsn = args[0] + # replication_type is set in subclasses + if self.replication_type == REPLICATION_LOGICAL: + replication = 'database' - # FIXME: could really use parse_dsn here + elif self.replication_type == REPLICATION_PHYSICAL: + replication = 'true' - if dsn.startswith('postgres://') or dsn.startswith('postgresql://'): - # poor man's url parsing - if dsn.rfind('?') > 0: - if not dsn.endswith('?'): - dsn += '&' - else: - dsn += '?' + else: + raise psycopg2.ProgrammingError("unrecognized replication type: %s" % self.replication_type) + + # FIXME: could really use parse_dsn here + dsn = args[0] + if dsn.startswith('postgres://') or dsn.startswith('postgresql://'): + # poor man's url parsing + if dsn.rfind('?') > 0: + if not dsn.endswith('?'): + dsn += '&' else: - dsn += ' ' - dsn += 'replication=database' - args = [dsn] + list(args[1:]) + dsn += '?' else: - dbname = kwargs.get('dbname', None) - if dbname is None: - kwargs['dbname'] = 'replication' - - if kwargs.get('replication', None) is None: - kwargs['replication'] = 'database' if dbname else 'true' + dsn += ' ' + dsn += 'replication=%s' % replication + args = [dsn] + list(args[1:]) - super(ReplicationConnection, self).__init__(*args, **kwargs) + super(ReplicationConnectionBase, self).__init__(*args, **kwargs) # prevent auto-issued BEGIN statements if not self.async: self.autocommit = True - def cursor(self, *args, **kwargs): - kwargs.setdefault('cursor_factory', ReplicationCursor) - return super(ReplicationConnection, self).cursor(*args, **kwargs) + if self.cursor_factory is None: + self.cursor_factory = ReplicationCursor + def quote_ident(self, ident): + # FIXME: use PQescapeIdentifier or psycopg_escape_identifier_easy, somehow + return '"%s"' % ident.replace('"', '""') + + +class LogicalReplicationConnection(ReplicationConnectionBase): + + def __init__(self, *args, **kwargs): + self.replication_type = REPLICATION_LOGICAL + super(LogicalReplicationConnection, self).__init__(*args, 
**kwargs) + + +class PhysicalReplicationConnection(ReplicationConnectionBase): + + def __init__(self, *args, **kwargs): + self.replication_type = REPLICATION_PHYSICAL + super(PhysicalReplicationConnection, self).__init__(*args, **kwargs) -"""Streamging replication types.""" -REPLICATION_LOGICAL = "LOGICAL" -REPLICATION_PHYSICAL = "PHYSICAL" class ReplicationCursor(_cursor): - """A cursor used for replication commands.""" + """A cursor used for communication on the replication protocol.""" def identify_system(self): """Get information about the cluster status.""" @@ -493,47 +518,49 @@ def identify_system(self): return dict(zip([_.name for _ in self.description], self.fetchall()[0])) - def quote_ident(self, ident): - # FIXME: use PQescapeIdentifier or psycopg_escape_identifier_easy, somehow - return '"%s"' % ident.replace('"', '""') - - def create_replication_slot(self, slot_type, slot_name, output_plugin=None): + def create_replication_slot(self, slot_name, slot_type=None, output_plugin=None): """Create streaming replication slot.""" - command = "CREATE_REPLICATION_SLOT %s " % self.quote_ident(slot_name) + command = "CREATE_REPLICATION_SLOT %s " % self.connection.quote_ident(slot_name) + + if slot_type is None: + slot_type = self.connection.replication_type if slot_type == REPLICATION_LOGICAL: if output_plugin is None: - raise psycopg2.ProgrammingError("output plugin name is required for logical replication slot") + raise psycopg2.ProgrammingError("output plugin name is required to create logical replication slot") - command += "%s %s" % (slot_type, self.quote_ident(output_plugin)) + command += "%s %s" % (slot_type, self.connection.quote_ident(output_plugin)) elif slot_type == REPLICATION_PHYSICAL: if output_plugin is not None: - raise psycopg2.ProgrammingError("cannot specify output plugin name for physical replication slot") + raise psycopg2.ProgrammingError("cannot specify output plugin name when creating physical replication slot") command += slot_type else: - 
raise psycopg2.ProgrammingError("unrecognized replication slot type: %s" % slot_type) + raise psycopg2.ProgrammingError("unrecognized replication type: %s" % slot_type) self.execute(command) def drop_replication_slot(self, slot_name): """Drop streaming replication slot.""" - command = "DROP_REPLICATION_SLOT %s" % self.quote_ident(slot_name) + command = "DROP_REPLICATION_SLOT %s" % self.connection.quote_ident(slot_name) self.execute(command) - def start_replication(self, slot_type, slot_name=None, writer=None, start_lsn=0, + def start_replication(self, slot_name=None, writer=None, slot_type=None, start_lsn=0, timeline=0, keepalive_interval=10, options=None): """Start and consume replication stream.""" command = "START_REPLICATION " + if slot_type is None: + slot_type = self.connection.replication_type + if slot_type == REPLICATION_LOGICAL: if slot_name: - command += "SLOT %s " % self.quote_ident(slot_name) + command += "SLOT %s " % self.connection.quote_ident(slot_name) else: raise psycopg2.ProgrammingError("slot name is required for logical replication") @@ -541,11 +568,11 @@ def start_replication(self, slot_type, slot_name=None, writer=None, start_lsn=0, elif slot_type == REPLICATION_PHYSICAL: if slot_name: - command += "SLOT %s " % self.quote_ident(slot_name) - + command += "SLOT %s " % self.connection.quote_ident(slot_name) # don't add "PHYSICAL", before 9.4 it was just START_REPLICATION XXX/XXX + else: - raise psycopg2.ProgrammingError("unrecognized replication slot type: %s" % slot_type) + raise psycopg2.ProgrammingError("unrecognized replication type: %s" % slot_type) if type(start_lsn) is str: lsn = start_lsn.split('/') @@ -569,7 +596,7 @@ def start_replication(self, slot_type, slot_name=None, writer=None, start_lsn=0, for k,v in options.iteritems(): if not command.endswith('('): command += ", " - command += "%s %s" % (self.quote_ident(k), _A(str(v))) + command += "%s %s" % (self.connection.quote_ident(k), _A(str(v))) command += ")" return 
self.start_replication_expert(command, writer=writer, From cac83da5dbb77e142040be66b7d0e85e3e10f9c3 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 1 Oct 2015 16:04:19 +0200 Subject: [PATCH 24/60] Use parse_dsn in ReplicationConnectionBase --- lib/extras.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/lib/extras.py b/lib/extras.py index 4587afeaa..998c792f4 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -449,7 +449,7 @@ class ReplicationConnectionBase(_connection): classes. Uses `ReplicationCursor` automatically. """ - def __init__(self, *args, **kwargs): + def __init__(self, dsn, **kwargs): """ Initializes a replication connection by adding appropriate parameters to the provided DSN and tweaking the connection @@ -466,21 +466,16 @@ def __init__(self, *args, **kwargs): else: raise psycopg2.ProgrammingError("unrecognized replication type: %s" % self.replication_type) - # FIXME: could really use parse_dsn here - dsn = args[0] - if dsn.startswith('postgres://') or dsn.startswith('postgresql://'): - # poor man's url parsing - if dsn.rfind('?') > 0: - if not dsn.endswith('?'): - dsn += '&' - else: - dsn += '?' 
- else: - dsn += ' ' - dsn += 'replication=%s' % replication - args = [dsn] + list(args[1:]) + items = _ext.parse_dsn(dsn) + + # we add an appropriate replication keyword parameter, unless + # user has specified one explicitly in the DSN + items.setdefault('replication', replication) + + dsn = " ".join(["%s=%s" % (k, psycopg2._param_escape(str(v))) + for (k, v) in items.iteritems()]) - super(ReplicationConnectionBase, self).__init__(*args, **kwargs) + super(ReplicationConnectionBase, self).__init__(dsn, **kwargs) # prevent auto-issued BEGIN statements if not self.async: From 0233620c26c5df32b1ad8d5b0363a5fd75be3e91 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 1 Oct 2015 19:28:00 +0200 Subject: [PATCH 25/60] Rework replication connection/cursor classes --- doc/src/extras.rst | 423 +++++++++++++++++------------ lib/extras.py | 9 +- psycopg/cursor.h | 4 +- psycopg/cursor_type.c | 71 +++-- psycopg/pqpath.c | 93 ++++--- psycopg/pqpath.h | 2 + psycopg/replication_message.h | 1 + psycopg/replication_message_type.c | 9 +- 8 files changed, 369 insertions(+), 243 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index de94e6d08..82a2be18f 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -144,32 +144,40 @@ Logging cursor Replication cursor ^^^^^^^^^^^^^^^^^^ +.. autoclass:: ReplicationConnectionBase + + +The following replication types are defined: + +.. data:: REPLICATION_LOGICAL +.. data:: REPLICATION_PHYSICAL + + .. autoclass:: LogicalReplicationConnection - This connection factory class can be used to open a special type of - connection that is used for logical replication. + This connection factory class can be used to open a special type of + connection that is used for logical replication. 
- Example:: + Example:: - from psycopg2.extras import LogicalReplicationConnection - log_conn = psycopg2.connect(dsn, connection_factory=LogicalReplicationConnection) - log_cur = log_conn.cursor() + from psycopg2.extras import LogicalReplicationConnection + log_conn = psycopg2.connect(dsn, connection_factory=LogicalReplicationConnection) + log_cur = log_conn.cursor() .. autoclass:: PhysicalReplicationConnection - This connection factory class can be used to open a special type of - connection that is used for physical replication. - - Example:: + This connection factory class can be used to open a special type of + connection that is used for physical replication. - from psycopg2.extras import PhysicalReplicationConnection - phys_conn = psycopg2.connect(dsn, connection_factory=PhysicalReplicationConnection) - phys_cur = phys_conn.cursor() + Example:: + from psycopg2.extras import PhysicalReplicationConnection + phys_conn = psycopg2.connect(dsn, connection_factory=PhysicalReplicationConnection) + phys_cur = phys_conn.cursor() - Both `LogicalReplicationConnection` and `PhysicalReplicationConnection` use - `ReplicationCursor` for actual communication on the connection. + Both `LogicalReplicationConnection` and `PhysicalReplicationConnection` use + `ReplicationCursor` for actual communication on the connection. .. seealso:: @@ -177,160 +185,237 @@ Replication cursor .. __: http://www.postgresql.org/docs/current/static/protocol-replication.html + +The individual messages in the replication stream are presented by +`ReplicationMessage` objects: + +.. autoclass:: ReplicationMessage + + .. attribute:: payload + + The actual data received from the server. An instance of either + ``str`` or ``unicode``, depending on the method that was used to + produce this message. + + .. attribute:: data_size + + The raw size of the message payload (before possible unicode + conversion). + + .. attribute:: data_start + + LSN position of the start of the message. + + .. 
attribute:: wal_end + + LSN position of the current end of WAL on the server. + + .. attribute:: send_time + + A `~datetime` object representing the server timestamp at the moment + when the message was sent. + + .. attribute:: cursor + + A reference to the corresponding `ReplicationCursor` object. + + .. autoclass:: ReplicationCursor .. method:: identify_system() - Execute ``IDENTIFY_SYSTEM`` command of the streaming replication - protocol and return the result as a dictionary. + Execute ``IDENTIFY_SYSTEM`` command of the streaming replication + protocol and return the result as a dictionary. - Example:: + Example:: - >>> cur.identify_system() - {'timeline': 1, 'systemid': '1234567890123456789', 'dbname': 'test', 'xlogpos': '0/1ABCDEF'} + >>> cur.identify_system() + {'timeline': 1, 'systemid': '1234567890123456789', 'dbname': 'test', 'xlogpos': '0/1ABCDEF'} .. method:: create_replication_slot(slot_name, output_plugin=None) - Create streaming replication slot. + Create streaming replication slot. 
- :param slot_name: name of the replication slot to be created - :param slot_type: type of replication: should be either - `REPLICATION_LOGICAL` or `REPLICATION_PHYSICAL` - :param output_plugin: name of the logical decoding output plugin to be - used by the slot; required for logical - replication connections, disallowed for physical + :param slot_name: name of the replication slot to be created + :param slot_type: type of replication: should be either + `REPLICATION_LOGICAL` or `REPLICATION_PHYSICAL` + :param output_plugin: name of the logical decoding output plugin to be + used by the slot; required for logical + replication connections, disallowed for physical - Example:: + Example:: - log_cur.create_replication_slot("logical1", "test_decoding") - phys_cur.create_replication_slot("physical1") + log_cur.create_replication_slot("logical1", "test_decoding") + phys_cur.create_replication_slot("physical1") - # either logical or physical replication connection - cur.create_replication_slot("slot1", slot_type=REPLICATION_LOGICAL) + # either logical or physical replication connection + cur.create_replication_slot("slot1", slot_type=REPLICATION_LOGICAL) - When creating a slot on a logical replication connection, a logical - replication slot is created by default. Logical replication requires - name of the logical decoding output plugin to be specified. + When creating a slot on a logical replication connection, a logical + replication slot is created by default. Logical replication requires + name of the logical decoding output plugin to be specified. - When creating a slot on a physical replication connection, a physical - replication slot is created by default. No output plugin parameter is - required or allowed when creating a physical replication slot. + When creating a slot on a physical replication connection, a physical + replication slot is created by default. No output plugin parameter is + required or allowed when creating a physical replication slot. 
- In either case, the type of slot being created can be specified - explicitly using *slot_type* parameter. + In either case, the type of slot being created can be specified + explicitly using *slot_type* parameter. - Replication slots are a feature of PostgreSQL server starting with - version 9.4. + Replication slots are a feature of PostgreSQL server starting with + version 9.4. .. method:: drop_replication_slot(slot_name) - Drop streaming replication slot. + Drop streaming replication slot. + + :param slot_name: name of the replication slot to drop + + Example:: + + # either logical or physical replication connection + cur.drop_replication_slot("slot1") - :param slot_name: name of the replication slot to drop + Replication slots are a feature of PostgreSQL server starting with + version 9.4. - Example:: + .. method:: start_replication(slot_name=None, slot_type=None, start_lsn=0, timeline=0, options=None) - # either logical or physical replication connection - cur.drop_replication_slot("slot1") + Start replication on the connection. - This - - Replication slots are a feature of PostgreSQL server starting with - version 9.4. + :param slot_name: name of the replication slot to use; required for + logical replication, physical replication can work + with or without a slot + :param slot_type: type of replication: should be either + `REPLICATION_LOGICAL` or `REPLICATION_PHYSICAL` + :param start_lsn: the optional LSN position to start replicating from, + can be an integer or a string of hexadecimal digits + in the form ``XXX/XXX`` + :param timeline: WAL history timeline to start streaming from (optional, + can only be used with physical replication) + :param options: a dictionary of options to pass to logical replication + slot (not allowed with physical replication) - .. 
method:: start_replication(slot_name=None, writer=None, slot_type=None, start_lsn=0, timeline=0, keepalive_interval=10, options=None) + If a *slot_name* is specified, the slot must exist on the server and + its type must match the replication type used. - Start replication on the connection. + If not specified using *slot_type* parameter, the type of replication + is defined by the type of replication connection. Logical replication + is only allowed on logical replication connection, but physical + replication can be used with both types of connection. - :param slot_name: name of the replication slot to use; required for - logical replication, physical replication can work - with or without a slot - :param writer: a file-like object to write replication messages to - :param slot_type: type of replication: should be either - `REPLICATION_LOGICAL` or `REPLICATION_PHYSICAL` - :param start_lsn: the optional LSN position to start replicating from, - can be an integer or a string of hexadecimal digits - in the form ``XXX/XXX`` - :param timeline: WAL history timeline to start streaming from (optional, - can only be used with physical replication) - :param keepalive_interval: interval (in seconds) to send keepalive - messages to the server - :param options: a dictionary of options to pass to logical replication - slot (not allowed with physical replication, set to - *None*) + On the other hand, physical replication doesn't require a named + replication slot to be used, only logical one does. In any case, + logical replication and replication slots are a feature of PostgreSQL + server starting with version 9.4. Physical replication can be used + starting with 9.0. - If not specified using *slot_type* parameter, the type of replication - to be started is defined by the type of replication connection. - Logical replication is only allowed on logical replication connection, - but physical replication can be used with both types of connection. 
+ If *start_lsn* is specified, the requested stream will start from that + LSN. The default is ``0``, which passes the LSN ``0/0``, causing + replay to begin at the last point for which the server got replay + confirmation from the client, or the oldest available point for a + new slot. - On the other hand, physical replication doesn't require a named - replication slot to be used, only logical one does. In any case, - logical replication and replication slots are a feature of PostgreSQL - server starting with version 9.4. Physical replication can be used - starting with 9.0. + The server might produce an error if a WAL file for the given LSN has + already been recycled, or it may silently start streaming from a later + position: the client can verify the actual position using information + provided by the `ReplicationMessage` attributes. The exact server + behavior depends on the type of replication and use of slots. - If a *slot_name* is specified, the slot must exist on the server and - its type must match the replication type used. + A *timeline* parameter can only be specified with physical replication + and only starting with server version 9.3. - When used on non-asynchronous connection this method enters an endless - loop, reading messages from the server and passing them to ``write()`` - method of the *writer* object. This is similar to operation of the - `~cursor.copy_to()` method. It also sends keepalive messages to the - server, in case there were no new data from it for the duration of - *keepalive_interval* seconds (this parameter's value must be equal to - at least than 1 second, but it can have a fractional part). + A dictionary of *options* may be passed to the logical decoding plugin + on a logical replication slot. The set of supported options depends + on the output plugin that was used to create the slot. Must be + `!None` for physical replication.
- With asynchronous connection, this method returns immediately and the - calling code can start reading the replication messages in a loop. + This function constructs a ``START_REPLICATION`` command and calls + `start_replication_expert()` internally. - A sketch implementation of the *writer* object for logical replication - might look similar to the following:: + After starting the replication, to actually consume the incoming + server messages, use `consume_replication_stream()` or implement a + loop around `read_replication_message()` in case of asynchronous + connection. - from io import TextIOBase + .. method:: start_replication_expert(command) - class LogicalStreamWriter(TextIOBase): + Start replication on the connection using provided ``START_REPLICATION`` + command. - def write(self, msg): - self.store_message_data(msg.payload) + .. method:: consume_replication_stream(consumer, decode=False, keepalive_interval=10) - if self.should_report_to_the_server_now(msg): - msg.cursor.send_replication_feedback(flush_lsn=msg.wal_end) + :param consumer: an object providing ``consume()`` method + :param decode: a flag indicating that unicode conversion should be + performed on the messages received from the server + :param keepalive_interval: interval (in seconds) to send keepalive + messages to the server + + This method can only be used with synchronous connection. For + asynchronous connections see `read_replication_message()`. + + Before calling this method to consume the stream, use + `start_replication()` first. + + When called, this method enters an endless loop, reading messages from + the server and passing them to ``consume()`` method of the *consumer* + object. In order to make this method break out of the loop and + return, the ``consume()`` method can call `stop_replication()` on the + cursor or it can throw an exception. + + If *decode* is set to `!True`, the messages read from the server are + converted according to the connection `~connection.encoding`. 
This + parameter should not be set with physical replication. + + This method also sends keepalive messages to the server, in case there + was no new data from the server for the duration of + *keepalive_interval* (in seconds). The value of this parameter must + be at least 1 second, but it can have a fractional part. + + The following example is a sketch implementation of *consumer* object + for logical replication:: + + class LogicalStreamConsumer(object): + + def consume(self, msg): + self.store_message_data(msg.payload) + + if self.should_report_to_the_server_now(msg): + msg.cursor.send_replication_feedback(flush_lsn=msg.data_start) - First, like with the `~cursor.copy_to()` method, the code that calls - the provided ``write()`` method checks if the *writer* object is - inherited from `~io.TextIOBase`. If that is the case, the message - payload to be passed is converted to unicode using the connection's - `~connection.encoding` information. Otherwise, the message is passed - as is. + consumer = LogicalStreamConsumer() + cur.consume_replication_stream(consumer, decode=True) - The *msg* object being passed is an instance of `~ReplicationMessage` - class. + The *msg* objects passed to the ``consume()`` method are instances of + the `ReplicationMessage` class. - After storing certain amount of messages' data reliably, the client - should send a confirmation message to the server. This should be done - by calling `~send_replication_feedback()` method on the corresponding - replication cursor. A reference to the cursor is provided in the - `~ReplicationMessage` as an attribute. + After storing a certain amount of messages' data reliably, the client + should send a confirmation message to the server. This should be done + by calling the `send_replication_feedback()` method on the corresponding + replication cursor. A reference to the cursor is provided in the + `ReplicationMessage` as an attribute. - .. warning:: + ..
warning:: - Failure to properly notify the server by constantly consuming and - reporting success at appropriate times can eventually lead to "disk - full" condition on the server, because the server retains all the - WAL segments that might be needed to stream the changes via all of - the currently open replication slots. + When using replication with slots, failure to properly notify the + server by constantly consuming and reporting success at + appropriate times can eventually lead to "disk full" condition on + the server, because the server retains all the WAL segments that + might be needed to stream the changes via all of the currently + open replication slots. - On the other hand, it is not recommended to send a confirmation - after every processed message, since that will put an unnecessary - load on network and the server. A possible strategy is to confirm - after every COMMIT message. + On the other hand, it is not recommended to send a confirmation + after every processed message, since that will put an unnecessary + load on network and the server. A possible strategy is to confirm + after every COMMIT message. .. method:: stop_replication() - In non-asynchronous connection, when called from the ``write()`` - method, tell the code in `~start_replication` to break out of the - endless loop and return. + This method can be called on synchronous connections from the + ``consume()`` method of a ``consumer`` object in order to break out of + the endless loop in `consume_replication_stream()`. If called on + asynchronous connection or outside of the consume loop, this method + raises an error. .. method:: send_replication_feedback(write_lsn=0, flush_lsn=0, apply_lsn=0, reply=False) @@ -344,29 +429,37 @@ Replication cursor :param reply: request the server to send back a keepalive message immediately Use this method to report to the server that all messages up to a - certain LSN position have been stored and may be discarded. 
+ certain LSN position have been stored on the client and may be + discarded on the server. This method can also be called with all default parameters' values to - send a keepalive message to the server. + just send a keepalive message to the server. - In case of asynchronous connection, if the feedback message cannot be - sent at the moment, remembers the passed LSN positions for a later - hopefully successful call or call to `~flush_replication_feedback()`. + If the feedback message could not be sent, updates the passed LSN + positions in the cursor for a later call to + `flush_replication_feedback()` and returns `!False`, otherwise returns + `!True`. .. method:: flush_replication_feedback(reply=False) :param reply: request the server to send back a keepalive message immediately This method tries to flush the latest replication feedback message - that `~send_replication_feedback()` was trying to send, if any. + that `send_replication_feedback()` was trying to send but couldn't. + + If *reply* is `!True` sends a keepalive message in either case. + + Returns `!True` if the feedback message was sent successfully, + `!False` otherwise. Low-level methods for asynchronous connection operation. - With the non-asynchronous connection, a single call to - `~start_replication()` handles all the complexity, but at times it might - be beneficial to use low-level interface for better control, in particular - to `~select.select()` on multiple sockets. The following methods are - provided for asynchronous operation: + With the synchronous connection, a call to `consume_replication_stream()` + handles all the complexity of handling the incoming messages and sending + keepalive replies, but at times it might be beneficial to use low-level + interface for better control, in particular to `~select.select()` on + multiple sockets. The following methods are provided for asynchronous + operation: .. 
method:: read_replication_message(decode=True) @@ -374,18 +467,18 @@ Replication cursor performed on the data received from the server This method should be used in a loop with asynchronous connections - after calling `~start_replication()` once. + after calling `start_replication()` once. It tries to read the next message from the server, without blocking - and returns an instance of `~ReplicationMessage` or *None*, in case + and returns an instance of `ReplicationMessage` or `!None`, in case there are no more data messages from the server at the moment. It is expected that the calling code will call this method repeatedly in order to consume all of the messages that might have been buffered, - until *None* is returned. After receiving a *None* value from this - method, one might use `~select.select()` or `~select.poll()` on the - corresponding connection to block the process until there is more data - from the server. + until `!None` is returned. After receiving a `!None` value from this + method, the caller should use `~select.select()` or `~select.poll()` + on the corresponding connection to block the process until there is + more data from the server. The server can send keepalive messages to the client periodically. 
Such messages are silently consumed by this method and are never @@ -409,45 +502,19 @@ Replication cursor keepalive_interval = 10.0 while True: - if (datetime.now() - cur.replication_io_timestamp).total_seconds() >= keepalive_interval: - cur.send_replication_feedback() - - while True: - msg = cur.read_replication_message() - if not msg: - break - writer.write(msg) + msg = cur.read_replication_message() + if msg: + consumer.consume(msg) + else: + timeout = keepalive_interval - (datetime.now() - cur.replication_io_timestamp).total_seconds() + if timeout > 0: + sel = select.select([cur], [], [], timeout) + else: + sel = [] + + if not any(sel): + cur.send_replication_feedback() - timeout = keepalive_interval - (datetime.now() - cur.replication_io_timestamp).total_seconds() - if timeout > 0: - select.select([cur], [], [], timeout) - -.. autoclass:: ReplicationMessage - - .. attribute:: payload - - The actual data received from the server. An instance of either - ``str`` or ``unicode``. - - .. attribute:: data_start - - LSN position of the start of the message. - - .. attribute:: wal_end - - LSN position of the end of the message. - - .. attribute:: send_time - - A `~datetime` object representing the server timestamp at the moment - when the message was sent. - - .. attribute:: cursor - - A reference to the corresponding `~ReplicationCursor` object. - -.. data:: REPLICATION_LOGICAL -.. data:: REPLICATION_PHYSICAL ..
index:: pair: Cursor; Replication diff --git a/lib/extras.py b/lib/extras.py index 998c792f4..c05536ad7 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -544,9 +544,9 @@ def drop_replication_slot(self, slot_name): command = "DROP_REPLICATION_SLOT %s" % self.connection.quote_ident(slot_name) self.execute(command) - def start_replication(self, slot_name=None, writer=None, slot_type=None, start_lsn=0, - timeline=0, keepalive_interval=10, options=None): - """Start and consume replication stream.""" + def start_replication(self, slot_name=None, slot_type=None, start_lsn=0, + timeline=0, options=None): + """Start replication stream.""" command = "START_REPLICATION " @@ -594,8 +594,7 @@ def start_replication(self, slot_name=None, writer=None, slot_type=None, start_l command += "%s %s" % (self.connection.quote_ident(k), _A(str(v))) command += ")" - return self.start_replication_expert(command, writer=writer, - keepalive_interval=keepalive_interval) + return self.start_replication_expert(command) def send_feedback_message(self, written_lsn=0, sync_lsn=0, apply_lsn=0, reply_requested=False): return self.send_replication_feedback(written_lsn, sync_lsn, apply_lsn, reply_requested) diff --git a/psycopg/cursor.h b/psycopg/cursor.h index dd07243f5..941e279e8 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -73,7 +73,9 @@ struct cursorObject { #define DEFAULT_COPYSIZE 16384 #define DEFAULT_COPYBUFF 8192 - int repl_stop; /* if client requested to stop replication */ + /* replication cursor attrs */ + int repl_started:1; /* if replication is started */ + int repl_stop:1; /* if client requested to stop replication */ struct timeval repl_keepalive_interval; /* interval for keepalive messages in replication mode */ XLogRecPtr repl_write_lsn; /* LSN stats for replication feedback messages */ XLogRecPtr repl_flush_lsn; diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index 9de5b0855..d033a3dfc 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -36,6 +36,7 
@@ #include "psycopg/microprotocols_proto.h" #include + #include /* python */ @@ -1588,13 +1589,11 @@ psyco_curs_copy_expert(cursorObject *self, PyObject *args, PyObject *kwargs) static PyObject * psyco_curs_start_replication_expert(cursorObject *self, PyObject *args, PyObject *kwargs) { - PyObject *writer = NULL, *res = NULL; + PyObject *res = NULL; char *command; - double keepalive_interval = 10; - static char *kwlist[] = {"command", "writer", "keepalive_interval", NULL}; + static char *kwlist[] = {"command", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|Od", kwlist, - &command, &writer, &keepalive_interval)) { + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &command)) { return NULL; } @@ -1602,21 +1601,15 @@ psyco_curs_start_replication_expert(cursorObject *self, PyObject *args, PyObject EXC_IF_GREEN(start_replication_expert); EXC_IF_TPC_PREPARED(self->conn, start_replication_expert); - Dprintf("psyco_curs_start_replication_expert: command = %s", command); - - if (keepalive_interval < 1.0) { - psyco_set_error(ProgrammingError, self, "keepalive_interval must be >= 1sec"); + if (self->repl_started) { + psyco_set_error(ProgrammingError, self, "replication already in progress"); return NULL; } - self->copysize = 0; - Py_XINCREF(writer); - self->copyfile = writer; + Dprintf("psyco_curs_start_replication_expert: command = %s", command); + self->copysize = 0; self->repl_stop = 0; - self->repl_keepalive_interval.tv_sec = (int)keepalive_interval; - self->repl_keepalive_interval.tv_usec = - (keepalive_interval - (int)keepalive_interval)*1.0e6; self->repl_write_lsn = InvalidXLogRecPtr; self->repl_flush_lsn = InvalidXLogRecPtr; @@ -1631,7 +1624,7 @@ psyco_curs_start_replication_expert(cursorObject *self, PyObject *args, PyObject Py_INCREF(res); } - Py_CLEAR(self->copyfile); + self->repl_started = 1; return res; } @@ -1643,12 +1636,54 @@ static PyObject * psyco_curs_stop_replication(cursorObject *self) { EXC_IF_CURS_CLOSED(self); + 
EXC_IF_CURS_ASYNC(self, stop_replication); + + if (!self->repl_started || self->repl_stop) { + psyco_set_error(ProgrammingError, self, "replication is not in progress"); + return NULL; + } self->repl_stop = 1; Py_RETURN_NONE; } +#define psyco_curs_consume_replication_stream_doc \ +"consume_replication_stream(consumer, keepalive_interval=10) -- Consume replication stream." + +static PyObject * +psyco_curs_consume_replication_stream(cursorObject *self, PyObject *args, PyObject *kwargs) +{ + PyObject *consumer = NULL, *res = NULL; + int decode = 0; + double keepalive_interval = 10; + static char *kwlist[] = {"consumer", "decode", "keepalive_interval", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|id", kwlist, + &consumer, &decode, &keepalive_interval)) { + return NULL; + } + + EXC_IF_CURS_CLOSED(self); + EXC_IF_CURS_ASYNC(self, consume_replication_stream); + EXC_IF_GREEN(consume_replication_stream); + EXC_IF_TPC_PREPARED(self->conn, consume_replication_stream); + + Dprintf("psyco_curs_consume_replication_stream"); + + if (keepalive_interval < 1.0) { + psyco_set_error(ProgrammingError, self, "keepalive_interval must be >= 1 (sec)"); + return NULL; + } + + if (pq_copy_both(self, consumer, decode, keepalive_interval) >= 0) { + res = Py_None; + Py_INCREF(res); + } + + return res; +} + #define psyco_curs_read_replication_message_doc \ "read_replication_message(decode=True) -- Try reading a replication message from the server (non-blocking)." 
@@ -1673,7 +1708,7 @@ psyco_curs_read_replication_message(cursorObject *self, PyObject *args, PyObject static PyObject * curs_flush_replication_feedback(cursorObject *self, int reply) { - if (!self->repl_feedback_pending) + if (!(self->repl_feedback_pending || reply)) Py_RETURN_FALSE; if (pq_send_replication_feedback(self, reply)) { @@ -1939,6 +1974,8 @@ static struct PyMethodDef cursorObject_methods[] = { METH_VARARGS|METH_KEYWORDS, psyco_curs_start_replication_expert_doc}, {"stop_replication", (PyCFunction)psyco_curs_stop_replication, METH_NOARGS, psyco_curs_stop_replication_doc}, + {"consume_replication_stream", (PyCFunction)psyco_curs_consume_replication_stream, + METH_VARARGS|METH_KEYWORDS, psyco_curs_consume_replication_stream_doc}, {"read_replication_message", (PyCFunction)psyco_curs_read_replication_message, METH_VARARGS|METH_KEYWORDS, psyco_curs_read_replication_message_doc}, {"send_replication_feedback", (PyCFunction)psyco_curs_send_replication_feedback, diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index b524b14ad..4f1427dea 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1531,18 +1531,28 @@ _pq_copy_out_v3(cursorObject *curs) return ret; } -/* ignores keepalive messages */ +/* Tries to read the next message from the replication stream, without + blocking, in both sync and async connection modes. If no message + is ready in the CopyData buffer, tries to read from the server, + again without blocking. If that doesn't help, returns Py_None. + The caller is then supposed to block on the socket(s) and call this + function again. + + Any keepalive messages from the server are silently consumed and + are never returned to the caller. 
+ */ PyObject * pq_read_replication_message(cursorObject *curs, int decode) { char *buffer = NULL; - int len, consumed = 0, hdr, reply; + int len, data_size, consumed, hdr, reply; XLogRecPtr data_start, wal_end; pg_int64 send_time; PyObject *str = NULL, *msg = NULL; Dprintf("pq_read_replication_message(decode=%d)", decode); + consumed = 0; retry: len = PQgetCopyData(curs->conn->pgconn, &buffer, 1 /* async */); @@ -1570,10 +1580,12 @@ pq_read_replication_message(cursorObject *curs, int decode) } if (len == -2) { + /* serious error */ pq_raise(curs->conn, curs, NULL); goto exit; } if (len == -1) { + /* EOF */ curs->pgres = PQgetResult(curs->conn->pgconn); if (curs->pgres && PQresultStatus(curs->pgres) == PGRES_FATAL_ERROR) { @@ -1595,13 +1607,14 @@ pq_read_replication_message(cursorObject *curs, int decode) Dprintf("pq_read_replication_message: msg=%c, len=%d", buffer[0], len); if (buffer[0] == 'w') { - /* msgtype(1), dataStart(8), walEnd(8), sendTime(8) */ + /* XLogData: msgtype(1), dataStart(8), walEnd(8), sendTime(8) */ hdr = 1 + 8 + 8 + 8; if (len < hdr + 1) { psyco_set_error(OperationalError, curs, "data message header too small"); goto exit; } + data_size = len - hdr; data_start = fe_recvint64(buffer + 1); wal_end = fe_recvint64(buffer + 1 + 8); send_time = fe_recvint64(buffer + 1 + 8 + 8); @@ -1609,12 +1622,13 @@ pq_read_replication_message(cursorObject *curs, int decode) Dprintf("pq_read_replication_message: data_start="XLOGFMTSTR", wal_end="XLOGFMTSTR, XLOGFMTARGS(data_start), XLOGFMTARGS(wal_end)); - Dprintf("pq_read_replication_message: >>%.*s<<", len - hdr, buffer + hdr); + Dprintf("pq_read_replication_message: >>%.*s<<", data_size, buffer + hdr); + /* XXX it would be wise to check if it's really a logical replication */ if (decode) { - str = PyUnicode_Decode(buffer + hdr, len - hdr, curs->conn->codec, NULL); + str = PyUnicode_Decode(buffer + hdr, data_size, curs->conn->codec, NULL); } else { - str = Bytes_FromStringAndSize(buffer + hdr, len - hdr); + str 
= Bytes_FromStringAndSize(buffer + hdr, data_size); } if (!str) { goto exit; } @@ -1623,12 +1637,13 @@ pq_read_replication_message(cursorObject *curs, int decode) Py_DECREF(str); if (!msg) { goto exit; } + ((replicationMessageObject *)msg)->data_size = data_size; ((replicationMessageObject *)msg)->data_start = data_start; ((replicationMessageObject *)msg)->wal_end = wal_end; ((replicationMessageObject *)msg)->send_time = send_time; } else if (buffer[0] == 'k') { - /* msgtype(1), walEnd(8), sendTime(8), reply(1) */ + /* Primary keepalive message: msgtype(1), walEnd(8), sendTime(8), reply(1) */ hdr = 1 + 8 + 8; if (len < hdr + 1) { psyco_set_error(OperationalError, curs, "keepalive message header too small"); @@ -1641,6 +1656,7 @@ pq_read_replication_message(cursorObject *curs, int decode) if (curs->conn->async) { curs->repl_feedback_pending = 1; } else { + /* XXX not sure if this was a good idea after all */ pq_raise(curs->conn, curs, NULL); goto exit; } @@ -1699,38 +1715,36 @@ pq_send_replication_feedback(cursorObject* curs, int reply_requested) return 1; } -/* used for streaming replication only */ -static int -_pq_copy_both_v3(cursorObject *curs) +/* Calls pq_read_replication_message in an endless loop, until + stop_replication is called or a fatal error occurs. The messages + are passed to the consumer object. + + When no message is available, blocks on the connection socket, but + manages to send keepalive messages to the server as needed. 
+*/ +int +pq_copy_both(cursorObject *curs, PyObject *consumer, int decode, double keepalive_interval) { PyObject *msg, *tmp = NULL; - PyObject *write_func = NULL; - int is_text, fd, sel, ret = -1; + PyObject *consume_func = NULL; + int fd, sel, ret = -1; PGconn *pgconn; fd_set fds; - struct timeval curr_time, ping_time, time_diff; - - if (!curs->copyfile) { - psyco_set_error(ProgrammingError, curs, - "can't execute START_REPLICATION directly: use the start_replication() method instead"); - goto exit; - } - - if (!(write_func = PyObject_GetAttrString(curs->copyfile, "write"))) { - Dprintf("_pq_copy_both_v3: can't get o.write"); - goto exit; - } + struct timeval keep_intr, curr_time, ping_time, timeout; - /* if the file is text we must pass it unicode. */ - if (-1 == (is_text = psycopg_is_text_file(curs->copyfile))) { + if (!(consume_func = PyObject_GetAttrString(consumer, "consume"))) { + Dprintf("pq_copy_both: can't get o.consume"); goto exit; } CLEARPGRES(curs->pgres); pgconn = curs->conn->pgconn; + keep_intr.tv_sec = (int)keepalive_interval; + keep_intr.tv_usec = (keepalive_interval - keep_intr.tv_sec)*1.0e6; + while (1) { - msg = pq_read_replication_message(curs, is_text); + msg = pq_read_replication_message(curs, decode); if (!msg) { goto exit; } @@ -1748,14 +1762,12 @@ _pq_copy_both_v3(cursorObject *curs) gettimeofday(&curr_time, NULL); - ping_time = curs->repl_last_io; - ping_time.tv_sec += curs->repl_keepalive_interval.tv_sec; - ping_time.tv_usec += curs->repl_keepalive_interval.tv_usec; + timeradd(&curs->repl_last_io, &keep_intr, &ping_time); + timersub(&ping_time, &curr_time, &timeout); - timersub(&ping_time, &curr_time, &time_diff); - if (time_diff.tv_sec > 0) { + if (timeout.tv_sec >= 0) { Py_BEGIN_ALLOW_THREADS; - sel = select(fd + 1, &fds, NULL, NULL, &time_diff); + sel = select(fd + 1, &fds, NULL, NULL, &timeout); Py_END_ALLOW_THREADS; } else { @@ -1782,17 +1794,17 @@ _pq_copy_both_v3(cursorObject *curs) continue; } else { - tmp = 
PyObject_CallFunctionObjArgs(write_func, msg, NULL); + tmp = PyObject_CallFunctionObjArgs(consume_func, msg, NULL); Py_DECREF(msg); if (tmp == NULL) { - Dprintf("_pq_copy_both_v3: write_func returned NULL"); + Dprintf("pq_copy_both: consume_func returned NULL"); goto exit; } Py_DECREF(tmp); if (curs->repl_stop) { - Dprintf("_pq_copy_both_v3: repl_stop flag set by write_func"); + Dprintf("pq_copy_both: repl_stop flag set by consume_func"); break; } } @@ -1801,7 +1813,7 @@ _pq_copy_both_v3(cursorObject *curs) ret = 1; exit: - Py_XDECREF(write_func); + Py_XDECREF(consume_func); return ret; } @@ -1867,13 +1879,14 @@ pq_fetch(cursorObject *curs, int no_result) case PGRES_COPY_BOTH: Dprintf("pq_fetch: data from a streaming replication slot (no tuples)"); curs->rowcount = -1; - if (curs->conn->async) { + ex = 0; + /*if (curs->conn->async) { ex = 0; } else { ex = _pq_copy_both_v3(curs); - /* error caught by out glorious notice handler */ + if (PyErr_Occurred()) ex = -1; - } + }*/ CLEARPGRES(curs->pgres); break; diff --git a/psycopg/pqpath.h b/psycopg/pqpath.h index 9a348bc26..a858a2692 100644 --- a/psycopg/pqpath.h +++ b/psycopg/pqpath.h @@ -72,6 +72,8 @@ HIDDEN int pq_execute_command_locked(connectionObject *conn, RAISES HIDDEN void pq_complete_error(connectionObject *conn, PGresult **pgres, char **error); +HIDDEN int pq_copy_both(cursorObject *curs, PyObject *consumer, + int decode, double keepalive_interval); HIDDEN PyObject *pq_read_replication_message(cursorObject *curs, int decode); HIDDEN int pq_send_replication_feedback(cursorObject *curs, int reply_requested); diff --git a/psycopg/replication_message.h b/psycopg/replication_message.h index a7567a1da..201b9fb40 100644 --- a/psycopg/replication_message.h +++ b/psycopg/replication_message.h @@ -42,6 +42,7 @@ struct replicationMessageObject { cursorObject *cursor; PyObject *payload; + int data_size; XLogRecPtr data_start; XLogRecPtr wal_end; pg_int64 send_time; diff --git a/psycopg/replication_message_type.c 
b/psycopg/replication_message_type.c index edfe6c16d..61833931b 100644 --- a/psycopg/replication_message_type.c +++ b/psycopg/replication_message_type.c @@ -49,8 +49,9 @@ static PyObject * replmsg_repr(replicationMessageObject *self) { return PyString_FromFormat( - "", - self, XLOGFMTARGS(self->data_start), XLOGFMTARGS(self->wal_end), self->send_time); + "", + self, self->data_size, XLOGFMTARGS(self->data_start), XLOGFMTARGS(self->wal_end), + self->send_time); } static int @@ -63,8 +64,10 @@ replmsg_init(PyObject *obj, PyObject *args, PyObject *kwargs) Py_XINCREF(self->cursor); Py_XINCREF(self->payload); + self->data_size = 0; self->data_start = 0; self->wal_end = 0; + self->send_time = 0; return 0; } @@ -125,6 +128,8 @@ static struct PyMemberDef replicationMessageObject_members[] = { "TODO"}, {"payload", T_OBJECT, OFFSETOF(payload), READONLY, "TODO"}, + {"data_size", T_INT, OFFSETOF(data_size), READONLY, + "TODO"}, {"data_start", T_ULONGLONG, OFFSETOF(data_start), READONLY, "TODO"}, {"wal_end", T_ULONGLONG, OFFSETOF(wal_end), READONLY, From ea2b87eade9bb0a1eb0f4f9398ce9daeb3dcb930 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 13 Oct 2015 11:01:13 +0200 Subject: [PATCH 26/60] Fix create_replication_slot doc signature --- doc/src/extras.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index 82a2be18f..bdf8fc1b1 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -232,7 +232,7 @@ The individual messages in the replication stream are presented by >>> cur.identify_system() {'timeline': 1, 'systemid': '1234567890123456789', 'dbname': 'test', 'xlogpos': '0/1ABCDEF'} - .. method:: create_replication_slot(slot_name, output_plugin=None) + .. method:: create_replication_slot(slot_name, slot_type=None, output_plugin=None) Create streaming replication slot. 
From 6ad299945fc431d162f53b08a3de5dda729fcb3e Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 13 Oct 2015 18:05:33 +0200 Subject: [PATCH 27/60] Remove IDENTIFY_SYSTEM wrapper method (it can't work with async anyway). --- doc/src/extras.rst | 10 ---------- lib/extras.py | 9 +-------- 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index bdf8fc1b1..356e10e0c 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -222,16 +222,6 @@ The individual messages in the replication stream are presented by .. autoclass:: ReplicationCursor - .. method:: identify_system() - - Execute ``IDENTIFY_SYSTEM`` command of the streaming replication - protocol and return the result as a dictionary. - - Example:: - - >>> cur.identify_system() - {'timeline': 1, 'systemid': '1234567890123456789', 'dbname': 'test', 'xlogpos': '0/1ABCDEF'} - .. method:: create_replication_slot(slot_name, slot_type=None, output_plugin=None) Create streaming replication slot. 
diff --git a/lib/extras.py b/lib/extras.py index c05536ad7..913a6aae6 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -506,13 +506,6 @@ def __init__(self, *args, **kwargs): class ReplicationCursor(_cursor): """A cursor used for communication on the replication protocol.""" - def identify_system(self): - """Get information about the cluster status.""" - - self.execute("IDENTIFY_SYSTEM") - return dict(zip([_.name for _ in self.description], - self.fetchall()[0])) - def create_replication_slot(self, slot_name, slot_type=None, output_plugin=None): """Create streaming replication slot.""" @@ -594,7 +587,7 @@ def start_replication(self, slot_name=None, slot_type=None, start_lsn=0, command += "%s %s" % (self.connection.quote_ident(k), _A(str(v))) command += ")" - return self.start_replication_expert(command) + self.start_replication_expert(command) def send_feedback_message(self, written_lsn=0, sync_lsn=0, apply_lsn=0, reply_requested=False): return self.send_replication_feedback(written_lsn, sync_lsn, apply_lsn, reply_requested) From 54079072db3a6ff0794b8ce141e2dd929416bd14 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 14 Oct 2015 12:43:26 +0200 Subject: [PATCH 28/60] Fix ReplicationTest: no NotSupportedError now. 
--- tests/test_connection.py | 18 +++++++++++++----- tests/testconfig.py | 4 +--- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/tests/test_connection.py b/tests/test_connection.py index 68bb6f05c..91ea51f56 100755 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -1180,14 +1180,22 @@ def test_set_session_autocommit(self): class ReplicationTest(ConnectingTestCase): @skip_before_postgres(9, 0) - def test_replication_not_supported(self): - conn = self.repl_connect() + def test_physical_replication_connection(self): + import psycopg2.extras + conn = self.repl_connect(connection_factory=psycopg2.extras.PhysicalReplicationConnection) if conn is None: return cur = conn.cursor() - f = StringIO() - self.assertRaises(psycopg2.NotSupportedError, - cur.copy_expert, "START_REPLICATION 0/0", f) + cur.execute("IDENTIFY_SYSTEM") + cur.fetchall() + @skip_before_postgres(9, 4) + def test_logical_replication_connection(self): + import psycopg2.extras + conn = self.repl_connect(connection_factory=psycopg2.extras.LogicalReplicationConnection) + if conn is None: return + cur = conn.cursor() + cur.execute("IDENTIFY_SYSTEM") + cur.fetchall() def test_suite(): return unittest.TestLoader().loadTestsFromName(__name__) diff --git a/tests/testconfig.py b/tests/testconfig.py index 0f995fbf9..d59e5a0d4 100644 --- a/tests/testconfig.py +++ b/tests/testconfig.py @@ -7,8 +7,6 @@ dbport = os.environ.get('PSYCOPG2_TESTDB_PORT', None) dbuser = os.environ.get('PSYCOPG2_TESTDB_USER', None) dbpass = os.environ.get('PSYCOPG2_TESTDB_PASSWORD', None) -repl_dsn = os.environ.get('PSYCOPG2_TEST_REPL_DSN', - "dbname=psycopg2_test replication=1") # Check if we want to test psycopg's green path. 
green = os.environ.get('PSYCOPG2_TEST_GREEN', None) @@ -35,4 +33,4 @@ if dbpass is not None: dsn += ' password=%s' % dbpass - +repl_dsn = os.environ.get('PSYCOPG2_TEST_REPL_DSN', dsn) From fea2260fc5ec8dda9904eed9509b1a834b05747f Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 14 Oct 2015 12:50:08 +0200 Subject: [PATCH 29/60] Fix stop_replication: always raise outside the loop. --- psycopg/cursor_type.c | 4 ++-- tests/test_connection.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index d033a3dfc..5dd08cc91 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1624,8 +1624,6 @@ psyco_curs_start_replication_expert(cursorObject *self, PyObject *args, PyObject Py_INCREF(res); } - self->repl_started = 1; - return res; } @@ -1676,6 +1674,8 @@ psyco_curs_consume_replication_stream(cursorObject *self, PyObject *args, PyObje return NULL; } + self->repl_started = 1; + if (pq_copy_both(self, consumer, decode, keepalive_interval) >= 0) { res = Py_None; Py_INCREF(res); diff --git a/tests/test_connection.py b/tests/test_connection.py index 91ea51f56..18f1ff3e0 100755 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -1197,6 +1197,18 @@ def test_logical_replication_connection(self): cur.execute("IDENTIFY_SYSTEM") cur.fetchall() + @skip_before_postgres(9, 0) + def test_stop_replication_raises(self): + import psycopg2.extras + conn = self.repl_connect(connection_factory=psycopg2.extras.PhysicalReplicationConnection) + if conn is None: return + cur = conn.cursor() + self.assertRaises(psycopg2.ProgrammingError, cur.stop_replication) + + cur.start_replication() + self.assertRaises(psycopg2.ProgrammingError, cur.stop_replication) + + def test_suite(): return unittest.TestLoader().loadTestsFromName(__name__) From a0b42a12ff63fee362fce963fcb73350a810f09c Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 14 Oct 2015 15:15:07 +0200 Subject: [PATCH 30/60] 
Update stop_repl, require replication consumer to be a callable. --- doc/src/extras.rst | 33 +++++++++++++++++---------------- psycopg/cursor_type.c | 16 ++++++++-------- psycopg/pqpath.c | 8 ++++---- tests/test_connection.py | 6 +++++- 4 files changed, 34 insertions(+), 29 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index 356e10e0c..a9ba52fc7 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -333,9 +333,9 @@ The individual messages in the replication stream are presented by Start replication on the connection using provided ``START_REPLICATION`` command. - .. method:: consume_replication_stream(consumer, decode=False, keepalive_interval=10) + .. method:: consume_replication_stream(consume, decode=False, keepalive_interval=10) - :param consumer: an object providing ``consume()`` method + :param consume: a callable object with signature ``consume(msg)`` :param decode: a flag indicating that unicode conversion should be performed on the messages received from the server :param keepalive_interval: interval (in seconds) to send keepalive @@ -348,10 +348,9 @@ The individual messages in the replication stream are presented by `start_replication()` first. When called, this method enters an endless loop, reading messages from - the server and passing them to ``consume()`` method of the *consumer* - object. In order to make this method break out of the loop and - return, the ``consume()`` method can call `stop_replication()` on the - cursor or it can throw an exception. + the server and passing them to ``consume()``. In order to make this + method break out of the loop and return, ``consume()`` can call + `stop_replication()` on the cursor or it can throw an exception. If *decode* is set to `!True`, the messages read from the server are converted according to the connection `~connection.encoding`. This @@ -362,12 +361,12 @@ The individual messages in the replication stream are presented by *keepalive_interval* (in seconds). 
The value of this parameter must be equal to at least 1 second, but it can have a fractional part. - The following example is a sketch implementation of *consumer* object - for logical replication:: + The following example is a sketch implementation of ``consume()`` + callable for logical replication:: class LogicalStreamConsumer(object): - def consume(self, msg): + def __call__(self, msg): self.store_message_data(msg.payload) if self.should_report_to_the_server_now(msg): @@ -376,7 +375,7 @@ The individual messages in the replication stream are presented by consumer = LogicalStreamConsumer() cur.consume_replication_stream(consumer, decode=True) - The *msg* objects passed to the ``consume()`` method are instances of + The *msg* objects passed to ``consume()`` are instances of `ReplicationMessage` class. After storing certain amount of messages' data reliably, the client @@ -401,11 +400,10 @@ The individual messages in the replication stream are presented by .. method:: stop_replication() - This method can be called on synchronous connections from the - ``consume()`` method of a ``consumer`` object in order to break out of - the endless loop in `consume_replication_stream()`. If called on - asynchronous connection or outside of the consume loop, this method - raises an error. + This method can be called on synchronous connection from the + ``consume()`` callable in order to break out of the endless loop in + `consume_replication_stream()`. If called on asynchronous connection + or when replication is not in progress, this method raises an error. .. method:: send_replication_feedback(write_lsn=0, flush_lsn=0, apply_lsn=0, reply=False) @@ -490,11 +488,14 @@ The individual messages in the replication stream are presented by An actual example of asynchronous operation might look like this:: + def consume(msg): + ... 
+ keepalive_interval = 10.0 while True: msg = cur.read_replication_message() if msg: - consumer.consume(msg) + consume(msg) else: timeout = keepalive_interval - (datetime.now() - cur.replication_io_timestamp).total_seconds() if timeout > 0: diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index 5dd08cc91..a45814952 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1622,13 +1622,15 @@ psyco_curs_start_replication_expert(cursorObject *self, PyObject *args, PyObject 1 /* no_result */, 1 /* no_begin */) >= 0) { res = Py_None; Py_INCREF(res); + + self->repl_started = 1; } return res; } #define psyco_curs_stop_replication_doc \ -"stop_replication() -- Set flag to break out of endless loop in start_replication() on sync connection." +"stop_replication() -- Set flag to break out of the endless loop in consume_replication_stream()." static PyObject * psyco_curs_stop_replication(cursorObject *self) @@ -1652,13 +1654,13 @@ psyco_curs_stop_replication(cursorObject *self) static PyObject * psyco_curs_consume_replication_stream(cursorObject *self, PyObject *args, PyObject *kwargs) { - PyObject *consumer = NULL, *res = NULL; + PyObject *consume = NULL, *res = NULL; int decode = 0; double keepalive_interval = 10; - static char *kwlist[] = {"consumer", "decode", "keepalive_interval", NULL}; + static char *kwlist[] = {"consume", "decode", "keepalive_interval", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|id", kwlist, - &consumer, &decode, &keepalive_interval)) { + &consume, &decode, &keepalive_interval)) { return NULL; } @@ -1674,9 +1676,7 @@ psyco_curs_consume_replication_stream(cursorObject *self, PyObject *args, PyObje return NULL; } - self->repl_started = 1; - - if (pq_copy_both(self, consumer, decode, keepalive_interval) >= 0) { + if (pq_copy_both(self, consume, decode, keepalive_interval) >= 0) { res = Py_None; Py_INCREF(res); } @@ -1709,7 +1709,7 @@ static PyObject * curs_flush_replication_feedback(cursorObject *self, int reply) { if 
(!(self->repl_feedback_pending || reply)) - Py_RETURN_FALSE; + Py_RETURN_TRUE; if (pq_send_replication_feedback(self, reply)) { self->repl_feedback_pending = 0; diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 4f1427dea..a42c9a1a8 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1723,7 +1723,7 @@ pq_send_replication_feedback(cursorObject* curs, int reply_requested) manages to send keepalive messages to the server as needed. */ int -pq_copy_both(cursorObject *curs, PyObject *consumer, int decode, double keepalive_interval) +pq_copy_both(cursorObject *curs, PyObject *consume, int decode, double keepalive_interval) { PyObject *msg, *tmp = NULL; PyObject *consume_func = NULL; @@ -1732,8 +1732,8 @@ pq_copy_both(cursorObject *curs, PyObject *consumer, int decode, double keepaliv fd_set fds; struct timeval keep_intr, curr_time, ping_time, timeout; - if (!(consume_func = PyObject_GetAttrString(consumer, "consume"))) { - Dprintf("pq_copy_both: can't get o.consume"); + if (!(consume_func = PyObject_GetAttrString(consume, "__call__"))) { + Dprintf("pq_copy_both: expected callable consume object"); goto exit; } @@ -1743,7 +1743,7 @@ pq_copy_both(cursorObject *curs, PyObject *consumer, int decode, double keepaliv keep_intr.tv_sec = (int)keepalive_interval; keep_intr.tv_usec = (keepalive_interval - keep_intr.tv_sec)*1.0e6; - while (1) { + while (!curs->repl_stop) { msg = pq_read_replication_message(curs, decode); if (!msg) { goto exit; diff --git a/tests/test_connection.py b/tests/test_connection.py index 18f1ff3e0..e2b0da306 100755 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -1206,7 +1206,11 @@ def test_stop_replication_raises(self): self.assertRaises(psycopg2.ProgrammingError, cur.stop_replication) cur.start_replication() - self.assertRaises(psycopg2.ProgrammingError, cur.stop_replication) + cur.stop_replication() # doesn't raise now + + def consume(msg): + pass + cur.consume_replication_stream(consume) # should return at once def 
test_suite(): From e05b4fd2673a721e858cffdcd5b49ae451e57332 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 14 Oct 2015 17:36:50 +0200 Subject: [PATCH 31/60] Add checks on replication state, have to have a separate check for consume loop. --- psycopg/cursor.h | 17 +++++++++++++++++ psycopg/cursor_type.c | 29 ++++++++++++++++++----------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/psycopg/cursor.h b/psycopg/cursor.h index 941e279e8..432425f51 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -76,6 +76,7 @@ struct cursorObject { /* replication cursor attrs */ int repl_started:1; /* if replication is started */ int repl_stop:1; /* if client requested to stop replication */ + int repl_consuming:1; /* if running the consume loop */ struct timeval repl_keepalive_interval; /* interval for keepalive messages in replication mode */ XLogRecPtr repl_write_lsn; /* LSN stats for replication feedback messages */ XLogRecPtr repl_flush_lsn; @@ -147,6 +148,22 @@ do \ return NULL; } \ while (0) +#define EXC_IF_REPLICATING(self, cmd) \ +do \ + if ((self)->repl_started) { \ + PyErr_SetString(ProgrammingError, \ + #cmd " cannot be used when replication is already in progress"); \ + return NULL; } \ +while (0) + +#define EXC_IF_NOT_REPLICATING(self, cmd) \ +do \ + if (!(self)->repl_started) { \ + PyErr_SetString(ProgrammingError, \ + #cmd " cannot be used when replication is not in progress"); \ + return NULL; } \ +while (0) + #ifdef __cplusplus } #endif diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index a45814952..c7e6c26a5 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1600,16 +1600,13 @@ psyco_curs_start_replication_expert(cursorObject *self, PyObject *args, PyObject EXC_IF_CURS_CLOSED(self); EXC_IF_GREEN(start_replication_expert); EXC_IF_TPC_PREPARED(self->conn, start_replication_expert); + EXC_IF_REPLICATING(self, start_replication_expert); - if (self->repl_started) { - psyco_set_error(ProgrammingError, self, 
"replication already in progress"); - return NULL; - } - - Dprintf("psyco_curs_start_replication_expert: command = %s", command); + Dprintf("psyco_curs_start_replication_expert: %s", command); self->copysize = 0; self->repl_stop = 0; + self->repl_consuming = 0; self->repl_write_lsn = InvalidXLogRecPtr; self->repl_flush_lsn = InvalidXLogRecPtr; @@ -1637,11 +1634,7 @@ psyco_curs_stop_replication(cursorObject *self) { EXC_IF_CURS_CLOSED(self); EXC_IF_CURS_ASYNC(self, stop_replication); - - if (!self->repl_started || self->repl_stop) { - psyco_set_error(ProgrammingError, self, "replication is not in progress"); - return NULL; - } + EXC_IF_NOT_REPLICATING(self, stop_replication); self->repl_stop = 1; @@ -1668,6 +1661,13 @@ psyco_curs_consume_replication_stream(cursorObject *self, PyObject *args, PyObje EXC_IF_CURS_ASYNC(self, consume_replication_stream); EXC_IF_GREEN(consume_replication_stream); EXC_IF_TPC_PREPARED(self->conn, consume_replication_stream); + EXC_IF_NOT_REPLICATING(self, consume_replication_stream); + + if (self->repl_consuming) { + PyErr_SetString(ProgrammingError, + "consume_replication_stream cannot be used when already in the consume loop"); + return NULL; + } Dprintf("psyco_curs_consume_replication_stream"); @@ -1676,11 +1676,15 @@ psyco_curs_consume_replication_stream(cursorObject *self, PyObject *args, PyObje return NULL; } + self->repl_consuming = 1; + if (pq_copy_both(self, consume, decode, keepalive_interval) >= 0) { res = Py_None; Py_INCREF(res); } + self->repl_consuming = 0; + return res; } @@ -1696,6 +1700,7 @@ psyco_curs_read_replication_message(cursorObject *self, PyObject *args, PyObject EXC_IF_CURS_CLOSED(self); EXC_IF_GREEN(read_replication_message); EXC_IF_TPC_PREPARED(self->conn, read_replication_message); + EXC_IF_NOT_REPLICATING(self, read_replication_message); if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &decode)) { @@ -1733,6 +1738,7 @@ psyco_curs_send_replication_feedback(cursorObject *self, PyObject *args, PyObjec 
static char* kwlist[] = {"write_lsn", "flush_lsn", "apply_lsn", "reply", NULL}; EXC_IF_CURS_CLOSED(self); + EXC_IF_NOT_REPLICATING(self, send_replication_feedback); if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|KKKi", kwlist, &write_lsn, &flush_lsn, &apply_lsn, &reply)) { @@ -1763,6 +1769,7 @@ psyco_curs_flush_replication_feedback(cursorObject *self, PyObject *args, PyObje static char *kwlist[] = {"reply", NULL}; EXC_IF_CURS_CLOSED(self); + EXC_IF_NOT_REPLICATING(self, flush_replication_feedback); if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &reply)) { From 822d671e8b2b0039bbcfb908c87fd239aa152faf Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 14 Oct 2015 17:40:39 +0200 Subject: [PATCH 32/60] Clear repl_stop flag after the consume loop. --- psycopg/cursor_type.c | 1 + 1 file changed, 1 insertion(+) diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index c7e6c26a5..c797c2647 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1684,6 +1684,7 @@ psyco_curs_consume_replication_stream(cursorObject *self, PyObject *args, PyObje } self->repl_consuming = 0; + self->repl_stop = 0; /* who knows, what if we will be called again? 
*/ return res; } From e3097ec9562a09b66f3d73e5bf901c8295909f38 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 14 Oct 2015 17:42:53 +0200 Subject: [PATCH 33/60] Fix select/timeout indication in async replication example --- doc/src/extras.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index a9ba52fc7..e2ded4b60 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -501,9 +501,9 @@ The individual messages in the replication stream are presented by if timeout > 0: sel = select.select([cur], [], [], timeout) else: - sel = [] + sel = ([], [], []) - if not sel: + if not sel[0]: cur.send_replication_feedback() From 28a1a00d1ce29a823a91417807b9d2b9cbf7b4dd Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 14 Oct 2015 18:39:20 +0200 Subject: [PATCH 34/60] Remove commented copy_both code in pqfetch. --- psycopg/pqpath.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index a42c9a1a8..111eb8753 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1760,6 +1760,7 @@ pq_copy_both(cursorObject *curs, PyObject *consume, int decode, double keepalive FD_ZERO(&fds); FD_SET(fd, &fds); + /* how long can we wait before we need to send a keepalive? 
*/ gettimeofday(&curr_time, NULL); timeradd(&curs->repl_last_io, &keep_intr, &ping_time); @@ -1880,13 +1881,7 @@ pq_fetch(cursorObject *curs, int no_result) Dprintf("pq_fetch: data from a streaming replication slot (no tuples)"); curs->rowcount = -1; ex = 0; - /*if (curs->conn->async) { - ex = 0; - } else { - ex = _pq_copy_both_v3(curs); - - if (PyErr_Occurred()) ex = -1; - }*/ + /* nothing to do here: _pq_copy_both_v3 will be called separately */ CLEARPGRES(curs->pgres); break; From 9ab38ee8c5faf1241adaec0467ff6d83d1af6434 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Wed, 14 Oct 2015 18:39:48 +0200 Subject: [PATCH 35/60] Add psyco_curs_datetime_init --- psycopg/cursor.h | 2 ++ psycopg/cursor_type.c | 18 +++++++++++++++--- psycopg/psycopgmodule.c | 1 + 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/psycopg/cursor.h b/psycopg/cursor.h index 432425f51..3f1259984 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -107,6 +107,8 @@ HIDDEN void curs_reset(cursorObject *self); HIDDEN int psyco_curs_withhold_set(cursorObject *self, PyObject *pyvalue); HIDDEN int psyco_curs_scrollable_set(cursorObject *self, PyObject *pyvalue); +RAISES_NEG int psyco_curs_datetime_init(void); + /* exception-raising macros */ #define EXC_IF_CURS_CLOSED(self) \ do { \ diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index c797c2647..f4598873f 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1780,6 +1780,21 @@ psyco_curs_flush_replication_feedback(cursorObject *self, PyObject *args, PyObje return curs_flush_replication_feedback(self, reply); } + +RAISES_NEG int +psyco_curs_datetime_init(void) +{ + Dprintf("psyco_curs_datetime_init: datetime init"); + + PyDateTime_IMPORT; + + if (!PyDateTimeAPI) { + PyErr_SetString(PyExc_ImportError, "datetime initialization failed"); + return -1; + } + return 0; +} + #define psyco_curs_replication_io_timestamp_doc \ "replication_io_timestamp -- the timestamp of latest IO with the server" @@ -1791,9 
+1806,6 @@ psyco_curs_get_replication_io_timestamp(cursorObject *self) EXC_IF_CURS_CLOSED(self); - // TODO: move to a one-call init function - PyDateTime_IMPORT; - seconds = self->repl_last_io.tv_sec + self->repl_last_io.tv_usec / 1.0e6; tval = Py_BuildValue("(d)", seconds); diff --git a/psycopg/psycopgmodule.c b/psycopg/psycopgmodule.c index 543b0c1ba..7d3c73d97 100644 --- a/psycopg/psycopgmodule.c +++ b/psycopg/psycopgmodule.c @@ -939,6 +939,7 @@ INIT_MODULE(_psycopg)(void) /* Initialize the PyDateTimeAPI everywhere is used */ PyDateTime_IMPORT; if (psyco_adapter_datetime_init()) { goto exit; } + if (psyco_curs_datetime_init()) { goto exit; } if (psyco_replmsg_datetime_init()) { goto exit; } Py_TYPE(&pydatetimeType) = &PyType_Type; From d14fea31a33488a1f62a45a8a87109d5be678a72 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 15 Oct 2015 12:56:21 +0200 Subject: [PATCH 36/60] Use quote_ident from psycopg2.extensions --- lib/extras.py | 18 +++++++----------- tests/test_connection.py | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/lib/extras.py b/lib/extras.py index e0fd8ef10..f411a4d08 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -40,7 +40,7 @@ from psycopg2.extensions import cursor as _cursor from psycopg2.extensions import connection as _connection from psycopg2.extensions import replicationMessage as ReplicationMessage -from psycopg2.extensions import adapt as _A +from psycopg2.extensions import adapt as _A, quote_ident from psycopg2.extensions import b @@ -484,10 +484,6 @@ def __init__(self, dsn, **kwargs): if self.cursor_factory is None: self.cursor_factory = ReplicationCursor - def quote_ident(self, ident): - # FIXME: use PQescapeIdentifier or psycopg_escape_identifier_easy, somehow - return '"%s"' % ident.replace('"', '""') - class LogicalReplicationConnection(ReplicationConnectionBase): @@ -509,7 +505,7 @@ class ReplicationCursor(_cursor): def create_replication_slot(self, slot_name, slot_type=None, 
output_plugin=None): """Create streaming replication slot.""" - command = "CREATE_REPLICATION_SLOT %s " % self.connection.quote_ident(slot_name) + command = "CREATE_REPLICATION_SLOT %s " % quote_ident(slot_name, self) if slot_type is None: slot_type = self.connection.replication_type @@ -518,7 +514,7 @@ def create_replication_slot(self, slot_name, slot_type=None, output_plugin=None) if output_plugin is None: raise psycopg2.ProgrammingError("output plugin name is required to create logical replication slot") - command += "%s %s" % (slot_type, self.connection.quote_ident(output_plugin)) + command += "%s %s" % (slot_type, quote_ident(output_plugin, self)) elif slot_type == REPLICATION_PHYSICAL: if output_plugin is not None: @@ -534,7 +530,7 @@ def create_replication_slot(self, slot_name, slot_type=None, output_plugin=None) def drop_replication_slot(self, slot_name): """Drop streaming replication slot.""" - command = "DROP_REPLICATION_SLOT %s" % self.connection.quote_ident(slot_name) + command = "DROP_REPLICATION_SLOT %s" % quote_ident(slot_name, self) self.execute(command) def start_replication(self, slot_name=None, slot_type=None, start_lsn=0, @@ -548,7 +544,7 @@ def start_replication(self, slot_name=None, slot_type=None, start_lsn=0, if slot_type == REPLICATION_LOGICAL: if slot_name: - command += "SLOT %s " % self.connection.quote_ident(slot_name) + command += "SLOT %s " % quote_ident(slot_name, self) else: raise psycopg2.ProgrammingError("slot name is required for logical replication") @@ -556,7 +552,7 @@ def start_replication(self, slot_name=None, slot_type=None, start_lsn=0, elif slot_type == REPLICATION_PHYSICAL: if slot_name: - command += "SLOT %s " % self.connection.quote_ident(slot_name) + command += "SLOT %s " % quote_ident(slot_name, self) # don't add "PHYSICAL", before 9.4 it was just START_REPLICATION XXX/XXX else: @@ -584,7 +580,7 @@ def start_replication(self, slot_name=None, slot_type=None, start_lsn=0, for k,v in options.iteritems(): if not 
command.endswith('('): command += ", " - command += "%s %s" % (self.connection.quote_ident(k), _A(str(v))) + command += "%s %s" % (quote_ident(k, self), _A(str(v))) command += ")" self.start_replication_expert(command) diff --git a/tests/test_connection.py b/tests/test_connection.py index e2b0da306..eeeaa8451 100755 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -1212,6 +1212,20 @@ def consume(msg): pass cur.consume_replication_stream(consume) # should return at once + @skip_before_postgres(9, 4) # slots require 9.4 + def test_create_replication_slot(self): + import psycopg2.extras + conn = self.repl_connect(connection_factory=psycopg2.extras.PhysicalReplicationConnection) + if conn is None: return + cur = conn.cursor() + + slot = "test_slot1" + try: + cur.create_replication_slot(slot) + self.assertRaises(psycopg2.ProgrammingError, cur.create_replication_slot, slot) + finally: + cur.drop_replication_slot(slot) + def test_suite(): return unittest.TestLoader().loadTestsFromName(__name__) From cf4f2411bfd2d5a1cb84393f135e48107428137b Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 15 Oct 2015 18:01:43 +0200 Subject: [PATCH 37/60] Fix async replication and test. --- lib/extras.py | 7 ++- tests/test_async.py | 16 ----- tests/test_connection.py | 49 --------------- tests/test_replication.py | 123 ++++++++++++++++++++++++++++++++++++++ tests/testutils.py | 21 ++++++- 5 files changed, 147 insertions(+), 69 deletions(-) create mode 100644 tests/test_replication.py diff --git a/lib/extras.py b/lib/extras.py index f411a4d08..dc2d5e65a 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -449,7 +449,7 @@ class ReplicationConnectionBase(_connection): classes. Uses `ReplicationCursor` automatically. 
""" - def __init__(self, dsn, **kwargs): + def __init__(self, *args, **kwargs): """ Initializes a replication connection by adding appropriate parameters to the provided DSN and tweaking the connection @@ -466,7 +466,7 @@ def __init__(self, dsn, **kwargs): else: raise psycopg2.ProgrammingError("unrecognized replication type: %s" % self.replication_type) - items = _ext.parse_dsn(dsn) + items = _ext.parse_dsn(args[0]) # we add an appropriate replication keyword parameter, unless # user has specified one explicitly in the DSN @@ -475,7 +475,8 @@ def __init__(self, dsn, **kwargs): dsn = " ".join(["%s=%s" % (k, psycopg2._param_escape(str(v))) for (k, v) in items.iteritems()]) - super(ReplicationConnectionBase, self).__init__(dsn, **kwargs) + args = [dsn] + list(args[1:]) # async is the possible 2nd arg + super(ReplicationConnectionBase, self).__init__(*args, **kwargs) # prevent auto-issued BEGIN statements if not self.async: diff --git a/tests/test_async.py b/tests/test_async.py index d40b9c3ed..e0bca7d51 100755 --- a/tests/test_async.py +++ b/tests/test_async.py @@ -29,7 +29,6 @@ from psycopg2 import extensions import time -import select import StringIO from testutils import ConnectingTestCase @@ -66,21 +65,6 @@ def setUp(self): )''') self.wait(curs) - def wait(self, cur_or_conn): - pollable = cur_or_conn - if not hasattr(pollable, 'poll'): - pollable = cur_or_conn.connection - while True: - state = pollable.poll() - if state == psycopg2.extensions.POLL_OK: - break - elif state == psycopg2.extensions.POLL_READ: - select.select([pollable], [], [], 10) - elif state == psycopg2.extensions.POLL_WRITE: - select.select([], [pollable], [], 10) - else: - raise Exception("Unexpected result from poll: %r", state) - def test_connection_setup(self): cur = self.conn.cursor() sync_cur = self.sync_conn.cursor() diff --git a/tests/test_connection.py b/tests/test_connection.py index eeeaa8451..568f09ed3 100755 --- a/tests/test_connection.py +++ b/tests/test_connection.py @@ -1178,55 
+1178,6 @@ def test_set_session_autocommit(self): self.assertEqual(cur.fetchone()[0], 'on') -class ReplicationTest(ConnectingTestCase): - @skip_before_postgres(9, 0) - def test_physical_replication_connection(self): - import psycopg2.extras - conn = self.repl_connect(connection_factory=psycopg2.extras.PhysicalReplicationConnection) - if conn is None: return - cur = conn.cursor() - cur.execute("IDENTIFY_SYSTEM") - cur.fetchall() - - @skip_before_postgres(9, 4) - def test_logical_replication_connection(self): - import psycopg2.extras - conn = self.repl_connect(connection_factory=psycopg2.extras.LogicalReplicationConnection) - if conn is None: return - cur = conn.cursor() - cur.execute("IDENTIFY_SYSTEM") - cur.fetchall() - - @skip_before_postgres(9, 0) - def test_stop_replication_raises(self): - import psycopg2.extras - conn = self.repl_connect(connection_factory=psycopg2.extras.PhysicalReplicationConnection) - if conn is None: return - cur = conn.cursor() - self.assertRaises(psycopg2.ProgrammingError, cur.stop_replication) - - cur.start_replication() - cur.stop_replication() # doesn't raise now - - def consume(msg): - pass - cur.consume_replication_stream(consume) # should return at once - - @skip_before_postgres(9, 4) # slots require 9.4 - def test_create_replication_slot(self): - import psycopg2.extras - conn = self.repl_connect(connection_factory=psycopg2.extras.PhysicalReplicationConnection) - if conn is None: return - cur = conn.cursor() - - slot = "test_slot1" - try: - cur.create_replication_slot(slot) - self.assertRaises(psycopg2.ProgrammingError, cur.create_replication_slot, slot) - finally: - cur.drop_replication_slot(slot) - - def test_suite(): return unittest.TestLoader().loadTestsFromName(__name__) diff --git a/tests/test_replication.py b/tests/test_replication.py new file mode 100644 index 000000000..231bcd08d --- /dev/null +++ b/tests/test_replication.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python + +# test_replication.py - unit test for replication 
protocol +# +# Copyright (C) 2015 Daniele Varrazzo +# +# psycopg2 is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# In addition, as a special exception, the copyright holders give +# permission to link this program with the OpenSSL library (or with +# modified versions of OpenSSL that use the same license as OpenSSL), +# and distribute linked combinations including the two. +# +# You must obey the GNU Lesser General Public License in all respects for +# all of the code used other than OpenSSL. +# +# psycopg2 is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +import psycopg2 +import psycopg2.extensions +from psycopg2.extras import PhysicalReplicationConnection, LogicalReplicationConnection + +from testutils import unittest +from testutils import skip_before_postgres +from testutils import ConnectingTestCase + + +class ReplicationTestCase(ConnectingTestCase): + def setUp(self): + super(ReplicationTestCase, self).setUp() + self._slots = [] + + def tearDown(self): + # first close all connections, as they might keep the slot(s) active + super(ReplicationTestCase, self).tearDown() + + if self._slots: + kill_conn = self.repl_connect(connection_factory=PhysicalReplicationConnection) + if kill_conn: + kill_cur = kill_conn.cursor() + for slot in self._slots: + kill_cur.drop_replication_slot(slot) + kill_conn.close() + + def create_replication_slot(self, cur, slot_name, **kwargs): + cur.create_replication_slot(slot_name, **kwargs) + self._slots.append(slot_name) + + def drop_replication_slot(self, cur, slot_name): + cur.drop_replication_slot(slot_name) + self._slots.remove(slot_name) + + +class 
ReplicationTest(ReplicationTestCase): + @skip_before_postgres(9, 0) + def test_physical_replication_connection(self): + conn = self.repl_connect(connection_factory=PhysicalReplicationConnection) + if conn is None: return + cur = conn.cursor() + cur.execute("IDENTIFY_SYSTEM") + cur.fetchall() + + @skip_before_postgres(9, 4) + def test_logical_replication_connection(self): + conn = self.repl_connect(connection_factory=LogicalReplicationConnection) + if conn is None: return + cur = conn.cursor() + cur.execute("IDENTIFY_SYSTEM") + cur.fetchall() + + @skip_before_postgres(9, 0) + def test_stop_replication_raises(self): + conn = self.repl_connect(connection_factory=PhysicalReplicationConnection) + if conn is None: return + cur = conn.cursor() + self.assertRaises(psycopg2.ProgrammingError, cur.stop_replication) + + cur.start_replication() + cur.stop_replication() # doesn't raise now + + def consume(msg): + pass + cur.consume_replication_stream(consume) # should return at once + + @skip_before_postgres(9, 4) # slots require 9.4 + def test_create_replication_slot(self): + conn = self.repl_connect(connection_factory=PhysicalReplicationConnection) + if conn is None: return + cur = conn.cursor() + + slot = "test_slot1" + + self.create_replication_slot(cur, slot) + self.assertRaises(psycopg2.ProgrammingError, self.create_replication_slot, cur, slot) + + +class AsyncReplicationTest(ReplicationTestCase): + @skip_before_postgres(9, 4) + def test_async_replication(self): + conn = self.repl_connect(connection_factory=LogicalReplicationConnection, async=1) + if conn is None: return + self.wait(conn) + cur = conn.cursor() + + slot = "test_slot1" + self.create_replication_slot(cur, slot, output_plugin='test_decoding') + self.wait(cur) + + cur.start_replication(slot) + self.wait(cur) + + +def test_suite(): + return unittest.TestLoader().loadTestsFromName(__name__) + +if __name__ == "__main__": + unittest.main() diff --git a/tests/testutils.py b/tests/testutils.py index 
76671d99d..5f4493f27 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -27,6 +27,7 @@ import os import platform import sys +import select from functools import wraps from testconfig import dsn, repl_dsn @@ -129,7 +130,8 @@ def repl_connect(self, **kwargs): except psycopg2.OperationalError, e: return self.skipTest("replication db not configured: %s" % e) - conn.autocommit = True + if not conn.async: + conn.autocommit = True return conn def _get_conn(self): @@ -143,6 +145,23 @@ def _set_conn(self, conn): conn = property(_get_conn, _set_conn) + # for use with async connections only + def wait(self, cur_or_conn): + import psycopg2.extensions + pollable = cur_or_conn + if not hasattr(pollable, 'poll'): + pollable = cur_or_conn.connection + while True: + state = pollable.poll() + if state == psycopg2.extensions.POLL_OK: + break + elif state == psycopg2.extensions.POLL_READ: + select.select([pollable], [], [], 10) + elif state == psycopg2.extensions.POLL_WRITE: + select.select([], [pollable], [], 10) + else: + raise Exception("Unexpected result from poll: %r", state) + def decorate_all_tests(cls, *decorators): """ From 0435320f34c56ced8c15899053920fc94fd4f3d7 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Fri, 16 Oct 2015 16:36:03 +0200 Subject: [PATCH 38/60] Fix PSYCOPG2_TEST_REPL_DSN handling. 
--- tests/test_replication.py | 16 ++++++++++++++++ tests/testconfig.py | 6 +++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/test_replication.py b/tests/test_replication.py index 231bcd08d..dfe11af07 100644 --- a/tests/test_replication.py +++ b/tests/test_replication.py @@ -33,6 +33,9 @@ class ReplicationTestCase(ConnectingTestCase): def setUp(self): + from testconfig import repl_dsn + if not repl_dsn: + self.skipTest("replication tests disabled by default") super(ReplicationTestCase, self).setUp() self._slots = [] @@ -99,6 +102,19 @@ def test_create_replication_slot(self): self.create_replication_slot(cur, slot) self.assertRaises(psycopg2.ProgrammingError, self.create_replication_slot, cur, slot) + @skip_before_postgres(9, 4) # slots require 9.4 + def test_start_on_missing_replication_slot(self): + conn = self.repl_connect(connection_factory=PhysicalReplicationConnection) + if conn is None: return + cur = conn.cursor() + + slot = "test_slot1" + + self.assertRaises(psycopg2.ProgrammingError, cur.start_replication, slot) + + self.create_replication_slot(cur, slot) + cur.start_replication(slot) + class AsyncReplicationTest(ReplicationTestCase): @skip_before_postgres(9, 4) diff --git a/tests/testconfig.py b/tests/testconfig.py index d59e5a0d4..841eaf1cb 100644 --- a/tests/testconfig.py +++ b/tests/testconfig.py @@ -33,4 +33,8 @@ if dbpass is not None: dsn += ' password=%s' % dbpass -repl_dsn = os.environ.get('PSYCOPG2_TEST_REPL_DSN', dsn) +# Don't run replication tests if REPL_DSN is not set, default to normal DSN if +# set to empty string. +repl_dsn = os.environ.get('PSYCOPG2_TEST_REPL_DSN', None) +if repl_dsn == '': + repl_dsn = dsn From 4ab7cf0157ae311aa22c0cb38410a3d2ab9bea06 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Mon, 19 Oct 2015 15:42:42 +0200 Subject: [PATCH 39/60] Replace stop_replication with requirement for an exception. 
--- doc/src/extras.rst | 18 ++++++---------- lib/extras.py | 12 +++++++++++ psycopg/cursor.h | 1 - psycopg/cursor_type.c | 19 ----------------- psycopg/pqpath.c | 7 +----- tests/test_replication.py | 45 +++++++++++++++++++++++++++------------ 6 files changed, 51 insertions(+), 51 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index 4755cc728..ddf989d77 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -348,9 +348,11 @@ The individual messages in the replication stream are presented by `start_replication()` first. When called, this method enters an endless loop, reading messages from - the server and passing them to ``consume()``. In order to make this - method break out of the loop and return, ``consume()`` can call - `stop_replication()` on the cursor or it can throw an exception. + the server and passing them to ``consume()``, then waiting for more + messages from the server. In order to make this method break out of + the loop and return, ``consume()`` can throw a `StopReplication` + exception (any unhandled exception will make it break out of the loop + as well). If *decode* is set to `!True`, the messages read from the server are converted according to the connection `~connection.encoding`. This @@ -398,13 +400,6 @@ The individual messages in the replication stream are presented by load on network and the server. A possible strategy is to confirm after every COMMIT message. - .. method:: stop_replication() - - This method can be called on synchronous connection from the - ``consume()`` callable in order to break out of the endless loop in - `consume_replication_stream()`. If called on asynchronous connection - or when replication is not in progress, this method raises an error. - .. 
method:: send_replication_feedback(write_lsn=0, flush_lsn=0, apply_lsn=0, reply=False) :param write_lsn: a LSN position up to which the client has written the data locally @@ -506,10 +501,11 @@ The individual messages in the replication stream are presented by if not sel[0]: cur.send_replication_feedback() - .. index:: pair: Cursor; Replication +.. autoclass:: StopReplication + .. index:: single: Data types; Additional diff --git a/lib/extras.py b/lib/extras.py index dc2d5e65a..8854ec2bb 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -500,6 +500,18 @@ def __init__(self, *args, **kwargs): super(PhysicalReplicationConnection, self).__init__(*args, **kwargs) +class StopReplication(Exception): + """ + Exception used to break out of the endless loop in + `~ReplicationCursor.consume_replication_stream()`. + + Subclass of `~exceptions.Exception`. Intentionally *not* inherited from + `~psycopg2.Error` as occurrence of this exception does not indicate an + error. + """ + pass + + class ReplicationCursor(_cursor): """A cursor used for communication on the replication protocol.""" diff --git a/psycopg/cursor.h b/psycopg/cursor.h index 3f1259984..669e176d6 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -75,7 +75,6 @@ struct cursorObject { /* replication cursor attrs */ int repl_started:1; /* if replication is started */ - int repl_stop:1; /* if client requested to stop replication */ int repl_consuming:1; /* if running the consume loop */ struct timeval repl_keepalive_interval; /* interval for keepalive messages in replication mode */ XLogRecPtr repl_write_lsn; /* LSN stats for replication feedback messages */ diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index f4598873f..d51f7a558 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1605,7 +1605,6 @@ psyco_curs_start_replication_expert(cursorObject *self, PyObject *args, PyObject Dprintf("psyco_curs_start_replication_expert: %s", command); self->copysize = 0; - self->repl_stop = 0; 
self->repl_consuming = 0; self->repl_write_lsn = InvalidXLogRecPtr; @@ -1626,21 +1625,6 @@ psyco_curs_start_replication_expert(cursorObject *self, PyObject *args, PyObject return res; } -#define psyco_curs_stop_replication_doc \ -"stop_replication() -- Set flag to break out of the endless loop in consume_replication_stream()." - -static PyObject * -psyco_curs_stop_replication(cursorObject *self) -{ - EXC_IF_CURS_CLOSED(self); - EXC_IF_CURS_ASYNC(self, stop_replication); - EXC_IF_NOT_REPLICATING(self, stop_replication); - - self->repl_stop = 1; - - Py_RETURN_NONE; -} - #define psyco_curs_consume_replication_stream_doc \ "consume_replication_stream(consumer, keepalive_interval=10) -- Consume replication stream." @@ -1684,7 +1668,6 @@ psyco_curs_consume_replication_stream(cursorObject *self, PyObject *args, PyObje } self->repl_consuming = 0; - self->repl_stop = 0; /* who knows, what if we will be called again? */ return res; } @@ -1992,8 +1975,6 @@ static struct PyMethodDef cursorObject_methods[] = { METH_VARARGS|METH_KEYWORDS, psyco_curs_copy_expert_doc}, {"start_replication_expert", (PyCFunction)psyco_curs_start_replication_expert, METH_VARARGS|METH_KEYWORDS, psyco_curs_start_replication_expert_doc}, - {"stop_replication", (PyCFunction)psyco_curs_stop_replication, - METH_NOARGS, psyco_curs_stop_replication_doc}, {"consume_replication_stream", (PyCFunction)psyco_curs_consume_replication_stream, METH_VARARGS|METH_KEYWORDS, psyco_curs_consume_replication_stream_doc}, {"read_replication_message", (PyCFunction)psyco_curs_read_replication_message, diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 111eb8753..f38fbd39d 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1743,7 +1743,7 @@ pq_copy_both(cursorObject *curs, PyObject *consume, int decode, double keepalive keep_intr.tv_sec = (int)keepalive_interval; keep_intr.tv_usec = (keepalive_interval - keep_intr.tv_sec)*1.0e6; - while (!curs->repl_stop) { + while (1) { msg = pq_read_replication_message(curs, 
decode); if (!msg) { goto exit; @@ -1803,11 +1803,6 @@ pq_copy_both(cursorObject *curs, PyObject *consume, int decode, double keepalive goto exit; } Py_DECREF(tmp); - - if (curs->repl_stop) { - Dprintf("pq_copy_both: repl_stop flag set by consume_func"); - break; - } } } diff --git a/tests/test_replication.py b/tests/test_replication.py index dfe11af07..cd1321aed 100644 --- a/tests/test_replication.py +++ b/tests/test_replication.py @@ -25,6 +25,7 @@ import psycopg2 import psycopg2.extensions from psycopg2.extras import PhysicalReplicationConnection, LogicalReplicationConnection +from psycopg2.extras import StopReplication from testutils import unittest from testutils import skip_before_postgres @@ -77,20 +78,6 @@ def test_logical_replication_connection(self): cur.execute("IDENTIFY_SYSTEM") cur.fetchall() - @skip_before_postgres(9, 0) - def test_stop_replication_raises(self): - conn = self.repl_connect(connection_factory=PhysicalReplicationConnection) - if conn is None: return - cur = conn.cursor() - self.assertRaises(psycopg2.ProgrammingError, cur.stop_replication) - - cur.start_replication() - cur.stop_replication() # doesn't raise now - - def consume(msg): - pass - cur.consume_replication_stream(consume) # should return at once - @skip_before_postgres(9, 4) # slots require 9.4 def test_create_replication_slot(self): conn = self.repl_connect(connection_factory=PhysicalReplicationConnection) @@ -115,6 +102,36 @@ def test_start_on_missing_replication_slot(self): self.create_replication_slot(cur, slot) cur.start_replication(slot) + @skip_before_postgres(9, 4) # slots require 9.4 + def test_stop_replication(self): + conn = self.repl_connect(connection_factory=LogicalReplicationConnection) + if conn is None: return + cur = conn.cursor() + + slot = "test_slot1" + + self.create_replication_slot(cur, slot, output_plugin='test_decoding') + + self.make_replication_event() + + cur.start_replication(slot) + def consume(msg): + raise StopReplication() + 
self.assertRaises(StopReplication, cur.consume_replication_stream, consume) + + # generate an event for our replication stream + def make_replication_event(self): + conn = self.connect() + if conn is None: return + cur = conn.cursor() + + try: + cur.execute("DROP TABLE dummy1") + except psycopg2.ProgrammingError: + conn.rollback() + cur.execute("CREATE TABLE dummy1()") + conn.commit() + class AsyncReplicationTest(ReplicationTestCase): @skip_before_postgres(9, 4) From 7aea2cef6e42c961fadac61f19b570bdf8c61401 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Mon, 19 Oct 2015 17:02:18 +0200 Subject: [PATCH 40/60] Improve async replication test. --- tests/test_replication.py | 83 ++++++++++++++++++++++----------------- tests/testconfig.py | 2 + 2 files changed, 50 insertions(+), 35 deletions(-) diff --git a/tests/test_replication.py b/tests/test_replication.py index cd1321aed..5c029c884 100644 --- a/tests/test_replication.py +++ b/tests/test_replication.py @@ -27,6 +27,7 @@ from psycopg2.extras import PhysicalReplicationConnection, LogicalReplicationConnection from psycopg2.extras import StopReplication +import testconfig from testutils import unittest from testutils import skip_before_postgres from testutils import ConnectingTestCase @@ -34,10 +35,12 @@ class ReplicationTestCase(ConnectingTestCase): def setUp(self): - from testconfig import repl_dsn - if not repl_dsn: + if not testconfig.repl_dsn: self.skipTest("replication tests disabled by default") + super(ReplicationTestCase, self).setUp() + + self.slot = testconfig.repl_slot self._slots = [] def tearDown(self): @@ -52,14 +55,27 @@ def tearDown(self): kill_cur.drop_replication_slot(slot) kill_conn.close() - def create_replication_slot(self, cur, slot_name, **kwargs): + def create_replication_slot(self, cur, slot_name=testconfig.repl_slot, **kwargs): cur.create_replication_slot(slot_name, **kwargs) self._slots.append(slot_name) - def drop_replication_slot(self, cur, slot_name): + def 
drop_replication_slot(self, cur, slot_name=testconfig.repl_slot): cur.drop_replication_slot(slot_name) self._slots.remove(slot_name) + # generate some events for our replication stream + def make_replication_events(self): + conn = self.connect() + if conn is None: return + cur = conn.cursor() + + try: + cur.execute("DROP TABLE dummy1") + except psycopg2.ProgrammingError: + conn.rollback() + cur.execute("CREATE TABLE dummy1 AS SELECT * FROM generate_series(1, 5) AS id") + conn.commit() + class ReplicationTest(ReplicationTestCase): @skip_before_postgres(9, 0) @@ -84,10 +100,8 @@ def test_create_replication_slot(self): if conn is None: return cur = conn.cursor() - slot = "test_slot1" - - self.create_replication_slot(cur, slot) - self.assertRaises(psycopg2.ProgrammingError, self.create_replication_slot, cur, slot) + self.create_replication_slot(cur) + self.assertRaises(psycopg2.ProgrammingError, self.create_replication_slot, cur) @skip_before_postgres(9, 4) # slots require 9.4 def test_start_on_missing_replication_slot(self): @@ -95,12 +109,10 @@ def test_start_on_missing_replication_slot(self): if conn is None: return cur = conn.cursor() - slot = "test_slot1" - - self.assertRaises(psycopg2.ProgrammingError, cur.start_replication, slot) + self.assertRaises(psycopg2.ProgrammingError, cur.start_replication, self.slot) - self.create_replication_slot(cur, slot) - cur.start_replication(slot) + self.create_replication_slot(cur) + cur.start_replication(self.slot) @skip_before_postgres(9, 4) # slots require 9.4 def test_stop_replication(self): @@ -108,46 +120,47 @@ def test_stop_replication(self): if conn is None: return cur = conn.cursor() - slot = "test_slot1" - - self.create_replication_slot(cur, slot, output_plugin='test_decoding') + self.create_replication_slot(cur, output_plugin='test_decoding') - self.make_replication_event() + self.make_replication_events() - cur.start_replication(slot) + cur.start_replication(self.slot) def consume(msg): raise StopReplication() 
self.assertRaises(StopReplication, cur.consume_replication_stream, consume) - # generate an event for our replication stream - def make_replication_event(self): - conn = self.connect() - if conn is None: return - cur = conn.cursor() - - try: - cur.execute("DROP TABLE dummy1") - except psycopg2.ProgrammingError: - conn.rollback() - cur.execute("CREATE TABLE dummy1()") - conn.commit() - class AsyncReplicationTest(ReplicationTestCase): - @skip_before_postgres(9, 4) + @skip_before_postgres(9, 4) # slots require 9.4 def test_async_replication(self): conn = self.repl_connect(connection_factory=LogicalReplicationConnection, async=1) if conn is None: return self.wait(conn) cur = conn.cursor() - slot = "test_slot1" - self.create_replication_slot(cur, slot, output_plugin='test_decoding') + self.create_replication_slot(cur, output_plugin='test_decoding') self.wait(cur) - cur.start_replication(slot) + cur.start_replication(self.slot) self.wait(cur) + self.make_replication_events() + + self.msg_count = 0 + def consume(msg): + self.msg_count += 1 + if self.msg_count > 3: + raise StopReplication() + + def process_stream(): + from select import select + while True: + msg = cur.read_replication_message() + if msg: + consume(msg) + else: + select([cur], [], []) + self.assertRaises(StopReplication, process_stream) def test_suite(): return unittest.TestLoader().loadTestsFromName(__name__) diff --git a/tests/testconfig.py b/tests/testconfig.py index 841eaf1cb..82b48a39e 100644 --- a/tests/testconfig.py +++ b/tests/testconfig.py @@ -38,3 +38,5 @@ repl_dsn = os.environ.get('PSYCOPG2_TEST_REPL_DSN', None) if repl_dsn == '': repl_dsn = dsn + +repl_slot = os.environ.get('PSYCOPG2_TEST_REPL_SLOT', 'psycopg2_test_slot') From 0bb81fc84811134bca70b59daa4661bd0697f2ff Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Mon, 19 Oct 2015 20:00:39 +0200 Subject: [PATCH 41/60] Properly subclass ReplicationCursor on C level. 
--- doc/src/extras.rst | 50 ++-- lib/extensions.py | 2 +- lib/extras.py | 10 +- psycopg/cursor.h | 27 +-- psycopg/cursor_type.c | 235 +------------------ psycopg/pqpath.c | 97 ++++---- psycopg/pqpath.h | 8 +- psycopg/psycopgmodule.c | 9 +- psycopg/replication_cursor.h | 77 ++++++ psycopg/replication_cursor_type.c | 360 +++++++++++++++++++++++++++++ psycopg/replication_message_type.c | 2 +- psycopg2.cproj | 2 + setup.py | 4 +- tests/test_replication.py | 20 +- 14 files changed, 554 insertions(+), 349 deletions(-) create mode 100644 psycopg/replication_cursor.h create mode 100644 psycopg/replication_cursor_type.c diff --git a/doc/src/extras.rst b/doc/src/extras.rst index ddf989d77..9384a961e 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -324,16 +324,15 @@ The individual messages in the replication stream are presented by `start_replication_expert()` internally. After starting the replication, to actually consume the incoming - server messages, use `consume_replication_stream()` or implement a - loop around `read_replication_message()` in case of asynchronous - connection. + server messages, use `consume_stream()` or implement a loop around + `read_message()` in case of asynchronous connection. .. method:: start_replication_expert(command) Start replication on the connection using provided ``START_REPLICATION`` command. - .. method:: consume_replication_stream(consume, decode=False, keepalive_interval=10) + .. method:: consume_stream(consume, decode=False, keepalive_interval=10) :param consume: a callable object with signature ``consume(msg)`` :param decode: a flag indicating that unicode conversion should be @@ -342,7 +341,7 @@ The individual messages in the replication stream are presented by messages to the server This method can only be used with synchronous connection. For - asynchronous connections see `read_replication_message()`. + asynchronous connections see `read_message()`. 
Before calling this method to consume the stream, use `start_replication()` first. @@ -372,18 +371,18 @@ The individual messages in the replication stream are presented by self.store_message_data(msg.payload) if self.should_report_to_the_server_now(msg): - msg.cursor.send_replication_feedback(flush_lsn=msg.data_start) + msg.cursor.send_feedback(flush_lsn=msg.data_start) consumer = LogicalStreamConsumer() - cur.consume_replication_stream(consumer, decode=True) + cur.consume_stream(consumer, decode=True) The *msg* objects passed to ``consume()`` are instances of `ReplicationMessage` class. After storing certain amount of messages' data reliably, the client should send a confirmation message to the server. This should be done - by calling `send_replication_feedback()` method on the corresponding - replication cursor. A reference to the cursor is provided in the + by calling `send_feedback()` method on the corresponding replication + cursor. A reference to the cursor is provided in the `ReplicationMessage` as an attribute. .. warning:: @@ -400,7 +399,7 @@ The individual messages in the replication stream are presented by load on network and the server. A possible strategy is to confirm after every COMMIT message. - .. method:: send_replication_feedback(write_lsn=0, flush_lsn=0, apply_lsn=0, reply=False) + .. method:: send_feedback(write_lsn=0, flush_lsn=0, apply_lsn=0, reply=False) :param write_lsn: a LSN position up to which the client has written the data locally :param flush_lsn: a LSN position up to which the client has stored the @@ -419,16 +418,15 @@ The individual messages in the replication stream are presented by just send a keepalive message to the server. If the feedback message could not be sent, updates the passed LSN - positions in the cursor for a later call to - `flush_replication_feedback()` and returns `!False`, otherwise returns - `!True`. 
+ positions in the cursor for a later call to `flush_feedback()` and + returns `!False`, otherwise returns `!True`. - .. method:: flush_replication_feedback(reply=False) + .. method:: flush_feedback(reply=False) :param reply: request the server to send back a keepalive message immediately This method tries to flush the latest replication feedback message - that `send_replication_feedback()` was trying to send but couldn't. + that `send_feedback()` was trying to send but couldn't. If *reply* is `!True` sends a keepalive message in either case. @@ -437,14 +435,13 @@ The individual messages in the replication stream are presented by Low-level methods for asynchronous connection operation. - With the synchronous connection, a call to `consume_replication_stream()` - handles all the complexity of handling the incoming messages and sending - keepalive replies, but at times it might be beneficial to use low-level - interface for better control, in particular to `~select.select()` on - multiple sockets. The following methods are provided for asynchronous - operation: + With the synchronous connection, a call to `consume_stream()` handles all + the complexity of handling the incoming messages and sending keepalive + replies, but at times it might be beneficial to use low-level interface + for better control, in particular to `~select.select()` on multiple + sockets. The following methods are provided for asynchronous operation: - .. method:: read_replication_message(decode=True) + .. method:: read_message(decode=True) :param decode: a flag indicating that unicode conversion should be performed on the data received from the server @@ -475,7 +472,7 @@ The individual messages in the replication stream are presented by This is a convenience method which allows replication cursor to be used directly in `~select.select()` or `~select.poll()` calls. - .. attribute:: replication_io_timestamp + .. 
attribute:: io_timestamp A `~datetime` object representing the timestamp at the moment of last communication with the server (a data or keepalive message in either @@ -488,18 +485,19 @@ The individual messages in the replication stream are presented by keepalive_interval = 10.0 while True: - msg = cur.read_replication_message() + msg = cur.read_message() if msg: consume(msg) else: - timeout = keepalive_interval - (datetime.now() - cur.replication_io_timestamp).total_seconds() + now = datetime.now() + timeout = keepalive_interval - (now - cur.io_timestamp).total_seconds() if timeout > 0: sel = select.select([cur], [], [], timeout) else: sel = ([], [], []) if not sel[0]: - cur.send_replication_feedback() + cur.send_feedback() .. index:: pair: Cursor; Replication diff --git a/lib/extensions.py b/lib/extensions.py index 513b7fc73..af27bca66 100644 --- a/lib/extensions.py +++ b/lib/extensions.py @@ -61,7 +61,7 @@ from psycopg2._psycopg import ISQLQuote, Notify, Diagnostics, Column from psycopg2._psycopg import QueryCanceledError, TransactionRollbackError -from psycopg2._psycopg import replicationMessage +from psycopg2._psycopg import ReplicationCursor, ReplicationMessage try: from psycopg2._psycopg import set_wait_callback, get_wait_callback diff --git a/lib/extras.py b/lib/extras.py index 8854ec2bb..7c713573d 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -39,7 +39,8 @@ from psycopg2 import extensions as _ext from psycopg2.extensions import cursor as _cursor from psycopg2.extensions import connection as _connection -from psycopg2.extensions import replicationMessage as ReplicationMessage +from psycopg2.extensions import ReplicationCursor as _replicationCursor +from psycopg2.extensions import ReplicationMessage from psycopg2.extensions import adapt as _A, quote_ident from psycopg2.extensions import b @@ -503,7 +504,7 @@ def __init__(self, *args, **kwargs): class StopReplication(Exception): """ Exception used to break out of the endless loop in - 
`~ReplicationCursor.consume_replication_stream()`. + `~ReplicationCursor.consume_stream()`. Subclass of `~exceptions.Exception`. Intentionally *not* inherited from `~psycopg2.Error` as occurrence of this exception does not indicate an @@ -512,7 +513,7 @@ class StopReplication(Exception): pass -class ReplicationCursor(_cursor): +class ReplicationCursor(_replicationCursor): """A cursor used for communication on the replication protocol.""" def create_replication_slot(self, slot_name, slot_type=None, output_plugin=None): @@ -598,9 +599,6 @@ def start_replication(self, slot_name=None, slot_type=None, start_lsn=0, self.start_replication_expert(command) - def send_feedback_message(self, written_lsn=0, sync_lsn=0, apply_lsn=0, reply_requested=False): - return self.send_replication_feedback(written_lsn, sync_lsn, apply_lsn, reply_requested) - # allows replication cursors to be used in select.select() directly def fileno(self): return self.connection.fileno() diff --git a/psycopg/cursor.h b/psycopg/cursor.h index 669e176d6..18e31e5fe 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -27,7 +27,6 @@ #define PSYCOPG_CURSOR_H 1 #include "psycopg/connection.h" -#include "libpq_support.h" #ifdef __cplusplus extern "C" { @@ -74,14 +73,6 @@ struct cursorObject { #define DEFAULT_COPYBUFF 8192 /* replication cursor attrs */ - int repl_started:1; /* if replication is started */ - int repl_consuming:1; /* if running the consume loop */ - struct timeval repl_keepalive_interval; /* interval for keepalive messages in replication mode */ - XLogRecPtr repl_write_lsn; /* LSN stats for replication feedback messages */ - XLogRecPtr repl_flush_lsn; - XLogRecPtr repl_apply_lsn; - int repl_feedback_pending; /* flag set when we couldn't send the feedback to the server */ - struct timeval repl_last_io; /* timestamp of the last exchange with the server */ PyObject *tuple_factory; /* factory for result tuples */ PyObject *tzinfo_factory; /* factory for tzinfo objects */ @@ -106,7 +97,7 @@ 
HIDDEN void curs_reset(cursorObject *self); HIDDEN int psyco_curs_withhold_set(cursorObject *self, PyObject *pyvalue); HIDDEN int psyco_curs_scrollable_set(cursorObject *self, PyObject *pyvalue); -RAISES_NEG int psyco_curs_datetime_init(void); +HIDDEN int psyco_curs_init(PyObject *obj, PyObject *args, PyObject *kwargs); /* exception-raising macros */ #define EXC_IF_CURS_CLOSED(self) \ @@ -149,22 +140,6 @@ do \ return NULL; } \ while (0) -#define EXC_IF_REPLICATING(self, cmd) \ -do \ - if ((self)->repl_started) { \ - PyErr_SetString(ProgrammingError, \ - #cmd " cannot be used when replication is already in progress"); \ - return NULL; } \ -while (0) - -#define EXC_IF_NOT_REPLICATING(self, cmd) \ -do \ - if (!(self)->repl_started) { \ - PyErr_SetString(ProgrammingError, \ - #cmd " cannot be used when replication is not in progress"); \ - return NULL; } \ -while (0) - #ifdef __cplusplus } #endif diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index d51f7a558..63bd5a103 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -28,7 +28,6 @@ #include "psycopg/cursor.h" #include "psycopg/connection.h" -#include "psycopg/replication_message.h" #include "psycopg/green.h" #include "psycopg/pqpath.h" #include "psycopg/typecast.h" @@ -39,9 +38,6 @@ #include -/* python */ -#include "datetime.h" - /** DBAPI methods **/ @@ -1583,222 +1579,6 @@ psyco_curs_copy_expert(cursorObject *self, PyObject *args, PyObject *kwargs) return res; } -#define psyco_curs_start_replication_expert_doc \ -"start_replication_expert(command, writer=None, keepalive_interval=10) -- Start and consume replication stream with direct command." 
- -static PyObject * -psyco_curs_start_replication_expert(cursorObject *self, PyObject *args, PyObject *kwargs) -{ - PyObject *res = NULL; - char *command; - static char *kwlist[] = {"command", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &command)) { - return NULL; - } - - EXC_IF_CURS_CLOSED(self); - EXC_IF_GREEN(start_replication_expert); - EXC_IF_TPC_PREPARED(self->conn, start_replication_expert); - EXC_IF_REPLICATING(self, start_replication_expert); - - Dprintf("psyco_curs_start_replication_expert: %s", command); - - self->copysize = 0; - self->repl_consuming = 0; - - self->repl_write_lsn = InvalidXLogRecPtr; - self->repl_flush_lsn = InvalidXLogRecPtr; - self->repl_apply_lsn = InvalidXLogRecPtr; - self->repl_feedback_pending = 0; - - gettimeofday(&self->repl_last_io, NULL); - - if (pq_execute(self, command, self->conn->async, - 1 /* no_result */, 1 /* no_begin */) >= 0) { - res = Py_None; - Py_INCREF(res); - - self->repl_started = 1; - } - - return res; -} - -#define psyco_curs_consume_replication_stream_doc \ -"consume_replication_stream(consumer, keepalive_interval=10) -- Consume replication stream." 
- -static PyObject * -psyco_curs_consume_replication_stream(cursorObject *self, PyObject *args, PyObject *kwargs) -{ - PyObject *consume = NULL, *res = NULL; - int decode = 0; - double keepalive_interval = 10; - static char *kwlist[] = {"consume", "decode", "keepalive_interval", NULL}; - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|id", kwlist, - &consume, &decode, &keepalive_interval)) { - return NULL; - } - - EXC_IF_CURS_CLOSED(self); - EXC_IF_CURS_ASYNC(self, consume_replication_stream); - EXC_IF_GREEN(consume_replication_stream); - EXC_IF_TPC_PREPARED(self->conn, consume_replication_stream); - EXC_IF_NOT_REPLICATING(self, consume_replication_stream); - - if (self->repl_consuming) { - PyErr_SetString(ProgrammingError, - "consume_replication_stream cannot be used when already in the consume loop"); - return NULL; - } - - Dprintf("psyco_curs_consume_replication_stream"); - - if (keepalive_interval < 1.0) { - psyco_set_error(ProgrammingError, self, "keepalive_interval must be >= 1 (sec)"); - return NULL; - } - - self->repl_consuming = 1; - - if (pq_copy_both(self, consume, decode, keepalive_interval) >= 0) { - res = Py_None; - Py_INCREF(res); - } - - self->repl_consuming = 0; - - return res; -} - -#define psyco_curs_read_replication_message_doc \ -"read_replication_message(decode=True) -- Try reading a replication message from the server (non-blocking)." 
- -static PyObject * -psyco_curs_read_replication_message(cursorObject *self, PyObject *args, PyObject *kwargs) -{ - int decode = 1; - static char *kwlist[] = {"decode", NULL}; - - EXC_IF_CURS_CLOSED(self); - EXC_IF_GREEN(read_replication_message); - EXC_IF_TPC_PREPARED(self->conn, read_replication_message); - EXC_IF_NOT_REPLICATING(self, read_replication_message); - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, - &decode)) { - return NULL; - } - - return pq_read_replication_message(self, decode); -} - -static PyObject * -curs_flush_replication_feedback(cursorObject *self, int reply) -{ - if (!(self->repl_feedback_pending || reply)) - Py_RETURN_TRUE; - - if (pq_send_replication_feedback(self, reply)) { - self->repl_feedback_pending = 0; - Py_RETURN_TRUE; - } else { - self->repl_feedback_pending = 1; - Py_RETURN_FALSE; - } -} - -#define psyco_curs_send_replication_feedback_doc \ -"send_replication_feedback(write_lsn=0, flush_lsn=0, apply_lsn=0, reply=False) -- Try sending a replication feedback message to the server and optionally request a reply." 
- -static PyObject * -psyco_curs_send_replication_feedback(cursorObject *self, PyObject *args, PyObject *kwargs) -{ - XLogRecPtr write_lsn = InvalidXLogRecPtr, - flush_lsn = InvalidXLogRecPtr, - apply_lsn = InvalidXLogRecPtr; - int reply = 0; - static char* kwlist[] = {"write_lsn", "flush_lsn", "apply_lsn", "reply", NULL}; - - EXC_IF_CURS_CLOSED(self); - EXC_IF_NOT_REPLICATING(self, send_replication_feedback); - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|KKKi", kwlist, - &write_lsn, &flush_lsn, &apply_lsn, &reply)) { - return NULL; - } - - if (write_lsn > self->repl_write_lsn) - self->repl_write_lsn = write_lsn; - - if (flush_lsn > self->repl_flush_lsn) - self->repl_flush_lsn = flush_lsn; - - if (apply_lsn > self->repl_apply_lsn) - self->repl_apply_lsn = apply_lsn; - - self->repl_feedback_pending = 1; - - return curs_flush_replication_feedback(self, reply); -} - -#define psyco_curs_flush_replication_feedback_doc \ -"flush_replication_feedback(reply=False) -- Try flushing the latest pending replication feedback message to the server and optionally request a reply." 
- -static PyObject * -psyco_curs_flush_replication_feedback(cursorObject *self, PyObject *args, PyObject *kwargs) -{ - int reply = 0; - static char *kwlist[] = {"reply", NULL}; - - EXC_IF_CURS_CLOSED(self); - EXC_IF_NOT_REPLICATING(self, flush_replication_feedback); - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, - &reply)) { - return NULL; - } - - return curs_flush_replication_feedback(self, reply); -} - - -RAISES_NEG int -psyco_curs_datetime_init(void) -{ - Dprintf("psyco_curs_datetime_init: datetime init"); - - PyDateTime_IMPORT; - - if (!PyDateTimeAPI) { - PyErr_SetString(PyExc_ImportError, "datetime initialization failed"); - return -1; - } - return 0; -} - -#define psyco_curs_replication_io_timestamp_doc \ -"replication_io_timestamp -- the timestamp of latest IO with the server" - -static PyObject * -psyco_curs_get_replication_io_timestamp(cursorObject *self) -{ - PyObject *tval, *res = NULL; - double seconds; - - EXC_IF_CURS_CLOSED(self); - - seconds = self->repl_last_io.tv_sec + self->repl_last_io.tv_usec / 1.0e6; - - tval = Py_BuildValue("(d)", seconds); - if (tval) { - res = PyDateTime_FromTimestamp(tval); - Py_DECREF(tval); - } - return res; -} - /* extension: closed - return true if cursor is closed */ #define psyco_curs_closed_doc \ @@ -1973,16 +1753,6 @@ static struct PyMethodDef cursorObject_methods[] = { METH_VARARGS|METH_KEYWORDS, psyco_curs_copy_to_doc}, {"copy_expert", (PyCFunction)psyco_curs_copy_expert, METH_VARARGS|METH_KEYWORDS, psyco_curs_copy_expert_doc}, - {"start_replication_expert", (PyCFunction)psyco_curs_start_replication_expert, - METH_VARARGS|METH_KEYWORDS, psyco_curs_start_replication_expert_doc}, - {"consume_replication_stream", (PyCFunction)psyco_curs_consume_replication_stream, - METH_VARARGS|METH_KEYWORDS, psyco_curs_consume_replication_stream_doc}, - {"read_replication_message", (PyCFunction)psyco_curs_read_replication_message, - METH_VARARGS|METH_KEYWORDS, psyco_curs_read_replication_message_doc}, - 
{"send_replication_feedback", (PyCFunction)psyco_curs_send_replication_feedback, - METH_VARARGS|METH_KEYWORDS, psyco_curs_send_replication_feedback_doc}, - {"flush_replication_feedback", (PyCFunction)psyco_curs_flush_replication_feedback, - METH_VARARGS|METH_KEYWORDS, psyco_curs_flush_replication_feedback_doc}, {NULL} }; @@ -2033,9 +1803,6 @@ static struct PyGetSetDef cursorObject_getsets[] = { (getter)psyco_curs_scrollable_get, (setter)psyco_curs_scrollable_set, psyco_curs_scrollable_doc, NULL }, - { "replication_io_timestamp", - (getter)psyco_curs_get_replication_io_timestamp, NULL, - psyco_curs_replication_io_timestamp_doc, NULL }, {NULL} }; @@ -2134,7 +1901,7 @@ cursor_dealloc(PyObject* obj) Py_TYPE(obj)->tp_free(obj); } -static int +int cursor_init(PyObject *obj, PyObject *args, PyObject *kwargs) { PyObject *conn; diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index f38fbd39d..d68869810 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -35,6 +35,7 @@ #include "psycopg/pqpath.h" #include "psycopg/connection.h" #include "psycopg/cursor.h" +#include "psycopg/replication_cursor.h" #include "psycopg/replication_message.h" #include "psycopg/green.h" #include "psycopg/typecast.h" @@ -1542,19 +1543,23 @@ _pq_copy_out_v3(cursorObject *curs) are never returned to the caller. 
*/ PyObject * -pq_read_replication_message(cursorObject *curs, int decode) +pq_read_replication_message(replicationCursorObject *repl, int decode) { + cursorObject *curs = &repl->cur; + connectionObject *conn = curs->conn; + PGconn *pgconn = conn->pgconn; char *buffer = NULL; int len, data_size, consumed, hdr, reply; XLogRecPtr data_start, wal_end; pg_int64 send_time; - PyObject *str = NULL, *msg = NULL; + PyObject *str = NULL, *result = NULL; + replicationMessageObject *msg = NULL; Dprintf("pq_read_replication_message(decode=%d)", decode); consumed = 0; retry: - len = PQgetCopyData(curs->conn->pgconn, &buffer, 1 /* async */); + len = PQgetCopyData(pgconn, &buffer, 1 /* async */); if (len == 0) { /* If we've tried reading some data, but there was none, bail out. */ @@ -1566,8 +1571,8 @@ pq_read_replication_message(cursorObject *curs, int decode) server we might be reading a number of messages for every single one we process, thus overgrowing the internal buffer until the client system runs out of memory. 
*/ - if (!PQconsumeInput(curs->conn->pgconn)) { - pq_raise(curs->conn, curs, NULL); + if (!PQconsumeInput(pgconn)) { + pq_raise(conn, curs, NULL); goto exit; } /* But PQconsumeInput() doesn't tell us if it has actually read @@ -1581,15 +1586,15 @@ pq_read_replication_message(cursorObject *curs, int decode) if (len == -2) { /* serious error */ - pq_raise(curs->conn, curs, NULL); + pq_raise(conn, curs, NULL); goto exit; } if (len == -1) { /* EOF */ - curs->pgres = PQgetResult(curs->conn->pgconn); + curs->pgres = PQgetResult(pgconn); if (curs->pgres && PQresultStatus(curs->pgres) == PGRES_FATAL_ERROR) { - pq_raise(curs->conn, curs, NULL); + pq_raise(conn, curs, NULL); goto exit; } @@ -1603,7 +1608,7 @@ pq_read_replication_message(cursorObject *curs, int decode) consumed = 1; /* ok, we did really read something: update the io timestamp */ - gettimeofday(&curs->repl_last_io, NULL); + gettimeofday(&repl->last_io, NULL); Dprintf("pq_read_replication_message: msg=%c, len=%d", buffer[0], len); if (buffer[0] == 'w') { @@ -1626,21 +1631,22 @@ pq_read_replication_message(cursorObject *curs, int decode) /* XXX it would be wise to check if it's really a logical replication */ if (decode) { - str = PyUnicode_Decode(buffer + hdr, data_size, curs->conn->codec, NULL); + str = PyUnicode_Decode(buffer + hdr, data_size, conn->codec, NULL); } else { str = Bytes_FromStringAndSize(buffer + hdr, data_size); } if (!str) { goto exit; } - msg = PyObject_CallFunctionObjArgs((PyObject *)&replicationMessageType, - curs, str, NULL); + result = PyObject_CallFunctionObjArgs((PyObject *)&replicationMessageType, + curs, str, NULL); Py_DECREF(str); - if (!msg) { goto exit; } + if (!result) { goto exit; } - ((replicationMessageObject *)msg)->data_size = data_size; - ((replicationMessageObject *)msg)->data_start = data_start; - ((replicationMessageObject *)msg)->wal_end = wal_end; - ((replicationMessageObject *)msg)->send_time = send_time; + msg = (replicationMessageObject *)result; + msg->data_size = 
data_size; + msg->data_start = data_start; + msg->wal_end = wal_end; + msg->send_time = send_time; } else if (buffer[0] == 'k') { /* Primary keepalive message: msgtype(1), walEnd(8), sendTime(8), reply(1) */ @@ -1652,17 +1658,17 @@ pq_read_replication_message(cursorObject *curs, int decode) reply = buffer[hdr]; if (reply) { - if (!pq_send_replication_feedback(curs, 0)) { - if (curs->conn->async) { - curs->repl_feedback_pending = 1; + if (!pq_send_replication_feedback(repl, 0)) { + if (conn->async) { + repl->feedback_pending = 1; } else { /* XXX not sure if this was a good idea after all */ - pq_raise(curs->conn, curs, NULL); + pq_raise(conn, curs, NULL); goto exit; } } else { - gettimeofday(&curs->repl_last_io, NULL); + gettimeofday(&repl->last_io, NULL); } } @@ -1680,37 +1686,38 @@ pq_read_replication_message(cursorObject *curs, int decode) PQfreemem(buffer); } - return msg; + return result; none: - msg = Py_None; - Py_INCREF(msg); + result = Py_None; + Py_INCREF(result); goto exit; } int -pq_send_replication_feedback(cursorObject* curs, int reply_requested) +pq_send_replication_feedback(replicationCursorObject *repl, int reply_requested) { + cursorObject *curs = &repl->cur; + PGconn *pgconn = curs->conn->pgconn; char replybuf[1 + 8 + 8 + 8 + 8 + 1]; int len = 0; Dprintf("pq_send_replication_feedback: write="XLOGFMTSTR", flush="XLOGFMTSTR", apply="XLOGFMTSTR, - XLOGFMTARGS(curs->repl_write_lsn), - XLOGFMTARGS(curs->repl_flush_lsn), - XLOGFMTARGS(curs->repl_apply_lsn)); + XLOGFMTARGS(repl->write_lsn), + XLOGFMTARGS(repl->flush_lsn), + XLOGFMTARGS(repl->apply_lsn)); replybuf[len] = 'r'; len += 1; - fe_sendint64(curs->repl_write_lsn, &replybuf[len]); len += 8; - fe_sendint64(curs->repl_flush_lsn, &replybuf[len]); len += 8; - fe_sendint64(curs->repl_apply_lsn, &replybuf[len]); len += 8; + fe_sendint64(repl->write_lsn, &replybuf[len]); len += 8; + fe_sendint64(repl->flush_lsn, &replybuf[len]); len += 8; + fe_sendint64(repl->apply_lsn, &replybuf[len]); len += 8; 
fe_sendint64(feGetCurrentTimestamp(), &replybuf[len]); len += 8; replybuf[len] = reply_requested ? 1 : 0; len += 1; - if (PQputCopyData(curs->conn->pgconn, replybuf, len) <= 0 || - PQflush(curs->conn->pgconn) != 0) { + if (PQputCopyData(pgconn, replybuf, len) <= 0 || PQflush(pgconn) != 0) { return 0; } - gettimeofday(&curs->repl_last_io, NULL); + gettimeofday(&repl->last_io, NULL); return 1; } @@ -1723,12 +1730,15 @@ pq_send_replication_feedback(cursorObject* curs, int reply_requested) manages to send keepalive messages to the server as needed. */ int -pq_copy_both(cursorObject *curs, PyObject *consume, int decode, double keepalive_interval) +pq_copy_both(replicationCursorObject *repl, PyObject *consume, int decode, + double keepalive_interval) { + cursorObject *curs = &repl->cur; + connectionObject *conn = curs->conn; + PGconn *pgconn = conn->pgconn; PyObject *msg, *tmp = NULL; PyObject *consume_func = NULL; int fd, sel, ret = -1; - PGconn *pgconn; fd_set fds; struct timeval keep_intr, curr_time, ping_time, timeout; @@ -1738,13 +1748,12 @@ pq_copy_both(cursorObject *curs, PyObject *consume, int decode, double keepalive } CLEARPGRES(curs->pgres); - pgconn = curs->conn->pgconn; keep_intr.tv_sec = (int)keepalive_interval; keep_intr.tv_usec = (keepalive_interval - keep_intr.tv_sec)*1.0e6; while (1) { - msg = pq_read_replication_message(curs, decode); + msg = pq_read_replication_message(repl, decode); if (!msg) { goto exit; } @@ -1753,7 +1762,7 @@ pq_copy_both(cursorObject *curs, PyObject *consume, int decode, double keepalive fd = PQsocket(pgconn); if (fd < 0) { - pq_raise(curs->conn, curs, NULL); + pq_raise(conn, curs, NULL); goto exit; } @@ -1763,7 +1772,7 @@ pq_copy_both(cursorObject *curs, PyObject *consume, int decode, double keepalive /* how long can we wait before we need to send a keepalive? 
*/ gettimeofday(&curr_time, NULL); - timeradd(&curs->repl_last_io, &keep_intr, &ping_time); + timeradd(&repl->last_io, &keep_intr, &ping_time); timersub(&ping_time, &curr_time, &timeout); if (timeout.tv_sec >= 0) { @@ -1787,8 +1796,8 @@ pq_copy_both(cursorObject *curs, PyObject *consume, int decode, double keepalive } if (sel == 0) { - if (!pq_send_replication_feedback(curs, 0)) { - pq_raise(curs->conn, curs, NULL); + if (!pq_send_replication_feedback(repl, 0)) { + pq_raise(conn, curs, NULL); goto exit; } } @@ -1876,7 +1885,7 @@ pq_fetch(cursorObject *curs, int no_result) Dprintf("pq_fetch: data from a streaming replication slot (no tuples)"); curs->rowcount = -1; ex = 0; - /* nothing to do here: _pq_copy_both_v3 will be called separately */ + /* nothing to do here: pq_copy_both will be called separately */ CLEARPGRES(curs->pgres); break; diff --git a/psycopg/pqpath.h b/psycopg/pqpath.h index a858a2692..568f07682 100644 --- a/psycopg/pqpath.h +++ b/psycopg/pqpath.h @@ -27,6 +27,7 @@ #define PSYCOPG_PQPATH_H 1 #include "psycopg/cursor.h" +#include "psycopg/replication_cursor.h" #include "psycopg/connection.h" /* macro to clean the pg result */ @@ -72,9 +73,10 @@ HIDDEN int pq_execute_command_locked(connectionObject *conn, RAISES HIDDEN void pq_complete_error(connectionObject *conn, PGresult **pgres, char **error); -HIDDEN int pq_copy_both(cursorObject *curs, PyObject *consumer, +/* replication protocol support */ +HIDDEN int pq_copy_both(replicationCursorObject *repl, PyObject *consumer, int decode, double keepalive_interval); -HIDDEN PyObject *pq_read_replication_message(cursorObject *curs, int decode); -HIDDEN int pq_send_replication_feedback(cursorObject *curs, int reply_requested); +HIDDEN PyObject *pq_read_replication_message(replicationCursorObject *repl, int decode); +HIDDEN int pq_send_replication_feedback(replicationCursorObject *repl, int reply_requested); #endif /* !defined(PSYCOPG_PQPATH_H) */ diff --git a/psycopg/psycopgmodule.c 
b/psycopg/psycopgmodule.c index f9f29a2ec..25e325981 100644 --- a/psycopg/psycopgmodule.c +++ b/psycopg/psycopgmodule.c @@ -28,6 +28,7 @@ #include "psycopg/connection.h" #include "psycopg/cursor.h" +#include "psycopg/replication_cursor.h" #include "psycopg/replication_message.h" #include "psycopg/green.h" #include "psycopg/lobject.h" @@ -917,6 +918,9 @@ INIT_MODULE(_psycopg)(void) Py_TYPE(&cursorType) = &PyType_Type; if (PyType_Ready(&cursorType) == -1) goto exit; + Py_TYPE(&replicationCursorType) = &PyType_Type; + if (PyType_Ready(&replicationCursorType) == -1) goto exit; + Py_TYPE(&replicationMessageType) = &PyType_Type; if (PyType_Ready(&replicationMessageType) == -1) goto exit; @@ -1000,7 +1004,7 @@ INIT_MODULE(_psycopg)(void) /* Initialize the PyDateTimeAPI everywhere is used */ PyDateTime_IMPORT; if (psyco_adapter_datetime_init()) { goto exit; } - if (psyco_curs_datetime_init()) { goto exit; } + if (psyco_repl_curs_datetime_init()) { goto exit; } if (psyco_replmsg_datetime_init()) { goto exit; } Py_TYPE(&pydatetimeType) = &PyType_Type; @@ -1044,7 +1048,8 @@ INIT_MODULE(_psycopg)(void) /* put new types in module dictionary */ PyModule_AddObject(module, "connection", (PyObject*)&connectionType); PyModule_AddObject(module, "cursor", (PyObject*)&cursorType); - PyModule_AddObject(module, "replicationMessage", (PyObject*)&replicationMessageType); + PyModule_AddObject(module, "ReplicationCursor", (PyObject*)&replicationCursorType); + PyModule_AddObject(module, "ReplicationMessage", (PyObject*)&replicationMessageType); PyModule_AddObject(module, "ISQLQuote", (PyObject*)&isqlquoteType); PyModule_AddObject(module, "Notify", (PyObject*)¬ifyType); PyModule_AddObject(module, "Xid", (PyObject*)&xidType); diff --git a/psycopg/replication_cursor.h b/psycopg/replication_cursor.h new file mode 100644 index 000000000..1b6dbfab9 --- /dev/null +++ b/psycopg/replication_cursor.h @@ -0,0 +1,77 @@ +/* replication_cursor.h - definition for the psycopg replication cursor type + * + * 
Copyright (C) 2015 Daniele Varrazzo + * + * This file is part of psycopg. + * + * psycopg2 is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * In addition, as a special exception, the copyright holders give + * permission to link this program with the OpenSSL library (or with + * modified versions of OpenSSL that use the same license as OpenSSL), + * and distribute linked combinations including the two. + * + * You must obey the GNU Lesser General Public License in all respects for + * all of the code used other than OpenSSL. + * + * psycopg2 is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. + */ + +#ifndef PSYCOPG_REPLICATION_CURSOR_H +#define PSYCOPG_REPLICATION_CURSOR_H 1 + +#include "psycopg/cursor.h" +#include "libpq_support.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern HIDDEN PyTypeObject replicationCursorType; + +typedef struct replicationCursorObject { + cursorObject cur; + + int started:1; /* if replication is started */ + int consuming:1; /* if running the consume loop */ + + struct timeval last_io; /* timestamp of the last exchange with the server */ + struct timeval keepalive_interval; /* interval for keepalive messages in replication mode */ + + XLogRecPtr write_lsn; /* LSN stats for replication feedback messages */ + XLogRecPtr flush_lsn; + XLogRecPtr apply_lsn; + int feedback_pending; /* flag set when we couldn't send the feedback to the server */ +} replicationCursorObject; + + +RAISES_NEG int psyco_repl_curs_datetime_init(void); + +/* exception-raising macros */ +#define EXC_IF_REPLICATING(self, cmd) \ +do \ + if ((self)->started) { \ + 
PyErr_SetString(ProgrammingError, \ + #cmd " cannot be used when replication is already in progress"); \ + return NULL; } \ +while (0) + +#define EXC_IF_NOT_REPLICATING(self, cmd) \ +do \ + if (!(self)->started) { \ + PyErr_SetString(ProgrammingError, \ + #cmd " cannot be used when replication is not in progress"); \ + return NULL; } \ +while (0) + +#ifdef __cplusplus +} +#endif + +#endif /* !defined(PSYCOPG_REPLICATION_CURSOR_H) */ diff --git a/psycopg/replication_cursor_type.c b/psycopg/replication_cursor_type.c new file mode 100644 index 000000000..d1f7939a5 --- /dev/null +++ b/psycopg/replication_cursor_type.c @@ -0,0 +1,360 @@ +/* replication_cursor_type.c - python interface to replication cursor objects + * + * Copyright (C) 2015 Daniele Varrazzo + * + * This file is part of psycopg. + * + * psycopg2 is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * In addition, as a special exception, the copyright holders give + * permission to link this program with the OpenSSL library (or with + * modified versions of OpenSSL that use the same license as OpenSSL), + * and distribute linked combinations including the two. + * + * You must obey the GNU Lesser General Public License in all respects for + * all of the code used other than OpenSSL. + * + * psycopg2 is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ */ + +#define PSYCOPG_MODULE +#include "psycopg/psycopg.h" + +#include "psycopg/replication_cursor.h" +#include "psycopg/replication_message.h" +#include "psycopg/green.h" +#include "psycopg/pqpath.h" + +#include +#include + +/* python */ +#include "datetime.h" + + +#define psyco_repl_curs_start_replication_expert_doc \ +"start_replication_expert(command, writer=None, keepalive_interval=10) -- Start replication stream with a directly given command." + +static PyObject * +psyco_repl_curs_start_replication_expert(replicationCursorObject *self, + PyObject *args, PyObject *kwargs) +{ + cursorObject *curs = &self->cur; + connectionObject *conn = self->cur.conn; + PyObject *res = NULL; + char *command; + static char *kwlist[] = {"command", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &command)) { + return NULL; + } + + EXC_IF_CURS_CLOSED(curs); + EXC_IF_GREEN(start_replication_expert); + EXC_IF_TPC_PREPARED(conn, start_replication_expert); + EXC_IF_REPLICATING(self, start_replication_expert); + + Dprintf("psyco_repl_curs_start_replication_expert: %s", command); + + /* self->copysize = 0;*/ + + gettimeofday(&self->last_io, NULL); + + if (pq_execute(curs, command, conn->async, 1 /* no_result */, 1 /* no_begin */) >= 0) { + res = Py_None; + Py_INCREF(res); + + self->started = 1; + } + + return res; +} + +#define psyco_repl_curs_consume_stream_doc \ +"consume_stream(consumer, keepalive_interval=10) -- Consume replication stream." 
+ +static PyObject * +psyco_repl_curs_consume_stream(replicationCursorObject *self, + PyObject *args, PyObject *kwargs) +{ + cursorObject *curs = &self->cur; + PyObject *consume = NULL, *res = NULL; + int decode = 0; + double keepalive_interval = 10; + static char *kwlist[] = {"consume", "decode", "keepalive_interval", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|id", kwlist, + &consume, &decode, &keepalive_interval)) { + return NULL; + } + + EXC_IF_CURS_CLOSED(curs); + EXC_IF_CURS_ASYNC(curs, consume_stream); + EXC_IF_GREEN(consume_stream); + EXC_IF_TPC_PREPARED(self->cur.conn, consume_stream); + EXC_IF_NOT_REPLICATING(self, consume_stream); + + if (self->consuming) { + PyErr_SetString(ProgrammingError, + "consume_stream cannot be used when already in the consume loop"); + return NULL; + } + + Dprintf("psyco_repl_curs_consume_stream"); + + if (keepalive_interval < 1.0) { + psyco_set_error(ProgrammingError, curs, "keepalive_interval must be >= 1 (sec)"); + return NULL; + } + + self->consuming = 1; + + if (pq_copy_both(self, consume, decode, keepalive_interval) >= 0) { + res = Py_None; + Py_INCREF(res); + } + + self->consuming = 0; + + return res; +} + +#define psyco_repl_curs_read_message_doc \ +"read_message(decode=True) -- Try reading a replication message from the server (non-blocking)." 
+ +static PyObject * +psyco_repl_curs_read_message(replicationCursorObject *self, + PyObject *args, PyObject *kwargs) +{ + cursorObject *curs = &self->cur; + int decode = 1; + static char *kwlist[] = {"decode", NULL}; + + EXC_IF_CURS_CLOSED(curs); + EXC_IF_GREEN(read_message); + EXC_IF_TPC_PREPARED(self->cur.conn, read_message); + EXC_IF_NOT_REPLICATING(self, read_message); + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, + &decode)) { + return NULL; + } + + return pq_read_replication_message(self, decode); +} + +static PyObject * +repl_curs_flush_feedback(replicationCursorObject *self, int reply) +{ + if (!(self->feedback_pending || reply)) + Py_RETURN_TRUE; + + if (pq_send_replication_feedback(self, reply)) { + self->feedback_pending = 0; + Py_RETURN_TRUE; + } else { + self->feedback_pending = 1; + Py_RETURN_FALSE; + } +} + +#define psyco_repl_curs_send_feedback_doc \ +"send_feedback(write_lsn=0, flush_lsn=0, apply_lsn=0, reply=False) -- Try sending a replication feedback message to the server and optionally request a reply." 
+ +static PyObject * +psyco_repl_curs_send_feedback(replicationCursorObject *self, + PyObject *args, PyObject *kwargs) +{ + cursorObject *curs = &self->cur; + XLogRecPtr write_lsn = InvalidXLogRecPtr, + flush_lsn = InvalidXLogRecPtr, + apply_lsn = InvalidXLogRecPtr; + int reply = 0; + static char* kwlist[] = {"write_lsn", "flush_lsn", "apply_lsn", "reply", NULL}; + + EXC_IF_CURS_CLOSED(curs); + EXC_IF_NOT_REPLICATING(self, send_feedback); + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|KKKi", kwlist, + &write_lsn, &flush_lsn, &apply_lsn, &reply)) { + return NULL; + } + + if (write_lsn > self->write_lsn) + self->write_lsn = write_lsn; + + if (flush_lsn > self->flush_lsn) + self->flush_lsn = flush_lsn; + + if (apply_lsn > self->apply_lsn) + self->apply_lsn = apply_lsn; + + self->feedback_pending = 1; + + return repl_curs_flush_feedback(self, reply); +} + +#define psyco_repl_curs_flush_feedback_doc \ +"flush_feedback(reply=False) -- Try flushing the latest pending replication feedback message to the server and optionally request a reply." 
+ +static PyObject * +psyco_repl_curs_flush_feedback(replicationCursorObject *self, + PyObject *args, PyObject *kwargs) +{ + cursorObject *curs = &self->cur; + int reply = 0; + static char *kwlist[] = {"reply", NULL}; + + EXC_IF_CURS_CLOSED(curs); + EXC_IF_NOT_REPLICATING(self, flush_feedback); + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, + &reply)) { + return NULL; + } + + return repl_curs_flush_feedback(self, reply); +} + + +RAISES_NEG int +psyco_repl_curs_datetime_init(void) +{ + Dprintf("psyco_repl_curs_datetime_init: datetime init"); + + PyDateTime_IMPORT; + + if (!PyDateTimeAPI) { + PyErr_SetString(PyExc_ImportError, "datetime initialization failed"); + return -1; + } + return 0; +} + +#define psyco_repl_curs_io_timestamp_doc \ +"io_timestamp -- the timestamp of latest IO with the server" + +static PyObject * +psyco_repl_curs_get_io_timestamp(replicationCursorObject *self) +{ + cursorObject *curs = &self->cur; + PyObject *tval, *res = NULL; + double seconds; + + EXC_IF_CURS_CLOSED(curs); + + seconds = self->last_io.tv_sec + self->last_io.tv_usec / 1.0e6; + + tval = Py_BuildValue("(d)", seconds); + if (tval) { + res = PyDateTime_FromTimestamp(tval); + Py_DECREF(tval); + } + return res; +} + +/* object method list */ + +static struct PyMethodDef replicationCursorObject_methods[] = { + {"start_replication_expert", (PyCFunction)psyco_repl_curs_start_replication_expert, + METH_VARARGS|METH_KEYWORDS, psyco_repl_curs_start_replication_expert_doc}, + {"consume_stream", (PyCFunction)psyco_repl_curs_consume_stream, + METH_VARARGS|METH_KEYWORDS, psyco_repl_curs_consume_stream_doc}, + {"read_message", (PyCFunction)psyco_repl_curs_read_message, + METH_VARARGS|METH_KEYWORDS, psyco_repl_curs_read_message_doc}, + {"send_feedback", (PyCFunction)psyco_repl_curs_send_feedback, + METH_VARARGS|METH_KEYWORDS, psyco_repl_curs_send_feedback_doc}, + {"flush_feedback", (PyCFunction)psyco_repl_curs_flush_feedback, + METH_VARARGS|METH_KEYWORDS, 
psyco_repl_curs_flush_feedback_doc}, + {NULL} +}; + +/* object calculated member list */ + +static struct PyGetSetDef replicationCursorObject_getsets[] = { + { "io_timestamp", + (getter)psyco_repl_curs_get_io_timestamp, NULL, + psyco_repl_curs_io_timestamp_doc, NULL }, + {NULL} +}; + +static int +replicationCursor_setup(replicationCursorObject* self) +{ + self->started = 0; + self->consuming = 0; + + self->write_lsn = InvalidXLogRecPtr; + self->flush_lsn = InvalidXLogRecPtr; + self->apply_lsn = InvalidXLogRecPtr; + self->feedback_pending = 0; + + return 0; +} + +static int +replicationCursor_init(PyObject *obj, PyObject *args, PyObject *kwargs) +{ + replicationCursor_setup((replicationCursorObject *)obj); + return cursor_init(obj, args, kwargs); +} + +static PyObject * +replicationCursor_repr(replicationCursorObject *self) +{ + return PyString_FromFormat( + "", self, self->cur.closed); +} + + +/* object type */ + +#define replicationCursorType_doc \ +"A database replication cursor." + +PyTypeObject replicationCursorType = { + PyVarObject_HEAD_INIT(NULL, 0) + "psycopg2.extensions.ReplicationCursor", + sizeof(replicationCursorObject), 0, + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + (reprfunc)replicationCursor_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + (reprfunc)replicationCursor_repr, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_ITER | + Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + replicationCursorType_doc, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + replicationCursorObject_methods, /*tp_methods*/ + 0, /*tp_members*/ + replicationCursorObject_getsets, /*tp_getset*/ + &cursorType, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, 
/*tp_dictoffset*/ + replicationCursor_init, /*tp_init*/ + 0, /*tp_alloc*/ + 0, /*tp_new*/ +}; diff --git a/psycopg/replication_message_type.c b/psycopg/replication_message_type.c index 61833931b..d4b0457b7 100644 --- a/psycopg/replication_message_type.c +++ b/psycopg/replication_message_type.c @@ -49,7 +49,7 @@ static PyObject * replmsg_repr(replicationMessageObject *self) { return PyString_FromFormat( - "", + "", self, self->data_size, XLOGFMTARGS(self->data_start), XLOGFMTARGS(self->wal_end), self->send_time); } diff --git a/psycopg2.cproj b/psycopg2.cproj index 386287c13..682b69d06 100644 --- a/psycopg2.cproj +++ b/psycopg2.cproj @@ -92,6 +92,7 @@ + @@ -225,6 +226,7 @@ + diff --git a/setup.py b/setup.py index 339d7f2a4..18c47b7cc 100644 --- a/setup.py +++ b/setup.py @@ -466,7 +466,7 @@ def is_py_64(): 'connection_int.c', 'connection_type.c', 'cursor_int.c', 'cursor_type.c', - 'replication_message_type.c', + 'replication_cursor_type.c', 'replication_message_type.c', 'diagnostics_type.c', 'error_type.c', 'lobject_int.c', 'lobject_type.c', 'notify_type.c', 'xid_type.c', @@ -482,7 +482,7 @@ def is_py_64(): # headers 'config.h', 'pgtypes.h', 'psycopg.h', 'python.h', 'connection.h', 'cursor.h', 'diagnostics.h', 'error.h', 'green.h', 'lobject.h', - 'replication_message.h', + 'replication_cursor.h', 'replication_message.h', 'notify.h', 'pqpath.h', 'xid.h', 'libpq_support.h', 'win32_support.h', diff --git a/tests/test_replication.py b/tests/test_replication.py index 5c029c884..2dbb00866 100644 --- a/tests/test_replication.py +++ b/tests/test_replication.py @@ -47,12 +47,16 @@ def tearDown(self): # first close all connections, as they might keep the slot(s) active super(ReplicationTestCase, self).tearDown() + import time + time.sleep(0.025) # sometimes the slot is still active, wait a little + if self._slots: - kill_conn = self.repl_connect(connection_factory=PhysicalReplicationConnection) + kill_conn = self.connect() if kill_conn: kill_cur = kill_conn.cursor() for slot 
in self._slots: - kill_cur.drop_replication_slot(slot) + kill_cur.execute("SELECT pg_drop_replication_slot(%s)", (slot,)) + kill_conn.commit() kill_conn.close() def create_replication_slot(self, cur, slot_name=testconfig.repl_slot, **kwargs): @@ -127,7 +131,7 @@ def test_stop_replication(self): cur.start_replication(self.slot) def consume(msg): raise StopReplication() - self.assertRaises(StopReplication, cur.consume_replication_stream, consume) + self.assertRaises(StopReplication, cur.consume_stream, consume) class AsyncReplicationTest(ReplicationTestCase): @@ -148,14 +152,22 @@ def test_async_replication(self): self.msg_count = 0 def consume(msg): + # just check the methods + log = "%s: %s" % (cur.io_timestamp, repr(msg)) + self.msg_count += 1 if self.msg_count > 3: + cur.flush_feedback(reply=True) raise StopReplication() + cur.send_feedback(flush_lsn=msg.data_start) + + self.assertRaises(psycopg2.ProgrammingError, cur.consume_stream, consume) + def process_stream(): from select import select while True: - msg = cur.read_replication_message() + msg = cur.read_message() if msg: consume(msg) else: From 23abe4f501ce60468e9e6b089910068265342368 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 20 Oct 2015 12:36:13 +0200 Subject: [PATCH 42/60] Add quick start to the replication doc, minor doc fixes. --- doc/src/extras.rst | 248 ++++++++++++++++++----------- lib/extras.py | 2 +- psycopg/replication_message_type.c | 2 +- 3 files changed, 161 insertions(+), 91 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index 9384a961e..2a7bed26b 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -141,8 +141,81 @@ Logging cursor .. autoclass:: MinTimeLoggingCursor -Replication cursor -^^^^^^^^^^^^^^^^^^ +Replication protocol support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Modern PostgreSQL servers (version 9.0 and above) support replication. 
The +replication protocol is built on top of the client-server protocol and can be +operated using ``libpq``, as such it can be also operated by ``psycopg2``. +The replication protocol can be operated on both synchronous and +:ref:`asynchronous <async-support>` connections. + +Server version 9.4 adds a new feature called *Logical Replication*. + +.. seealso:: + + - PostgreSQL `Streaming Replication Protocol`__ + + .. __: http://www.postgresql.org/docs/current/static/protocol-replication.html + + +Logical replication Quick-Start +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You must be using PostgreSQL server version 9.4 or above to run this quick +start. + +Make sure that replication connections are permitted for user ``postgres`` in +``pg_hba.conf`` and reload the server configuration. You also need to set +``wal_level=logical`` and ``max_wal_senders``, ``max_replication_slots`` to a +value greater than zero in ``postgresql.conf`` (these changes require a server +restart). Create a database ``psycopg2test``. + +Then run the following code to quickly try the replication support out. This +is not production code -- it has no error handling, it sends feedback too +often, etc.
-- and it's only intended as a simple demo of logical +replication:: + + from __future__ import print_function + import sys + import psycopg2 + import psycopg2.extras + + conn = psycopg2.connect('dbname=psycopg2test user=postgres', + connection_factory=psycopg2.extras.LogicalReplicationConnection) + cur = conn.cursor() + try: + cur.start_replication(slot_name='pytest') + except psycopg2.ProgrammingError: + cur.create_replication_slot('pytest', output_plugin='test_decoding') + cur.start_replication(slot_name='pytest') + + class DemoConsumer(object): + def __call__(self, msg): + print(msg.payload) + msg.cursor.send_feedback(flush_lsn=msg.data_start) + + democonsumer = DemoConsumer() + + print("Starting streaming, press Control-C to end...", file=sys.stderr) + try: + cur.consume_stream(democonsumer) + except KeyboardInterrupt: + cur.close() + conn.close() + print("The slot 'pytest' still exists. Drop it with SELECT pg_drop_replication_slot('pytest'); if no longer needed.", file=sys.stderr) + print("WARNING: Transaction logs will accumulate in pg_xlog until the slot is dropped.", file=sys.stderr) + + +You can now make changes to the ``psycopg2test`` database using a normal +psycopg2 session, ``psql``, etc. and see the logical decoding stream printed +by this demo client. + +This will continue running until terminated with ``Control-C``. + + +Replication connection and cursor classes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: ReplicationConnectionBase @@ -177,17 +250,11 @@ The following replication types are defined: phys_cur = phys_conn.cursor() Both `LogicalReplicationConnection` and `PhysicalReplicationConnection` use - `ReplicationCursor` for actual communication on the connection. - -.. seealso:: - - - PostgreSQL `Streaming Replication Protocol`__ - - .. __: http://www.postgresql.org/docs/current/static/protocol-replication.html + `ReplicationCursor` for actual communication with the server. 
-The individual messages in the replication stream are presented by -`ReplicationMessage` objects: +The individual messages in the replication stream are represented by +`ReplicationMessage` objects (both logical and physical type): .. autoclass:: ReplicationMessage @@ -249,7 +316,7 @@ The individual messages in the replication stream are presented by replication slot is created by default. No output plugin parameter is required or allowed when creating a physical replication slot. - In either case, the type of slot being created can be specified + In either case the type of slot being created can be specified explicitly using *slot_type* parameter. Replication slots are a feature of PostgreSQL server starting with @@ -295,25 +362,25 @@ The individual messages in the replication stream are presented by replication can be used with both types of connection. On the other hand, physical replication doesn't require a named - replication slot to be used, only logical one does. In any case, - logical replication and replication slots are a feature of PostgreSQL - server starting with version 9.4. Physical replication can be used - starting with 9.0. + replication slot to be used, only logical replication does. In any + case logical replication and replication slots are a feature of + PostgreSQL server starting with version 9.4. Physical replication can + be used starting with 9.0. If *start_lsn* is specified, the requested stream will start from that - LSN. The default is `!None`, which passes the LSN ``0/0``, causing - replay to begin at the last point at which the server got replay - confirmation from the client for, or the oldest available point for a - new slot. + LSN. The default is `!None` which passes the LSN ``0/0`` causing + replay to begin at the last point for which the server got flush + confirmation from the client, or the oldest available point for a new + slot. 
The server might produce an error if a WAL file for the given LSN has - already been recycled, or it may silently start streaming from a later + already been recycled or it may silently start streaming from a later position: the client can verify the actual position using information - provided the `ReplicationMessage` attributes. The exact server + provided by the `ReplicationMessage` attributes. The exact server behavior depends on the type of replication and use of slots. - A *timeline* parameter can only be specified with physical replication - and only starting with server version 9.3. + The *timeline* parameter can only be specified with physical + replication and only starting with server version 9.3. A dictionary of *options* may be passed to the logical decoding plugin on a logical replication slot. The set of supported options depends @@ -324,8 +391,9 @@ The individual messages in the replication stream are presented by `start_replication_expert()` internally. After starting the replication, to actually consume the incoming - server messages, use `consume_stream()` or implement a loop around - `read_message()` in case of asynchronous connection. + server messages use `consume_stream()` or implement a loop around + `read_message()` in case of :ref:`asynchronous connection + <async-support>`. .. method:: start_replication_expert(command) @@ -343,66 +411,66 @@ The individual messages in the replication stream are presented by This method can only be used with synchronous connection. For asynchronous connections see `read_message()`. - Before calling this method to consume the stream, use + Before calling this method to consume the stream use `start_replication()` first. - When called, this method enters an endless loop, reading messages from - the server and passing them to ``consume()``, then waiting for more - messages from the server.
In order to make this method break out of - the loop and return, ``consume()`` can throw a `StopReplication` - exception (any unhandled exception will make it break out of the loop - as well). + This method enters an endless loop reading messages from the server + and passing them to ``consume()``, then waiting for more messages from + the server. In order to make this method break out of the loop and + return, ``consume()`` can throw a `StopReplication` exception. Any + unhandled exception will make it break out of the loop as well. - If *decode* is set to `!True`, the messages read from the server are + If *decode* is set to `!True` the messages read from the server are converted according to the connection `~connection.encoding`. This parameter should not be set with physical replication. - This method also sends keepalive messages to the server, in case there + This method also sends keepalive messages to the server in case there were no new data from the server for the duration of *keepalive_interval* (in seconds). The value of this parameter must - be equal to at least 1 second, but it can have a fractional part. + be set to at least 1 second, but it can have a fractional part. + + The *msg* objects passed to ``consume()`` are instances of + `ReplicationMessage` class. + + After processing a certain amount of messages the client should send a + confirmation message to the server. This should be done by calling + `send_feedback()` method on the corresponding replication cursor. A + reference to the cursor is provided in the `ReplicationMessage` as an + attribute. The following example is a sketch implementation of ``consume()`` callable for logical replication:: class LogicalStreamConsumer(object): + ...
+ def __call__(self, msg): - self.store_message_data(msg.payload) + self.process_message(msg.payload) - if self.should_report_to_the_server_now(msg): + if self.should_send_feedback(msg): msg.cursor.send_feedback(flush_lsn=msg.data_start) consumer = LogicalStreamConsumer() cur.consume_stream(consumer, decode=True) - The *msg* objects passed to ``consume()`` are instances of - `ReplicationMessage` class. - - After storing certain amount of messages' data reliably, the client - should send a confirmation message to the server. This should be done - by calling `send_feedback()` method on the corresponding replication - cursor. A reference to the cursor is provided in the - `ReplicationMessage` as an attribute. - .. warning:: - When using replication with slots, failure to properly notify the - server by constantly consuming and reporting success at - appropriate times can eventually lead to "disk full" condition on - the server, because the server retains all the WAL segments that - might be needed to stream the changes via all of the currently - open replication slots. + When using replication with slots, failure to constantly consume + *and* report success to the server appropriately can eventually + lead to "disk full" condition on the server, because the server + retains all the WAL segments that might be needed to stream the + changes via all of the currently open replication slots. - On the other hand, it is not recommended to send a confirmation - after every processed message, since that will put an unnecessary - load on network and the server. A possible strategy is to confirm - after every COMMIT message. + On the other hand, it is not recommended to send confirmation + after *every* processed message, since that will put an + unnecessary load on network and the server. A possible strategy + is to confirm after every COMMIT message. .. 
method:: send_feedback(write_lsn=0, flush_lsn=0, apply_lsn=0, reply=False) :param write_lsn: a LSN position up to which the client has written the data locally - :param flush_lsn: a LSN position up to which the client has stored the + :param flush_lsn: a LSN position up to which the client has processed the data reliably (the server is allowed to discard all and every data that predates this LSN) :param apply_lsn: a LSN position up to which the warm standby server @@ -411,7 +479,7 @@ The individual messages in the replication stream are presented by :param reply: request the server to send back a keepalive message immediately Use this method to report to the server that all messages up to a - certain LSN position have been stored on the client and may be + certain LSN position have been processed on the client and may be discarded on the server. This method can also be called with all default parameters' values to @@ -433,13 +501,14 @@ The individual messages in the replication stream are presented by Returns `!True` if the feedback message was sent successfully, `!False` otherwise. - Low-level methods for asynchronous connection operation. + Low-level replication cursor methods for :ref:`asynchronous connection + ` operation. - With the synchronous connection, a call to `consume_stream()` handles all + With the synchronous connection a call to `consume_stream()` handles all the complexity of handling the incoming messages and sending keepalive replies, but at times it might be beneficial to use low-level interface - for better control, in particular to `~select.select()` on multiple - sockets. The following methods are provided for asynchronous operation: + for better control, in particular to `~select` on multiple sockets. The + following methods are provided for asynchronous operation: .. 
method:: read_message(decode=True) @@ -449,16 +518,16 @@ The individual messages in the replication stream are presented by This method should be used in a loop with asynchronous connections after calling `start_replication()` once. - It tries to read the next message from the server, without blocking - and returns an instance of `ReplicationMessage` or `!None`, in case - there are no more data messages from the server at the moment. + It tries to read the next message from the server without blocking and + returns an instance of `ReplicationMessage` or `!None`, in case there + are no more data messages from the server at the moment. It is expected that the calling code will call this method repeatedly - in order to consume all of the messages that might have been buffered, - until `!None` is returned. After receiving a `!None` value from this - method, the caller should use `~select.select()` or `~select.poll()` - on the corresponding connection to block the process until there is - more data from the server. + in order to consume all of the messages that might have been buffered + until `!None` is returned. After receiving `!None` from this method + the caller should use `~select.select()` or `~select.poll()` on the + corresponding connection to block the process until there is more data + from the server. The server can send keepalive messages to the client periodically. Such messages are silently consumed by this method and are never @@ -480,24 +549,25 @@ The individual messages in the replication stream are presented by An actual example of asynchronous operation might look like this:: - def consume(msg): - ... 
- - keepalive_interval = 10.0 - while True: - msg = cur.read_message() - if msg: - consume(msg) - else: - now = datetime.now() - timeout = keepalive_interval - (now - cur.io_timestamp).total_seconds() - if timeout > 0: - sel = select.select([cur], [], [], timeout) - else: - sel = ([], [], []) - - if not sel[0]: - cur.send_feedback() + def consume(msg): + ... + + keepalive_interval = 10.0 + while True: + msg = cur.read_message() + if msg: + consume(msg) + else: + now = datetime.now() + timeout = keepalive_interval - (now - cur.io_timestamp).total_seconds() + if timeout > 0: + sel = select.select([cur], [], [], timeout) + else: + sel = ([], [], []) + + if not sel[0]: + # timed out, send keepalive message + cur.send_feedback() .. index:: pair: Cursor; Replication diff --git a/lib/extras.py b/lib/extras.py index 7c713573d..8e1373c15 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -514,7 +514,7 @@ class StopReplication(Exception): class ReplicationCursor(_replicationCursor): - """A cursor used for communication on the replication protocol.""" + """A cursor used for communication on replication connections.""" def create_replication_slot(self, slot_name, slot_type=None, output_plugin=None): """Create streaming replication slot.""" diff --git a/psycopg/replication_message_type.c b/psycopg/replication_message_type.c index d4b0457b7..893ce7ad5 100644 --- a/psycopg/replication_message_type.c +++ b/psycopg/replication_message_type.c @@ -146,7 +146,7 @@ static struct PyGetSetDef replicationMessageObject_getsets[] = { /* object type */ #define replicationMessageType_doc \ -"A database replication message." +"A replication protocol message." PyTypeObject replicationMessageType = { PyVarObject_HEAD_INIT(NULL, 0) From b3f8e9adb56f8db16fb75ebf56ed262a52095ebb Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 20 Oct 2015 12:54:22 +0200 Subject: [PATCH 43/60] Fix send_time printf format in replmsg_repr(). 
--- psycopg/replication_message_type.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/psycopg/replication_message_type.c b/psycopg/replication_message_type.c index 893ce7ad5..f607d2ba8 100644 --- a/psycopg/replication_message_type.c +++ b/psycopg/replication_message_type.c @@ -49,7 +49,7 @@ static PyObject * replmsg_repr(replicationMessageObject *self) { return PyString_FromFormat( - "", + "", self, self->data_size, XLOGFMTARGS(self->data_start), XLOGFMTARGS(self->wal_end), self->send_time); } From 089e745af64b660574424cae88011d0689d56c5c Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 20 Oct 2015 12:55:43 +0200 Subject: [PATCH 44/60] Fix cursor_init() declaration for use in replication_cursor_type.c --- psycopg/cursor.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/psycopg/cursor.h b/psycopg/cursor.h index 18e31e5fe..44d8a47a4 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -92,12 +92,13 @@ struct cursorObject { /* C-callable functions in cursor_int.c and cursor_type.c */ +HIDDEN int cursor_init(PyObject *obj, PyObject *args, PyObject *kwargs); + BORROWED HIDDEN PyObject *curs_get_cast(cursorObject *self, PyObject *oid); HIDDEN void curs_reset(cursorObject *self); HIDDEN int psyco_curs_withhold_set(cursorObject *self, PyObject *pyvalue); HIDDEN int psyco_curs_scrollable_set(cursorObject *self, PyObject *pyvalue); -HIDDEN int psyco_curs_init(PyObject *obj, PyObject *args, PyObject *kwargs); /* exception-raising macros */ #define EXC_IF_CURS_CLOSED(self) \ From 22cbfb26d6c7f596e17166f91b5e0712ff683dcc Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 20 Oct 2015 13:05:43 +0200 Subject: [PATCH 45/60] Actually add replication tests to the test suite. 
--- psycopg2.cproj | 1 + tests/__init__.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/psycopg2.cproj b/psycopg2.cproj index 682b69d06..75d961806 100644 --- a/psycopg2.cproj +++ b/psycopg2.cproj @@ -128,6 +128,7 @@ + diff --git a/tests/__init__.py b/tests/__init__.py index 3e677d853..2d2609ced 100755 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -31,6 +31,7 @@ import test_bug_gc import test_cancel import test_connection +import test_replication import test_copy import test_cursor import test_dates @@ -68,6 +69,7 @@ def test_suite(): suite.addTest(test_bug_gc.test_suite()) suite.addTest(test_cancel.test_suite()) suite.addTest(test_connection.test_suite()) + suite.addTest(test_replication.test_suite()) suite.addTest(test_copy.test_suite()) suite.addTest(test_cursor.test_suite()) suite.addTest(test_dates.test_suite()) From 76c7f4a0b5f3ff69499239917fb0aec8b0da6adf Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 22 Oct 2015 16:17:08 +0200 Subject: [PATCH 46/60] Use direct call to consume() callable in pq_copy_both() --- psycopg/pqpath.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index d68869810..30a3d394a 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1737,12 +1737,11 @@ pq_copy_both(replicationCursorObject *repl, PyObject *consume, int decode, connectionObject *conn = curs->conn; PGconn *pgconn = conn->pgconn; PyObject *msg, *tmp = NULL; - PyObject *consume_func = NULL; int fd, sel, ret = -1; fd_set fds; struct timeval keep_intr, curr_time, ping_time, timeout; - if (!(consume_func = PyObject_GetAttrString(consume, "__call__"))) { + if (!PyCallable_Check(consume)) { Dprintf("pq_copy_both: expected callable consume object"); goto exit; } @@ -1804,11 +1803,11 @@ pq_copy_both(replicationCursorObject *repl, PyObject *consume, int decode, continue; } else { - tmp = PyObject_CallFunctionObjArgs(consume_func, msg, NULL); + tmp = PyObject_CallFunctionObjArgs(consume, msg, 
NULL); Py_DECREF(msg); if (tmp == NULL) { - Dprintf("pq_copy_both: consume_func returned NULL"); + Dprintf("pq_copy_both: consume returned NULL"); goto exit; } Py_DECREF(tmp); @@ -1818,7 +1817,6 @@ pq_copy_both(replicationCursorObject *repl, PyObject *consume, int decode, ret = 1; exit: - Py_XDECREF(consume_func); return ret; } From e69dafbeccf4a1ff096759bd531fd771955592da Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Fri, 23 Oct 2015 11:31:55 +0200 Subject: [PATCH 47/60] Move the `decode` parameter to `start_replication()`. It makes more sense this way, because otherwise it must be passed to every call of `read_message()`. --- doc/src/extras.rst | 75 ++++++++++++++++++------------- lib/extras.py | 4 +- psycopg/pqpath.c | 12 +++-- psycopg/pqpath.h | 4 +- psycopg/replication_cursor.h | 7 +-- psycopg/replication_cursor_type.c | 41 +++++++---------- 6 files changed, 72 insertions(+), 71 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index 2a7bed26b..7df68a777 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -185,10 +185,10 @@ replication:: connection_factory=psycopg2.extras.LogicalReplicationConnection) cur = conn.cursor() try: - cur.start_replication(slot_name='pytest') + cur.start_replication(slot_name='pytest', decode=True) # test_decoding produces textual output except psycopg2.ProgrammingError: cur.create_replication_slot('pytest', output_plugin='test_decoding') - cur.start_replication(slot_name='pytest') + cur.start_replication(slot_name='pytest', decode=True) class DemoConsumer(object): def __call__(self, msg): @@ -260,9 +260,12 @@ The individual messages in the replication stream are represented by .. attribute:: payload - The actual data received from the server. An instance of either - ``str`` or ``unicode``, depending on the method that was used to - produce this message. + The actual data received from the server. 
+ + An instance of either `bytes()` or `unicode()`, depending on the value + of `decode` option passed to `ReplicationCursor.start_replication()` + on the connection. See `ReplicationCursor.read_message()` for + details. .. attribute:: data_size @@ -336,7 +339,7 @@ The individual messages in the replication stream are represented by Replication slots are a feature of PostgreSQL server starting with version 9.4. - .. method:: start_replication(slot_name=None, slot_type=None, start_lsn=0, timeline=0, options=None) + .. method:: start_replication(slot_name=None, slot_type=None, start_lsn=0, timeline=0, options=None, decode=False) Start replication on the connection. @@ -352,6 +355,8 @@ The individual messages in the replication stream are represented by can only be used with physical replication) :param options: a dictionary of options to pass to logical replication slot (not allowed with physical replication) + :param decode: a flag indicating that unicode conversion should be + performed on messages received from the server If a *slot_name* is specified, the slot must exist on the server and its type must match the replication type used. @@ -387,6 +392,11 @@ The individual messages in the replication stream are represented by on the output plugin that was used to create the slot. Must be `!None` for physical replication. + If *decode* is set to `!True` the messages received from the server + would be converted according to the connection `~connection.encoding`. + *This parameter should not be set with physical replication or with + logical replication plugins that produce binary output.* + This function constructs a ``START_REPLICATION`` command and calls `start_replication_expert()` internally. @@ -395,43 +405,40 @@ The individual messages in the replication stream are represented by `read_message()` in case of :ref:`asynchronous connection `. - .. method:: start_replication_expert(command) + .. 
method:: start_replication_expert(command, decode=False) - Start replication on the connection using provided ``START_REPLICATION`` - command. + Start replication on the connection using provided + ``START_REPLICATION`` command. See `start_replication()` for + description of *decode* parameter. - .. method:: consume_stream(consume, decode=False, keepalive_interval=10) + .. method:: consume_stream(consume, keepalive_interval=10) :param consume: a callable object with signature ``consume(msg)`` - :param decode: a flag indicating that unicode conversion should be - performed on the messages received from the server :param keepalive_interval: interval (in seconds) to send keepalive messages to the server This method can only be used with synchronous connection. For asynchronous connections see `read_message()`. - Before calling this method to consume the stream use + Before using this method to consume the stream call `start_replication()` first. This method enters an endless loop reading messages from the server - and passing them to ``consume()``, then waiting for more messages from - the server. In order to make this method break out of the loop and - return, ``consume()`` can throw a `StopReplication` exception. Any - unhandled exception will make it break out of the loop as well. + and passing them to ``consume()`` one at a time, then waiting for more + messages from the server. In order to make this method break out of + the loop and return, ``consume()`` can throw a `StopReplication` + exception. Any unhandled exception will make it break out of the loop + as well. - If *decode* is set to `!True` the messages read from the server are - converted according to the connection `~connection.encoding`. This - parameter should not be set with physical replication. + The *msg* object passed to ``consume()`` is an instance of + `ReplicationMessage` class. See `read_message()` for details about + message decoding. 
This method also sends keepalive messages to the server in case there were no new data from the server for the duration of *keepalive_interval* (in seconds). The value of this parameter must be set to at least 1 second, but it can have a fractional part. - The *msg* objects passed to ``consume()`` are instances of - `ReplicationMessage` class. - After processing certain amount of messages the client should send a confirmation message to the server. This should be done by calling `send_feedback()` method on the corresponding replication cursor. A @@ -452,7 +459,7 @@ The individual messages in the replication stream are represented by msg.cursor.send_feedback(flush_lsn=msg.data_start) consumer = LogicalStreamConsumer() - cur.consume_stream(consumer, decode=True) + cur.consume_stream(consumer) .. warning:: @@ -510,17 +517,21 @@ The individual messages in the replication stream are represented by for better control, in particular to `~select` on multiple sockets. The following methods are provided for asynchronous operation: - .. method:: read_message(decode=True) + .. method:: read_message() - :param decode: a flag indicating that unicode conversion should be - performed on the data received from the server + Try to read the next message from the server without blocking and + return an instance of `ReplicationMessage` or `!None`, in case there + are no more data messages from the server at the moment. This method should be used in a loop with asynchronous connections - after calling `start_replication()` once. - - It tries to read the next message from the server without blocking and - returns an instance of `ReplicationMessage` or `!None`, in case there - are no more data messages from the server at the moment. + (after calling `start_replication()` once). For synchronous + connections see `consume_stream()`. 
+ + The returned message's `ReplicationMessage.payload` is an instance of + `unicode()` decoded according to connection `connection.encoding` + *iff* `decode` was set to `!True` in the initial call to + `start_replication()` on this connection, otherwise it is an instance + of `bytes()` with no decoding. It is expected that the calling code will call this method repeatedly in order to consume all of the messages that might have been buffered diff --git a/lib/extras.py b/lib/extras.py index 8e1373c15..8a8d34ff1 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -548,7 +548,7 @@ def drop_replication_slot(self, slot_name): self.execute(command) def start_replication(self, slot_name=None, slot_type=None, start_lsn=0, - timeline=0, options=None): + timeline=0, options=None, decode=False): """Start replication stream.""" command = "START_REPLICATION " @@ -597,7 +597,7 @@ def start_replication(self, slot_name=None, slot_type=None, start_lsn=0, command += "%s %s" % (quote_ident(k, self), _A(str(v))) command += ")" - self.start_replication_expert(command) + self.start_replication_expert(command, decode=decode) # allows replication cursors to be used in select.select() directly def fileno(self): diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 30a3d394a..424ed901d 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1543,7 +1543,7 @@ _pq_copy_out_v3(cursorObject *curs) are never returned to the caller. 
*/ PyObject * -pq_read_replication_message(replicationCursorObject *repl, int decode) +pq_read_replication_message(replicationCursorObject *repl) { cursorObject *curs = &repl->cur; connectionObject *conn = curs->conn; @@ -1555,7 +1555,7 @@ pq_read_replication_message(replicationCursorObject *repl, int decode) PyObject *str = NULL, *result = NULL; replicationMessageObject *msg = NULL; - Dprintf("pq_read_replication_message(decode=%d)", decode); + Dprintf("pq_read_replication_message"); consumed = 0; retry: @@ -1629,8 +1629,7 @@ pq_read_replication_message(replicationCursorObject *repl, int decode) Dprintf("pq_read_replication_message: >>%.*s<<", data_size, buffer + hdr); - /* XXX it would be wise to check if it's really a logical replication */ - if (decode) { + if (repl->decode) { str = PyUnicode_Decode(buffer + hdr, data_size, conn->codec, NULL); } else { str = Bytes_FromStringAndSize(buffer + hdr, data_size); @@ -1730,8 +1729,7 @@ pq_send_replication_feedback(replicationCursorObject *repl, int reply_requested) manages to send keepalive messages to the server as needed. 
*/ int -pq_copy_both(replicationCursorObject *repl, PyObject *consume, int decode, - double keepalive_interval) +pq_copy_both(replicationCursorObject *repl, PyObject *consume, double keepalive_interval) { cursorObject *curs = &repl->cur; connectionObject *conn = curs->conn; @@ -1752,7 +1750,7 @@ pq_copy_both(replicationCursorObject *repl, PyObject *consume, int decode, keep_intr.tv_usec = (keepalive_interval - keep_intr.tv_sec)*1.0e6; while (1) { - msg = pq_read_replication_message(repl, decode); + msg = pq_read_replication_message(repl); if (!msg) { goto exit; } diff --git a/psycopg/pqpath.h b/psycopg/pqpath.h index 568f07682..1348d9c4c 100644 --- a/psycopg/pqpath.h +++ b/psycopg/pqpath.h @@ -75,8 +75,8 @@ RAISES HIDDEN void pq_complete_error(connectionObject *conn, PGresult **pgres, /* replication protocol support */ HIDDEN int pq_copy_both(replicationCursorObject *repl, PyObject *consumer, - int decode, double keepalive_interval); -HIDDEN PyObject *pq_read_replication_message(replicationCursorObject *repl, int decode); + double keepalive_interval); +HIDDEN PyObject *pq_read_replication_message(replicationCursorObject *repl); HIDDEN int pq_send_replication_feedback(replicationCursorObject *repl, int reply_requested); #endif /* !defined(PSYCOPG_PQPATH_H) */ diff --git a/psycopg/replication_cursor.h b/psycopg/replication_cursor.h index 1b6dbfab9..07bf7b543 100644 --- a/psycopg/replication_cursor.h +++ b/psycopg/replication_cursor.h @@ -38,10 +38,11 @@ extern HIDDEN PyTypeObject replicationCursorType; typedef struct replicationCursorObject { cursorObject cur; - int started:1; /* if replication is started */ - int consuming:1; /* if running the consume loop */ + int started:1; /* if replication is started */ + int consuming:1; /* if running the consume loop */ + int decode:1; /* if we should use character decoding on the messages */ - struct timeval last_io; /* timestamp of the last exchange with the server */ + struct timeval last_io ; /* timestamp of the last 
exchange with the server */ struct timeval keepalive_interval; /* interval for keepalive messages in replication mode */ XLogRecPtr write_lsn; /* LSN stats for replication feedback messages */ diff --git a/psycopg/replication_cursor_type.c b/psycopg/replication_cursor_type.c index d1f7939a5..1fd5ea39e 100644 --- a/psycopg/replication_cursor_type.c +++ b/psycopg/replication_cursor_type.c @@ -38,8 +38,8 @@ #include "datetime.h" -#define psyco_repl_curs_start_replication_expert_doc \ -"start_replication_expert(command, writer=None, keepalive_interval=10) -- Start replication stream with a directly given command." +#define psyco_repl_curs_start_replication_expert_doc \ +"start_replication_expert(command, decode=False) -- Start replication with a given command." static PyObject * psyco_repl_curs_start_replication_expert(replicationCursorObject *self, @@ -49,9 +49,10 @@ psyco_repl_curs_start_replication_expert(replicationCursorObject *self, connectionObject *conn = self->cur.conn; PyObject *res = NULL; char *command; - static char *kwlist[] = {"command", NULL}; + long int decode = 0; + static char *kwlist[] = {"command", "decode", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s", kwlist, &command)) { + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|l", kwlist, &command, &decode)) { return NULL; } @@ -60,17 +61,15 @@ psyco_repl_curs_start_replication_expert(replicationCursorObject *self, EXC_IF_TPC_PREPARED(conn, start_replication_expert); EXC_IF_REPLICATING(self, start_replication_expert); - Dprintf("psyco_repl_curs_start_replication_expert: %s", command); - - /* self->copysize = 0;*/ - - gettimeofday(&self->last_io, NULL); + Dprintf("psyco_repl_curs_start_replication_expert: '%s'; decode: %d", command, decode); if (pq_execute(curs, command, conn->async, 1 /* no_result */, 1 /* no_begin */) >= 0) { res = Py_None; Py_INCREF(res); self->started = 1; + self->decode = decode; + gettimeofday(&self->last_io, NULL); } return res; @@ -85,12 +84,11 @@ 
psyco_repl_curs_consume_stream(replicationCursorObject *self, { cursorObject *curs = &self->cur; PyObject *consume = NULL, *res = NULL; - int decode = 0; double keepalive_interval = 10; - static char *kwlist[] = {"consume", "decode", "keepalive_interval", NULL}; + static char *kwlist[] = {"consume", "keepalive_interval", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|id", kwlist, - &consume, &decode, &keepalive_interval)) { + &consume, &keepalive_interval)) { return NULL; } @@ -115,7 +113,7 @@ psyco_repl_curs_consume_stream(replicationCursorObject *self, self->consuming = 1; - if (pq_copy_both(self, consume, decode, keepalive_interval) >= 0) { + if (pq_copy_both(self, consume, keepalive_interval) >= 0) { res = Py_None; Py_INCREF(res); } @@ -126,27 +124,19 @@ psyco_repl_curs_consume_stream(replicationCursorObject *self, } #define psyco_repl_curs_read_message_doc \ -"read_message(decode=True) -- Try reading a replication message from the server (non-blocking)." +"read_message() -- Try reading a replication message from the server (non-blocking)." 
static PyObject * -psyco_repl_curs_read_message(replicationCursorObject *self, - PyObject *args, PyObject *kwargs) +psyco_repl_curs_read_message(replicationCursorObject *self) { cursorObject *curs = &self->cur; - int decode = 1; - static char *kwlist[] = {"decode", NULL}; EXC_IF_CURS_CLOSED(curs); EXC_IF_GREEN(read_message); EXC_IF_TPC_PREPARED(self->cur.conn, read_message); EXC_IF_NOT_REPLICATING(self, read_message); - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, - &decode)) { - return NULL; - } - - return pq_read_replication_message(self, decode); + return pq_read_replication_message(self); } static PyObject * @@ -267,7 +257,7 @@ static struct PyMethodDef replicationCursorObject_methods[] = { {"consume_stream", (PyCFunction)psyco_repl_curs_consume_stream, METH_VARARGS|METH_KEYWORDS, psyco_repl_curs_consume_stream_doc}, {"read_message", (PyCFunction)psyco_repl_curs_read_message, - METH_VARARGS|METH_KEYWORDS, psyco_repl_curs_read_message_doc}, + METH_NOARGS, psyco_repl_curs_read_message_doc}, {"send_feedback", (PyCFunction)psyco_repl_curs_send_feedback, METH_VARARGS|METH_KEYWORDS, psyco_repl_curs_send_feedback_doc}, {"flush_feedback", (PyCFunction)psyco_repl_curs_flush_feedback, @@ -289,6 +279,7 @@ replicationCursor_setup(replicationCursorObject* self) { self->started = 0; self->consuming = 0; + self->decode = 0; self->write_lsn = InvalidXLogRecPtr; self->flush_lsn = InvalidXLogRecPtr; From dd6bcbd04fc9714ac87b827af12647590ef131a1 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Fri, 23 Oct 2015 17:51:03 +0200 Subject: [PATCH 48/60] Improve async replication example. 
--- doc/src/extras.rst | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index 7df68a777..bd13a782f 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -560,6 +560,9 @@ The individual messages in the replication stream are represented by An actual example of asynchronous operation might look like this:: + from select import select + from datetime import datetime + def consume(msg): ... @@ -571,14 +574,12 @@ The individual messages in the replication stream are represented by else: now = datetime.now() timeout = keepalive_interval - (now - cur.io_timestamp).total_seconds() - if timeout > 0: - sel = select.select([cur], [], [], timeout) - else: - sel = ([], [], []) - - if not sel[0]: - # timed out, send keepalive message - cur.send_feedback() + try: + sel = select([cur], [], [], max(0, timeout)) + if not any(sel): + cur.send_feedback() # timed out, send keepalive message + except InterruptedError: + pass # recalculate timeout and continue .. index:: pair: Cursor; Replication From 8b79bf43ace9b7d09f16b4c829c96a6c1784dacf Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Fri, 23 Oct 2015 18:30:18 +0200 Subject: [PATCH 49/60] Drop ReplicationCursor.flush_feedback(), rectify pq_*_replication_*() interface. --- doc/src/extras.rst | 16 ------- psycopg/libpq_support.h | 2 - psycopg/pqpath.c | 69 +++++++++++++------------------ psycopg/pqpath.h | 6 ++- psycopg/replication_cursor.h | 3 +- psycopg/replication_cursor_type.c | 60 +++++++-------------------- tests/test_replication.py | 2 +- 7 files changed, 49 insertions(+), 109 deletions(-) diff --git a/doc/src/extras.rst b/doc/src/extras.rst index bd13a782f..58b0dc07c 100644 --- a/doc/src/extras.rst +++ b/doc/src/extras.rst @@ -492,22 +492,6 @@ The individual messages in the replication stream are represented by This method can also be called with all default parameters' values to just send a keepalive message to the server. 
- If the feedback message could not be sent, updates the passed LSN - positions in the cursor for a later call to `flush_feedback()` and - returns `!False`, otherwise returns `!True`. - - .. method:: flush_feedback(reply=False) - - :param reply: request the server to send back a keepalive message immediately - - This method tries to flush the latest replication feedback message - that `send_feedback()` was trying to send but couldn't. - - If *reply* is `!True` sends a keepalive message in either case. - - Returns `!True` if the feedback message was sent successfully, - `!False` otherwise. - Low-level replication cursor methods for :ref:`asynchronous connection ` operation. diff --git a/psycopg/libpq_support.h b/psycopg/libpq_support.h index c71394632..77d7ab12f 100644 --- a/psycopg/libpq_support.h +++ b/psycopg/libpq_support.h @@ -31,8 +31,6 @@ /* type and constant definitions from internal postgres includes not available otherwise */ typedef unsigned PG_INT64_TYPE XLogRecPtr; -#define InvalidXLogRecPtr ((XLogRecPtr) 0) - /* have to use lowercase %x, as PyString_FromFormat can't do %X */ #define XLOGFMTSTR "%x/%x" #define XLOGFMTARGS(x) ((uint32)((x) >> 32)), ((uint32)((x) & 0xFFFFFFFF)) diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 424ed901d..63154172e 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1542,8 +1542,8 @@ _pq_copy_out_v3(cursorObject *curs) Any keepalive messages from the server are silently consumed and are never returned to the caller. 
*/ -PyObject * -pq_read_replication_message(replicationCursorObject *repl) +int +pq_read_replication_message(replicationCursorObject *repl, replicationMessageObject **msg) { cursorObject *curs = &repl->cur; connectionObject *conn = curs->conn; @@ -1553,18 +1553,21 @@ pq_read_replication_message(replicationCursorObject *repl) XLogRecPtr data_start, wal_end; pg_int64 send_time; PyObject *str = NULL, *result = NULL; - replicationMessageObject *msg = NULL; + int ret = -1; Dprintf("pq_read_replication_message"); + *msg = NULL; consumed = 0; + retry: len = PQgetCopyData(pgconn, &buffer, 1 /* async */); if (len == 0) { /* If we've tried reading some data, but there was none, bail out. */ if (consumed) { - goto none; + ret = 0; + goto exit; } /* We should only try reading more data when there is nothing available at the moment. Otherwise, with a really highly loaded @@ -1599,7 +1602,8 @@ pq_read_replication_message(replicationCursorObject *repl) } CLEARPGRES(curs->pgres); - goto none; + ret = 0; + goto exit; } /* It also makes sense to set this flag here to make us return early in @@ -1641,11 +1645,11 @@ pq_read_replication_message(replicationCursorObject *repl) Py_DECREF(str); if (!result) { goto exit; } - msg = (replicationMessageObject *)result; - msg->data_size = data_size; - msg->data_start = data_start; - msg->wal_end = wal_end; - msg->send_time = send_time; + *msg = (replicationMessageObject *)result; + (*msg)->data_size = data_size; + (*msg)->data_start = data_start; + (*msg)->wal_end = wal_end; + (*msg)->send_time = send_time; } else if (buffer[0] == 'k') { /* Primary keepalive message: msgtype(1), walEnd(8), sendTime(8), reply(1) */ @@ -1656,19 +1660,8 @@ pq_read_replication_message(replicationCursorObject *repl) } reply = buffer[hdr]; - if (reply) { - if (!pq_send_replication_feedback(repl, 0)) { - if (conn->async) { - repl->feedback_pending = 1; - } else { - /* XXX not sure if this was a good idea after all */ - pq_raise(conn, curs, NULL); - goto exit; - } - } 
- else { - gettimeofday(&repl->last_io, NULL); - } + if (reply && pq_send_replication_feedback(repl, 0) < 0) { + goto exit; } PQfreemem(buffer); @@ -1680,24 +1673,22 @@ pq_read_replication_message(replicationCursorObject *repl) goto exit; } + ret = 0; + exit: if (buffer) { PQfreemem(buffer); } - return result; - -none: - result = Py_None; - Py_INCREF(result); - goto exit; + return ret; } int pq_send_replication_feedback(replicationCursorObject *repl, int reply_requested) { cursorObject *curs = &repl->cur; - PGconn *pgconn = curs->conn->pgconn; + connectionObject *conn = curs->conn; + PGconn *pgconn = conn->pgconn; char replybuf[1 + 8 + 8 + 8 + 8 + 1]; int len = 0; @@ -1714,11 +1705,12 @@ pq_send_replication_feedback(replicationCursorObject *repl, int reply_requested) replybuf[len] = reply_requested ? 1 : 0; len += 1; if (PQputCopyData(pgconn, replybuf, len) <= 0 || PQflush(pgconn) != 0) { - return 0; + pq_raise(conn, curs, NULL); + return -1; } gettimeofday(&repl->last_io, NULL); - return 1; + return 0; } /* Calls pq_read_replication_message in an endless loop, until @@ -1734,7 +1726,8 @@ pq_copy_both(replicationCursorObject *repl, PyObject *consume, double keepalive_ cursorObject *curs = &repl->cur; connectionObject *conn = curs->conn; PGconn *pgconn = conn->pgconn; - PyObject *msg, *tmp = NULL; + replicationMessageObject *msg = NULL; + PyObject *tmp = NULL; int fd, sel, ret = -1; fd_set fds; struct timeval keep_intr, curr_time, ping_time, timeout; @@ -1750,13 +1743,10 @@ pq_copy_both(replicationCursorObject *repl, PyObject *consume, double keepalive_ keep_intr.tv_usec = (keepalive_interval - keep_intr.tv_sec)*1.0e6; while (1) { - msg = pq_read_replication_message(repl); - if (!msg) { + if (pq_read_replication_message(repl, &msg) < 0) { goto exit; } - else if (msg == Py_None) { - Py_DECREF(msg); - + else if (msg == NULL) { fd = PQsocket(pgconn); if (fd < 0) { pq_raise(conn, curs, NULL); @@ -1793,8 +1783,7 @@ pq_copy_both(replicationCursorObject *repl, PyObject 
*consume, double keepalive_ } if (sel == 0) { - if (!pq_send_replication_feedback(repl, 0)) { - pq_raise(conn, curs, NULL); + if (pq_send_replication_feedback(repl, 0) < 0) { goto exit; } } diff --git a/psycopg/pqpath.h b/psycopg/pqpath.h index 1348d9c4c..5cf22309b 100644 --- a/psycopg/pqpath.h +++ b/psycopg/pqpath.h @@ -27,8 +27,9 @@ #define PSYCOPG_PQPATH_H 1 #include "psycopg/cursor.h" -#include "psycopg/replication_cursor.h" #include "psycopg/connection.h" +#include "psycopg/replication_cursor.h" +#include "psycopg/replication_message.h" /* macro to clean the pg result */ #define CLEARPGRES(pgres) do { PQclear(pgres); pgres = NULL; } while (0) @@ -76,7 +77,8 @@ RAISES HIDDEN void pq_complete_error(connectionObject *conn, PGresult **pgres, /* replication protocol support */ HIDDEN int pq_copy_both(replicationCursorObject *repl, PyObject *consumer, double keepalive_interval); -HIDDEN PyObject *pq_read_replication_message(replicationCursorObject *repl); +HIDDEN int pq_read_replication_message(replicationCursorObject *repl, + replicationMessageObject **msg); HIDDEN int pq_send_replication_feedback(replicationCursorObject *repl, int reply_requested); #endif /* !defined(PSYCOPG_PQPATH_H) */ diff --git a/psycopg/replication_cursor.h b/psycopg/replication_cursor.h index 07bf7b543..36ced1389 100644 --- a/psycopg/replication_cursor.h +++ b/psycopg/replication_cursor.h @@ -45,10 +45,9 @@ typedef struct replicationCursorObject { struct timeval last_io ; /* timestamp of the last exchange with the server */ struct timeval keepalive_interval; /* interval for keepalive messages in replication mode */ - XLogRecPtr write_lsn; /* LSN stats for replication feedback messages */ + XLogRecPtr write_lsn; /* LSNs for replication feedback messages */ XLogRecPtr flush_lsn; XLogRecPtr apply_lsn; - int feedback_pending; /* flag set when we couldn't send the feedback to the server */ } replicationCursorObject; diff --git a/psycopg/replication_cursor_type.c 
b/psycopg/replication_cursor_type.c index 1fd5ea39e..f652984e2 100644 --- a/psycopg/replication_cursor_type.c +++ b/psycopg/replication_cursor_type.c @@ -130,28 +130,21 @@ static PyObject * psyco_repl_curs_read_message(replicationCursorObject *self) { cursorObject *curs = &self->cur; + replicationMessageObject *msg = NULL; EXC_IF_CURS_CLOSED(curs); EXC_IF_GREEN(read_message); EXC_IF_TPC_PREPARED(self->cur.conn, read_message); EXC_IF_NOT_REPLICATING(self, read_message); - return pq_read_replication_message(self); -} - -static PyObject * -repl_curs_flush_feedback(replicationCursorObject *self, int reply) -{ - if (!(self->feedback_pending || reply)) - Py_RETURN_TRUE; - - if (pq_send_replication_feedback(self, reply)) { - self->feedback_pending = 0; - Py_RETURN_TRUE; - } else { - self->feedback_pending = 1; - Py_RETURN_FALSE; + if (pq_read_replication_message(self, &msg) < 0) { + return NULL; } + if (msg) { + return (PyObject *)msg; + } + + Py_RETURN_NONE; } #define psyco_repl_curs_send_feedback_doc \ @@ -162,9 +155,7 @@ psyco_repl_curs_send_feedback(replicationCursorObject *self, PyObject *args, PyObject *kwargs) { cursorObject *curs = &self->cur; - XLogRecPtr write_lsn = InvalidXLogRecPtr, - flush_lsn = InvalidXLogRecPtr, - apply_lsn = InvalidXLogRecPtr; + XLogRecPtr write_lsn = 0, flush_lsn = 0, apply_lsn = 0; int reply = 0; static char* kwlist[] = {"write_lsn", "flush_lsn", "apply_lsn", "reply", NULL}; @@ -185,31 +176,11 @@ psyco_repl_curs_send_feedback(replicationCursorObject *self, if (apply_lsn > self->apply_lsn) self->apply_lsn = apply_lsn; - self->feedback_pending = 1; - - return repl_curs_flush_feedback(self, reply); -} - -#define psyco_repl_curs_flush_feedback_doc \ -"flush_feedback(reply=False) -- Try flushing the latest pending replication feedback message to the server and optionally request a reply." 
- -static PyObject * -psyco_repl_curs_flush_feedback(replicationCursorObject *self, - PyObject *args, PyObject *kwargs) -{ - cursorObject *curs = &self->cur; - int reply = 0; - static char *kwlist[] = {"reply", NULL}; - - EXC_IF_CURS_CLOSED(curs); - EXC_IF_NOT_REPLICATING(self, flush_feedback); - - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, - &reply)) { + if (pq_send_replication_feedback(self, reply) < 0) { return NULL; } - return repl_curs_flush_feedback(self, reply); + Py_RETURN_NONE; } @@ -260,8 +231,6 @@ static struct PyMethodDef replicationCursorObject_methods[] = { METH_NOARGS, psyco_repl_curs_read_message_doc}, {"send_feedback", (PyCFunction)psyco_repl_curs_send_feedback, METH_VARARGS|METH_KEYWORDS, psyco_repl_curs_send_feedback_doc}, - {"flush_feedback", (PyCFunction)psyco_repl_curs_flush_feedback, - METH_VARARGS|METH_KEYWORDS, psyco_repl_curs_flush_feedback_doc}, {NULL} }; @@ -281,10 +250,9 @@ replicationCursor_setup(replicationCursorObject* self) self->consuming = 0; self->decode = 0; - self->write_lsn = InvalidXLogRecPtr; - self->flush_lsn = InvalidXLogRecPtr; - self->apply_lsn = InvalidXLogRecPtr; - self->feedback_pending = 0; + self->write_lsn = 0; + self->flush_lsn = 0; + self->apply_lsn = 0; return 0; } diff --git a/tests/test_replication.py b/tests/test_replication.py index 2dbb00866..4441a2666 100644 --- a/tests/test_replication.py +++ b/tests/test_replication.py @@ -157,7 +157,7 @@ def consume(msg): self.msg_count += 1 if self.msg_count > 3: - cur.flush_feedback(reply=True) + cur.send_feedback(reply=True) raise StopReplication() cur.send_feedback(flush_lsn=msg.data_start) From 7aba8b3ed0483c675d757bf52c8ce9456c9aeeb1 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 27 Oct 2015 12:54:10 +0100 Subject: [PATCH 50/60] Rework psycopg2.connect() interface. 
--- doc/src/extensions.rst | 22 +++++ lib/__init__.py | 49 +--------- lib/extensions.py | 3 +- psycopg/psycopg.h | 6 ++ psycopg/psycopgmodule.c | 210 ++++++++++++++++++++++++++++++++++++++-- psycopg/utils.c | 44 +++++++++ tests/test_module.py | 32 ++++-- 7 files changed, 303 insertions(+), 63 deletions(-) diff --git a/doc/src/extensions.rst b/doc/src/extensions.rst index d96cca4fb..dcaa2340c 100644 --- a/doc/src/extensions.rst +++ b/doc/src/extensions.rst @@ -24,6 +24,28 @@ functionalities defined by the |DBAPI|_. >>> psycopg2.extensions.parse_dsn('dbname=test user=postgres password=secret') {'password': 'secret', 'user': 'postgres', 'dbname': 'test'} +.. function:: make_dsn(**kwargs) + + Wrap keyword parameters into a connection string, applying necessary + quoting and escaping any special characters (namely, single quote and + backslash). + + Example (note the order of parameters in the resulting string is + arbitrary):: + + >>> psycopg2.extensions.make_dsn(dbname='test', user='postgres', password='secret') + 'user=postgres dbname=test password=secret' + + As a special case, the *database* keyword is translated to *dbname*:: + + >>> psycopg2.extensions.make_dsn(database='test') + 'dbname=test' + + An example of quoting (using `print()` for clarity):: + + >>> print(psycopg2.extensions.make_dsn(database='test', password="some\\thing ''special")) + password='some\\thing \'\'special' dbname=test + .. class:: connection(dsn, async=False) Is the class usually returned by the `~psycopg2.connect()` function. 
diff --git a/lib/__init__.py b/lib/__init__.py index 994b15a8a..39dd12e2b 100644 --- a/lib/__init__.py +++ b/lib/__init__.py @@ -56,7 +56,7 @@ from psycopg2._psycopg import IntegrityError, InterfaceError, InternalError from psycopg2._psycopg import NotSupportedError, OperationalError -from psycopg2._psycopg import _connect, apilevel, threadsafety, paramstyle +from psycopg2._psycopg import _connect, parse_args, apilevel, threadsafety, paramstyle from psycopg2._psycopg import __version__, __libpq_version__ from psycopg2 import tz @@ -80,27 +80,8 @@ _ext.register_adapter(Decimal, Adapter) del Decimal, Adapter -import re - -def _param_escape(s, - re_escape=re.compile(r"([\\'])"), - re_space=re.compile(r'\s')): - """ - Apply the escaping rule required by PQconnectdb - """ - if not s: return "''" - - s = re_escape.sub(r'\\\1', s) - if re_space.search(s): - s = "'" + s + "'" - - return s - -del re - def connect(dsn=None, - database=None, user=None, password=None, host=None, port=None, connection_factory=None, cursor_factory=None, async=False, **kwargs): """ Create a new database connection. @@ -135,33 +116,7 @@ def connect(dsn=None, library: the list of supported parameters depends on the library version. 
""" - items = [] - if database is not None: - items.append(('dbname', database)) - if user is not None: - items.append(('user', user)) - if password is not None: - items.append(('password', password)) - if host is not None: - items.append(('host', host)) - if port is not None: - items.append(('port', port)) - - items.extend([(k, v) for (k, v) in kwargs.iteritems() if v is not None]) - - if dsn is not None and items: - raise TypeError( - "'%s' is an invalid keyword argument when the dsn is specified" - % items[0][0]) - - if dsn is None: - if not items: - raise TypeError('missing dsn and no parameters') - else: - dsn = " ".join(["%s=%s" % (k, _param_escape(str(v))) - for (k, v) in items]) - - conn = _connect(dsn, connection_factory=connection_factory, async=async) + conn = _connect(dsn, connection_factory, async, **kwargs) if cursor_factory is not None: conn.cursor_factory = cursor_factory diff --git a/lib/extensions.py b/lib/extensions.py index b40e28b8d..f99ed9395 100644 --- a/lib/extensions.py +++ b/lib/extensions.py @@ -56,7 +56,8 @@ except ImportError: pass -from psycopg2._psycopg import adapt, adapters, encodings, connection, cursor, lobject, Xid, libpq_version, parse_dsn, quote_ident +from psycopg2._psycopg import adapt, adapters, encodings, connection, cursor, lobject, Xid, libpq_version +from psycopg2._psycopg import parse_dsn, make_dsn, quote_ident from psycopg2._psycopg import string_types, binary_types, new_type, new_array_type, register_type from psycopg2._psycopg import ISQLQuote, Notify, Diagnostics, Column diff --git a/psycopg/psycopg.h b/psycopg/psycopg.h index eb406fd2b..770de7c62 100644 --- a/psycopg/psycopg.h +++ b/psycopg/psycopg.h @@ -119,11 +119,17 @@ typedef struct cursorObject cursorObject; typedef struct connectionObject connectionObject; /* some utility functions */ +HIDDEN PyObject *psyco_parse_args(PyObject *self, PyObject *args, PyObject *kwargs); +HIDDEN PyObject *psyco_parse_dsn(PyObject *self, PyObject *args, PyObject *kwargs); 
+HIDDEN PyObject *psyco_make_dsn(PyObject *self, PyObject *args, PyObject *kwargs); + RAISES HIDDEN PyObject *psyco_set_error(PyObject *exc, cursorObject *curs, const char *msg); HIDDEN char *psycopg_escape_string(connectionObject *conn, const char *from, Py_ssize_t len, char *to, Py_ssize_t *tolen); HIDDEN char *psycopg_escape_identifier_easy(const char *from, Py_ssize_t len); +HIDDEN char *psycopg_escape_conninfo(const char *from, Py_ssize_t len); + HIDDEN int psycopg_strdup(char **to, const char *from, Py_ssize_t len); HIDDEN int psycopg_is_text_file(PyObject *f); diff --git a/psycopg/psycopgmodule.c b/psycopg/psycopgmodule.c index cf70a4ad1..03b115d06 100644 --- a/psycopg/psycopgmodule.c +++ b/psycopg/psycopgmodule.c @@ -70,24 +70,104 @@ HIDDEN PyObject *psyco_null = NULL; /* The type of the cursor.description items */ HIDDEN PyObject *psyco_DescriptionType = NULL; + +/* finds a keyword or positional arg (pops it from kwargs if found there) */ +static PyObject * +parse_arg(int pos, char *name, PyObject *defval, PyObject *args, PyObject *kwargs) +{ + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + PyObject *val = NULL; + + if (kwargs && PyMapping_HasKeyString(kwargs, name)) { + val = PyMapping_GetItemString(kwargs, name); + Py_XINCREF(val); + PyMapping_DelItemString(kwargs, name); /* pop from the kwargs dict! */ + } + if (nargs > pos) { + if (!val) { + val = PyTuple_GET_ITEM(args, pos); + Py_XINCREF(val); + } else { + PyErr_Format(PyExc_TypeError, + "parse_args() got multiple values for keyword argument '%s'", name); + return NULL; + } + } + if (!val) { + val = defval; + Py_XINCREF(val); + } + + return val; +} + + +#define psyco_parse_args_doc \ +"parse_args(...) 
-- parse connection parameters.\n\n" \ +"Return a tuple of (dsn, connection_factory, async)" + +PyObject * +psyco_parse_args(PyObject *self, PyObject *args, PyObject *kwargs) +{ + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + PyObject *dsn = NULL; + PyObject *factory = NULL; + PyObject *async = NULL; + PyObject *res = NULL; + + if (nargs > 3) { + PyErr_Format(PyExc_TypeError, + "parse_args() takes at most 3 arguments (%d given)", (int)nargs); + goto exit; + } + /* parse and remove all keywords we know, so they are not interpreted as part of DSN */ + if (!(dsn = parse_arg(0, "dsn", Py_None, args, kwargs))) { goto exit; } + if (!(factory = parse_arg(1, "connection_factory", Py_None, + args, kwargs))) { goto exit; } + if (!(async = parse_arg(2, "async", Py_False, args, kwargs))) { goto exit; } + + if (kwargs && PyMapping_Size(kwargs) > 0) { + if (dsn == Py_None) { + Py_DECREF(dsn); + if (!(dsn = psyco_make_dsn(NULL, NULL, kwargs))) { goto exit; } + } else { + PyErr_SetString(PyExc_TypeError, "both dsn and parameters given"); + goto exit; + } + } else { + if (dsn == Py_None) { + PyErr_SetString(PyExc_TypeError, "missing dsn and no parameters"); + goto exit; + } + } + + res = PyTuple_Pack(3, dsn, factory, async); + +exit: + Py_XDECREF(dsn); + Py_XDECREF(factory); + Py_XDECREF(async); + + return res; +} + + /** connect module-level function **/ #define psyco_connect_doc \ -"_connect(dsn, [connection_factory], [async]) -- New database connection.\n\n" +"_connect(dsn, [connection_factory], [async], **kwargs) -- New database connection.\n\n" static PyObject * psyco_connect(PyObject *self, PyObject *args, PyObject *keywds) { PyObject *conn = NULL; + PyObject *tuple = NULL; PyObject *factory = NULL; const char *dsn = NULL; int async = 0; - static char *kwlist[] = {"dsn", "connection_factory", "async", NULL}; + if (!(tuple = psyco_parse_args(self, args, keywds))) { goto exit; } - if (!PyArg_ParseTupleAndKeywords(args, keywds, "s|Oi", kwlist, - &dsn, &factory, &async)) { - 
return NULL; - } + if (!PyArg_ParseTuple(tuple, "s|Oi", &dsn, &factory, &async)) { goto exit; } Dprintf("psyco_connect: dsn = '%s', async = %d", dsn, async); @@ -109,12 +189,16 @@ psyco_connect(PyObject *self, PyObject *args, PyObject *keywds) conn = PyObject_CallFunction(factory, "si", dsn, async); } +exit: + Py_XDECREF(tuple); + return conn; } + #define psyco_parse_dsn_doc "parse_dsn(dsn) -> dict" -static PyObject * +PyObject * psyco_parse_dsn(PyObject *self, PyObject *args, PyObject *kwargs) { char *err = NULL; @@ -166,6 +250,114 @@ psyco_parse_dsn(PyObject *self, PyObject *args, PyObject *kwargs) } +#define psyco_make_dsn_doc "make_dsn(**kwargs) -> str" + +PyObject * +psyco_make_dsn(PyObject *self, PyObject *args, PyObject *kwargs) +{ + Py_ssize_t len, pos; + PyObject *res = NULL; + PyObject *key = NULL, *value = NULL; + PyObject *newkey, *newval; + PyObject *dict = NULL; + char *str = NULL, *p, *q; + + if (args && (len = PyTuple_Size(args)) > 0) { + PyErr_Format(PyExc_TypeError, "make_dsn() takes no arguments (%d given)", (int)len); + goto exit; + } + if (kwargs == NULL) { + return Text_FromUTF8(""); + } + + /* iterate through kwargs, calculating the total resulting string + length and saving prepared key/values to a temp. 
dict */ + if (!(dict = PyDict_New())) { goto exit; } + + len = 0; + pos = 0; + while (PyDict_Next(kwargs, &pos, &key, &value)) { + if (value == NULL || value == Py_None) { continue; } + + Py_INCREF(key); /* for ensure_bytes */ + if (!(newkey = psycopg_ensure_bytes(key))) { goto exit; } + + /* special handling of 'database' keyword */ + if (strcmp(Bytes_AsString(newkey), "database") == 0) { + key = Bytes_FromString("dbname"); + Py_DECREF(newkey); + } else { + key = newkey; + } + + /* now transform the value */ + if (Bytes_CheckExact(value)) { + Py_INCREF(value); + } else if (PyUnicode_CheckExact(value)) { + if (!(value = PyUnicode_AsUTF8String(value))) { goto exit; } + } else { + /* this could be port=5432, so we need to get the text representation */ + if (!(value = PyObject_Str(value))) { goto exit; } + /* and still ensure it's bytes() (but no need to incref here) */ + if (!(value = psycopg_ensure_bytes(value))) { goto exit; } + } + + /* passing NULL for plen checks for NIL bytes in content and errors out */ + if (Bytes_AsStringAndSize(value, &str, NULL) < 0) { goto exit; } + /* escape any special chars */ + if (!(str = psycopg_escape_conninfo(str, 0))) { goto exit; } + if (!(newval = Bytes_FromString(str))) { + goto exit; + } + PyMem_Free(str); + str = NULL; + Py_DECREF(value); + value = newval; + + /* finally put into the temp. 
dict */ + if (PyDict_SetItem(dict, key, value) < 0) { goto exit; } + + len += Bytes_GET_SIZE(key) + Bytes_GET_SIZE(value) + 2; /* =, space or NIL */ + + Py_DECREF(key); + Py_DECREF(value); + } + key = NULL; + value = NULL; + + if (!(str = PyMem_Malloc(len))) { + PyErr_NoMemory(); + goto exit; + } + + p = str; + pos = 0; + while (PyDict_Next(dict, &pos, &newkey, &newval)) { + if (p != str) { + *(p++) = ' '; + } + if (Bytes_AsStringAndSize(newkey, &q, &len) < 0) { goto exit; } + strncpy(p, q, len); + p += len; + *(p++) = '='; + if (Bytes_AsStringAndSize(newval, &q, &len) < 0) { goto exit; } + strncpy(p, q, len); + p += len; + } + *p = '\0'; + + res = Text_FromUTF8AndSize(str, p - str); + +exit: + PyMem_Free(str); + Py_XDECREF(key); + Py_XDECREF(value); + Py_XDECREF(dict); + + return res; +} + + #define psyco_quote_ident_doc \ "quote_ident(str, conn_or_curs) -> str -- wrapper around PQescapeIdentifier\n\n" \ ":Parameters:\n" \ @@ -820,8 +1012,12 @@ psyco_make_description_type(void) static PyMethodDef psycopgMethods[] = { {"_connect", (PyCFunction)psyco_connect, METH_VARARGS|METH_KEYWORDS, psyco_connect_doc}, + {"parse_args", (PyCFunction)psyco_parse_args, + METH_VARARGS|METH_KEYWORDS, psyco_parse_args_doc}, {"parse_dsn", (PyCFunction)psyco_parse_dsn, METH_VARARGS|METH_KEYWORDS, psyco_parse_dsn_doc}, + {"make_dsn", (PyCFunction)psyco_make_dsn, + METH_VARARGS|METH_KEYWORDS, psyco_make_dsn_doc}, {"quote_ident", (PyCFunction)psyco_quote_ident, METH_VARARGS|METH_KEYWORDS, psyco_quote_ident_doc}, {"adapt", (PyCFunction)psyco_microprotocols_adapt, diff --git a/psycopg/utils.c b/psycopg/utils.c index ec8e47c88..e9dc3ba6f 100644 --- a/psycopg/utils.c +++ b/psycopg/utils.c @@ -124,6 +124,50 @@ psycopg_escape_identifier_easy(const char *from, Py_ssize_t len) return rv; } +char * +psycopg_escape_conninfo(const char *from, Py_ssize_t len) +{ + char *rv = NULL; + const char *src; + const char *end; + char *dst; + int space = 0; + + if (!len) { len = strlen(from); } + end = from + 
len; + + if (!(rv = PyMem_Malloc(3 + 2 * len))) { + PyErr_NoMemory(); + return NULL; + } + + /* check for any whitespace or empty string */ + if (from < end && *from) { + for (src = from; src < end && *src; ++src) { + if (isspace(*src)) { + space = 1; + break; + } + } + } else { + /* empty string: we should produce '' */ + space = 1; + } + + dst = rv; + if (space) { *(dst++) = '\''; } + /* scan and copy */ + for (src = from; src < end && *src; ++src, ++dst) { + if (*src == '\'' || *src == '\\') + *(dst++) = '\\'; + *dst = *src; + } + if (space) { *(dst++) = '\''; } + *dst = '\0'; + + return rv; +} + /* Duplicate a string. * * Allocate a new buffer on the Python heap containing the new string. diff --git a/tests/test_module.py b/tests/test_module.py index 62b85ee23..528f79c58 100755 --- a/tests/test_module.py +++ b/tests/test_module.py @@ -34,11 +34,11 @@ class ConnectTestCase(unittest.TestCase): def setUp(self): self.args = None - def conect_stub(dsn, connection_factory=None, async=False): - self.args = (dsn, connection_factory, async) + def connect_stub(*args, **kwargs): + self.args = psycopg2.parse_args(*args, **kwargs) self._connect_orig = psycopg2._connect - psycopg2._connect = conect_stub + psycopg2._connect = connect_stub def tearDown(self): psycopg2._connect = self._connect_orig @@ -91,29 +91,45 @@ def f(dsn, async=False): pass psycopg2.connect(database='foo', bar='baz', connection_factory=f) - self.assertEqual(self.args[0], 'dbname=foo bar=baz') + dsn = " %s " % self.args[0] + self.assertIn(" dbname=foo ", dsn) + self.assertIn(" bar=baz ", dsn) self.assertEqual(self.args[1], f) self.assertEqual(self.args[2], False) psycopg2.connect("dbname=foo bar=baz", connection_factory=f) - self.assertEqual(self.args[0], 'dbname=foo bar=baz') + dsn = " %s " % self.args[0] + self.assertIn(" dbname=foo ", dsn) + self.assertIn(" bar=baz ", dsn) self.assertEqual(self.args[1], f) self.assertEqual(self.args[2], False) def test_async(self): psycopg2.connect(database='foo', 
bar='baz', async=1) - self.assertEqual(self.args[0], 'dbname=foo bar=baz') + dsn = " %s " % self.args[0] + self.assertIn(" dbname=foo ", dsn) + self.assertIn(" bar=baz ", dsn) self.assertEqual(self.args[1], None) self.assert_(self.args[2]) psycopg2.connect("dbname=foo bar=baz", async=True) - self.assertEqual(self.args[0], 'dbname=foo bar=baz') + dsn = " %s " % self.args[0] + self.assertIn(" dbname=foo ", dsn) + self.assertIn(" bar=baz ", dsn) self.assertEqual(self.args[1], None) self.assert_(self.args[2]) + def test_int_port_param(self): + psycopg2.connect(database='sony', port=6543) + dsn = " %s " % self.args[0] + self.assertIn(" dbname=sony ", dsn) + self.assertIn(" port=6543 ", dsn) + def test_empty_param(self): psycopg2.connect(database='sony', password='') - self.assertEqual(self.args[0], "dbname=sony password=''") + dsn = " %s " % self.args[0] + self.assertIn(" dbname=sony ", dsn) + self.assertIn(" password='' ", dsn) def test_escape(self): psycopg2.connect(database='hello world') From fbcf99ad070a3eae67c258d357ab86bda29793fd Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 27 Oct 2015 18:21:24 +0100 Subject: [PATCH 51/60] Move replication connection to C level. 
--- lib/extensions.py | 3 +- lib/extras.py | 57 ++----- psycopg/psycopg.h | 1 + psycopg/psycopgmodule.c | 12 +- psycopg/replication_connection.h | 53 +++++++ psycopg/replication_connection_type.c | 210 ++++++++++++++++++++++++++ psycopg2.cproj | 2 + setup.py | 8 +- tests/testutils.py | 3 - 9 files changed, 296 insertions(+), 53 deletions(-) create mode 100644 psycopg/replication_connection.h create mode 100644 psycopg/replication_connection_type.c diff --git a/lib/extensions.py b/lib/extensions.py index ad0f31e0a..fb91c0f20 100644 --- a/lib/extensions.py +++ b/lib/extensions.py @@ -62,7 +62,8 @@ from psycopg2._psycopg import ISQLQuote, Notify, Diagnostics, Column from psycopg2._psycopg import QueryCanceledError, TransactionRollbackError -from psycopg2._psycopg import ReplicationCursor, ReplicationMessage +from psycopg2._psycopg import REPLICATION_PHYSICAL, REPLICATION_LOGICAL +from psycopg2._psycopg import ReplicationConnection, ReplicationCursor, ReplicationMessage try: from psycopg2._psycopg import set_wait_callback, get_wait_callback diff --git a/lib/extras.py b/lib/extras.py index 8a8d34ff1..6e815d69f 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -39,6 +39,8 @@ from psycopg2 import extensions as _ext from psycopg2.extensions import cursor as _cursor from psycopg2.extensions import connection as _connection +from psycopg2.extensions import REPLICATION_PHYSICAL, REPLICATION_LOGICAL +from psycopg2.extensions import ReplicationConnection as _replicationConnection from psycopg2.extensions import ReplicationCursor as _replicationCursor from psycopg2.extensions import ReplicationMessage from psycopg2.extensions import adapt as _A, quote_ident @@ -439,65 +441,28 @@ def callproc(self, procname, vars=None): return LoggingCursor.callproc(self, procname, vars) -"""Replication connection types.""" -REPLICATION_LOGICAL = "LOGICAL" -REPLICATION_PHYSICAL = "PHYSICAL" - - -class ReplicationConnectionBase(_connection): +class 
ReplicationConnectionBase(_replicationConnection): """ Base class for Logical and Physical replication connection classes. Uses `ReplicationCursor` automatically. """ def __init__(self, *args, **kwargs): - """ - Initializes a replication connection by adding appropriate - parameters to the provided DSN and tweaking the connection - attributes. - """ - - # replication_type is set in subclasses - if self.replication_type == REPLICATION_LOGICAL: - replication = 'database' - - elif self.replication_type == REPLICATION_PHYSICAL: - replication = 'true' - - else: - raise psycopg2.ProgrammingError("unrecognized replication type: %s" % self.replication_type) - - items = _ext.parse_dsn(args[0]) - - # we add an appropriate replication keyword parameter, unless - # user has specified one explicitly in the DSN - items.setdefault('replication', replication) - - dsn = " ".join(["%s=%s" % (k, psycopg2._param_escape(str(v))) - for (k, v) in items.iteritems()]) - - args = [dsn] + list(args[1:]) # async is the possible 2nd arg super(ReplicationConnectionBase, self).__init__(*args, **kwargs) - - # prevent auto-issued BEGIN statements - if not self.async: - self.autocommit = True - - if self.cursor_factory is None: - self.cursor_factory = ReplicationCursor + self.cursor_factory = ReplicationCursor class LogicalReplicationConnection(ReplicationConnectionBase): def __init__(self, *args, **kwargs): - self.replication_type = REPLICATION_LOGICAL + kwargs['replication_type'] = REPLICATION_LOGICAL super(LogicalReplicationConnection, self).__init__(*args, **kwargs) class PhysicalReplicationConnection(ReplicationConnectionBase): def __init__(self, *args, **kwargs): - self.replication_type = REPLICATION_PHYSICAL + kwargs['replication_type'] = REPLICATION_PHYSICAL super(PhysicalReplicationConnection, self).__init__(*args, **kwargs) @@ -528,16 +493,16 @@ def create_replication_slot(self, slot_name, slot_type=None, output_plugin=None) if output_plugin is None: raise 
psycopg2.ProgrammingError("output plugin name is required to create logical replication slot") - command += "%s %s" % (slot_type, quote_ident(output_plugin, self)) + command += "LOGICAL %s" % quote_ident(output_plugin, self) elif slot_type == REPLICATION_PHYSICAL: if output_plugin is not None: raise psycopg2.ProgrammingError("cannot specify output plugin name when creating physical replication slot") - command += slot_type + command += "PHYSICAL" else: - raise psycopg2.ProgrammingError("unrecognized replication type: %s" % slot_type) + raise psycopg2.ProgrammingError("unrecognized replication type: %s" % repr(slot_type)) self.execute(command) @@ -562,7 +527,7 @@ def start_replication(self, slot_name=None, slot_type=None, start_lsn=0, else: raise psycopg2.ProgrammingError("slot name is required for logical replication") - command += "%s " % slot_type + command += "LOGICAL " elif slot_type == REPLICATION_PHYSICAL: if slot_name: @@ -570,7 +535,7 @@ def start_replication(self, slot_name=None, slot_type=None, start_lsn=0, # don't add "PHYSICAL", before 9.4 it was just START_REPLICATION XXX/XXX else: - raise psycopg2.ProgrammingError("unrecognized replication type: %s" % slot_type) + raise psycopg2.ProgrammingError("unrecognized replication type: %s" % repr(slot_type)) if type(start_lsn) is str: lsn = start_lsn.split('/') diff --git a/psycopg/psycopg.h b/psycopg/psycopg.h index 0c5bdcce6..8134a83f1 100644 --- a/psycopg/psycopg.h +++ b/psycopg/psycopg.h @@ -120,6 +120,7 @@ typedef struct connectionObject connectionObject; typedef struct replicationMessageObject replicationMessageObject; /* some utility functions */ +HIDDEN PyObject *parse_arg(int pos, char *name, PyObject *defval, PyObject *args, PyObject *kwargs); HIDDEN PyObject *psyco_parse_args(PyObject *self, PyObject *args, PyObject *kwargs); HIDDEN PyObject *psyco_parse_dsn(PyObject *self, PyObject *args, PyObject *kwargs); HIDDEN PyObject *psyco_make_dsn(PyObject *self, PyObject *args, PyObject *kwargs); diff 
--git a/psycopg/psycopgmodule.c b/psycopg/psycopgmodule.c index eaa451d8e..04f781f54 100644 --- a/psycopg/psycopgmodule.c +++ b/psycopg/psycopgmodule.c @@ -28,6 +28,7 @@ #include "psycopg/connection.h" #include "psycopg/cursor.h" +#include "psycopg/replication_connection.h" #include "psycopg/replication_cursor.h" #include "psycopg/replication_message.h" #include "psycopg/green.h" @@ -74,7 +75,7 @@ HIDDEN PyObject *psyco_DescriptionType = NULL; /* finds a keyword or positional arg (pops it from kwargs if found there) */ -static PyObject * +PyObject * parse_arg(int pos, char *name, PyObject *defval, PyObject *args, PyObject *kwargs) { Py_ssize_t nargs = PyTuple_GET_SIZE(args); @@ -1114,6 +1115,9 @@ INIT_MODULE(_psycopg)(void) Py_TYPE(&cursorType) = &PyType_Type; if (PyType_Ready(&cursorType) == -1) goto exit; + Py_TYPE(&replicationConnectionType) = &PyType_Type; + if (PyType_Ready(&replicationConnectionType) == -1) goto exit; + Py_TYPE(&replicationCursorType) = &PyType_Type; if (PyType_Ready(&replicationCursorType) == -1) goto exit; @@ -1237,6 +1241,8 @@ INIT_MODULE(_psycopg)(void) PyModule_AddStringConstant(module, "__version__", PSYCOPG_VERSION); PyModule_AddStringConstant(module, "__doc__", "psycopg PostgreSQL driver"); PyModule_AddIntConstant(module, "__libpq_version__", PG_VERSION_NUM); + PyModule_AddIntMacro(module, REPLICATION_PHYSICAL); + PyModule_AddIntMacro(module, REPLICATION_LOGICAL); PyModule_AddObject(module, "apilevel", Text_FromUTF8(APILEVEL)); PyModule_AddObject(module, "threadsafety", PyInt_FromLong(THREADSAFETY)); PyModule_AddObject(module, "paramstyle", Text_FromUTF8(PARAMSTYLE)); @@ -1244,6 +1250,7 @@ INIT_MODULE(_psycopg)(void) /* put new types in module dictionary */ PyModule_AddObject(module, "connection", (PyObject*)&connectionType); PyModule_AddObject(module, "cursor", (PyObject*)&cursorType); + PyModule_AddObject(module, "ReplicationConnection", (PyObject*)&replicationConnectionType); PyModule_AddObject(module, "ReplicationCursor", 
(PyObject*)&replicationCursorType); PyModule_AddObject(module, "ReplicationMessage", (PyObject*)&replicationMessageType); PyModule_AddObject(module, "ISQLQuote", (PyObject*)&isqlquoteType); @@ -1285,6 +1292,9 @@ INIT_MODULE(_psycopg)(void) if (0 != psyco_errors_init()) { goto exit; } psyco_errors_fill(dict); + replicationPhysicalConst = PyDict_GetItemString(dict, "REPLICATION_PHYSICAL"); + replicationLogicalConst = PyDict_GetItemString(dict, "REPLICATION_LOGICAL"); + Dprintf("initpsycopg: module initialization complete"); exit: diff --git a/psycopg/replication_connection.h b/psycopg/replication_connection.h new file mode 100644 index 000000000..9198f5def --- /dev/null +++ b/psycopg/replication_connection.h @@ -0,0 +1,53 @@ +/* replication_connection.h - definition for the psycopg replication connection type + * + * Copyright (C) 2015 Daniele Varrazzo + * + * This file is part of psycopg. + * + * psycopg2 is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * In addition, as a special exception, the copyright holders give + * permission to link this program with the OpenSSL library (or with + * modified versions of OpenSSL that use the same license as OpenSSL), + * and distribute linked combinations including the two. + * + * You must obey the GNU Lesser General Public License in all respects for + * all of the code used other than OpenSSL. + * + * psycopg2 is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ */ + +#ifndef PSYCOPG_REPLICATION_CONNECTION_H +#define PSYCOPG_REPLICATION_CONNECTION_H 1 + +#include "psycopg/connection.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern HIDDEN PyTypeObject replicationConnectionType; + +typedef struct replicationConnectionObject { + connectionObject conn; + + long int type; +} replicationConnectionObject; + +#define REPLICATION_PHYSICAL 1 +#define REPLICATION_LOGICAL 2 + +extern HIDDEN PyObject *replicationPhysicalConst; +extern HIDDEN PyObject *replicationLogicalConst; + +#ifdef __cplusplus +} +#endif + +#endif /* !defined(PSYCOPG_REPLICATION_CONNECTION_H) */ diff --git a/psycopg/replication_connection_type.c b/psycopg/replication_connection_type.c new file mode 100644 index 000000000..16c52414e --- /dev/null +++ b/psycopg/replication_connection_type.c @@ -0,0 +1,210 @@ +/* replication_connection_type.c - python interface to replication connection objects + * + * Copyright (C) 2015 Daniele Varrazzo + * + * This file is part of psycopg. + * + * psycopg2 is free software: you can redistribute it and/or modify it + * under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * In addition, as a special exception, the copyright holders give + * permission to link this program with the OpenSSL library (or with + * modified versions of OpenSSL that use the same license as OpenSSL), + * and distribute linked combinations including the two. + * + * You must obey the GNU Lesser General Public License in all respects for + * all of the code used other than OpenSSL. + * + * psycopg2 is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + * License for more details. 
+ */ + +#define PSYCOPG_MODULE +#include "psycopg/psycopg.h" + +#include "psycopg/replication_connection.h" +#include "psycopg/replication_message.h" +#include "psycopg/green.h" +#include "psycopg/pqpath.h" + +#include +#include + + +#define psyco_repl_conn_type_doc \ +"replication_type -- the replication connection type" + +static PyObject * +psyco_repl_conn_get_type(replicationConnectionObject *self) +{ + connectionObject *conn = &self->conn; + PyObject *res = NULL; + + EXC_IF_CONN_CLOSED(conn); + + if (self->type == REPLICATION_PHYSICAL) { + res = replicationPhysicalConst; + } else if (self->type == REPLICATION_LOGICAL) { + res = replicationLogicalConst; + } else { + PyErr_Format(PyExc_TypeError, "unknown replication type constant: %ld", self->type); + } + + Py_XINCREF(res); + return res; +} + +static int +replicationConnection_init(PyObject *obj, PyObject *args, PyObject *kwargs) +{ + replicationConnectionObject *self = (replicationConnectionObject *)obj; + PyObject *dsn = NULL; + PyObject *async = NULL; + PyObject *tmp = NULL; + const char *repl = NULL; + int ret = -1; + + Py_XINCREF(args); + Py_XINCREF(kwargs); + + /* dsn, async, replication_type */ + if (!(dsn = parse_arg(0, "dsn", Py_None, args, kwargs))) { goto exit; } + if (!(async = parse_arg(1, "async", Py_False, args, kwargs))) { goto exit; } + if (!(tmp = parse_arg(2, "replication_type", Py_None, args, kwargs))) { goto exit; } + + if (tmp == replicationPhysicalConst) { + self->type = REPLICATION_PHYSICAL; + repl = "true"; + } else if (tmp == replicationLogicalConst) { + self->type = REPLICATION_LOGICAL; + repl = "database"; + } else { + PyErr_SetString(PyExc_TypeError, + "replication_type must be either REPLICATION_PHYSICAL or REPLICATION_LOGICAL"); + goto exit; + } + Py_DECREF(tmp); + tmp = NULL; + + if (dsn != Py_None) { + if (kwargs && PyMapping_Size(kwargs) > 0) { + PyErr_SetString(PyExc_TypeError, "both dsn and parameters given"); + goto exit; + } else { + if (!(tmp = PyTuple_Pack(1, dsn))) { 
goto exit; } + + Py_XDECREF(kwargs); + if (!(kwargs = psyco_parse_dsn(NULL, tmp, NULL))) { goto exit; } + } + } else { + if (!(kwargs && PyMapping_Size(kwargs) > 0)) { + PyErr_SetString(PyExc_TypeError, "missing dsn and no parameters"); + goto exit; + } + } + + if (!PyMapping_HasKeyString(kwargs, "replication")) { + PyMapping_SetItemString(kwargs, "replication", Text_FromUTF8(repl)); + } + + Py_DECREF(dsn); + if (!(dsn = psyco_make_dsn(NULL, NULL, kwargs))) { goto exit; } + + Py_DECREF(args); + Py_DECREF(kwargs); + kwargs = NULL; + if (!(args = PyTuple_Pack(2, dsn, async))) { goto exit; } + + if ((ret = connectionType.tp_init(obj, args, NULL)) < 0) { goto exit; } + + self->conn.autocommit = 1; + self->conn.cursor_factory = (PyObject *)&replicationCursorType; + Py_INCREF(self->conn.cursor_factory); + +exit: + Py_XDECREF(tmp); + Py_XDECREF(dsn); + Py_XDECREF(async); + Py_XDECREF(args); + Py_XDECREF(kwargs); + + return ret; +} + +static PyObject * +replicationConnection_repr(replicationConnectionObject *self) +{ + return PyString_FromFormat( + "", + self, self->conn.dsn, self->conn.closed); +} + + +/* object calculated member list */ + +static struct PyGetSetDef replicationConnectionObject_getsets[] = { + /* override to prevent user tweaking these: */ + { "autocommit", NULL, NULL, NULL }, + { "isolation_level", NULL, NULL, NULL }, + { "set_session", NULL, NULL, NULL }, + { "set_isolation_level", NULL, NULL, NULL }, + { "reset", NULL, NULL, NULL }, + /* an actual getter */ + { "replication_type", + (getter)psyco_repl_conn_get_type, NULL, + psyco_repl_conn_type_doc, NULL }, + {NULL} +}; + +/* object type */ + +#define replicationConnectionType_doc \ +"A replication connection." 
+ +PyTypeObject replicationConnectionType = { + PyVarObject_HEAD_INIT(NULL, 0) + "psycopg2.extensions.ReplicationConnection", + sizeof(replicationConnectionObject), 0, + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + (reprfunc)replicationConnection_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ + 0, /*tp_call*/ + (reprfunc)replicationConnection_repr, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_ITER | + Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + replicationConnectionType_doc, /*tp_doc*/ + 0, /*tp_traverse*/ + 0, /*tp_clear*/ + 0, /*tp_richcompare*/ + 0, /*tp_weaklistoffset*/ + 0, /*tp_iter*/ + 0, /*tp_iternext*/ + 0, /*tp_methods*/ + 0, /*tp_members*/ + replicationConnectionObject_getsets, /*tp_getset*/ + &connectionType, /*tp_base*/ + 0, /*tp_dict*/ + 0, /*tp_descr_get*/ + 0, /*tp_descr_set*/ + 0, /*tp_dictoffset*/ + replicationConnection_init, /*tp_init*/ + 0, /*tp_alloc*/ + 0, /*tp_new*/ +}; + +PyObject *replicationPhysicalConst; +PyObject *replicationLogicalConst; diff --git a/psycopg2.cproj b/psycopg2.cproj index 75d961806..f6f85c728 100644 --- a/psycopg2.cproj +++ b/psycopg2.cproj @@ -92,6 +92,7 @@ + @@ -227,6 +228,7 @@ + diff --git a/setup.py b/setup.py index 18c47b7cc..210ad831d 100644 --- a/setup.py +++ b/setup.py @@ -466,7 +466,9 @@ def is_py_64(): 'connection_int.c', 'connection_type.c', 'cursor_int.c', 'cursor_type.c', - 'replication_cursor_type.c', 'replication_message_type.c', + 'replication_connection_type.c', + 'replication_cursor_type.c', + 'replication_message_type.c', 'diagnostics_type.c', 'error_type.c', 'lobject_int.c', 'lobject_type.c', 'notify_type.c', 'xid_type.c', @@ -482,7 +484,9 @@ def is_py_64(): # headers 'config.h', 'pgtypes.h', 'psycopg.h', 'python.h', 'connection.h', 'cursor.h', 'diagnostics.h', 'error.h', 'green.h', 'lobject.h', - 
'replication_cursor.h', 'replication_message.h', + 'replication_connection.h', + 'replication_cursor.h', + 'replication_message.h', 'notify.h', 'pqpath.h', 'xid.h', 'libpq_support.h', 'win32_support.h', diff --git a/tests/testutils.py b/tests/testutils.py index 5f4493f27..70eb2cc94 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -129,9 +129,6 @@ def repl_connect(self, **kwargs): conn = self.connect(**kwargs) except psycopg2.OperationalError, e: return self.skipTest("replication db not configured: %s" % e) - - if not conn.async: - conn.autocommit = True return conn def _get_conn(self): From e61db578cfc6b8ae18ffac41f2719c05cb04bb00 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Fri, 30 Oct 2015 13:00:55 +0100 Subject: [PATCH 52/60] Add dbname=replication for physical replication type. --- psycopg/replication_connection_type.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/psycopg/replication_connection_type.c b/psycopg/replication_connection_type.c index 16c52414e..154a0dddd 100644 --- a/psycopg/replication_connection_type.c +++ b/psycopg/replication_connection_type.c @@ -110,6 +110,10 @@ replicationConnection_init(PyObject *obj, PyObject *args, PyObject *kwargs) if (!PyMapping_HasKeyString(kwargs, "replication")) { PyMapping_SetItemString(kwargs, "replication", Text_FromUTF8(repl)); } + /* with physical specify dbname=replication for .pgpass lookup */ + if (self->type == REPLICATION_PHYSICAL) { + PyMapping_SetItemString(kwargs, "dbname", Text_FromUTF8("replication")); + } Py_DECREF(dsn); if (!(dsn = psyco_make_dsn(NULL, NULL, kwargs))) { goto exit; } From 09a4bb70a168799a91f63f1c2039f456c485960f Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 5 Jan 2016 12:31:57 +0100 Subject: [PATCH 53/60] Allow retrying start_replication after syntax or data error. 
--- psycopg/pqpath.c | 7 +++++-- psycopg/replication_cursor.h | 20 +------------------- psycopg/replication_cursor_type.c | 21 +++++++++++---------- tests/test_replication.py | 13 +++++++++++++ 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/psycopg/pqpath.c b/psycopg/pqpath.c index 760fc977b..6d6728ca4 100644 --- a/psycopg/pqpath.c +++ b/psycopg/pqpath.c @@ -1870,8 +1870,11 @@ pq_fetch(cursorObject *curs, int no_result) Dprintf("pq_fetch: data from a streaming replication slot (no tuples)"); curs->rowcount = -1; ex = 0; - /* nothing to do here: pq_copy_both will be called separately */ - CLEARPGRES(curs->pgres); + /* Nothing to do here: pq_copy_both will be called separately. + + Also don't clear the result status: it's checked in + consume_stream. */ + /*CLEARPGRES(curs->pgres);*/ break; case PGRES_TUPLES_OK: diff --git a/psycopg/replication_cursor.h b/psycopg/replication_cursor.h index 36ced1389..71c6e190b 100644 --- a/psycopg/replication_cursor.h +++ b/psycopg/replication_cursor.h @@ -38,11 +38,10 @@ extern HIDDEN PyTypeObject replicationCursorType; typedef struct replicationCursorObject { cursorObject cur; - int started:1; /* if replication is started */ int consuming:1; /* if running the consume loop */ int decode:1; /* if we should use character decoding on the messages */ - struct timeval last_io ; /* timestamp of the last exchange with the server */ + struct timeval last_io; /* timestamp of the last exchange with the server */ struct timeval keepalive_interval; /* interval for keepalive messages in replication mode */ XLogRecPtr write_lsn; /* LSNs for replication feedback messages */ @@ -53,23 +52,6 @@ typedef struct replicationCursorObject { RAISES_NEG int psyco_repl_curs_datetime_init(void); -/* exception-raising macros */ -#define EXC_IF_REPLICATING(self, cmd) \ -do \ - if ((self)->started) { \ - PyErr_SetString(ProgrammingError, \ - #cmd " cannot be used when replication is already in progress"); \ - return NULL; } \ -while (0) - 
-#define EXC_IF_NOT_REPLICATING(self, cmd) \ -do \ - if (!(self)->started) { \ - PyErr_SetString(ProgrammingError, \ - #cmd " cannot be used when replication is not in progress"); \ - return NULL; } \ -while (0) - #ifdef __cplusplus } #endif diff --git a/psycopg/replication_cursor_type.c b/psycopg/replication_cursor_type.c index f652984e2..204ff20aa 100644 --- a/psycopg/replication_cursor_type.c +++ b/psycopg/replication_cursor_type.c @@ -59,7 +59,6 @@ psyco_repl_curs_start_replication_expert(replicationCursorObject *self, EXC_IF_CURS_CLOSED(curs); EXC_IF_GREEN(start_replication_expert); EXC_IF_TPC_PREPARED(conn, start_replication_expert); - EXC_IF_REPLICATING(self, start_replication_expert); Dprintf("psyco_repl_curs_start_replication_expert: '%s'; decode: %d", command, decode); @@ -67,7 +66,6 @@ psyco_repl_curs_start_replication_expert(replicationCursorObject *self, res = Py_None; Py_INCREF(res); - self->started = 1; self->decode = decode; gettimeofday(&self->last_io, NULL); } @@ -96,7 +94,13 @@ psyco_repl_curs_consume_stream(replicationCursorObject *self, EXC_IF_CURS_ASYNC(curs, consume_stream); EXC_IF_GREEN(consume_stream); EXC_IF_TPC_PREPARED(self->cur.conn, consume_stream); - EXC_IF_NOT_REPLICATING(self, consume_stream); + + Dprintf("psyco_repl_curs_consume_stream"); + + if (keepalive_interval < 1.0) { + psyco_set_error(ProgrammingError, curs, "keepalive_interval must be >= 1 (sec)"); + return NULL; + } if (self->consuming) { PyErr_SetString(ProgrammingError, @@ -104,12 +108,12 @@ psyco_repl_curs_consume_stream(replicationCursorObject *self, return NULL; } - Dprintf("psyco_repl_curs_consume_stream"); - - if (keepalive_interval < 1.0) { - psyco_set_error(ProgrammingError, curs, "keepalive_interval must be >= 1 (sec)"); + if (curs->pgres == NULL || PQresultStatus(curs->pgres) != PGRES_COPY_BOTH) { + PyErr_SetString(ProgrammingError, + "consume_stream: not replicating, call start_replication first"); return NULL; } + CLEARPGRES(curs->pgres); self->consuming = 1; 
@@ -135,7 +139,6 @@ psyco_repl_curs_read_message(replicationCursorObject *self) EXC_IF_CURS_CLOSED(curs); EXC_IF_GREEN(read_message); EXC_IF_TPC_PREPARED(self->cur.conn, read_message); - EXC_IF_NOT_REPLICATING(self, read_message); if (pq_read_replication_message(self, &msg) < 0) { return NULL; @@ -160,7 +163,6 @@ psyco_repl_curs_send_feedback(replicationCursorObject *self, static char* kwlist[] = {"write_lsn", "flush_lsn", "apply_lsn", "reply", NULL}; EXC_IF_CURS_CLOSED(curs); - EXC_IF_NOT_REPLICATING(self, send_feedback); if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|KKKi", kwlist, &write_lsn, &flush_lsn, &apply_lsn, &reply)) { @@ -246,7 +248,6 @@ static struct PyGetSetDef replicationCursorObject_getsets[] = { static int replicationCursor_setup(replicationCursorObject* self) { - self->started = 0; self->consuming = 0; self->decode = 0; diff --git a/tests/test_replication.py b/tests/test_replication.py index 4441a2666..a316135f1 100644 --- a/tests/test_replication.py +++ b/tests/test_replication.py @@ -118,6 +118,18 @@ def test_start_on_missing_replication_slot(self): self.create_replication_slot(cur) cur.start_replication(self.slot) + @skip_before_postgres(9, 4) # slots require 9.4 + def test_start_and_recover_from_error(self): + conn = self.repl_connect(connection_factory=LogicalReplicationConnection) + if conn is None: return + cur = conn.cursor() + + self.create_replication_slot(cur, output_plugin='test_decoding') + + self.assertRaises(psycopg2.DataError, cur.start_replication, + slot_name=self.slot, options=dict(invalid_param='value')) + cur.start_replication(slot_name=self.slot) + @skip_before_postgres(9, 4) # slots require 9.4 def test_stop_replication(self): conn = self.repl_connect(connection_factory=LogicalReplicationConnection) @@ -162,6 +174,7 @@ def consume(msg): cur.send_feedback(flush_lsn=msg.data_start) + # cannot be used in asynchronous mode self.assertRaises(psycopg2.ProgrammingError, cur.consume_stream, consume) def process_stream(): From 
5d33b39829d9733aa322246e12e2078a18d283b5 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 21 Jan 2016 15:56:27 +0100 Subject: [PATCH 54/60] Fix error test for invalid START_REPLICATION command. --- tests/test_replication.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_replication.py b/tests/test_replication.py index a316135f1..f527edd2b 100644 --- a/tests/test_replication.py +++ b/tests/test_replication.py @@ -126,8 +126,14 @@ def test_start_and_recover_from_error(self): self.create_replication_slot(cur, output_plugin='test_decoding') - self.assertRaises(psycopg2.DataError, cur.start_replication, - slot_name=self.slot, options=dict(invalid_param='value')) + # try with invalid options + cur.start_replication(slot_name=self.slot, options={'invalid_param': 'value'}) + def consume(msg): + pass + # we don't see the error from the server before we try to read the data + self.assertRaises(psycopg2.DataError, cur.consume_stream, consume) + + # try with correct command cur.start_replication(slot_name=self.slot) @skip_before_postgres(9, 4) # slots require 9.4 From da6e061ee8c6e3a5cad4386daf5aa814cdb7be80 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 8 Mar 2016 15:44:29 +0100 Subject: [PATCH 55/60] Use python-defined make_dsn() for ReplicationConnection class --- lib/__init__.py | 2 +- lib/extras.py | 15 +- psycopg/cursor.h | 2 - psycopg/psycopg.h | 5 - psycopg/psycopgmodule.c | 209 +------------------------- psycopg/replication_connection.h | 6 +- psycopg/replication_connection_type.c | 105 +++++++------ tests/test_module.py | 2 +- 8 files changed, 71 insertions(+), 275 deletions(-) diff --git a/lib/__init__.py b/lib/__init__.py index 2cc0acb23..829e29ebf 100644 --- a/lib/__init__.py +++ b/lib/__init__.py @@ -56,7 +56,7 @@ from psycopg2._psycopg import IntegrityError, InterfaceError, InternalError from psycopg2._psycopg import NotSupportedError, OperationalError -from psycopg2._psycopg import _connect, 
parse_args, apilevel, threadsafety, paramstyle +from psycopg2._psycopg import _connect, apilevel, threadsafety, paramstyle from psycopg2._psycopg import __version__, __libpq_version__ from psycopg2 import tz diff --git a/lib/extras.py b/lib/extras.py index 6e815d69f..78452239f 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -441,25 +441,14 @@ def callproc(self, procname, vars=None): return LoggingCursor.callproc(self, procname, vars) -class ReplicationConnectionBase(_replicationConnection): - """ - Base class for Logical and Physical replication connection - classes. Uses `ReplicationCursor` automatically. - """ - - def __init__(self, *args, **kwargs): - super(ReplicationConnectionBase, self).__init__(*args, **kwargs) - self.cursor_factory = ReplicationCursor - - -class LogicalReplicationConnection(ReplicationConnectionBase): +class LogicalReplicationConnection(_replicationConnection): def __init__(self, *args, **kwargs): kwargs['replication_type'] = REPLICATION_LOGICAL super(LogicalReplicationConnection, self).__init__(*args, **kwargs) -class PhysicalReplicationConnection(ReplicationConnectionBase): +class PhysicalReplicationConnection(_replicationConnection): def __init__(self, *args, **kwargs): kwargs['replication_type'] = REPLICATION_PHYSICAL diff --git a/psycopg/cursor.h b/psycopg/cursor.h index 44d8a47a4..5170900f1 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -72,8 +72,6 @@ struct cursorObject { #define DEFAULT_COPYSIZE 16384 #define DEFAULT_COPYBUFF 8192 - /* replication cursor attrs */ - PyObject *tuple_factory; /* factory for result tuples */ PyObject *tzinfo_factory; /* factory for tzinfo objects */ diff --git a/psycopg/psycopg.h b/psycopg/psycopg.h index 8134a83f1..7834cf679 100644 --- a/psycopg/psycopg.h +++ b/psycopg/psycopg.h @@ -120,11 +120,6 @@ typedef struct connectionObject connectionObject; typedef struct replicationMessageObject replicationMessageObject; /* some utility functions */ -HIDDEN PyObject *parse_arg(int pos, char *name, 
PyObject *defval, PyObject *args, PyObject *kwargs); -HIDDEN PyObject *psyco_parse_args(PyObject *self, PyObject *args, PyObject *kwargs); -HIDDEN PyObject *psyco_parse_dsn(PyObject *self, PyObject *args, PyObject *kwargs); -HIDDEN PyObject *psyco_make_dsn(PyObject *self, PyObject *args, PyObject *kwargs); - RAISES HIDDEN PyObject *psyco_set_error(PyObject *exc, cursorObject *curs, const char *msg); HIDDEN char *psycopg_escape_string(connectionObject *conn, diff --git a/psycopg/psycopgmodule.c b/psycopg/psycopgmodule.c index fc9a8ebdd..5e8eb5b76 100644 --- a/psycopg/psycopgmodule.c +++ b/psycopg/psycopgmodule.c @@ -74,103 +74,23 @@ HIDDEN PyObject *psyco_null = NULL; HIDDEN PyObject *psyco_DescriptionType = NULL; -/* finds a keyword or positional arg (pops it from kwargs if found there) */ -PyObject * -parse_arg(int pos, char *name, PyObject *defval, PyObject *args, PyObject *kwargs) -{ - Py_ssize_t nargs = PyTuple_GET_SIZE(args); - PyObject *val = NULL; - - if (kwargs && PyMapping_HasKeyString(kwargs, name)) { - val = PyMapping_GetItemString(kwargs, name); - Py_XINCREF(val); - PyMapping_DelItemString(kwargs, name); /* pop from the kwargs dict! */ - } - if (nargs > pos) { - if (!val) { - val = PyTuple_GET_ITEM(args, pos); - Py_XINCREF(val); - } else { - PyErr_Format(PyExc_TypeError, - "parse_args() got multiple values for keyword argument '%s'", name); - return NULL; - } - } - if (!val) { - val = defval; - Py_XINCREF(val); - } - - return val; -} - - -#define psyco_parse_args_doc \ -"parse_args(...) 
-- parse connection parameters.\n\n" \ -"Return a tuple of (dsn, connection_factory, async)" - -PyObject * -psyco_parse_args(PyObject *self, PyObject *args, PyObject *kwargs) -{ - Py_ssize_t nargs = PyTuple_GET_SIZE(args); - PyObject *dsn = NULL; - PyObject *factory = NULL; - PyObject *async = NULL; - PyObject *res = NULL; - - if (nargs > 3) { - PyErr_Format(PyExc_TypeError, - "parse_args() takes at most 3 arguments (%d given)", (int)nargs); - goto exit; - } - /* parse and remove all keywords we know, so they are not interpreted as part of DSN */ - if (!(dsn = parse_arg(0, "dsn", Py_None, args, kwargs))) { goto exit; } - if (!(factory = parse_arg(1, "connection_factory", Py_None, - args, kwargs))) { goto exit; } - if (!(async = parse_arg(2, "async", Py_False, args, kwargs))) { goto exit; } - - if (kwargs && PyMapping_Size(kwargs) > 0) { - if (dsn == Py_None) { - Py_DECREF(dsn); - if (!(dsn = psyco_make_dsn(NULL, NULL, kwargs))) { goto exit; } - } else { - PyErr_SetString(PyExc_TypeError, "both dsn and parameters given"); - goto exit; - } - } else { - if (dsn == Py_None) { - PyErr_SetString(PyExc_TypeError, "missing dsn and no parameters"); - goto exit; - } - } - - res = PyTuple_Pack(3, dsn, factory, async); - -exit: - Py_XDECREF(dsn); - Py_XDECREF(factory); - Py_XDECREF(async); - - return res; -} - - /** connect module-level function **/ #define psyco_connect_doc \ -"_connect(dsn, [connection_factory], [async], **kwargs) -- New database connection.\n\n" +"_connect(dsn, [connection_factory], [async]) -- New database connection.\n\n" static PyObject * psyco_connect(PyObject *self, PyObject *args, PyObject *keywds) { PyObject *conn = NULL; - PyObject *tuple = NULL; PyObject *factory = NULL; const char *dsn = NULL; int async = 0; - if (!(tuple = psyco_parse_args(self, args, keywds))) { goto exit; } - - if (!PyArg_ParseTuple(tuple, "s|Oi", &dsn, &factory, &async)) { goto exit; } + static char *kwlist[] = {"dsn", "connection_factory", "async", NULL}; + if 
(!PyArg_ParseTupleAndKeywords(args, keywds, "s|Oi", kwlist, + &dsn, &factory, &async)) { + return NULL; + } Dprintf("psyco_connect: dsn = '%s', async = %d", dsn, async); @@ -192,9 +112,6 @@ psyco_connect(PyObject *self, PyObject *args, PyObject *keywds) conn = PyObject_CallFunction(factory, "si", dsn, async); } -exit: - Py_XDECREF(tuple); - return conn; } @@ -202,7 +119,7 @@ psyco_connect(PyObject *self, PyObject *args, PyObject *keywds) #define psyco_parse_dsn_doc \ "parse_dsn(dsn) -> dict -- parse a connection string into parameters" -PyObject * +static PyObject * psyco_parse_dsn(PyObject *self, PyObject *args, PyObject *kwargs) { char *err = NULL; @@ -254,114 +171,6 @@ psyco_parse_dsn(PyObject *self, PyObject *args, PyObject *kwargs) } -#define psyco_make_dsn_doc "make_dsn(**kwargs) -> str" - -PyObject * -psyco_make_dsn(PyObject *self, PyObject *args, PyObject *kwargs) -{ - Py_ssize_t len, pos; - PyObject *res = NULL; - PyObject *key = NULL, *value = NULL; - PyObject *newkey, *newval; - PyObject *dict = NULL; - char *str = NULL, *p, *q; - - if (args && (len = PyTuple_Size(args)) > 0) { - PyErr_Format(PyExc_TypeError, "make_dsn() takes no arguments (%d given)", (int)len); - goto exit; - } - if (kwargs == NULL) { - return Text_FromUTF8(""); - } - - /* iterate through kwargs, calculating the total resulting string - length and saving prepared key/values to a temp. 
dict */ - if (!(dict = PyDict_New())) { goto exit; } - - len = 0; - pos = 0; - while (PyDict_Next(kwargs, &pos, &key, &value)) { - if (value == NULL || value == Py_None) { continue; } - - Py_INCREF(key); /* for ensure_bytes */ - if (!(newkey = psycopg_ensure_bytes(key))) { goto exit; } - - /* special handling of 'database' keyword */ - if (strcmp(Bytes_AsString(newkey), "database") == 0) { - key = Bytes_FromString("dbname"); - Py_DECREF(newkey); - } else { - key = newkey; - } - - /* now transform the value */ - if (Bytes_CheckExact(value)) { - Py_INCREF(value); - } else if (PyUnicode_CheckExact(value)) { - if (!(value = PyUnicode_AsUTF8String(value))) { goto exit; } - } else { - /* this could be port=5432, so we need to get the text representation */ - if (!(value = PyObject_Str(value))) { goto exit; } - /* and still ensure it's bytes() (but no need to incref here) */ - if (!(value = psycopg_ensure_bytes(value))) { goto exit; } - } - - /* passing NULL for plen checks for NIL bytes in content and errors out */ - if (Bytes_AsStringAndSize(value, &str, NULL) < 0) { goto exit; } - /* escape any special chars */ - if (!(str = psycopg_escape_conninfo(str, 0))) { goto exit; } - if (!(newval = Bytes_FromString(str))) { - goto exit; - } - PyMem_Free(str); - str = NULL; - Py_DECREF(value); - value = newval; - - /* finally put into the temp. 
dict */ - if (PyDict_SetItem(dict, key, value) < 0) { goto exit; } - - len += Bytes_GET_SIZE(key) + Bytes_GET_SIZE(value) + 2; /* =, space or NIL */ - - Py_DECREF(key); - Py_DECREF(value); - } - key = NULL; - value = NULL; - - if (!(str = PyMem_Malloc(len))) { - PyErr_NoMemory(); - goto exit; - } - - p = str; - pos = 0; - while (PyDict_Next(dict, &pos, &newkey, &newval)) { - if (p != str) { - *(p++) = ' '; - } - if (Bytes_AsStringAndSize(newkey, &q, &len) < 0) { goto exit; } - strncpy(p, q, len); - p += len; - *(p++) = '='; - if (Bytes_AsStringAndSize(newval, &q, &len) < 0) { goto exit; } - strncpy(p, q, len); - p += len; - } - *p = '\0'; - - res = Text_FromUTF8AndSize(str, p - str); - -exit: - PyMem_Free(str); - Py_XDECREF(key); - Py_XDECREF(value); - Py_XDECREF(dict); - - return res; -} - - #define psyco_quote_ident_doc \ "quote_ident(str, conn_or_curs) -> str -- wrapper around PQescapeIdentifier\n\n" \ ":Parameters:\n" \ @@ -1016,12 +825,8 @@ psyco_make_description_type(void) static PyMethodDef psycopgMethods[] = { {"_connect", (PyCFunction)psyco_connect, METH_VARARGS|METH_KEYWORDS, psyco_connect_doc}, - {"parse_args", (PyCFunction)psyco_parse_args, - METH_VARARGS|METH_KEYWORDS, psyco_parse_args_doc}, {"parse_dsn", (PyCFunction)psyco_parse_dsn, METH_VARARGS|METH_KEYWORDS, psyco_parse_dsn_doc}, - {"make_dsn", (PyCFunction)psyco_make_dsn, - METH_VARARGS|METH_KEYWORDS, psyco_make_dsn_doc}, {"quote_ident", (PyCFunction)psyco_quote_ident, METH_VARARGS|METH_KEYWORDS, psyco_quote_ident_doc}, {"adapt", (PyCFunction)psyco_microprotocols_adapt, diff --git a/psycopg/replication_connection.h b/psycopg/replication_connection.h index 9198f5def..e693038ac 100644 --- a/psycopg/replication_connection.h +++ b/psycopg/replication_connection.h @@ -40,8 +40,10 @@ typedef struct replicationConnectionObject { long int type; } replicationConnectionObject; -#define REPLICATION_PHYSICAL 1 -#define REPLICATION_LOGICAL 2 +/* The funny constant values should help to avoid mixups with some + 
commonly used numbers like 1 and 2. */ +#define REPLICATION_PHYSICAL 12345678 +#define REPLICATION_LOGICAL 87654321 extern HIDDEN PyObject *replicationPhysicalConst; extern HIDDEN PyObject *replicationLogicalConst; diff --git a/psycopg/replication_connection_type.c b/psycopg/replication_connection_type.c index 154a0dddd..5e5d2229d 100644 --- a/psycopg/replication_connection_type.c +++ b/psycopg/replication_connection_type.c @@ -58,81 +58,88 @@ psyco_repl_conn_get_type(replicationConnectionObject *self) return res; } + static int replicationConnection_init(PyObject *obj, PyObject *args, PyObject *kwargs) { replicationConnectionObject *self = (replicationConnectionObject *)obj; - PyObject *dsn = NULL; - PyObject *async = NULL; - PyObject *tmp = NULL; - const char *repl = NULL; + PyObject *dsn = NULL, *replication_type = NULL, + *item = NULL, *ext = NULL, *make_dsn = NULL, + *extras = NULL, *cursor = NULL; + int async = 0; int ret = -1; - Py_XINCREF(args); - Py_XINCREF(kwargs); + /* 'replication_type' is not actually optional, but there's no + good way to put it before 'async' in the list */ + static char *kwlist[] = {"dsn", "async", "replication_type", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|iO", kwlist, + &dsn, &async, &replication_type)) { return ret; } + + /* + We have to call make_dsn() to add replication-specific + connection parameters, because the DSN might be an URI (if there + were no keyword arguments to connect() it is passed unchanged). 
+ */ + /* we reuse args and kwargs to call make_dsn() and parent type's tp_init() */ + if (!(kwargs = PyDict_New())) { return ret; } + Py_INCREF(args); + + /* we also reuse the dsn to hold the result of the make_dsn() call */ + Py_INCREF(dsn); + + if (!(ext = PyImport_ImportModule("psycopg2.extensions"))) { goto exit; } + if (!(make_dsn = PyObject_GetAttrString(ext, "make_dsn"))) { goto exit; } + + /* all the nice stuff is located in python-level ReplicationCursor class */ + if (!(extras = PyImport_ImportModule("psycopg2.extras"))) { goto exit; } + if (!(cursor = PyObject_GetAttrString(extras, "ReplicationCursor"))) { goto exit; } + + /* checking the object reference helps to avoid recognizing + unrelated integer constants as valid input values */ + if (replication_type == replicationPhysicalConst) { + self->type = REPLICATION_PHYSICAL; - /* dsn, async, replication_type */ - if (!(dsn = parse_arg(0, "dsn", Py_None, args, kwargs))) { goto exit; } - if (!(async = parse_arg(1, "async", Py_False, args, kwargs))) { goto exit; } - if (!(tmp = parse_arg(2, "replication_type", Py_None, args, kwargs))) { goto exit; } +#define SET_ITEM(k, v) \ + if (!(item = Text_FromUTF8(#v))) { goto exit; } \ + if (PyDict_SetItemString(kwargs, #k, item) != 0) { goto exit; } \ + Py_DECREF(item); \ + item = NULL; - if (tmp == replicationPhysicalConst) { - self->type = REPLICATION_PHYSICAL; - repl = "true"; - } else if (tmp == replicationLogicalConst) { + SET_ITEM(replication, true); + SET_ITEM(dbname, replication); /* required for .pgpass lookup */ + } else if (replication_type == replicationLogicalConst) { self->type = REPLICATION_LOGICAL; - repl = "database"; + + SET_ITEM(replication, database); +#undef SET_ITEM } else { PyErr_SetString(PyExc_TypeError, "replication_type must be either REPLICATION_PHYSICAL or REPLICATION_LOGICAL"); goto exit; } - Py_DECREF(tmp); - tmp = NULL; - - if (dsn != Py_None) { - if (kwargs && PyMapping_Size(kwargs) > 0) { - PyErr_SetString(PyExc_TypeError, "both 
dsn and parameters given"); - goto exit; - } else { - if (!(tmp = PyTuple_Pack(1, dsn))) { goto exit; } - - Py_XDECREF(kwargs); - if (!(kwargs = psyco_parse_dsn(NULL, tmp, NULL))) { goto exit; } - } - } else { - if (!(kwargs && PyMapping_Size(kwargs) > 0)) { - PyErr_SetString(PyExc_TypeError, "missing dsn and no parameters"); - goto exit; - } - } - if (!PyMapping_HasKeyString(kwargs, "replication")) { - PyMapping_SetItemString(kwargs, "replication", Text_FromUTF8(repl)); - } - /* with physical specify dbname=replication for .pgpass lookup */ - if (self->type == REPLICATION_PHYSICAL) { - PyMapping_SetItemString(kwargs, "dbname", Text_FromUTF8("replication")); - } + Py_DECREF(args); + if (!(args = PyTuple_Pack(1, dsn))) { goto exit; } Py_DECREF(dsn); - if (!(dsn = psyco_make_dsn(NULL, NULL, kwargs))) { goto exit; } + if (!(dsn = PyObject_Call(make_dsn, args, kwargs))) { goto exit; } Py_DECREF(args); - Py_DECREF(kwargs); - kwargs = NULL; - if (!(args = PyTuple_Pack(2, dsn, async))) { goto exit; } + if (!(args = Py_BuildValue("(Oi)", dsn, async))) { goto exit; } + /* only attempt the connection once we've handled all possible errors */ if ((ret = connectionType.tp_init(obj, args, NULL)) < 0) { goto exit; } self->conn.autocommit = 1; - self->conn.cursor_factory = (PyObject *)&replicationCursorType; - Py_INCREF(self->conn.cursor_factory); + Py_INCREF(self->conn.cursor_factory = cursor); exit: - Py_XDECREF(tmp); + Py_XDECREF(item); + Py_XDECREF(ext); + Py_XDECREF(make_dsn); + Py_XDECREF(extras); + Py_XDECREF(cursor); Py_XDECREF(dsn); - Py_XDECREF(async); Py_XDECREF(args); Py_XDECREF(kwargs); diff --git a/tests/test_module.py b/tests/test_module.py index 7d4ae9a30..1a9a19d48 100755 --- a/tests/test_module.py +++ b/tests/test_module.py @@ -40,7 +40,7 @@ def conect_stub(dsn, connection_factory=None, async=False): self.args = (dsn, connection_factory, async) self._connect_orig = psycopg2._connect - psycopg2._connect = connect_stub + psycopg2._connect = conect_stub def 
tearDown(self): psycopg2._connect = self._connect_orig From 1d52f34e6045648397709c74b3c2538404dc679a Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 8 Mar 2016 18:23:32 +0100 Subject: [PATCH 56/60] We don't need to expose cursor_init(), call tp_init() on the type instead. --- psycopg/cursor.h | 3 --- psycopg/cursor_type.c | 2 +- psycopg/replication_cursor_type.c | 13 ++++--------- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/psycopg/cursor.h b/psycopg/cursor.h index 5170900f1..e291d45f6 100644 --- a/psycopg/cursor.h +++ b/psycopg/cursor.h @@ -90,14 +90,11 @@ struct cursorObject { /* C-callable functions in cursor_int.c and cursor_type.c */ -HIDDEN int cursor_init(PyObject *obj, PyObject *args, PyObject *kwargs); - BORROWED HIDDEN PyObject *curs_get_cast(cursorObject *self, PyObject *oid); HIDDEN void curs_reset(cursorObject *self); HIDDEN int psyco_curs_withhold_set(cursorObject *self, PyObject *pyvalue); HIDDEN int psyco_curs_scrollable_set(cursorObject *self, PyObject *pyvalue); - /* exception-raising macros */ #define EXC_IF_CURS_CLOSED(self) \ do { \ diff --git a/psycopg/cursor_type.c b/psycopg/cursor_type.c index 63bd5a103..cd8d5ca3f 100644 --- a/psycopg/cursor_type.c +++ b/psycopg/cursor_type.c @@ -1901,7 +1901,7 @@ cursor_dealloc(PyObject* obj) Py_TYPE(obj)->tp_free(obj); } -int +static int cursor_init(PyObject *obj, PyObject *args, PyObject *kwargs) { PyObject *conn; diff --git a/psycopg/replication_cursor_type.c b/psycopg/replication_cursor_type.c index f652984e2..8d96c0e1e 100644 --- a/psycopg/replication_cursor_type.c +++ b/psycopg/replication_cursor_type.c @@ -244,8 +244,10 @@ static struct PyGetSetDef replicationCursorObject_getsets[] = { }; static int -replicationCursor_setup(replicationCursorObject* self) +replicationCursor_init(PyObject *obj, PyObject *args, PyObject *kwargs) { + replicationCursorObject *self = (replicationCursorObject *)obj; + self->started = 0; self->consuming = 0; self->decode = 0; @@ -254,14 +256,7 
@@ replicationCursor_setup(replicationCursorObject* self) self->flush_lsn = 0; self->apply_lsn = 0; - return 0; -} - -static int -replicationCursor_init(PyObject *obj, PyObject *args, PyObject *kwargs) -{ - replicationCursor_setup((replicationCursorObject *)obj); - return cursor_init(obj, args, kwargs); + return cursorType.tp_init(obj, args, kwargs); } static PyObject * From 2de2ed7c6354fb640b43ce24cf45bdb5183fd408 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 8 Mar 2016 18:35:55 +0100 Subject: [PATCH 57/60] Remove some dead code --- psycopg/psycopg.h | 2 -- psycopg/psycopgmodule.c | 2 +- psycopg/utils.c | 44 ----------------------------------------- 3 files changed, 1 insertion(+), 47 deletions(-) diff --git a/psycopg/psycopg.h b/psycopg/psycopg.h index 7834cf679..adda12d93 100644 --- a/psycopg/psycopg.h +++ b/psycopg/psycopg.h @@ -125,8 +125,6 @@ RAISES HIDDEN PyObject *psyco_set_error(PyObject *exc, cursorObject *curs, const HIDDEN char *psycopg_escape_string(connectionObject *conn, const char *from, Py_ssize_t len, char *to, Py_ssize_t *tolen); HIDDEN char *psycopg_escape_identifier_easy(const char *from, Py_ssize_t len); -HIDDEN char *psycopg_escape_conninfo(const char *from, Py_ssize_t len); - HIDDEN int psycopg_strdup(char **to, const char *from, Py_ssize_t len); HIDDEN int psycopg_is_text_file(PyObject *f); diff --git a/psycopg/psycopgmodule.c b/psycopg/psycopgmodule.c index 5e8eb5b76..c08cd70ef 100644 --- a/psycopg/psycopgmodule.c +++ b/psycopg/psycopgmodule.c @@ -73,7 +73,6 @@ HIDDEN PyObject *psyco_null = NULL; /* The type of the cursor.description items */ HIDDEN PyObject *psyco_DescriptionType = NULL; - /** connect module-level function **/ #define psyco_connect_doc \ "_connect(dsn, [connection_factory], [async]) -- New database connection.\n\n" @@ -87,6 +86,7 @@ psyco_connect(PyObject *self, PyObject *args, PyObject *keywds) int async = 0; static char *kwlist[] = {"dsn", "connection_factory", "async", NULL}; + if 
(!PyArg_ParseTupleAndKeywords(args, keywds, "s|Oi", kwlist, &dsn, &factory, &async)) { return NULL; diff --git a/psycopg/utils.c b/psycopg/utils.c index e9dc3ba6f..ec8e47c88 100644 --- a/psycopg/utils.c +++ b/psycopg/utils.c @@ -124,50 +124,6 @@ psycopg_escape_identifier_easy(const char *from, Py_ssize_t len) return rv; } -char * -psycopg_escape_conninfo(const char *from, Py_ssize_t len) -{ - char *rv = NULL; - const char *src; - const char *end; - char *dst; - int space = 0; - - if (!len) { len = strlen(from); } - end = from + len; - - if (!(rv = PyMem_Malloc(3 + 2 * len))) { - PyErr_NoMemory(); - return NULL; - } - - /* check for any whitespace or empty string */ - if (from < end && *from) { - for (src = from; src < end && *src; ++src) { - if (isspace(*src)) { - space = 1; - break; - } - } - } else { - /* empty string: we should produce '' */ - space = 1; - } - - dst = rv; - if (space) { *(dst++) = '\''; } - /* scan and copy */ - for (src = from; src < end && *src; ++src, ++dst) { - if (*src == '\'' || *src == '\\') - *(dst++) = '\\'; - *dst = *src; - } - if (space) { *(dst++) = '\''; } - *dst = '\0'; - - return rv; -} - /* Duplicate a string. * * Allocate a new buffer on the Python heap containing the new string. 
From b21c8f7a4e4c64795e8a10c30f68531ad1d9580f Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 8 Mar 2016 18:34:22 +0100 Subject: [PATCH 58/60] Move replication-related imports to extras.py --- lib/extensions.py | 2 -- lib/extras.py | 8 ++++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/extensions.py b/lib/extensions.py index 65792fc72..213009856 100644 --- a/lib/extensions.py +++ b/lib/extensions.py @@ -65,8 +65,6 @@ from psycopg2._psycopg import ISQLQuote, Notify, Diagnostics, Column from psycopg2._psycopg import QueryCanceledError, TransactionRollbackError -from psycopg2._psycopg import REPLICATION_PHYSICAL, REPLICATION_LOGICAL -from psycopg2._psycopg import ReplicationConnection, ReplicationCursor, ReplicationMessage try: from psycopg2._psycopg import set_wait_callback, get_wait_callback diff --git a/lib/extras.py b/lib/extras.py index 78452239f..6ae98517d 100644 --- a/lib/extras.py +++ b/lib/extras.py @@ -39,12 +39,12 @@ from psycopg2 import extensions as _ext from psycopg2.extensions import cursor as _cursor from psycopg2.extensions import connection as _connection -from psycopg2.extensions import REPLICATION_PHYSICAL, REPLICATION_LOGICAL -from psycopg2.extensions import ReplicationConnection as _replicationConnection -from psycopg2.extensions import ReplicationCursor as _replicationCursor -from psycopg2.extensions import ReplicationMessage from psycopg2.extensions import adapt as _A, quote_ident from psycopg2.extensions import b +from psycopg2._psycopg import REPLICATION_PHYSICAL, REPLICATION_LOGICAL +from psycopg2._psycopg import ReplicationConnection as _replicationConnection +from psycopg2._psycopg import ReplicationCursor as _replicationCursor +from psycopg2._psycopg import ReplicationMessage class DictCursorBase(_cursor): From 3f10b4dd315e6d86813e302a6ed7d0143b7484ec Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Tue, 8 Mar 2016 18:27:57 +0100 Subject: [PATCH 59/60] Remove duplicated doc for make_dsn() --- 
doc/src/extensions.rst | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/doc/src/extensions.rst b/doc/src/extensions.rst index 1a0e154ef..b661895da 100644 --- a/doc/src/extensions.rst +++ b/doc/src/extensions.rst @@ -19,28 +19,6 @@ Instances of these classes are usually returned by factory functions or attributes. Their definitions are exposed here to allow subclassing, introspection etc. -.. function:: make_dsn(**kwargs) - - Wrap keyword parameters into a connection string, applying necessary - quoting and escaping any special characters (namely, single quote and - backslash). - - Example (note the order of parameters in the resulting string is - arbitrary):: - - >>> psycopg2.extensions.make_dsn(dbname='test', user='postgres', password='secret') - 'user=postgres dbname=test password=secret' - - As a special case, the *database* keyword is translated to *dbname*:: - - >>> psycopg2.extensions.make_dsn(database='test') - 'dbname=test' - - An example of quoting (using `print()` for clarity):: - - >>> print(psycopg2.extensions.make_dsn(database='test', password="some\\thing ''special")) - password='some\\thing \'\'special' dbname=test - .. class:: connection(dsn, async=False) Is the class usually returned by the `~psycopg2.connect()` function. 
From d5443c65fde6cae87a1dcd901f31b6cdca7a1811 Mon Sep 17 00:00:00 2001 From: Oleksandr Shulgin Date: Thu, 21 Apr 2016 15:32:05 +0200 Subject: [PATCH 60/60] Fix TODOs in ReplicationMessage inline docs --- psycopg/replication_message_type.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/psycopg/replication_message_type.c b/psycopg/replication_message_type.c index f607d2ba8..358d14970 100644 --- a/psycopg/replication_message_type.c +++ b/psycopg/replication_message_type.c @@ -125,15 +125,15 @@ psyco_replmsg_get_send_time(replicationMessageObject *self) static struct PyMemberDef replicationMessageObject_members[] = { {"cursor", T_OBJECT, OFFSETOF(cursor), READONLY, - "TODO"}, + "Related ReplicationCursor object."}, {"payload", T_OBJECT, OFFSETOF(payload), READONLY, - "TODO"}, + "The actual message data."}, {"data_size", T_INT, OFFSETOF(data_size), READONLY, - "TODO"}, + "Raw size of the message data in bytes."}, {"data_start", T_ULONGLONG, OFFSETOF(data_start), READONLY, - "TODO"}, + "LSN position of the start of this message."}, {"wal_end", T_ULONGLONG, OFFSETOF(wal_end), READONLY, - "TODO"}, + "LSN position of the current end of WAL on the server."}, {NULL} };