Skip to content

Commit

Permalink
Make logrepl markers for (partial) decompressions (#5805)
Browse files Browse the repository at this point in the history
Added logical replication messages (PG14+) as markers for (partial)
decompression events (mutable compression), which makes it possible to
differentiate inserts happening as part of the decompression vs actual
inserts by the user, and filter the former out of the event stream.
While some tools may be interested in all events, syncing only the pure
"state" (without internal bookkeeping operations) is required for others.

As of now this PR is missing tests. I wonder if anyone has a good idea
how to create an automatic test for it.
  • Loading branch information
noctarius committed Aug 9, 2023
1 parent a9505b4 commit b5b46a3
Show file tree
Hide file tree
Showing 6 changed files with 282 additions and 1 deletion.
1 change: 1 addition & 0 deletions .unreleased/PR_5805
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implements: #5805 Make logrepl markers for (partial) decompressions
14 changes: 14 additions & 0 deletions src/guc.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ bool ts_guc_enable_now_constify = true;
bool ts_guc_enable_osm_reads = true;
TSDLLEXPORT bool ts_guc_enable_dml_decompression = true;
TSDLLEXPORT bool ts_guc_enable_transparent_decompression = true;
TSDLLEXPORT bool ts_guc_enable_decompression_logrep_markers = false;
TSDLLEXPORT bool ts_guc_enable_decompression_sorted_merge = true;
bool ts_guc_enable_per_data_node_queries = true;
bool ts_guc_enable_parameterized_data_node_scan = true;
Expand Down Expand Up @@ -368,6 +369,19 @@ _guc_init(void)
NULL,
NULL);

DefineCustomBoolVariable("timescaledb.enable_decompression_logrep_markers",
"Enable logical replication markers for decompression ops",
"Enable the generation of logical replication markers in the "
"WAL stream to mark the start and end of decompressions (for insert, "
"update, and delete operations)",
&ts_guc_enable_decompression_logrep_markers,
false,
PGC_SIGHUP,
0,
NULL,
NULL,
NULL);

DefineCustomBoolVariable("timescaledb.enable_decompression_sorted_merge",
"Enable compressed batches heap merge",
"Enable the merge of compressed batches to preserve the compression "
Expand Down
1 change: 1 addition & 0 deletions src/guc.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ extern bool ts_guc_enable_now_constify;
extern bool ts_guc_enable_osm_reads;
extern TSDLLEXPORT bool ts_guc_enable_dml_decompression;
extern TSDLLEXPORT bool ts_guc_enable_transparent_decompression;
extern TSDLLEXPORT bool ts_guc_enable_decompression_logrep_markers;
extern TSDLLEXPORT bool ts_guc_enable_decompression_sorted_merge;
extern TSDLLEXPORT bool ts_guc_enable_per_data_node_queries;
extern TSDLLEXPORT bool ts_guc_enable_parameterized_data_node_scan;
Expand Down
39 changes: 39 additions & 0 deletions tsl/src/compression/compression.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <utils/syscache.h>
#include <utils/tuplesort.h>
#include <utils/typcache.h>
#include <replication/message.h>

#include "compat/compat.h"

Expand All @@ -69,6 +70,39 @@ static const CompressionAlgorithmDefinition definitions[_END_COMPRESSION_ALGORIT
[COMPRESSION_ALGORITHM_DELTADELTA] = DELTA_DELTA_ALGORITHM_DEFINITION,
};

#if PG14_GE
/* Prefix of the logical replication message emitted into the WAL stream
 * (via LogLogicalMessage) immediately before the inserts produced by a
 * (partial) decompression, so downstream consumers can tell them apart
 * from user-issued inserts.
 */
#define DECOMPRESSION_MARKER_START "::timescaledb-decompression-start"
/* Prefix of the logical replication message emitted into the WAL stream
 * immediately after all decompression inserts have finished.
 */
#define DECOMPRESSION_MARKER_END "::timescaledb-decompression-end"
#endif

/*
 * Emit a transactional logical replication message marking the start of a
 * decompression operation.
 *
 * The marker is only written when the GUC
 * timescaledb.enable_decompression_logrep_markers is enabled and logical
 * decoding information is being WAL-logged (XLogLogicalInfoActive()).
 * Logical replication messages require PG14+; on older versions this
 * function is a no-op.
 */
static inline void
write_logical_replication_msg_decompression_start(void)
{
#if PG14_GE
	if (ts_guc_enable_decompression_logrep_markers && XLogLogicalInfoActive())
	{
		/* transactional=true: the message is decoded as part of the
		 * surrounding transaction, keeping it ordered with the inserts */
		LogLogicalMessage(DECOMPRESSION_MARKER_START, "", 0, true);
	}
#endif
}

/*
 * Emit a transactional logical replication message marking the end of a
 * decompression operation.
 *
 * Mirrors write_logical_replication_msg_decompression_start(): gated on the
 * timescaledb.enable_decompression_logrep_markers GUC and on logical WAL
 * level (XLogLogicalInfoActive()); no-op before PG14.
 */
static inline void
write_logical_replication_msg_decompression_end(void)
{
#if PG14_GE
	if (ts_guc_enable_decompression_logrep_markers && XLogLogicalInfoActive())
	{
		/* transactional=true keeps the marker ordered within the txn */
		LogLogicalMessage(DECOMPRESSION_MARKER_END, "", 0, true);
	}
#endif
}

static Compressor *
compressor_for_algorithm_and_type(CompressionAlgorithms algorithm, Oid type)
{
Expand Down Expand Up @@ -2028,7 +2062,9 @@ decompress_batches_for_insert(ChunkInsertState *cis, Chunk *chunk, TupleTableSlo
decompressor.compressed_datums,
decompressor.compressed_is_nulls);

write_logical_replication_msg_decompression_start();
row_decompressor_decompress_row(&decompressor, NULL);
write_logical_replication_msg_decompression_end();

TM_FailureData tmfd;
TM_Result result pg_attribute_unused();
Expand Down Expand Up @@ -3164,6 +3200,7 @@ decompress_batches_for_update_delete(Chunk *chunk, List *predicates, EState *est
comp_chunk_rel = table_open(comp_chunk->table_id, RowExclusiveLock);
decompressor = build_decompressor(comp_chunk_rel, chunk_rel);

write_logical_replication_msg_decompression_start();
if (filters)
{
scankeys =
Expand Down Expand Up @@ -3202,6 +3239,8 @@ decompress_batches_for_update_delete(Chunk *chunk, List *predicates, EState *est
is_null,
&chunk_status_changed);
}
write_logical_replication_msg_decompression_end();

/*
* tuples from compressed chunk has been decompressed and moved
* to staging area, thus mark this chunk as partially compressed
Expand Down
225 changes: 225 additions & 0 deletions tsl/test/t/009_logrepl_decomp_marker.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
# This file and its contents are licensed under the Timescale License.
# Please see the included NOTICE for copyright information and
# LICENSE-TIMESCALE for a copy of the license.

use strict;
use warnings;
use TimescaleNode;
use Test::More;

# Logical replication messages (pg_logical_emit_message / 'M' protocol
# messages) only exist on PostgreSQL 14+, so skip entirely on older versions.
plan skip_all => "PostgreSQL version < 14" if $ENV{PG_VERSION_MAJOR} < 14;

# This test checks the creation of logical replication messages
# used to mark the start and end of inserts happening as a result
# of a (partial) decompression.

# Publishing node
my $publisher =
TimescaleNode->create('publisher', allows_streaming => 'logical');

# Subscribing node
my $subscriber =
TimescaleNode->create('subscriber', allows_streaming => 'logical');

# Setup test structures
$publisher->safe_psql(
'postgres',
qq(
CREATE TABLE test (ts timestamptz NOT NULL PRIMARY KEY , val INT);
SELECT create_hypertable('test', 'ts', chunk_time_interval := INTERVAL '1day');
)
);

# To kick off replication we need to fake the setup of a hypertable
# (the chunk table must pre-exist on the subscriber for the subscription
# to be able to apply changes to it).
$subscriber->safe_psql('postgres',
"CREATE TABLE _timescaledb_internal._hyper_1_1_chunk (ts timestamptz NOT NULL PRIMARY KEY , val INT)"
);

# Initial data insert and preparation of the internal chunk tables
# (9 rows: one every 3 hours across a 24-hour span, all in one chunk)
$publisher->safe_psql(
'postgres',
qq(
INSERT INTO test
SELECT s.s, (random() * 100)::INT
FROM generate_series('2023-01-01'::timestamptz, '2023-01-02'::timestamptz, INTERVAL '3 hour') s;
)
);

# Setup logical replication
my $publisher_connstr = $publisher->connstr . ' dbname=postgres';
$publisher->safe_psql('postgres',
"CREATE PUBLICATION tap_pub FOR TABLE _timescaledb_internal._hyper_1_1_chunk"
);
$subscriber->safe_psql('postgres',
"CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr' PUBLICATION tap_pub WITH (binary = true)"
);

# Wait for catchup and disable consumption of additional messages,
# so the test can inspect the slot's contents explicitly below.
$publisher->wait_for_catchup('tap_sub');
$subscriber->safe_psql('postgres', "ALTER SUBSCRIPTION tap_sub DISABLE");
$publisher->poll_query_until(
'postgres',
"SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE slot_name = 'tap_sub' AND active='f'",
1);

# Enable marker generation through GUC (PGC_SIGHUP, so a reload suffices)
$publisher->append_conf('postgresql.conf',
'timescaledb.enable_decompression_logrep_markers=true');
$publisher->reload();

# Compress chunks and consume replication stream explicitly, discarding
# everything produced so far so later checks start from a clean slot.
$publisher->safe_psql(
'postgres',
qq(
ALTER TABLE test SET (timescaledb.compress);
SELECT compress_chunk('_timescaledb_internal._hyper_1_1_chunk'::regclass, TRUE);
)
);
$publisher->safe_psql(
'postgres',
qq(
SELECT pg_logical_slot_get_binary_changes('tap_sub', NULL, NULL,
'proto_version', '1',
'publication_names', 'tap_pub',
'messages', 'true'
);
)
);

# Create a new entry which forces a decompression to happen
# (same day as the compressed data, so it lands in the compressed chunk)
$publisher->safe_psql('postgres',
"INSERT INTO test VALUES ('2023-01-01 00:10:00', 5555)");

# Retrieve the replication log messages; get_byte(data, 0) is the pgoutput
# message type byte: 66='B' BEGIN, 77='M' MESSAGE, 82='R' RELATION,
# 73='I' INSERT, 67='C' COMMIT.
my $result = $publisher->safe_psql(
'postgres',
qq(
SELECT get_byte(data, 0)
FROM pg_logical_slot_peek_binary_changes('tap_sub', NULL, NULL,
'proto_version', '1',
'publication_names', 'tap_pub',
'messages', 'true'
);
)
);

# Test: BEGIN, MESSAGE (start marker), RELATION, ... INSERT (decompression inserts x6) ..., MESSAGE (end marker), INSERT, COMMIT
is( $result,
qq(66
77
82
73
73
73
73
73
73
77
73
67),
'messages on slot meet expectation <<BEGIN, MESSAGE (start marker), RELATION, ... INSERT (decompression inserts x6) ..., MESSAGE (end marker), INSERT, COMMIT>>'
);

# Get initial message entry. get_byte(data, 1) is the message flags byte
# (1 = transactional, per the pgoutput protocol); substr(..., 11, 33)
# extracts the 33-byte prefix "::timescaledb-decompression-start".
$result = $publisher->safe_psql(
'postgres',
qq(
SELECT get_byte(data, 1), encode(substr(data, 11, 33), 'escape')
FROM pg_logical_slot_peek_binary_changes('tap_sub', NULL, NULL,
'proto_version', '1',
'publication_names', 'tap_pub',
'messages', 'true'
)
OFFSET 1 LIMIT 1;
)
);
is( $result,
qq(1|::timescaledb-decompression-start),
'first entry is decompression marker start message');

# Get second message entry; substr(..., 11, 31) extracts the 31-byte prefix
# "::timescaledb-decompression-end".
$result = $publisher->safe_psql(
'postgres',
qq(
SELECT get_byte(data, 1), encode(substr(data, 11, 31), 'escape')
FROM pg_logical_slot_peek_binary_changes('tap_sub', NULL, NULL,
'proto_version', '1',
'publication_names', 'tap_pub',
'messages', 'true'
)
OFFSET 9 LIMIT 1;
)
);
is( $result,
qq(1|::timescaledb-decompression-end),
'10th entry is decompression marker end message');

# Get last insert entry to check it is the user executed insert (and value is 5555 or 35353535 in hex)
# NOTE(review): the substring offsets (from 41 for 44) assume a fixed tuple
# layout for this table's INSERT message — verify if the schema changes.
$result = $publisher->safe_psql(
'postgres',
qq(
SELECT get_byte(data, 0), encode(substring(data from 41 for 44), 'hex')
FROM pg_logical_slot_peek_binary_changes('tap_sub', NULL, NULL,
'proto_version', '1',
'publication_names', 'tap_pub',
'messages', 'true'
)
OFFSET 10 LIMIT 1;
)
);
is($result, qq(73|35353535), '11th entry is an insert message');

# Disable marker generation through GUC
$publisher->append_conf('postgresql.conf',
'timescaledb.enable_decompression_logrep_markers=false');
$publisher->reload();

# Compress chunks and consume replication stream explicitly, again
# discarding accumulated messages before the next check.
$publisher->safe_psql('postgres',
"CALL recompress_chunk('_timescaledb_internal._hyper_1_1_chunk'::regclass, TRUE)"
);
$publisher->safe_psql(
'postgres',
qq(
SELECT pg_logical_slot_get_binary_changes('tap_sub', NULL, NULL,
'proto_version', '1',
'publication_names', 'tap_pub',
'messages', 'true'
);
)
);

# Create a new entry which forces a decompression to happen
$publisher->safe_psql('postgres',
"INSERT INTO test VALUES ('2023-01-01 00:11:00', 5555)");

# Retrieve the replication log messages (type bytes only, as above)
$result = $publisher->safe_psql(
'postgres',
qq(
SELECT get_byte(data, 0)
FROM pg_logical_slot_peek_binary_changes('tap_sub', NULL, NULL,
'proto_version', '1',
'publication_names', 'tap_pub',
'messages', 'true'
);
)
);

# Test: BEGIN, RELATION, ... INSERT (decompression inserts x7) ..., INSERT, COMMIT
# With markers disabled no 'M' (77) messages may appear in the stream.
is( $result,
qq(66
82
73
73
73
73
73
73
73
73
67),
'messages on slot meet expectation <<BEGIN, RELATION, ... INSERT (decompression inserts x7) ..., INSERT, COMMIT>>'
);

done_testing();
3 changes: 2 additions & 1 deletion tsl/test/t/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
set(PROVE_TEST_FILES 001_simple_multinode.pl 003_connections_privs.pl)
set(PROVE_TEST_FILES 001_simple_multinode.pl 003_connections_privs.pl
009_logrepl_decomp_marker.pl)
set(PROVE_DEBUG_TEST_FILES
002_chunk_copy_move.pl 004_multinode_rdwr_1pc.pl 005_add_data_node.pl
006_job_crash_log.pl 007_healthcheck.pl 008_mvcc_cagg.pl)
Expand Down

0 comments on commit b5b46a3

Please sign in to comment.