From baeb8542c1cc849e58c016f401687b9c4d3e7f20 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 3 Oct 2023 10:25:15 +0900 Subject: [PATCH] Fail hard on out-of-memory failures in xlogreader.c This commit changes the WAL reader routines so as a FATAL for the backend or exit(FAILURE) for the frontend is triggered if an allocation for a WAL record decode fails in walreader.c, rather than treating this case as bogus data, which would be equivalent to the end of WAL. The key is to avoid palloc_extended(MCXT_ALLOC_NO_OOM) in walreader.c, relying on plain palloc() calls. The previous behavior could make WAL replay finish too early than it should. For example, crash recovery finishing earlier may corrupt clusters because not all the WAL available locally was replayed to ensure a consistent state. Out-of-memory failures would show up randomly depending on the memory pressure on the host, but one simple case would be to generate a large record, then replay this record after downsizing a host, as Ethan Mertz originally reported. This relies on bae868caf222, as the WAL reader routines now do the memory allocation required for a record only once its header has been fully read and validated, making xl_tot_len trustable. Making the WAL reader react differently on out-of-memory or bogus record data would require ABI changes, so this is the safest choice for stable branches. Also, it is worth noting that 3f1ce973467a has been using a plain palloc() in this code for some time now. Thanks to Noah Misch and Thomas Munro for the discussion. Like the other commit, backpatch down to 12, leaving out v11 that will be EOL'd soon. The behavior of considering a failed allocation as bogus data comes originally from 0ffe11abd3a0, where the record length retrieved from its header was not entirely trustable. Reported-by: Ethan Mertz Discussion: https://postgr.es/m/ZRKKdI5-RRlta3aF@paquier.xyz Backpatch-through: 12 --- src/backend/access/transam/xlogreader.c | 31 ++++--------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 2a5a464bc0b54..f84569cdbab8e 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -35,7 +35,7 @@ static void report_invalid_record(XLogReaderState *state, const char *fmt,...) pg_attribute_printf(2, 3); -static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength); +static void allocate_recordbuf(XLogReaderState *state, uint32 reclength); static int ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen); static void XLogReaderInvalReadState(XLogReaderState *state); @@ -124,14 +124,7 @@ XLogReaderAllocate(int wal_segment_size, const char *waldir, * Allocate an initial readRecordBuf of minimal size, which can later be * enlarged if necessary. */ - if (!allocate_recordbuf(state, 0)) - { - pfree(state->errormsg_buf); - pfree(state->readBuf); - pfree(state); - return NULL; - } - + allocate_recordbuf(state, 0); return state; } @@ -160,7 +153,6 @@ XLogReaderFree(XLogReaderState *state) /* * Allocate readRecordBuf to fit a record of at least the given length. - * Returns true if successful, false if out of memory. * * readRecordBufSize is set to the new buffer size. * @@ -172,7 +164,7 @@ XLogReaderFree(XLogReaderState *state) * Note: This routine should *never* be called for xl_tot_len until the header * of the record has been fully validated. */ -static bool +static void allocate_recordbuf(XLogReaderState *state, uint32 reclength) { uint32 newSize = reclength; @@ -182,15 +174,8 @@ allocate_recordbuf(XLogReaderState *state, uint32 reclength) if (state->readRecordBuf) pfree(state->readRecordBuf); - state->readRecordBuf = - (char *) palloc_extended(newSize, MCXT_ALLOC_NO_OOM); - if (state->readRecordBuf == NULL) - { - state->readRecordBufSize = 0; - return false; - } + state->readRecordBuf = (char *) palloc(newSize); state->readRecordBufSize = newSize; - return true; } /* @@ -524,13 +509,7 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) Assert(gotlen <= lengthof(save_copy)); Assert(gotlen <= state->readRecordBufSize); memcpy(save_copy, state->readRecordBuf, gotlen); - if (!allocate_recordbuf(state, total_len)) - { - /* We treat this as a "bogus data" condition */ - report_invalid_record(state, "record length %u at %X/%X too long", - total_len, LSN_FORMAT_ARGS(RecPtr)); - goto err; - } + allocate_recordbuf(state, total_len); memcpy(state->readRecordBuf, save_copy, gotlen); buffer = state->readRecordBuf + gotlen; }