Skip to content

Commit

Permalink
Prevent invalid recovery when backup_label removed.
Browse files Browse the repository at this point in the history
If backup_label is removed from a restored backup then PostgreSQL will instead use checkpoint information from pg_control to attempt (what is thinks is) crash recovery. This will nearly always result in a corrupt cluster because the checkpoint will not be from the beginning of the backup, and even if it is, the end point will not be specified, which could lead to recovery stopping too early.

To prevent this, invalidate the checkpoint LSN in pg_control on restore. If backup_label is removed then recovery will still fail because PostgreSQL will not be able to find the invalid checkpoint. The LSN of the checkpoint is not logged but it will be visible in pg_controldata output as 0/DEAD. This value is invalid because PostgreSQL always skips the first WAL segment when initializing a cluster.
  • Loading branch information
dwsteele committed Mar 10, 2024
1 parent 960b435 commit e634fd8
Show file tree
Hide file tree
Showing 9 changed files with 228 additions and 53 deletions.
11 changes: 11 additions & 0 deletions doc/xml/release/2024/2.51.xml
Expand Up @@ -74,6 +74,17 @@
<p>Detect files that have not changed during non-delta incremental backup.</p>
</release-item>

<release-item>
<github-pull-request id="2232"/>

<release-item-contributor-list>
<release-item-contributor id="david.steele"/>
<release-item-reviewer id="stephen.frost"/>
</release-item-contributor-list>

<p>Prevent invalid recovery when <file>backup_label</file> removed.</p>

This comment has been minimized.

Copy link
@mbanck

mbanck Mar 11, 2024

I think there is a word missing here, maybe "was removed" or "got removed"?

This comment has been minimized.

Copy link
@dwsteele

dwsteele Mar 12, 2024

Author Member

"is removed" would probably be best. This is a bit of summary abbreviation -- not intended to be 100% grammatically correct.

</release-item>

<release-item>
<github-pull-request id="2277"/>

Expand Down
10 changes: 9 additions & 1 deletion src/command/archive/get/get.c
Expand Up @@ -375,9 +375,17 @@ archiveGetCheck(const StringList *archiveRequestList)
// List of warnings
StringList *warnList = strLstNew();

// Get pg control info
// Get pg_control info. Error if the invalid checkpoint written by restore is detected and backup_label is not present.
PgControl controlInfo = pgControlFromFile(storagePg(), cfgOptionStrNull(cfgOptPgVersionForce));

if (controlInfo.checkpoint == PG_CONTROL_CHECKPOINT_INVALID && !storageExistsP(storagePg(), STRDEF(PG_FILE_BACKUPLABEL)))
{
THROW(
FormatError,
"pg_control from backup is not valid without backup_label\n"
"HINT: was the backup_label file removed?");
}

// Build list of repos/archiveIds where WAL may be found
List *cacheRepoList = lstNewP(sizeof(ArchiveGetFindCacheRepo));

Expand Down
16 changes: 16 additions & 0 deletions src/command/restore/restore.c
Expand Up @@ -2612,6 +2612,22 @@ cmdRestore(void)
// from being started.
if (storageExistsP(storagePg(), STRDEF(PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL "." STORAGE_FILE_TEMP_EXT)))
{
// Invalidate the checkpoint in pg_control so the cluster cannot be started without backup_label
if (manifestData(jobData.manifest)->backupOptionOnline)
{
Buffer *const pgControlBuffer = storageGetP(
storageNewReadP(storagePg(), STRDEF(PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL "." STORAGE_FILE_TEMP_EXT)));
const ManifestFile pgControlFile = manifestFileFind(
jobData.manifest, STRDEF(MANIFEST_TARGET_PGDATA "/" PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL));

pgControlCheckpointInvalidate(pgControlBuffer, cfgOptionStrNull(cfgOptPgVersionForce));
storagePutP(
storageNewWriteP(
storagePgWrite(), STRDEF(PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL "." STORAGE_FILE_TEMP_EXT),
.modeFile = pgControlFile.mode, .timeModified = pgControlFile.timestamp),
pgControlBuffer);
}

LOG_INFO(
"restore " PG_PATH_GLOBAL "/" PG_FILE_PGCONTROL " (performed last to ensure aborted restores cannot be started)");

Expand Down
148 changes: 108 additions & 40 deletions src/postgres/interface.c
Expand Up @@ -71,6 +71,9 @@ typedef struct PgInterface
// Get control crc offset
size_t (*controlCrcOffset)(void);

// Invalidate control checkpoint
void (*controlCheckpointInvalidate)(unsigned char *);

// Get the control version for this version of PostgreSQL
uint32_t (*controlVersion)(void);

Expand Down Expand Up @@ -191,6 +194,88 @@ pgWalSegmentSizeCheck(unsigned int pgVersion, unsigned int walSegmentSize)
FUNCTION_TEST_RETURN_VOID();
}

/***********************************************************************************************************************************
Validate the CRC in pg_control. If the version has been forced this may required scanning pg_control for the offset. Return the
offset found so it can be used to update the CRC.
***********************************************************************************************************************************/
static size_t
pgControlCrcValidate(const Buffer *const controlFile, const PgInterface *const interface, const bool pgVersionForce)
{
FUNCTION_LOG_BEGIN(logLevelTrace);
FUNCTION_LOG_PARAM(BUFFER, controlFile);
FUNCTION_LOG_PARAM_P(VOID, interface);
FUNCTION_LOG_PARAM(BOOL, pgVersionForce);
FUNCTION_LOG_END();

ASSERT(controlFile != NULL);
ASSERT(interface != NULL);

size_t result = interface->controlCrcOffset();

do
{
// Calculate CRC and retrieve expected CRC
const uint32_t crcCalculated =
interface->version > PG_VERSION_94 ?
crc32cOne(bufPtrConst(controlFile), result) : crc32One(bufPtrConst(controlFile), result);
const uint32_t crcExpected = *((uint32_t *)(bufPtrConst(controlFile) + result));

// If CRC does not match
if (crcCalculated != crcExpected)
{
// If version is forced then the CRC might be later in the file (assuming the fork added extra fields to pg_control).
// Increment the offset by CRC data size and continue to try again.
if (pgVersionForce)
{
result += sizeof(uint32_t);

if (result <= bufUsed(controlFile) - sizeof(uint32_t))
continue;
}

// If no retry then error
THROW_FMT(
ChecksumError,
"calculated " PG_FILE_PGCONTROL " checksum does not match expected value\n"
"HINT: calculated 0x%x but expected value is 0x%x\n"
"%s"
"HINT: is " PG_FILE_PGCONTROL " corrupt?\n"
"HINT: does " PG_FILE_PGCONTROL " have a different layout than expected?",
crcCalculated, crcExpected,
pgVersionForce ? "HINT: checksum values may be misleading due to forced version scan\n" : "");
}

// Do not retry if the CRC is valid
break;
}
while (true);

FUNCTION_LOG_RETURN(SIZE, result);
}

/***********************************************************************************************************************************
Update the CRC in pg_control
***********************************************************************************************************************************/
static void
pgControlCrcUpdate(Buffer *const controlFile, const unsigned int pgVersion, const size_t crcOffset)
{
FUNCTION_LOG_BEGIN(logLevelTrace);
FUNCTION_LOG_PARAM(BUFFER, controlFile);
FUNCTION_LOG_PARAM(UINT, pgVersion);
FUNCTION_LOG_PARAM(SIZE, crcOffset);
FUNCTION_LOG_END();

ASSERT(controlFile != NULL);
ASSERT(pgVersion != 0);
ASSERT(crcOffset != 0);

*((uint32_t *)(bufPtr(controlFile) + crcOffset)) =
pgVersion > PG_VERSION_94 ?
crc32cOne(bufPtrConst(controlFile), crcOffset) : crc32One(bufPtrConst(controlFile), crcOffset);

FUNCTION_LOG_RETURN_VOID();
}

/**********************************************************************************************************************************/
static PgControl
pgControlFromBuffer(const Buffer *controlFile, const String *const pgVersionForce)
Expand Down Expand Up @@ -237,46 +322,8 @@ pgControlFromBuffer(const Buffer *controlFile, const String *const pgVersionForc
PgControl result = interface->control(bufPtrConst(controlFile));
result.version = interface->version;

// Check CRC
size_t crcOffset = interface->controlCrcOffset();

do
{
// Calculate CRC and retrieve expected CRC
const uint32_t crcCalculated =
result.version > PG_VERSION_94 ?
crc32cOne(bufPtrConst(controlFile), crcOffset) : crc32One(bufPtrConst(controlFile), crcOffset);
const uint32_t crcExpected = *((uint32_t *)(bufPtrConst(controlFile) + crcOffset));

// If CRC does not match
if (crcCalculated != crcExpected)
{
// If version is forced then the CRC might be later in the file (assuming the fork added extra fields to pg_control).
// Increment the offset by CRC data size and continue to try again.
if (pgVersionForce != NULL)
{
crcOffset += sizeof(uint32_t);

if (crcOffset <= bufUsed(controlFile) - sizeof(uint32_t))
continue;
}

// If no retry then error
THROW_FMT(
ChecksumError,
"calculated " PG_FILE_PGCONTROL " checksum does not match expected value\n"
"HINT: calculated 0x%x but expected value is 0x%x\n"
"%s"
"HINT: is " PG_FILE_PGCONTROL " corrupt?\n"
"HINT: does " PG_FILE_PGCONTROL " have a different layout than expected?",
crcCalculated, crcExpected,
pgVersionForce == NULL ? "" : "HINT: checksum values may be misleading due to forced version scan\n");
}

// Do not retry if the CRC is valid
break;
}
while (true);
// Validate the CRC
pgControlCrcValidate(controlFile, interface, pgVersionForce != NULL);

// Check the segment size
pgWalSegmentSizeCheck(result.version, result.walSegmentSize);
Expand Down Expand Up @@ -389,6 +436,27 @@ pgControlFromFile(const Storage *const storage, const String *const pgVersionFor
FUNCTION_LOG_RETURN(PG_CONTROL, result);
}

/**********************************************************************************************************************************/
FN_EXTERN void
pgControlCheckpointInvalidate(Buffer *const buffer, const String *const pgVersionForce)
{
FUNCTION_LOG_BEGIN(logLevelDebug);
FUNCTION_LOG_PARAM(BUFFER, buffer);
FUNCTION_LOG_PARAM(STRING, pgVersionForce);
FUNCTION_LOG_END();

ASSERT(buffer != NULL);

const PgControl pgControl = pgControlFromBuffer(buffer, pgVersionForce);
const PgInterface *const interface = pgInterfaceVersion(pgControl.version);
const size_t crcOffset = pgControlCrcValidate(buffer, interface, pgVersionForce);

pgInterfaceVersion(pgControl.version)->controlCheckpointInvalidate(bufPtr(buffer));
pgControlCrcUpdate(buffer, interface->version, crcOffset);

FUNCTION_LOG_RETURN_VOID();
}

/**********************************************************************************************************************************/
FN_EXTERN uint32_t
pgControlVersion(unsigned int pgVersion)
Expand Down
8 changes: 8 additions & 0 deletions src/postgres/interface.h
Expand Up @@ -94,6 +94,11 @@ WAL segment size is supported for versions below 11.
***********************************************************************************************************************************/
#define PG_WAL_SEGMENT_SIZE_DEFAULT ((unsigned int)(16 * 1024 * 1024))

/***********************************************************************************************************************************
Checkpoint written into pg_control on restore. This will prevent PostgreSQL from starting if backup_label is not present.
***********************************************************************************************************************************/
#define PG_CONTROL_CHECKPOINT_INVALID 0xDEAD

/***********************************************************************************************************************************
PostgreSQL Control File Info
***********************************************************************************************************************************/
Expand Down Expand Up @@ -138,6 +143,9 @@ FN_EXTERN bool pgDbIsSystemId(unsigned int id);
FN_EXTERN Buffer *pgControlBufferFromFile(const Storage *storage, const String *pgVersionForce);
FN_EXTERN PgControl pgControlFromFile(const Storage *storage, const String *pgVersionForce);

// Invalidate checkpoint record in pg_control
FN_EXTERN void pgControlCheckpointInvalidate(Buffer *buffer, const String *pgVersionForce);

// Get the control version for a PostgreSQL version
FN_EXTERN uint32_t pgControlVersion(unsigned int pgVersion);

Expand Down
16 changes: 16 additions & 0 deletions src/postgres/interface/version.intern.h
Expand Up @@ -92,6 +92,22 @@ Get control crc offset

#endif

/***********************************************************************************************************************************
Invalidate control checkpoint. PostgreSQL skips the first segment so any LSN in that segment is invalid.
***********************************************************************************************************************************/
#if PG_VERSION > PG_VERSION_MAX

#elif PG_VERSION >= PG_VERSION_93

#define PG_INTERFACE_CONTROL_CHECKPOINT_INVALIDATE(version) \
static void \
pgInterfaceControlCheckpointInvalidate##version(unsigned char *const controlFile) \
{ \
((ControlFileData *)controlFile)->checkPoint = PG_CONTROL_CHECKPOINT_INVALID; \
}

#endif

/***********************************************************************************************************************************
Get the control version
***********************************************************************************************************************************/
Expand Down
21 changes: 19 additions & 2 deletions test/src/module/command/archiveGetTest.c
Expand Up @@ -135,10 +135,28 @@ testRun(void)
.remove = true);
TEST_STORAGE_LIST_EMPTY(storageSpool(), STORAGE_SPOOL_ARCHIVE_IN);

// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("pg_control from backup is not valid without backup_label");

HRN_PG_CONTROL_PUT(storagePgWrite(), PG_VERSION_10, .checkpoint = PG_CONTROL_CHECKPOINT_INVALID);

strLstAddZ(argList, "000000010000000100000001");
HRN_CFG_LOAD(cfgCmdArchiveGet, argList, .role = cfgCmdRoleAsync);

TEST_ERROR(
cmdArchiveGetAsync(), FormatError,
"pg_control from backup is not valid without backup_label\n"
"HINT: was the backup_label file removed?");

TEST_RESULT_LOG(
"P00 INFO: get 1 WAL file(s) from archive: 000000010000000100000001");
TEST_STORAGE_LIST(storageSpoolWrite(), STORAGE_SPOOL_ARCHIVE_IN, "global.error\n", .remove = true);

// -------------------------------------------------------------------------------------------------------------------------
TEST_TITLE("no segments to find");

HRN_PG_CONTROL_PUT(storagePgWrite(), PG_VERSION_10);
// Success not that backup_label exists
HRN_STORAGE_PUT_EMPTY(storagePgWrite(), PG_FILE_BACKUPLABEL);

HRN_INFO_PUT(
storageRepoWrite(), INFO_ARCHIVE_PATH_FILE,
Expand All @@ -148,7 +166,6 @@ testRun(void)
"[db:history]\n"
"1={\"db-id\":" HRN_PG_SYSTEMID_10_Z ",\"db-version\":\"10\"}\n");

strLstAddZ(argList, "000000010000000100000001");
HRN_CFG_LOAD(cfgCmdArchiveGet, argList, .role = cfgCmdRoleAsync);

TEST_RESULT_VOID(cmdArchiveGetAsync(), "get async");
Expand Down

0 comments on commit e634fd8

Please sign in to comment.