Skip to content

Commit 07f63e4

Browse files
committed
added some improvements based on review
1 parent 0e2336f commit 07f63e4

File tree

1 file changed

+42
-50
lines changed

1 file changed

+42
-50
lines changed

src/backup.c

Lines changed: 42 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1399,31 +1399,32 @@ pg_ptrack_get_and_clear(Oid tablespace_oid, Oid db_oid, Oid rel_filenode,
13991399
}
14001400

14011401
/*
1402-
* Wait for target 'lsn' or WAL segment, containing 'lsn'.
1402+
* Wait for target LSN or WAL segment, containing target LSN.
14031403
*
1404-
* If current backup started in archive mode wait for 'lsn' to be archived in
1405-
* archive 'wal' directory with WAL segment file.
1406-
* If current backup started in stream mode wait for 'lsn' to be streamed in
1407-
* 'pg_wal' directory.
1404+
* Depending on value of flag in_stream_dir wait for target LSN to archived or
1405+
* streamed in 'archive_dir' or 'pg_wal' directory.
14081406
*
1409-
* If 'is_start_lsn' is true then issue warning for first-time users.
1407+
* If flag 'is_start_lsn' is set then issue warning for first-time users.
1408+
* If flag 'in_prev_segment' is set, look for LSN in previous segment,
1409+
* with EndRecPtr >= Target LSN. It should be used only for solving
1410+
* invalid XRecOff problem.
1411+
* If flag 'segment_only' is set, then, instead of waiting for LSN, wait for segment,
1412+
* containing that LSN.
1413+
* If flags 'in_prev_segment' and 'segment_only' are both set, then wait for
1414+
* previous segment.
14101415
*
1411-
* If 'in_prev_segment' is set, look for LSN in previous segment.
1412-
* If 'segment_only' is set, then instead of looking for LSN, look for segment itself.
1413-
* If 'in_prev_segment' and 'segment_only' are both set, then wait for previous segment.
1416+
* Flag 'in_stream_dir' determine whether we looking for WAL in 'pg_wal' directory or
1417+
* in archive. Do note, that we cannot rely sorely on global variable 'stream_wal' because,
1418+
* for example, PAGE backup must(!) look for start_lsn in archive regardless of wal_mode.
14141419
*
1415-
* Flag 'in_stream_dir' determine whether we looking for wal in 'pg_wal' directory or
1416-
* in archive. Do note, that we cannot rely sorely on 'stream_wal' because, for example,
1417-
* PAGE backup must(!) look for start_lsn in archive regardless of wal_mode.
1418-
14191420
* 'timeout_elevel' determine the elevel for timeout elog message. If elevel lighter than
14201421
* ERROR is used, then return InvalidXLogRecPtr. TODO: return something more concrete, for example 1.
14211422
*
1422-
* Returns LSN of last valid record if wait_prev_segment is not true, otherwise
1423-
* returns InvalidXLogRecPtr.
1423+
* Returns target LSN if such is found, failing that returns LSN of record prior to target LSN.
1424+
* Returns InvalidXLogRecPtr if 'segment_only' flag is used.
14241425
*/
14251426
static XLogRecPtr
1426-
wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
1427+
wait_wal_lsn(XLogRecPtr target_lsn, bool is_start_lsn, TimeLineID tli,
14271428
bool in_prev_segment, bool segment_only,
14281429
int timeout_elevel, bool in_stream_dir)
14291430
{
@@ -1442,16 +1443,16 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
14421443
#endif
14431444

14441445
/* Compute the name of the WAL file containing requested LSN */
1445-
GetXLogSegNo(lsn, targetSegNo, instance_config.xlog_seg_size);
1446+
GetXLogSegNo(target_lsn, targetSegNo, instance_config.xlog_seg_size);
14461447
if (in_prev_segment)
14471448
targetSegNo--;
14481449
GetXLogFileName(wal_segment, tli, targetSegNo,
14491450
instance_config.xlog_seg_size);
14501451

14511452
/*
1452-
* In pg_start_backup we wait for 'lsn' in 'pg_wal' directory if it is
1453+
* In pg_start_backup we wait for 'target_lsn' in 'pg_wal' directory if it is
14531454
* stream and non-page backup. Page backup needs archived WAL files, so we
1454-
* wait for 'lsn' in archive 'wal' directory for page backups.
1455+
* wait for 'target_lsn' in archive 'wal' directory for page backups.
14551456
*
14561457
* In pg_stop_backup it depends only on stream_wal.
14571458
*/
@@ -1468,6 +1469,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
14681469
wal_segment_dir = arclog_path;
14691470
}
14701471

1472+
/* TODO: remove this in 3.0 (it is a cludge against some old bug with archive_timeout) */
14711473
if (instance_config.archive_timeout > 0)
14721474
timeout = instance_config.archive_timeout;
14731475
else
@@ -1477,7 +1479,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
14771479
elog(LOG, "Looking for segment: %s", wal_segment);
14781480
else
14791481
elog(LOG, "Looking for LSN %X/%X in segment: %s",
1480-
(uint32) (lsn >> 32), (uint32) lsn, wal_segment);
1482+
(uint32) (target_lsn >> 32), (uint32) target_lsn, wal_segment);
14811483

14821484
#ifdef HAVE_LIBZ
14831485
snprintf(gz_wal_segment_path, sizeof(gz_wal_segment_path), "%s.gz",
@@ -1506,26 +1508,24 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
15061508

15071509
if (file_exists)
15081510
{
1509-
//TODO is this comment also relates to current segment_only case
1510-
// and should be updated?
1511-
/* Do not check LSN for previous WAL segment */
1511+
/* Do not check for target LSN */
15121512
if (segment_only)
15131513
return InvalidXLogRecPtr;
15141514

15151515
/*
1516-
* A WAL segment found. Check LSN on it.
1516+
* A WAL segment found. Look for target LSN in it.
15171517
*/
1518-
if (!XRecOffIsNull(lsn) &&
1519-
wal_contains_lsn(wal_segment_dir, lsn, tli,
1518+
if (!XRecOffIsNull(target_lsn) &&
1519+
wal_contains_lsn(wal_segment_dir, target_lsn, tli,
15201520
instance_config.xlog_seg_size))
15211521
/* Target LSN was found */
15221522
{
1523-
elog(LOG, "Found LSN: %X/%X", (uint32) (lsn >> 32), (uint32) lsn);
1524-
return lsn;
1523+
elog(LOG, "Found LSN: %X/%X", (uint32) (target_lsn >> 32), (uint32) target_lsn);
1524+
return target_lsn;
15251525
}
15261526

15271527
/*
1528-
* If we failed to get LSN of valid record in a reasonable time, try
1528+
* If we failed to get target LSN in a reasonable time, try
15291529
* to get LSN of last valid record prior to the target LSN. But only
15301530
* in case of a backup from a replica.
15311531
@@ -1539,7 +1539,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
15391539
{
15401540
XLogRecPtr res;
15411541

1542-
res = get_last_wal_lsn(wal_segment_dir, current.start_lsn, lsn, tli,
1542+
res = get_last_wal_lsn(wal_segment_dir, current.start_lsn, target_lsn, tli,
15431543
in_prev_segment, instance_config.xlog_seg_size);
15441544

15451545
if (!XLogRecPtrIsInvalid(res))
@@ -1565,7 +1565,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
15651565
wal_segment_path, wal_delivery_str);
15661566
else
15671567
elog(INFO, "Wait for LSN %X/%X in %s WAL segment %s",
1568-
(uint32) (lsn >> 32), (uint32) lsn,
1568+
(uint32) (target_lsn >> 32), (uint32) target_lsn,
15691569
wal_delivery_str, wal_segment_path);
15701570
}
15711571

@@ -1580,7 +1580,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
15801580
elog(timeout_elevel, "WAL segment %s was %s, "
15811581
"but target LSN %X/%X could not be archived in %d seconds",
15821582
wal_segment, wal_delivery_str,
1583-
(uint32) (lsn >> 32), (uint32) lsn, timeout);
1583+
(uint32) (target_lsn >> 32), (uint32) target_lsn, timeout);
15841584
/* If WAL segment doesn't exist or we wait for previous segment */
15851585
else
15861586
elog(timeout_elevel,
@@ -1808,11 +1808,10 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
18081808
XLogRecPtr lsn_tmp = InvalidXLogRecPtr;
18091809

18101810
/*
1811-
* TODO Let's rephrase that to be less scary for a user.
18121811
* Even though the value is invalid, it's expected postgres behaviour
18131812
* and we're trying to fix it below.
18141813
*/
1815-
elog(WARNING, "Invalid stop_backup_lsn value %X/%X",
1814+
elog(LOG, "Null offset in stop_backup_lsn value %X/%X, trying to fix",
18161815
(uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
18171816

18181817
/*
@@ -1844,15 +1843,12 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
18441843
* and to use it in WAL validation.
18451844
*
18461845
* So we try to do the following:
1847-
* 1. Wait for current segment and look in it for the LSN >= STOP_LSN.
1848-
* //TODO what is 'current' segment?
1849-
* //TODO how long do we wait? is there a timeout?
1850-
* It solves the problem of occasional invalid XRecOff
1851-
* on write-busy system.
1846+
* 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and
1847+
* look for the first valid record in it.
1848+
* It solves the problem of occasional invalid XRecOff on write-busy system.
18521849
* 2. Failing that, look for record in previous segment with endpoint
1853-
* equal or greater than 0 offset LSN. It may(!) solve the problem of 0 offset
1854-
* on write-idle system.
1855-
* //TODO what if if didn't?
1850+
* equal or greater than stop_lsn. It may(!) solve the problem of 0 offset
1851+
* on write-idle system. If that fails too, error out.
18561852
* //TODO what kind of record that refers to?
18571853
*/
18581854

@@ -1864,8 +1860,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
18641860
lsn_tmp = get_first_wal_lsn(xlog_path, segno, backup->tli,
18651861
instance_config.xlog_seg_size);
18661862

1867-
/* Check if returned LSN is satisfying our requirements */
1868-
//TODO what requirements?
1863+
/* Check that returned LSN is valid and greater than stop_lsn */
18691864
if (XLogRecPtrIsInvalid(lsn_tmp) ||
18701865
!XRecOffIsValid(lsn_tmp) ||
18711866
lsn_tmp < stop_backup_lsn_tmp)
@@ -1893,9 +1888,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
18931888
stop_backup_lsn = lsn_tmp;
18941889
stop_lsn_exists = true;
18951890
}
1896-
//TODO is it good to throw an error here? Shouldn't we rather
1897-
// save it as is and mark backup as DONE instead of OK
1898-
// and check it in validation and restore?
1891+
/* Replica returned something very illegal, error out */
18991892
else
19001893
elog(ERROR, "Invalid stop_backup_lsn value %X/%X",
19011894
(uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
@@ -2000,9 +1993,8 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
20001993

20011994
/*
20021995
* Wait for stop_lsn to be archived or streamed.
2003-
* //TODO is the sentence below outdated?
2004-
* If replica returned non-existent LSN, look for previous record,
2005-
* which endpoint >= stop_lsn
1996+
* If replica returned valid STOP_LSN of not actually existing record,
1997+
* look for previous record with endpoint >= STOP_LSN.
20061998
*/
20071999
if (!stop_lsn_exists)
20082000
stop_backup_lsn = wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli,

0 commit comments

Comments
 (0)