@@ -1399,31 +1399,32 @@ pg_ptrack_get_and_clear(Oid tablespace_oid, Oid db_oid, Oid rel_filenode,
13991399}
14001400
14011401/*
1402- * Wait for target 'lsn' or WAL segment, containing 'lsn' .
1402+ * Wait for target LSN or WAL segment, containing target LSN .
14031403 *
1404- * If current backup started in archive mode wait for 'lsn' to be archived in
1405- * archive 'wal' directory with WAL segment file.
1406- * If current backup started in stream mode wait for 'lsn' to be streamed in
1407- * 'pg_wal' directory.
1404+ * Depending on value of flag in_stream_dir wait for target LSN to archived or
1405+ * streamed in 'archive_dir' or 'pg_wal' directory.
14081406 *
1409- * If 'is_start_lsn' is true then issue warning for first-time users.
1407+ * If flag 'is_start_lsn' is set then issue warning for first-time users.
1408+ * If flag 'in_prev_segment' is set, look for LSN in previous segment,
1409+ * with EndRecPtr >= Target LSN. It should be used only for solving
1410+ * invalid XRecOff problem.
1411+ * If flag 'segment_only' is set, then, instead of waiting for LSN, wait for segment,
1412+ * containing that LSN.
1413+ * If flags 'in_prev_segment' and 'segment_only' are both set, then wait for
1414+ * previous segment.
14101415 *
1411- * If 'in_prev_segment' is set, look for LSN in previous segment.
1412- * If 'segment_only' is set, then instead of looking for LSN, look for segment itself.
1413- * If 'in_prev_segment' and 'segment_only' are both set, then wait for previous segment .
1416+ * Flag 'in_stream_dir' determine whether we looking for WAL in 'pg_wal' directory or
1417+ * in archive. Do note, that we cannot rely sorely on global variable 'stream_wal' because,
1418+ * for example, PAGE backup must(!) look for start_lsn in archive regardless of wal_mode .
14141419 *
1415- * Flag 'in_stream_dir' determine whether we looking for wal in 'pg_wal' directory or
1416- * in archive. Do note, that we cannot rely sorely on 'stream_wal' because, for example,
1417- * PAGE backup must(!) look for start_lsn in archive regardless of wal_mode.
1418-
14191420 * 'timeout_elevel' determine the elevel for timeout elog message. If elevel lighter than
14201421 * ERROR is used, then return InvalidXLogRecPtr. TODO: return something more concrete, for example 1.
14211422 *
1422- * Returns LSN of last valid record if wait_prev_segment is not true, otherwise
1423- * returns InvalidXLogRecPtr.
1423+ * Returns target LSN if such is found, failing that returns LSN of record prior to target LSN.
1424+ * Returns InvalidXLogRecPtr if 'segment_only' flag is used .
14241425 */
14251426static XLogRecPtr
1426- wait_wal_lsn (XLogRecPtr lsn , bool is_start_lsn , TimeLineID tli ,
1427+ wait_wal_lsn (XLogRecPtr target_lsn , bool is_start_lsn , TimeLineID tli ,
14271428 bool in_prev_segment , bool segment_only ,
14281429 int timeout_elevel , bool in_stream_dir )
14291430{
@@ -1442,16 +1443,16 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
14421443#endif
14431444
14441445 /* Compute the name of the WAL file containing requested LSN */
1445- GetXLogSegNo (lsn , targetSegNo , instance_config .xlog_seg_size );
1446+ GetXLogSegNo (target_lsn , targetSegNo , instance_config .xlog_seg_size );
14461447 if (in_prev_segment )
14471448 targetSegNo -- ;
14481449 GetXLogFileName (wal_segment , tli , targetSegNo ,
14491450 instance_config .xlog_seg_size );
14501451
14511452 /*
1452- * In pg_start_backup we wait for 'lsn ' in 'pg_wal' directory if it is
1453+ * In pg_start_backup we wait for 'target_lsn ' in 'pg_wal' directory if it is
14531454 * stream and non-page backup. Page backup needs archived WAL files, so we
1454- * wait for 'lsn ' in archive 'wal' directory for page backups.
1455+ * wait for 'target_lsn ' in archive 'wal' directory for page backups.
14551456 *
14561457 * In pg_stop_backup it depends only on stream_wal.
14571458 */
@@ -1468,6 +1469,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
14681469 wal_segment_dir = arclog_path ;
14691470 }
14701471
1472+ /* TODO: remove this in 3.0 (it is a cludge against some old bug with archive_timeout) */
14711473 if (instance_config .archive_timeout > 0 )
14721474 timeout = instance_config .archive_timeout ;
14731475 else
@@ -1477,7 +1479,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
14771479 elog (LOG , "Looking for segment: %s" , wal_segment );
14781480 else
14791481 elog (LOG , "Looking for LSN %X/%X in segment: %s" ,
1480- (uint32 ) (lsn >> 32 ), (uint32 ) lsn , wal_segment );
1482+ (uint32 ) (target_lsn >> 32 ), (uint32 ) target_lsn , wal_segment );
14811483
14821484#ifdef HAVE_LIBZ
14831485 snprintf (gz_wal_segment_path , sizeof (gz_wal_segment_path ), "%s.gz" ,
@@ -1506,26 +1508,24 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
15061508
15071509 if (file_exists )
15081510 {
1509- //TODO is this comment also relates to current segment_only case
1510- // and should be updated?
1511- /* Do not check LSN for previous WAL segment */
1511+ /* Do not check for target LSN */
15121512 if (segment_only )
15131513 return InvalidXLogRecPtr ;
15141514
15151515 /*
1516- * A WAL segment found. Check LSN on it.
1516+ * A WAL segment found. Look for target LSN in it.
15171517 */
1518- if (!XRecOffIsNull (lsn ) &&
1519- wal_contains_lsn (wal_segment_dir , lsn , tli ,
1518+ if (!XRecOffIsNull (target_lsn ) &&
1519+ wal_contains_lsn (wal_segment_dir , target_lsn , tli ,
15201520 instance_config .xlog_seg_size ))
15211521 /* Target LSN was found */
15221522 {
1523- elog (LOG , "Found LSN: %X/%X" , (uint32 ) (lsn >> 32 ), (uint32 ) lsn );
1524- return lsn ;
1523+ elog (LOG , "Found LSN: %X/%X" , (uint32 ) (target_lsn >> 32 ), (uint32 ) target_lsn );
1524+ return target_lsn ;
15251525 }
15261526
15271527 /*
1528- * If we failed to get LSN of valid record in a reasonable time, try
1528+ * If we failed to get target LSN in a reasonable time, try
15291529 * to get LSN of last valid record prior to the target LSN. But only
15301530 * in case of a backup from a replica.
15311531
@@ -1539,7 +1539,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
15391539 {
15401540 XLogRecPtr res ;
15411541
1542- res = get_last_wal_lsn (wal_segment_dir , current .start_lsn , lsn , tli ,
1542+ res = get_last_wal_lsn (wal_segment_dir , current .start_lsn , target_lsn , tli ,
15431543 in_prev_segment , instance_config .xlog_seg_size );
15441544
15451545 if (!XLogRecPtrIsInvalid (res ))
@@ -1565,7 +1565,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
15651565 wal_segment_path , wal_delivery_str );
15661566 else
15671567 elog (INFO , "Wait for LSN %X/%X in %s WAL segment %s" ,
1568- (uint32 ) (lsn >> 32 ), (uint32 ) lsn ,
1568+ (uint32 ) (target_lsn >> 32 ), (uint32 ) target_lsn ,
15691569 wal_delivery_str , wal_segment_path );
15701570 }
15711571
@@ -1580,7 +1580,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
15801580 elog (timeout_elevel , "WAL segment %s was %s, "
15811581 "but target LSN %X/%X could not be archived in %d seconds" ,
15821582 wal_segment , wal_delivery_str ,
1583- (uint32 ) (lsn >> 32 ), (uint32 ) lsn , timeout );
1583+ (uint32 ) (target_lsn >> 32 ), (uint32 ) target_lsn , timeout );
15841584 /* If WAL segment doesn't exist or we wait for previous segment */
15851585 else
15861586 elog (timeout_elevel ,
@@ -1808,11 +1808,10 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
18081808 XLogRecPtr lsn_tmp = InvalidXLogRecPtr ;
18091809
18101810 /*
1811- * TODO Let's rephrase that to be less scary for a user.
18121811 * Even though the value is invalid, it's expected postgres behaviour
18131812 * and we're trying to fix it below.
18141813 */
1815- elog (WARNING , "Invalid stop_backup_lsn value %X/%X" ,
1814+ elog (LOG , "Null offset in stop_backup_lsn value %X/%X, trying to fix " ,
18161815 (uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
18171816
18181817 /*
@@ -1844,15 +1843,12 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
18441843 * and to use it in WAL validation.
18451844 *
18461845 * So we try to do the following:
1847- * 1. Wait for current segment and look in it for the LSN >= STOP_LSN.
1848- * //TODO what is 'current' segment?
1849- * //TODO how long do we wait? is there a timeout?
1850- * It solves the problem of occasional invalid XRecOff
1851- * on write-busy system.
1846+ * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and
1847+ * look for the first valid record in it.
1848+ * It solves the problem of occasional invalid XRecOff on write-busy system.
18521849 * 2. Failing that, look for record in previous segment with endpoint
1853- * equal or greater than 0 offset LSN. It may(!) solve the problem of 0 offset
1854- * on write-idle system.
1855- * //TODO what if if didn't?
1850+ * equal or greater than stop_lsn. It may(!) solve the problem of 0 offset
1851+ * on write-idle system. If that fails too, error out.
18561852 * //TODO what kind of record that refers to?
18571853 */
18581854
@@ -1864,8 +1860,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
18641860 lsn_tmp = get_first_wal_lsn (xlog_path , segno , backup -> tli ,
18651861 instance_config .xlog_seg_size );
18661862
1867- /* Check if returned LSN is satisfying our requirements */
1868- //TODO what requirements?
1863+ /* Check that returned LSN is valid and greater than stop_lsn */
18691864 if (XLogRecPtrIsInvalid (lsn_tmp ) ||
18701865 !XRecOffIsValid (lsn_tmp ) ||
18711866 lsn_tmp < stop_backup_lsn_tmp )
@@ -1893,9 +1888,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
18931888 stop_backup_lsn = lsn_tmp ;
18941889 stop_lsn_exists = true;
18951890 }
1896- //TODO is it good to throw an error here? Shouldn't we rather
1897- // save it as is and mark backup as DONE instead of OK
1898- // and check it in validation and restore?
1891+ /* Replica returned something very illegal, error out */
18991892 else
19001893 elog (ERROR , "Invalid stop_backup_lsn value %X/%X" ,
19011894 (uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
@@ -2000,9 +1993,8 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
20001993
20011994 /*
20021995 * Wait for stop_lsn to be archived or streamed.
2003- * //TODO is the sentence below outdated?
2004- * If replica returned non-existent LSN, look for previous record,
2005- * which endpoint >= stop_lsn
1996+ * If replica returned valid STOP_LSN of not actually existing record,
1997+ * look for previous record with endpoint >= STOP_LSN.
20061998 */
20071999 if (!stop_lsn_exists )
20082000 stop_backup_lsn = wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
0 commit comments