@@ -412,10 +412,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool
412412
413413 if (current .backup_mode != BACKUP_MODE_FULL )
414414 {
415- elog (LOG , "current_tli: %X" , current .tli );
416- elog (LOG , "prev_backup-> start_lsn: %X/%X" ,
415+ elog (LOG , "Current tli: %X" , current .tli );
416+ elog (LOG , "Parent start_lsn: %X/%X" ,
417417 (uint32 ) (prev_backup -> start_lsn >> 32 ), (uint32 ) (prev_backup -> start_lsn ));
418- elog (LOG , "current. start_lsn: %X/%X" ,
418+ elog (LOG , "start_lsn: %X/%X" ,
419419 (uint32 ) (current .start_lsn >> 32 ), (uint32 ) (current .start_lsn ));
420420 }
421421
@@ -573,9 +573,6 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool
573573 /* Notify end of backup */
574574 pg_stop_backup (& current , pg_startbackup_conn , nodeInfo );
575575
576- elog (LOG , "current.stop_lsn: %X/%X" ,
577- (uint32 ) (stop_backup_lsn >> 32 ), (uint32 ) (stop_backup_lsn ));
578-
579576 /* In case of backup from replica >= 9.6 we must fix minRecPoint,
580577 * First we must find pg_control in backup_files_list.
581578 */
@@ -1732,65 +1729,66 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
17321729 /* Calculate LSN */
17331730 stop_backup_lsn_tmp = ((uint64 ) lsn_hi ) << 32 | lsn_lo ;
17341731
1732+ /* It is ok for replica to return invalid STOP LSN
1733+ * UPD: Apparently it is ok even for a master.
1734+ */
17351735 if (!XRecOffIsValid (stop_backup_lsn_tmp ))
17361736 {
1737- /* It is ok for replica to return STOP LSN with NullXRecOff
1738- * UPD: Apparently it is ok even for master.
1739- */
1740- if (XRecOffIsNull (stop_backup_lsn_tmp ))
1741- {
1742- char * xlog_path ,
1743- stream_xlog_path [MAXPGPATH ];
1744- XLogSegNo segno = 0 ;
1745- XLogRecPtr lsn_tmp = InvalidXLogRecPtr ;
1737+ char * xlog_path ,
1738+ stream_xlog_path [MAXPGPATH ];
1739+ XLogSegNo segno = 0 ;
1740+ XLogRecPtr lsn_tmp = InvalidXLogRecPtr ;
17461741
1747- /*
1748- * Even though the value is invalid, it's expected postgres behaviour
1749- * and we're trying to fix it below.
1750- */
1751- elog (LOG , "Null offset in stop_backup_lsn value %X/%X, trying to fix" ,
1752- (uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
1742+ /*
1743+ * Even though the value is invalid, it's expected postgres behaviour
1744+ * and we're trying to fix it below.
1745+ */
1746+ elog (LOG , "Invalid offset in stop_lsn value %X/%X, trying to fix" ,
1747+ (uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
17531748
1754- /*
1755- * Note: even with gdb it is very hard to produce automated tests for
1756- * contrecord + NullXRecOff , so emulate it for manual testing.
1757- */
1758- //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE;
1759- //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X",
1760- // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
1749+ /*
1750+ * Note: even with gdb it is very hard to produce automated tests for
1751+ * contrecord + invalid LSN , so emulate it for manual testing.
1752+ */
1753+ //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE;
1754+ //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X",
1755+ // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
17611756
1762- if (stream_wal )
1763- {
1764- pgBackupGetPath2 (backup , stream_xlog_path ,
1765- lengthof (stream_xlog_path ),
1766- DATABASE_DIR , PG_XLOG_DIR );
1767- xlog_path = stream_xlog_path ;
1768- }
1769- else
1770- xlog_path = arclog_path ;
1757+ if (stream_wal )
1758+ {
1759+ pgBackupGetPath2 (backup , stream_xlog_path ,
1760+ lengthof (stream_xlog_path ),
1761+ DATABASE_DIR , PG_XLOG_DIR );
1762+ xlog_path = stream_xlog_path ;
1763+ }
1764+ else
1765+ xlog_path = arclog_path ;
17711766
1772- GetXLogSegNo (stop_backup_lsn_tmp , segno , instance_config .xlog_seg_size );
1767+ GetXLogSegNo (stop_backup_lsn_tmp , segno , instance_config .xlog_seg_size );
17731768
1774- /*
1775- * Note, that there is no guarantee that corresponding WAL file even exists.
1776- * Replica may return LSN from future and keep staying in present.
1777- * Or it can return LSN with NullXRecOff .
1778- *
1779- * That's bad, since we want to get real LSN to save it in backup label file
1780- * and to use it in WAL validation.
1781- *
1782- * So we try to do the following:
1783- * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and
1784- * look for the first valid record in it.
1785- * It solves the problem of occasional invalid XRecOff on write-busy system.
1786- * 2. Failing that, look for record in previous segment with endpoint
1787- * equal or greater than stop_lsn. It may(!) solve the problem of NullXRecOff
1788- * on write-idle system. If that fails too, error out.
1789- */
1769+ /*
1770+ * Note, that there is no guarantee that corresponding WAL file even exists.
1771+ * Replica may return LSN from future and keep staying in present.
1772+ * Or it can return invalid LSN .
1773+ *
1774+ * That's bad, since we want to get real LSN to save it in backup label file
1775+ * and to use it in WAL validation.
1776+ *
1777+ * So we try to do the following:
1778+ * 1. Wait 'archive_timeout' seconds for segment containing stop_lsn and
1779+ * look for the first valid record in it.
1780+ * It solves the problem of occasional invalid LSN on write-busy system.
1781+ * 2. Failing that, look for record in previous segment with endpoint
1782+ * equal or greater than stop_lsn. It may(!) solve the problem of invalid LSN
1783+ * on write-idle system. If that fails too, error out.
1784+ */
17901785
1786+ /* stop_lsn is pointing to a 0 byte of xlog segment */
1787+ if (stop_backup_lsn_tmp % instance_config .xlog_seg_size == 0 )
1788+ {
17911789 /* Wait for segment with current stop_lsn, it is ok for it to never arrive */
17921790 wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1793- false, true, WARNING , stream_wal );
1791+ false, true, WARNING , stream_wal );
17941792
17951793 /* Get the first record in segment with current stop_lsn */
17961794 lsn_tmp = get_first_record_lsn (xlog_path , segno , backup -> tli ,
@@ -1826,17 +1824,39 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
18261824 (uint32 ) (stop_backup_lsn_tmp >> 32 ),
18271825 (uint32 ) (stop_backup_lsn_tmp ));
18281826 }
1827+ }
1828+ /* stop lsn is aligned to xlog block size, just find next lsn */
1829+ else if (stop_backup_lsn_tmp % XLOG_BLCKSZ == 0 )
1830+ {
1831+ /* Wait for segment with current stop_lsn */
1832+ wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1833+ false, true, ERROR , stream_wal );
1834+
1835+ /* Get the next closest record in segment with current stop_lsn */
1836+ lsn_tmp = get_next_record_lsn (xlog_path , segno , backup -> tli ,
1837+ instance_config .xlog_seg_size ,
1838+ instance_config .archive_timeout ,
1839+ stop_backup_lsn_tmp );
18291840
1830- /* Setting stop_backup_lsn will set stop point for streaming */
1831- stop_backup_lsn = lsn_tmp ;
1832- stop_lsn_exists = true;
1841+ /* sanity */
1842+ if (!XRecOffIsValid (lsn_tmp ) || XLogRecPtrIsInvalid (lsn_tmp ))
1843+ elog (ERROR , "Failed to get WAL record next to %X/%X" ,
1844+ (uint32 ) (stop_backup_lsn_tmp >> 32 ),
1845+ (uint32 ) (stop_backup_lsn_tmp ));
18331846 }
18341847 /* PostgreSQL returned something very illegal as STOP_LSN, error out */
18351848 else
18361849 elog (ERROR , "Invalid stop_backup_lsn value %X/%X" ,
18371850 (uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
1851+
1852+ /* Setting stop_backup_lsn will set stop point for streaming */
1853+ stop_backup_lsn = lsn_tmp ;
1854+ stop_lsn_exists = true;
18381855 }
18391856
1857+ elog (LOG , "stop_lsn: %X/%X" ,
1858+ (uint32 ) (stop_backup_lsn >> 32 ), (uint32 ) (stop_backup_lsn ));
1859+
18401860 /* Write backup_label and tablespace_map */
18411861 if (!exclusive_backup )
18421862 {
0 commit comments