@@ -98,9 +98,10 @@ static void pg_switch_wal(PGconn *conn);
9898static void pg_stop_backup (pgBackup * backup , PGconn * pg_startbackup_conn , PGNodeInfo * nodeInfo );
9999static int checkpoint_timeout (PGconn * backup_conn );
100100
101- //static void backup_list_file(parray *files, const char *root, )
102- static XLogRecPtr wait_wal_lsn (XLogRecPtr lsn , bool is_start_lsn ,
103- bool wait_prev_segment );
101+ static XLogRecPtr wait_wal_lsn (XLogRecPtr lsn , bool is_start_lsn , TimeLineID tli ,
102+ bool in_prev_segment , bool segment_only ,
103+ int timeout_elevel , bool in_stream_dir );
104+
104105static void make_pagemap_from_ptrack (parray * files , PGconn * backup_conn );
105106static void * StreamLog (void * arg );
106107static void IdentifySystem (StreamThreadArg * stream_thread_arg );
@@ -506,6 +507,9 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo)
506507 /* Notify end of backup */
507508 pg_stop_backup (& current , pg_startbackup_conn , nodeInfo );
508509
510+ elog (LOG , "current.stop_lsn: %X/%X" ,
511+ (uint32 ) (stop_backup_lsn >> 32 ), (uint32 ) (stop_backup_lsn ));
512+
509513 /* In case of backup from replica >= 9.6 we must fix minRecPoint,
510514 * First we must find pg_control in backup_files_list.
511515 */
@@ -992,15 +996,15 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup,
992996
993997 if (current .backup_mode == BACKUP_MODE_DIFF_PAGE )
994998 /* In PAGE mode wait for current segment... */
995- wait_wal_lsn (backup -> start_lsn , true, false);
999+ wait_wal_lsn (backup -> start_lsn , true, backup -> tli , false, true, ERROR , false);
9961000 /*
9971001 * Do not wait start_lsn for stream backup.
9981002 * Because WAL streaming will start after pg_start_backup() in stream
9991003 * mode.
10001004 */
10011005 else if (!stream_wal )
10021006 /* ...for others wait for previous segment */
1003- wait_wal_lsn (backup -> start_lsn , true, true);
1007+ wait_wal_lsn (backup -> start_lsn , true, backup -> tli , true, true, ERROR , false );
10041008}
10051009
10061010/*
@@ -1395,25 +1399,34 @@ pg_ptrack_get_and_clear(Oid tablespace_oid, Oid db_oid, Oid rel_filenode,
13951399}
13961400
13971401/*
1398- * Wait for target 'lsn'.
1402+ * Wait for target 'lsn' or WAL segment, containing 'lsn' .
13991403 *
14001404 * If current backup started in archive mode wait for 'lsn' to be archived in
14011405 * archive 'wal' directory with WAL segment file.
14021406 * If current backup started in stream mode wait for 'lsn' to be streamed in
14031407 * 'pg_wal' directory.
14041408 *
1405- * If 'is_start_lsn' is true and backup mode is PAGE then we wait for 'lsn' to
1406- * be archived in archive 'wal' directory regardless stream mode.
1409+ * If 'is_start_lsn' is true then issue warning for first-time users.
14071410 *
1408- * If 'wait_prev_segment' wait for previous segment.
1411+ * If 'in_prev_segment' is set, look for LSN in previous segment.
1412+ * If 'segment_only' is set, then instead of looking for LSN, look for segment itself.
1413+ * If 'in_prev_segment' and 'segment_only' are both set, then wait for previous segment.
1414+ *
1415+ * Flag 'in_stream_dir' determine whether we looking for wal in 'pg_wal' directory or
1416+ * in archive. Do note, that we cannot rely sorely on 'stream_wal' because, for example,
1417+ * PAGE backup must(!) look for start_lsn in archive regardless of wal_mode.
1418+
1419+ * 'timeout_elevel' determine the elevel for timeout elog message. If elevel lighter than
1420+ * ERROR is used, then return InvalidXLogRecPtr. TODO: return something more concrete, for example 1.
14091421 *
14101422 * Returns LSN of last valid record if wait_prev_segment is not true, otherwise
14111423 * returns InvalidXLogRecPtr.
14121424 */
14131425static XLogRecPtr
1414- wait_wal_lsn (XLogRecPtr lsn , bool is_start_lsn , bool wait_prev_segment )
1426+ wait_wal_lsn (XLogRecPtr lsn , bool is_start_lsn , TimeLineID tli ,
1427+ bool in_prev_segment , bool segment_only ,
1428+ int timeout_elevel , bool in_stream_dir )
14151429{
1416- TimeLineID tli ;
14171430 XLogSegNo targetSegNo ;
14181431 char pg_wal_dir [MAXPGPATH ];
14191432 char wal_segment_path [MAXPGPATH ],
@@ -1422,16 +1435,15 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
14221435 bool file_exists = false;
14231436 uint32 try_count = 0 ,
14241437 timeout ;
1438+ char * wal_delivery_str = in_stream_dir ? "streamed" :"archived" ;
14251439
14261440#ifdef HAVE_LIBZ
14271441 char gz_wal_segment_path [MAXPGPATH ];
14281442#endif
14291443
1430- tli = get_current_timeline (false);
1431-
14321444 /* Compute the name of the WAL file containing requested LSN */
14331445 GetXLogSegNo (lsn , targetSegNo , instance_config .xlog_seg_size );
1434- if (wait_prev_segment )
1446+ if (in_prev_segment )
14351447 targetSegNo -- ;
14361448 GetXLogFileName (wal_segment , tli , targetSegNo ,
14371449 instance_config .xlog_seg_size );
@@ -1443,8 +1455,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
14431455 *
14441456 * In pg_stop_backup it depends only on stream_wal.
14451457 */
1446- if (stream_wal &&
1447- (current .backup_mode != BACKUP_MODE_DIFF_PAGE || !is_start_lsn ))
1458+ if (in_stream_dir )
14481459 {
14491460 pgBackupGetPath2 (& current , pg_wal_dir , lengthof (pg_wal_dir ),
14501461 DATABASE_DIR , PG_XLOG_DIR );
@@ -1462,7 +1473,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
14621473 else
14631474 timeout = ARCHIVE_TIMEOUT_DEFAULT ;
14641475
1465- if (wait_prev_segment )
1476+ if (segment_only )
14661477 elog (LOG , "Looking for segment: %s" , wal_segment );
14671478 else
14681479 elog (LOG , "Looking for LSN %X/%X in segment: %s" ,
@@ -1496,14 +1507,15 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
14961507 if (file_exists )
14971508 {
14981509 /* Do not check LSN for previous WAL segment */
1499- if (wait_prev_segment )
1510+ if (segment_only )
15001511 return InvalidXLogRecPtr ;
15011512
15021513 /*
15031514 * A WAL segment found. Check LSN on it.
15041515 */
1505- if (wal_contains_lsn (wal_segment_dir , lsn , tli ,
1506- instance_config .xlog_seg_size ))
1516+ if (!XRecOffIsNull (lsn ) &&
1517+ wal_contains_lsn (wal_segment_dir , lsn , tli ,
1518+ instance_config .xlog_seg_size ))
15071519 /* Target LSN was found */
15081520 {
15091521 elog (LOG , "Found LSN: %X/%X" , (uint32 ) (lsn >> 32 ), (uint32 ) lsn );
@@ -1514,19 +1526,24 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
15141526 * If we failed to get LSN of valid record in a reasonable time, try
15151527 * to get LSN of last valid record prior to the target LSN. But only
15161528 * in case of a backup from a replica.
1529+
1530+ * There are two cases for this:
1531+ * 1. Replica returned readpoint LSN which just do not exists. We want to look
1532+ * for previous record in the same(!) WAL segment which endpoint points to this LSN.
1533+ * 2. Replica returened endpoint LSN with 0 offset. We want to look
1534+ * for previous record which endpoint points greater or equal LSN in previous WAL segment.
15171535 */
1518- if (!exclusive_backup && current .from_replica &&
1519- (try_count > timeout / 4 ))
1536+ if (!exclusive_backup && current .from_replica && try_count > timeout / 2 )
15201537 {
15211538 XLogRecPtr res ;
15221539
1523- res = get_last_wal_lsn (wal_segment_dir , current .start_lsn ,
1524- lsn , tli , false,
1525- instance_config . xlog_seg_size );
1540+ res = get_last_wal_lsn (wal_segment_dir , current .start_lsn , lsn , tli ,
1541+ in_prev_segment , instance_config . xlog_seg_size );
1542+
15261543 if (!XLogRecPtrIsInvalid (res ))
15271544 {
15281545 /* LSN of the prior record was found */
1529- elog (LOG , "Found prior LSN: %X/%X, it is used as stop LSN " ,
1546+ elog (LOG , "Found prior LSN: %X/%X" ,
15301547 (uint32 ) (res >> 32 ), (uint32 ) res );
15311548 return res ;
15321549 }
@@ -1541,12 +1558,13 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
15411558 /* Inform user if WAL segment is absent in first attempt */
15421559 if (try_count == 1 )
15431560 {
1544- if (wait_prev_segment )
1545- elog (INFO , "Wait for WAL segment %s to be archived " ,
1546- wal_segment_path );
1561+ if (segment_only )
1562+ elog (INFO , "Wait for WAL segment %s to be %s " ,
1563+ wal_segment_path , wal_delivery_str );
15471564 else
1548- elog (INFO , "Wait for LSN %X/%X in archived WAL segment %s" ,
1549- (uint32 ) (lsn >> 32 ), (uint32 ) lsn , wal_segment_path );
1565+ elog (INFO , "Wait for LSN %X/%X in %s WAL segment %s" ,
1566+ (uint32 ) (lsn >> 32 ), (uint32 ) lsn ,
1567+ wal_delivery_str , wal_segment_path );
15501568 }
15511569
15521570 if (!stream_wal && is_start_lsn && try_count == 30 )
@@ -1557,14 +1575,17 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
15571575 if (timeout > 0 && try_count > timeout )
15581576 {
15591577 if (file_exists )
1560- elog (ERROR , "WAL segment %s was archived , "
1578+ elog (timeout_elevel , "WAL segment %s was %s , "
15611579 "but target LSN %X/%X could not be archived in %d seconds" ,
1562- wal_segment , (uint32 ) (lsn >> 32 ), (uint32 ) lsn , timeout );
1580+ wal_segment , wal_delivery_str ,
1581+ (uint32 ) (lsn >> 32 ), (uint32 ) lsn , timeout );
15631582 /* If WAL segment doesn't exist or we wait for previous segment */
15641583 else
1565- elog (ERROR ,
1566- "Switched WAL segment %s could not be archived in %d seconds" ,
1567- wal_segment , timeout );
1584+ elog (timeout_elevel ,
1585+ "WAL segment %s could not be %s in %d seconds" ,
1586+ wal_segment , wal_delivery_str , timeout );
1587+
1588+ return InvalidXLogRecPtr ;
15681589 }
15691590 }
15701591}
@@ -1591,6 +1612,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
15911612 char * val = NULL ;
15921613 char * stop_backup_query = NULL ;
15931614 bool stop_lsn_exists = false;
1615+ XLogRecPtr stop_backup_lsn_tmp = InvalidXLogRecPtr ;
15941616
15951617 /*
15961618 * We will use this values if there are no transactions between start_lsn
@@ -1771,17 +1793,28 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
17711793 /* Extract timeline and LSN from results of pg_stop_backup() */
17721794 XLogDataFromLSN (PQgetvalue (res , 0 , 2 ), & lsn_hi , & lsn_lo );
17731795 /* Calculate LSN */
1774- stop_backup_lsn = ((uint64 ) lsn_hi ) << 32 | lsn_lo ;
1796+ stop_backup_lsn_tmp = ((uint64 ) lsn_hi ) << 32 | lsn_lo ;
17751797
1776- if (!XRecOffIsValid (stop_backup_lsn ))
1798+ if (!XRecOffIsValid (stop_backup_lsn_tmp ))
17771799 {
1778- if (XRecOffIsNull (stop_backup_lsn ))
1800+ /* Replica returned STOP LSN with null offset */
1801+ if (XRecOffIsNull (stop_backup_lsn_tmp ))
17791802 {
17801803 char * xlog_path ,
17811804 stream_xlog_path [MAXPGPATH ];
1805+ XLogSegNo segno = 0 ;
1806+ XLogRecPtr lsn_tmp = InvalidXLogRecPtr ;
17821807
17831808 elog (WARNING , "Invalid stop_backup_lsn value %X/%X" ,
1784- (uint32 ) (stop_backup_lsn >> 32 ), (uint32 ) (stop_backup_lsn ));
1809+ (uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
1810+
1811+ /*
1812+ * Note: even with gdb it is very hard to produce automated tests for
1813+ * contrecord + null_offset STOP_LSN, so emulate it for manual testing.
1814+ */
1815+ //stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE;
1816+ //elog(WARNING, "New Invalid stop_backup_lsn value %X/%X",
1817+ // (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
17851818
17861819 if (stream_wal )
17871820 {
@@ -1793,24 +1826,62 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
17931826 else
17941827 xlog_path = arclog_path ;
17951828
1796- wait_wal_lsn (stop_backup_lsn , false, true);
1797- stop_backup_lsn = get_last_wal_lsn (xlog_path , backup -> start_lsn ,
1798- stop_backup_lsn , backup -> tli ,
1799- true, instance_config .xlog_seg_size );
1829+ GetXLogSegNo (stop_backup_lsn_tmp , segno , instance_config .xlog_seg_size );
1830+
18001831 /*
1801- * Do not check existance of LSN again below using
1802- * wait_wal_lsn().
1832+ * Note, that there is no guarantee that corresponding WAL file is even exists.
1833+ * Basically replica may return LSN from future and keep staying in present.
1834+ * Yeah, it sucks.
1835+ *
1836+ * So we should try to do the following:
1837+ * 1. Wait for current segment and look in it for the LSN >= STOP_LSN. It should
1838+ * solve the problem of occasional 0 offset on write-busy system.
1839+ * 2. Failing that, look for record in previous segment with endpoint
1840+ * equal or greater than 0 offset LSN. It may(!) solve the problem of 0 offset
1841+ * on write-idle system.
18031842 */
1843+
1844+ /* Wait for segment with current stop_lsn, it is ok for it to never arrive */
1845+ wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1846+ false, true, WARNING , stream_wal );
1847+
1848+ /* Optimistically try to get the first record in segment with current stop_lsn */
1849+ lsn_tmp = get_first_wal_lsn (xlog_path , segno , backup -> tli ,
1850+ instance_config .xlog_seg_size );
1851+
1852+ /* Check if returned LSN is satisfying our requirements */
1853+ if (XLogRecPtrIsInvalid (lsn_tmp ) ||
1854+ !XRecOffIsValid (lsn_tmp ) ||
1855+ lsn_tmp < stop_backup_lsn_tmp )
1856+ {
1857+ /* No luck, falling back to looking up for previous record */
1858+ elog (WARNING , "Failed to get next WAL record after %X/%X, "
1859+ "looking for previous WAL record" ,
1860+ (uint32 ) (stop_backup_lsn_tmp >> 32 ),
1861+ (uint32 ) (stop_backup_lsn_tmp ));
1862+
1863+ /* Despite looking for previous record there is not guarantee of success
1864+ * because previous record can be the contrecord.
1865+ */
1866+ lsn_tmp = wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1867+ true, false, ERROR , stream_wal );
1868+
1869+ /* sanity */
1870+ if (!XRecOffIsValid (lsn_tmp ) || XLogRecPtrIsInvalid (lsn_tmp ))
1871+ elog (ERROR , "Failed to get WAL record prior to %X/%X" ,
1872+ (uint32 ) (stop_backup_lsn_tmp >> 32 ),
1873+ (uint32 ) (stop_backup_lsn_tmp ));
1874+ }
1875+
1876+ /* Setting stop_backup_lsn will set stop point for streaming */
1877+ stop_backup_lsn = lsn_tmp ;
18041878 stop_lsn_exists = true;
18051879 }
18061880 else
18071881 elog (ERROR , "Invalid stop_backup_lsn value %X/%X" ,
1808- (uint32 ) (stop_backup_lsn >> 32 ), (uint32 ) (stop_backup_lsn ));
1882+ (uint32 ) (stop_backup_lsn_tmp >> 32 ), (uint32 ) (stop_backup_lsn_tmp ));
18091883 }
18101884
1811- elog (LOG , "current.stop_lsn: %X/%X" ,
1812- (uint32 ) (stop_backup_lsn >> 32 ), (uint32 ) (stop_backup_lsn ));
1813-
18141885 /* Write backup_label and tablespace_map */
18151886 if (!exclusive_backup )
18161887 {
@@ -1900,14 +1971,6 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
19001971 if (tablespace_map_content )
19011972 PQclear (tablespace_map_content );
19021973 PQclear (res );
1903-
1904- if (stream_wal )
1905- {
1906- /* Wait for the completion of stream */
1907- pthread_join (stream_thread , NULL );
1908- if (stream_thread_arg .ret == 1 )
1909- elog (ERROR , "WAL streaming failed" );
1910- }
19111974 }
19121975
19131976 /* Fill in fields if that is the correct end of backup. */
@@ -1918,13 +1981,20 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
19181981
19191982 /*
19201983 * Wait for stop_lsn to be archived or streamed.
1921- * We wait for stop_lsn in stream mode just in case.
1984+ * If replica returned non-existent LSN, look for previous record,
1985+ * which endpoint >= stop_lsn
19221986 */
19231987 if (!stop_lsn_exists )
1924- stop_backup_lsn = wait_wal_lsn (stop_backup_lsn , false, false);
1988+ stop_backup_lsn = wait_wal_lsn (stop_backup_lsn_tmp , false, backup -> tli ,
1989+ false, false, ERROR , stream_wal );
19251990
19261991 if (stream_wal )
19271992 {
1993+ /* Wait for the completion of stream */
1994+ pthread_join (stream_thread , NULL );
1995+ if (stream_thread_arg .ret == 1 )
1996+ elog (ERROR , "WAL streaming failed" );
1997+
19281998 pgBackupGetPath2 (backup , stream_xlog_path ,
19291999 lengthof (stream_xlog_path ),
19302000 DATABASE_DIR , PG_XLOG_DIR );
@@ -1933,7 +2003,6 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
19332003 else
19342004 xlog_path = arclog_path ;
19352005
1936- backup -> tli = get_current_timeline (false);
19372006 backup -> stop_lsn = stop_backup_lsn ;
19382007
19392008 elog (LOG , "Getting the Recovery Time from WAL" );
@@ -1944,7 +2013,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
19442013 backup -> start_lsn , backup -> stop_lsn ,
19452014 & backup -> recovery_time , & backup -> recovery_xid ))
19462015 {
1947- elog (LOG , "Failed to find Recovery Time in WAL. Forced to trust current_timestamp" );
2016+ elog (LOG , "Failed to find Recovery Time in WAL, forced to trust current_timestamp" );
19482017 backup -> recovery_time = recovery_time ;
19492018 backup -> recovery_xid = recovery_xid ;
19502019 }
0 commit comments