Skip to content

Commit 7c5e799

Browse files
committed
[Issue #115] look-forward approach of handling the invalid stop_lsn received from replica
1 parent 18211c5 commit 7c5e799

File tree

4 files changed

+191
-80
lines changed

4 files changed

+191
-80
lines changed

src/backup.c

Lines changed: 131 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,10 @@ static void pg_switch_wal(PGconn *conn);
9898
static void pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn, PGNodeInfo *nodeInfo);
9999
static int checkpoint_timeout(PGconn *backup_conn);
100100

101-
//static void backup_list_file(parray *files, const char *root, )
102-
static XLogRecPtr wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn,
103-
bool wait_prev_segment);
101+
static XLogRecPtr wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
102+
bool in_prev_segment, bool segment_only,
103+
int timeout_elevel, bool in_stream_dir);
104+
104105
static void make_pagemap_from_ptrack(parray* files, PGconn* backup_conn);
105106
static void *StreamLog(void *arg);
106107
static void IdentifySystem(StreamThreadArg *stream_thread_arg);
@@ -506,6 +507,9 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo)
506507
/* Notify end of backup */
507508
pg_stop_backup(&current, pg_startbackup_conn, nodeInfo);
508509

510+
elog(LOG, "current.stop_lsn: %X/%X",
511+
(uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn));
512+
509513
/* In case of backup from replica >= 9.6 we must fix minRecPoint,
510514
* First we must find pg_control in backup_files_list.
511515
*/
@@ -992,15 +996,15 @@ pg_start_backup(const char *label, bool smooth, pgBackup *backup,
992996

993997
if (current.backup_mode == BACKUP_MODE_DIFF_PAGE)
994998
/* In PAGE mode wait for current segment... */
995-
wait_wal_lsn(backup->start_lsn, true, false);
999+
wait_wal_lsn(backup->start_lsn, true, backup->tli, false, true, ERROR, false);
9961000
/*
9971001
* Do not wait start_lsn for stream backup.
9981002
* Because WAL streaming will start after pg_start_backup() in stream
9991003
* mode.
10001004
*/
10011005
else if (!stream_wal)
10021006
/* ...for others wait for previous segment */
1003-
wait_wal_lsn(backup->start_lsn, true, true);
1007+
wait_wal_lsn(backup->start_lsn, true, backup->tli, true, true, ERROR, false);
10041008
}
10051009

10061010
/*
@@ -1395,25 +1399,34 @@ pg_ptrack_get_and_clear(Oid tablespace_oid, Oid db_oid, Oid rel_filenode,
13951399
}
13961400

13971401
/*
1398-
* Wait for target 'lsn'.
1402+
* Wait for target 'lsn' or WAL segment, containing 'lsn'.
13991403
*
14001404
* If current backup started in archive mode wait for 'lsn' to be archived in
14011405
* archive 'wal' directory with WAL segment file.
14021406
* If current backup started in stream mode wait for 'lsn' to be streamed in
14031407
* 'pg_wal' directory.
14041408
*
1405-
* If 'is_start_lsn' is true and backup mode is PAGE then we wait for 'lsn' to
1406-
* be archived in archive 'wal' directory regardless stream mode.
1409+
* If 'is_start_lsn' is true then issue warning for first-time users.
14071410
*
1408-
* If 'wait_prev_segment' wait for previous segment.
1411+
* If 'in_prev_segment' is set, look for LSN in previous segment.
1412+
* If 'segment_only' is set, then instead of looking for LSN, look for segment itself.
1413+
* If 'in_prev_segment' and 'segment_only' are both set, then wait for previous segment.
1414+
*
1415+
* Flag 'in_stream_dir' determine whether we looking for wal in 'pg_wal' directory or
1416+
* in archive. Do note, that we cannot rely sorely on 'stream_wal' because, for example,
1417+
* PAGE backup must(!) look for start_lsn in archive regardless of wal_mode.
1418+
1419+
* 'timeout_elevel' determine the elevel for timeout elog message. If elevel lighter than
1420+
* ERROR is used, then return InvalidXLogRecPtr. TODO: return something more concrete, for example 1.
14091421
*
14101422
* Returns LSN of last valid record if wait_prev_segment is not true, otherwise
14111423
* returns InvalidXLogRecPtr.
14121424
*/
14131425
static XLogRecPtr
1414-
wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
1426+
wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, TimeLineID tli,
1427+
bool in_prev_segment, bool segment_only,
1428+
int timeout_elevel, bool in_stream_dir)
14151429
{
1416-
TimeLineID tli;
14171430
XLogSegNo targetSegNo;
14181431
char pg_wal_dir[MAXPGPATH];
14191432
char wal_segment_path[MAXPGPATH],
@@ -1422,16 +1435,15 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
14221435
bool file_exists = false;
14231436
uint32 try_count = 0,
14241437
timeout;
1438+
char *wal_delivery_str = in_stream_dir ? "streamed":"archived";
14251439

14261440
#ifdef HAVE_LIBZ
14271441
char gz_wal_segment_path[MAXPGPATH];
14281442
#endif
14291443

1430-
tli = get_current_timeline(false);
1431-
14321444
/* Compute the name of the WAL file containing requested LSN */
14331445
GetXLogSegNo(lsn, targetSegNo, instance_config.xlog_seg_size);
1434-
if (wait_prev_segment)
1446+
if (in_prev_segment)
14351447
targetSegNo--;
14361448
GetXLogFileName(wal_segment, tli, targetSegNo,
14371449
instance_config.xlog_seg_size);
@@ -1443,8 +1455,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
14431455
*
14441456
* In pg_stop_backup it depends only on stream_wal.
14451457
*/
1446-
if (stream_wal &&
1447-
(current.backup_mode != BACKUP_MODE_DIFF_PAGE || !is_start_lsn))
1458+
if (in_stream_dir)
14481459
{
14491460
pgBackupGetPath2(&current, pg_wal_dir, lengthof(pg_wal_dir),
14501461
DATABASE_DIR, PG_XLOG_DIR);
@@ -1462,7 +1473,7 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
14621473
else
14631474
timeout = ARCHIVE_TIMEOUT_DEFAULT;
14641475

1465-
if (wait_prev_segment)
1476+
if (segment_only)
14661477
elog(LOG, "Looking for segment: %s", wal_segment);
14671478
else
14681479
elog(LOG, "Looking for LSN %X/%X in segment: %s",
@@ -1496,14 +1507,15 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
14961507
if (file_exists)
14971508
{
14981509
/* Do not check LSN for previous WAL segment */
1499-
if (wait_prev_segment)
1510+
if (segment_only)
15001511
return InvalidXLogRecPtr;
15011512

15021513
/*
15031514
* A WAL segment found. Check LSN on it.
15041515
*/
1505-
if (wal_contains_lsn(wal_segment_dir, lsn, tli,
1506-
instance_config.xlog_seg_size))
1516+
if (!XRecOffIsNull(lsn) &&
1517+
wal_contains_lsn(wal_segment_dir, lsn, tli,
1518+
instance_config.xlog_seg_size))
15071519
/* Target LSN was found */
15081520
{
15091521
elog(LOG, "Found LSN: %X/%X", (uint32) (lsn >> 32), (uint32) lsn);
@@ -1514,19 +1526,24 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
15141526
* If we failed to get LSN of valid record in a reasonable time, try
15151527
* to get LSN of last valid record prior to the target LSN. But only
15161528
* in case of a backup from a replica.
1529+
1530+
* There are two cases for this:
1531+
* 1. Replica returned readpoint LSN which just do not exists. We want to look
1532+
* for previous record in the same(!) WAL segment which endpoint points to this LSN.
1533+
* 2. Replica returened endpoint LSN with 0 offset. We want to look
1534+
* for previous record which endpoint points greater or equal LSN in previous WAL segment.
15171535
*/
1518-
if (!exclusive_backup && current.from_replica &&
1519-
(try_count > timeout / 4))
1536+
if (!exclusive_backup && current.from_replica && try_count > timeout / 2)
15201537
{
15211538
XLogRecPtr res;
15221539

1523-
res = get_last_wal_lsn(wal_segment_dir, current.start_lsn,
1524-
lsn, tli, false,
1525-
instance_config.xlog_seg_size);
1540+
res = get_last_wal_lsn(wal_segment_dir, current.start_lsn, lsn, tli,
1541+
in_prev_segment, instance_config.xlog_seg_size);
1542+
15261543
if (!XLogRecPtrIsInvalid(res))
15271544
{
15281545
/* LSN of the prior record was found */
1529-
elog(LOG, "Found prior LSN: %X/%X, it is used as stop LSN",
1546+
elog(LOG, "Found prior LSN: %X/%X",
15301547
(uint32) (res >> 32), (uint32) res);
15311548
return res;
15321549
}
@@ -1541,12 +1558,13 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
15411558
/* Inform user if WAL segment is absent in first attempt */
15421559
if (try_count == 1)
15431560
{
1544-
if (wait_prev_segment)
1545-
elog(INFO, "Wait for WAL segment %s to be archived",
1546-
wal_segment_path);
1561+
if (segment_only)
1562+
elog(INFO, "Wait for WAL segment %s to be %s",
1563+
wal_segment_path, wal_delivery_str);
15471564
else
1548-
elog(INFO, "Wait for LSN %X/%X in archived WAL segment %s",
1549-
(uint32) (lsn >> 32), (uint32) lsn, wal_segment_path);
1565+
elog(INFO, "Wait for LSN %X/%X in %s WAL segment %s",
1566+
(uint32) (lsn >> 32), (uint32) lsn,
1567+
wal_delivery_str, wal_segment_path);
15501568
}
15511569

15521570
if (!stream_wal && is_start_lsn && try_count == 30)
@@ -1557,14 +1575,17 @@ wait_wal_lsn(XLogRecPtr lsn, bool is_start_lsn, bool wait_prev_segment)
15571575
if (timeout > 0 && try_count > timeout)
15581576
{
15591577
if (file_exists)
1560-
elog(ERROR, "WAL segment %s was archived, "
1578+
elog(timeout_elevel, "WAL segment %s was %s, "
15611579
"but target LSN %X/%X could not be archived in %d seconds",
1562-
wal_segment, (uint32) (lsn >> 32), (uint32) lsn, timeout);
1580+
wal_segment, wal_delivery_str,
1581+
(uint32) (lsn >> 32), (uint32) lsn, timeout);
15631582
/* If WAL segment doesn't exist or we wait for previous segment */
15641583
else
1565-
elog(ERROR,
1566-
"Switched WAL segment %s could not be archived in %d seconds",
1567-
wal_segment, timeout);
1584+
elog(timeout_elevel,
1585+
"WAL segment %s could not be %s in %d seconds",
1586+
wal_segment, wal_delivery_str, timeout);
1587+
1588+
return InvalidXLogRecPtr;
15681589
}
15691590
}
15701591
}
@@ -1591,6 +1612,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
15911612
char *val = NULL;
15921613
char *stop_backup_query = NULL;
15931614
bool stop_lsn_exists = false;
1615+
XLogRecPtr stop_backup_lsn_tmp = InvalidXLogRecPtr;
15941616

15951617
/*
15961618
* We will use this values if there are no transactions between start_lsn
@@ -1771,17 +1793,28 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
17711793
/* Extract timeline and LSN from results of pg_stop_backup() */
17721794
XLogDataFromLSN(PQgetvalue(res, 0, 2), &lsn_hi, &lsn_lo);
17731795
/* Calculate LSN */
1774-
stop_backup_lsn = ((uint64) lsn_hi) << 32 | lsn_lo;
1796+
stop_backup_lsn_tmp = ((uint64) lsn_hi) << 32 | lsn_lo;
17751797

1776-
if (!XRecOffIsValid(stop_backup_lsn))
1798+
if (!XRecOffIsValid(stop_backup_lsn_tmp))
17771799
{
1778-
if (XRecOffIsNull(stop_backup_lsn))
1800+
/* Replica returned STOP LSN with null offset */
1801+
if (XRecOffIsNull(stop_backup_lsn_tmp))
17791802
{
17801803
char *xlog_path,
17811804
stream_xlog_path[MAXPGPATH];
1805+
XLogSegNo segno = 0;
1806+
XLogRecPtr lsn_tmp = InvalidXLogRecPtr;
17821807

17831808
elog(WARNING, "Invalid stop_backup_lsn value %X/%X",
1784-
(uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn));
1809+
(uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
1810+
1811+
/*
1812+
* Note: even with gdb it is very hard to produce automated tests for
1813+
* contrecord + null_offset STOP_LSN, so emulate it for manual testing.
1814+
*/
1815+
//stop_backup_lsn_tmp = stop_backup_lsn_tmp - XLOG_SEG_SIZE;
1816+
//elog(WARNING, "New Invalid stop_backup_lsn value %X/%X",
1817+
// (uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
17851818

17861819
if (stream_wal)
17871820
{
@@ -1793,24 +1826,62 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
17931826
else
17941827
xlog_path = arclog_path;
17951828

1796-
wait_wal_lsn(stop_backup_lsn, false, true);
1797-
stop_backup_lsn = get_last_wal_lsn(xlog_path, backup->start_lsn,
1798-
stop_backup_lsn, backup->tli,
1799-
true, instance_config.xlog_seg_size);
1829+
GetXLogSegNo(stop_backup_lsn_tmp, segno, instance_config.xlog_seg_size);
1830+
18001831
/*
1801-
* Do not check existance of LSN again below using
1802-
* wait_wal_lsn().
1832+
* Note, that there is no guarantee that corresponding WAL file is even exists.
1833+
* Basically replica may return LSN from future and keep staying in present.
1834+
* Yeah, it sucks.
1835+
*
1836+
* So we should try to do the following:
1837+
* 1. Wait for current segment and look in it for the LSN >= STOP_LSN. It should
1838+
* solve the problem of occasional 0 offset on write-busy system.
1839+
* 2. Failing that, look for record in previous segment with endpoint
1840+
* equal or greater than 0 offset LSN. It may(!) solve the problem of 0 offset
1841+
* on write-idle system.
18031842
*/
1843+
1844+
/* Wait for segment with current stop_lsn, it is ok for it to never arrive */
1845+
wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli,
1846+
false, true, WARNING, stream_wal);
1847+
1848+
/* Optimistically try to get the first record in segment with current stop_lsn */
1849+
lsn_tmp = get_first_wal_lsn(xlog_path, segno, backup->tli,
1850+
instance_config.xlog_seg_size);
1851+
1852+
/* Check if returned LSN is satisfying our requirements */
1853+
if (XLogRecPtrIsInvalid(lsn_tmp) ||
1854+
!XRecOffIsValid(lsn_tmp) ||
1855+
lsn_tmp < stop_backup_lsn_tmp)
1856+
{
1857+
/* No luck, falling back to looking up for previous record */
1858+
elog(WARNING, "Failed to get next WAL record after %X/%X, "
1859+
"looking for previous WAL record",
1860+
(uint32) (stop_backup_lsn_tmp >> 32),
1861+
(uint32) (stop_backup_lsn_tmp));
1862+
1863+
/* Despite looking for previous record there is not guarantee of success
1864+
* because previous record can be the contrecord.
1865+
*/
1866+
lsn_tmp = wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli,
1867+
true, false, ERROR, stream_wal);
1868+
1869+
/* sanity */
1870+
if (!XRecOffIsValid(lsn_tmp) || XLogRecPtrIsInvalid(lsn_tmp))
1871+
elog(ERROR, "Failed to get WAL record prior to %X/%X",
1872+
(uint32) (stop_backup_lsn_tmp >> 32),
1873+
(uint32) (stop_backup_lsn_tmp));
1874+
}
1875+
1876+
/* Setting stop_backup_lsn will set stop point for streaming */
1877+
stop_backup_lsn = lsn_tmp;
18041878
stop_lsn_exists = true;
18051879
}
18061880
else
18071881
elog(ERROR, "Invalid stop_backup_lsn value %X/%X",
1808-
(uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn));
1882+
(uint32) (stop_backup_lsn_tmp >> 32), (uint32) (stop_backup_lsn_tmp));
18091883
}
18101884

1811-
elog(LOG, "current.stop_lsn: %X/%X",
1812-
(uint32) (stop_backup_lsn >> 32), (uint32) (stop_backup_lsn));
1813-
18141885
/* Write backup_label and tablespace_map */
18151886
if (!exclusive_backup)
18161887
{
@@ -1900,14 +1971,6 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
19001971
if (tablespace_map_content)
19011972
PQclear(tablespace_map_content);
19021973
PQclear(res);
1903-
1904-
if (stream_wal)
1905-
{
1906-
/* Wait for the completion of stream */
1907-
pthread_join(stream_thread, NULL);
1908-
if (stream_thread_arg.ret == 1)
1909-
elog(ERROR, "WAL streaming failed");
1910-
}
19111974
}
19121975

19131976
/* Fill in fields if that is the correct end of backup. */
@@ -1918,13 +1981,20 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
19181981

19191982
/*
19201983
* Wait for stop_lsn to be archived or streamed.
1921-
* We wait for stop_lsn in stream mode just in case.
1984+
* If replica returned non-existent LSN, look for previous record,
1985+
* which endpoint >= stop_lsn
19221986
*/
19231987
if (!stop_lsn_exists)
1924-
stop_backup_lsn = wait_wal_lsn(stop_backup_lsn, false, false);
1988+
stop_backup_lsn = wait_wal_lsn(stop_backup_lsn_tmp, false, backup->tli,
1989+
false, false, ERROR, stream_wal);
19251990

19261991
if (stream_wal)
19271992
{
1993+
/* Wait for the completion of stream */
1994+
pthread_join(stream_thread, NULL);
1995+
if (stream_thread_arg.ret == 1)
1996+
elog(ERROR, "WAL streaming failed");
1997+
19281998
pgBackupGetPath2(backup, stream_xlog_path,
19291999
lengthof(stream_xlog_path),
19302000
DATABASE_DIR, PG_XLOG_DIR);
@@ -1933,7 +2003,6 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
19332003
else
19342004
xlog_path = arclog_path;
19352005

1936-
backup->tli = get_current_timeline(false);
19372006
backup->stop_lsn = stop_backup_lsn;
19382007

19392008
elog(LOG, "Getting the Recovery Time from WAL");
@@ -1944,7 +2013,7 @@ pg_stop_backup(pgBackup *backup, PGconn *pg_startbackup_conn,
19442013
backup->start_lsn, backup->stop_lsn,
19452014
&backup->recovery_time, &backup->recovery_xid))
19462015
{
1947-
elog(LOG, "Failed to find Recovery Time in WAL. Forced to trust current_timestamp");
2016+
elog(LOG, "Failed to find Recovery Time in WAL, forced to trust current_timestamp");
19482017
backup->recovery_time = recovery_time;
19492018
backup->recovery_xid = recovery_xid;
19502019
}

0 commit comments

Comments
 (0)