106 changes: 81 additions & 25 deletions src/backend/access/transam/xlog.c
@@ -764,6 +764,12 @@ static void WALInsertLockAcquireExclusive(void);
static void WALInsertLockRelease(void);
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);

static XLogRecPtr SetLastWrittenLSNForBlockRangeInternal(XLogRecPtr lsn,
RelFileLocator rlocator,
ForkNumber forknum,
BlockNumber from,
BlockNumber n_blocks);

/*
* Insert an XLOG record represented by an already-constructed chain of data
* chunks. This is a low-level routine; to construct the WAL record header
@@ -6681,7 +6687,7 @@ GetInsertRecPtr(void)
* either from a cached last written LSN or a global maximum last written LSN.
* If rnode is InvalidOid then we calculate the maximum among all cached LSNs and maxLastWrittenLsn.
* If the cache is large enough, iterating through all hash items may be rather expensive.
* But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize which is not performance critical.
* But GetLastWrittenLSN(InvalidOid) is used only by neon_dbsize which is not performance critical.
*/
XLogRecPtr
GetLastWrittenLSN(RelFileLocator rlocator, ForkNumber forknum, BlockNumber blkno)
@@ -6710,7 +6716,19 @@ GetLastWrittenLSN(RelFileLocator rlocator, ForkNumber forknum, BlockNumber blkno
else
{
LWLockRelease(LastWrittenLsnLock);
return SetLastWrittenLSNForBlock(lsn, rlocator, forknum, blkno);
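/* Re-acquire the lock in exclusive mode (LWLocks cannot be upgraded in place) so that we can insert into the cache below. */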
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
/*
* In the case of statements like CREATE TABLE AS SELECT ... or INSERT ... SELECT ... we fetch data from a source
* table and store it in a destination table. This causes problems for prefetch when the last-written LSN is not
* cached for the pages of the source table (which happens, for example, after a compute restart). In that case we
* fall back to the global last-written LSN, which advances frequently while we are writing pages of the destination
* table. As a result, the request LSN used for the prefetch and the request LSN used when the page is actually
* needed differ, the prefetched response cannot be used, and prefetching is effectively disarmed.
* To prevent that, we re-insert the page with the latest LSN, so that it's
* less likely the LSN for this page will get evicted from the LwLsnCache
* before the page is read.
*/
lsn = SetLastWrittenLSNForBlockRangeInternal(lsn, rlocator, forknum, blkno, 1);
}
}
else
@@ -6735,13 +6753,14 @@ GetLastWrittenLSN(RelFileLocator rlocator, ForkNumber forknum, BlockNumber blkno
* either from a cached last written LSN or a global maximum last written LSN.
* If rnode is InvalidOid then we calculate the maximum among all cached LSNs and maxLastWrittenLsn.
* If the cache is large enough, iterating through all hash items may be rather expensive.
* But GetLastWrittenLSN(InvalidOid) is used only by zenith_dbsize which is not performance critical.
* But GetLastWrittenLSN(InvalidOid) is used only by neon_dbsize which is not performance critical.
*/
void
GetLastWrittenLSNv(RelFileLocator relfilenode, ForkNumber forknum,
BlockNumber blkno, int nblocks, XLogRecPtr *lsns)
{
LastWrittenLsnCacheEntry* entry;
XLogRecPtr lsn;

Assert(lastWrittenLsnCacheSize != 0);
Assert(nblocks > 0);
@@ -6752,6 +6771,8 @@ GetLastWrittenLSNv(RelFileLocator relfilenode, ForkNumber forknum,
if (relfilenode.relNumber != InvalidOid)
{
BufferTag key;
bool missed_keys = false;

key.spcOid = relfilenode.spcOid;
key.dbOid = relfilenode.dbOid;
key.relNumber = relfilenode.relNumber;
@@ -6763,24 +6784,43 @@ GetLastWrittenLSNv(RelFileLocator relfilenode, ForkNumber forknum,
key.blockNum = blkno + i;

entry = hash_search(lastWrittenLsnCache, &key, HASH_FIND, NULL);

if (entry != NULL)
lsns[i] = entry->lsn;
{
lsns[i] = entry->lsn;
}
else
{
XLogRecPtr lsn;
lsns[i] = lsn = XLogCtl->maxLastWrittenLsn;
/* Mark this block's LSN as missing - we'll update the LwLSN for missing blocks in bulk later */
lsns[i] = InvalidXLogRecPtr;
missed_keys = true;
}
}

/*
* If we had any missing LwLSN entries, we add the missing ones now.
* By doing the insertions in one batch, we decrease lock contention.
*/
if (missed_keys)
{
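/*
* LWLocks cannot be upgraded in place, so drop the lock taken for the lookups above and
* re-acquire it in exclusive mode for the insertions below. maxLastWrittenLsn is re-read
* under the exclusive lock in case it advanced while the lock was briefly released.
*/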
LWLockRelease(LastWrittenLsnLock);
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);

lsn = XLogCtl->maxLastWrittenLsn;

LWLockRelease(LastWrittenLsnLock);
SetLastWrittenLSNForBlock(lsn, relfilenode, forknum, key.blockNum);
LWLockAcquire(LastWrittenLsnLock, LW_SHARED);
for (int i = 0; i < nblocks; i++)
{
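/* Fill each missed slot with the global maximum and re-insert that block into the cache; the exclusive lock acquired above is still held, as required by SetLastWrittenLSNForBlockRangeInternal. */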
if (lsns[i] == InvalidXLogRecPtr)
{
lsns[i] = lsn;
SetLastWrittenLSNForBlockRangeInternal(lsn, relfilenode, forknum, blkno + i, 1);
}
}
}
}
else
{
HASH_SEQ_STATUS seq;
XLogRecPtr lsn = XLogCtl->maxLastWrittenLsn;
lsn = XLogCtl->maxLastWrittenLsn;
/* Find maximum of all cached LSNs */
hash_seq_init(&seq, lastWrittenLsnCache);
while ((entry = (LastWrittenLsnCacheEntry *) hash_seq_search(&seq)) != NULL)
@@ -6796,22 +6836,16 @@ GetLastWrittenLSNv(RelFileLocator relfilenode, ForkNumber forknum,
}

/*
* SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range.
* We maintain a cache of last written LSNs with limited size and an LRU replacement
* policy. Keeping the last written LSN for each page allows an old LSN to be used when
* requesting pages of unchanged or appended relations. It is also critical for
* efficient prefetching during massive update operations (such as vacuum or delete).
*
* rlocator.relNumber can be InvalidOid; in this case maxLastWrittenLsn is updated.
* SetLastWrittenLsn with a dummy rlocator is used by the createdb and dbase_redo functions.
* Guts for SetLastWrittenLSNForBlockRange.
* Caller must ensure LastWrittenLsnLock is held in exclusive mode.
*/
XLogRecPtr
SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileLocator rlocator, ForkNumber forknum, BlockNumber from, BlockNumber n_blocks)
static XLogRecPtr
SetLastWrittenLSNForBlockRangeInternal(XLogRecPtr lsn,
RelFileLocator rlocator,
ForkNumber forknum,
BlockNumber from,
BlockNumber n_blocks)
{
if (lsn == InvalidXLogRecPtr || n_blocks == 0 || lastWrittenLsnCacheSize == 0)
return lsn;

LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
if (rlocator.relNumber == InvalidOid)
{
if (lsn > XLogCtl->maxLastWrittenLsn)
@@ -6861,7 +6895,29 @@ SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileLocator rlocator, ForkNumb
dlist_push_tail(&XLogCtl->lastWrittenLsnLRU, &entry->lru_node);
}
}
return lsn;
}

/*
* SetLastWrittenLSNForBlockRange -- Set maximal LSN of written page range.
* We maintain a cache of last written LSNs with limited size and an LRU replacement
* policy. Keeping the last written LSN for each page allows an old LSN to be used when
* requesting pages of unchanged or appended relations. It is also critical for
* efficient prefetching during massive update operations (such as vacuum or delete).
*
* rlocator.relNumber can be InvalidOid; in this case maxLastWrittenLsn is updated.
* SetLastWrittenLsn with a dummy rlocator is used by the createdb and dbase_redo functions.
*/
XLogRecPtr
SetLastWrittenLSNForBlockRange(XLogRecPtr lsn, RelFileLocator rlocator, ForkNumber forknum, BlockNumber from, BlockNumber n_blocks)
{
if (lsn == InvalidXLogRecPtr || n_blocks == 0 || lastWrittenLsnCacheSize == 0)
return lsn;

LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
lsn = SetLastWrittenLSNForBlockRangeInternal(lsn, rlocator, forknum, from, n_blocks);
LWLockRelease(LastWrittenLsnLock);

return lsn;
}
