Skip to content

Commit

Permalink
Index SLRUs by 64-bit integers rather than by 32-bit integers
Browse files Browse the repository at this point in the history
We've had repeated bugs in the area of handling SLRU wraparound in the past,
some of which have caused data loss. Switching to an indexing system for SLRUs
that does not wrap around should allow us to get rid of a whole bunch
of problems and improve the overall reliability of the system.

This particular patch, however, only changes the indexing and doesn't address
the wraparound per se. That will be done in the following patches.

Author: Maxim Orlov, Aleksander Alekseev, Alexander Korotkov, Teodor Sigaev
Author: Nikita Glukhov, Pavel Borisov, Yura Sokolov
Reviewed-by: Jacob Champion, Heikki Linnakangas, Alexander Korotkov
Reviewed-by: Japin Li, Pavel Borisov, Tom Lane, Peter Eisentraut, Andres Freund
Reviewed-by: Andrey Borodin, Dilip Kumar, Aleksander Alekseev
Discussion: https://postgr.es/m/CACG%3DezZe1NQSCnfHOr78AtAZxJZeCvxrts0ygrxYwe%3DpyyjVWA%40mail.gmail.com
Discussion: https://postgr.es/m/CAJ7c6TPDOYBYrnCAeyndkBktO0WG2xSdYduTF0nxq%2BvfkmTF5Q%40mail.gmail.com
  • Loading branch information
akorotkov committed Nov 28, 2023
1 parent a916b47 commit 4ed8f09
Show file tree
Hide file tree
Showing 18 changed files with 303 additions and 202 deletions.
10 changes: 5 additions & 5 deletions src/backend/access/rmgrdesc/clogdesc.c
Expand Up @@ -25,18 +25,18 @@ clog_desc(StringInfo buf, XLogReaderState *record)

if (info == CLOG_ZEROPAGE)
{
int pageno;
int64 pageno;

memcpy(&pageno, rec, sizeof(int));
appendStringInfo(buf, "page %d", pageno);
memcpy(&pageno, rec, sizeof(pageno));
appendStringInfo(buf, "page %lld", (long long) pageno);
}
else if (info == CLOG_TRUNCATE)
{
xl_clog_truncate xlrec;

memcpy(&xlrec, rec, sizeof(xl_clog_truncate));
appendStringInfo(buf, "page %d; oldestXact %u",
xlrec.pageno, xlrec.oldestXact);
appendStringInfo(buf, "page %lld; oldestXact %u",
(long long) xlrec.pageno, xlrec.oldestXact);
}
}

Expand Down
10 changes: 5 additions & 5 deletions src/backend/access/rmgrdesc/committsdesc.c
Expand Up @@ -26,17 +26,17 @@ commit_ts_desc(StringInfo buf, XLogReaderState *record)

if (info == COMMIT_TS_ZEROPAGE)
{
int pageno;
int64 pageno;

memcpy(&pageno, rec, sizeof(int));
appendStringInfo(buf, "%d", pageno);
memcpy(&pageno, rec, sizeof(pageno));
appendStringInfo(buf, "%lld", (long long) pageno);
}
else if (info == COMMIT_TS_TRUNCATE)
{
xl_commit_ts_truncate *trunc = (xl_commit_ts_truncate *) rec;

appendStringInfo(buf, "pageno %d, oldestXid %u",
trunc->pageno, trunc->oldestXid);
appendStringInfo(buf, "pageno %lld, oldestXid %u",
(long long) trunc->pageno, trunc->oldestXid);
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/backend/access/rmgrdesc/mxactdesc.c
Expand Up @@ -55,10 +55,10 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE ||
info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
{
int pageno;
int64 pageno;

memcpy(&pageno, rec, sizeof(int));
appendStringInfo(buf, "%d", pageno);
memcpy(&pageno, rec, sizeof(pageno));
appendStringInfo(buf, "%lld", (long long) pageno);
}
else if (info == XLOG_MULTIXACT_CREATE_ID)
{
Expand Down
64 changes: 37 additions & 27 deletions src/backend/access/transam/clog.c
Expand Up @@ -62,7 +62,17 @@
#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
#define CLOG_XACT_BITMASK ((1 << CLOG_BITS_PER_XACT) - 1)

#define TransactionIdToPage(xid) ((xid) / (TransactionId) CLOG_XACTS_PER_PAGE)

/*
 * Compute the page number for a transaction ID by dividing it by
 * CLOG_XACTS_PER_PAGE.
 *
 * The division is carried out in int64 and the result is returned as int64,
 * although the actual value can't currently exceed
 * 0xFFFFFFFF/CLOG_XACTS_PER_PAGE.
 */
static inline int64
TransactionIdToPage(TransactionId xid)
{
	const int64 xacts_per_page = (int64) CLOG_XACTS_PER_PAGE;

	return xid / xacts_per_page;
}

#define TransactionIdToPgIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE)
#define TransactionIdToByte(xid) (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
#define TransactionIdToBIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
Expand All @@ -89,24 +99,24 @@ static SlruCtlData XactCtlData;
#define XactCtl (&XactCtlData)


static int ZeroCLOGPage(int pageno, bool writeXlog);
static bool CLOGPagePrecedes(int page1, int page2);
static void WriteZeroPageXlogRec(int pageno);
static void WriteTruncateXlogRec(int pageno, TransactionId oldestXact,
static int ZeroCLOGPage(int64 pageno, bool writeXlog);
static bool CLOGPagePrecedes(int64 page1, int64 page2);
static void WriteZeroPageXlogRec(int64 pageno);
static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXact,
Oid oldestXactDb);
static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
TransactionId *subxids, XidStatus status,
XLogRecPtr lsn, int pageno,
XLogRecPtr lsn, int64 pageno,
bool all_xact_same_page);
static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status,
XLogRecPtr lsn, int slotno);
static void set_status_by_pages(int nsubxids, TransactionId *subxids,
XidStatus status, XLogRecPtr lsn);
static bool TransactionGroupUpdateXidStatus(TransactionId xid,
XidStatus status, XLogRecPtr lsn, int pageno);
XidStatus status, XLogRecPtr lsn, int64 pageno);
static void TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
TransactionId *subxids, XidStatus status,
XLogRecPtr lsn, int pageno);
XLogRecPtr lsn, int64 pageno);


/*
Expand Down Expand Up @@ -162,7 +172,7 @@ void
TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
TransactionId *subxids, XidStatus status, XLogRecPtr lsn)
{
int pageno = TransactionIdToPage(xid); /* get page of parent */
int64 pageno = TransactionIdToPage(xid); /* get page of parent */
int i;

Assert(status == TRANSACTION_STATUS_COMMITTED ||
Expand Down Expand Up @@ -236,7 +246,7 @@ static void
set_status_by_pages(int nsubxids, TransactionId *subxids,
XidStatus status, XLogRecPtr lsn)
{
int pageno = TransactionIdToPage(subxids[0]);
int64 pageno = TransactionIdToPage(subxids[0]);
int offset = 0;
int i = 0;

Expand All @@ -245,7 +255,7 @@ set_status_by_pages(int nsubxids, TransactionId *subxids,
while (i < nsubxids)
{
int num_on_page = 0;
int nextpageno;
int64 nextpageno;

do
{
Expand All @@ -271,7 +281,7 @@ set_status_by_pages(int nsubxids, TransactionId *subxids,
static void
TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
TransactionId *subxids, XidStatus status,
XLogRecPtr lsn, int pageno,
XLogRecPtr lsn, int64 pageno,
bool all_xact_same_page)
{
/* Can't use group update when PGPROC overflows. */
Expand Down Expand Up @@ -337,7 +347,7 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
static void
TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
TransactionId *subxids, XidStatus status,
XLogRecPtr lsn, int pageno)
XLogRecPtr lsn, int64 pageno)
{
int slotno;
int i;
Expand Down Expand Up @@ -411,7 +421,7 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
*/
static bool
TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
XLogRecPtr lsn, int pageno)
XLogRecPtr lsn, int64 pageno)
{
volatile PROC_HDR *procglobal = ProcGlobal;
PGPROC *proc = MyProc;
Expand Down Expand Up @@ -637,7 +647,7 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i
XidStatus
TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
{
int pageno = TransactionIdToPage(xid);
int64 pageno = TransactionIdToPage(xid);
int byteno = TransactionIdToByte(xid);
int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
int slotno;
Expand Down Expand Up @@ -697,7 +707,7 @@ CLOGShmemInit(void)
XactCtl->PagePrecedes = CLOGPagePrecedes;
SimpleLruInit(XactCtl, "Xact", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE,
XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER,
SYNC_HANDLER_CLOG);
SYNC_HANDLER_CLOG, false);
SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE);
}

Expand Down Expand Up @@ -734,7 +744,7 @@ BootStrapCLOG(void)
* Control lock must be held at entry, and will be held at exit.
*/
static int
ZeroCLOGPage(int pageno, bool writeXlog)
ZeroCLOGPage(int64 pageno, bool writeXlog)
{
int slotno;

Expand All @@ -754,7 +764,7 @@ void
StartupCLOG(void)
{
TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
int pageno = TransactionIdToPage(xid);
int64 pageno = TransactionIdToPage(xid);

LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);

Expand All @@ -773,7 +783,7 @@ void
TrimCLOG(void)
{
TransactionId xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
int pageno = TransactionIdToPage(xid);
int64 pageno = TransactionIdToPage(xid);

LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);

Expand Down Expand Up @@ -838,7 +848,7 @@ CheckPointCLOG(void)
void
ExtendCLOG(TransactionId newestXact)
{
int pageno;
int64 pageno;

/*
* No work except at first XID of a page. But beware: just after
Expand Down Expand Up @@ -877,7 +887,7 @@ ExtendCLOG(TransactionId newestXact)
void
TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
{
int cutoffPage;
int64 cutoffPage;

/*
* The cutoff point is the start of the segment containing oldestXact. We
Expand Down Expand Up @@ -930,7 +940,7 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)
* don't optimize that edge case.
*/
static bool
CLOGPagePrecedes(int page1, int page2)
CLOGPagePrecedes(int64 page1, int64 page2)
{
TransactionId xid1;
TransactionId xid2;
Expand All @@ -949,10 +959,10 @@ CLOGPagePrecedes(int page1, int page2)
* Write a ZEROPAGE xlog record
*/
static void
WriteZeroPageXlogRec(int pageno)
WriteZeroPageXlogRec(int64 pageno)
{
XLogBeginInsert();
XLogRegisterData((char *) (&pageno), sizeof(int));
XLogRegisterData((char *) (&pageno), sizeof(pageno));
(void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
}

Expand All @@ -963,7 +973,7 @@ WriteZeroPageXlogRec(int pageno)
* in TruncateCLOG().
*/
static void
WriteTruncateXlogRec(int pageno, TransactionId oldestXact, Oid oldestXactDb)
WriteTruncateXlogRec(int64 pageno, TransactionId oldestXact, Oid oldestXactDb)
{
XLogRecPtr recptr;
xl_clog_truncate xlrec;
Expand Down Expand Up @@ -991,10 +1001,10 @@ clog_redo(XLogReaderState *record)

if (info == CLOG_ZEROPAGE)
{
int pageno;
int64 pageno;
int slotno;

memcpy(&pageno, XLogRecGetData(record), sizeof(int));
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));

LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);

Expand Down

0 comments on commit 4ed8f09

Please sign in to comment.