Skip to content

Commit

Permalink
Restore running xacts from CLOG on replica startup
Browse files Browse the repository at this point in the history
  • Loading branch information
Konstantin Knizhnik committed May 23, 2024
1 parent 0d30e28 commit adc11ad
Showing 1 changed file with 88 additions and 0 deletions.
88 changes: 88 additions & 0 deletions src/backend/access/transam/xlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -6701,6 +6701,75 @@ CheckRequiredParameterValues(void)
}
}

/*
* Sizing macros taken from procarray.c.
*
* PROCARRAY_MAXPROCS is the sum of MaxBackends and max_prepared_xacts.
* TOTAL_MAX_CACHED_SUBXIDS is (PGPROC_MAX_CACHED_SUBXIDS + 1) entries for
* each of those PROCARRAY_MAXPROCS slots.  RestoreRunningXactsFromClog()
* uses half of TOTAL_MAX_CACHED_SUBXIDS as the cap on the number of xids
* it reports.
*
* NOTE(review): these are duplicated definitions; confirm they still match
* the ones in procarray.c.
* TODO: should we better move it to some header file?
*/
#define PROCARRAY_MAXPROCS (MaxBackends + max_prepared_xacts)
#define TOTAL_MAX_CACHED_SUBXIDS \
((PGPROC_MAX_CACHED_SUBXIDS + 1) * PROCARRAY_MAXPROCS)


/*
 * RestoreRunningXactsFromClog
 *
 * NEON: we do not want to wait for the running-xacts record from the primary,
 * which is generated every 15 seconds (and only if there is some activity).
 * So we try to restore information about running transactions from CLOG:
 * every XID in [checkPoint->oldestActiveXid, checkPoint->nextXid) whose CLOG
 * status is TRANSACTION_STATUS_IN_PROGRESS is reported as running.
 *
 * checkPoint - checkpoint record supplying oldestActiveXid and nextXid.
 * xids       - in/out.  If *xids is not NULL on entry it is pfree'd; on
 *              return it points to a freshly palloc'd array of XIDs.
 * nxids      - out.  Number of entries stored in *xids.
 *
 * ExtendSUBTRANS() is called for every XID visited, whether or not it is
 * in progress.
 *
 * At most TOTAL_MAX_CACHED_SUBXIDS/2 XIDs are reported; any further
 * in-progress XIDs are silently dropped (see FIXME below).
 */
static void
RestoreRunningXactsFromClog(CheckPoint *checkPoint, TransactionId **xids, int *nxids)
{
	TransactionId from = checkPoint->oldestActiveXid;
	TransactionId till = XidFromFullTransactionId(checkPoint->nextXid);
	int			max_xcnt = TOTAL_MAX_CACHED_SUBXIDS / 2;
	unsigned int span;
	int			capacity;
	int			xcnt = 0;

	if (!TransactionIdIsNormal(from))
	{
		/*
		 * No checkpoint or running-xacts record was written, so use the most
		 * conservative approximation for oldestActiveXid:
		 * FirstNormalTransactionId.  There should be no problem with
		 * wraparound, because the XID counter cannot overflow without any
		 * checkpoint or running-xacts record being written.
		 */
		from = FirstNormalTransactionId;
	}

	if (*xids != NULL)
	{
		/*
		 * xids was already set by PrescanPreparedTransactions.  We do not
		 * need the information about prepared transactions, because they
		 * should be a subset of the set reconstructed from CLOG.  So just
		 * deallocate *xids so as not to leak memory.
		 */
		pfree(*xids);
	}

	/*
	 * Upper estimate of the number of running xids: the number of XIDs the
	 * loop below will visit.  Computed in unsigned arithmetic and clamped
	 * before it is converted to int, so that neither from == till nor a very
	 * large distance can turn into a negative count and an invalid palloc
	 * request.
	 */
	if (TransactionIdPrecedes(from, till))
	{
		span = till - from;
		if (till < from)
		{
			/* Range wraps around: the special XIDs below
			 * FirstNormalTransactionId are skipped by TransactionIdAdvance */
			span -= FirstNormalTransactionId;
		}
	}
	else
	{
		/* Empty range: the scan loop will not execute at all */
		span = 0;
	}

	/*
	 * FIXME: To avoid the "too many KnownAssignedXids" error we limit the
	 * number of reported transactions.  Certainly this may cause (with very
	 * small probability) incorrect results at the replica for the
	 * transactions that were not reported.  But it seems to be better than a
	 * replica stuck waiting for running-xacts while the primary is not going
	 * to send it because there were no changes since the last generation of
	 * this record, or a replica crashing with the fatal error "too many
	 * KnownAssignedXids".  TOTAL_MAX_CACHED_SUBXIDS/2 is taken on the
	 * assumption that we need to reserve some space for known xids for
	 * subsequent calls of RecordKnownAssignedTransactionIds.
	 */
	if (span < (unsigned int) max_xcnt)
	{
		capacity = (int) span;
	}
	else
	{
		capacity = max_xcnt;
	}

	*xids = (TransactionId *) palloc(capacity * sizeof(TransactionId));

	for (TransactionId xid = from; TransactionIdPrecedes(xid, till);)
	{
		XLogRecPtr	xidlsn;
		XidStatus	xidstatus = TransactionIdGetStatus(xid, &xidlsn);

		ExtendSUBTRANS(xid);
		if (xidstatus == TRANSACTION_STATUS_IN_PROGRESS)
		{
			/* Never write past what was actually allocated */
			if (xcnt < capacity)
			{
				(*xids)[xcnt++] = xid;
			}
		}
		TransactionIdAdvance(xid);
	}
	*nxids = xcnt;
}

/*
* This must be called ONCE during postmaster or standalone-backend startup
*/
Expand Down Expand Up @@ -7501,14 +7570,33 @@ StartupXLOG(void)
InitRecoveryTransactionEnvironment();

if (wasShutdown)
{
oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
if (TransactionIdIsValid(checkPoint.oldestActiveXid) &&
NormalTransactionIdPrecedes(checkPoint.oldestActiveXid, oldestActiveXID))
{
oldestActiveXID = checkPoint.oldestActiveXid;
}
}
else
{
oldestActiveXID = checkPoint.oldestActiveXid;
}
elog(LOG, "checkPoint.oldestActiveXid=%d, oldestActiveXID=%d", checkPoint.oldestActiveXid, oldestActiveXID);
Assert(TransactionIdIsValid(oldestActiveXID));

/* Tell procarray about the range of xids it has to deal with */
ProcArrayInitRecovery(XidFromFullTransactionId(ShmemVariableCache->nextXid));

/*
* NEON: we always assume that the standby replica was shut down normally,
* but the primary may actually be alive, so we have to restore information
* about running xacts from CLOG
*/
if (wasShutdown)
{
RestoreRunningXactsFromClog(&checkPoint, &xids, &nxids);
}
/*
* Startup subtrans only. CLOG, MultiXact and commit timestamp
* have already been started up and other SLRUs are not maintained
Expand Down

0 comments on commit adc11ad

Please sign in to comment.