Skip to content

Commit

Permalink
Skip checking of scan keys required for directional scan in B-tree
Browse files Browse the repository at this point in the history
Currently, B-tree code matches every scan key to every item on the page.
Imagine the ordered B-tree scan for the query like this.

SELECT * FROM tbl WHERE col > 'a' AND col < 'b' ORDER BY col;

The (col > 'a') scan key will be always matched once we find the location to
start the scan.  The (col < 'b') scan key will match every item on the page
as long as it matches the last item on the page.

This patch implements prechecking of the scan keys required for directional
scan on beginning of page scan.  If precheck is successful we can skip this
scan keys check for the items on the page.  That could lead to significant
acceleration especially if the comparison operator is expensive.

Idea from patch by Konstantin Knizhnik.

Discussion: https://postgr.es/m/079c3f8e-3371-abe2-e93c-fc8a0ae3f571%40garret.ru
Reviewed-by: Peter Geoghegan, Pavel Borisov
  • Loading branch information
akorotkov committed Oct 6, 2023
1 parent 5da0a62 commit e0b1ee1
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 17 deletions.
1 change: 1 addition & 0 deletions src/backend/access/nbtree/nbtree.c
Expand Up @@ -407,6 +407,7 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,

so->markItemIndex = -1;
so->arrayKeyCount = 0;
so->firstPage = false;
BTScanPosUnpinIfPinned(so->markPos);
BTScanPosInvalidate(so->markPos);

Expand Down
70 changes: 67 additions & 3 deletions src/backend/access/nbtree/nbtsearch.c
Expand Up @@ -1429,6 +1429,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
/* remember which buffer we have pinned, if any */
Assert(!BTScanPosIsValid(so->currPos));
so->currPos.buf = buf;
so->firstPage = true;

/*
* Now load data from the first page of the scan.
Expand Down Expand Up @@ -1539,6 +1540,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
int itemIndex;
bool continuescan;
int indnatts;
bool requiredMatchedByPrecheck;

/*
* We must have the buffer pinned and locked, but the usual macro can't be
Expand Down Expand Up @@ -1592,6 +1594,46 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
*/
Assert(BTScanPosIsPinned(so->currPos));

/*
* Prechecking the page with scan keys required for direction scan. We
* check these keys with the last item on the page (according to our scan
* direction). If these keys are matched, we can skip checking them with
* every item on the page. Scan keys for our scan direction would
* necessarily match the previous items. Scan keys required for opposite
* direction scan are already matched by the _bt_first() call.
*
* With the forward scan, we do this check for the last item on the page
* instead of the high key. It's relatively likely that the most
* significant column in the high key will be different from the
* corresponding value from the last item on the page. So checking with
* the last item on the page would give a more precise answer.
*
* We skip this for the first page in the scan to evade the possible
* slowdown of the point queries.
*/
if (!so->firstPage && minoff < maxoff)
{
ItemId iid;
IndexTuple itup;

iid = PageGetItemId(page, ScanDirectionIsForward(dir) ? maxoff : minoff);
itup = (IndexTuple) PageGetItem(page, iid);

/*
* Do the precheck. Note that we pass the pointer to
* 'requiredMatchedByPrecheck' to 'continuescan' argument. That will
* set flag to true if all required keys are satisfied and false
* otherwise.
*/
(void) _bt_checkkeys(scan, itup, indnatts, dir,
&requiredMatchedByPrecheck, false);
}
else
{
so->firstPage = false;
requiredMatchedByPrecheck = false;
}

if (ScanDirectionIsForward(dir))
{
/* load items[] in ascending order */
Expand All @@ -1603,6 +1645,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
{
ItemId iid = PageGetItemId(page, offnum);
IndexTuple itup;
bool passes_quals;

/*
* If the scan specifies not to return killed tuples, then we
Expand All @@ -1616,7 +1659,18 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)

itup = (IndexTuple) PageGetItem(page, iid);

if (_bt_checkkeys(scan, itup, indnatts, dir, &continuescan))
passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
&continuescan, requiredMatchedByPrecheck);

/*
* If the result of prechecking required keys was true, then in
* assert-enabled builds we also recheck that _bt_checkkeys()
* result is is the same.
*/
Assert(!requiredMatchedByPrecheck ||
passes_quals == _bt_checkkeys(scan, itup, indnatts, dir,
&continuescan, false));
if (passes_quals)
{
/* tuple passes all scan key conditions */
if (!BTreeTupleIsPosting(itup))
Expand Down Expand Up @@ -1673,7 +1727,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
int truncatt;

truncatt = BTreeTupleGetNAtts(itup, scan->indexRelation);
_bt_checkkeys(scan, itup, truncatt, dir, &continuescan);
_bt_checkkeys(scan, itup, truncatt, dir, &continuescan, false);
}

if (!continuescan)
Expand Down Expand Up @@ -1725,7 +1779,16 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
itup = (IndexTuple) PageGetItem(page, iid);

passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
&continuescan);
&continuescan, requiredMatchedByPrecheck);

/*
* If the result of prechecking required keys was true, then in
* assert-enabled builds we also recheck that _bt_checkkeys()
* result is is the same.
*/
Assert(!requiredMatchedByPrecheck ||
passes_quals == _bt_checkkeys(scan, itup, indnatts, dir,
&continuescan, false));
if (passes_quals && tuple_alive)
{
/* tuple passes all scan key conditions */
Expand Down Expand Up @@ -2443,6 +2506,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)

/* remember which buffer we have pinned */
so->currPos.buf = buf;
so->firstPage = true;

_bt_initialize_more_data(so, dir);

Expand Down
59 changes: 46 additions & 13 deletions src/backend/access/nbtree/nbtutils.c
Expand Up @@ -1372,10 +1372,13 @@ _bt_mark_scankey_required(ScanKey skey)
* tupnatts: number of attributes in tupnatts (high key may be truncated)
* dir: direction we are scanning in
* continuescan: output parameter (will be set correctly in all cases)
* requiredMatchedByPrecheck: indicates that scan keys required for
* direction scan are already matched
*/
bool
_bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
ScanDirection dir, bool *continuescan)
ScanDirection dir, bool *continuescan,
bool requiredMatchedByPrecheck)
{
TupleDesc tupdesc;
BTScanOpaque so;
Expand All @@ -1396,6 +1399,29 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
Datum datum;
bool isNull;
Datum test;
bool requiredSameDir = false,
requiredOppositeDir = false;

/*
* Check if the key is required for ordered scan in the same or
* opposite direction. Save as flag variables for future usage.
*/
if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) ||
((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)))
requiredSameDir = true;
else if (((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsBackward(dir)) ||
((key->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsForward(dir)))
requiredOppositeDir = true;

/*
* Is the key required for scanning for either forward or backward
* direction? If so and called told us that these types of keys are
* known to be matched, skip the check. Except for the row keys,
* where NULLs could be found in the middle of matching values.
*/
if ((requiredSameDir || requiredOppositeDir) &&
!(key->sk_flags & SK_ROW_HEADER) && requiredMatchedByPrecheck)
continue;

if (key->sk_attno > tupnatts)
{
Expand Down Expand Up @@ -1444,11 +1470,7 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
* scan direction, then we can conclude no further tuples will
* pass, either.
*/
if ((key->sk_flags & SK_BT_REQFWD) &&
ScanDirectionIsForward(dir))
*continuescan = false;
else if ((key->sk_flags & SK_BT_REQBKWD) &&
ScanDirectionIsBackward(dir))
if (requiredSameDir)
*continuescan = false;

/*
Expand Down Expand Up @@ -1498,8 +1520,23 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
return false;
}

test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
datum, key->sk_argument);
/*
* Apply the key checking function. When the key is required for
* opposite direction scan, it must be already satisfied by
* _bt_first() except for the NULLs checking, which have already done
* above.
*/
if (!requiredOppositeDir)
{
test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
datum, key->sk_argument);
}
else
{
test = true;
Assert(test == FunctionCall2Coll(&key->sk_func, key->sk_collation,
datum, key->sk_argument));
}

if (!DatumGetBool(test))
{
Expand All @@ -1513,11 +1550,7 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, int tupnatts,
* initial positioning in _bt_first() when they are available. See
* comments in _bt_first().
*/
if ((key->sk_flags & SK_BT_REQFWD) &&
ScanDirectionIsForward(dir))
*continuescan = false;
else if ((key->sk_flags & SK_BT_REQBKWD) &&
ScanDirectionIsBackward(dir))
if (requiredSameDir)
*continuescan = false;

/*
Expand Down
6 changes: 5 additions & 1 deletion src/include/access/nbtree.h
Expand Up @@ -1056,6 +1056,9 @@ typedef struct BTScanOpaqueData
int *killedItems; /* currPos.items indexes of killed items */
int numKilled; /* number of currently stored items */

/* flag indicating the first page in the scan */
bool firstPage;

/*
* If we are doing an index-only scan, these are the tuple storage
* workspaces for the currPos and markPos respectively. Each is of size
Expand Down Expand Up @@ -1255,7 +1258,8 @@ extern void _bt_mark_array_keys(IndexScanDesc scan);
extern void _bt_restore_array_keys(IndexScanDesc scan);
extern void _bt_preprocess_keys(IndexScanDesc scan);
extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple,
int tupnatts, ScanDirection dir, bool *continuescan);
int tupnatts, ScanDirection dir, bool *continuescan,
bool requiredMatchedByPrecheck);
extern void _bt_killitems(IndexScanDesc scan);
extern BTCycleId _bt_vacuum_cycleid(Relation rel);
extern BTCycleId _bt_start_vacuum(Relation rel);
Expand Down

0 comments on commit e0b1ee1

Please sign in to comment.