Navigation Menu

Skip to content

Commit

Permalink
Use recheck for LIKE
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Oct 28, 2015
1 parent 83d27ae commit da4b428
Show file tree
Hide file tree
Showing 25 changed files with 455 additions and 19 deletions.
21 changes: 21 additions & 0 deletions expected/full-text-search/text/single/like/begin/bitmapscan.out
@@ -0,0 +1,21 @@
CREATE TABLE memos (
id integer,
content text
);
INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
INSERT INTO memos VALUES (4, 'groonga command is provided.');
CREATE INDEX grnindex ON memos USING pgroonga (content);
SET enable_seqscan = off;
SET enable_indexscan = off;
SET enable_bitmapscan = on;
SELECT id, content
FROM memos
WHERE content LIKE 'PostgreSQL%';
id | content
----+------------------------
1 | PostgreSQL is a RDBMS.
(1 row)

DROP TABLE memos;
21 changes: 21 additions & 0 deletions expected/full-text-search/text/single/like/begin/indexscan.out
@@ -0,0 +1,21 @@
CREATE TABLE memos (
id integer,
content text
);
INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
INSERT INTO memos VALUES (4, 'groonga command is provided.');
CREATE INDEX grnindex ON memos USING pgroonga (content);
SET enable_seqscan = off;
SET enable_indexscan = on;
SET enable_bitmapscan = off;
SELECT id, content
FROM memos
WHERE content LIKE 'PostgreSQL%';
id | content
----+------------------------
1 | PostgreSQL is a RDBMS.
(1 row)

DROP TABLE memos;
21 changes: 21 additions & 0 deletions expected/full-text-search/text/single/like/begin/seqscan.out
@@ -0,0 +1,21 @@
CREATE TABLE memos (
id integer,
content text
);
INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
INSERT INTO memos VALUES (4, 'groonga command is provided.');
CREATE INDEX grnindex ON memos USING pgroonga (content);
SET enable_seqscan = on;
SET enable_indexscan = off;
SET enable_bitmapscan = off;
SELECT id, content
FROM memos
WHERE content LIKE 'PostgreSQL%';
id | content
----+------------------------
1 | PostgreSQL is a RDBMS.
(1 row)

DROP TABLE memos;
Expand Up @@ -12,12 +12,10 @@ SET enable_indexscan = off;
SET enable_bitmapscan = on;
SELECT id, content
FROM memos
WHERE content LIKE '%groonga%';
WHERE content LIKE '%Groonga.';
id | content
----+-------------------------------------------------------
2 | Groonga is fast full text search engine.
3 | PGroonga is a PostgreSQL extension that uses Groonga.
4 | groonga command is provided.
(3 rows)
(1 row)

DROP TABLE memos;
Expand Up @@ -12,12 +12,10 @@ SET enable_indexscan = on;
SET enable_bitmapscan = off;
SELECT id, content
FROM memos
WHERE content LIKE '%groonga%';
WHERE content LIKE '%Groonga.';
id | content
----+-------------------------------------------------------
2 | Groonga is fast full text search engine.
3 | PGroonga is a PostgreSQL extension that uses Groonga.
4 | groonga command is provided.
(3 rows)
(1 row)

DROP TABLE memos;
21 changes: 21 additions & 0 deletions expected/full-text-search/text/single/like/end/seqscan.out
@@ -0,0 +1,21 @@
CREATE TABLE memos (
id integer,
content text
);
INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
INSERT INTO memos VALUES (4, 'groonga command is provided.');
CREATE INDEX grnindex ON memos USING pgroonga (content);
SET enable_seqscan = on;
SET enable_indexscan = off;
SET enable_bitmapscan = off;
SELECT id, content
FROM memos
WHERE content LIKE '%Groonga.';
id | content
----+-------------------------------------------------------
3 | PGroonga is a PostgreSQL extension that uses Groonga.
(1 row)

DROP TABLE memos;
21 changes: 21 additions & 0 deletions expected/full-text-search/text/single/like/partial/bitmapscan.out
@@ -0,0 +1,21 @@
CREATE TABLE memos (
id integer,
content text
);
INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
INSERT INTO memos VALUES (4, 'groonga command is provided.');
CREATE INDEX grnindex ON memos USING pgroonga (content);
SET enable_seqscan = off;
SET enable_indexscan = off;
SET enable_bitmapscan = on;
SELECT id, content
FROM memos
WHERE content LIKE '%groonga%';
id | content
----+------------------------------
4 | groonga command is provided.
(1 row)

DROP TABLE memos;
21 changes: 21 additions & 0 deletions expected/full-text-search/text/single/like/partial/indexscan.out
@@ -0,0 +1,21 @@
CREATE TABLE memos (
id integer,
content text
);
INSERT INTO memos VALUES (1, 'PostgreSQL is a RDBMS.');
INSERT INTO memos VALUES (2, 'Groonga is fast full text search engine.');
INSERT INTO memos VALUES (3, 'PGroonga is a PostgreSQL extension that uses Groonga.');
INSERT INTO memos VALUES (4, 'groonga command is provided.');
CREATE INDEX grnindex ON memos USING pgroonga (content);
SET enable_seqscan = off;
SET enable_indexscan = on;
SET enable_bitmapscan = off;
SELECT id, content
FROM memos
WHERE content LIKE '%groonga%';
id | content
----+------------------------------
4 | groonga command is provided.
(1 row)

DROP TABLE memos;
@@ -0,0 +1,20 @@
CREATE TABLE memos (
id integer,
content text
);
INSERT INTO memos VALUES (1, '');
INSERT INTO memos VALUES (2, 'a');
INSERT INTO memos VALUES (3, 'ab');
CREATE INDEX grnindex ON memos USING pgroonga (content);
SET enable_seqscan = off;
SET enable_indexscan = off;
SET enable_bitmapscan = on;
SELECT id, content
FROM memos
WHERE content LIKE '_';
id | content
----+---------
2 | a
(1 row)

DROP TABLE memos;
@@ -0,0 +1,20 @@
CREATE TABLE memos (
id integer,
content text
);
INSERT INTO memos VALUES (1, '');
INSERT INTO memos VALUES (2, 'a');
INSERT INTO memos VALUES (3, 'ab');
CREATE INDEX grnindex ON memos USING pgroonga (content);
SET enable_seqscan = off;
SET enable_indexscan = on;
SET enable_bitmapscan = off;
SELECT id, content
FROM memos
WHERE content LIKE '_';
id | content
----+---------
2 | a
(1 row)

DROP TABLE memos;
20 changes: 20 additions & 0 deletions expected/full-text-search/text/single/like/underscore/seqscan.out
@@ -0,0 +1,20 @@
CREATE TABLE memos (
id integer,
content text
);
INSERT INTO memos VALUES (1, '');
INSERT INTO memos VALUES (2, 'a');
INSERT INTO memos VALUES (3, 'ab');
CREATE INDEX grnindex ON memos USING pgroonga (content);
SET enable_seqscan = on;
SET enable_indexscan = off;
SET enable_bitmapscan = off;
SELECT id, content
FROM memos
WHERE content LIKE '_';
id | content
----+---------
2 | a
(1 row)

DROP TABLE memos;
90 changes: 79 additions & 11 deletions pgroonga.c
Expand Up @@ -206,6 +206,7 @@ static grn_ctx grnContext;
static grn_ctx *ctx = NULL;
static grn_obj buffer;
static grn_obj pathBuffer;
static grn_obj keywordBuffer;
static grn_obj patternBuffer;
static grn_obj ctidBuffer;
static grn_obj scoreBuffer;
Expand Down Expand Up @@ -449,6 +450,7 @@ PGrnOnProcExit(int code, Datum arg)
GRN_OBJ_FIN(ctx, &ctidBuffer);
GRN_OBJ_FIN(ctx, &scoreBuffer);
GRN_OBJ_FIN(ctx, &patternBuffer);
GRN_OBJ_FIN(ctx, &keywordBuffer);
GRN_OBJ_FIN(ctx, &pathBuffer);
GRN_OBJ_FIN(ctx, &buffer);

Expand Down Expand Up @@ -620,6 +622,7 @@ _PG_init(void)

GRN_VOID_INIT(&buffer);
GRN_TEXT_INIT(&pathBuffer, 0);
GRN_TEXT_INIT(&keywordBuffer, 0);
GRN_TEXT_INIT(&patternBuffer, 0);
GRN_FLOAT_INIT(&scoreBuffer, 0);
GRN_UINT64_INIT(&ctidBuffer, 0);
Expand Down Expand Up @@ -3154,14 +3157,37 @@ pgroonga_beginscan(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(scan);
}

/*
 * Flush the keyword accumulated so far into the search expression.
 *
 * When `keyword` holds at least one byte, this appends
 * "matchTarget MATCH keyword" to `expression`. If an earlier keyword has
 * already been flushed (*nKeywords > 0), the new match is combined with
 * what was appended before by GRN_OP_OR. Afterwards *nKeywords is
 * incremented and `keyword` is rewound so the caller can start
 * collecting the next keyword in the same buffer.
 *
 * When `keyword` is empty nothing is appended and neither `keyword` nor
 * *nKeywords is modified.
 *
 * expression:  expression object the condition is appended to.
 * matchTarget: object pushed as the left-hand operand of the match.
 * keyword:     text bulk holding the pending keyword; rewound on flush.
 * nKeywords:   in/out count of keywords flushed so far.
 */
static void
PGrnSearchBuildConditionLikeMatchFlush(grn_obj *expression,
									   grn_obj *matchTarget,
									   grn_obj *keyword,
									   int *nKeywords)
{
	if (GRN_TEXT_LEN(keyword) > 0)
	{
		/* Operands first: the match target, then the keyword text. */
		grn_expr_append_obj(ctx, expression, matchTarget, GRN_OP_PUSH, 1);
		grn_expr_append_const_str(ctx,
								  expression,
								  GRN_TEXT_VALUE(keyword),
								  GRN_TEXT_LEN(keyword),
								  GRN_OP_PUSH,
								  1);
		grn_expr_append_op(ctx, expression, GRN_OP_MATCH, 2);

		/* Every keyword after the first is OR-ed with the previous ones. */
		if (*nKeywords > 0)
		{
			grn_expr_append_op(ctx, expression, GRN_OP_OR, 2);
		}
		*nKeywords += 1;

		/* Empty the buffer for the next keyword. */
		GRN_BULK_REWIND(keyword);
	}
}

static void
PGrnSearchBuildConditionLikeMatch(PGrnSearchData *data,
grn_obj *matchTarget,
grn_obj *query)
{
grn_obj *expression;
const char *queryRaw;
size_t querySize;
size_t i, querySize;
int nKeywords = 0;

expression = data->expression;
queryRaw = GRN_TEXT_VALUE(query);
Expand All @@ -3173,17 +3199,46 @@ PGrnSearchBuildConditionLikeMatch(PGrnSearchData *data,
return;
}

if (!(queryRaw[0] == '%' && queryRaw[querySize - 1] == '%'))
GRN_BULK_REWIND(&keywordBuffer);
for (i = 0; i < querySize; i++)
{
data->isEmptyCondition = true;
return;
switch (queryRaw[i])
{
case '\\':
if (i == querySize)
{
GRN_TEXT_PUTC(ctx, &keywordBuffer, '\\');
}
else
{
GRN_TEXT_PUTC(ctx, &keywordBuffer, queryRaw[i + 1]);
i++;
}
break;
case '%':
case '_':
PGrnSearchBuildConditionLikeMatchFlush(expression,
matchTarget,
&keywordBuffer,
&nKeywords);
break;
default:
GRN_TEXT_PUTC(ctx, &keywordBuffer, queryRaw[i]);
break;
}
}

grn_expr_append_obj(ctx, expression, matchTarget, GRN_OP_PUSH, 1);
grn_expr_append_const_str(ctx, expression,
queryRaw + 1, querySize - 2,
GRN_OP_PUSH, 1);
grn_expr_append_op(ctx, expression, GRN_OP_MATCH, 2);
PGrnSearchBuildConditionLikeMatchFlush(expression,
matchTarget,
&keywordBuffer,
&nKeywords);
if (nKeywords == 0)
{
grn_expr_append_obj(ctx, expression,
grn_ctx_get(ctx, "all_records", -1),
GRN_OP_PUSH, 1);
grn_expr_append_op(ctx, expression, GRN_OP_CALL, 0);
}
}

static void
Expand Down Expand Up @@ -4022,6 +4077,19 @@ PGrnEnsureCursorOpened(IndexScanDesc scan, ScanDirection dir)
{
PGrnScanOpaque so = (PGrnScanOpaque) scan->opaque;

{
int i;
for (i = 0; i < scan->numberOfKeys; i++)
{
ScanKey key = &(scan->keyData[i]);
if (key->sk_strategy == PGrnLikeStrategyNumber)
{
scan->xs_recheck = true;
break;
}
}
}

if (so->indexCursor)
return;
if (so->tableCursor)
Expand Down Expand Up @@ -4166,7 +4234,7 @@ pgroonga_getbitmap(PG_FUNCTION_ARGS)
GRN_BULK_REWIND(&ctidBuffer);
grn_obj_get_value(ctx, so->ctidAccessor, posting->rid, &ctidBuffer);
ctid = UInt64ToCtid(GRN_UINT64_VALUE(&ctidBuffer));
tbm_add_tuples(tbm, &ctid, 1, false);
tbm_add_tuples(tbm, &ctid, 1, scan->xs_recheck);
nRecords++;
}
}
Expand All @@ -4179,7 +4247,7 @@ pgroonga_getbitmap(PG_FUNCTION_ARGS)
GRN_BULK_REWIND(&ctidBuffer);
grn_obj_get_value(ctx, so->ctidAccessor, id, &ctidBuffer);
ctid = UInt64ToCtid(GRN_UINT64_VALUE(&ctidBuffer));
tbm_add_tuples(tbm, &ctid, 1, false);
tbm_add_tuples(tbm, &ctid, 1, scan->xs_recheck);
nRecords++;
}
}
Expand Down

0 comments on commit da4b428

Please sign in to comment.