Skip to content

Commit

Permalink
Support token filters
Browse files: browse the repository at this point in the history
GitHub: fix #32

Reported by Tim Bellefleur. Thanks!!!
  • Loading branch information
kou committed Jan 25, 2017
1 parent cebeb32 commit 104a432
Show file tree
Hide file tree
Showing 13 changed files with 317 additions and 43 deletions.
32 changes: 17 additions & 15 deletions src/pgrn-create.c
Expand Up @@ -8,6 +8,7 @@
#include "pgrn-value.h"

static grn_ctx *ctx = &PGrnContext;
static struct PGrnBuffers *buffers = &PGrnBuffers;

void
PGrnCreateSourcesCtidColumn(PGrnCreateData *data)
Expand All @@ -31,6 +32,7 @@ PGrnCreateSourcesTable(PGrnCreateData *data)
GRN_OBJ_TABLE_NO_KEY,
NULL,
NULL,
NULL,
NULL);

PGrnCreateSourcesCtidColumn(data);
Expand Down Expand Up @@ -112,7 +114,9 @@ PGrnCreateLexicon(PGrnCreateData *data)
grn_obj *lexicon;
grn_obj *tokenizer = NULL;
grn_obj *normalizer = NULL;
grn_obj *tokenFilters = &(buffers->tokenFilters);

GRN_BULK_REWIND(tokenFilters);
switch (data->attributeTypeID)
{
case GRN_DB_TEXT:
Expand All @@ -131,26 +135,23 @@ PGrnCreateLexicon(PGrnCreateData *data)
const char *tokenizerName;
const char *normalizerName = PGRN_DEFAULT_NORMALIZER;

if (data->forRegexpSearch) {
tokenizerName = "TokenRegexp";
} else {
if (data->forFullTextSearch)
{
tokenizerName = PGRN_DEFAULT_TOKENIZER;
}

PGrnApplyOptionValues(data->index, &tokenizerName, &normalizerName);

if (data->forFullTextSearch || data->forRegexpSearch)
else if (data->forRegexpSearch)
{
if (!PGrnIsNoneValue(tokenizerName))
{
tokenizer = PGrnLookup(tokenizerName, ERROR);
}
tokenizerName = "TokenRegexp";
}

if (!PGrnIsNoneValue(normalizerName))
else
{
normalizer = PGrnLookup(normalizerName, ERROR);
tokenizerName = NULL;
}

PGrnApplyOptionValues(data->index,
&tokenizer, tokenizerName,
&normalizer, normalizerName,
tokenFilters);
}

snprintf(lexiconName, sizeof(lexiconName),
Expand All @@ -161,7 +162,8 @@ PGrnCreateLexicon(PGrnCreateData *data)
flags,
type,
tokenizer,
normalizer);
normalizer,
tokenFilters);
GRN_PTR_PUT(ctx, data->lexicons, lexicon);
}

Expand Down
2 changes: 2 additions & 0 deletions src/pgrn-global.c
Expand Up @@ -17,6 +17,7 @@ PGrnInitializeBuffers(void)
GRN_UINT64_INIT(&(PGrnBuffers.ctid), 0);
GRN_FLOAT_INIT(&(PGrnBuffers.score), 0);
GRN_RECORD_INIT(&(PGrnBuffers.sourceIDs), GRN_OBJ_VECTOR, GRN_ID_NIL);
GRN_PTR_INIT(&(PGrnBuffers.tokenFilters), GRN_OBJ_VECTOR, GRN_ID_NIL);
GRN_UINT64_INIT(&(PGrnBuffers.jsonbValueKeys), GRN_OBJ_VECTOR);
GRN_UINT64_INIT(&(PGrnBuffers.walPosition), 0);
GRN_VOID_INIT(&(PGrnBuffers.walValue));
Expand All @@ -40,6 +41,7 @@ PGrnFinalizeBuffers(void)
GRN_OBJ_FIN(ctx, &(PGrnBuffers.ctid));
GRN_OBJ_FIN(ctx, &(PGrnBuffers.score));
GRN_OBJ_FIN(ctx, &(PGrnBuffers.sourceIDs));
GRN_OBJ_FIN(ctx, &(PGrnBuffers.tokenFilters));
GRN_OBJ_FIN(ctx, &(PGrnBuffers.jsonbValueKeys));
GRN_OBJ_FIN(ctx, &(PGrnBuffers.walPosition));
GRN_OBJ_FIN(ctx, &(PGrnBuffers.walValue));
Expand Down
1 change: 1 addition & 0 deletions src/pgrn-global.h
Expand Up @@ -11,6 +11,7 @@ struct PGrnBuffers
grn_obj ctid;
grn_obj score;
grn_obj sourceIDs;
grn_obj tokenFilters;
grn_obj jsonbValueKeys;
grn_obj walPosition;
grn_obj walValue;
Expand Down
14 changes: 10 additions & 4 deletions src/pgrn-groonga.c
Expand Up @@ -210,7 +210,8 @@ PGrnCreateTable(Relation index,
grn_table_flags flags,
grn_obj *type,
grn_obj *tokenizer,
grn_obj *normalizer)
grn_obj *normalizer,
grn_obj *tokenFilters)
{
unsigned int nameSize = 0;

Expand All @@ -223,7 +224,8 @@ PGrnCreateTable(Relation index,
flags,
type,
tokenizer,
normalizer);
normalizer,
tokenFilters);
}

grn_obj *
Expand All @@ -233,7 +235,8 @@ PGrnCreateTableWithSize(Relation index,
grn_table_flags flags,
grn_obj *type,
grn_obj *tokenizer,
grn_obj *normalizer)
grn_obj *normalizer,
grn_obj *tokenFilters)
{
const char *path = NULL;
char pathBuffer[MAXPGPATH];
Expand Down Expand Up @@ -276,14 +279,17 @@ PGrnCreateTableWithSize(Relation index,
grn_obj_set_info(ctx, table, GRN_INFO_DEFAULT_TOKENIZER, tokenizer);
if (normalizer)
grn_obj_set_info(ctx, table, GRN_INFO_NORMALIZER, normalizer);
if (tokenFilters)
grn_obj_set_info(ctx, table, GRN_INFO_TOKEN_FILTERS, tokenFilters);

PGrnWALCreateTable(index,
name,
nameSize,
flags,
type,
tokenizer,
normalizer);
normalizer,
tokenFilters);

return table;
}
Expand Down
6 changes: 4 additions & 2 deletions src/pgrn-groonga.h
Expand Up @@ -41,14 +41,16 @@ grn_obj *PGrnCreateTable(Relation index,
grn_table_flags flags,
grn_obj *type,
grn_obj *tokenizer,
grn_obj *normalizer);
grn_obj *normalizer,
grn_obj *tokenFilters);
grn_obj *PGrnCreateTableWithSize(Relation index,
const char *name,
size_t nameSize,
grn_table_flags flags,
grn_obj *type,
grn_obj *tokenizer,
grn_obj *normalizer);
grn_obj *normalizer,
grn_obj *tokenFilters);
grn_obj *PGrnCreateColumn(Relation index,
grn_obj *table,
const char*name,
Expand Down
1 change: 1 addition & 0 deletions src/pgrn-index-status.c
Expand Up @@ -31,6 +31,7 @@ PGrnInitializeIndexStatus(void)
GRN_OBJ_TABLE_HASH_KEY,
grn_ctx_at(ctx, GRN_DB_UINT32),
NULL,
NULL,
NULL);
}

Expand Down
23 changes: 14 additions & 9 deletions src/pgrn-jsonb.c
Expand Up @@ -177,6 +177,7 @@ PGrnJSONBCreatePathsTable(Relation index, const char *name)
GRN_OBJ_TABLE_PAT_KEY,
grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
NULL,
NULL,
NULL);
}

Expand All @@ -188,6 +189,7 @@ PGrnJSONBCreateTypesTable(Relation index, const char *name)
GRN_OBJ_TABLE_PAT_KEY,
grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
NULL,
NULL,
NULL);
}

Expand All @@ -199,6 +201,7 @@ PGrnJSONBCreateValuesTable(Relation index, const char *name)
GRN_OBJ_TABLE_HASH_KEY,
grn_ctx_at(ctx, GRN_DB_UINT64),
NULL,
NULL,
NULL);
}

Expand Down Expand Up @@ -778,33 +781,34 @@ static void
PGrnJSONBCreateFullTextSearchIndexColumn(PGrnCreateData *data,
PGrnJSONBCreateData *jsonbData)
{
const char *tokenizerName = PGRN_DEFAULT_TOKENIZER;
const char *normalizerName = PGRN_DEFAULT_NORMALIZER;
char lexiconName[GRN_TABLE_MAX_KEY_SIZE];
grn_table_flags flags = GRN_OBJ_TABLE_PAT_KEY;
grn_obj *type;
grn_obj *lexicon;
grn_obj *tokenizer;
grn_obj *tokenizer = NULL;
grn_obj *normalizer = NULL;
grn_obj *tokenFilters = &(buffers->tokenFilters);

PGrnApplyOptionValues(data->index, &tokenizerName, &normalizerName);
GRN_BULK_REWIND(tokenFilters);
PGrnApplyOptionValues(data->index,
&tokenizer, PGRN_DEFAULT_TOKENIZER,
&normalizer, PGRN_DEFAULT_NORMALIZER,
tokenFilters);

if (PGrnIsNoneValue(tokenizerName))
if (!tokenizer)
return;

snprintf(lexiconName, sizeof(lexiconName),
PGrnJSONValueLexiconNameFormat,
"FullTextSearch", data->relNode, data->i);
type = grn_ctx_at(ctx, GRN_DB_SHORT_TEXT);
tokenizer = PGrnLookup(tokenizerName, ERROR);
if (!PGrnIsNoneValue(normalizerName))
normalizer = PGrnLookup(normalizerName, ERROR);
lexicon = PGrnCreateTable(data->index,
lexiconName,
flags,
type,
tokenizer,
normalizer);
normalizer,
tokenFilters);
GRN_PTR_PUT(ctx, data->lexicons, lexicon);

PGrnCreateColumn(data->index,
Expand Down Expand Up @@ -832,6 +836,7 @@ PGrnJSONBCreateIndexColumn(PGrnCreateData *data,
tableType,
type,
NULL,
NULL,
NULL);
GRN_PTR_PUT(ctx, data->lexicons, lexicon);
PGrnCreateColumn(data->index,
Expand Down

0 comments on commit 104a432

Please sign in to comment.