Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add pgroonga.match_positions_character
- Loading branch information
Showing
14 changed files
with
269 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
8 changes: 8 additions & 0 deletions
8
expected/function/match-positions-character/different-size-keyword.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| SELECT pgroonga.match_positions_character( | ||
| '100㍉メートル', | ||
| ARRAY['ミリ']); | ||
| match_positions_character | ||
| --------------------------- | ||
| {{3,1}} | ||
| (1 row) | ||
|
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| SELECT pgroonga.match_positions_character( | ||
| 'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' || | ||
| '登録された文書をすぐに検索結果に反映できます。', | ||
| ARRAY['検索']); | ||
| match_positions_character | ||
| --------------------------- | ||
| {{25,2},{46,2}} | ||
| (1 row) | ||
|
|
19 changes: 19 additions & 0 deletions
19
expected/function/match-positions-character/multiple-keywords.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| SELECT pgroonga.match_positions_character( | ||
| 'Groonga is a fast and accurate full text search engine based on ' || | ||
| 'inverted index. One of the characteristics of Groonga is that a ' || | ||
| 'newly registered document instantly appears in search results. ' || | ||
| 'Also, Groonga allows updates without read locks. These characteristics ' || | ||
| 'result in superior performance on real-time applications.' || | ||
| E'\n' || | ||
| E'\n' || | ||
| 'Groonga is also a column-oriented database management system (DBMS). ' || | ||
| 'Compared with well-known row-oriented systems, such as MySQL and ' || | ||
| 'PostgreSQL, column-oriented systems are more suited for aggregate ' || | ||
| 'queries. Due to this advantage, Groonga can cover weakness of ' || | ||
| 'row-oriented systems.', | ||
| ARRAY['fast', 'PostgreSQL']); | ||
| match_positions_character | ||
| --------------------------- | ||
| {{13,4},{455,10}} | ||
| (1 row) | ||
|
|
19 changes: 19 additions & 0 deletions
19
expected/function/match-positions-character/one-keyword.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| SELECT pgroonga.match_positions_character( | ||
| 'Groonga is a fast and accurate full text search engine based on ' || | ||
| 'inverted index. One of the characteristics of Groonga is that a ' || | ||
| 'newly registered document instantly appears in search results. ' || | ||
| 'Also, Groonga allows updates without read locks. These characteristics ' || | ||
| 'result in superior performance on real-time applications.' || | ||
| E'\n' || | ||
| E'\n' || | ||
| 'Groonga is also a column-oriented database management system (DBMS). ' || | ||
| 'Compared with well-known row-oriented systems, such as MySQL and ' || | ||
| 'PostgreSQL, column-oriented systems are more suited for aggregate ' || | ||
| 'queries. Due to this advantage, Groonga can cover weakness of ' || | ||
| 'row-oriented systems.', | ||
| ARRAY['Groonga']); | ||
| match_positions_character | ||
| ----------------------------------------- | ||
| {{0,7},{110,7},{197,7},{319,9},{553,7}} | ||
| (1 row) | ||
|
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
-- Returns the positions of every keyword occurrence in `target` as a
-- two-dimensional array of {offset, length} pairs, counted in characters
-- (not bytes).
--
-- IMMUTABLE: the result is computed only from the two arguments, so the
-- planner may constant-fold calls and the function can be used in index
-- expressions. STRICT: a NULL argument yields NULL without calling the
-- C implementation.
CREATE FUNCTION pgroonga.match_positions_character(target text, keywords text[])
	RETURNS integer[2][]
	AS 'MODULE_PATHNAME', 'pgroonga_match_positions_character'
	LANGUAGE C
	IMMUTABLE
	STRICT;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
3 changes: 3 additions & 0 deletions
3
sql/function/match-positions-character/different-size-keyword.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| SELECT pgroonga.match_positions_character( | ||
| '100㍉メートル', | ||
| ARRAY['ミリ']); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| SELECT pgroonga.match_positions_character( | ||
| 'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' || | ||
| '登録された文書をすぐに検索結果に反映できます。', | ||
| ARRAY['検索']); |
14 changes: 14 additions & 0 deletions
14
sql/function/match-positions-character/multiple-keywords.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| SELECT pgroonga.match_positions_character( | ||
| 'Groonga is a fast and accurate full text search engine based on ' || | ||
| 'inverted index. One of the characteristics of Groonga is that a ' || | ||
| 'newly registered document instantly appears in search results. ' || | ||
| 'Also, Groonga allows updates without read locks. These characteristics ' || | ||
| 'result in superior performance on real-time applications.' || | ||
| E'\n' || | ||
| E'\n' || | ||
| 'Groonga is also a column-oriented database management system (DBMS). ' || | ||
| 'Compared with well-known row-oriented systems, such as MySQL and ' || | ||
| 'PostgreSQL, column-oriented systems are more suited for aggregate ' || | ||
| 'queries. Due to this advantage, Groonga can cover weakness of ' || | ||
| 'row-oriented systems.', | ||
| ARRAY['fast', 'PostgreSQL']); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| SELECT pgroonga.match_positions_character( | ||
| 'Groonga is a fast and accurate full text search engine based on ' || | ||
| 'inverted index. One of the characteristics of Groonga is that a ' || | ||
| 'newly registered document instantly appears in search results. ' || | ||
| 'Also, Groonga allows updates without read locks. These characteristics ' || | ||
| 'result in superior performance on real-time applications.' || | ||
| E'\n' || | ||
| E'\n' || | ||
| 'Groonga is also a column-oriented database management system (DBMS). ' || | ||
| 'Compared with well-known row-oriented systems, such as MySQL and ' || | ||
| 'PostgreSQL, column-oriented systems are more suited for aggregate ' || | ||
| 'queries. Due to this advantage, Groonga can cover weakness of ' || | ||
| 'row-oriented systems.', | ||
| ARRAY['Groonga']); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,158 @@ | ||
| #include "pgroonga.h" | ||
|
|
||
| #include "pgrn_global.h" | ||
| #include "pgrn_groonga.h" | ||
| #include "pgrn_match_positions_character.h" | ||
| #include "pgrn_keywords.h" | ||
|
|
||
| #include <catalog/pg_type.h> | ||
| #include <utils/builtins.h> | ||
|
|
||
| static grn_ctx *ctx = &PGrnContext; | ||
| static grn_obj *keywordsTable = NULL; | ||
|
|
||
| PG_FUNCTION_INFO_V1(pgroonga_match_positions_character); | ||
|
|
||
| void | ||
| PGrnInitializeMatchPositionsCharacter(void) | ||
| { | ||
| keywordsTable = grn_table_create(ctx, NULL, 0, NULL, | ||
| GRN_OBJ_TABLE_PAT_KEY, | ||
| grn_ctx_at(ctx, GRN_DB_SHORT_TEXT), | ||
| NULL); | ||
| grn_obj_set_info(ctx, | ||
| keywordsTable, | ||
| GRN_INFO_NORMALIZER, | ||
| grn_ctx_get(ctx, "NormalizerAuto", -1)); | ||
| } | ||
|
|
||
| void | ||
| PGrnFinalizeMatchPositionsCharacter(void) | ||
| { | ||
| if (!keywordsTable) | ||
| return; | ||
|
|
||
| grn_obj_close(ctx, keywordsTable); | ||
| keywordsTable = NULL; | ||
| } | ||
|
|
||
| static ArrayType * | ||
| PGrnMatchPositionsCharacter(text *target) | ||
| { | ||
| grn_obj buffer; | ||
| ArrayType *positions; | ||
|
|
||
| GRN_UINT32_INIT(&buffer, GRN_OBJ_VECTOR); | ||
|
|
||
| { | ||
| const char *string; | ||
| size_t stringLength; | ||
| const char *stringForNCharacters; | ||
| size_t nCharacters = 0; | ||
|
|
||
| string = VARDATA_ANY(target); | ||
| stringLength = VARSIZE_ANY_EXHDR(target); | ||
|
|
||
| stringForNCharacters = string; | ||
|
|
||
| while (stringLength > 0) { | ||
| #define MAX_N_HITS 16 | ||
| grn_pat_scan_hit hits[MAX_N_HITS]; | ||
| const char *rest; | ||
| int i, nHits; | ||
| size_t chunkLength; | ||
|
|
||
| nHits = grn_pat_scan(ctx, (grn_pat *)keywordsTable, | ||
| string, stringLength, | ||
| hits, MAX_N_HITS, &rest); | ||
| for (i = 0; i < nHits; i++) { | ||
| const char *start; | ||
| const char *end; | ||
| size_t startNCharacters = 0; | ||
|
|
||
| start = string + hits[i].offset; | ||
| end = start + hits[i].length; | ||
| while (stringForNCharacters < end) { | ||
| int characterLength; | ||
| characterLength = grn_charlen(ctx, | ||
| stringForNCharacters, | ||
| end); | ||
| if (characterLength == 0) { | ||
| GRN_OBJ_FIN(ctx, &buffer); | ||
| ereport(ERROR, | ||
| (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), | ||
| errmsg("invalid string: %s", | ||
| stringForNCharacters))); | ||
| } | ||
| if (stringForNCharacters == start) { | ||
| startNCharacters = nCharacters; | ||
| } | ||
| nCharacters++; | ||
| stringForNCharacters += characterLength; | ||
| } | ||
|
|
||
| GRN_UINT32_PUT(ctx, &buffer, startNCharacters); | ||
| GRN_UINT32_PUT(ctx, &buffer, nCharacters - startNCharacters); | ||
| } | ||
|
|
||
| chunkLength = rest - string; | ||
| stringLength -= chunkLength; | ||
| string = rest; | ||
| #undef MAX_N_HITS | ||
| } | ||
| } | ||
|
|
||
| { | ||
| int i, nElements; | ||
| Datum *elements; | ||
| int dims[2]; | ||
| int lbs[2]; | ||
|
|
||
| nElements = GRN_BULK_VSIZE(&buffer) / (sizeof(uint32_t) * 2); | ||
| elements = palloc(sizeof(Datum) * 2 * nElements); | ||
| for (i = 0; i < nElements; i++) | ||
| { | ||
| uint32_t offset; | ||
| uint32_t length; | ||
|
|
||
| offset = GRN_UINT32_VALUE_AT(&buffer, i * 2); | ||
| length = GRN_UINT32_VALUE_AT(&buffer, i * 2 + 1); | ||
| elements[i * 2] = Int32GetDatum(offset); | ||
| elements[i * 2 + 1] = Int32GetDatum(length); | ||
| } | ||
| dims[0] = nElements; | ||
| dims[1] = 2; | ||
| lbs[0] = 1; | ||
| lbs[1] = 1; | ||
| positions = construct_md_array(elements, | ||
| NULL, | ||
| 2, | ||
| dims, | ||
| lbs, | ||
| INT4OID, | ||
| sizeof(int32_t), | ||
| true, | ||
| 'i'); | ||
| pfree(elements); | ||
| } | ||
|
|
||
| GRN_OBJ_FIN(ctx, &buffer); | ||
|
|
||
| return positions; | ||
| } | ||
|
|
||
| /** | ||
| * pgroonga.match_positions_character(target text, keywords text[]) : integer[2][] | ||
| */ | ||
| Datum | ||
| pgroonga_match_positions_character(PG_FUNCTION_ARGS) | ||
| { | ||
| text *target = PG_GETARG_TEXT_PP(0); | ||
| ArrayType *keywords = PG_GETARG_ARRAYTYPE_P(1); | ||
| ArrayType *positions; | ||
|
|
||
| PGrnKeywordsUpdateTable(keywords, keywordsTable); | ||
| positions = PGrnMatchPositionsCharacter(target); | ||
|
|
||
| PG_RETURN_POINTER(positions); | ||
| } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| #pragma once | ||
|
|
||
| void PGrnInitializeMatchPositionsCharacter(void); | ||
| void PGrnFinalizeMatchPositionsCharacter(void); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters