Navigation Menu

Skip to content

Commit

Permalink
Add pgroonga.match_positions_character
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Aug 27, 2016
1 parent fffc4f3 commit a5769e7
Show file tree
Hide file tree
Showing 14 changed files with 269 additions and 0 deletions.
1 change: 1 addition & 0 deletions Makefile
Expand Up @@ -13,6 +13,7 @@ SRCS = \
src/pgrn_keywords.c \
src/pgrn_jsonb.c \
src/pgrn_match_positions_byte.c \
src/pgrn_match_positions_character.c \
src/pgrn_options.c \
src/pgrn_query_extract_keywords.c \
src/pgrn_snippet_html.c \
Expand Down
@@ -0,0 +1,8 @@
SELECT pgroonga.match_positions_character(
'100㍉メートル',
ARRAY['ミリ']);
match_positions_character
---------------------------
{{3,1}}
(1 row)

9 changes: 9 additions & 0 deletions expected/function/match-positions-character/multibyte.out
@@ -0,0 +1,9 @@
SELECT pgroonga.match_positions_character(
'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' ||
'登録された文書をすぐに検索結果に反映できます。',
ARRAY['検索']);
match_positions_character
---------------------------
{{25,2},{46,2}}
(1 row)

19 changes: 19 additions & 0 deletions expected/function/match-positions-character/multiple-keywords.out
@@ -0,0 +1,19 @@
SELECT pgroonga.match_positions_character(
'Groonga is a fast and accurate full text search engine based on ' ||
'inverted index. One of the characteristics of Groonga is that a ' ||
'newly registered document instantly appears in search results. ' ||
'Also, Groonga allows updates without read locks. These characteristics ' ||
'result in superior performance on real-time applications.' ||
E'\n' ||
E'\n' ||
'Groonga is also a column-oriented database management system (DBMS). ' ||
'Compared with well-known row-oriented systems, such as MySQL and ' ||
'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
'queries. Due to this advantage, Groonga can cover weakness of ' ||
'row-oriented systems.',
ARRAY['fast', 'PostgreSQL']);
match_positions_character
---------------------------
{{13,4},{455,10}}
(1 row)

19 changes: 19 additions & 0 deletions expected/function/match-positions-character/one-keyword.out
@@ -0,0 +1,19 @@
SELECT pgroonga.match_positions_character(
'Groonga is a fast and accurate full text search engine based on ' ||
'inverted index. One of the characteristics of Groonga is that a ' ||
'newly registered document instantly appears in search results. ' ||
'Also, Groonga allows updates without read locks. These characteristics ' ||
'result in superior performance on real-time applications.' ||
E'\n' ||
E'\n' ||
'Groonga is also a column-oriented database management system (DBMS). ' ||
'Compared with well-known row-oriented systems, such as MySQL and ' ||
'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
'queries. Due to this advantage, Groonga can cover weakness of ' ||
'row-oriented systems.',
ARRAY['Groonga']);
match_positions_character
-----------------------------------------
{{0,7},{110,7},{197,7},{319,9},{553,7}}
(1 row)

6 changes: 6 additions & 0 deletions pgroonga--1.1.0--1.1.1.sql
@@ -0,0 +1,6 @@
-- pgroonga.match_positions_character(target text, keywords text[])
--
-- Returns the positions at which any of `keywords` occurs in `target` as a
-- two-dimensional integer array holding one {offset, length} pair per hit.
-- Both numbers are counted in characters of `target` (not bytes) and the
-- offset is 0-based.
--
-- Implemented by the C symbol pgroonga_match_positions_character in the
-- extension's shared library.
--
-- STRICT: a NULL argument yields NULL without calling the C function.
-- NOTE(review): PostgreSQL does not enforce the dimensions written in
-- "integer[2][]"; they are documentation only.
-- NOTE(review): VOLATILE mirrors pgroonga.match_positions_byte; whether a
-- weaker volatility class would be safe has not been verified here.
CREATE FUNCTION pgroonga.match_positions_character(target text, keywords text[])
RETURNS integer[2][]
AS 'MODULE_PATHNAME', 'pgroonga_match_positions_character'
LANGUAGE C
VOLATILE
STRICT;
7 changes: 7 additions & 0 deletions pgroonga.sql
Expand Up @@ -44,6 +44,13 @@ CREATE FUNCTION pgroonga.match_positions_byte(target text, keywords text[])
VOLATILE
STRICT;

-- pgroonga.match_positions_character(target text, keywords text[])
--
-- Returns the positions at which any of `keywords` occurs in `target` as a
-- two-dimensional integer array holding one {offset, length} pair per hit.
-- Both numbers are counted in characters of `target` (not bytes) and the
-- offset is 0-based.
--
-- Implemented by the C symbol pgroonga_match_positions_character in the
-- extension's shared library.
--
-- STRICT: a NULL argument yields NULL without calling the C function.
-- NOTE(review): PostgreSQL does not enforce the dimensions written in
-- "integer[2][]"; they are documentation only.
-- NOTE(review): VOLATILE mirrors pgroonga.match_positions_byte; whether a
-- weaker volatility class would be safe has not been verified here.
CREATE FUNCTION pgroonga.match_positions_character(target text, keywords text[])
RETURNS integer[2][]
AS 'MODULE_PATHNAME', 'pgroonga_match_positions_character'
LANGUAGE C
VOLATILE
STRICT;

CREATE FUNCTION pgroonga.query_extract_keywords(query text)
RETURNS text[]
AS 'MODULE_PATHNAME', 'pgroonga_query_extract_keywords'
Expand Down
@@ -0,0 +1,3 @@
SELECT pgroonga.match_positions_character(
'100㍉メートル',
ARRAY['ミリ']);
4 changes: 4 additions & 0 deletions sql/function/match-positions-character/multibyte.sql
@@ -0,0 +1,4 @@
SELECT pgroonga.match_positions_character(
'Groongaは転置索引を用いた高速・高精度な全文検索エンジンであり、' ||
'登録された文書をすぐに検索結果に反映できます。',
ARRAY['検索']);
14 changes: 14 additions & 0 deletions sql/function/match-positions-character/multiple-keywords.sql
@@ -0,0 +1,14 @@
SELECT pgroonga.match_positions_character(
'Groonga is a fast and accurate full text search engine based on ' ||
'inverted index. One of the characteristics of Groonga is that a ' ||
'newly registered document instantly appears in search results. ' ||
'Also, Groonga allows updates without read locks. These characteristics ' ||
'result in superior performance on real-time applications.' ||
E'\n' ||
E'\n' ||
'Groonga is also a column-oriented database management system (DBMS). ' ||
'Compared with well-known row-oriented systems, such as MySQL and ' ||
'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
'queries. Due to this advantage, Groonga can cover weakness of ' ||
'row-oriented systems.',
ARRAY['fast', 'PostgreSQL']);
14 changes: 14 additions & 0 deletions sql/function/match-positions-character/one-keyword.sql
@@ -0,0 +1,14 @@
SELECT pgroonga.match_positions_character(
'Groonga is a fast and accurate full text search engine based on ' ||
'inverted index. One of the characteristics of Groonga is that a ' ||
'newly registered document instantly appears in search results. ' ||
'Also, Groonga allows updates without read locks. These characteristics ' ||
'result in superior performance on real-time applications.' ||
E'\n' ||
E'\n' ||
'Groonga is also a column-oriented database management system (DBMS). ' ||
'Compared with well-known row-oriented systems, such as MySQL and ' ||
'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
'queries. Due to this advantage, Groonga can cover weakness of ' ||
'row-oriented systems.',
ARRAY['Groonga']);
158 changes: 158 additions & 0 deletions src/pgrn_match_positions_character.c
@@ -0,0 +1,158 @@
#include "pgroonga.h"

#include "pgrn_global.h"
#include "pgrn_groonga.h"
#include "pgrn_match_positions_character.h"
#include "pgrn_keywords.h"

#include <catalog/pg_type.h>
#include <utils/builtins.h>

static grn_ctx *ctx = &PGrnContext;
static grn_obj *keywordsTable = NULL;

PG_FUNCTION_INFO_V1(pgroonga_match_positions_character);

void
PGrnInitializeMatchPositionsCharacter(void)
{
keywordsTable = grn_table_create(ctx, NULL, 0, NULL,
GRN_OBJ_TABLE_PAT_KEY,
grn_ctx_at(ctx, GRN_DB_SHORT_TEXT),
NULL);
grn_obj_set_info(ctx,
keywordsTable,
GRN_INFO_NORMALIZER,
grn_ctx_get(ctx, "NormalizerAuto", -1));
}

/*
 * Releases the file-local keyword table, if one exists, and resets the
 * pointer so that a repeated call is a no-op.
 */
void
PGrnFinalizeMatchPositionsCharacter(void)
{
	if (keywordsTable)
	{
		grn_obj_close(ctx, keywordsTable);
		keywordsTable = NULL;
	}
}

/*
 * Scans `target` for every key currently registered in keywordsTable and
 * returns the hits as a two-dimensional int4 array with one
 * {offset, length} row per hit (lower bounds are 1 in both dimensions).
 *
 * grn_pat_scan() reports hits as byte offsets/lengths; they are converted
 * here to character counts with grn_charlen(). The character offset is
 * 0-based. Character counting is incremental: stringForNCharacters and
 * nCharacters persist across hits and across scan chunks, so the text
 * between hits is walked only once.
 *
 * Raises ERRCODE_INVALID_TEXT_REPRESENTATION when grn_charlen() cannot
 * decode a character (returns 0) while walking up to the end of a hit.
 */
static ArrayType *
PGrnMatchPositionsCharacter(text *target)
{
	grn_obj buffer;
	ArrayType *positions;

	/* Flat vector of uint32: offset0, length0, offset1, length1, ... */
	GRN_UINT32_INIT(&buffer, GRN_OBJ_VECTOR);

	{
		const char *string;
		size_t stringLength;
		const char *stringForNCharacters;
		size_t nCharacters = 0;

		string = VARDATA_ANY(target);
		stringLength = VARSIZE_ANY_EXHDR(target);

		/* Cursor for character counting; only ever moves forward. */
		stringForNCharacters = string;

		/* Scan in chunks of at most MAX_N_HITS hits per grn_pat_scan(). */
		while (stringLength > 0) {
#define MAX_N_HITS 16
			grn_pat_scan_hit hits[MAX_N_HITS];
			const char *rest;
			int i, nHits;
			size_t chunkLength;

			nHits = grn_pat_scan(ctx, (grn_pat *)keywordsTable,
								 string, stringLength,
								 hits, MAX_N_HITS, &rest);
			for (i = 0; i < nHits; i++) {
				const char *start;
				const char *end;
				size_t startNCharacters = 0;

				/* Hit positions are byte offsets relative to `string`. */
				start = string + hits[i].offset;
				end = start + hits[i].length;
				while (stringForNCharacters < end) {
					int characterLength;
					characterLength = grn_charlen(ctx,
												  stringForNCharacters,
												  end);
					if (characterLength == 0) {
						/*
						 * The varlena payload is not NUL-terminated, so
						 * the message must be bounded explicitly.
						 * `string + stringLength` is the end of the
						 * target because neither has been advanced for
						 * this chunk yet.
						 */
						int restLength =
							(int) ((string + stringLength) -
								   stringForNCharacters);

						GRN_OBJ_FIN(ctx, &buffer);
						ereport(ERROR,
								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
								 errmsg("invalid string: %.*s",
										restLength,
										stringForNCharacters)));
					}
					/* Remember the character index where the hit begins. */
					if (stringForNCharacters == start) {
						startNCharacters = nCharacters;
					}
					nCharacters++;
					stringForNCharacters += characterLength;
				}

				GRN_UINT32_PUT(ctx, &buffer, startNCharacters);
				GRN_UINT32_PUT(ctx, &buffer, nCharacters - startNCharacters);
			}

			/* Continue scanning from where grn_pat_scan() stopped. */
			chunkLength = rest - string;
			stringLength -= chunkLength;
			string = rest;
#undef MAX_N_HITS
		}
	}

	{
		int i, nElements;
		Datum *elements;
		int dims[2];
		int lbs[2];

		/* Two uint32 values (offset, length) were stored per hit. */
		nElements = GRN_BULK_VSIZE(&buffer) / (sizeof(uint32_t) * 2);
		elements = palloc(sizeof(Datum) * 2 * nElements);
		for (i = 0; i < nElements; i++)
		{
			uint32_t offset;
			uint32_t length;

			offset = GRN_UINT32_VALUE_AT(&buffer, i * 2);
			length = GRN_UINT32_VALUE_AT(&buffer, i * 2 + 1);
			elements[i * 2] = Int32GetDatum(offset);
			elements[i * 2 + 1] = Int32GetDatum(length);
		}
		/* Shape: nElements rows x 2 columns, 1-based lower bounds. */
		dims[0] = nElements;
		dims[1] = 2;
		lbs[0] = 1;
		lbs[1] = 1;
		positions = construct_md_array(elements,
									   NULL,
									   2,
									   dims,
									   lbs,
									   INT4OID,
									   sizeof(int32_t),
									   true,
									   'i');
		pfree(elements);
	}

	GRN_OBJ_FIN(ctx, &buffer);

	return positions;
}

/**
* pgroonga.match_positions_character(target text, keywords text[]) : integer[2][]
*/
Datum
pgroonga_match_positions_character(PG_FUNCTION_ARGS)
{
text *target = PG_GETARG_TEXT_PP(0);
ArrayType *keywords = PG_GETARG_ARRAYTYPE_P(1);
ArrayType *positions;

PGrnKeywordsUpdateTable(keywords, keywordsTable);
positions = PGrnMatchPositionsCharacter(target);

PG_RETURN_POINTER(positions);
}
4 changes: 4 additions & 0 deletions src/pgrn_match_positions_character.h
@@ -0,0 +1,4 @@
#pragma once

/* Creates the keyword table used by pgroonga.match_positions_character(). */
void PGrnInitializeMatchPositionsCharacter(void);
/* Closes that keyword table; does nothing when no table exists. */
void PGrnFinalizeMatchPositionsCharacter(void);
3 changes: 3 additions & 0 deletions src/pgroonga.c
Expand Up @@ -10,6 +10,7 @@
#include "pgrn_keywords.h"
#include "pgrn_jsonb.h"
#include "pgrn_match_positions_byte.h"
#include "pgrn_match_positions_character.h"
#include "pgrn_options.h"
#include "pgrn_query_extract_keywords.h"
#include "pgrn_search.h"
Expand Down Expand Up @@ -283,6 +284,7 @@ PGrnOnProcExit(int code, Datum arg)
PGrnFinalizeQueryExtractKeywords();

PGrnFinalizeMatchPositionsByte();
PGrnFinalizeMatchPositionsCharacter();

PGrnFinalizeHighlightHTML();

Expand Down Expand Up @@ -400,6 +402,7 @@ _PG_init(void)
PGrnInitializeHighlightHTML();

PGrnInitializeMatchPositionsByte();
PGrnInitializeMatchPositionsCharacter();

PGrnInitializeQueryExtractKeywords();
}
Expand Down

0 comments on commit a5769e7

Please sign in to comment.