Skip to content

Commit

Permalink
Add pgroonga.snippet_html()
Browse files Browse the repository at this point in the history
[groonga-dev,03398]

Suggested by Hiroaki Nakamura. Thanks!!!
  • Loading branch information
kou committed Aug 11, 2015
1 parent 6d3600c commit 9c8109a
Show file tree
Hide file tree
Showing 7 changed files with 204 additions and 0 deletions.
19 changes: 19 additions & 0 deletions expected/groonga/snippet-html/keywords/multiple.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
SELECT pgroonga.snippet_html(
'Groonga is a fast and accurate full text search engine based on ' ||
'inverted index. One of the characteristics of Groonga is that a ' ||
'newly registered document instantly appears in search results. ' ||
'Also, Groonga allows updates without read locks. These characteristics ' ||
'result in superior performance on real-time applications.' ||
'\n' ||
'\n' ||
'Groonga is also a column-oriented database management system (DBMS). ' ||
'Compared with well-known row-oriented systems, such as MySQL and ' ||
'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
'queries. Due to this advantage, Groonga can cover weakness of ' ||
'row-oriented systems.',
ARRAY['fast', 'PostgreSQL']);
snippet_html
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
{"Groonga is a <span class=\"keyword\">fast</span> and accurate full text search engine based on inverted index. One of the characteristics of Groonga is that a newly registered document instantly appears in search results. Also, Gro","ase management system (DBMS). Compared with well-known row-oriented systems, such as MySQL and <span class=\"keyword\">PostgreSQL</span>, column-oriented systems are more suited for aggregate queries. Due to this advantage, Groonga"}
(1 row)

19 changes: 19 additions & 0 deletions expected/groonga/snippet-html/keywords/one.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
SELECT pgroonga.snippet_html(
'Groonga is a fast and accurate full text search engine based on ' ||
'inverted index. One of the characteristics of Groonga is that a ' ||
'newly registered document instantly appears in search results. ' ||
'Also, Groonga allows updates without read locks. These characteristics ' ||
'result in superior performance on real-time applications.' ||
'\n' ||
'\n' ||
'Groonga is also a column-oriented database management system (DBMS). ' ||
'Compared with well-known row-oriented systems, such as MySQL and ' ||
'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
'queries. Due to this advantage, Groonga can cover weakness of ' ||
'row-oriented systems.',
ARRAY['Groonga']);
snippet_html
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
{"<span class=\"keyword\">Groonga</span> is a fast and accurate full text search engine based on inverted index. One of the characteristics of <span class=\"keyword\">Groonga</span> is that a newly registered document instantly appears in search results. Also, Gro","t read locks. These characteristics result in superior performance on real-time applications.\\n\\n<span class=\"keyword\">Groonga</span> is also a column-oriented database management system (DBMS). Compared with well-known row-orien","ted systems, such as MySQL and PostgreSQL, column-oriented systems are more suited for aggregate queries. Due to this advantage, <span class=\"keyword\">Groonga</span> can cover weakness of row-oriented systems."}
(1 row)

130 changes: 130 additions & 0 deletions pgroonga.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ static slist_head PGrnScanOpaques = SLIST_STATIC_INIT(PGrnScanOpaques);
PG_FUNCTION_INFO_V1(pgroonga_score);
PG_FUNCTION_INFO_V1(pgroonga_table_name);
PG_FUNCTION_INFO_V1(pgroonga_command);
PG_FUNCTION_INFO_V1(pgroonga_snippet_html);

PG_FUNCTION_INFO_V1(pgroonga_contain_text);
PG_FUNCTION_INFO_V1(pgroonga_contain_text_array);
Expand Down Expand Up @@ -1300,6 +1301,135 @@ pgroonga_command(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(result);
}

/*
 * Creates a Groonga snippet object that highlights the given keywords.
 *
 * The snippet object is opened with GRN_SNIP_SKIP_LEADING_SPACES, a width
 * of 200, at most 3 results and GRN_SNIP_MAPPING_HTML_ESCAPE. Each matched
 * keyword is wrapped in <span class="keyword">...</span>, and
 * GRN_NORMALIZER_AUTO is set as the normalizer.
 *
 * keywords: text[] of keywords. Only one-dimensional arrays contribute
 *           keywords; an empty (zero-dimensional) array contributes none.
 *           NULL elements are skipped.
 *
 * Returns the new snippet object. The caller must release it with
 * grn_obj_close(). Raises ERROR (ERRCODE_OUT_OF_MEMORY) when the snippet
 * object can't be opened.
 */
static grn_obj *
PGrnSnipCreate(ArrayType *keywords)
{
	grn_obj *snip;
	int flags = GRN_SNIP_SKIP_LEADING_SPACES;
	unsigned int width = 200;
	unsigned int maxNResults = 3;
	const char *openTag = "<span class=\"keyword\">";
	const char *closeTag = "</span>";
	grn_snip_mapping *mapping = GRN_SNIP_MAPPING_HTML_ESCAPE;

	snip = grn_snip_open(ctx, flags, width, maxNResults,
						 openTag, strlen(openTag),
						 closeTag, strlen(closeTag),
						 mapping);
	if (!snip)
	{
		/* ereport(ERROR) doesn't return. */
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("pgroonga: "
						"failed to allocate memory for generating snippet")));
	}

	grn_snip_set_normalizer(ctx, snip, GRN_NORMALIZER_AUTO);

	/*
	 * ARR_DIMS() and ARR_LBOUND() are only meaningful when the array has at
	 * least one dimension. An empty array has none, so reading dims[0]
	 * without this guard would read past the array header.
	 */
	if (ARR_NDIM(keywords) == 1)
	{
		int lowerBound = ARR_LBOUND(keywords)[0];
		int n = ARR_DIMS(keywords)[0];
		int offset;

		for (offset = 0; offset < n; offset++)
		{
			/*
			 * array_ref() takes the real subscript, so honor the array's
			 * lower bound instead of assuming that it is 1.
			 */
			int index = lowerBound + offset;
			Datum keywordDatum;
			text *keyword;
			bool isNULL;

			keywordDatum = array_ref(keywords, 1, &index, -1, -1, false,
									 'i', &isNULL);
			if (isNULL)
				continue;

			keyword = DatumGetTextPP(keywordDatum);
			/*
			 * The return code is left unchecked, so a keyword that Groonga
			 * rejects doesn't abort the call.
			 * NOTE(review): confirm this is the desired policy.
			 */
			grn_snip_add_cond(ctx, snip,
							  VARDATA_ANY(keyword),
							  VARSIZE_ANY_EXHDR(keyword),
							  NULL, 0, NULL, 0);
		}
	}

	return snip;
}

/*
 * Runs the snippet object against target and builds a text[] of snippets.
 *
 * snip:         snippet object to execute.
 * target:       text to extract snippets from.
 * snippetArray: output. On success, set to a one-dimensional text[] with
 *               lower bound 1 that holds one element per snippet, or to an
 *               empty text[] when there is no snippet. Left untouched on
 *               failure.
 *
 * Returns GRN_SUCCESS on success, otherwise the return code of the failed
 * Groonga call.
 */
static grn_rc
PGrnSnipExec(grn_obj *snip, text *target, ArrayType **snippetArray)
{
	grn_rc rc;
	unsigned int i, nResults, maxTaggedLength;
	unsigned int nCollected;
	char *buffer;
	Datum *snippets;
	int dims[1];
	int lbs[1];

	rc = grn_snip_exec(ctx, snip,
					   VARDATA_ANY(target),
					   VARSIZE_ANY_EXHDR(target),
					   &nResults, &maxTaggedLength);
	if (rc != GRN_SUCCESS)
	{
		return rc;
	}

	if (nResults == 0)
	{
		*snippetArray = construct_empty_array(TEXTOID);
		return GRN_SUCCESS;
	}

	/* One scratch buffer, sized by grn_snip_exec(), is reused per snippet. */
	buffer = palloc(sizeof(char) * maxTaggedLength);
	snippets = palloc(sizeof(Datum) * nResults);
	for (nCollected = 0; nCollected < nResults; nCollected++)
	{
		unsigned int snippetLength = 0;

		rc = grn_snip_get_result(ctx, snip, nCollected,
								 buffer, &snippetLength);
		if (rc != GRN_SUCCESS)
			break;

		snippets[nCollected] =
			PointerGetDatum(cstring_to_text_with_len(buffer, snippetLength));
	}
	pfree(buffer);

	if (rc == GRN_SUCCESS)
	{
		dims[0] = nResults;
		lbs[0] = 1;

		*snippetArray = construct_md_array(snippets, NULL,
										   1, dims, lbs,
										   TEXTOID, -1, false, 'i');
	}

	/*
	 * construct_md_array() copies the element values into the new array, so
	 * the temporary text values and the Datum array can be released on both
	 * the success and the failure paths.
	 */
	for (i = 0; i < nCollected; i++)
	{
		pfree(DatumGetPointer(snippets[i]));
	}
	pfree(snippets);

	return rc;
}

/**
 * pgroonga.snippet_html(target text, keywords text[]) : text[]
 *
 * Returns snippets of target that contain the given keywords, with each
 * keyword occurrence wrapped in <span class="keyword">...</span>.
 *
 * Raises ERROR when the snippet object can't be created or the snippets
 * can't be computed.
 */
Datum
pgroonga_snippet_html(PG_FUNCTION_ARGS)
{
	text *target = PG_GETARG_TEXT_PP(0);
	ArrayType *keywords = PG_GETARG_ARRAYTYPE_P(1);
	grn_obj *snip;
	grn_rc rc;
	ArrayType *snippets;

	snip = PGrnSnipCreate(keywords);

	/*
	 * The snippet object isn't owned by a PostgreSQL memory context. If
	 * anything below raises ERROR (for example an allocation failure), the
	 * non-local exit would skip grn_obj_close() and leak the object, so
	 * close it explicitly before re-throwing.
	 */
	PG_TRY();
	{
		rc = PGrnSnipExec(snip, target, &snippets);
	}
	PG_CATCH();
	{
		grn_obj_close(ctx, snip);
		PG_RE_THROW();
	}
	PG_END_TRY();

	grn_obj_close(ctx, snip);

	if (rc != GRN_SUCCESS)
	{
		ereport(ERROR,
				(errcode(PGrnRCToPgErrorCode(rc)),
				 errmsg("pgroonga: failed to compute snippets")));
	}

	PG_RETURN_POINTER(snippets);
}

static grn_bool
pgroonga_contain_raw(const char *text, unsigned int textSize,
const char *subText, unsigned int subTextSize)
Expand Down
1 change: 1 addition & 0 deletions pgroonga.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ extern void PGDLLEXPORT _PG_init(void);
extern Datum PGDLLEXPORT pgroonga_score(PG_FUNCTION_ARGS);
extern Datum PGDLLEXPORT pgroonga_table_name(PG_FUNCTION_ARGS);
extern Datum PGDLLEXPORT pgroonga_command(PG_FUNCTION_ARGS);
extern Datum PGDLLEXPORT pgroonga_snippet_html(PG_FUNCTION_ARGS);

extern Datum PGDLLEXPORT pgroonga_contain_text(PG_FUNCTION_ARGS);
extern Datum PGDLLEXPORT pgroonga_contain_text_array(PG_FUNCTION_ARGS);
Expand Down
7 changes: 7 additions & 0 deletions pgroonga.sql
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ CREATE FUNCTION pgroonga.command(groongaCommand text)
VOLATILE
STRICT;

-- pgroonga.snippet_html(target, keywords): returns a text[] of snippets of
-- "target" in which each occurrence of a keyword is wrapped in
-- <span class="keyword">...</span>.
-- STRICT: the result is NULL when either argument is NULL.
-- NOTE(review): the result looks like it depends only on the arguments, so
-- a less restrictive volatility than VOLATILE may be possible -- confirm
-- before changing.
CREATE FUNCTION pgroonga.snippet_html(target text, keywords text[])
RETURNS text[]
AS 'MODULE_PATHNAME', 'pgroonga_snippet_html'
LANGUAGE C
VOLATILE
STRICT;

CREATE FUNCTION pgroonga.contain(target text, query text)
RETURNS bool
AS 'MODULE_PATHNAME', 'pgroonga_contain_text'
Expand Down
14 changes: 14 additions & 0 deletions sql/groonga/snippet-html/keywords/multiple.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
SELECT pgroonga.snippet_html(
'Groonga is a fast and accurate full text search engine based on ' ||
'inverted index. One of the characteristics of Groonga is that a ' ||
'newly registered document instantly appears in search results. ' ||
'Also, Groonga allows updates without read locks. These characteristics ' ||
'result in superior performance on real-time applications.' ||
'\n' ||
'\n' ||
'Groonga is also a column-oriented database management system (DBMS). ' ||
'Compared with well-known row-oriented systems, such as MySQL and ' ||
'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
'queries. Due to this advantage, Groonga can cover weakness of ' ||
'row-oriented systems.',
ARRAY['fast', 'PostgreSQL']);
14 changes: 14 additions & 0 deletions sql/groonga/snippet-html/keywords/one.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
SELECT pgroonga.snippet_html(
'Groonga is a fast and accurate full text search engine based on ' ||
'inverted index. One of the characteristics of Groonga is that a ' ||
'newly registered document instantly appears in search results. ' ||
'Also, Groonga allows updates without read locks. These characteristics ' ||
'result in superior performance on real-time applications.' ||
'\n' ||
'\n' ||
'Groonga is also a column-oriented database management system (DBMS). ' ||
'Compared with well-known row-oriented systems, such as MySQL and ' ||
'PostgreSQL, column-oriented systems are more suited for aggregate ' ||
'queries. Due to this advantage, Groonga can cover weakness of ' ||
'row-oriented systems.',
ARRAY['Groonga']);

0 comments on commit 9c8109a

Please sign in to comment.