Permalink
Browse files

Restored contained operators, reworked index, fixed doc

  • Loading branch information...
1 parent 6cd2913 commit c845bbee23b7d24aa5ccbdda4b9777c0a87acb88 @theirix committed Dec 11, 2012
Showing with 395 additions and 107 deletions.
  1. +1 −1 Makefile
  2. +13 −12 README.md
  3. +73 −27 doc/parray_gin.md
  4. +30 −0 sql/parray_gin.sql
  5. +220 −67 src/parray_gin.c
  6. +15 −0 test/expected/index.out
  7. +18 −0 test/expected/op.out
  8. +11 −0 test/sql/index.sql
  9. +14 −0 test/sql/op.sql
View
2 Makefile
@@ -8,7 +8,7 @@ TESTS = $(wildcard test/sql/*.sql)
REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS))
REGRESS_OPTS += --inputdir=test
PG_CONFIG := pg_config
-#PG_CPPFLAGS = -g -O0
+PG_CPPFLAGS = -g -O0
EXTRA_CLEAN = sql/$(EXTENSION)--$(EXTVERSION).sql
all: sql/$(EXTENSION)--$(EXTVERSION).sql
View
25 README.md
@@ -25,7 +25,7 @@ and registered in a database.
The easiest method is to get and install the extension from the PGXN network.
PGXN client downloads and builds the extension.
- pgxn --pg_config <postgresql_install_dir>/bin/pg_config install parray_gin
+ pgxn --pg_config <postgresql_install_dir>/bin/pg_config install parray_gin
PGXN client itself is available at [github](https://github.com/dvarrazzo/pgxnclient) and
can be installed with your favourite method, i.e. `easy_install pgxnclient`.
@@ -38,23 +38,23 @@ an extension on an almost any UNIX platform (Linux, Solaris, OS X).
Compilation:
- gmake PG_CONFIG=<postgresql_install_dir>/bin/pg_config
+ gmake PG_CONFIG=<postgresql_install_dir>/bin/pg_config
Installation (as superuser):
- gmake PG_CONFIG=<postgresql_install_dir>/bin/pg_config install
+ gmake PG_CONFIG=<postgresql_install_dir>/bin/pg_config install
PostgreSQL server must be restarted.
To uninstall extension completely you may use this command (as superuser):
- gmake PG_CONFIG=<postgresql_install_dir>/bin/pg_config uninstall
+ gmake PG_CONFIG=<postgresql_install_dir>/bin/pg_config uninstall
Project contains SQL tests that can be launched on PostgreSQL with installed extension.
Tests are performed on a dynamically created database with a specified user (with the
appropriated permissions - create database, for example):
- gmake PG_CONFIG=<postgresql_install_dir>/bin/pg_config PGUSER=postgres installcheck
+ gmake PG_CONFIG=<postgresql_install_dir>/bin/pg_config PGUSER=postgres installcheck
#### Manually
@@ -64,15 +64,15 @@ Or if you use Windows (use MSVC 2008 for Postgres 9.1 and MSVC 2010 for Postgres
Copy library to the PostgreSQL library directory:
- cp parray_gin.so `<postgresql_install_dir>/bin/pg_config --pkglibdir`
+ cp parray_gin.so `<postgresql_install_dir>/bin/pg_config --pkglibdir`
Copy control file to the extension directory:
-
- cp parray_gin.control `<postgresql_install_dir>/bin/pg_config --sharedir`/extension
+
+ cp parray_gin.control `<postgresql_install_dir>/bin/pg_config --sharedir`/extension
Copy SQL prototypes file to the extension directory:
-
- cp parray_gin--<version>.sql `<postgresql_install_dir>/bin/pg_config --sharedir`/extension
+
+ cp parray_gin--<version>.sql `<postgresql_install_dir>/bin/pg_config --sharedir`/extension
To uninstall extension just remove files you copied before.
@@ -82,15 +82,15 @@ Extension must be previously installed to a PostgreSQL directory.
Extension is created in a particular database (as superuser):
- create extension parray_gin;
+ create extension parray_gin;
It creates all the functions, operators and other stuff from extension.
Note that you must restart a server if a previous library was already installed
at the same place. In other words, always restart to be sure.
To drop an extension use:
- drop extension parray_gin cascade;
+ drop extension parray_gin cascade;
License information
@@ -99,3 +99,4 @@ License information
You can use any code from this project under the terms of [PostgreSQL License](http://www.postgresql.org/about/licence/).
Please consult with the COPYING for license information.
+<!-- vim: set noexpandtab tabstop=4 shiftwidth=4 colorcolumn=80: -->
View
100 doc/parray_gin.md
@@ -4,71 +4,114 @@ parray_gin extension
Installing
----------
- CREATE EXTENSION parray_gin;
+ CREATE EXTENSION parray_gin;
Extension is compatible with PostgreSQL 9.1 and 9.2.
Description
-----------
-Extension `parray_gin` provides GIN index and operator support for arrays with partial match.
+Extension `parray_gin` provides GIN index and operator support for arrays with
+partial match.
Usage
-----
-Extension contains operator class named `parray_gin_ops` for using GIN index with the text arrays. Matchin can be strict and partial (by substring). Surely operators can be used separately.
+Extension contains operator class named `parray_gin_ops` for using GIN index
+with the text arrays. Matching can be strict (array items must be equal)
+or partial (array items of query may contain like expressions).
+Of course, the operators can also be used separately from the index.
Index can be created for the table with the following commands:
- -- test table, column `val` needs to be indexed
- create table test_table(id bigserial, val text[]);
- -- create the index
- create index test_tags_idx on test_table using gin (val parray_gin_ops);
- -- select using index
- select * from test_table where val @> array['must','contain'];
+ -- test table, column `val` needs to be indexed
+ create table test_table(id bigserial, val text[]);
+ -- create the index
+ create index test_tags_idx on test_table using gin (val parray_gin_ops);
+ -- select using index
+ select * from test_table where val @> array['must','contain'];
+ -- select using index
+ select * from test_table where val @@> array['what%like%'];
-GIN index can be used with three operators: `@>`, `<@@`, `@@>`.
+GIN index can be used with four operators: `@>`, `<@`, `@@>`, `<@@`.
-Developers of an extension succesfully used GIN index on JSON arrays extracted from JSON text fields using `json_accessors` extension.
+Developers of an extension successfully used GIN index on JSON arrays extracted
+from JSON text fields using `json_accessors` extension.
+
+GIN index is based on trigram decomposition. Trigrams implementation from
+pg_trgm contrib module is used.
+Indexed keys are split into trigrams which are stored as GIN keys.
+Query is split into trigrams too and carefully checked against GIN keys.
+Query can contain like expressions which could slow down an index a little.
+The trigram index can return false positives, so the provided array matching
+operators always recheck the fetched rows.
Interface
---------
### Operators
-#### `@> (anyarray, anyarray) -> bool`
+#### `@> (text[], text[]) -> bool`
+
+Strict array _contains_. Returns true if LHS array contains all items from
+the RHS array.
+
+Sample index search:
+
+ $ select * from test_table;
+ {star,wars}
+ {long,time,ago,in}
+ {a,galaxy,far}
+ {far,away}
+
+ -- must contain all items from right side, strict matched
+ $ select * from test_table where val @> array['far'];
+ {a,galaxy,far}
+ {far,away}
+
+#### `<@ (text[], text[]) -> bool`
-Strict array _contains_. Returns true if LHS array contains all items from the RHS array.
+Strict array _contained_. Returns true if LHS array is contained by the
+RHS array.
Sample index search:
-
- -- must contain all items from right side, partial matched
- select * from test_table where val @@> array['contain'];
+
+ -- must be contained by the right side, strict matched
+ $ select * from test_table where val <@ array['galaxy','ago','vader'];
+ {long,time,ago,in}
+ {a,galaxy,far}
+
#### `@@> (text[], text[]) -> bool`
-Partial array _contains_. Returns true if LHS array contains all items from the RHS array,
-matched partially (i.e. _foobar_ contains _oobar_).
+Partial array _contains_. Returns true if LHS array contains all items from
+the RHS array,
+matched partially (i.e. `'foobar' ~~ 'foo%'` or `'foobar' ~~ '%oo%'`).
Sample index search:
-
- -- must contain all items from right side, partial matched
- select * from test_table where val @@> array['cont'];
+
+ -- must contain all items from right side, partially matched
+ $ select * from test_table where val @@> array['%ar%'];
+ {star,wars}
#### `<@@ (text[], text[]) -> bool`
-Partial array _contained by_. Returns true if LHS array is contained by all items from the RHS array, matched partially (i.e. _foobar_ contains _oobar_). Inversion of the `@@>`.
+Partial array _contained by_. Returns true if every item of the LHS array
+matches some item of the RHS array, where RHS items may be like expressions
+(i.e. `'foobar' ~~ 'foo%'`). Inversion (commutator) of the `@@>`.
Sample index search:
-
- -- must be contained by all items from the right side, partial matched
- select * from test_table where val @@> array['must','contains','or','not'];
+
+ -- must be contained by items from right side, partially matched
+ $ select * from test_table where val <@@ array['%ar%','vader'];
+ {star,wars}
### Operator class
#### `operator class parray_gin_ops`
-GIN-capable operator class. Support indexing strategies based on these operators.
+GIN-capable operator class. Supports indexing strategies based on
+these operators.
Author
------
@@ -80,6 +123,9 @@ Developed by [Eugene Seliverstov](theirix@concerteza.ru)
Copyright and License
---------------------
-You can use any code from this project under the terms of [PostgreSQL License](http://www.postgresql.org/about/licence/).
+You can use any code from this project under the terms of
+[PostgreSQL License](http://www.postgresql.org/about/licence/).
Please consult with the COPYING for license information.
+
+<!-- vim: set noexpandtab tabstop=4 shiftwidth=4 colorcolumn=80: -->
View
30 sql/parray_gin.sql
@@ -32,15 +32,33 @@ create or replace function parray_contains_partial(_text, _text) returns bool
as 'MODULE_PATHNAME' language C immutable strict;
comment on function parray_contains_partial(_text,_text) is 'text array contains compared by partial';
+create or replace function parray_contained_partial(_text, _text) returns bool
+ as 'MODULE_PATHNAME' language C immutable strict;
+comment on function parray_contained_partial(_text,_text) is 'text array contained compared by partial';
+
create or replace function parray_contains_strict(_text, _text) returns bool
as 'MODULE_PATHNAME' language C immutable strict;
comment on function parray_contains_strict(_text,_text) is 'text array contains compared by strict';
+create or replace function parray_contained_strict(_text, _text) returns bool
+ as 'MODULE_PATHNAME' language C immutable strict;
+comment on function parray_contained_strict(_text,_text) is 'text array contained compared by strict';
+
-- strict contains
create operator @> (
leftarg = _text,
rightarg = _text,
procedure = parray_contains_strict,
+ commutator = '<@',
+ restrict = contsel,
+ join = contjoinsel
+);
+
+create operator <@ (
+ leftarg = _text,
+ rightarg = _text,
+ procedure = parray_contained_strict,
+ commutator = '@>',
restrict = contsel,
join = contjoinsel
);
@@ -50,6 +68,16 @@ create operator @@> (
leftarg = _text,
rightarg = _text,
procedure = parray_contains_partial,
+ commutator = '<@@',
+ restrict = contsel,
+ join = contjoinsel
+);
+
+create operator <@@ (
+ leftarg = _text,
+ rightarg = _text,
+ procedure = parray_contained_partial,
+ commutator = '@@>',
restrict = contsel,
join = contjoinsel
);
@@ -60,7 +88,9 @@ create operator class parray_gin_ops
for type _text using gin
as
operator 7 @> (_text,_text), -- strict
+ operator 8 <@ (_text,_text), -- strict
operator 9 @@> (_text,_text), -- partial
+ operator 10 <@@ (_text,_text), -- partial
function 1 parray_gin_compare(internal, internal),
function 2 parray_gin_extract_value(internal, internal, internal),
function 3 parray_gin_extract_query(internal, internal, internal, internal, internal, internal, internal),
View
287 src/parray_gin.c
@@ -1,11 +1,16 @@
/*-------------------------------------------------------------------------
*
* parray_gin.c
- * GIN support for arrays with partial match
+ * GIN support for arrays with partial match
*
* Copyright (c) 2012, Con Certeza
* Author: irix <theirix@concerteza.ru>
*
+ * GIN index heavily uses trigram implementation from pg_trgm contrib
+ * module. Files trgm.c and trgm.h are copied without changes
+ * (excluded only PG_MODULE_MAGIC singleton). We thought postgresql
+ * license allows this kind of code reuse.
+ *
*-------------------------------------------------------------------------
*/
@@ -43,19 +48,21 @@ PG_MODULE_MAGIC;
/* @> operator strategy */
#define PARRAY_GIN_STRATEGY_CONTAINS 7
/* <@ operator strategy */
-/*#define PARRAY_GIN_STRATEGY_CONTAINED_BY 8*/
+#define PARRAY_GIN_STRATEGY_CONTAINED_BY 8
/* @@> operator strategy */
#define PARRAY_GIN_STRATEGY_CONTAINS_PARTIAL 9
/* <@@ operator strategy */
-/*#define PARRAY_GIN_STRATEGY_CONTAINED_BY_PARTIAL 10*/
+#define PARRAY_GIN_STRATEGY_CONTAINED_BY_PARTIAL 10
/*
* Internal functions declarations
*/
bool is_valid_strategy(int strategy);
+int32 *palloc_int32(int32 value);
ArrayType *construct_bool_array(bool *raw_array, int count);
+Datum dump_op_args(PG_FUNCTION_ARGS);
Datum dump_array(PG_FUNCTION_ARGS);
Datum trigrams_from_textarray(PG_FUNCTION_ARGS);
@@ -69,7 +76,9 @@ PGDLLEXPORT Datum parray_gin_extract_query(PG_FUNCTION_ARGS);
PGDLLEXPORT Datum parray_gin_consistent(PG_FUNCTION_ARGS);
PGDLLEXPORT Datum parray_contains_strict(PG_FUNCTION_ARGS);
+PGDLLEXPORT Datum parray_contained_strict(PG_FUNCTION_ARGS);
PGDLLEXPORT Datum parray_contains_partial(PG_FUNCTION_ARGS);
+PGDLLEXPORT Datum parray_contained_partial(PG_FUNCTION_ARGS);
/*
* Declare V1 exports
@@ -81,8 +90,11 @@ PG_FUNCTION_INFO_V1(parray_gin_extract_query);
PG_FUNCTION_INFO_V1(parray_gin_consistent);
PG_FUNCTION_INFO_V1(parray_contains_strict);
+PG_FUNCTION_INFO_V1(parray_contained_strict);
PG_FUNCTION_INFO_V1(parray_contains_partial);
+PG_FUNCTION_INFO_V1(parray_contained_partial);
+PG_FUNCTION_INFO_V1(dump_op_args);
PG_FUNCTION_INFO_V1(dump_array);
PG_FUNCTION_INFO_V1(trigrams_from_textarray);
@@ -93,9 +105,10 @@ PG_FUNCTION_INFO_V1(trigrams_from_textarray);
*/
static bool
-text_array_contains_partial(ArrayType *array1, ArrayType *array2, Oid collation,
- bool matchall, bool partial)
+text_array_contains_partial(ArrayType *array1, ArrayType *array2,
+ Oid collation, bool partial, bool switch_args)
{
+ bool matchall = true;
bool result = matchall;
Oid element_type = ARR_ELEMTYPE(array1);
int nelems1;
@@ -189,16 +202,36 @@ text_array_contains_partial(ArrayType *array1, ArrayType *array2, Oid collation,
if (isnull2)
continue; /* can't match */
+#if TRACE_LIKE_HELL && 0
+ if (switch_args)
+ elog(PARRAY_GIN_TRACE, " cmp %s vs %s",
+ TextDatumGetCString(elt2), TextDatumGetCString(elt1));
+ else
+ elog(PARRAY_GIN_TRACE, " cmp %s vs %s",
+ TextDatumGetCString(elt1), TextDatumGetCString(elt2));
+#endif
/*
* Apply the operator to the element pair
*/
if (partial)
- oprresult = DatumGetBool(DirectFunctionCall2Coll(textlike,
+ {
+ if (switch_args)
+ oprresult = DatumGetBool(DirectFunctionCall2Coll(textlike,
collation, elt2, elt1));
+ else
+ oprresult = DatumGetBool(DirectFunctionCall2Coll(textlike,
+ collation, elt1, elt2));
+ }
else
- oprresult = DatumGetBool(DirectFunctionCall2Coll(texteq,
+ {
+ if (switch_args)
+ oprresult = DatumGetBool(DirectFunctionCall2Coll(texteq,
collation, elt2, elt1));
+ else
+ oprresult = DatumGetBool(DirectFunctionCall2Coll(texteq,
+ collation, elt1, elt2));
+ }
if (oprresult)
break;
@@ -244,7 +277,8 @@ construct_bool_array(bool *raw_array, int count)
for (i = 0; i < count; ++i)
pdatum[i] = BoolGetDatum(raw_array[i]);
get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
- array = construct_array(pdatum, count, elmtype, elmlen, elmbyval, elmalign);
+ array = construct_array(pdatum, count,
+ elmtype, elmlen, elmbyval, elmalign);
pfree(pdatum);
return array;
}
@@ -259,17 +293,39 @@ dump_array(PG_FUNCTION_ARGS)
int nelems;
tstr = DatumGetTextP(OidFunctionCall3Coll(F_ARRAY_TO_TEXT_NULL,
- PG_GET_COLLATION(),
- PointerGetDatum(array),
- CStringGetTextDatum(delim),
- CStringGetTextDatum("NULL")));
+ PG_GET_COLLATION(),
+ PointerGetDatum(array),
+ CStringGetTextDatum(delim),
+ CStringGetTextDatum("NULL")));
nelems = ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array));
elog(PARRAY_GIN_TRACE, "%s, count %d, items: %s",
prefix, nelems, TextDatumGetCString(tstr));
PG_RETURN_VOID();
}
+/*
+ * Trace helper: logs both operands of an array operator and its result.
+ *
+ * fmgr arguments:
+ *   0 - left-hand array datum (passed through to dump_array)
+ *   1 - right-hand array datum (passed through to dump_array)
+ *   2 - bool result of the operator
+ *   3 - cstring prefix (name of the calling function) used in log lines
+ *
+ * Output is emitted through dump_array and elog at PARRAY_GIN_TRACE level.
+ * Returns void.
+ */
+Datum
+dump_op_args(PG_FUNCTION_ARGS)
+{
+ bool result = PG_GETARG_BOOL(2);
+ const char *prefix = PG_GETARG_CSTRING(3);
+ char buf1[512],
+ buf2[512];
+
+ /* prefix is caller-supplied: bound the formatting to the buffer size */
+ snprintf(buf1, sizeof(buf1), "GIN %s lhs", prefix);
+ snprintf(buf2, sizeof(buf2), "GIN %s rhs", prefix);
+ DirectFunctionCall3Coll(dump_array, PG_GET_COLLATION(),
+ PG_GETARG_DATUM(0),
+ CStringGetDatum(buf1),
+ CStringGetDatum("#"));
+ DirectFunctionCall3Coll(dump_array, PG_GET_COLLATION(),
+ PG_GETARG_DATUM(1),
+ CStringGetDatum(buf2),
+ CStringGetDatum("#"));
+ elog(PARRAY_GIN_TRACE, "GIN %s result=%d", prefix, result);
+ PG_RETURN_VOID();
+}
+
/*
* Underlying functions for @> operator
*/
@@ -281,21 +337,14 @@ parray_contains_strict(PG_FUNCTION_ARGS)
bool result;
result = text_array_contains_partial(array2, array1,
- PG_GET_COLLATION(), true, true);
-
+ PG_GET_COLLATION(),
+ false, false);
#if TRACE_LIKE_HELL
- DirectFunctionCall3Coll(dump_array, PG_GET_COLLATION(),
- PointerGetDatum(array1),
- CStringGetDatum("GIN parray_contains_partial lhs"),
- CStringGetDatum("#"));
- DirectFunctionCall3Coll(dump_array, PG_GET_COLLATION(),
- PointerGetDatum(array2),
- CStringGetDatum("GIN parray_contains_partial rhs"),
- CStringGetDatum("#"));
- elog(PARRAY_GIN_TRACE, "GIN parray_contains_partial result=%d",
- result);
+ DirectFunctionCall4Coll(dump_op_args, PG_GET_COLLATION(),
+ PointerGetDatum(array1), PointerGetDatum(array2),
+ BoolGetDatum(result),
+ CStringGetDatum("parray_contains_strict"));
#endif
-
PG_RETURN_BOOL(result);
}
@@ -310,21 +359,58 @@ parray_contains_partial(PG_FUNCTION_ARGS)
bool result;
result = text_array_contains_partial(array2, array1,
- PG_GET_COLLATION(), true, true);
+ PG_GET_COLLATION(),
+ true, true);
+#if TRACE_LIKE_HELL
+ DirectFunctionCall4Coll(dump_op_args, PG_GET_COLLATION(),
+ PointerGetDatum(array1), PointerGetDatum(array2),
+ BoolGetDatum(result),
+ CStringGetDatum("parray_contains_partial"));
+#endif
+ PG_RETURN_BOOL(result);
+}
+
+/*
+ * Underlying functions for <@ operator
+ */
+Datum
+parray_contained_strict(PG_FUNCTION_ARGS)
+{
+ ArrayType *array1 = PG_GETARG_ARRAYTYPE_P(0);
+ ArrayType *array2 = PG_GETARG_ARRAYTYPE_P(1);
+ bool result;
+ result = text_array_contains_partial(array1, array2,
+ PG_GET_COLLATION(),
+ false, true);
#if TRACE_LIKE_HELL
- DirectFunctionCall3Coll(dump_array, PG_GET_COLLATION(),
- PointerGetDatum(array1),
- CStringGetDatum("GIN parray_contains_partial lhs"),
- CStringGetDatum("#"));
- DirectFunctionCall3Coll(dump_array, PG_GET_COLLATION(),
- PointerGetDatum(array2),
- CStringGetDatum("GIN parray_contains_partial rhs"),
- CStringGetDatum("#"));
- elog(PARRAY_GIN_TRACE, "GIN parray_contains_partial result=%d",
- result);
+ DirectFunctionCall4Coll(dump_op_args, PG_GET_COLLATION(),
+ PointerGetDatum(array1), PointerGetDatum(array2),
+ BoolGetDatum(result),
+ CStringGetDatum("parray_contained_strict"));
#endif
+ PG_RETURN_BOOL(result);
+}
+
+/*
+ * <@@ operator support function: partial "contained by" for text arrays.
+ *
+ * Takes the left and right operand arrays (fmgr arguments 0 and 1) and
+ * delegates to text_array_contains_partial with partial matching enabled
+ * and the element comparison arguments not switched. Returns bool.
+ */
+Datum
+parray_contained_partial(PG_FUNCTION_ARGS)
+{
+ ArrayType *lhs = PG_GETARG_ARRAYTYPE_P(0);
+ ArrayType *rhs = PG_GETARG_ARRAYTYPE_P(1);
+ Oid collation = PG_GET_COLLATION();
+ bool result;
+
+ result = text_array_contains_partial(lhs, rhs, collation, true, false);
+#if TRACE_LIKE_HELL
+ /* trace operands and outcome under the function's own name */
+ DirectFunctionCall4Coll(dump_op_args, collation,
+ PointerGetDatum(lhs), PointerGetDatum(rhs),
+ BoolGetDatum(result),
+ CStringGetDatum("parray_contained_partial"));
+#endif
PG_RETURN_BOOL(result);
}
@@ -338,9 +424,19 @@ is_valid_strategy(int strategy)
{
return
strategy == PARRAY_GIN_STRATEGY_CONTAINS ||
- strategy == PARRAY_GIN_STRATEGY_CONTAINS_PARTIAL;
+ strategy == PARRAY_GIN_STRATEGY_CONTAINED_BY ||
+ strategy == PARRAY_GIN_STRATEGY_CONTAINS_PARTIAL ||
+ strategy == PARRAY_GIN_STRATEGY_CONTAINED_BY_PARTIAL;
}
+/*
+ * Allocate room for one int32 with palloc, store `value` in it and
+ * return the pointer to the new cell.
+ */
+int32 *
+palloc_int32(int32 value)
+{
+ int32 *cell;
+
+ cell = (int32 *) palloc(sizeof(int32));
+ *cell = value;
+ return cell;
+}
/*
* TODO document
@@ -351,6 +447,7 @@ trigrams_from_textarray(PG_FUNCTION_ARGS)
ArrayType *items = PG_GETARG_ARRAYTYPE_P(0);
int32 *countTrigrams = (int32 *) PG_GETARG_POINTER(1);
bool useWildcards = (bool) PG_GETARG_BOOL(2);
+ Pointer **lengthsTrigrams = (Pointer **) PG_GETARG_POINTER(3);
/*
* Result type, contains int32 datums with all trigrams for all indexed
@@ -384,13 +481,17 @@ trigrams_from_textarray(PG_FUNCTION_ARGS)
*/
for (indexKey = 0; indexKey < countItemKeys; ++indexKey)
if (!itemNullFlags[indexKey])
- countArrTrigram +=
+ countArrTrigram += 2 +
DatumGetInt32(OidFunctionCall1Coll(F_TEXTLEN,
- PG_GET_COLLATION(), itemKeys[indexKey])) + 2;
+ PG_GET_COLLATION(), itemKeys[indexKey]));
keys = (Datum *) palloc(countArrTrigram * sizeof(TRGM));
- /*elog(PARRAY_GIN_TRACE,
- "GIN trigrams_from_textarray allocate %ld items", countArrTrigram);*/
+ if (lengthsTrigrams)
+ {
+ *lengthsTrigrams = (Pointer *) palloc0(
+ (1 + countItemKeys) * sizeof(Pointer));
+ (*lengthsTrigrams)[0] = (Pointer) palloc_int32(countItemKeys);
+ }
for (indexKey = 0; indexKey < countItemKeys; ++indexKey)
{
char *pstr;
@@ -405,6 +506,11 @@ trigrams_from_textarray(PG_FUNCTION_ARGS)
trg = generate_wildcard_trgm(pstr, strlen(pstr));
else
trg = generate_trgm(pstr, strlen(pstr));
+
+ if (lengthsTrigrams)
+ (*lengthsTrigrams)[indexKey + 1] =
+ (Pointer) palloc_int32(ARRNELEM(trg));
+
ptr = GETARR(trg);
for (i = 0; i < ARRNELEM(trg); i++)
{
@@ -422,22 +528,28 @@ trigrams_from_textarray(PG_FUNCTION_ARGS)
}
#if TRACE_LIKE_HELL
{
- text *tstr = DatumGetTextP(OidFunctionCall3Coll(F_ARRAY_TO_TEXT_NULL,
- PG_GET_COLLATION(),
- PointerGetDatum(items),
- CStringGetTextDatum("#"),
- CStringGetTextDatum("NULL")
- ));
+ text *tstr;
+
+ tstr = DatumGetTextP(OidFunctionCall3Coll(F_ARRAY_TO_TEXT_NULL,
+ PG_GET_COLLATION(),
+ PointerGetDatum(items),
+ CStringGetTextDatum("#"),
+ CStringGetTextDatum("NULL")
+ ));
+
elog(PARRAY_GIN_TRACE,
"GIN trigrams_from_textarray: %d items, %s, %d trigrams",
countItemKeys, TextDatumGetCString(tstr), *countTrigrams);
for (indexKey = 0; indexKey < countItemKeys; ++indexKey)
{
elog(PARRAY_GIN_TRACE,
- " trigrams_from_textarray item %d = %s", indexKey,
+ " trigrams_from_textarray item %d = %s (%d)", indexKey,
itemNullFlags[indexKey]
- ? "NULL"
- : (const char*)TextDatumGetCString(itemKeys[indexKey]));
+ ? "NULL"
+ : (const char *) TextDatumGetCString(itemKeys[indexKey]),
+ (lengthsTrigrams
+ ? *((int32 *) (*lengthsTrigrams)[indexKey + 1])
+ : -1));
}
for (i = 0; i < *countTrigrams; ++i)
{
@@ -474,11 +586,12 @@ parray_gin_compare(PG_FUNCTION_ARGS)
int32 result = DatumGetInt32(DirectFunctionCall2Coll(btint4cmp,
PG_GET_COLLATION(),
- PointerGetDatum(key1), PointerGetDatum(key2)));
+ PointerGetDatum(key1),
+ PointerGetDatum(key2)));
#if TRACE_LIKE_HELL
- elog(PARRAY_GIN_TRACE, "GIN compare: %d vs %d -> %d",
- key1, key2, result);
+/* elog(PARRAY_GIN_TRACE, "GIN compare: %d vs %d -> %d",
+ key1, key2, result);*/
#endif
PG_RETURN_INT32(result);
@@ -505,11 +618,12 @@ parray_gin_extract_value(PG_FUNCTION_ARGS)
elog(PARRAY_GIN_TRACE, "GIN extract_value invoked");
#endif
- keys = (Datum *) DirectFunctionCall3Coll(trigrams_from_textarray,
- PG_GET_COLLATION(),
- PointerGetDatum(itemValue),
- PointerGetDatum(nkeys),
- BoolGetDatum(false));
+ keys = (Datum *) DirectFunctionCall4Coll(trigrams_from_textarray,
+ PG_GET_COLLATION(),
+ PointerGetDatum(itemValue),
+ PointerGetDatum(nkeys),
+ BoolGetDatum(false),
+ PointerGetDatum(NULL));
*nullFlags = NULL;
@@ -529,9 +643,11 @@ parray_gin_extract_query(PG_FUNCTION_ARGS)
int32 *nkeys = (int32 *) PG_GETARG_POINTER(1);
StrategyNumber strategy = PG_GETARG_UINT16(2);
bool **pmatch = (bool **) PG_GETARG_POINTER(3);
+ Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
bool **nullFlags = (bool **) PG_GETARG_POINTER(5);
Datum *keys;
+ bool is_partial;
#if TRACE_LIKE_HELL
elog(PARRAY_GIN_TRACE, "GIN extract_query invoked");
@@ -542,13 +658,17 @@ parray_gin_extract_query(PG_FUNCTION_ARGS)
ereport(ERROR, (errcode(ERRCODE_INVALID_NAME),
errmsg("wrong strategy %d", strategy)));
}
+ is_partial = strategy == PARRAY_GIN_STRATEGY_CONTAINS_PARTIAL ||
+ strategy == PARRAY_GIN_STRATEGY_CONTAINED_BY_PARTIAL;
/* query is an array of texts, parse it and return trigrams */
- keys = (Datum *) DirectFunctionCall3Coll(trigrams_from_textarray,
- PG_GET_COLLATION(), query, PointerGetDatum(nkeys),
- BoolGetDatum(strategy == PARRAY_GIN_STRATEGY_CONTAINS_PARTIAL));
+ keys = (Datum *) DirectFunctionCall4Coll(trigrams_from_textarray,
+ PG_GET_COLLATION(),
+ query,
+ PointerGetDatum(nkeys),
+ BoolGetDatum(is_partial),
+ PointerGetDatum(extra_data));
*nullFlags = NULL;
-
*pmatch = NULL;
PG_RETURN_POINTER(keys);
@@ -570,6 +690,7 @@ parray_gin_consistent(PG_FUNCTION_ARGS)
/* Datum query = PG_GETARG_DATUM(2); */
int32 nkeys = PG_GETARG_INT32(3);
+ Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
bool *recheck = (bool *) PG_GETARG_POINTER(5);
bool result = false;
@@ -581,11 +702,43 @@ parray_gin_consistent(PG_FUNCTION_ARGS)
errmsg("wrong strategy %d", strategy)));
}
*recheck = true;
- /* all */
- result = true;
- for (i = 0; i < nkeys; ++i)
- if (!check[i])
- result = false;
+
+ if (strategy == PARRAY_GIN_STRATEGY_CONTAINS ||
+ strategy == PARRAY_GIN_STRATEGY_CONTAINS_PARTIAL)
+ {
+ /* all */
+ result = true;
+ for (i = 0; i < nkeys; ++i)
+ if (!check[i])
+ result = false;
+ }
+ else
+ {
+ int32 **positions = (int32 **) extra_data;
+ int extent,
+ prev = 0;
+
+ /*
+ * Contained-by query is very suspicious because it triggers even if a
+ * single key from a query is found in a indexed item. Recheck should
+ * drop all false positive items
+ *
+ * Check each extent described by extra_data (= each query element)
+ */
+ if (!extra_data)
+ ereport(ERROR, (errcode(ERRCODE_INVALID_NAME),
+ errmsg("not enough data for strategy %d", strategy)));
+ for (extent = 0; extent < *(positions[0]) - 1; ++extent)
+ {
+ result = true;
+ for (i = prev; i < prev + *(positions[extent + 1]); ++i)
+ if (!check[i])
+ result = false;
+ if (result)
+ break;
+ prev += *(positions[extent]);
+ }
+ }
#if TRACE_LIKE_HELL
{
View
15 test/expected/index.out
@@ -72,6 +72,21 @@ select count(*) from test_table where val @> array['%'];
-- 0
select count(*) from test_table where val @@> array['%'];
0
+-- 3
+select count(*) from test_table where val <@ array['foo4', 'bar4', 'baz4'];
+3
+-- 3
+select count(*) from test_table where val <@ array['foo4', 'bar4', 'baz4', 'qux'];
+3
+-- 32
+select count(*) from test_table where val <@@ array['foo%', 'bar%', 'baz%'];
+32
+-- 2
+select count(*) from test_table where val <@@ array['foo4', 'baz%', 'bar4%e'];
+2
+-- 0
+select count(*) from test_table where val <@@ array['qux'];
+0
set enable_seqscan to on;
\t off
\pset format aligned
View
18 test/expected/op.out
@@ -76,5 +76,23 @@ f
-- t
select (array['food', 'booze', 'baz']) @@> array['%ooz%'];
t
+-- t
+select array['foo', 'cow'] <@@ array['f%', 'cow'];
+t
+-- t
+select array['foo', 'cow'] <@@ array['cow', 'f%'];
+t
+-- f
+select array['foo', 'cow'] <@@ array['qux', 'f%'];
+f
+-- t
+select array['foo', 'cow'] <@@ array['f%', 'cow', 'baz'];
+t
+-- t
+select array['foo'] <@@ array['f%', 'c%'];
+t
+-- f
+select array['cow'] <@@ array['f%'];
+f
\t off
\pset format aligned
View
11 test/sql/index.sql
@@ -67,6 +67,17 @@ select count(*) from test_table where val @> array['%'];
-- 0
select count(*) from test_table where val @@> array['%'];
+-- 3
+select count(*) from test_table where val <@ array['foo4', 'bar4', 'baz4'];
+-- 3
+select count(*) from test_table where val <@ array['foo4', 'bar4', 'baz4', 'qux'];
+-- 32
+select count(*) from test_table where val <@@ array['foo%', 'bar%', 'baz%'];
+-- 2
+select count(*) from test_table where val <@@ array['foo4', 'baz%', 'bar4%e'];
+-- 0
+select count(*) from test_table where val <@@ array['qux'];
+
set enable_seqscan to on;
\t off
View
14 test/sql/op.sql
@@ -58,5 +58,19 @@ select (array['foo', 'boo', 'baz']) @@> array['%ooz%'];
-- t
select (array['food', 'booze', 'baz']) @@> array['%ooz%'];
+
+-- t
+select array['foo', 'cow'] <@@ array['f%', 'cow'];
+-- t
+select array['foo', 'cow'] <@@ array['cow', 'f%'];
+-- f
+select array['foo', 'cow'] <@@ array['qux', 'f%'];
+-- t
+select array['foo', 'cow'] <@@ array['f%', 'cow', 'baz'];
+-- t
+select array['foo'] <@@ array['f%', 'c%'];
+-- f
+select array['cow'] <@@ array['f%'];
+
\t off
\pset format aligned

0 comments on commit c845bbe

Please sign in to comment.