Skip to content

Commit

Permalink
Make compression use the defaults functions
Browse files Browse the repository at this point in the history
Previously, we created functions to calculate default order by and
segment by values. This PR uses those functions by default
when compression is enabled. We also added GUCs to disable those
functions or to use alternative functions for the defaults calculation.
  • Loading branch information
cevian committed Mar 18, 2024
1 parent 8a40e55 commit f64c19b
Show file tree
Hide file tree
Showing 97 changed files with 1,306 additions and 104 deletions.
1 change: 1 addition & 0 deletions .unreleased/pr_6696
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implements: #6696 Improve defaults for compression segment_by and order_by
24 changes: 12 additions & 12 deletions sql/compression_defaults.sql
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ BEGIN
INNER JOIN
pg_attribute a on (a.attnum = i.attnum AND a.attrelid = relation)
--right now stats are from the hypertable itself. Use chunks in the future.
INNER JOIN pg_statistic s ON (s.staattnum = a.attnum and s.starelid = relation)
INNER JOIN pg_stats s ON (s.attname = a.attname and s.schemaname = _schema_name and s.tablename = _table_name)
WHERE
a.attname NOT IN (SELECT column_name FROM _timescaledb_catalog.dimension d WHERE d.hypertable_id = _hypertable_row.id)
AND s.stadistinct > 1
AND s.n_distinct > 1
ORDER BY i.pos
LIMIT 1;

Expand All @@ -75,10 +75,10 @@ BEGIN
INNER JOIN
pg_attribute a on (a.attnum = i.attnum AND a.attrelid = relation)
--right now stats are from the hypertable itself. Use chunks in the future.
INNER JOIN pg_statistic s ON (s.staattnum = a.attnum and s.starelid = relation)
INNER JOIN pg_stats s ON (s.attname = a.attname and s.schemaname = _schema_name and s.tablename = _table_name)
WHERE
a.attname NOT IN (SELECT column_name FROM _timescaledb_catalog.dimension d WHERE d.hypertable_id = _hypertable_row.id)
AND s.stadistinct > 1
AND s.n_distinct > 1
ORDER BY i.pos
LIMIT 1;

Expand Down Expand Up @@ -106,10 +106,10 @@ BEGIN
LEFT JOIN
pg_catalog.pg_attrdef ad ON (ad.adrelid = relation AND ad.adnum = a.attnum)
LEFT JOIN
pg_statistic s ON (s.staattnum = a.attnum and s.starelid = relation)
pg_stats s ON (s.attname = a.attname and s.schemaname = _schema_name and s.tablename = _table_name)
WHERE
a.attname NOT IN (SELECT column_name FROM _timescaledb_catalog.dimension d WHERE d.hypertable_id = _hypertable_row.id)
AND s.stadistinct is null
AND s.n_distinct is null
AND a.attidentity = '' AND (ad.adbin IS NULL OR pg_get_expr(adbin, adrelid) not like 'nextval%')
ORDER BY i.pos
LIMIT 1;
Expand Down Expand Up @@ -141,10 +141,10 @@ BEGIN
LEFT JOIN
pg_catalog.pg_attrdef ad ON (ad.adrelid = relation AND ad.adnum = a.attnum)
LEFT JOIN
pg_statistic s ON (s.staattnum = a.attnum and s.starelid = relation)
pg_stats s ON (s.attname = a.attname and s.schemaname = _schema_name and s.tablename = _table_name)
WHERE
a.attname NOT IN (SELECT column_name FROM _timescaledb_catalog.dimension d WHERE d.hypertable_id = _hypertable_row.id)
AND s.stadistinct is null
AND s.n_distinct is null
AND a.attidentity = '' AND (ad.adbin IS NULL OR pg_get_expr(adbin, adrelid) not like 'nextval%')
ORDER BY i.pos
LIMIT 1;
Expand Down Expand Up @@ -285,12 +285,12 @@ BEGIN

--add DESC to any dimensions
SELECT
array_agg(
coalesce(array_agg(
CASE WHEN d.column_name IS NULL THEN
a.colname
format('%I', a.colname)
ELSE
a.colname || ' DESC'
END ORDER BY pos) INTO STRICT _orderby_clauses
format('%I DESC', a.colname)
END ORDER BY pos), array[]::text[]) INTO STRICT _orderby_clauses
FROM unnest(_orderby_names) WITH ORDINALITY as a(colname, pos)
LEFT JOIN _timescaledb_catalog.dimension d ON (d.column_name = a.colname AND d.hypertable_id = _hypertable_row.id);

Expand Down
14 changes: 5 additions & 9 deletions src/compression_with_clause.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ throw_order_by_error(char *order_by)
}

/* compress_orderby is parsed same as order by in select queries */
static OrderBySettings
parse_order_collist(char *inpstr, Hypertable *hypertable)
OrderBySettings
ts_compress_parse_order_collist(char *inpstr, Hypertable *hypertable)
{
StringInfoData buf;
List *parsed;
Expand Down Expand Up @@ -322,13 +322,9 @@ ts_compress_hypertable_parse_segment_by(WithClauseResult *parsed_options, Hypert
OrderBySettings
ts_compress_hypertable_parse_order_by(WithClauseResult *parsed_options, Hypertable *hypertable)
{
if (parsed_options[CompressOrderBy].is_default == false)
{
Datum textarg = parsed_options[CompressOrderBy].parsed;
return parse_order_collist(TextDatumGetCString(textarg), hypertable);
}
else
return (OrderBySettings){ 0 };
Ensure(parsed_options[CompressOrderBy].is_default == false, "with clause is not default");
Datum textarg = parsed_options[CompressOrderBy].parsed;
return ts_compress_parse_order_collist(TextDatumGetCString(textarg), hypertable);
}

/* returns List of CompressedParsedCol
Expand Down
2 changes: 2 additions & 0 deletions src/compression_with_clause.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,5 @@ ts_compress_hypertable_parse_order_by(WithClauseResult *parsed_options, Hypertab
extern TSDLLEXPORT Interval *
ts_compress_hypertable_parse_chunk_time_interval(WithClauseResult *parsed_options,
Hypertable *hypertable);
extern TSDLLEXPORT OrderBySettings ts_compress_parse_order_collist(char *inpstr,
Hypertable *hypertable);
116 changes: 116 additions & 0 deletions src/guc.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@
*/
#include <postgres.h>
#include <utils/guc.h>
#include <utils/varlena.h>
#include <utils/regproc.h>
#include <parser/parse_func.h>
#include <miscadmin.h>

#include "guc.h"
#include "extension.h"
#include "license_guc.h"
#include "config.h"
#include "hypertable_cache.h"
Expand Down Expand Up @@ -76,6 +80,8 @@ TSDLLEXPORT bool ts_guc_enable_bulk_decompression = true;
TSDLLEXPORT bool ts_guc_auto_sparse_indexes = true;
TSDLLEXPORT int ts_guc_bgw_log_level = WARNING;
TSDLLEXPORT bool ts_guc_enable_skip_scan = true;
static char *ts_guc_default_segmentby_fn = NULL;
static char *ts_guc_default_orderby_fn = NULL;
/* default value of ts_guc_max_open_chunks_per_insert and ts_guc_max_cached_chunks_per_hypertable
* will be set as their respective boot-value when the GUC mechanism starts up */
int ts_guc_max_open_chunks_per_insert;
Expand Down Expand Up @@ -218,6 +224,90 @@ assign_max_open_chunks_per_insert_hook(int newval, void *extra)
validate_chunk_cache_sizes(ts_guc_max_cached_chunks_per_hypertable, newval);
}

/*
 * Resolve the name of a default-segmentby function to its function OID.
 *
 * input_name is a (possibly schema-qualified) function name. The function
 * is looked up with a single regclass argument.
 *
 * Returns InvalidOid when input_name is NULL or empty. The lookup passes
 * true as the last argument of LookupFuncName so that a missing function
 * is reported as InvalidOid rather than as an error.
 */
static Oid
get_segmentby_func(char *input_name)
{
	List *namelist = NIL;

	/*
	 * The GUC variable backing this setting starts out as NULL at file
	 * scope, so treat NULL the same as the empty string instead of handing
	 * it to strlen().
	 */
	if (input_name == NULL || input_name[0] == '\0')
	{
		return InvalidOid;
	}

	/* PG16 added a second (error context) argument to this function. */
#if PG16_LT
	namelist = stringToQualifiedNameList(input_name);
#else
	namelist = stringToQualifiedNameList(input_name, NULL);
#endif
	Oid argtyp[] = { REGCLASSOID };
	return LookupFuncName(namelist, lengthof(argtyp), argtyp, true);
}

/*
 * GUC check hook for the default-segmentby function setting.
 *
 * Accepts the new value unless the extension is loaded, the value is
 * non-empty, and no matching function can be resolved.
 */
static bool
check_segmentby_func(char **newval, void **extra, GucSource source)
{
	/*
	 * Without the extension loaded the function cannot be looked up, so the
	 * value has to be accepted unverified.
	 */
	if (!ts_extension_is_loaded())
	{
		return true;
	}

	Oid resolved = get_segmentby_func(*newval);

	/* An empty value is always acceptable; otherwise the lookup must succeed. */
	if (strlen(*newval) == 0 || OidIsValid(resolved))
	{
		return true;
	}

	GUC_check_errdetail("Function \"%s\" does not exist.", *newval);
	return false;
}

/*
 * Return the OID of the function currently named by the default-segmentby
 * GUC, as resolved by get_segmentby_func(). The name is resolved on every
 * call.
 */
Oid
ts_guc_default_segmentby_fn_oid(void)
{
	return get_segmentby_func(ts_guc_default_segmentby_fn);
}

/*
 * Resolve the name of a default-orderby function to its function OID.
 *
 * input_name is a (possibly schema-qualified) function name. The function
 * is looked up with the argument types (regclass, text[]).
 *
 * Returns InvalidOid when input_name is NULL or empty. The lookup passes
 * true as the last argument of LookupFuncName so that a missing function
 * is reported as InvalidOid rather than as an error.
 */
static Oid
get_orderby_func(char *input_name)
{
	List *namelist = NIL;

	/*
	 * The GUC variable backing this setting starts out as NULL at file
	 * scope, so treat NULL the same as the empty string instead of handing
	 * it to strlen().
	 */
	if (input_name == NULL || input_name[0] == '\0')
	{
		return InvalidOid;
	}

	/* PG16 added a second (error context) argument to this function. */
#if PG16_LT
	namelist = stringToQualifiedNameList(input_name);
#else
	namelist = stringToQualifiedNameList(input_name, NULL);
#endif
	Oid argtyp[] = { REGCLASSOID, TEXTARRAYOID };
	return LookupFuncName(namelist, lengthof(argtyp), argtyp, true);
}

/*
 * GUC check hook for the default-orderby function setting.
 *
 * Accepts the new value unless the extension is loaded, the value is
 * non-empty, and no matching function can be resolved.
 */
static bool
check_orderby_func(char **newval, void **extra, GucSource source)
{
	/*
	 * Without the extension loaded the function cannot be looked up, so the
	 * value has to be accepted unverified.
	 */
	if (!ts_extension_is_loaded())
	{
		return true;
	}

	Oid resolved = get_orderby_func(*newval);

	/* An empty value is always acceptable; otherwise the lookup must succeed. */
	if (strlen(*newval) == 0 || OidIsValid(resolved))
	{
		return true;
	}

	GUC_check_errdetail("Function \"%s\" does not exist.", *newval);
	return false;
}

/*
 * Return the OID of the function currently named by the default-orderby
 * GUC, as resolved by get_orderby_func(). The name is resolved on every
 * call.
 */
Oid
ts_guc_default_orderby_fn_oid(void)
{
	return get_orderby_func(ts_guc_default_orderby_fn);
}

void
_guc_init(void)
{
Expand Down Expand Up @@ -570,6 +660,32 @@ _guc_init(void)
NULL);
#endif

DefineCustomStringVariable(/* name= */ MAKE_EXTOPTION("compression_segmentby_default_function"),
/* short_desc= */ "Function that sets default segment_by",
/* long_desc= */
"Function to use for calculating default segment_by setting for "
"compression",
/* valueAddr= */ &ts_guc_default_segmentby_fn,
/* Value= */ "_timescaledb_functions.get_segmentby_defaults",
/* context= */ PGC_USERSET,
/* flags= */ 0,
/* check_hook= */ check_segmentby_func,
/* assign_hook= */ NULL,
/* show_hook= */ NULL);

DefineCustomStringVariable(/* name= */ MAKE_EXTOPTION("compression_orderby_default_function"),
/* short_desc= */ "Function that sets default order_by",
/* long_desc= */
"Function to use for calculating default order_by setting for "
"compression",
/* valueAddr= */ &ts_guc_default_orderby_fn,
/* Value= */ "_timescaledb_functions.get_orderby_defaults",
/* context= */ PGC_USERSET,
/* flags= */ 0,
/* check_hook= */ check_orderby_func,
/* assign_hook= */ NULL,
/* show_hook= */ NULL);

DefineCustomStringVariable(/* name= */ MAKE_EXTOPTION("license"),
/* short_desc= */ "TimescaleDB license type",
/* long_desc= */ "Determines which features are enabled",
Expand Down
2 changes: 2 additions & 0 deletions src/guc.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,5 @@ typedef enum
} FeatureFlagType;

extern TSDLLEXPORT void ts_feature_flag_check(FeatureFlagType);
extern TSDLLEXPORT Oid ts_guc_default_segmentby_fn_oid(void);
extern TSDLLEXPORT Oid ts_guc_default_orderby_fn_oid(void);
36 changes: 36 additions & 0 deletions src/ts_catalog/array_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,42 @@ ts_array_is_member(ArrayType *arr, const char *name)
return ret;
}

/*
 * Append the elements of a text array to a StringInfo as a list separated
 * by ", ".
 *
 * Elements are written in iteration order. A NULL array pointer appends
 * nothing. The array is expected to be at most one-dimensional, to have
 * text elements, and to contain no NULL elements.
 */
extern TSDLLEXPORT void
ts_array_append_stringinfo(ArrayType *arr, StringInfo info)
{
	bool first = true;
	Datum datum;
	bool null;

	if (!arr)
		return;

	Assert(ARR_NDIM(arr) <= 1);
	Assert(arr->elemtype == TEXTOID);

	ArrayIterator it = array_create_iterator(arr, 0, NULL);
	while (array_iterate(it, &datum, &null))
	{
		Assert(!null);
		/*
		 * Our internal catalog arrays should either be NULL or
		 * have non-NULL members. During normal operation it should
		 * never have NULL members. If we have NULL members either
		 * the catalog is corrupted or some catalog tampering has
		 * happened.
		 */
		Ensure(!null, "array element was NULL");

		/* Emit the separator before every element except the first. */
		if (!first)
			appendStringInfoString(info, ", ");
		else
			first = false;

		/* Plain string append; no format processing is needed here. */
		appendStringInfoString(info, TextDatumGetCString(datum));
	}

	array_free_iterator(it);
}

extern TSDLLEXPORT int
ts_array_position(ArrayType *arr, const char *name)
{
Expand Down
1 change: 1 addition & 0 deletions src/ts_catalog/array_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
extern TSDLLEXPORT int ts_array_length(ArrayType *arr);
extern TSDLLEXPORT bool ts_array_equal(ArrayType *left, ArrayType *right);
extern TSDLLEXPORT bool ts_array_is_member(ArrayType *arr, const char *name);
extern TSDLLEXPORT void ts_array_append_stringinfo(ArrayType *arr, StringInfo info);
extern TSDLLEXPORT int ts_array_position(ArrayType *arr, const char *name);

extern TSDLLEXPORT bool ts_array_get_element_bool(ArrayType *arr, int position);
Expand Down
Loading

0 comments on commit f64c19b

Please sign in to comment.