From c54cf3ea56de9e05852ea54f90c55c5a221fa1bb Mon Sep 17 00:00:00 2001 From: Konstantina Skovola Date: Fri, 14 Oct 2022 10:43:18 +0300 Subject: [PATCH] Add job execution statistics to telemetry This patch adds two new fields to the telemetry report, `stats_by_job_type` and `errors_by_sqlerrcode`. Both report results grouped by job type (different types of policies or user defined action). The patch also adds a new field to the `bgw_job_stat` table, `total_duration_failures`, to separate the duration of the failed runs from the duration of successful ones. --- sql/pre_install/tables.sql | 1 + sql/updates/latest-dev.sql | 3 +- src/bgw/job_stat.c | 14 +- src/telemetry/stats.h | 12 + src/telemetry/telemetry.c | 285 ++++++++++++++++++ src/ts_catalog/catalog.h | 2 + test/expected/telemetry.out | 16 +- tsl/test/expected/bgw_db_scheduler.out | 93 +++--- tsl/test/expected/bgw_db_scheduler_fixed.out | 36 +-- tsl/test/expected/bgw_reorder_drop_chunks.out | 12 +- tsl/test/expected/scheduler_fixed.out | 4 +- tsl/test/expected/telemetry_stats.out | 105 +++++++ tsl/test/sql/telemetry_stats.sql | 42 +++ 13 files changed, 542 insertions(+), 83 deletions(-) diff --git a/sql/pre_install/tables.sql b/sql/pre_install/tables.sql index 3180b38e704..1b6798499d9 100644 --- a/sql/pre_install/tables.sql +++ b/sql/pre_install/tables.sql @@ -307,6 +307,7 @@ CREATE TABLE _timescaledb_internal.bgw_job_stat ( last_run_success bool NOT NULL, total_runs bigint NOT NULL, total_duration interval NOT NULL, + total_duration_failures interval NOT NULL, total_successes bigint NOT NULL, total_failures bigint NOT NULL, total_crashes bigint NOT NULL, diff --git a/sql/updates/latest-dev.sql b/sql/updates/latest-dev.sql index 49a6813b525..3b8b0984d1b 100644 --- a/sql/updates/latest-dev.sql +++ b/sql/updates/latest-dev.sql @@ -57,6 +57,7 @@ CREATE TABLE _timescaledb_internal.bgw_job_stat ( last_run_success bool NOT NULL, total_runs bigint NOT NULL, total_duration interval NOT NULL, + total_duration_failures 
interval NOT NULL, total_successes bigint NOT NULL, total_failures bigint NOT NULL, total_crashes bigint NOT NULL, @@ -69,7 +70,7 @@ CREATE TABLE _timescaledb_internal.bgw_job_stat ( ); INSERT INTO _timescaledb_internal.bgw_job_stat SELECT - job_id, last_start, last_finish, next_start, last_successful_finish, last_run_success, total_runs, total_duration, total_successes, total_failures, total_crashes, consecutive_failures, consecutive_crashes, 0 + job_id, last_start, last_finish, next_start, last_successful_finish, last_run_success, total_runs, total_duration, '00:00:00'::interval, total_successes, total_failures, total_crashes, consecutive_failures, consecutive_crashes, 0 FROM _timescaledb_internal._tmp_bgw_job_stat; DROP TABLE _timescaledb_internal._tmp_bgw_job_stat; diff --git a/src/bgw/job_stat.c b/src/bgw/job_stat.c index b560b88798a..8ecbf9a84da 100644 --- a/src/bgw/job_stat.c +++ b/src/bgw/job_stat.c @@ -433,10 +433,6 @@ bgw_job_stat_tuple_mark_end(TupleInfo *ti, void *const data) duration = DatumGetIntervalP(DirectFunctionCall2(timestamp_mi, TimestampTzGetDatum(fd->last_finish), TimestampTzGetDatum(fd->last_start))); - fd->total_duration = - *DatumGetIntervalP(DirectFunctionCall2(interval_pl, - IntervalPGetDatum(&fd->total_duration), - IntervalPGetDatum(duration))); /* undo marking created by start marks */ fd->last_run_success = result_ctx->result == JOB_SUCCESS ? 
true : false; @@ -449,6 +445,10 @@ bgw_job_stat_tuple_mark_end(TupleInfo *ti, void *const data) fd->total_success++; fd->consecutive_failures = 0; fd->last_successful_finish = fd->last_finish; + fd->total_duration = + *DatumGetIntervalP(DirectFunctionCall2(interval_pl, + IntervalPGetDatum(&fd->total_duration), + IntervalPGetDatum(duration))); /* Mark the next start at the end if the job itself hasn't */ if (!bgw_job_stat_next_start_was_set(fd)) fd->next_start = calculate_next_start_on_success(fd->last_finish, result_ctx->job); @@ -457,6 +457,10 @@ bgw_job_stat_tuple_mark_end(TupleInfo *ti, void *const data) { fd->total_failures++; fd->consecutive_failures++; + fd->total_duration_failures = + *DatumGetIntervalP(DirectFunctionCall2(interval_pl, + IntervalPGetDatum(&fd->total_duration_failures), + IntervalPGetDatum(duration))); /* * Mark the next start at the end if the job itself hasn't (this may @@ -543,6 +547,8 @@ bgw_job_stat_insert_relation(Relation rel, int32 bgw_job_id, bool mark_start, Int64GetDatum((mark_start ? 
1 : 0)); values[AttrNumberGetAttrOffset(Anum_bgw_job_stat_total_duration)] = IntervalPGetDatum(&zero_ival); + values[AttrNumberGetAttrOffset(Anum_bgw_job_stat_total_duration_failures)] = + IntervalPGetDatum(&zero_ival); values[AttrNumberGetAttrOffset(Anum_bgw_job_stat_total_success)] = Int64GetDatum(0); values[AttrNumberGetAttrOffset(Anum_bgw_job_stat_total_failures)] = Int64GetDatum(0); values[AttrNumberGetAttrOffset(Anum_bgw_job_stat_consecutive_failures)] = Int32GetDatum(0); diff --git a/src/telemetry/stats.h b/src/telemetry/stats.h index 1db0404607e..f0b04a191ec 100644 --- a/src/telemetry/stats.h +++ b/src/telemetry/stats.h @@ -90,6 +90,18 @@ typedef struct TelemetryStats BaseStats views; } TelemetryStats; +typedef struct TelemetryJobStats +{ + int64 total_runs; + int64 total_successes; + int64 total_failures; + int64 total_crashes; + int32 max_consecutive_failures; + int32 max_consecutive_crashes; + Interval *total_duration; + Interval *total_duration_failures; +} TelemetryJobStats; + extern void ts_telemetry_stats_gather(TelemetryStats *stats); #endif /* TIMESCALEDB_TELEMETRY_STATS_H */ diff --git a/src/telemetry/telemetry.c b/src/telemetry/telemetry.c index bf2b98e7c62..2e9b19c9a0b 100644 --- a/src/telemetry/telemetry.c +++ b/src/telemetry/telemetry.c @@ -35,6 +35,8 @@ #include "cross_module_fn.h" +#include + #define TS_TELEMETRY_VERSION 2 #define TS_VERSION_JSON_FIELD "current_timescaledb_version" #define TS_IS_UPTODATE_JSON_FIELD "is_up_to_date" @@ -87,6 +89,9 @@ #define TIMESCALE_ANALYTICS "timescale_analytics" #define TIMESCALEDB_TOOLKIT "timescaledb_toolkit" +#define REQ_JOB_STATS_BY_JOB_TYPE "stats_by_job_type" +#define REQ_NUM_ERR_BY_SQLERRCODE "errors_by_sqlerrcode" + static const char *related_extensions[] = { PG_PROMETHEUS, PROMSCALE, POSTGIS, TIMESCALE_ANALYTICS, TIMESCALEDB_TOOLKIT, }; @@ -264,6 +269,266 @@ add_job_counts(JsonbParseState *state) ts_jsonb_add_int32(state, REQ_NUM_USER_DEFINED_ACTIONS_FIXED, counts.user_defined_action_fixed); } 
+static JsonbValue * +add_errors_by_sqlerrcode_internal(JsonbParseState *parse_state, const char *job_type, + Jsonb *sqlerrs_jsonb) +{ + JsonbIterator *it; + JsonbIteratorToken type; + JsonbValue val; + JsonbValue *ret; + JsonbValue key = { + .type = jbvString, + .val.string.val = pstrdup(job_type), + .val.string.len = strlen(job_type), + }; + + ret = pushJsonbValue(&parse_state, WJB_KEY, &key); + ret = pushJsonbValue(&parse_state, WJB_BEGIN_OBJECT, NULL); + + /* we don't expect nested values here */ + it = JsonbIteratorInit(&sqlerrs_jsonb->root); + type = JsonbIteratorNext(&it, &val, true /*skip_nested*/); + if (type != WJB_BEGIN_OBJECT) + elog(ERROR, "invalid JSON format"); + while ((type = JsonbIteratorNext(&it, &val, true))) + { + const char *errcode; + int64 errcnt; + + if (type == WJB_END_OBJECT) + break; + else if (type == WJB_KEY) + { + errcode = pnstrdup(val.val.string.val, val.val.string.len); + /* get the corresponding value for this key */ + type = JsonbIteratorNext(&it, &val, true); + if (type != WJB_VALUE) + elog(ERROR, "unexpected jsonb type"); + errcnt = + DatumGetInt64(DirectFunctionCall1(numeric_int8, NumericGetDatum(val.val.numeric))); + ts_jsonb_add_int64(parse_state, errcode, errcnt); + } + else + elog(ERROR, "unexpected jsonb type"); + } + + ret = pushJsonbValue(&parse_state, WJB_END_OBJECT, NULL); + return ret; +} +/* this function queries the database through SPI and gets back a set of records + that look like (job_type TEXT, jsonb_object_agg JSONB). 
+ For example, (user_defined_action, {"P0001": 2, "42883": 5}) + (we are expecting about 6 rows depending + on how we write the query and if we exclude any jobs) + Then for each returned row adds a new kv pair to the jsonb, + which looks like "job_type": {"errtype1": errcnt1, ...} */ +static void +add_errors_by_sqlerrcode(JsonbParseState *parse_state) +{ + int res; + StringInfo command; + MemoryContext old_context = CurrentMemoryContext, spi_context; + + const char *command_string = + "SELECT " + "job_type, jsonb_object_agg(sqlerrcode, count) " + "FROM" + "(" + " SELECT (" + " CASE " + " WHEN error_data ->> \'proc_schema\' = \'_timescaledb_internal\'" + " AND error_data ->> \'proc_name\' ~ " + "\'^policy_(retention|compression|reorder|refresh_continuous_" + "aggregate|telemetry|job_error_retention)$\' " + " THEN error_data ->> \'proc_name\' " + " ELSE \'user_defined_action\'" + " END" + " ) as job_type, " + " error_data ->> \'sqlerrcode\' as sqlerrcode, " + " pg_catalog.COUNT(*) " + " FROM " + " _timescaledb_internal.job_errors " + " WHERE error_data ->> \'sqlerrcode\' IS NOT NULL " + " GROUP BY job_type, error_data->> \'sqlerrcode\' " + " ORDER BY job_type" + ") q " + "GROUP BY q.job_type"; + + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "could not connect to SPI"); + + /* SPI calls must be qualified otherwise they are unsafe */ + res = SPI_exec("SET search_path TO pg_catalog, pg_temp", 0); + if (res < 0) + ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), (errmsg("could not set search_path")))); + + command = makeStringInfo(); + + appendStringInfoString(command, command_string); + res = SPI_execute(command->data, true /*read only*/, 0 /* count */); + if (res < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + (errmsg("could not get errors by sqlerrcode and job type")))); + + /* we expect about 6 rows returned, each row is a record (TEXT, JSONB) */ + for (uint64 i = 0; i < SPI_processed; i++) + { + Datum record_jobtype, record_jsonb; + bool 
isnull_jobtype, isnull_jsonb; + + record_jobtype = + SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull_jobtype); + if (isnull_jobtype) + elog(ERROR, "null job type returned"); + record_jsonb = + SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull_jsonb); + /* this jsonb looks like {"P0001": 32, "42883": 6} */ + Jsonb *sqlerrs_jsonb = isnull_jsonb ? NULL : DatumGetJsonbP(record_jsonb); + + if (sqlerrs_jsonb == NULL) + continue; + /* the jsonb object cannot be created in the SPI context or it will be lost */ + spi_context = MemoryContextSwitchTo(old_context); + add_errors_by_sqlerrcode_internal(parse_state, + TextDatumGetCString(record_jobtype), + sqlerrs_jsonb); + old_context = MemoryContextSwitchTo(spi_context); + } + + res = SPI_exec("RESET search_path", 0); + res = SPI_finish(); + + Assert(res == SPI_OK_FINISH); +} + +static JsonbValue * +add_job_stats_internal(JsonbParseState *state, const char *job_type, TelemetryJobStats *stats) +{ + JsonbValue key = { + .type = jbvString, + .val.string.val = pstrdup(job_type), + .val.string.len = strlen(job_type), + }; + pushJsonbValue(&state, WJB_KEY, &key); + pushJsonbValue(&state, WJB_BEGIN_OBJECT, NULL); + + ts_jsonb_add_int64(state, "total_runs", stats->total_runs); + ts_jsonb_add_int64(state, "total_successes", stats->total_successes); + ts_jsonb_add_int64(state, "total_failures", stats->total_failures); + ts_jsonb_add_int64(state, "total_crashes", stats->total_crashes); + ts_jsonb_add_int32(state, "max_consecutive_failures", stats->max_consecutive_failures); + ts_jsonb_add_int32(state, "max_consecutive_crashes", stats->max_consecutive_crashes); + ts_jsonb_add_interval(state, "total_duration", stats->total_duration); + ts_jsonb_add_interval(state, "total_duration_failures", stats->total_duration_failures); + + return pushJsonbValue(&state, WJB_END_OBJECT, NULL); +} + +static void +add_job_stats_by_job_type(JsonbParseState *parse_state) +{ + StringInfo command; + int res; + 
MemoryContext old_context = CurrentMemoryContext, spi_context; + SPITupleTable *tuptable = NULL; + + const char *command_string = + "SELECT (" + " CASE " + " WHEN j.proc_schema = \'_timescaledb_internal\' AND j.proc_name ~ " + "\'^policy_(retention|compression|reorder|refresh_continuous_aggregate|telemetry|job_error_" + "retention)$\' " + " THEN j.proc_name::TEXT " + " ELSE \'user_defined_action\' " + " END" + ") AS job_type, " + " SUM(total_runs)::BIGINT AS total_runs, " + " SUM(total_successes)::BIGINT AS total_successes, " + " SUM(total_failures)::BIGINT AS total_failures, " + " SUM(total_crashes)::BIGINT AS total_crashes, " + " SUM(total_duration) AS total_duration, " + " SUM(total_duration_failures) AS total_duration_failures, " + " MAX(consecutive_failures) AS max_consecutive_failures, " + " MAX(consecutive_crashes) AS max_consecutive_crashes " + "FROM " + " _timescaledb_internal.bgw_job_stat s " + " JOIN _timescaledb_config.bgw_job j on j.id = s.job_id " + "GROUP BY " + "job_type"; + + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "could not connect to SPI"); + + /* SPI calls must be qualified otherwise they are unsafe */ + res = SPI_exec("SET search_path TO pg_catalog, pg_temp", 0); + if (res < 0) + ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), (errmsg("could not set search_path")))); + + command = makeStringInfo(); + + appendStringInfoString(command, command_string); + res = SPI_execute(command->data, true /* read_only */, 0 /*count*/); + if (res < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + (errmsg("could not get job statistics by job type")))); + /* + * a row returned looks like this: + * (job_type, total_runs, total_successes, total_failures, total_crashes, total_duration, + * total_duration_failures, max_consec_fails, max_consec_crashes) + * ("policy_telemetry", 12, 10, 1, 1, 00:00:11, 00:00:01, 1, 1) + */ + for (uint64 i = 0; i < SPI_processed; i++) + { + tuptable = SPI_tuptable; + TupleDesc tupdesc = tuptable->tupdesc; + Datum 
jobtype_datum; + Datum total_runs, total_successes, total_failures, total_crashes; + Datum total_duration, total_duration_failures, max_consec_crashes, max_consec_fails; + + bool isnull_jobtype, isnull_runs, isnull_successes, isnull_failures, isnull_crashes; + bool isnull_duration, isnull_duration_failures, isnull_consec_crashes, isnull_consec_fails; + + jobtype_datum = + SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull_jobtype); + if (isnull_jobtype) + elog(ERROR, "null job type returned"); + total_runs = SPI_getbinval(tuptable->vals[i], tupdesc, 2, &isnull_runs); + total_successes = SPI_getbinval(tuptable->vals[i], tupdesc, 3, &isnull_successes); + total_failures = SPI_getbinval(tuptable->vals[i], tupdesc, 4, &isnull_failures); + total_crashes = SPI_getbinval(tuptable->vals[i], tupdesc, 5, &isnull_crashes); + total_duration = SPI_getbinval(tuptable->vals[i], tupdesc, 6, &isnull_duration); + total_duration_failures = + SPI_getbinval(tuptable->vals[i], tupdesc, 7, &isnull_duration_failures); + max_consec_fails = SPI_getbinval(tuptable->vals[i], tupdesc, 8, &isnull_consec_fails); + max_consec_crashes = SPI_getbinval(tuptable->vals[i], tupdesc, 9, &isnull_consec_crashes); + + if (isnull_jobtype || isnull_runs || isnull_successes || isnull_failures || + isnull_crashes || isnull_duration || isnull_consec_crashes || isnull_consec_fails) + { + elog(ERROR, "null record field returned"); + } + + spi_context = MemoryContextSwitchTo(old_context); + TelemetryJobStats stats = { .total_runs = DatumGetInt64(total_runs), + .total_successes = DatumGetInt64(total_successes), + .total_failures = DatumGetInt64(total_failures), + .total_crashes = DatumGetInt64(total_crashes), + .max_consecutive_failures = DatumGetInt32(max_consec_fails), + .max_consecutive_crashes = DatumGetInt32(max_consec_crashes), + .total_duration = DatumGetIntervalP(total_duration), + .total_duration_failures = + DatumGetIntervalP(total_duration_failures) }; + 
add_job_stats_internal(parse_state, TextDatumGetCString(jobtype_datum), &stats); + old_context = MemoryContextSwitchTo(spi_context); + } + res = SPI_exec("RESET search_path", 0); + res = SPI_finish(); + Assert(res == SPI_OK_FINISH); +} + static int64 get_database_size() { @@ -545,6 +810,26 @@ build_telemetry_report() ts_jsonb_add_str(parse_state, REQ_BUILD_ARCHITECTURE, BUILD_PROCESSOR); ts_jsonb_add_int32(parse_state, REQ_BUILD_ARCHITECTURE_BIT_SIZE, get_architecture_bit_size()); ts_jsonb_add_int64(parse_state, REQ_DATA_VOLUME, get_database_size()); + /* add job execution stats */ + key.type = jbvString; + key.val.string.val = REQ_NUM_ERR_BY_SQLERRCODE; + key.val.string.len = strlen(REQ_NUM_ERR_BY_SQLERRCODE); + pushJsonbValue(&parse_state, WJB_KEY, &key); + pushJsonbValue(&parse_state, WJB_BEGIN_OBJECT, NULL); + + add_errors_by_sqlerrcode(parse_state); + + pushJsonbValue(&parse_state, WJB_END_OBJECT, NULL); + + key.type = jbvString; + key.val.string.val = REQ_JOB_STATS_BY_JOB_TYPE; + key.val.string.len = strlen(REQ_JOB_STATS_BY_JOB_TYPE); + pushJsonbValue(&parse_state, WJB_KEY, &key); + pushJsonbValue(&parse_state, WJB_BEGIN_OBJECT, NULL); + + add_job_stats_by_job_type(parse_state); + + pushJsonbValue(&parse_state, WJB_END_OBJECT, NULL); /* Add relation stats */ ts_telemetry_stats_gather(&relstats); diff --git a/src/ts_catalog/catalog.h b/src/ts_catalog/catalog.h index eac4a351c1b..62a843039bf 100644 --- a/src/ts_catalog/catalog.h +++ b/src/ts_catalog/catalog.h @@ -785,6 +785,7 @@ enum Anum_bgw_job_stat Anum_bgw_job_stat_last_run_success, Anum_bgw_job_stat_total_runs, Anum_bgw_job_stat_total_duration, + Anum_bgw_job_stat_total_duration_failures, Anum_bgw_job_stat_total_success, Anum_bgw_job_stat_total_failures, Anum_bgw_job_stat_total_crashes, @@ -806,6 +807,7 @@ typedef struct FormData_bgw_job_stat bool last_run_success; int64 total_runs; Interval total_duration; + Interval total_duration_failures; int64 total_success; int64 total_failures; int64 total_crashes; 
diff --git a/test/expected/telemetry.out b/test/expected/telemetry.out index e08dbf60c16..fcb06634c68 100644 --- a/test/expected/telemetry.out +++ b/test/expected/telemetry.out @@ -383,12 +383,14 @@ WHERE key != 'os_name_pretty'; build_os_version exported_db_uuid instance_metadata + stats_by_job_type telemetry_version build_architecture last_tuned_version postgresql_version related_extensions timescaledb_version + errors_by_sqlerrcode num_reorder_policies num_retention_policies num_compression_policies @@ -400,7 +402,7 @@ WHERE key != 'os_name_pretty'; num_compression_policies_fixed num_user_defined_actions_fixed num_continuous_aggs_policies_fixed -(34 rows) +(36 rows) CREATE MATERIALIZED VIEW telemetry_report AS SELECT t FROM get_telemetry_report() t; @@ -555,16 +557,16 @@ EXECUTE record_from_prepared; DEALLOCATE record_from_prepared; SELECT get_telemetry_report()->'functions_used'; - ?column? ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - {"pg_catalog.abs(integer)": 1, "pg_catalog.min(integer)": 1, "pg_catalog.sum(integer)": 1, "public.get_telemetry_report()": 1, "pg_catalog.int4eq(integer,integer)": 1, "pg_catalog.int4mi(integer,integer)": 11, "pg_catalog.int4pl(integer,integer)": 3, "pg_catalog.jsonb_object_field(pg_catalog.jsonb,pg_catalog.text)": 1} + ?column? 
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"pg_catalog.count()": 1, "pg_catalog.sum(bigint)": 4, "pg_catalog.abs(integer)": 1, "pg_catalog.max(integer)": 2, "pg_catalog.min(integer)": 1, "pg_catalog.sum(integer)": 1, "pg_catalog.int8(numeric)": 4, "pg_catalog.sum(interval)": 2, "public.get_telemetry_report()": 1, "pg_catalog.text(pg_catalog.name)": 1, "pg_catalog.int4eq(integer,integer)": 2, "pg_catalog.int4mi(integer,integer)": 11, "pg_catalog.int4pl(integer,integer)": 3, "pg_catalog.nameeq(pg_catalog.name,pg_catalog.name)": 1, "pg_catalog.texteq(pg_catalog.text,pg_catalog.text)": 1, "pg_catalog.nameregexeq(pg_catalog.name,pg_catalog.text)": 1, "pg_catalog.textregexeq(pg_catalog.text,pg_catalog.text)": 1, "pg_catalog.jsonb_object_agg(pg_catalog.\"any\",pg_catalog.\"any\")": 1, "pg_catalog.jsonb_object_field(pg_catalog.jsonb,pg_catalog.text)": 1, "pg_catalog.jsonb_object_field_text(pg_catalog.jsonb,pg_catalog.text)": 5} (1 row) -- check the report again to see if resetting works SELECT get_telemetry_report()->'functions_used'; - ?column? 
------------------------------------------------------------------------------------------------------------- - {"public.get_telemetry_report()": 1, "pg_catalog.jsonb_object_field(pg_catalog.jsonb,pg_catalog.text)": 1} + ?column? +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + {"pg_catalog.count()": 1, "pg_catalog.sum(bigint)": 4, "pg_catalog.max(integer)": 2, "pg_catalog.int8(numeric)": 4, "pg_catalog.sum(interval)": 2, "public.get_telemetry_report()": 1, "pg_catalog.text(pg_catalog.name)": 1, "pg_catalog.int4eq(integer,integer)": 1, "pg_catalog.nameeq(pg_catalog.name,pg_catalog.name)": 1, "pg_catalog.texteq(pg_catalog.text,pg_catalog.text)": 1, "pg_catalog.nameregexeq(pg_catalog.name,pg_catalog.text)": 1, "pg_catalog.textregexeq(pg_catalog.text,pg_catalog.text)": 1, "pg_catalog.jsonb_object_agg(pg_catalog.\"any\",pg_catalog.\"any\")": 1, "pg_catalog.jsonb_object_field(pg_catalog.jsonb,pg_catalog.text)": 1, "pg_catalog.jsonb_object_field_text(pg_catalog.jsonb,pg_catalog.text)": 5} (1 row) \c :TEST_DBNAME :ROLE_SUPERUSER diff --git a/tsl/test/expected/bgw_db_scheduler.out b/tsl/test/expected/bgw_db_scheduler.out index d890d9c8936..7f410d88126 100644 --- a/tsl/test/expected/bgw_db_scheduler.out +++ b/tsl/test/expected/bgw_db_scheduler.out @@ -168,21 +168,22 @@ SELECT 
ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(50); \x on SELECT * FROM _timescaledb_internal.bgw_job_stat; --[ RECORD 1 ]----------+-------------------------------- -job_id | 1000 -last_start | Fri Dec 31 16:00:00.05 1999 PST -last_finish | Fri Dec 31 16:00:00.05 1999 PST -next_start | Fri Dec 31 16:00:00.15 1999 PST -last_successful_finish | Fri Dec 31 16:00:00.05 1999 PST -last_run_success | t -total_runs | 1 -total_duration | @ 0 -total_successes | 1 -total_failures | 0 -total_crashes | 0 -consecutive_failures | 0 -consecutive_crashes | 0 -flags | 0 +-[ RECORD 1 ]-----------+-------------------------------- +job_id | 1000 +last_start | Fri Dec 31 16:00:00.05 1999 PST +last_finish | Fri Dec 31 16:00:00.05 1999 PST +next_start | Fri Dec 31 16:00:00.15 1999 PST +last_successful_finish | Fri Dec 31 16:00:00.05 1999 PST +last_run_success | t +total_runs | 1 +total_duration | @ 0 +total_duration_failures | @ 0 +total_successes | 1 +total_failures | 0 +total_crashes | 0 +consecutive_failures | 0 +consecutive_crashes | 0 +flags | 0 \x off SELECT * FROM timescaledb_information.job_stats; @@ -1451,21 +1452,22 @@ SELECT wait_for_job_1_to_run(2); \x on select * from _timescaledb_internal.bgw_job_stat; --[ RECORD 1 ]----------+-------------------------------- -job_id | 1024 -last_start | Fri Dec 31 16:00:00.15 1999 PST -last_finish | Fri Dec 31 16:00:00.15 1999 PST -next_start | Fri Dec 31 16:00:00.25 1999 PST -last_successful_finish | Fri Dec 31 16:00:00.15 1999 PST -last_run_success | t -total_runs | 2 -total_duration | @ 0 -total_successes | 2 -total_failures | 0 -total_crashes | 0 -consecutive_failures | 0 -consecutive_crashes | 0 -flags | 0 +-[ RECORD 1 ]-----------+-------------------------------- +job_id | 1024 +last_start | Fri Dec 31 16:00:00.15 1999 PST +last_finish | Fri Dec 31 16:00:00.15 1999 PST +next_start | Fri Dec 31 16:00:00.25 1999 PST +last_successful_finish | Fri Dec 31 16:00:00.15 1999 PST +last_run_success | t +total_runs | 2 
+total_duration | @ 0 +total_duration_failures | @ 0 +total_successes | 2 +total_failures | 0 +total_crashes | 0 +consecutive_failures | 0 +consecutive_crashes | 0 +flags | 0 \x off SELECT delete_job(x.id) FROM (select * from _timescaledb_config.bgw_job) x; @@ -1583,21 +1585,22 @@ SELECT * FROM sorted_bgw_log; \x on SELECT * FROM _timescaledb_internal.bgw_job_stat; --[ RECORD 1 ]----------+-------------------------------- -job_id | 1025 -last_start | Fri Dec 31 16:00:00.48 1999 PST -last_finish | Fri Dec 31 16:00:00.48 1999 PST -next_start | Fri Dec 31 16:00:00.49 1999 PST -last_successful_finish | Fri Dec 31 16:00:00.48 1999 PST -last_run_success | t -total_runs | 2 -total_duration | @ 0 -total_successes | 2 -total_failures | 0 -total_crashes | 0 -consecutive_failures | 0 -consecutive_crashes | 0 -flags | 0 +-[ RECORD 1 ]-----------+-------------------------------- +job_id | 1025 +last_start | Fri Dec 31 16:00:00.48 1999 PST +last_finish | Fri Dec 31 16:00:00.48 1999 PST +next_start | Fri Dec 31 16:00:00.49 1999 PST +last_successful_finish | Fri Dec 31 16:00:00.48 1999 PST +last_run_success | t +total_runs | 2 +total_duration | @ 0 +total_duration_failures | @ 0 +total_successes | 2 +total_failures | 0 +total_crashes | 0 +consecutive_failures | 0 +consecutive_crashes | 0 +flags | 0 \x off -- clean up jobs diff --git a/tsl/test/expected/bgw_db_scheduler_fixed.out b/tsl/test/expected/bgw_db_scheduler_fixed.out index e91aaaa8357..a847ecc547e 100644 --- a/tsl/test/expected/bgw_db_scheduler_fixed.out +++ b/tsl/test/expected/bgw_db_scheduler_fixed.out @@ -115,8 +115,8 @@ SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(50); -- empty SELECT * FROM _timescaledb_internal.bgw_job_stat; - job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags 
---------+------------+-------------+------------+------------------------+------------------+------------+----------------+-----------------+----------------+---------------+----------------------+---------------------+------- + job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_duration_failures | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags +--------+------------+-------------+------------+------------------------+------------------+------------+----------------+-------------------------+-----------------+----------------+---------------+----------------------+---------------------+------- (0 rows) -- empty @@ -152,8 +152,8 @@ SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(50); -- empty SELECT * FROM _timescaledb_internal.bgw_job_stat; - job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags ---------+------------+-------------+------------+------------------------+------------------+------------+----------------+-----------------+----------------+---------------+----------------------+---------------------+------- + job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_duration_failures | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags +--------+------------+-------------+------------+------------------------+------------------+------------+----------------+-------------------------+-----------------+----------------+---------------+----------------------+---------------------+------- (0 rows) SELECT * FROM timescaledb_information.job_stats; @@ -174,9 +174,9 @@ SELECT 
ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(50); (1 row) SELECT * FROM _timescaledb_internal.bgw_job_stat; - job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags ---------+---------------------------------+---------------------------------+--------------------------------+---------------------------------+------------------+------------+----------------+-----------------+----------------+---------------+----------------------+---------------------+------- - 1000 | Fri Dec 31 16:00:00.05 1999 PST | Fri Dec 31 16:00:00.05 1999 PST | Fri Dec 31 16:00:00.1 1999 PST | Fri Dec 31 16:00:00.05 1999 PST | t | 1 | @ 0 | 1 | 0 | 0 | 0 | 0 | 0 + job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_duration_failures | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags +--------+---------------------------------+---------------------------------+--------------------------------+---------------------------------+------------------+------------+----------------+-------------------------+-----------------+----------------+---------------+----------------------+---------------------+------- + 1000 | Fri Dec 31 16:00:00.05 1999 PST | Fri Dec 31 16:00:00.05 1999 PST | Fri Dec 31 16:00:00.1 1999 PST | Fri Dec 31 16:00:00.05 1999 PST | t | 1 | @ 0 | @ 0 | 1 | 0 | 0 | 0 | 0 | 0 (1 row) SELECT * FROM timescaledb_information.job_stats; @@ -1445,9 +1445,9 @@ SELECT wait_for_job_1_to_run(2); (1 row) select * from _timescaledb_internal.bgw_job_stat; - job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags 
---------+---------------------------------+---------------------------------+--------------------------------+---------------------------------+------------------+------------+----------------+-----------------+----------------+---------------+----------------------+---------------------+------- - 1024 | Fri Dec 31 16:00:00.15 1999 PST | Fri Dec 31 16:00:00.15 1999 PST | Fri Dec 31 16:00:00.2 1999 PST | Fri Dec 31 16:00:00.15 1999 PST | t | 2 | @ 0 | 2 | 0 | 0 | 0 | 0 | 0 + job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_duration_failures | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags +--------+---------------------------------+---------------------------------+--------------------------------+---------------------------------+------------------+------------+----------------+-------------------------+-----------------+----------------+---------------+----------------------+---------------------+------- + 1024 | Fri Dec 31 16:00:00.15 1999 PST | Fri Dec 31 16:00:00.15 1999 PST | Fri Dec 31 16:00:00.2 1999 PST | Fri Dec 31 16:00:00.15 1999 PST | t | 2 | @ 0 | @ 0 | 2 | 0 | 0 | 0 | 0 | 0 (1 row) SELECT delete_job(x.id) FROM (select * from _timescaledb_config.bgw_job) x; @@ -1564,9 +1564,9 @@ SELECT * FROM sorted_bgw_log; (16 rows) SELECT * FROM _timescaledb_internal.bgw_job_stat; - job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags ---------+---------------------------------+---------------------------------+---------------------------------+---------------------------------+------------------+------------+----------------+-----------------+----------------+---------------+----------------------+---------------------+------- - 1025 | Fri Dec 31 16:00:00.48 1999 
PST | Fri Dec 31 16:00:00.48 1999 PST | Fri Dec 31 16:00:00.49 1999 PST | Fri Dec 31 16:00:00.48 1999 PST | t | 2 | @ 0 | 2 | 0 | 0 | 0 | 0 | 0 + job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_duration_failures | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags +--------+---------------------------------+---------------------------------+---------------------------------+---------------------------------+------------------+------------+----------------+-------------------------+-----------------+----------------+---------------+----------------------+---------------------+------- + 1025 | Fri Dec 31 16:00:00.48 1999 PST | Fri Dec 31 16:00:00.48 1999 PST | Fri Dec 31 16:00:00.49 1999 PST | Fri Dec 31 16:00:00.48 1999 PST | t | 2 | @ 0 | @ 0 | 2 | 0 | 0 | 0 | 0 | 0 (1 row) -- clean up jobs @@ -1674,11 +1674,11 @@ SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); (1 row) SELECT * from _timescaledb_internal.bgw_job_stat; - job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags ---------+----------------------------------+----------------------------------+----------------------------------+----------------------------------+------------------+------------+----------------+-----------------+----------------+---------------+----------------------+---------------------+------- - 1026 | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | Sat Jan 01 16:00:00 2000 PST | Fri Dec 31 16:00:00 1999 PST | t | 1 | @ 0 | 1 | 0 | 0 | 0 | 0 | 0 - 1027 | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | Sat Jan 15 16:00:00 2000 PST | Fri Dec 31 16:00:00 1999 PST | t | 1 | @ 0 | 1 | 0 | 0 | 0 | 0 | 0 - 1028 | Fri Dec 31 16:00:00.005 1999 PST | 
Fri Dec 31 16:00:00.005 1999 PST | Fri Jan 21 16:00:00.005 2000 PST | Fri Dec 31 16:00:00.005 1999 PST | t | 1 | @ 0 | 1 | 0 | 0 | 0 | 0 | 0 + job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_duration_failures | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags +--------+----------------------------------+----------------------------------+----------------------------------+----------------------------------+------------------+------------+----------------+-------------------------+-----------------+----------------+---------------+----------------------+---------------------+------- + 1026 | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | Sat Jan 01 16:00:00 2000 PST | Fri Dec 31 16:00:00 1999 PST | t | 1 | @ 0 | @ 0 | 1 | 0 | 0 | 0 | 0 | 0 + 1027 | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | Sat Jan 15 16:00:00 2000 PST | Fri Dec 31 16:00:00 1999 PST | t | 1 | @ 0 | @ 0 | 1 | 0 | 0 | 0 | 0 | 0 + 1028 | Fri Dec 31 16:00:00.005 1999 PST | Fri Dec 31 16:00:00.005 1999 PST | Fri Jan 21 16:00:00.005 2000 PST | Fri Dec 31 16:00:00.005 1999 PST | t | 1 | @ 0 | @ 0 | 1 | 0 | 0 | 0 | 0 | 0 (3 rows) SELECT show_chunks('test_table_scheduler'); diff --git a/tsl/test/expected/bgw_reorder_drop_chunks.out b/tsl/test/expected/bgw_reorder_drop_chunks.out index f4fe2096a6e..d4cf17da6fd 100644 --- a/tsl/test/expected/bgw_reorder_drop_chunks.out +++ b/tsl/test/expected/bgw_reorder_drop_chunks.out @@ -230,9 +230,9 @@ SELECT * FROM timescaledb_information.jobs WHERE job_id=:reorder_job_id; SELECT * FROM _timescaledb_internal.bgw_job_stat where job_id=:reorder_job_id; - job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags 
---------+---------------------------------+---------------------------------+---------------------------------+---------------------------------+------------------+------------+----------------+-----------------+----------------+---------------+----------------------+---------------------+------- - 1000 | Fri Dec 31 16:00:00.05 1999 PST | Fri Dec 31 16:00:00.05 1999 PST | Tue Jan 04 16:00:00.05 2000 PST | Fri Dec 31 16:00:00.05 1999 PST | t | 3 | @ 0 | 3 | 0 | 0 | 0 | 0 | 0 + job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_duration_failures | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags +--------+---------------------------------+---------------------------------+---------------------------------+---------------------------------+------------------+------------+----------------+-------------------------+-----------------+----------------+---------------+----------------------+---------------------+------- + 1000 | Fri Dec 31 16:00:00.05 1999 PST | Fri Dec 31 16:00:00.05 1999 PST | Tue Jan 04 16:00:00.05 2000 PST | Fri Dec 31 16:00:00.05 1999 PST | t | 3 | @ 0 | @ 0 | 3 | 0 | 0 | 0 | 0 | 0 (1 row) -- three chunks clustered @@ -274,9 +274,9 @@ SELECT * FROM timescaledb_information.jobs WHERE job_id=:reorder_job_id; SELECT * FROM _timescaledb_internal.bgw_job_stat where job_id=:reorder_job_id; - job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags ---------+---------------------------------+---------------------------------+---------------------------------+---------------------------------+------------------+------------+----------------+-----------------+----------------+---------------+----------------------+---------------------+------- - 1000 | Fri Dec 31 
16:00:00.05 1999 PST | Fri Dec 31 16:00:00.05 1999 PST | Tue Jan 04 16:00:00.05 2000 PST | Fri Dec 31 16:00:00.05 1999 PST | t | 3 | @ 0 | 3 | 0 | 0 | 0 | 0 | 0 + job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_duration_failures | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags +--------+---------------------------------+---------------------------------+---------------------------------+---------------------------------+------------------+------------+----------------+-------------------------+-----------------+----------------+---------------+----------------------+---------------------+------- + 1000 | Fri Dec 31 16:00:00.05 1999 PST | Fri Dec 31 16:00:00.05 1999 PST | Tue Jan 04 16:00:00.05 2000 PST | Fri Dec 31 16:00:00.05 1999 PST | t | 3 | @ 0 | @ 0 | 3 | 0 | 0 | 0 | 0 | 0 (1 row) -- still have 3 chunks clustered diff --git a/tsl/test/expected/scheduler_fixed.out b/tsl/test/expected/scheduler_fixed.out index 96bbbff5279..5f1e2d4dd3d 100644 --- a/tsl/test/expected/scheduler_fixed.out +++ b/tsl/test/expected/scheduler_fixed.out @@ -45,8 +45,8 @@ perform pg_sleep(5); end $$; select * from _timescaledb_internal.bgw_job_stat; - job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags ---------+------------+-------------+------------+------------------------+------------------+------------+----------------+-----------------+----------------+---------------+----------------------+---------------------+------- + job_id | last_start | last_finish | next_start | last_successful_finish | last_run_success | total_runs | total_duration | total_duration_failures | total_successes | total_failures | total_crashes | consecutive_failures | consecutive_crashes | flags 
+--------+------------+-------------+------------+------------------------+------------------+------------+----------------+-------------------------+-----------------+----------------+---------------+----------------------+---------------------+------- (0 rows) -- add job that has a runtime well under the schedule interval diff --git a/tsl/test/expected/telemetry_stats.out b/tsl/test/expected/telemetry_stats.out index 8f2b095a46f..28baa3726a5 100644 --- a/tsl/test/expected/telemetry_stats.out +++ b/tsl/test/expected/telemetry_stats.out @@ -1037,6 +1037,111 @@ select r->'num_compression_policies_fixed' as compress_fixed, r->'num_retention_ 1 | 1 (1 row) +DELETE FROM _timescaledb_config.bgw_job WHERE id = 2; +TRUNCATE _timescaledb_internal.job_errors; +-- create some "errors" for testing +INSERT INTO +_timescaledb_config.bgw_job(id, application_name, schedule_interval, max_runtime, max_retries, retry_period, proc_schema, proc_name) +VALUES (2000, 'User-Defined Action [2000]', interval '3 days', interval '1 hour', 5, interval '5 min', 'public', 'custom_action_1'), +(2001, 'User-Defined Action [2001]', interval '3 days', interval '1 hour', 5, interval '5 min', 'public', 'custom_action_2'), +(2002, 'Compression Policy [2002]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_compression'), +(2003, 'Retention Policy [2003]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_retention'), +(2004, 'Refresh Continuous Aggregate Policy [2004]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_refresh_continuous_aggregate'), +-- user decided to define a custom action in the _timescaledb_internal schema, we group it with the User-defined actions +(2005, 'User-Defined Action [2005]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_refresh_continuous_aggregate'); +-- create some errors for them +INSERT INTO 
+_timescaledb_internal.job_errors(job_id, pid, start_time, finish_time, error_data) +values (2000, 12345, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"public", "proc_name": "custom_action_1"}'), +(2000, 23456, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"ABCDE", "proc_schema": "public", "proc_name": "custom_action_1"}'), +(2001, 54321, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"public", "proc_name": "custom_action_2"}'), +(2002, 23443, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"JF009", "proc_schema":"_timescaledb_internal", "proc_name": "policy_compression"}'), +(2003, 14567, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"_timescaledb_internal", "proc_name": "policy_retention"}'), +(2004, 78907, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"_timescaledb_internal", "proc_name": "policy_refresh_continuous_aggregate"}'), +(2005, 45757, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"_timescaledb_internal", "proc_name": "policy_refresh_continuous_aggregate"}'); +-- we have 3 error records for user-defined actions, and three for policies, so we expect 4 types of jobs +SELECT jsonb_pretty(get_telemetry_report() -> 'errors_by_sqlerrcode'); + jsonb_pretty +---------------------------------------------- + { + + "policy_retention": { + + "P0001": 1 + + }, + + "policy_compression": { + + "JF009": 1 + + }, + + "user_defined_action": { + + "ABCDE": 1, + + "P0001": 2 + + }, + + "policy_refresh_continuous_aggregate": {+ + "P0001": 2 + + } + + } +(1 row) + +-- for job statistics, insert some records into bgw_job_stats +INSERT INTO 
_timescaledb_internal.bgw_job_stat +values +(2000, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz, +false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0), +(2001, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz, +false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0), +(2002, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz, +false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0), +(2003, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz, +false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0), +(2004, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz, +false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0), +(2005, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz, +false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0); +SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type'); + jsonb_pretty +------------------------------------------------ + { + + "policy_retention": { + + "total_runs": 1, + + "total_crashes": 0, + + "total_duration": "@ 0", + + "total_failures": 1, + + "total_successes": 0, + + "max_consecutive_crashes": 0, + + "total_duration_failures": "@ 2 secs",+ + "max_consecutive_failures": 1 + + }, + + "policy_compression": { + + "total_runs": 1, + + "total_crashes": 0, + + "total_duration": "@ 0", + + "total_failures": 1, + + "total_successes": 0, + + "max_consecutive_crashes": 0, + + "total_duration_failures": "@ 2 secs",+ + "max_consecutive_failures": 1 + + }, + + "user_defined_action": { + + 
"total_runs": 2, + + "total_crashes": 0, + + "total_duration": "@ 0", + + "total_failures": 2, + + "total_successes": 0, + + "max_consecutive_crashes": 0, + + "total_duration_failures": "@ 4 secs",+ + "max_consecutive_failures": 1 + + }, + + "policy_refresh_continuous_aggregate": { + + "total_runs": 2, + + "total_crashes": 0, + + "total_duration": "@ 0", + + "total_failures": 2, + + "total_successes": 0, + + "max_consecutive_crashes": 0, + + "total_duration_failures": "@ 4 secs",+ + "max_consecutive_failures": 1 + + } + + } +(1 row) + DROP VIEW relations; DROP MATERIALIZED VIEW telemetry_report; \c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER diff --git a/tsl/test/sql/telemetry_stats.sql b/tsl/test/sql/telemetry_stats.sql index ef317df0fca..e8cd6cd8ccf 100644 --- a/tsl/test/sql/telemetry_stats.sql +++ b/tsl/test/sql/telemetry_stats.sql @@ -270,6 +270,48 @@ select add_compression_policy('hyper', interval '3 weeks', initial_start => now( select r->'num_user_defined_actions_fixed' as UDA_fixed, r->'num_user_defined_actions' AS UDA_drifting FROM get_telemetry_report() r; select r->'num_continuous_aggs_policies_fixed' as contagg_fixed, r->'num_continuous_aggs_policies' as contagg_drifting FROM get_telemetry_report() r; select r->'num_compression_policies_fixed' as compress_fixed, r->'num_retention_policies_fixed' as retention_fixed FROM get_telemetry_report() r; +DELETE FROM _timescaledb_config.bgw_job WHERE id = 2; +TRUNCATE _timescaledb_internal.job_errors; +-- create some "errors" for testing +INSERT INTO +_timescaledb_config.bgw_job(id, application_name, schedule_interval, max_runtime, max_retries, retry_period, proc_schema, proc_name) +VALUES (2000, 'User-Defined Action [2000]', interval '3 days', interval '1 hour', 5, interval '5 min', 'public', 'custom_action_1'), +(2001, 'User-Defined Action [2001]', interval '3 days', interval '1 hour', 5, interval '5 min', 'public', 'custom_action_2'), +(2002, 'Compression Policy [2002]', interval '3 days', interval '1 hour', 5, 
interval '5 min', '_timescaledb_internal', 'policy_compression'), +(2003, 'Retention Policy [2003]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_retention'), +(2004, 'Refresh Continuous Aggregate Policy [2004]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_refresh_continuous_aggregate'), +-- user decided to define a custom action in the _timescaledb_internal schema, we group it with the User-defined actions +(2005, 'User-Defined Action [2005]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_refresh_continuous_aggregate'); +-- create some errors for them +INSERT INTO +_timescaledb_internal.job_errors(job_id, pid, start_time, finish_time, error_data) +values (2000, 12345, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"public", "proc_name": "custom_action_1"}'), +(2000, 23456, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"ABCDE", "proc_schema": "public", "proc_name": "custom_action_1"}'), +(2001, 54321, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"public", "proc_name": "custom_action_2"}'), +(2002, 23443, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"JF009", "proc_schema":"_timescaledb_internal", "proc_name": "policy_compression"}'), +(2003, 14567, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"_timescaledb_internal", "proc_name": "policy_retention"}'), +(2004, 78907, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"_timescaledb_internal", "proc_name": "policy_refresh_continuous_aggregate"}'), +(2005, 45757, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 
00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"_timescaledb_internal", "proc_name": "policy_refresh_continuous_aggregate"}'); + +-- we have 3 error records for user-defined actions, and three for policies, so we expect 4 types of jobs +SELECT jsonb_pretty(get_telemetry_report() -> 'errors_by_sqlerrcode'); +-- for job statistics, insert some records into bgw_job_stats +INSERT INTO _timescaledb_internal.bgw_job_stat +values +(2000, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz, +false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0), +(2001, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz, +false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0), +(2002, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz, +false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0), +(2003, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz, +false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0), +(2004, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz, +false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0), +(2005, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz, +false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0); +SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type'); + DROP VIEW relations; DROP MATERIALIZED VIEW telemetry_report;