Skip to content

Commit

Permalink
Add tests
Browse files Browse the repository at this point in the history
Also modify query appropriately for functions in the
internal schema
Note: some memory is not accessed as it should be; the reported
value for max_consecutive_failures is garbage (wrong column index)
  • Loading branch information
konskov committed Oct 17, 2022
1 parent fc64761 commit a10e16b
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 21 deletions.
1 change: 1 addition & 0 deletions src/telemetry/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ typedef struct TelemetryJobStats
int32 max_consecutive_failures;
int32 max_consecutive_crashes;
Interval *total_duration;
Interval *total_duration_failures;
} TelemetryJobStats;

extern void ts_telemetry_stats_gather(TelemetryStats *stats);
Expand Down
48 changes: 27 additions & 21 deletions src/telemetry/telemetry.c
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ add_job_stats_internal(JsonbParseState *state, const char *job_type, TelemetryJo
ts_jsonb_add_int32(state, "max_consecutive_failures", stats->max_consecutive_failures);
ts_jsonb_add_int32(state, "max_consecutive_crashes", stats->max_consecutive_crashes);
ts_jsonb_add_interval(state, "total_duration", stats->total_duration);
ts_jsonb_add_interval(state, "total_duration_failures", stats->total_duration_failures);

return pushJsonbValue(&state, WJB_END_OBJECT, NULL);
}
Expand All @@ -392,18 +393,21 @@ add_job_stats_by_job_type(JsonbParseState *parse_state)

const char *command_string =
"SELECT ("
" case WHEN q.proc_schema = \'_timescaledb_internal\' then q.proc_name::text "
" case WHEN q.proc_schema = \'_timescaledb_internal\' AND q.proc_name = \'policy_retention\' then q.proc_name::text "
" WHEN q.proc_schema = \'_timescaledb_internal\' AND q.proc_name = \'policy_compression\' then q.proc_name::text "
" WHEN q.proc_schema = \'_timescaledb_internal\' AND q.proc_name = \'policy_reorder\' then q.proc_name::text "
" WHEN q.proc_schema = \'_timescaledb_internal\' AND q.proc_name = \'policy_refresh_continuous_aggregate\' then q.proc_name::text "
" ELSE \'user_defined_action\'::text "
" end"
") as job_type, sum(total_runs)::bigint as total_runs, sum(total_successes)::bigint as "
"total_successes,"
"sum(total_failures)::bigint as total_failures, sum(total_crashes)::bigint as "
"total_crashes,"
"sum(total_duration) as total_duration, max(consecutive_failures) as "
"max_consecutive_failures, max(consecutive_crashes) as max_consecutive_crashes "
"sum(total_duration) as total_duration, sum(total_duration_failures) as total_duration_failures, max(consecutive_failures)::int as "
"max_consecutive_failures, max(consecutive_crashes)::int as max_consecutive_crashes "
"FROM "
"(select j.proc_schema, j.proc_name, s.total_runs, s.total_successes, s.total_failures,"
"s.total_crashes, s.total_duration, s.consecutive_crashes, s.consecutive_failures "
"s.total_crashes, s.total_duration, s.total_duration_failures, s.consecutive_crashes, s.consecutive_failures "
"FROM "
"_timescaledb_internal.bgw_job_stat s join _timescaledb_config.bgw_job j on j.id = "
"s.job_id) q "
Expand All @@ -428,23 +432,24 @@ add_job_stats_by_job_type(JsonbParseState *parse_state)
tuptable = SPI_tuptable;
TupleDesc tupdesc = tuptable->tupdesc;
Datum jobtype_datum;
Datum total_runs_datum, total_successes_datum, total_failures_datum, total_crashes_datum;
Datum total_duration_datum, max_consec_crashes_datum, max_consec_fails_datum;
Datum total_runs, total_successes, total_failures, total_crashes;
Datum total_duration, total_duration_failures, max_consec_crashes, max_consec_fails;

bool isnull_jobtype, isnull_runs, isnull_successes, isnull_failures, isnull_crashes;
bool isnull_duration, isnull_consec_crashes, isnull_consec_fails;
bool isnull_duration, isnull_duration_failures, isnull_consec_crashes, isnull_consec_fails;

jobtype_datum =
SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull_jobtype);
if (isnull_jobtype)
elog(ERROR, "null job type returned");
total_runs_datum = SPI_getbinval(tuptable->vals[i], tupdesc, 2, &isnull_runs);
total_successes_datum = SPI_getbinval(tuptable->vals[i], tupdesc, 3, &isnull_successes);
total_failures_datum = SPI_getbinval(tuptable->vals[i], tupdesc, 4, &isnull_failures);
total_crashes_datum = SPI_getbinval(tuptable->vals[i], tupdesc, 5, &isnull_crashes);
total_duration_datum = SPI_getbinval(tuptable->vals[i], tupdesc, 6, &isnull_duration);
max_consec_fails_datum = SPI_getbinval(tuptable->vals[i], tupdesc, 7, &isnull_consec_fails);
max_consec_crashes_datum =
total_runs = SPI_getbinval(tuptable->vals[i], tupdesc, 2, &isnull_runs);
total_successes = SPI_getbinval(tuptable->vals[i], tupdesc, 3, &isnull_successes);
total_failures = SPI_getbinval(tuptable->vals[i], tupdesc, 4, &isnull_failures);
total_crashes = SPI_getbinval(tuptable->vals[i], tupdesc, 5, &isnull_crashes);
total_duration = SPI_getbinval(tuptable->vals[i], tupdesc, 6, &isnull_duration);
total_duration_failures = SPI_getbinval(tuptable->vals[i], tupdesc, 6, &isnull_duration_failures);
max_consec_fails = SPI_getbinval(tuptable->vals[i], tupdesc, 7, &isnull_consec_fails);
max_consec_crashes =
SPI_getbinval(tuptable->vals[i], tupdesc, 8, &isnull_consec_crashes);

if (isnull_jobtype || isnull_runs || isnull_successes || isnull_failures ||
Expand All @@ -454,13 +459,14 @@ add_job_stats_by_job_type(JsonbParseState *parse_state)
}

TelemetryJobStats stats = {
.total_runs = DatumGetInt64(total_runs_datum),
.total_successes = DatumGetInt64(total_successes_datum),
.total_failures = DatumGetInt64(total_failures_datum),
.total_crashes = DatumGetInt64(total_crashes_datum),
.total_duration = DatumGetIntervalP(total_duration_datum),
.max_consecutive_failures = DatumGetInt32(max_consec_fails_datum),
.max_consecutive_crashes = DatumGetInt32(max_consec_crashes_datum),
.total_runs = DatumGetInt64(total_runs),
.total_successes = DatumGetInt64(total_successes),
.total_failures = DatumGetInt64(total_failures),
.total_crashes = DatumGetInt64(total_crashes),
.total_duration = DatumGetIntervalP(total_duration),
.total_duration_failures = DatumGetIntervalP(total_duration_failures),
.max_consecutive_failures = DatumGetInt32(max_consec_fails),
.max_consecutive_crashes = DatumGetInt32(max_consec_crashes)
};
spi_context = MemoryContextSwitchTo(old_context);
add_job_stats_internal(parse_state, TextDatumGetCString(jobtype_datum), &stats);
Expand Down
105 changes: 105 additions & 0 deletions tsl/test/expected/telemetry_stats.out
Original file line number Diff line number Diff line change
Expand Up @@ -956,6 +956,111 @@ FROM relations;
}
(1 row)

DELETE FROM _timescaledb_config.bgw_job WHERE id = 2;
TRUNCATE _timescaledb_internal.job_errors;
-- create some "errors" for testing
INSERT INTO
_timescaledb_config.bgw_job(id, application_name, schedule_interval, max_runtime, max_retries, retry_period, proc_schema, proc_name)
VALUES (2000, 'User-Defined Action [2000]', interval '3 days', interval '1 hour', 5, interval '5 min', 'public', 'custom_action_1'),
(2001, 'User-Defined Action [2001]', interval '3 days', interval '1 hour', 5, interval '5 min', 'public', 'custom_action_2'),
(2002, 'Compression Policy [2002]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_compression'),
(2003, 'Retention Policy [2003]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_retention'),
(2004, 'Refresh Continuous Aggregate Policy [2004]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_refresh_continuous_aggregate'),
-- user defined a custom action in the _timescaledb_internal schema; because it reuses the proc_name of a policy, it gets grouped with policy_refresh_continuous_aggregate rather than with user-defined actions
(2005, 'User-Defined Action [2005]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_refresh_continuous_aggregate');
-- create some errors for them
INSERT INTO
_timescaledb_internal.job_errors(job_id, pid, start_time, finish_time, error_data)
values (2000, 12345, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"public", "proc_name": "custom_action_1"}'),
(2000, 23456, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"ABCDE", "proc_schema": "public", "proc_name": "custom_action_1"}'),
(2001, 54321, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"public", "proc_name": "custom_action_2"}'),
(2002, 23443, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"JF009", "proc_schema":"_timescaledb_internal", "proc_name": "policy_compression"}'),
(2003, 14567, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"_timescaledb_internal", "proc_name": "policy_retention"}'),
(2004, 78907, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"_timescaledb_internal", "proc_name": "policy_refresh_continuous_aggregate"}'),
(2005, 45757, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"_timescaledb_internal", "proc_name": "policy_refresh_continuous_aggregate"}');
-- we have 3 error records for user-defined actions, and 4 for policies, so we expect 4 types of jobs
SELECT jsonb_pretty(get_telemetry_report() -> 'errors_by_sqlerrcode');
jsonb_pretty
----------------------------------------------
{ +
"policy_retention": { +
"P0001": 1 +
}, +
"policy_compression": { +
"JF009": 1 +
}, +
"user_defined_action": { +
"ABCDE": 1, +
"P0001": 2 +
}, +
"policy_refresh_continuous_aggregate": {+
"P0001": 2 +
} +
}
(1 row)

-- for job statistics, insert some records into bgw_job_stats
INSERT INTO _timescaledb_internal.bgw_job_stat
values
(2000, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz,
false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0),
(2001, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz,
false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0),
(2002, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz,
false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0),
(2003, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz,
false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0),
(2004, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz,
false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0),
(2005, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz,
false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0);
SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
jsonb_pretty
------------------------------------------------
{ +
"policy_retention": { +
"total_runs": 1, +
"total_crashes": 0, +
"total_duration": "@ 0", +
"total_failures": 1, +
"total_successes": 0, +
"max_consecutive_crashes": 1, +
"total_duration_failures": "@ 0", +
"max_consecutive_failures": 1610703160+
}, +
"policy_compression": { +
"total_runs": 1, +
"total_crashes": 0, +
"total_duration": "@ 0", +
"total_failures": 1, +
"total_successes": 0, +
"max_consecutive_crashes": 1, +
"total_duration_failures": "@ 0", +
"max_consecutive_failures": 1610702880+
}, +
"user_defined_action": { +
"total_runs": 2, +
"total_crashes": 0, +
"total_duration": "@ 0", +
"total_failures": 2, +
"total_successes": 0, +
"max_consecutive_crashes": 1, +
"total_duration_failures": "@ 0", +
"max_consecutive_failures": 1610703440+
}, +
"policy_refresh_continuous_aggregate": { +
"total_runs": 2, +
"total_crashes": 0, +
"total_duration": "@ 0", +
"total_failures": 2, +
"total_successes": 0, +
"max_consecutive_crashes": 1, +
"total_duration_failures": "@ 0", +
"max_consecutive_failures": 1610703736+
} +
}
(1 row)

DROP VIEW relations;
DROP MATERIALIZED VIEW telemetry_report;
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER
Expand Down
42 changes: 42 additions & 0 deletions tsl/test/sql/telemetry_stats.sql
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,48 @@ SELECT
jsonb_pretty(rels -> 'continuous_aggregates') AS continuous_aggregates
FROM relations;

DELETE FROM _timescaledb_config.bgw_job WHERE id = 2;
TRUNCATE _timescaledb_internal.job_errors;
-- create some "errors" for testing
INSERT INTO
_timescaledb_config.bgw_job(id, application_name, schedule_interval, max_runtime, max_retries, retry_period, proc_schema, proc_name)
VALUES (2000, 'User-Defined Action [2000]', interval '3 days', interval '1 hour', 5, interval '5 min', 'public', 'custom_action_1'),
(2001, 'User-Defined Action [2001]', interval '3 days', interval '1 hour', 5, interval '5 min', 'public', 'custom_action_2'),
(2002, 'Compression Policy [2002]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_compression'),
(2003, 'Retention Policy [2003]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_retention'),
(2004, 'Refresh Continuous Aggregate Policy [2004]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_refresh_continuous_aggregate'),
-- user defined a custom action in the _timescaledb_internal schema; because it reuses the proc_name of a policy, it gets grouped with policy_refresh_continuous_aggregate rather than with user-defined actions
(2005, 'User-Defined Action [2005]', interval '3 days', interval '1 hour', 5, interval '5 min', '_timescaledb_internal', 'policy_refresh_continuous_aggregate');
-- create some errors for them
INSERT INTO
_timescaledb_internal.job_errors(job_id, pid, start_time, finish_time, error_data)
values (2000, 12345, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"public", "proc_name": "custom_action_1"}'),
(2000, 23456, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"ABCDE", "proc_schema": "public", "proc_name": "custom_action_1"}'),
(2001, 54321, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"public", "proc_name": "custom_action_2"}'),
(2002, 23443, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"JF009", "proc_schema":"_timescaledb_internal", "proc_name": "policy_compression"}'),
(2003, 14567, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"_timescaledb_internal", "proc_name": "policy_retention"}'),
(2004, 78907, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"_timescaledb_internal", "proc_name": "policy_refresh_continuous_aggregate"}'),
(2005, 45757, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '{"sqlerrcode":"P0001", "proc_schema":"_timescaledb_internal", "proc_name": "policy_refresh_continuous_aggregate"}');

-- we have 3 error records for user-defined actions, and 4 for policies, so we expect 4 types of jobs
SELECT jsonb_pretty(get_telemetry_report() -> 'errors_by_sqlerrcode');
-- for job statistics, insert some records into bgw_job_stats
INSERT INTO _timescaledb_internal.bgw_job_stat
values
(2000, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz,
false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0),
(2001, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz,
false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0),
(2002, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz,
false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0),
(2003, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz,
false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0),
(2004, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz,
false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0),
(2005, '2040-01-01 00:00:00+00'::timestamptz, '2040-01-01 00:00:01+00'::timestamptz, '-infinity'::timestamptz, '-infinity'::timestamptz,
false, 1, interval '00:00:00', interval '00:00:02', 0, 1, 0, 1, 0);
SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');

DROP VIEW relations;
DROP MATERIALIZED VIEW telemetry_report;

Expand Down

0 comments on commit a10e16b

Please sign in to comment.