Skip to content

Commit

Permalink
Don't reuse job id
Browse files Browse the repository at this point in the history
We shouldnt reuse job ids to make it easy to recognize the job
log entries for a job. We also need to keep the old job around
to not break loading dumps from older versions.
  • Loading branch information
svenklemm committed May 3, 2024
1 parent fc46eab commit e298ecd
Show file tree
Hide file tree
Showing 16 changed files with 122 additions and 73 deletions.
42 changes: 41 additions & 1 deletion sql/job_stat_history_log_retention.sql
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ BEGIN
END;
$BODY$ SET search_path TO pg_catalog, pg_temp;

-- this is the previous job that was created for the same purpose
-- which has scheduled set to false. We need to keep it around to
-- not break loading dumps from older versions.
INSERT INTO _timescaledb_config.bgw_job (
id,
application_name,
Expand All @@ -57,7 +60,44 @@ INSERT INTO _timescaledb_config.bgw_job (
VALUES
(
2,
'Job History Log Retention Policy [2]',
'Error Log Retention Policy [2]',
INTERVAL '1 month',
INTERVAL '1 hour',
-1,
INTERVAL '1h',
'_timescaledb_functions',
'policy_job_error_retention',
pg_catalog.quote_ident(current_role)::regrole,
false,
'{"drop_after":"1 month"}',
'_timescaledb_functions',
'policy_job_error_retention_check',
false,
'2000-01-01 00:00:00+00'::timestamptz
) ON CONFLICT (id) DO NOTHING;


INSERT INTO _timescaledb_config.bgw_job (
id,
application_name,
schedule_interval,
max_runtime,
max_retries,
retry_period,
proc_schema,
proc_name,
owner,
scheduled,
config,
check_schema,
check_name,
fixed_schedule,
initial_start
)
VALUES
(
3,
'Job History Log Retention Policy [3]',
INTERVAL '1 month',
INTERVAL '1 hour',
-1,
Expand Down
46 changes: 36 additions & 10 deletions sql/updates/latest-dev.sql
Original file line number Diff line number Diff line change
Expand Up @@ -373,16 +373,42 @@ ALTER EXTENSION timescaledb

DROP TABLE _timescaledb_internal.job_errors;

UPDATE
_timescaledb_config.bgw_job
SET
application_name = 'Job History Log Retention Policy [2]',
proc_schema = '_timescaledb_functions',
proc_name = 'policy_job_stat_history_retention',
check_schema = '_timescaledb_functions',
check_name = 'policy_job_stat_history_retention_check'
WHERE
id = 2;
UPDATE _timescaledb_config.bgw_job SET scheduled = false WHERE id = 2;
INSERT INTO _timescaledb_config.bgw_job (
id,
application_name,
schedule_interval,
max_runtime,
max_retries,
retry_period,
proc_schema,
proc_name,
owner,
scheduled,
config,
check_schema,
check_name,
fixed_schedule,
initial_start
)
VALUES
(
3,
'Job History Log Retention Policy [3]',
INTERVAL '1 month',
INTERVAL '1 hour',
-1,
INTERVAL '1h',
'_timescaledb_functions',
'policy_job_stat_history_retention',
pg_catalog.quote_ident(current_role)::regrole,
true,
'{"drop_after":"1 month"}',
'_timescaledb_functions',
'policy_job_stat_history_retention_check',
true,
'2000-01-01 00:00:00+00'::timestamptz
) ON CONFLICT (id) DO NOTHING;

DROP FUNCTION IF EXISTS _timescaledb_internal.policy_job_error_retention(job_id integer,config jsonb);
DROP FUNCTION IF EXISTS _timescaledb_internal.policy_job_error_retention_check(config jsonb);
Expand Down
13 changes: 2 additions & 11 deletions sql/updates/reverse-dev.sql
Original file line number Diff line number Diff line change
Expand Up @@ -284,16 +284,7 @@ BEGIN
END;
$BODY$ SET search_path TO pg_catalog, pg_temp;

UPDATE
_timescaledb_config.bgw_job
SET
application_name = 'Error Log Retention Policy [2]',
proc_schema = '_timescaledb_functions',
proc_name = 'policy_job_error_retention',
check_schema = '_timescaledb_functions',
check_name = 'policy_job_error_retention_check'
WHERE
id = 2;

UPDATE _timescaledb_config.bgw_job SET scheduled = true WHERE id = 2;
DELETE FROM _timescaledb_config.bgw_job WHERE id = 3;

DROP PROCEDURE IF EXISTS _timescaledb_functions.cagg_migrate_to_time_bucket(cagg REGCLASS);
5 changes: 2 additions & 3 deletions tsl/test/expected/bgw_custom-13.out
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,8 @@ SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb
1004
(1 row)

-- exclude the telemetry[1] and job error retention[2] jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
-- exclude internal jobs
SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name
--------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------
1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | |
Expand Down
5 changes: 2 additions & 3 deletions tsl/test/expected/bgw_custom-14.out
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,8 @@ SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb
1004
(1 row)

-- exclude the telemetry[1] and job error retention[2] jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
-- exclude internal jobs
SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name
--------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------
1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | |
Expand Down
5 changes: 2 additions & 3 deletions tsl/test/expected/bgw_custom-15.out
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,8 @@ SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb
1004
(1 row)

-- exclude the telemetry[1] and job error retention[2] jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
-- exclude internal jobs
SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name
--------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------
1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | |
Expand Down
5 changes: 2 additions & 3 deletions tsl/test/expected/bgw_custom-16.out
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,8 @@ SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb
1004
(1 row)

-- exclude the telemetry[1] and job error retention[2] jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
-- exclude internal jobs
SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;
job_id | application_name | schedule_interval | max_runtime | max_retries | retry_period | proc_schema | proc_name | owner | scheduled | fixed_schedule | config | next_start | initial_start | hypertable_schema | hypertable_name | check_schema | check_name
--------+----------------------------+-------------------+-------------+-------------+--------------+-------------+---------------------+-------------------+-----------+----------------+-----------------------+------------------------------+------------------------------+-------------------+-----------------+--------------+------------
1000 | User-Defined Action [1000] | @ 1 hour | @ 0 | -1 | @ 5 mins | public | custom_func | default_perm_user | t | t | {"type": "function"} | Fri Dec 31 16:00:00 1999 PST | Fri Dec 31 16:00:00 1999 PST | | | |
Expand Down
12 changes: 6 additions & 6 deletions tsl/test/expected/bgw_job_stat_history_errors.out
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ select pg_sleep(20);

(1 row)

-- exclude the retention policy
-- exclude internal jobs
select job_id, data->'job'->>'proc_name' as proc_name, data->'error_data'->>'message' as err_message, data->'error_data'->>'sqlerrcode' as sqlerrcode
from _timescaledb_internal.bgw_job_stat_history WHERE job_id != 2 and succeeded is false;
from _timescaledb_internal.bgw_job_stat_history WHERE job_id >= 1000 and succeeded is false;
job_id | proc_name | err_message | sqlerrcode
--------+--------------+-----------------------------------------------------+------------
1000 | job_fail | raising an exception | P0001
Expand All @@ -104,7 +104,7 @@ SELECT pg_reload_conf();
(1 row)

-- test the retention job
SELECT next_start FROM alter_job(2, next_start => '2060-01-01 00:00:00+00'::timestamptz);
SELECT next_start FROM alter_job(3, next_start => '2060-01-01 00:00:00+00'::timestamptz);
next_start
------------------------------
Wed Dec 31 16:00:00 2059 PST
Expand All @@ -128,9 +128,9 @@ WHERE succeeded IS FALSE;
(3 rows)

-- drop all job_stats for the retention job
DELETE FROM _timescaledb_internal.bgw_job_stat WHERE job_id = 2;
SELECT next_start FROM alter_job(2, next_start => now() + interval '2 seconds') \gset
SELECT test.wait_for_job_to_run(2, 1);
DELETE FROM _timescaledb_internal.bgw_job_stat WHERE job_id = 3;
SELECT next_start FROM alter_job(3, next_start => now() + interval '2 seconds') \gset
SELECT test.wait_for_job_to_run(3, 1);
wait_for_job_to_run
---------------------
t
Expand Down
16 changes: 7 additions & 9 deletions tsl/test/expected/bgw_policy.out
Original file line number Diff line number Diff line change
Expand Up @@ -620,18 +620,16 @@ ERROR: must be owner of hypertable "test_table"
select add_retention_policy('test_strict', drop_after => NULL);
ERROR: need to specify one of "drop_after" or "drop_created_before"
\set ON_ERROR_STOP 1
-- Check the number of non-telemetry policies. We check for telemetry
-- policy in telemetry_community.sql
-- Check the number of non-internal policies
SELECT proc_name, count(*)
FROM _timescaledb_config.bgw_job
WHERE proc_name NOT LIKE '%telemetry%'
WHERE id >= 1000
GROUP BY proc_name;
proc_name | count
-----------------------------------+-------
policy_reorder | 1
policy_job_stat_history_retention | 1
policy_retention | 2
(3 rows)
proc_name | count
------------------+-------
policy_reorder | 1
policy_retention | 2
(2 rows)

-- test retention with null arguments
select add_retention_policy(NULL, NULL);
Expand Down
6 changes: 3 additions & 3 deletions tsl/test/expected/telemetry_stats-13.out
Original file line number Diff line number Diff line change
Expand Up @@ -526,12 +526,12 @@ SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
"max_consecutive_failures": 1 +
}, +
"user_defined_action": { +
"total_runs": 2, +
"total_crashes": 0, +
"total_runs": 3, +
"total_crashes": 1, +
"total_duration": "@ 0", +
"total_failures": 2, +
"total_successes": 0, +
"max_consecutive_crashes": 0, +
"max_consecutive_crashes": 1, +
"total_duration_failures": "@ 4 secs",+
"max_consecutive_failures": 1 +
}, +
Expand Down
6 changes: 3 additions & 3 deletions tsl/test/expected/telemetry_stats-14.out
Original file line number Diff line number Diff line change
Expand Up @@ -526,12 +526,12 @@ SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
"max_consecutive_failures": 1 +
}, +
"user_defined_action": { +
"total_runs": 2, +
"total_crashes": 0, +
"total_runs": 3, +
"total_crashes": 1, +
"total_duration": "@ 0", +
"total_failures": 2, +
"total_successes": 0, +
"max_consecutive_crashes": 0, +
"max_consecutive_crashes": 1, +
"total_duration_failures": "@ 4 secs",+
"max_consecutive_failures": 1 +
}, +
Expand Down
6 changes: 3 additions & 3 deletions tsl/test/expected/telemetry_stats-15.out
Original file line number Diff line number Diff line change
Expand Up @@ -526,12 +526,12 @@ SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
"max_consecutive_failures": 1 +
}, +
"user_defined_action": { +
"total_runs": 2, +
"total_crashes": 0, +
"total_runs": 3, +
"total_crashes": 1, +
"total_duration": "@ 0", +
"total_failures": 2, +
"total_successes": 0, +
"max_consecutive_crashes": 0, +
"max_consecutive_crashes": 1, +
"total_duration_failures": "@ 4 secs",+
"max_consecutive_failures": 1 +
}, +
Expand Down
6 changes: 3 additions & 3 deletions tsl/test/expected/telemetry_stats-16.out
Original file line number Diff line number Diff line change
Expand Up @@ -526,12 +526,12 @@ SELECT jsonb_pretty(get_telemetry_report() -> 'stats_by_job_type');
"max_consecutive_failures": 1 +
}, +
"user_defined_action": { +
"total_runs": 2, +
"total_crashes": 0, +
"total_runs": 3, +
"total_crashes": 1, +
"total_duration": "@ 0", +
"total_failures": 2, +
"total_successes": 0, +
"max_consecutive_crashes": 0, +
"max_consecutive_crashes": 1, +
"total_duration_failures": "@ 4 secs",+
"max_consecutive_failures": 1 +
}, +
Expand Down
5 changes: 2 additions & 3 deletions tsl/test/sql/bgw_custom.sql.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,8 @@ SELECT add_job('custom_proc2','1h', config:= '{"type":"procedure"}'::jsonb, init
SELECT add_job('custom_func', '1h', config:='{"type":"function"}'::jsonb, initial_start => :'time_zero'::TIMESTAMPTZ);
SELECT add_job('custom_func_definer', '1h', config:='{"type":"function"}'::jsonb, initial_start => :'time_zero'::TIMESTAMPTZ);

-- exclude the telemetry[1] and job error retention[2] jobs
-- job 2 may have already run which will set its next_start field thus making the test flaky
SELECT * FROM timescaledb_information.jobs WHERE job_id NOT IN (1,2) ORDER BY 1;
-- exclude internal jobs
SELECT * FROM timescaledb_information.jobs WHERE job_id >= 1000 ORDER BY 1;

SELECT count(*) FROM _timescaledb_config.bgw_job WHERE config->>'type' IN ('procedure', 'function');

Expand Down
12 changes: 6 additions & 6 deletions tsl/test/sql/bgw_job_stat_history_errors.sql
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,15 @@ from _timescaledb_internal.bgw_job_stat_history where job_id = :jobf_id and succ
select delete_job(:jobf_id);

select pg_sleep(20);
-- exclude the retention policy
-- exclude internal jobs
select job_id, data->'job'->>'proc_name' as proc_name, data->'error_data'->>'message' as err_message, data->'error_data'->>'sqlerrcode' as sqlerrcode
from _timescaledb_internal.bgw_job_stat_history WHERE job_id != 2 and succeeded is false;
from _timescaledb_internal.bgw_job_stat_history WHERE job_id >= 1000 and succeeded is false;

ALTER SYSTEM RESET DEFAULT_TRANSACTION_ISOLATION;
SELECT pg_reload_conf();

-- test the retention job
SELECT next_start FROM alter_job(2, next_start => '2060-01-01 00:00:00+00'::timestamptz);
SELECT next_start FROM alter_job(3, next_start => '2060-01-01 00:00:00+00'::timestamptz);
TRUNCATE TABLE _timescaledb_internal.bgw_job_stat_history;
INSERT INTO _timescaledb_internal.bgw_job_stat_history(job_id, pid, succeeded, execution_start, execution_finish, data)
VALUES (123, 12345, false, '2000-01-01 00:00:00+00'::timestamptz, '2000-01-01 00:00:10+00'::timestamptz, '{}'),
Expand All @@ -79,9 +79,9 @@ SELECT job_id, pid, succeeded, execution_start, execution_finish, data
FROM _timescaledb_internal.bgw_job_stat_history
WHERE succeeded IS FALSE;
-- drop all job_stats for the retention job
DELETE FROM _timescaledb_internal.bgw_job_stat WHERE job_id = 2;
SELECT next_start FROM alter_job(2, next_start => now() + interval '2 seconds') \gset
SELECT test.wait_for_job_to_run(2, 1);
DELETE FROM _timescaledb_internal.bgw_job_stat WHERE job_id = 3;
SELECT next_start FROM alter_job(3, next_start => now() + interval '2 seconds') \gset
SELECT test.wait_for_job_to_run(3, 1);
-- only the last row remains
SELECT job_id, pid, succeeded, execution_start, execution_finish, data
FROM _timescaledb_internal.bgw_job_stat_history
Expand Down
5 changes: 2 additions & 3 deletions tsl/test/sql/bgw_policy.sql
Original file line number Diff line number Diff line change
Expand Up @@ -326,11 +326,10 @@ select remove_retention_policy('test_table');
select add_retention_policy('test_strict', drop_after => NULL);
\set ON_ERROR_STOP 1

-- Check the number of non-telemetry policies. We check for telemetry
-- policy in telemetry_community.sql
-- Check the number of non-internal policies
SELECT proc_name, count(*)
FROM _timescaledb_config.bgw_job
WHERE proc_name NOT LIKE '%telemetry%'
WHERE id >= 1000
GROUP BY proc_name;


Expand Down

0 comments on commit e298ecd

Please sign in to comment.