Skip to content

Commit

Permalink
Add execution time for jobs to log
Browse files Browse the repository at this point in the history
When a job finishes execution, either because of an error or a success,
this commit will print out the execution time of the job in the log
together with a message about what job that finished.
  • Loading branch information
mkindahl committed Jul 13, 2023
1 parent 1bd5273 commit 4c5c0b1
Show file tree
Hide file tree
Showing 12 changed files with 300 additions and 207 deletions.
1 change: 1 addition & 0 deletions .unreleased/feature_5876
@@ -0,0 +1 @@
Implements: #5876 Add execution time for jobs to log
29 changes: 23 additions & 6 deletions src/bgw/job.c
Expand Up @@ -25,6 +25,7 @@
#include <utils/acl.h>
#include <utils/elog.h>
#include <executor/execdebug.h>
#include <executor/instrument.h>
#include <utils/jsonb.h>
#include <utils/snapmgr.h>
#include <unistd.h>
Expand Down Expand Up @@ -1175,6 +1176,8 @@ ts_bgw_job_entrypoint(PG_FUNCTION_ARGS)
BgwJob *job;
JobResult res = JOB_FAILURE;
bool got_lock;
instr_time start;
instr_time duration;

memcpy(&params, MyBgworkerEntry->bgw_extra, sizeof(BgwParams));
Ensure(params.user_oid != 0 && params.job_id != 0,
Expand All @@ -1196,6 +1199,8 @@ ts_bgw_job_entrypoint(PG_FUNCTION_ARGS)

ts_license_enable_module_loading();

INSTR_TIME_SET_CURRENT(start);

StartTransactionCommand();
/* Grab a session lock on the job row to prevent concurrent deletes. Lock is released
* when the job process exits */
Expand Down Expand Up @@ -1270,11 +1275,18 @@ ts_bgw_job_entrypoint(PG_FUNCTION_ARGS)
pfree(job);
}

INSTR_TIME_SET_CURRENT(duration);
INSTR_TIME_SUBTRACT(duration, start);

/*
* the rethrow will log the error; but also log which job threw the
* error
*/
elog(LOG, "job %d threw an error", params.job_id);
elog(LOG,
"exiting job %d (%s) with error: execution time %.2fms",
params.job_id,
NameStr(job->fd.application_name),
INSTR_TIME_GET_MILLISEC(duration));

ErrorData *edata;
FormData_job_error jerr = { 0 };
Expand Down Expand Up @@ -1316,17 +1328,22 @@ ts_bgw_job_entrypoint(PG_FUNCTION_ARGS)

CommitTransactionCommand();

INSTR_TIME_SET_CURRENT(duration);
INSTR_TIME_SUBTRACT(duration, start);

elog(LOG,
"exiting job %d (%s) with %s: execution time %.2fms",
params.job_id,
NameStr(job->fd.application_name),
(res == JOB_SUCCESS ? "success" : "failure"),
INSTR_TIME_GET_MILLISEC(duration));

if (job != NULL)
{
pfree(job);
job = NULL;
}

elog(DEBUG1,
"exiting job %d with %s",
params.job_id,
(res == JOB_SUCCESS ? "success" : "failure"));

PG_RETURN_VOID();
}

Expand Down
5 changes: 1 addition & 4 deletions src/cross_module_fn.c
Expand Up @@ -19,10 +19,7 @@

#define CROSSMODULE_WRAPPER(func) \
TS_FUNCTION_INFO_V1(ts_##func); \
Datum ts_##func(PG_FUNCTION_ARGS) \
{ \
PG_RETURN_DATUM(ts_cm_functions->func(fcinfo)); \
}
Datum ts_##func(PG_FUNCTION_ARGS) { PG_RETURN_DATUM(ts_cm_functions->func(fcinfo)); }

/* bgw policy functions */
CROSSMODULE_WRAPPER(policy_compression_add);
Expand Down
89 changes: 55 additions & 34 deletions tsl/test/expected/bgw_db_scheduler.out
Expand Up @@ -96,7 +96,10 @@ CREATE TABLE public.bgw_log(
msg TEXT
);
CREATE VIEW sorted_bgw_log AS
SELECT msg_no, application_name, regexp_replace(regexp_replace(msg, 'Wait until [0-9]+, started at [0-9]+', 'Wait until (RANDOM), started at (RANDOM)'), 'background worker "[^"]+"','connection') AS msg FROM bgw_log ORDER BY mock_time, application_name COLLATE "C", msg_no;
SELECT msg_no,
application_name,
regexp_replace(regexp_replace(msg, '(Wait until|started at|execution time) [0-9]+(\.[0-9]+)?', '\1 (RANDOM)', 'g'), 'background worker "[^"]+"','connection') AS msg
FROM bgw_log ORDER BY mock_time, application_name COLLATE "C", msg_no;
CREATE TABLE public.bgw_dsm_handle_store(
handle BIGINT
);
Expand Down Expand Up @@ -202,13 +205,14 @@ SELECT * FROM timescaledb_information.job_stats;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+----------------------------------------------------
msg_no | application_name | msg
--------+------------------+------------------------------------------------------------------------
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | DB Scheduler | [TESTING] Registered new background worker
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | unscheduled | Execute job 1
(4 rows)
1 | unscheduled | exiting job 1000 (unscheduled) with success: execution time (RANDOM)ms
(5 rows)

SELECT delete_job(1000);
delete_job
Expand Down Expand Up @@ -312,12 +316,13 @@ FROM _timescaledb_internal.bgw_job_stat;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+----------------------------------------------------
msg_no | application_name | msg
--------+------------------+-----------------------------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_1 | Execute job 1
(3 rows)
1 | test_job_1 | exiting job 1000 (test_job_1) with success: execution time (RANDOM)ms
(4 rows)

--Test that the scheduler will not run job again if not enough time has passed
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25, 25);
Expand All @@ -334,13 +339,14 @@ FROM _timescaledb_internal.bgw_job_stat;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+----------------------------------------------------
msg_no | application_name | msg
--------+------------------+-----------------------------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_1 | Execute job 1
1 | test_job_1 | exiting job 1000 (test_job_1) with success: execution time (RANDOM)ms
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
(4 rows)
(5 rows)

--After enough time has passed the scheduler will run the job again
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(100, 50);
Expand All @@ -357,17 +363,19 @@ FROM _timescaledb_internal.bgw_job_stat;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+----------------------------------------------------
msg_no | application_name | msg
--------+------------------+-----------------------------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_1 | Execute job 1
1 | test_job_1 | exiting job 1000 (test_job_1) with success: execution time (RANDOM)ms
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
1 | DB Scheduler | [TESTING] Registered new background worker
2 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_1 | Execute job 1
(8 rows)
1 | test_job_1 | exiting job 1000 (test_job_1) with success: execution time (RANDOM)ms
(10 rows)

--Now it runs it one more time
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(120, 100);
Expand All @@ -384,21 +392,24 @@ FROM _timescaledb_internal.bgw_job_stat;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+----------------------------------------------------
msg_no | application_name | msg
--------+------------------+-----------------------------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_1 | Execute job 1
1 | test_job_1 | exiting job 1000 (test_job_1) with success: execution time (RANDOM)ms
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
1 | DB Scheduler | [TESTING] Registered new background worker
2 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_1 | Execute job 1
1 | test_job_1 | exiting job 1000 (test_job_1) with success: execution time (RANDOM)ms
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
1 | DB Scheduler | [TESTING] Registered new background worker
2 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_1 | Execute job 1
(12 rows)
1 | test_job_1 | exiting job 1000 (test_job_1) with success: execution time (RANDOM)ms
(15 rows)

--
-- Test what happens when running a job that throws an error
Expand Down Expand Up @@ -718,13 +729,14 @@ FROM _timescaledb_internal.bgw_job_stat;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+----------------------------------------------------
msg_no | application_name | msg
--------+------------------+----------------------------------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_3_long | Before sleep job 3
1 | test_job_3_long | After sleep job 3
(4 rows)
2 | test_job_3_long | exiting job 1003 (test_job_3_long) with success: execution time (RANDOM)ms
(5 rows)

SELECT ts_bgw_params_mock_wait_returns_immediately(:WAIT_ON_JOB);
ts_bgw_params_mock_wait_returns_immediately
Expand Down Expand Up @@ -805,8 +817,8 @@ FROM _timescaledb_internal.bgw_job_stat;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+-----------------------------------------------------
msg_no | application_name | msg
--------+------------------+----------------------------------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_3_long | Before sleep job 3
Expand All @@ -817,7 +829,8 @@ SELECT * FROM sorted_bgw_log;
2 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_3_long | Before sleep job 3
1 | test_job_3_long | After sleep job 3
(10 rows)
2 | test_job_3_long | exiting job 1004 (test_job_3_long) with success: execution time (RANDOM)ms
(11 rows)

--Test sending a SIGHUP to a job
\c :TEST_DBNAME :ROLE_SUPERUSER
Expand Down Expand Up @@ -982,8 +995,8 @@ FROM _timescaledb_internal.bgw_job_stat;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+-----------------------------------------------------
msg_no | application_name | msg
--------+------------------+----------------------------------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
Expand All @@ -995,7 +1008,8 @@ SELECT * FROM sorted_bgw_log;
2 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_3_long | Before sleep job 3
1 | test_job_3_long | After sleep job 3
(11 rows)
2 | test_job_3_long | exiting job 1005 (test_job_3_long) with success: execution time (RANDOM)ms
(12 rows)

CREATE FUNCTION wait_for_timer_to_run(started_at INTEGER, spins INTEGER=:TEST_SPINWAIT_ITERS) RETURNS BOOLEAN LANGUAGE PLPGSQL AS
$BODY$
Expand Down Expand Up @@ -1191,12 +1205,13 @@ FROM _timescaledb_internal.bgw_job_stat;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+----------------------------------------------------
msg_no | application_name | msg
--------+------------------+-----------------------------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_4 | Execute job 4
(3 rows)
1 | test_job_4 | exiting job 1012 (test_job_4) with success: execution time (RANDOM)ms
(4 rows)

-- Now just make sure that the job actually runs in 200ms
SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(200);
Expand All @@ -1215,16 +1230,18 @@ FROM _timescaledb_internal.bgw_job_stat;
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+----------------------------------------------------
msg_no | application_name | msg
--------+------------------+-----------------------------------------------------------------------
0 | DB Scheduler | [TESTING] Registered new background worker
1 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_4 | Execute job 4
1 | test_job_4 | exiting job 1012 (test_job_4) with success: execution time (RANDOM)ms
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
1 | DB Scheduler | [TESTING] Registered new background worker
2 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | test_job_4 | Execute job 4
(7 rows)
1 | test_job_4 | exiting job 1012 (test_job_4) with success: execution time (RANDOM)ms
(9 rows)

-- Test updating jobs list
TRUNCATE bgw_log;
Expand Down Expand Up @@ -1572,25 +1589,29 @@ SELECT ts_bgw_params_reset_time(500000, true);
(1 row)

SELECT * FROM sorted_bgw_log;
msg_no | application_name | msg
--------+------------------+----------------------------------------------------
msg_no | application_name | msg
--------+------------------+--------------------------------------------------------------------
0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
1 | DB Scheduler | [TESTING] Registered new background worker
2 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | another | Execute job 1
1 | another | exiting job 1024 (another) with success: execution time (RANDOM)ms
3 | DB Scheduler | [TESTING] Registered new background worker
4 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | another | Execute job 1
1 | another | exiting job 1024 (another) with success: execution time (RANDOM)ms
5 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
6 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
7 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
8 | DB Scheduler | [TESTING] Registered new background worker
9 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | new_job | Execute job 1
1 | new_job | exiting job 1025 (new_job) with success: execution time (RANDOM)ms
10 | DB Scheduler | [TESTING] Registered new background worker
11 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM)
0 | new_job | Execute job 1
(16 rows)
1 | new_job | exiting job 1025 (new_job) with success: execution time (RANDOM)ms
(20 rows)

\x on
SELECT * FROM _timescaledb_internal.bgw_job_stat;
Expand Down

0 comments on commit 4c5c0b1

Please sign in to comment.