Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add role-level security to job error log #5218

Merged
merged 1 commit into from Jan 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -12,6 +12,7 @@ accidentally triggering the load of a previous DB version.**
**Bugfixes**
* #4804 Skip bucketing when start or end of refresh job is null
* #4926 Fix corruption when inserting into compressed chunks
* #5218 Add role-level security to job error log

## 2.9.2 (2023-01-26)

Expand Down
5 changes: 5 additions & 0 deletions sql/pre_install/tables.sql
Expand Up @@ -570,6 +570,7 @@ CREATE TABLE _timescaledb_internal.job_errors (
);

SELECT pg_catalog.pg_extension_config_dump('_timescaledb_internal.job_errors', '');

-- Set table permissions
-- We need to grant SELECT to PUBLIC for all tables even those not
-- marked as being dumped because pg_dump will try to access all
Expand All @@ -586,3 +587,7 @@ GRANT SELECT ON ALL SEQUENCES IN SCHEMA _timescaledb_catalog TO PUBLIC;
GRANT SELECT ON ALL SEQUENCES IN SCHEMA _timescaledb_config TO PUBLIC;

GRANT SELECT ON ALL SEQUENCES IN SCHEMA _timescaledb_internal TO PUBLIC;

-- We want to restrict access to the job errors to only work through
-- the job_errors view.
REVOKE ALL ON _timescaledb_internal.job_errors FROM PUBLIC;
37 changes: 37 additions & 0 deletions sql/updates/latest-dev.sql
@@ -0,0 +1,37 @@
CREATE OR REPLACE VIEW timescaledb_information.job_errors
WITH (security_barrier = true) AS
SELECT
job_id,
error_data ->> 'proc_schema' as proc_schema,
error_data ->> 'proc_name' as proc_name,
pid,
start_time,
finish_time,
error_data ->> 'sqlerrcode' AS sqlerrcode,
CASE WHEN error_data ->>'message' IS NOT NULL THEN
CASE WHEN error_data ->>'detail' IS NOT NULL THEN
CASE WHEN error_data ->>'hint' IS NOT NULL THEN concat(error_data ->>'message', '. ', error_data ->>'detail', '. ', error_data->>'hint')
ELSE concat(error_data ->>'message', ' ', error_data ->>'detail')
END
ELSE
CASE WHEN error_data ->>'hint' IS NOT NULL THEN concat(error_data ->>'message', '. ', error_data->>'hint')
ELSE error_data ->>'message'
END
END
ELSE
'job crash detected, see server logs'
END
AS err_message
fabriziomello marked this conversation as resolved.
Show resolved Hide resolved
FROM
_timescaledb_internal.job_errors
LEFT JOIN
_timescaledb_config.bgw_job ON (bgw_job.id = job_errors.job_id)
WHERE
pg_catalog.pg_has_role(current_user,
(SELECT pg_catalog.pg_get_userbyid(datdba)
FROM pg_catalog.pg_database
WHERE datname = current_database()),
'MEMBER') IS TRUE
OR pg_catalog.pg_has_role(current_user, owner, 'MEMBER') IS TRUE;

REVOKE ALL ON _timescaledb_internal.job_errors FROM PUBLIC;
5 changes: 5 additions & 0 deletions sql/updates/reverse-dev.sql
@@ -0,0 +1,5 @@
GRANT ALL ON _timescaledb_internal.job_errors TO PUBLIC;

ALTER EXTENSION timescaledb DROP VIEW timescaledb_information.job_errors;

DROP VIEW timescaledb_information.job_errors;
42 changes: 30 additions & 12 deletions sql/views.sql
Expand Up @@ -301,31 +301,49 @@ ORDER BY table_name,
segmentby_column_index,
orderby_column_index;

-- troubleshooting job errors view
CREATE OR REPLACE VIEW timescaledb_information.job_errors AS
-- Job errors view that adds a security barrier on the job_errors
-- table in _timescaledb_internal. The view only allows users to view
-- log entries belonging to jobs that are owned by any of the users
-- role. A special case is added so that the superuser or the database
-- owner can see all job log entries, even those that do not have an
-- associated job.
--
-- Note that we have to use a sub-select here since pg_database_owner
-- does not exist before PostgreSQL 14.
CREATE OR REPLACE VIEW timescaledb_information.job_errors
WITH (security_barrier = true) AS
SELECT
job_id,
error_data ->> 'proc_schema' as proc_schema,
error_data ->> 'proc_name' as proc_name,
error_data->>'proc_schema' as proc_schema,
error_data->>'proc_name' as proc_name,
pid,
start_time,
finish_time,
error_data ->> 'sqlerrcode' AS sqlerrcode,
CASE WHEN error_data ->>'message' IS NOT NULL THEN
CASE WHEN error_data ->>'detail' IS NOT NULL THEN
CASE WHEN error_data ->>'hint' IS NOT NULL THEN concat(error_data ->>'message', '. ', error_data ->>'detail', '. ', error_data->>'hint')
ELSE concat(error_data ->>'message', ' ', error_data ->>'detail')
error_data->>'sqlerrcode' AS sqlerrcode,
CASE WHEN error_data->>'message' IS NOT NULL THEN
CASE WHEN error_data->>'detail' IS NOT NULL THEN
CASE WHEN error_data->>'hint' IS NOT NULL THEN concat(error_data->>'message', '. ', error_data->>'detail', '. ', error_data->>'hint')
ELSE concat(error_data->>'message', ' ', error_data ->>'detail')
END
ELSE
CASE WHEN error_data ->>'hint' IS NOT NULL THEN concat(error_data ->>'message', '. ', error_data->>'hint')
ELSE error_data ->>'message'
CASE WHEN error_data->>'hint' IS NOT NULL THEN concat(error_data->>'message', '. ', error_data->>'hint')
ELSE error_data->>'message'
END
END
ELSE
'job crash detected, see server logs'
END
AS err_message
FROM
_timescaledb_internal.job_errors;
_timescaledb_internal.job_errors
LEFT JOIN
_timescaledb_config.bgw_job ON (bgw_job.id = job_errors.job_id)
WHERE
pg_catalog.pg_has_role(current_user,
(SELECT pg_catalog.pg_get_userbyid(datdba)
FROM pg_catalog.pg_database
WHERE datname = current_database()),
'MEMBER') IS TRUE
OR pg_catalog.pg_has_role(current_user, owner, 'MEMBER') IS TRUE;

GRANT SELECT ON ALL TABLES IN SCHEMA timescaledb_information TO PUBLIC;
4 changes: 3 additions & 1 deletion sql/with_telemetry.sql
Expand Up @@ -2,9 +2,11 @@
-- Please see the included NOTICE for copyright information and
-- LICENSE-APACHE for a copy of the license.

-- Security definer is good to use here since the function computes an
-- aggregate of error codes and does not expose the individual messages.
CREATE OR REPLACE FUNCTION @extschema@.get_telemetry_report() RETURNS jsonb
AS '@MODULE_PATHNAME@', 'ts_telemetry_get_report_jsonb'
LANGUAGE C STABLE PARALLEL SAFE;
LANGUAGE C STABLE PARALLEL SAFE SECURITY DEFINER;

INSERT INTO _timescaledb_config.bgw_job (id, application_name, schedule_interval, max_runtime, max_retries, retry_period, proc_schema, proc_name, owner, scheduled, fixed_schedule) VALUES
(1, 'Telemetry Reporter [1]', INTERVAL '24h', INTERVAL '100s', -1, INTERVAL '1h', '_timescaledb_internal', 'policy_telemetry', CURRENT_ROLE, true, false)
Expand Down
132 changes: 132 additions & 0 deletions tsl/test/expected/job_errors_permissions.out
@@ -0,0 +1,132 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\c :TEST_DBNAME :ROLE_SUPERUSER
-- Table to update concurrently to generate error message
CREATE TABLE my_table (a int, b int);
INSERT INTO my_table VALUES (0, 0);
GRANT ALL ON my_table TO PUBLIC;
ALTER SYSTEM SET DEFAULT_TRANSACTION_ISOLATION TO 'serializable';
SELECT pg_reload_conf();
pg_reload_conf
----------------
t
(1 row)

SET ROLE :ROLE_DEFAULT_PERM_USER;
CREATE OR REPLACE PROCEDURE job_fail(jobid int, config jsonb)
AS $$
BEGIN
RAISE EXCEPTION 'raising an exception';
END
$$ LANGUAGE plpgsql;
SELECT add_job('job_fail', '4 minutes', initial_start => now()) as job_fail_id \gset
CREATE OR REPLACE PROCEDURE custom_proc1(jobid int, config jsonb) LANGUAGE PLPGSQL AS
$$
BEGIN
UPDATE my_table SET b = 1 WHERE a = 0;
PERFORM pg_sleep(10);
COMMIT;
END
$$;
SELECT add_job('custom_proc1', '2 min', initial_start => now()) as custom_proc1_id \gset
SET ROLE :ROLE_DEFAULT_PERM_USER_2;
CREATE OR REPLACE PROCEDURE custom_proc2(jobid int, config jsonb) LANGUAGE PLPGSQL AS
$$
BEGIN
UPDATE my_table SET b = 2 WHERE a = 0;
PERFORM pg_sleep(10);
COMMIT;
END
$$;
-- to make sure custom_log is first updated by custom_proc_1
select add_job('custom_proc2', '2 min', initial_start => now() + interval '5 seconds') as custom_proc2_id \gset
SET ROLE :ROLE_SUPERUSER;
SELECT _timescaledb_internal.start_background_workers();
start_background_workers
--------------------------
t
(1 row)

SELECT pg_sleep(20);
pg_sleep
----------

(1 row)

\d timescaledb_information.job_errors
View "timescaledb_information.job_errors"
Column | Type | Collation | Nullable | Default
-------------+--------------------------+-----------+----------+---------
job_id | integer | | |
proc_schema | text | | |
proc_name | text | | |
pid | integer | | |
start_time | timestamp with time zone | | |
finish_time | timestamp with time zone | | |
sqlerrcode | text | | |
err_message | text | | |

-- We add a few entries without a matching job id, so that we get a
-- null owner. Note that the second entry does not have a message
-- defined, so it will print a standardized message assuming that the
-- job crashed.
\set start '2000-01-01 00:00:00+00'
\set finish '2000-01-01 00:00:10+00'
INSERT INTO _timescaledb_internal.job_errors(job_id, pid, start_time, finish_time, error_data) VALUES
(11111, 12345, :'start'::timestamptz, :'finish'::timestamptz, '{"message": "not an error"}'),
(22222, 45678, :'start'::timestamptz, :'finish'::timestamptz, '{}');
-- We check the log as different users and should only see what we
-- have permissions to see. We only bother about jobs at 1000 or
-- larger since the standard jobs are flaky.
SET ROLE :ROLE_DEFAULT_PERM_USER;
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
FROM timescaledb_information.job_errors WHERE job_id >= 1000;
job_id | proc_schema | proc_name | sqlerrcode | err_message
--------+-------------+-----------+------------+----------------------
1000 | public | job_fail | P0001 | raising an exception
(1 row)

SET ROLE :ROLE_DEFAULT_PERM_USER_2;
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
FROM timescaledb_information.job_errors WHERE job_id >= 1000;
job_id | proc_schema | proc_name | sqlerrcode | err_message
--------+-------------+--------------+------------+-----------------------------------------------------
1002 | public | custom_proc2 | 40001 | could not serialize access due to concurrent update
(1 row)

SET ROLE :ROLE_SUPERUSER;
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
FROM timescaledb_information.job_errors WHERE job_id >= 1000;
job_id | proc_schema | proc_name | sqlerrcode | err_message
--------+-------------+--------------+------------+-----------------------------------------------------
1000 | public | job_fail | P0001 | raising an exception
1002 | public | custom_proc2 | 40001 | could not serialize access due to concurrent update
11111 | | | | not an error
22222 | | | | job crash detected, see server logs
(4 rows)

SELECT _timescaledb_internal.stop_background_workers();
stop_background_workers
-------------------------
t
(1 row)

SELECT delete_job(:custom_proc2_id);
delete_job
------------

(1 row)

SELECT delete_job(:custom_proc1_id);
delete_job
------------

(1 row)

SELECT delete_job(:job_fail_id);
delete_job
------------

(1 row)

2 changes: 2 additions & 0 deletions tsl/test/sql/CMakeLists.txt
Expand Up @@ -31,6 +31,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug)
APPEND
TEST_FILES
bgw_db_scheduler.sql
job_errors_permissions.sql
troubleshooting_job_errors.sql
bgw_db_scheduler_fixed.sql
bgw_reorder_drop_chunks.sql
Expand Down Expand Up @@ -107,6 +108,7 @@ set(SOLO_TESTS
# dist_hypertable needs a lot of memory when the Sanitizer is active
dist_hypertable-${PG_VERSION_MAJOR}
bgw_db_scheduler
job_errors_permissions
troubleshooting_job_errors
bgw_db_scheduler_fixed
bgw_reorder_drop_chunks
Expand Down
85 changes: 85 additions & 0 deletions tsl/test/sql/job_errors_permissions.sql
@@ -0,0 +1,85 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.

\c :TEST_DBNAME :ROLE_SUPERUSER

-- Table to update concurrently to generate error message
CREATE TABLE my_table (a int, b int);
INSERT INTO my_table VALUES (0, 0);
GRANT ALL ON my_table TO PUBLIC;
ALTER SYSTEM SET DEFAULT_TRANSACTION_ISOLATION TO 'serializable';
SELECT pg_reload_conf();

SET ROLE :ROLE_DEFAULT_PERM_USER;

CREATE OR REPLACE PROCEDURE job_fail(jobid int, config jsonb)
AS $$
BEGIN
RAISE EXCEPTION 'raising an exception';
END
$$ LANGUAGE plpgsql;

SELECT add_job('job_fail', '4 minutes', initial_start => now()) as job_fail_id \gset

CREATE OR REPLACE PROCEDURE custom_proc1(jobid int, config jsonb) LANGUAGE PLPGSQL AS
$$
BEGIN
UPDATE my_table SET b = 1 WHERE a = 0;
PERFORM pg_sleep(10);
COMMIT;
END
$$;

SELECT add_job('custom_proc1', '2 min', initial_start => now()) as custom_proc1_id \gset

SET ROLE :ROLE_DEFAULT_PERM_USER_2;

CREATE OR REPLACE PROCEDURE custom_proc2(jobid int, config jsonb) LANGUAGE PLPGSQL AS
$$
BEGIN
UPDATE my_table SET b = 2 WHERE a = 0;
PERFORM pg_sleep(10);
COMMIT;
END
$$;

-- to make sure custom_log is first updated by custom_proc_1
select add_job('custom_proc2', '2 min', initial_start => now() + interval '5 seconds') as custom_proc2_id \gset

SET ROLE :ROLE_SUPERUSER;
SELECT _timescaledb_internal.start_background_workers();
SELECT pg_sleep(20);

\d timescaledb_information.job_errors

-- We add a few entries without a matching job id, so that we get a
-- null owner. Note that the second entry does not have a message
-- defined, so it will print a standardized message assuming that the
-- job crashed.
\set start '2000-01-01 00:00:00+00'
\set finish '2000-01-01 00:00:10+00'
INSERT INTO _timescaledb_internal.job_errors(job_id, pid, start_time, finish_time, error_data) VALUES
(11111, 12345, :'start'::timestamptz, :'finish'::timestamptz, '{"message": "not an error"}'),
(22222, 45678, :'start'::timestamptz, :'finish'::timestamptz, '{}');

-- We check the log as different users and should only see what we
-- have permissions to see. We only bother about jobs at 1000 or
-- larger since the standard jobs are flaky.
SET ROLE :ROLE_DEFAULT_PERM_USER;
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
FROM timescaledb_information.job_errors WHERE job_id >= 1000;

SET ROLE :ROLE_DEFAULT_PERM_USER_2;
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
FROM timescaledb_information.job_errors WHERE job_id >= 1000;

SET ROLE :ROLE_SUPERUSER;
SELECT job_id, proc_schema, proc_name, sqlerrcode, err_message
FROM timescaledb_information.job_errors WHERE job_id >= 1000;

SELECT _timescaledb_internal.stop_background_workers();

SELECT delete_job(:custom_proc2_id);
SELECT delete_job(:custom_proc1_id);
SELECT delete_job(:job_fail_id);