-
Notifications
You must be signed in to change notification settings - Fork 848
/
dist_remote_error-12.out
229 lines (217 loc) · 12.9 KB
/
dist_remote_error-12.out
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
-- Import setup file to data nodes.
\unset ECHO
-- Disable SSL to get stable error output across versions. SSL adds some output
-- that changed in PG 14.
set timescaledb.debug_enable_ssl to off;
set client_min_messages to error;
SET timescaledb.hide_data_node_name_in_errors = 'on';
-- A relatively big table on one data node
create table metrics_dist_remote_error(like metrics_dist);
select table_name from create_distributed_hypertable('metrics_dist_remote_error', 'time', 'device_id',
data_nodes => '{"data_node_1"}');
table_name
metrics_dist_remote_error
(1 row)
insert into metrics_dist_remote_error select * from metrics_dist order by metrics_dist limit 20000;
-- The error messages vary wildly between Postgres versions, depending on
-- the particular behavior of libpq in this or that case. The purpose of this
-- test is not to solidify this accidental behavior, but to merely exercise the
-- error handling code to make sure it doesn't have fatal errors. Unfortunately,
-- there is no way to suppress error output from a psql script.
set client_min_messages to ERROR;
\set ON_ERROR_STOP off
set timescaledb.remote_data_fetcher = 'copy';
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0;
ERROR: [<hidden node name>]: debug point: requested to error out after 0 rows, 1 rows seen
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0;
ERROR: [<hidden node name>]: debug point: requested to error out after 1 rows, 1 rows seen
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0;
ERROR: [<hidden node name>]: debug point: requested to error out after 2 rows, 2 rows seen
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0;
ERROR: [<hidden node name>]: debug point: requested to error out after 701 rows, 701 rows seen
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0;
ERROR: [<hidden node name>]: debug point: requested to error out after 10000 rows, 10000 rows seen
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(16384, device_id)::int != 0;
ERROR: [<hidden node name>]: debug point: requested to error out after 16384 rows, 16384 rows seen
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0;
QUERY PLAN
Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=20000 loops=1)
Output: 1
Data node: data_node_1
Fetcher Type: COPY
Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk
Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[..]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0))
(6 rows)
-- We don't test fatal errors here, because PG versions before 14 are unable to
-- report them properly to the access node, so we get different errors in these
-- versions.
-- Now test the same with the cursor fetcher.
set timescaledb.remote_data_fetcher = 'cursor';
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(0, device_id)::int != 0;
ERROR: [<hidden node name>]: debug point: requested to error out after 0 rows, 1 rows seen
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(1, device_id)::int != 0;
ERROR: [<hidden node name>]: debug point: requested to error out after 1 rows, 1 rows seen
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(2, device_id)::int != 0;
ERROR: [<hidden node name>]: debug point: requested to error out after 2 rows, 2 rows seen
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(701, device_id)::int != 0;
ERROR: [<hidden node name>]: debug point: requested to error out after 701 rows, 701 rows seen
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000, device_id)::int != 0;
ERROR: [<hidden node name>]: debug point: requested to error out after 10000 rows, 10000 rows seen
explain (analyze, verbose, costs off, timing off, summary off)
select 1 from metrics_dist_remote_error where ts_debug_shippable_error_after_n_rows(10000000, device_id)::int != 0;
QUERY PLAN
Custom Scan (DataNodeScan) on public.metrics_dist_remote_error (actual rows=20000 loops=1)
Output: 1
Data node: data_node_1
Fetcher Type: Cursor
Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk
Remote SQL: SELECT NULL FROM public.metrics_dist_remote_error WHERE _timescaledb_internal.chunks_in(public.metrics_dist_remote_error.*, ARRAY[..]) AND ((public.ts_debug_shippable_error_after_n_rows(10000000, device_id) <> 0))
(6 rows)
-- Table with broken send for a data type.
create table metrics_dist_bs(like metrics_dist);
alter table metrics_dist_bs alter column v0 type bs;
select table_name from create_distributed_hypertable('metrics_dist_bs',
'time', 'device_id');
table_name
metrics_dist_bs
(1 row)
set timescaledb.enable_connection_binary_data to off;
insert into metrics_dist_bs
select * from metrics_dist_remote_error;
set timescaledb.enable_connection_binary_data to on;
explain (analyze, verbose, costs off, timing off, summary off)
select * from metrics_dist_bs;
ERROR: [<hidden node name>]: debug point: requested to error out after 7103 rows, 7103 rows seen
drop table metrics_dist_bs;
-- Table with broken receive for a data type.
create table metrics_dist_br(like metrics_dist);
alter table metrics_dist_br alter column v0 type br;
select table_name from create_distributed_hypertable('metrics_dist_br',
'time', 'device_id');
table_name
metrics_dist_br
(1 row)
select hypertable_name, replication_factor from timescaledb_information.hypertables
where hypertable_name = 'metrics_dist_br';
hypertable_name | replication_factor
-----------------+--------------------
metrics_dist_br | 1
(1 row)
-- Test that INSERT and COPY fail on data nodes.
-- Note that we use the text format for the COPY input, so that the access node
-- doesn't call `recv` and fail by itself. It's going to use binary format for
-- transfer to data nodes regardless of the input format.
set timescaledb.dist_copy_transfer_format = 'binary';
-- First, create the reference data file in text format.
\copy (select * from metrics_dist_remote_error) to 'dist_remote_error.text' with (format text);
-- We have to test various interleavings of COPY and INSERT to check that
-- one can recover from connection failure states introduced by another.
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 7103 rows, 7103 rows seen
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 7103 rows, 7103 rows seen
insert into metrics_dist_br select * from metrics_dist_remote_error;
ERROR: [<hidden node name>]: debug point: requested to error out after 7103 rows, 7103 rows seen
insert into metrics_dist_br select * from metrics_dist_remote_error;
ERROR: [<hidden node name>]: debug point: requested to error out after 7103 rows, 7103 rows seen
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 7103 rows, 7103 rows seen
-- Fail at different points
set timescaledb.debug_broken_sendrecv_throw_after = 1;
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 1 rows, 1 rows seen
set timescaledb.debug_broken_sendrecv_throw_after = 2;
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 2 rows, 2 rows seen
set timescaledb.debug_broken_sendrecv_throw_after = 1023;
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 1023 rows, 1023 rows seen
set timescaledb.debug_broken_sendrecv_throw_after = 1024;
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 1024 rows, 1024 rows seen
set timescaledb.debug_broken_sendrecv_throw_after = 1025;
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 1025 rows, 1025 rows seen
reset timescaledb.debug_broken_sendrecv_throw_after;
-- Same with a different replication factor.
truncate metrics_dist_br;
select set_replication_factor('metrics_dist_br', 2);
set_replication_factor
(1 row)
select hypertable_name, replication_factor from timescaledb_information.hypertables
where hypertable_name = 'metrics_dist_br';
hypertable_name | replication_factor
-----------------+--------------------
metrics_dist_br | 2
(1 row)
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 7103 rows, 7103 rows seen
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 7103 rows, 7103 rows seen
insert into metrics_dist_br select * from metrics_dist_remote_error;
ERROR: [<hidden node name>]: debug point: requested to error out after 7103 rows, 7103 rows seen
insert into metrics_dist_br select * from metrics_dist_remote_error;
ERROR: [<hidden node name>]: debug point: requested to error out after 7103 rows, 7103 rows seen
set timescaledb.debug_broken_sendrecv_throw_after = 1;
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 1 rows, 1 rows seen
set timescaledb.debug_broken_sendrecv_throw_after = 2;
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 2 rows, 2 rows seen
set timescaledb.debug_broken_sendrecv_throw_after = 1023;
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 1023 rows, 1023 rows seen
set timescaledb.debug_broken_sendrecv_throw_after = 1024;
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 1024 rows, 1024 rows seen
set timescaledb.debug_broken_sendrecv_throw_after = 1025;
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
ERROR: [<hidden node name>]: debug point: requested to error out after 1025 rows, 1025 rows seen
-- Should succeed with text format for data transfer.
set timescaledb.dist_copy_transfer_format = 'text';
\copy metrics_dist_br from 'dist_remote_error.text' with (format text);
-- Final check.
set timescaledb.enable_connection_binary_data = false;
select count(*) from metrics_dist_br;
count
20000
(1 row)
set timescaledb.enable_connection_binary_data = true;
reset timescaledb.debug_broken_sendrecv_throw_after;
drop table metrics_dist_br;
-- Table with sleepy receive for a data type, to improve coverage of the waiting
-- code on the access node.
create table metrics_dist_bl(like metrics_dist);
alter table metrics_dist_bl alter column v0 type bl;
select table_name from create_distributed_hypertable('metrics_dist_bl',
'time', 'device_id');
table_name
metrics_dist_bl
(1 row)
-- We're using a sleepy recv function, so we need the binary transfer format
-- for it to be called on the data nodes.
set timescaledb.dist_copy_transfer_format = 'binary';
-- Test INSERT and COPY with slow data node.
\copy metrics_dist_bl from 'dist_remote_error.text' with (format text);
insert into metrics_dist_bl select * from metrics_dist_remote_error;
select count(*) from metrics_dist_bl;
count
40000
(1 row)
drop table metrics_dist_bl;
drop table metrics_dist_remote_error;