Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix crash and cancel when adding data node #2751

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions tsl/src/data_node.c
Expand Up @@ -726,6 +726,12 @@ data_node_add_internal(PG_FUNCTION_ARGS, bool set_distid)
{
TSConnection *conn =
connect_for_bootstrapping(node_name, host, port, username, password);

if (NULL == conn)
ereport(ERROR,
(errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),
errmsg("could not connect to \"%s\"", node_name)));

data_node_validate_extension_availability(conn);
database_created = data_node_bootstrap_database(conn, &database);
remote_connection_close(conn);
Expand All @@ -742,6 +748,7 @@ data_node_add_internal(PG_FUNCTION_ARGS, bool set_distid)
* we do not need 2PC support. */
node_options = create_data_node_options(host, port, dbname, username, password);
conn = remote_connection_open_with_options(node_name, node_options, false);
Assert(NULL != conn);
remote_connection_cmd_ok(conn, "BEGIN");

if (bootstrap)
Expand Down
4 changes: 2 additions & 2 deletions tsl/src/dist_util.c
Expand Up @@ -130,8 +130,8 @@ dist_util_set_id_with_uuid_check(Datum dist_id, bool check_uuid)
(errmsg("cannot add the current database as a data node to itself"),
errdetail("Adding the current database as a data node to itself would create a "
"cycle. Use a different instance or database for the data node."),
errhint("Check that the 'port' parameter is given and refer to a different "
"instance or that the 'database' parameter is given and refer to a "
errhint("Check that the 'port' parameter refers to a different "
"instance or that the 'database' parameter refers to a "
"different database."))));

ts_metadata_insert(CStringGetDatum(METADATA_DISTRIBUTED_UUID_KEY_NAME),
Expand Down
75 changes: 74 additions & 1 deletion tsl/src/remote/connection.c
Expand Up @@ -1181,7 +1181,74 @@ remote_connection_open_with_options_nothrow(const char *node_name, List *connect
keywords[option_pos] = values[option_pos] = NULL;
Assert(option_pos <= option_count);

pg_conn = PQconnectdbParams(keywords, values, false);
/* Try to establish a connection to a database while handling
* interrupts. The connection attempt should be aborted if the user
* cancels the transaction (e.g., ctrl-c), so we need to use wait events
* and poll the socket and latch. Wrap in a try-catch since
* CHECK_FOR_INTERRUPTS() will throw an error and we need to clean up the
* connection in that case. */
PG_TRY();
{
bool stop_waiting = false;

pg_conn = PQconnectStartParams(keywords, values, 0 /* do not expand DB names */);

if (PQstatus(pg_conn) == CONNECTION_BAD)
{
PQfinish(pg_conn);
pg_conn = NULL;
}

while (NULL != pg_conn)
{
int events = WL_LATCH_SET;

#if PG12_GE
events |= WL_EXIT_ON_PM_DEATH;
#endif

switch (PQconnectPoll(pg_conn))
{
case PGRES_POLLING_FAILED:
/* connection attempt failed */
stop_waiting = true;
PQfinish(pg_conn);
pg_conn = NULL;
break;
case PGRES_POLLING_OK:
/* Connection attempt succeeded */
stop_waiting = true;
break;
case PGRES_POLLING_READING:
/* Should wait for read on socket */
events |= WL_SOCKET_READABLE;
break;
case PGRES_POLLING_WRITING:
/* Should wait for write on socket */
events |= WL_SOCKET_WRITEABLE;
break;
default:
break;
}

if (stop_waiting)
break;

WaitLatchOrSocket(MyLatch, events, PQsocket(pg_conn), -1L, PG_WAIT_EXTENSION);

ResetLatch(MyLatch);

CHECK_FOR_INTERRUPTS();
}
}
PG_CATCH();
{
if (NULL != pg_conn)
PQfinish(pg_conn);

PG_RE_THROW();
}
PG_END_TRY();

/* Cast to (char **) to silence warning with MSVC compiler */
pfree((char **) keywords);
Expand All @@ -1190,6 +1257,12 @@ remote_connection_open_with_options_nothrow(const char *node_name, List *connect
if (NULL == pg_conn)
return NULL;

if (PQstatus(pg_conn) != CONNECTION_OK)
{
PQfinish(pg_conn);
return NULL;
}

ts_conn = remote_connection_create(pg_conn, false, node_name);

if (NULL == ts_conn)
Expand Down
3 changes: 1 addition & 2 deletions tsl/test/expected/data_node.out
Expand Up @@ -1324,8 +1324,7 @@ SET ROLE :ROLE_3;
-- ROLE_3 doesn't have a password in the passfile and has not way to
-- authenticate so adding a data node will still fail.
SELECT * FROM add_data_node('data_node_6', host => 'localhost', database => 'data_node_6');
ERROR: no connection to the server

ERROR: could not connect to "data_node_6"
\set ON_ERROR_STOP 0
-- Providing the password on the command line should work
SELECT * FROM add_data_node('data_node_6', host => 'localhost', database => 'data_node_6', password => :'ROLE_3_PASS');
Expand Down