Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix thread safety in LifecycleNode::get_current_state() for Humble #2183

Merged
merged 8 commits into from Jun 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions rclcpp_lifecycle/CMakeLists.txt
Expand Up @@ -20,6 +20,8 @@ find_package(lifecycle_msgs REQUIRED)
add_library(rclcpp_lifecycle
src/lifecycle_node.cpp
src/managed_entity.cpp
src/mutex_map.cpp
src/mutex_map.hpp
src/node_interfaces/lifecycle_node_interface.cpp
src/state.cpp
src/transition.cpp
Expand Down
17 changes: 17 additions & 0 deletions rclcpp_lifecycle/include/rclcpp_lifecycle/state.hpp
Expand Up @@ -24,6 +24,8 @@

namespace rclcpp_lifecycle
{
/// Forward declaration of mutex helper class
class MutexMap;

/// Abstract class for the Lifecycle's states.
/**
Expand Down Expand Up @@ -92,6 +94,21 @@ class State
bool owns_rcl_state_handle_;

rcl_lifecycle_state_t * state_handle_;

private:
/// Maps state handle mutexes to each instance of State.
/**
* \details A mutex is added to this map when each new instance of State is constructed.
*
* The mutex is removed when the instance of State is destroyed.
*
* The mutex is locked while state_handle_ is being accessed.
*
* This static member exists to allow implementing the fix described in ros2/rclcpp#1756
* in Humble without breaking ABI compatibility, since adding a new static data
* member is permitted under REP-0009.
*/
static MutexMap state_handle_mutex_map_;
fujitatomoya marked this conversation as resolved.
Show resolved Hide resolved
};

} // namespace rclcpp_lifecycle
Expand Down
138 changes: 86 additions & 52 deletions rclcpp_lifecycle/src/lifecycle_node_interface_impl.hpp
Expand Up @@ -20,6 +20,7 @@
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#include <utility>
Expand Down Expand Up @@ -64,7 +65,11 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
~LifecycleNodeInterfaceImpl()
{
rcl_node_t * node_handle = node_base_interface_->get_rcl_node_handle();
auto ret = rcl_lifecycle_state_machine_fini(&state_machine_, node_handle);
rcl_ret_t ret;
{
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
ret = rcl_lifecycle_state_machine_fini(&state_machine_, node_handle);
}
if (ret != RCL_RET_OK) {
RCUTILS_LOG_FATAL_NAMED(
"rclcpp_lifecycle",
Expand All @@ -78,7 +83,6 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
rcl_node_t * node_handle = node_base_interface_->get_rcl_node_handle();
const rcl_node_options_t * node_options =
rcl_node_get_options(node_base_interface_->get_rcl_node_handle());
state_machine_ = rcl_lifecycle_get_zero_initialized_state_machine();
auto state_machine_options = rcl_lifecycle_get_default_state_machine_options();
state_machine_options.enable_com_interface = enable_communication_interface;
state_machine_options.allocator = node_options->allocator;
Expand All @@ -89,6 +93,8 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
// The publisher takes a C-Typesupport since the publishing (i.e. creating
// the message) is done fully in RCL.
// Services are handled in C++, so that it needs a C++ typesupport structure.
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
state_machine_ = rcl_lifecycle_get_zero_initialized_state_machine();
rcl_ret_t ret = rcl_lifecycle_state_machine_init(
&state_machine_,
node_handle,
Expand All @@ -105,6 +111,8 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
node_base_interface_->get_name());
}

current_state_ = State(state_machine_.current_state);

if (enable_communication_interface) {
{ // change_state
auto cb = std::bind(
Expand Down Expand Up @@ -206,28 +214,30 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
std::shared_ptr<ChangeStateSrv::Response> resp)
{
(void)header;
if (rcl_lifecycle_state_machine_is_initialized(&state_machine_) != RCL_RET_OK) {
throw std::runtime_error(
"Can't get state. State machine is not initialized.");
}

auto transition_id = req->transition.id;
// if there's a label attached to the request,
// we check the transition attached to this label.
// we further can't compare the id of the looked up transition
// because ros2 service call defaults all intergers to zero.
// that means if we call ros2 service call ... {transition: {label: shutdown}}
// the id of the request is 0 (zero) whereas the id from the lookup up transition
// can be different.
// the result of this is that the label takes presedence of the id.
if (req->transition.label.size() != 0) {
auto rcl_transition = rcl_lifecycle_get_transition_by_label(
state_machine_.current_state, req->transition.label.c_str());
if (rcl_transition == nullptr) {
resp->success = false;
return;
std::uint8_t transition_id;
{
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
if (rcl_lifecycle_state_machine_is_initialized(&state_machine_) != RCL_RET_OK) {
throw std::runtime_error("Can't get state. State machine is not initialized.");
}
transition_id = req->transition.id;
// if there's a label attached to the request,
// we check the transition attached to this label.
// we further can't compare the id of the looked up transition
// because ros2 service call defaults all intergers to zero.
// that means if we call ros2 service call ... {transition: {label: shutdown}}
// the id of the request is 0 (zero) whereas the id from the lookup up transition
// can be different.
// the result of this is that the label takes presedence of the id.
if (req->transition.label.size() != 0) {
auto rcl_transition = rcl_lifecycle_get_transition_by_label(
state_machine_.current_state, req->transition.label.c_str());
if (rcl_transition == nullptr) {
resp->success = false;
return;
}
transition_id = static_cast<std::uint8_t>(rcl_transition->id);
}
transition_id = static_cast<std::uint8_t>(rcl_transition->id);
}

node_interfaces::LifecycleNodeInterface::CallbackReturn cb_return_code;
Expand All @@ -248,6 +258,7 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
{
(void)header;
(void)req;
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
if (rcl_lifecycle_state_machine_is_initialized(&state_machine_) != RCL_RET_OK) {
throw std::runtime_error(
"Can't get state. State machine is not initialized.");
Expand All @@ -264,6 +275,7 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
{
(void)header;
(void)req;
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
if (rcl_lifecycle_state_machine_is_initialized(&state_machine_) != RCL_RET_OK) {
throw std::runtime_error(
"Can't get available states. State machine is not initialized.");
Expand All @@ -286,6 +298,7 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
{
(void)header;
(void)req;
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
if (rcl_lifecycle_state_machine_is_initialized(&state_machine_) != RCL_RET_OK) {
throw std::runtime_error(
"Can't get available transitions. State machine is not initialized.");
Expand Down Expand Up @@ -313,6 +326,7 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
{
(void)header;
(void)req;
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
if (rcl_lifecycle_state_machine_is_initialized(&state_machine_) != RCL_RET_OK) {
throw std::runtime_error(
"Can't get available transitions. State machine is not initialized.");
Expand Down Expand Up @@ -343,6 +357,7 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
get_available_states()
{
std::vector<State> states;
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
states.reserve(state_machine_.transition_map.states_size);

for (unsigned int i = 0; i < state_machine_.transition_map.states_size; ++i) {
Expand All @@ -355,6 +370,7 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
get_available_transitions()
{
std::vector<Transition> transitions;
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
transitions.reserve(state_machine_.current_state->valid_transition_size);

for (unsigned int i = 0; i < state_machine_.current_state->valid_transition_size; ++i) {
Expand All @@ -367,6 +383,7 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
get_transition_graph()
{
std::vector<Transition> transitions;
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
transitions.reserve(state_machine_.transition_map.transitions_size);

for (unsigned int i = 0; i < state_machine_.transition_map.transitions_size; ++i) {
Expand All @@ -378,26 +395,32 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
rcl_ret_t
change_state(std::uint8_t transition_id, LifecycleNodeInterface::CallbackReturn & cb_return_code)
{
if (rcl_lifecycle_state_machine_is_initialized(&state_machine_) != RCL_RET_OK) {
RCUTILS_LOG_ERROR(
"Unable to change state for state machine for %s: %s",
node_base_interface_->get_name(), rcl_get_error_string().str);
return RCL_RET_ERROR;
}

constexpr bool publish_update = true;
// keep the initial state to pass to a transition callback
State initial_state(state_machine_.current_state);
State initial_state;
unsigned int current_state_id;

if (
rcl_lifecycle_trigger_transition_by_id(
&state_machine_, transition_id, publish_update) != RCL_RET_OK)
{
RCUTILS_LOG_ERROR(
"Unable to start transition %u from current state %s: %s",
transition_id, state_machine_.current_state->label, rcl_get_error_string().str);
rcutils_reset_error();
return RCL_RET_ERROR;
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
if (rcl_lifecycle_state_machine_is_initialized(&state_machine_) != RCL_RET_OK) {
RCUTILS_LOG_ERROR(
"Unable to change state for state machine for %s: %s",
node_base_interface_->get_name(), rcl_get_error_string().str);
return RCL_RET_ERROR;
}
// keep the initial state to pass to a transition callback
initial_state = State(state_machine_.current_state);

if (
rcl_lifecycle_trigger_transition_by_id(
&state_machine_, transition_id, publish_update) != RCL_RET_OK)
{
RCUTILS_LOG_ERROR(
"Unable to start transition %u from current state %s: %s",
transition_id, state_machine_.current_state->label, rcl_get_error_string().str);
rcutils_reset_error();
return RCL_RET_ERROR;
}
current_state_id = state_machine_.current_state->id;
}

auto get_label_for_return_code =
Expand All @@ -411,27 +434,32 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
return rcl_lifecycle_transition_error_label;
};

cb_return_code = execute_callback(state_machine_.current_state->id, initial_state);
cb_return_code = execute_callback(current_state_id, initial_state);
auto transition_label = get_label_for_return_code(cb_return_code);

if (
rcl_lifecycle_trigger_transition_by_label(
&state_machine_, transition_label, publish_update) != RCL_RET_OK)
{
RCUTILS_LOG_ERROR(
"Failed to finish transition %u. Current state is now: %s (%s)",
transition_id, state_machine_.current_state->label, rcl_get_error_string().str);
rcutils_reset_error();
return RCL_RET_ERROR;
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
if (
rcl_lifecycle_trigger_transition_by_label(
&state_machine_, transition_label, publish_update) != RCL_RET_OK)
{
RCUTILS_LOG_ERROR(
"Failed to finish transition %u. Current state is now: %s (%s)",
transition_id, state_machine_.current_state->label, rcl_get_error_string().str);
rcutils_reset_error();
return RCL_RET_ERROR;
}
current_state_id = state_machine_.current_state->id;
}

// error handling ?!
// TODO(karsten1987): iterate over possible ret value
if (cb_return_code == node_interfaces::LifecycleNodeInterface::CallbackReturn::ERROR) {
RCUTILS_LOG_WARN("Error occurred while doing error handling.");

auto error_cb_code = execute_callback(state_machine_.current_state->id, initial_state);
auto error_cb_code = execute_callback(current_state_id, initial_state);
auto error_cb_label = get_label_for_return_code(error_cb_code);
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);
if (
rcl_lifecycle_trigger_transition_by_label(
&state_machine_, error_cb_label, publish_update) != RCL_RET_OK)
Expand Down Expand Up @@ -476,8 +504,13 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
const State & trigger_transition(
const char * transition_label, LifecycleNodeInterface::CallbackReturn & cb_return_code)
{
auto transition =
rcl_lifecycle_get_transition_by_label(state_machine_.current_state, transition_label);
const rcl_lifecycle_transition_t * transition;
{
std::lock_guard<std::recursive_mutex> lock(state_machine_mutex_);

transition =
rcl_lifecycle_get_transition_by_label(state_machine_.current_state, transition_label);
}
if (transition) {
change_state(static_cast<uint8_t>(transition->id), cb_return_code);
}
Expand Down Expand Up @@ -534,6 +567,7 @@ class LifecycleNode::LifecycleNodeInterfaceImpl
}
}

mutable std::recursive_mutex state_machine_mutex_;
rcl_lifecycle_state_machine_t state_machine_;
State current_state_;
std::map<
Expand Down
43 changes: 43 additions & 0 deletions rclcpp_lifecycle/src/mutex_map.cpp
@@ -0,0 +1,43 @@
// Copyright 2023 PickNik, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "mutex_map.hpp"

#include <memory>
#include <shared_mutex>
#include <thread>

namespace rclcpp_lifecycle
{
void MutexMap::add(const State * key)
{
// Adding a new mutex to the map requires exclusive access
std::unique_lock lock(map_access_mutex_);
schornakj marked this conversation as resolved.
Show resolved Hide resolved
mutex_map_.emplace(key, std::make_unique<std::recursive_mutex>());
schornakj marked this conversation as resolved.
Show resolved Hide resolved
}

std::recursive_mutex & MutexMap::getMutex(const State * key) const
{
// Multiple threads can retrieve mutexes from the map at the same time
std::shared_lock lock(map_access_mutex_);
schornakj marked this conversation as resolved.
Show resolved Hide resolved
return *(mutex_map_.at(key));
}

void MutexMap::remove(const State * key)
{
// Removing a mutex from the map requires exclusive access
std::unique_lock lock(map_access_mutex_);
mutex_map_.erase(key);
}
} // namespace rclcpp_lifecycle