Skip to content

Commit

Permalink
[yugabyte#16892] docdb: AutoFlags should provide the capability to en…
Browse files Browse the repository at this point in the history
…able flags on Install scenarios only, but not upgrade

Summary:
This change adds a capability to enable auto flags on Install scenarios only, but not upgrade
(so as to lower the risk of new big features like PackedRow, AutomaticTabletSplitting from being
enabled automatically without explicit Customer knowledge, during an upgrade).

- Added a new value `kNewInstallsOnly` to `AutoFlagClass` enum.
- The new value `kNewInstallsOnly` restrict the set of auto flags that will be promoted during
upgrade, to exclude the ones tagged as `kNewInstallsOnly`.

Test Plan:
ybd --cxx-test integration-tests_auto_flags-itest --gtest_filter AutoFlagsMiniClusterTest.NewCluster
ybd --cxx-test integration-tests_auto_flags-itest --gtest_filter AutoFlagsMiniClusterTest.DisableAutoFlagManagement
ybd --cxx-test integration-tests_auto_flags-itest --gtest_filter AutoFlagsMiniClusterTest.Promote

ybd --cxx-test integration-tests_auto_flags-itest --gtest_filter AutoFlagsExternalMiniClusterTest.NewCluster
ybd --cxx-test integration-tests_auto_flags-itest --gtest_filter AutoFlagsExternalMiniClusterTest.UpgradeCluster

ybd --cxx-test tools_yb-admin-test --gtest_filter AdminCliTest.PromoteAutoFlags

Reviewers: rthallam, hsunder

Reviewed By: hsunder

Subscribers: ybase, bogdan

Differential Revision: https://phabricator.dev.yugabyte.com/D24621
  • Loading branch information
arybochkin authored and omkaark committed Apr 21, 2023
1 parent e1d16ed commit 5348eff
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 47 deletions.
3 changes: 2 additions & 1 deletion architecture/design/auto_flags.md
Expand Up @@ -22,7 +22,7 @@ New AutoFlags are defined using the following syntax in the primary cpp file whe

- value_type: [`bool`, `int32`, `int64`, `uint64`, `double`, `string`]
- flag_name: A friendly descriptive name for the AutoFlag.
- flag_class: [`kLocalVolatile`, `kLocalPersisted`, `kExternal`]
- flag_class: [`kLocalVolatile`, `kLocalPersisted`, `kExternal`, `kNewInstallsOnly`]
- initial_value: The initial value of type `<value_type>`.
- target_value: The target value of type `<value_type>`.
- usage: Usage information about the AutoFlag.
Expand Down Expand Up @@ -65,6 +65,7 @@ AutoFlag class is picked based on the persistence property of the data, and whic
| LocalVolatile | After all the processes in our universe have been upgraded to the new code version. | Yes, after the AutoFlag is demoted [^1] | yb_enable_expression_pushdown |
| LocalPersisted | After all the processes in our universe have been upgraded to the new code version. | No | TEST_auto_flags_initialized |
| External | After all the processes in our universe and other dependent universes and processes have been upgraded to the new code version. | No | regular_tablets_data_block_key_value_encoding, enable_stream_compression |
| NewInstallsOnly | After all the processes have been installed. No promotion for universe upgrades. | No | TEST_auto_flags_new_install |

[^1]: Not yet implemented.

Expand Down
20 changes: 14 additions & 6 deletions src/yb/client/auto_flags_manager.cc
Expand Up @@ -13,12 +13,18 @@

#include "yb/client/auto_flags_manager.h"
#include "yb/client/client.h"

#include "yb/fs/fs_manager.h"

#include "yb/gutil/strings/join.h"

#include "yb/master/master_cluster.pb.h"

#include "yb/rpc/messenger.h"
#include "yb/rpc/secure_stream.h"

#include "yb/server/secure.h"

#include "yb/util/flags/auto_flags.h"
#include "yb/util/net/net_util.h"
#include "yb/util/flags.h"
Expand All @@ -33,20 +39,21 @@ DEFINE_UNKNOWN_bool(disable_auto_flags_management, false,
TAG_FLAG(disable_auto_flags_management, advanced);
TAG_FLAG(disable_auto_flags_management, unsafe);

DEFINE_RUNTIME_AUTO_bool(
TEST_auto_flags_initialized, kLocalPersisted, false, true,
DEFINE_RUNTIME_AUTO_bool(TEST_auto_flags_initialized, kLocalPersisted, false, true,
"AutoFlag that indicates initialization of AutoFlags. Not meant to be overridden.");
TAG_FLAG(TEST_auto_flags_initialized, hidden);

DEFINE_UNKNOWN_int32(
auto_flags_load_from_master_backoff_increment_ms, 100,
DEFINE_RUNTIME_AUTO_bool(TEST_auto_flags_new_install, kNewInstallsOnly, false, true,
"AutoFlag that indicates initialization of AutoFlags for new installs only.");
TAG_FLAG(TEST_auto_flags_new_install, hidden);

DEFINE_UNKNOWN_int32(auto_flags_load_from_master_backoff_increment_ms, 100,
"Number of milliseconds added to the delay between reties of fetching AutoFlags config from "
"master leader. This delay is applied after the RPC reties have been exhausted.");
TAG_FLAG(auto_flags_load_from_master_backoff_increment_ms, stable);
TAG_FLAG(auto_flags_load_from_master_backoff_increment_ms, advanced);

DEFINE_UNKNOWN_int32(
auto_flags_load_from_master_max_backoff_sec, 3,
DEFINE_UNKNOWN_int32(auto_flags_load_from_master_max_backoff_sec, 3,
"Maximum number of seconds to delay between reties of fetching AutoFlags config from master "
"leader. This delay is applied after the RPC reties have been exhausted.");
TAG_FLAG(auto_flags_load_from_master_max_backoff_sec, stable);
Expand All @@ -58,6 +65,7 @@ DECLARE_int32(yb_client_admin_operation_timeout_sec);
namespace yb {

namespace {

class AutoFlagClient {
public:
Status Init(
Expand Down
98 changes: 72 additions & 26 deletions src/yb/integration-tests/auto_flags-itest.cc
Expand Up @@ -12,23 +12,31 @@
//

#include "yb/common/wire_protocol.h"

#include "yb/consensus/log.h"
#include "yb/consensus/raft_consensus.h"

#include "yb/integration-tests/external_mini_cluster-itest-base.h"
#include "yb/integration-tests/mini_cluster.h"
#include "yb/integration-tests/ts_itest-base.h"
#include "yb/integration-tests/yb_mini_cluster_test_base.h"

#include "yb/master/catalog_manager.h"
#include "yb/master/mini_master.h"
#include "yb/master/master.h"

#include "yb/server/server_base.proxy.h"

#include "yb/tablet/tablet_peer.h"

#include "yb/tserver/mini_tablet_server.h"
#include "yb/tserver/tablet_server.h"

#include "yb/util/flags.h"
#include "yb/util/backoff_waiter.h"

DECLARE_bool(TEST_auto_flags_initialized);
DECLARE_bool(TEST_auto_flags_new_install);
DECLARE_bool(disable_auto_flags_management);
DECLARE_int32(limit_auto_flag_promote_for_new_universe);
DECLARE_int32(heartbeat_interval_ms);
Expand All @@ -37,26 +45,41 @@ DECLARE_int32(heartbeat_interval_ms);
DISABLE_PROMOTE_ALL_AUTO_FLAGS_FOR_TEST;

using std::string;
using std::vector;

namespace yb {

using OK = Status::OK;

const string kDisableAutoFlagsManagementFlagName = "disable_auto_flags_management";
const string kTESTAutoFlagsInitializedFlagName = "TEST_auto_flags_initialized";
const string kTESTAutoFlagsNewInstallFlagName = "TEST_auto_flags_new_install";
const string kTrue = "true";
const string kFalse = "false";
const MonoDelta kTimeout = 20s * kTimeMultiplier;
const int kNumMasterServers = 3;
const int kNumTServers = 3;

namespace {

size_t CountFlag(
const std::string& flag_name,
decltype(std::declval<AutoFlagsConfigPB>().promoted_flags()) promoted_flags) {
size_t count = 0;
for (const auto& per_process_flags : promoted_flags) {
for (const auto& flag : per_process_flags.flags()) {
count += (flag == flag_name);
}
}
return count;
}

void TestPromote(
std::function<Result<AutoFlagsConfigPB>()> get_current_config,
std::function<Result<master::PromoteAutoFlagsResponsePB>(master::PromoteAutoFlagsRequestPB)>
promote_auto_flags,
std::function<Status(uint32)>
validate_config_on_all_nodes) {
// Initial empty config
// Initial empty config.
auto previous_config = ASSERT_RESULT(get_current_config());
ASSERT_EQ(0, previous_config.config_version());
ASSERT_EQ(0, previous_config.promoted_flags_size());
Expand All @@ -66,7 +89,7 @@ void TestPromote(
req.set_promote_non_runtime_flags(true);
req.set_force(false);

// Promote all AutoFlags
// Promote all AutoFlags.
auto resp = ASSERT_RESULT(promote_auto_flags(req));
ASSERT_TRUE(resp.has_new_config_version());
ASSERT_EQ(resp.new_config_version(), previous_config.config_version() + 1);
Expand All @@ -75,24 +98,36 @@ void TestPromote(
previous_config = ASSERT_RESULT(get_current_config());
ASSERT_EQ(resp.new_config_version(), previous_config.config_version());
ASSERT_GE(previous_config.promoted_flags_size(), 1);
ASSERT_EQ(0, CountFlag(kTESTAutoFlagsNewInstallFlagName, previous_config.promoted_flags()));
ASSERT_EQ(
previous_config.promoted_flags_size(),
CountFlag(kTESTAutoFlagsInitializedFlagName, previous_config.promoted_flags()));

// Running again should be no op
resp.Clear();
const auto result = promote_auto_flags(req);
// Running again should be no op.
auto result = promote_auto_flags(req);
ASSERT_NOK(result);
const auto status = result.status();
ASSERT_TRUE(status.IsAlreadyPresent()) << status.ToString();
ASSERT_FALSE(resp.has_new_config_version()) << resp.new_config_version();
ASSERT_TRUE(result.status().IsAlreadyPresent()) << result.status().ToString();

// Force to bump up version alone
// Force to bump up version alone, make sure flags still have expected values.
req.set_force(true);
resp.Clear();
resp = ASSERT_RESULT(promote_auto_flags(req));
ASSERT_EQ(resp.new_config_version(), previous_config.config_version() + 1);
ASSERT_FALSE(resp.non_runtime_flags_promoted());

ASSERT_OK(validate_config_on_all_nodes(resp.new_config_version()));
previous_config = ASSERT_RESULT(get_current_config());
ASSERT_EQ(resp.new_config_version(), previous_config.config_version());
ASSERT_EQ(0, CountFlag(kTESTAutoFlagsNewInstallFlagName, previous_config.promoted_flags()));
ASSERT_EQ(
previous_config.promoted_flags_size(),
CountFlag(kTESTAutoFlagsInitializedFlagName, previous_config.promoted_flags()));

// Verify it is not possible to promote with AutoFlagClass::kNewInstallsOnly.
req.set_max_flag_class(ToString(AutoFlagClass::kNewInstallsOnly));
result = promote_auto_flags(req);
ASSERT_NOK(result);
ASSERT_TRUE(result.status().IsInvalidArgument()) << result.status().ToString();
}

} // namespace

class AutoFlagsMiniClusterTest : public YBMiniClusterTestBase<MiniCluster> {
Expand All @@ -111,31 +146,31 @@ class AutoFlagsMiniClusterTest : public YBMiniClusterTestBase<MiniCluster> {
}

Status ValidateConfig() {
int count_flags = 0;
auto leader_master = VERIFY_RESULT(cluster_->GetLeaderMiniMaster());
const AutoFlagsConfigPB leader_config = leader_master->master()->GetAutoFlagsConfig();
for (const auto& per_process_flags : leader_config.promoted_flags()) {
auto it = std::find(
per_process_flags.flags().begin(), per_process_flags.flags().end(),
kTESTAutoFlagsInitializedFlagName);
SCHECK(it != per_process_flags.flags().end(), IllegalState, "Unable to find");
count_flags++;
}
const auto leader_config_process_count =
static_cast<size_t>(leader_config.promoted_flags().size());
SCHECK_EQ(
CountFlag(kTESTAutoFlagsInitializedFlagName, leader_config.promoted_flags()),
leader_config_process_count, IllegalState, "Unable to find");
SCHECK_EQ(
CountFlag(kTESTAutoFlagsNewInstallFlagName, leader_config.promoted_flags()),
leader_config_process_count, IllegalState, "Unable to find");

if (FLAGS_disable_auto_flags_management) {
SCHECK(
!FLAGS_TEST_auto_flags_initialized, IllegalState,
"TEST_auto_flags_initialized should not be set");
SCHECK_EQ(
count_flags, 0, IllegalState,
"TEST_auto_flags_initialized should not be set in any process");
SCHECK(
!FLAGS_TEST_auto_flags_new_install, IllegalState,
"TEST_auto_flags_new_install should not be set");
} else {
SCHECK(
FLAGS_TEST_auto_flags_initialized, IllegalState,
"TEST_auto_flags_initialized should be set");
SCHECK_EQ(
count_flags, leader_config.promoted_flags().size(), IllegalState,
"TEST_auto_flags_initialized should be set in every process");
SCHECK(
FLAGS_TEST_auto_flags_new_install, IllegalState,
"TEST_auto_flags_new_install should be set");
}

for (size_t i = 0; i < cluster_->num_masters(); i++) {
Expand Down Expand Up @@ -309,6 +344,7 @@ TEST_F(AutoFlagsExternalMiniClusterTest, NewCluster) {
ASSERT_NO_FATALS(BuildAndStart());

ASSERT_OK(CheckFlagOnAllNodes(kTESTAutoFlagsInitializedFlagName, kTrue));
ASSERT_OK(CheckFlagOnAllNodes(kTESTAutoFlagsNewInstallFlagName, kTrue));

ExternalMaster* new_master = nullptr;
cluster_->StartShellMaster(&new_master);
Expand All @@ -319,23 +355,27 @@ TEST_F(AutoFlagsExternalMiniClusterTest, NewCluster) {
ASSERT_OK(cluster_->WaitForMastersToCommitUpTo(op_id.index()));

ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsInitializedFlagName, kTrue, new_master));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsNewInstallFlagName, kTrue, new_master));

ASSERT_OK(cluster_->AddTabletServer());
ASSERT_OK(cluster_->WaitForTabletServerCount(opts_.num_tablet_servers + 1, kTimeout));

ASSERT_OK(CheckFlagOnAllNodes(kTESTAutoFlagsInitializedFlagName, kTrue));
ASSERT_OK(CheckFlagOnAllNodes(kTESTAutoFlagsNewInstallFlagName, kTrue));

for (auto* master : cluster_->master_daemons()) {
master->Shutdown();
ASSERT_OK(master->Restart());
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsInitializedFlagName, kTrue, master));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsNewInstallFlagName, kTrue, master));
ASSERT_EQ(GetAutoFlagConfigVersion(master), 1);
}

for (auto* tserver : cluster_->tserver_daemons()) {
tserver->Shutdown();
ASSERT_OK(tserver->Restart());
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsInitializedFlagName, kTrue, tserver));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsNewInstallFlagName, kTrue, tserver));
ASSERT_EQ(GetAutoFlagConfigVersion(tserver), 1);
}
}
Expand All @@ -351,6 +391,7 @@ TEST_F(AutoFlagsExternalMiniClusterTest, UpgradeCluster) {

ASSERT_OK(CheckFlagOnAllNodes(kDisableAutoFlagsManagementFlagName, kTrue));
ASSERT_OK(CheckFlagOnAllNodes(kTESTAutoFlagsInitializedFlagName, kFalse));
ASSERT_OK(CheckFlagOnAllNodes(kTESTAutoFlagsNewInstallFlagName, kFalse));

// Remove the disable_auto_flag_management flag from cluster config
ASSERT_TRUE(Erase(disable_auto_flag_management, cluster_->mutable_extra_master_flags()));
Expand All @@ -363,6 +404,7 @@ TEST_F(AutoFlagsExternalMiniClusterTest, UpgradeCluster) {
auto* new_tserver = cluster_->tablet_server(cluster_->num_tablet_servers() - 1);
ASSERT_OK(CheckFlagOnNode(kDisableAutoFlagsManagementFlagName, kFalse, new_tserver));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsInitializedFlagName, kFalse, new_tserver));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsNewInstallFlagName, kFalse, new_tserver));
ASSERT_EQ(GetAutoFlagConfigVersion(new_tserver), 0);

// Restart the new tserver
Expand All @@ -372,6 +414,7 @@ TEST_F(AutoFlagsExternalMiniClusterTest, UpgradeCluster) {
ASSERT_OK(cluster_->WaitForTabletServerCount(opts_.num_tablet_servers + 1, kTimeout));
ASSERT_OK(CheckFlagOnNode(kDisableAutoFlagsManagementFlagName, kFalse, new_tserver));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsInitializedFlagName, kFalse, new_tserver));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsNewInstallFlagName, kFalse, new_tserver));
ASSERT_EQ(GetAutoFlagConfigVersion(new_tserver), 0);

// Add a new master
Expand All @@ -384,13 +427,15 @@ TEST_F(AutoFlagsExternalMiniClusterTest, UpgradeCluster) {
ASSERT_OK(cluster_->WaitForMastersToCommitUpTo(op_id.index()));
ASSERT_OK(CheckFlagOnNode(kDisableAutoFlagsManagementFlagName, kFalse, new_master));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsInitializedFlagName, kFalse, new_master));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsNewInstallFlagName, kFalse, new_master));
ASSERT_EQ(GetAutoFlagConfigVersion(new_master), 0);

// Restart the master
new_master->Shutdown();
ASSERT_OK(new_master->Restart());
ASSERT_OK(CheckFlagOnNode(kDisableAutoFlagsManagementFlagName, kFalse, new_master));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsInitializedFlagName, kFalse, new_master));
ASSERT_OK(CheckFlagOnNode(kTESTAutoFlagsNewInstallFlagName, kFalse, new_master));
ASSERT_EQ(GetAutoFlagConfigVersion(new_master), 0);

// Remove disable_auto_flag_management from each process config and restart
Expand All @@ -415,6 +460,7 @@ TEST_F(AutoFlagsExternalMiniClusterTest, UpgradeCluster) {
}

ASSERT_OK(CheckFlagOnAllNodes(kTESTAutoFlagsInitializedFlagName, kFalse));
ASSERT_OK(CheckFlagOnAllNodes(kTESTAutoFlagsNewInstallFlagName, kFalse));

ASSERT_NO_FATALS(TestPromote(
/* get_current_config */
Expand Down

0 comments on commit 5348eff

Please sign in to comment.