From 7a8148110944e45010a506f7bd8478d6665de08b Mon Sep 17 00:00:00 2001 From: Hu# Date: Fri, 24 Nov 2023 15:23:43 +0800 Subject: [PATCH] This is an automated cherry-pick of #15837 ref tikv/tikv#15184 Signed-off-by: ti-chi-bot --- components/pd_client/src/client.rs | 4 +- components/raftstore-v2/src/batch/store.rs | 26 ++++++++ components/raftstore-v2/src/worker/pd/mod.rs | 39 +++++++++++ .../tests/integrations/cluster.rs | 2 +- components/raftstore/src/store/config.rs | 30 +++++++++ components/raftstore/src/store/fsm/store.rs | 37 +++++++++-- components/raftstore/src/store/metrics.rs | 1 + components/raftstore/src/store/msg.rs | 6 ++ components/raftstore/src/store/worker/pd.rs | 65 +++++++++++++++---- .../resolved_ts/tests/failpoints/mod.rs | 5 +- components/test_raftstore/src/util.rs | 12 ++++ etc/config-template.toml | 3 + tests/integrations/config/mod.rs | 4 ++ tests/integrations/config/test-custom.toml | 2 +- 14 files changed, 211 insertions(+), 25 deletions(-) diff --git a/components/pd_client/src/client.rs b/components/pd_client/src/client.rs index 4654a3ebe3aa..03a6ad0bc9fc 100644 --- a/components/pd_client/src/client.rs +++ b/components/pd_client/src/client.rs @@ -1063,9 +1063,7 @@ impl PdClient for RpcClient { }) as PdFuture<_> }; - self.pd_client - .request(req, executor, LEADER_CHANGE_RETRY) - .execute() + self.pd_client.request(req, executor, NO_RETRY).execute() } fn report_region_buckets(&self, bucket_stat: &BucketStat, period: Duration) -> PdFuture<()> { diff --git a/components/raftstore-v2/src/batch/store.rs b/components/raftstore-v2/src/batch/store.rs index 199e8cafbd8f..027902f25bd7 100644 --- a/components/raftstore-v2/src/batch/store.rs +++ b/components/raftstore-v2/src/batch/store.rs @@ -418,6 +418,7 @@ impl StoreSystem { .async_read_worker .start("async-read-worker", read_runner); +<<<<<<< HEAD let pd_scheduler = workers.pd_worker.start( "pd-worker", PdRunner::new( @@ -425,6 +426,31 @@ impl StoreSystem { pd_client, raft_engine.clone(), tablet_factory.clone(), +======= + workers.pd.start(pd::Runner::new( + store_id, + pd_client, + raft_engine.clone(), + tablet_registry.clone(), + snap_mgr.clone(), + router.clone(), + workers.pd.remote(), + concurrency_manager, + causal_ts_provider, + workers.pd.scheduler(), + auto_split_controller, + collector_reg_handle, + grpc_service_mgr, + self.logger.clone(), + self.shutdown.clone(), + cfg.clone(), + )?); + + let split_check_scheduler = workers.background.start( + "split-check", + SplitCheckRunner::with_registry( + tablet_registry.clone(), +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) router.clone(), workers.pd_worker.remote(), concurrency_manager, diff --git a/components/raftstore-v2/src/worker/pd/mod.rs b/components/raftstore-v2/src/worker/pd/mod.rs index 132678e21f21..9f5397ca484d 100644 --- a/components/raftstore-v2/src/worker/pd/mod.rs +++ b/components/raftstore-v2/src/worker/pd/mod.rs @@ -13,10 +13,29 @@ use collections::HashMap; use concurrency_manager::ConcurrencyManager; use engine_traits::{KvEngine, RaftEngine, TabletFactory}; use kvproto::{metapb, pdpb}; +<<<<<<< HEAD use pd_client::PdClient; use raftstore::store::{util::KeysInfoFormatter, TxnExt}; use slog::{error, info, Logger}; use tikv_util::{time::UnixSecs, worker::Runnable}; +======= +use pd_client::{BucketStat, PdClient}; +use raftstore::store::{ + metrics::STORE_INSPECT_DURATION_HISTOGRAM, + util::{KeysInfoFormatter, LatencyInspector, RaftstoreDuration}, + AutoSplitController, 
Config, FlowStatsReporter, PdStatsMonitor, ReadStats, SplitInfo, + StoreStatsReporter, TabletSnapManager, TxnExt, WriteStats, + NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, +}; +use resource_metering::{Collector, CollectorRegHandle, RawRecords}; +use service::service_manager::GrpcServiceManager; +use slog::{error, warn, Logger}; +use tikv_util::{ + config::VersionTrack, + time::{Instant as TiInstant, UnixSecs}, + worker::{Runnable, Scheduler}, +}; +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) use yatp::{task::future::TaskCell, Remote}; use crate::{batch::StoreRouter, router::PeerMsg}; @@ -135,10 +154,30 @@ where remote: Remote, concurrency_manager: ConcurrencyManager, causal_ts_provider: Option>, // used for rawkv apiv2 +<<<<<<< HEAD logger: Logger, shutdown: Arc, ) -> Self { Self { +======= + pd_scheduler: Scheduler, + auto_split_controller: AutoSplitController, + collector_reg_handle: CollectorRegHandle, + grpc_service_manager: GrpcServiceManager, + logger: Logger, + shutdown: Arc, + cfg: Arc>, + ) -> Result { + let store_heartbeat_interval = cfg.value().pd_store_heartbeat_tick_interval.0; + let mut stats_monitor = PdStatsMonitor::new( + store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT, + cfg.value().inspect_interval.0, + PdReporter::new(pd_scheduler, logger.clone()), + ); + stats_monitor.start(auto_split_controller, collector_reg_handle)?; + let slowness_stats = slowness::SlownessStatistics::new(&cfg.value()); + Ok(Self { +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) store_id, pd_client, raft_engine, diff --git a/components/raftstore-v2/tests/integrations/cluster.rs b/components/raftstore-v2/tests/integrations/cluster.rs index 24184233117a..d51b7a1830d1 100644 --- a/components/raftstore-v2/tests/integrations/cluster.rs +++ b/components/raftstore-v2/tests/integrations/cluster.rs @@ -401,6 +401,7 @@ pub fn disable_all_auto_ticks(cfg: &mut Config) { cfg.region_compact_check_interval = ReadableDuration::ZERO; cfg.pd_heartbeat_tick_interval = ReadableDuration::ZERO; cfg.pd_store_heartbeat_tick_interval = ReadableDuration::ZERO; + cfg.pd_report_min_resolved_ts_interval = ReadableDuration::ZERO; cfg.snap_mgr_gc_tick_interval = ReadableDuration::ZERO; cfg.lock_cf_compact_interval = ReadableDuration::ZERO; cfg.peer_stale_state_check_interval = ReadableDuration::ZERO; @@ -410,7 +411,6 @@ pub fn disable_all_auto_ticks(cfg: &mut Config) { cfg.merge_check_tick_interval = ReadableDuration::ZERO; cfg.cleanup_import_sst_interval = ReadableDuration::ZERO; cfg.inspect_interval = ReadableDuration::ZERO; - cfg.report_min_resolved_ts_interval = ReadableDuration::ZERO; cfg.reactive_memory_lock_tick_interval = ReadableDuration::ZERO; cfg.report_region_buckets_tick_interval = ReadableDuration::ZERO; cfg.check_long_uncommitted_interval = ReadableDuration::ZERO; diff --git a/components/raftstore/src/store/config.rs b/components/raftstore/src/store/config.rs index 3ca422c0bfec..4b91a945bf9c 100644 --- a/components/raftstore/src/store/config.rs +++ b/components/raftstore/src/store/config.rs @@ -118,6 +118,7 @@ pub struct Config { pub region_compact_tombstones_percent: u64, pub pd_heartbeat_tick_interval: ReadableDuration, pub pd_store_heartbeat_tick_interval: ReadableDuration, + pub pd_report_min_resolved_ts_interval: ReadableDuration, pub snap_mgr_gc_tick_interval: ReadableDuration, pub snap_gc_timeout: ReadableDuration, pub 
lock_cf_compact_interval: ReadableDuration, @@ -316,8 +317,17 @@ pub struct Config { // Interval to inspect the latency of raftstore for slow store detection. pub inspect_interval: ReadableDuration, +<<<<<<< HEAD // Interval to report min resolved ts, if it is zero, it means disabled. pub report_min_resolved_ts_interval: ReadableDuration, +======= + // The unsensitive(increase it to reduce sensitiveness) of the cause-trend detection + pub slow_trend_unsensitive_cause: f64, + // The unsensitive(increase it to reduce sensitiveness) of the result-trend detection + pub slow_trend_unsensitive_result: f64, + // The sensitiveness of slowness on network-io. + pub slow_trend_network_io_factor: f64, +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) /// Interval to check whether to reactivate in-memory pessimistic lock after /// being disabled before transferring leader. @@ -381,6 +391,15 @@ impl Default for Config { region_compact_tombstones_percent: 30, pd_heartbeat_tick_interval: ReadableDuration::minutes(1), pd_store_heartbeat_tick_interval: ReadableDuration::secs(10), +<<<<<<< HEAD +======= + pd_report_min_resolved_ts_interval: ReadableDuration::secs(1), + // Disable periodic full compaction by default. + periodic_full_compact_start_times: ReadableSchedule::default(), + // If periodic full compaction is enabled, do not start a full compaction + // if the CPU utilization is over 10%. + periodic_full_compact_start_max_cpu: 0.1, +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) notify_capacity: 40960, snap_mgr_gc_tick_interval: ReadableDuration::minutes(1), snap_gc_timeout: ReadableDuration::hours(4), @@ -449,7 +468,15 @@ impl Default for Config { region_split_size: ReadableSize(0), clean_stale_peer_delay: ReadableDuration::minutes(0), inspect_interval: ReadableDuration::millis(500), +<<<<<<< HEAD report_min_resolved_ts_interval: ReadableDuration::secs(1), +======= + // The param `slow_trend_unsensitive_cause == 2.0` can yield good results, + // make it `10.0` to reduce a bit sensitiveness because SpikeFilter is disabled + slow_trend_unsensitive_cause: 10.0, + slow_trend_unsensitive_result: 0.5, + slow_trend_network_io_factor: 0.0, +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) check_leader_lease_interval: ReadableDuration::secs(0), renew_leader_lease_advance_duration: ReadableDuration::secs(0), allow_unsafe_vote_after_start: false, @@ -892,6 +919,9 @@ impl Config { CONFIG_RAFTSTORE_GAUGE .with_label_values(&["pd_store_heartbeat_tick_interval"]) .set(self.pd_store_heartbeat_tick_interval.as_secs_f64()); + CONFIG_RAFTSTORE_GAUGE + .with_label_values(&["pd_report_min_resolved_ts_interval"]) + .set(self.pd_report_min_resolved_ts_interval.as_secs_f64()); CONFIG_RAFTSTORE_GAUGE .with_label_values(&["snap_mgr_gc_tick_interval"]) .set(self.snap_mgr_gc_tick_interval.as_secs_f64()); diff --git a/components/raftstore/src/store/fsm/store.rs b/components/raftstore/src/store/fsm/store.rs index 29907ea68d81..4c4a9d140cd8 100644 --- a/components/raftstore/src/store/fsm/store.rs +++ b/components/raftstore/src/store/fsm/store.rs @@ -726,6 +726,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> StoreTick::CompactCheck => self.on_compact_check_tick(), StoreTick::ConsistencyCheck => self.on_consistency_check_tick(), StoreTick::CleanupImportSst => 
self.on_cleanup_import_sst_tick(), + StoreTick::PdReportMinResolvedTs => self.on_pd_report_min_resolved_ts_tick(), } let elapsed = timer.saturating_elapsed(); self.ctx @@ -815,6 +816,7 @@ impl<'a, EK: KvEngine + 'static, ER: RaftEngine + 'static, T: Transport> self.register_cleanup_import_sst_tick(); self.register_compact_check_tick(); self.register_pd_store_heartbeat_tick(); + self.register_pd_report_min_resolved_ts_tick(); self.register_compact_lock_cf_tick(); self.register_snap_mgr_gc_tick(); self.register_consistency_check_tick(); @@ -1626,7 +1628,6 @@ impl RaftBatchSystem { &cfg, )?; - let region_read_progress = store_meta.lock().unwrap().region_read_progress.clone(); let mut builder = RaftPollerBuilder { cfg, store: meta, @@ -1663,7 +1664,6 @@ impl RaftBatchSystem { mgr, pd_client, collector_reg_handle, - region_read_progress, health_service, causal_ts_provider, )?; @@ -1680,7 +1680,6 @@ impl RaftBatchSystem { snap_mgr: SnapManager, pd_client: Arc, collector_reg_handle: CollectorRegHandle, - region_read_progress: RegionReadProgressRegistry, health_service: Option, causal_ts_provider: Option>, // used for rawkv apiv2 ) -> Result<()> { @@ -1761,7 +1760,6 @@ impl RaftBatchSystem { snap_mgr, workers.pd_worker.remote(), collector_reg_handle, - region_read_progress, health_service, coprocessor_host, causal_ts_provider, @@ -2428,6 +2426,25 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER } } + fn report_min_resolved_ts(&self) { + let read_progress = { + let meta = self.ctx.store_meta.lock().unwrap(); + meta.region_read_progress().clone() + }; + let min_resolved_ts = read_progress.get_min_resolved_ts(); + + let task = PdTask::ReportMinResolvedTs { + store_id: self.fsm.store.id, + min_resolved_ts, + }; + if let Err(e) = self.ctx.pd_scheduler.schedule(task) { + error!("failed to send min resolved ts to pd worker"; + "store_id" => self.fsm.store.id, + "err" => ?e + ); + } + } + fn store_heartbeat_pd(&mut self, report: Option) { let mut stats = StoreStats::default(); @@ -2532,6 +2549,11 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER self.register_pd_store_heartbeat_tick(); } + fn on_pd_report_min_resolved_ts_tick(&mut self) { + self.report_min_resolved_ts(); + self.register_pd_report_min_resolved_ts_tick(); + } + fn on_snap_mgr_gc(&mut self) { // refresh multi_snapshot_files enable flag self.ctx.snap_mgr.set_enable_multi_snapshot_files( @@ -2635,6 +2657,13 @@ impl<'a, EK: KvEngine, ER: RaftEngine, T: Transport> StoreFsmDelegate<'a, EK, ER ); } + fn register_pd_report_min_resolved_ts_tick(&self) { + self.ctx.schedule_store_tick( + StoreTick::PdReportMinResolvedTs, + self.ctx.cfg.pd_report_min_resolved_ts_interval.0, + ); + } + fn register_snap_mgr_gc_tick(&self) { self.ctx .schedule_store_tick(StoreTick::SnapGc, self.ctx.cfg.snap_mgr_gc_tick_interval.0) diff --git a/components/raftstore/src/store/metrics.rs b/components/raftstore/src/store/metrics.rs index c67537e9126c..61e3ffc02c37 100644 --- a/components/raftstore/src/store/metrics.rs +++ b/components/raftstore/src/store/metrics.rs @@ -211,6 +211,7 @@ make_static_metric! 
{ pub label_enum RaftEventDurationType { compact_check, pd_store_heartbeat, + pd_report_min_resolved_ts, snap_gc, compact_lock_cf, consistency_check, diff --git a/components/raftstore/src/store/msg.rs b/components/raftstore/src/store/msg.rs index 436ee46d3545..1d5d48bdc2c9 100644 --- a/components/raftstore/src/store/msg.rs +++ b/components/raftstore/src/store/msg.rs @@ -425,6 +425,7 @@ pub enum StoreTick { CompactLockCf, ConsistencyCheck, CleanupImportSst, + PdReportMinResolvedTs, } impl StoreTick { @@ -437,6 +438,11 @@ impl StoreTick { StoreTick::CompactLockCf => RaftEventDurationType::compact_lock_cf, StoreTick::ConsistencyCheck => RaftEventDurationType::consistency_check, StoreTick::CleanupImportSst => RaftEventDurationType::cleanup_import_sst, +<<<<<<< HEAD +======= + StoreTick::LoadMetricsWindow => RaftEventDurationType::load_metrics_window, + StoreTick::PdReportMinResolvedTs => RaftEventDurationType::pd_report_min_resolved_ts, +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) } } } diff --git a/components/raftstore/src/store/worker/pd.rs b/components/raftstore/src/store/worker/pd.rs index c2186bcdc451..4363d90f8a0b 100644 --- a/components/raftstore/src/store/worker/pd.rs +++ b/components/raftstore/src/store/worker/pd.rs @@ -65,7 +65,11 @@ use crate::{ AutoSplitController, ReadStats, SplitConfigChange, WriteStats, }, Callback, CasualMessage, Config, PeerMsg, RaftCmdExtraOpts, RaftCommand, RaftRouter, +<<<<<<< HEAD RegionReadProgressRegistry, SignificantMsg, SnapManager, StoreInfo, StoreMsg, TxnExt, +======= + SnapManager, StoreInfo, StoreMsg, TxnExt, +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) }, }; @@ -456,16 +460,6 @@ fn default_collect_tick_interval() -> Duration { DEFAULT_COLLECT_TICK_INTERVAL } -fn config(interval: Duration) -> Duration { - fail_point!("mock_min_resolved_ts_interval", |_| { - Duration::from_millis(50) - }); - fail_point!("mock_min_resolved_ts_interval_disable", |_| { - Duration::from_millis(0) - }); - interval -} - #[inline] fn convert_record_pairs(m: HashMap) -> RecordPairVec { m.into_iter() @@ -491,7 +485,11 @@ where collect_store_infos_interval: Duration, load_base_split_check_interval: Duration, collect_tick_interval: Duration, +<<<<<<< HEAD report_min_resolved_ts_interval: Duration, +======= + inspect_latency_interval: Duration, +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) } impl StatsMonitor @@ -499,11 +497,15 @@ where EK: KvEngine, ER: RaftEngine, { +<<<<<<< HEAD pub fn new( interval: Duration, report_min_resolved_ts_interval: Duration, scheduler: Scheduler>, ) -> Self { +======= + pub fn new(interval: Duration, inspect_latency_interval: Duration, reporter: T) -> Self { +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) StatsMonitor { scheduler, handle: None, @@ -515,8 +517,17 @@ where DEFAULT_LOAD_BASE_SPLIT_CHECK_INTERVAL, interval, ), +<<<<<<< HEAD report_min_resolved_ts_interval: config(report_min_resolved_ts_interval), collect_tick_interval: cmp::min(default_collect_tick_interval(), interval), +======= + // Use `inspect_latency_interval` as the minimal limitation for collecting tick. 
+ collect_tick_interval: cmp::min( + inspect_latency_interval, + cmp::min(default_collect_tick_interval(), interval), + ), + inspect_latency_interval, +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) } } @@ -525,8 +536,12 @@ where pub fn start( &mut self, mut auto_split_controller: AutoSplitController, +<<<<<<< HEAD region_read_progress: RegionReadProgressRegistry, store_id: u64, +======= + collector_reg_handle: CollectorRegHandle, +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) ) -> Result<(), io::Error> { if self.collect_tick_interval < default_collect_tick_interval() || self.collect_store_infos_interval < self.collect_tick_interval @@ -544,8 +559,13 @@ where let load_base_split_check_interval = self .load_base_split_check_interval .div_duration_f64(tick_interval) as u64; +<<<<<<< HEAD let report_min_resolved_ts_interval = self .report_min_resolved_ts_interval +======= + let update_latency_stats_interval = self + .inspect_latency_interval +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) .div_duration_f64(tick_interval) as u64; let (timer_tx, timer_rx) = mpsc::channel(); @@ -590,12 +610,17 @@ where &scheduler, ); } +<<<<<<< HEAD if is_enable_tick(timer_cnt, report_min_resolved_ts_interval) { StatsMonitor::report_min_resolved_ts( ®ion_read_progress, store_id, &scheduler, ); +======= + if is_enable_tick(timer_cnt, update_latency_stats_interval) { + reporter.update_latency_stats(timer_cnt); +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) } timer_cnt += 1; } @@ -938,7 +963,6 @@ where snap_mgr: SnapManager, remote: Remote, collector_reg_handle: CollectorRegHandle, - region_read_progress: RegionReadProgressRegistry, health_service: Option, coprocessor_host: CoprocessorHost, causal_ts_provider: Option>, // used for rawkv apiv2 @@ -958,10 +982,17 @@ where let interval = store_heartbeat_interval / Self::INTERVAL_DIVISOR; let mut stats_monitor = StatsMonitor::new( interval, +<<<<<<< HEAD cfg.report_min_resolved_ts_interval.0, scheduler.clone(), ); if let Err(e) = stats_monitor.start(auto_split_controller, region_read_progress, store_id) { +======= + cfg.inspect_interval.0, + WrappedScheduler(scheduler.clone()), + ); + if let Err(e) = stats_monitor.start(auto_split_controller, collector_reg_handle) { +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) error!("failed to start stats collector, error = {:?}", e); } @@ -2490,8 +2521,6 @@ mod tests { use engine_test::{kv::KvTestEngine, raft::RaftTestEngine}; use tikv_util::worker::LazyWorker; - use crate::store::fsm::StoreMeta; - struct RunnerTest { store_stat: Arc>, stats_monitor: StatsMonitor, @@ -2505,6 +2534,7 @@ mod tests { ) -> RunnerTest { let mut stats_monitor = StatsMonitor::new( Duration::from_secs(interval), +<<<<<<< HEAD Duration::from_secs(0), scheduler, ); @@ -2513,6 +2543,15 @@ mod tests { if let Err(e) = stats_monitor.start(AutoSplitController::default(), region_read_progress, 1) { +======= + Duration::from_secs(interval), + WrappedScheduler(scheduler), + ); + if let Err(e) = stats_monitor.start( + AutoSplitController::default(), + CollectorRegHandle::new_for_test(), + ) { +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying 
`min-resolved-ts` report interval and reduce retry times (#15837)) error!("failed to start stats collector, error = {:?}", e); } diff --git a/components/resolved_ts/tests/failpoints/mod.rs b/components/resolved_ts/tests/failpoints/mod.rs index 808f5ed62ff4..992ad0f34431 100644 --- a/components/resolved_ts/tests/failpoints/mod.rs +++ b/components/resolved_ts/tests/failpoints/mod.rs @@ -58,15 +58,14 @@ fn test_report_min_resolved_ts() { fail::cfg("mock_collect_tick_interval", "return(0)").unwrap(); fail::cfg("mock_min_resolved_ts_interval", "return(0)").unwrap(); let mut suite = TestSuite::new(1); - // default config is 1s assert_eq!( suite .cluster .cfg .tikv .raft_store - .report_min_resolved_ts_interval, - ReadableDuration::secs(1) + .pd_report_min_resolved_ts_interval, + ReadableDuration::millis(50) ); let region = suite.cluster.get_region(&[]); let ts1 = suite.cluster.pd_client.get_min_resolved_ts(); diff --git a/components/test_raftstore/src/util.rs b/components/test_raftstore/src/util.rs index 31eb7708cf5d..d2e9cd51bf32 100644 --- a/components/test_raftstore/src/util.rs +++ b/components/test_raftstore/src/util.rs @@ -19,6 +19,7 @@ use engine_traits::{ Engines, Iterable, Peekable, RaftEngineDebug, RaftEngineReadOnly, TabletFactory, ALL_CFS, CF_DEFAULT, CF_RAFT, }; +use fail::fail_point; use file_system::IoRateLimiter; use futures::executor::block_on; use grpcio::{ChannelBuilder, Environment}; @@ -155,9 +156,20 @@ pub fn new_tikv_config_with_api_ver(cluster_id: u64, api_ver: ApiVersion) -> Tik let mut cfg = TEST_CONFIG.clone(); cfg.server.cluster_id = cluster_id; cfg.storage.set_api_version(api_ver); + cfg.raft_store.pd_report_min_resolved_ts_interval = config(ReadableDuration::secs(1)); cfg } +fn config(interval: ReadableDuration) -> ReadableDuration { + fail_point!("mock_min_resolved_ts_interval", |_| { + ReadableDuration::millis(50) + }); + fail_point!("mock_min_resolved_ts_interval_disable", |_| { + ReadableDuration::millis(0) + }); + interval +} + // Create a base request. pub fn new_base_request(region_id: u64, epoch: RegionEpoch, read_quorum: bool) -> RaftCmdRequest { let mut req = RaftCmdRequest::default(); diff --git a/etc/config-template.toml b/etc/config-template.toml index 79217b7ea651..6bc3f81abdef 100644 --- a/etc/config-template.toml +++ b/etc/config-template.toml @@ -383,6 +383,9 @@ ## Store heartbeat tick interval for reporting to PD. # pd-store-heartbeat-tick-interval = "10s" +## Store min resolved ts tick interval for reporting to PD. +# pd-report-min-resolved-ts-interval = "1s" + ## The threshold of triggering Region split check. ## When Region size change exceeds this config, TiKV will check whether the Region should be split ## or not. 
To reduce the cost of scanning data in the checking process, you can set the value to diff --git a/tests/integrations/config/mod.rs b/tests/integrations/config/mod.rs index 1e9db6fd9e8a..e1c506325ad9 100644 --- a/tests/integrations/config/mod.rs +++ b/tests/integrations/config/mod.rs @@ -196,6 +196,7 @@ fn test_serde_custom_tikv_config() { region_compact_tombstones_percent: 33, pd_heartbeat_tick_interval: ReadableDuration::minutes(12), pd_store_heartbeat_tick_interval: ReadableDuration::secs(12), + pd_report_min_resolved_ts_interval: ReadableDuration::millis(233), notify_capacity: 12_345, snap_mgr_gc_tick_interval: ReadableDuration::minutes(12), snap_gc_timeout: ReadableDuration::hours(12), @@ -243,8 +244,11 @@ fn test_serde_custom_tikv_config() { io_reschedule_concurrent_max_count: 1234, io_reschedule_hotpot_duration: ReadableDuration::secs(4321), inspect_interval: ReadableDuration::millis(444), +<<<<<<< HEAD report_min_resolved_ts_interval: ReadableDuration::millis(233), raft_msg_flush_interval: ReadableDuration::micros(250), +======= +>>>>>>> bc1ae30437 (pd_client: support dynamically modifying `min-resolved-ts` report interval and reduce retry times (#15837)) check_leader_lease_interval: ReadableDuration::millis(123), renew_leader_lease_advance_duration: ReadableDuration::millis(456), reactive_memory_lock_tick_interval: ReadableDuration::millis(566), diff --git a/tests/integrations/config/test-custom.toml b/tests/integrations/config/test-custom.toml index 779801d715d1..921c19b95923 100644 --- a/tests/integrations/config/test-custom.toml +++ b/tests/integrations/config/test-custom.toml @@ -169,6 +169,7 @@ region-compact-min-tombstones = 999 region-compact-tombstones-percent = 33 pd-heartbeat-tick-interval = "12m" pd-store-heartbeat-tick-interval = "12s" +pd-report-min-resolved-ts-interval = "233ms" snap-mgr-gc-tick-interval = "12m" snap-gc-timeout = "12h" lock-cf-compact-interval = "12m" @@ -222,7 +223,6 @@ reactive-memory-lock-tick-interval = "566ms" reactive-memory-lock-timeout-tick = 8 check-long-uncommitted-interval = "1s" long-uncommitted-base-threshold = "1s" -report-min-resolved-ts-interval = "233ms" report-region-buckets-tick-interval = "1234s" max-snapshot-file-raw-size = "10GB" unreachable-backoff = "111s"
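The net effect of the patch above is to turn min-resolved-ts reporting into a store-level tick: the old `report-min-resolved-ts-interval` option on the PD worker's StatsMonitor is replaced by a new `raftstore.pd-report-min-resolved-ts-interval` config (default "1s", `ReadableDuration::ZERO` disables it, and the `mock_min_resolved_ts_interval` failpoint shrinks it to 50ms in tests), which drives a `StoreTick::PdReportMinResolvedTs` tick that reads the minimum resolved ts from the region read-progress registry and schedules a `PdTask::ReportMinResolvedTs` for the PD worker. The std-only sketch below illustrates that control flow under stated assumptions; the type and trait names are hypothetical stand-ins, not TiKV internals.

    // Standalone sketch (std-only, hypothetical names) of the tick-driven
    // reporting pattern introduced by the patch: every `interval`, compute the
    // minimum resolved ts over all regions and hand it to a reporter; a zero
    // interval disables the tick, matching how ReadableDuration::ZERO is used
    // in the integration tests above.

    use std::collections::HashMap;
    use std::thread;
    use std::time::Duration;

    /// Hypothetical stand-in for the per-region resolved-ts registry
    /// (the role played by RegionReadProgressRegistry in the real code base).
    struct RegionResolvedTs {
        by_region: HashMap<u64, u64>, // region id -> resolved ts
    }

    impl RegionResolvedTs {
        /// The reported value is the minimum over all regions; a store with
        /// no regions reports 0, which the receiver treats as "unknown".
        fn min_resolved_ts(&self) -> u64 {
            self.by_region.values().copied().min().unwrap_or(0)
        }
    }

    /// Hypothetical reporter standing in for scheduling a PD worker task
    /// (the role of PdTask::ReportMinResolvedTs in the patch).
    trait PdReporter {
        fn report_min_resolved_ts(&self, store_id: u64, ts: u64);
    }

    struct LogReporter;
    impl PdReporter for LogReporter {
        fn report_min_resolved_ts(&self, store_id: u64, ts: u64) {
            println!("store {store_id} reports min resolved ts {ts}");
        }
    }

    /// Tick loop: one report per `interval`; Duration::ZERO disables it.
    fn run_report_tick<R: PdReporter>(
        store_id: u64,
        interval: Duration,
        registry: &RegionResolvedTs,
        reporter: &R,
        rounds: usize,
    ) {
        if interval.is_zero() {
            return; // disabled, like an auto tick registered with a zero interval
        }
        for _ in 0..rounds {
            thread::sleep(interval);
            reporter.report_min_resolved_ts(store_id, registry.min_resolved_ts());
        }
    }

    fn main() {
        let registry = RegionResolvedTs {
            by_region: HashMap::from([(1, 430_000), (2, 429_500), (3, 431_200)]),
        };
        // 50ms here only keeps the example fast; the default in the patch is 1s,
        // and it can now be adjusted dynamically instead of being fixed at start.
        run_report_tick(1, Duration::from_millis(50), &registry, &LogReporter, 3);
    }

Because the interval is read from the raftstore config each time the tick is re-registered, changing `pd-report-min-resolved-ts-interval` takes effect on the next tick without a restart, which is the dynamic-modification behavior the PR title refers to; the companion change in `pd_client/src/client.rs` (LEADER_CHANGE_RETRY to NO_RETRY) independently caps how often a failed report is retried.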