From b226300d4e837113902384ebe6db23df79beffc9 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Tue, 19 Oct 2021 22:30:04 +0900 Subject: [PATCH] Fixed caching by moving it above the bundle storage. Caching was broken after the move to bundled split files. This PR duct tapes the issue by - using a global cache - relying on the segment file names being universal. This works correctly as a temporary solution that we need to revisit later. --- quickwit-search/src/leaf.rs | 13 ++++-- quickwit-serve/src/lib.rs | 8 +--- quickwit-storage/src/cache/mod.rs | 24 ++++++++++- .../src/cache}/quickwit_cache.rs | 17 ++------ .../src/cache/storage_with_cache.rs | 40 ++----------------- quickwit-storage/src/lib.rs | 2 +- 6 files changed, 41 insertions(+), 63 deletions(-) rename {quickwit-serve/src => quickwit-storage/src/cache}/quickwit_cache.rs (92%) diff --git a/quickwit-search/src/leaf.rs b/quickwit-search/src/leaf.rs index 7a1d0ad83d4..9e2809b6352 100644 --- a/quickwit-search/src/leaf.rs +++ b/quickwit-search/src/leaf.rs @@ -31,19 +31,23 @@ use quickwit_index_config::IndexConfig; use quickwit_proto::{ LeafSearchResponse, SearchRequest, SplitIdAndFooterOffsets, SplitSearchError, }; -use quickwit_storage::{BundleStorage, MemorySizedCache, OwnedBytes, Storage}; +use quickwit_storage::{ + wrap_storage_with_long_term_cache, BundleStorage, MemorySizedCache, OwnedBytes, Storage, +}; use tantivy::collector::Collector; use tantivy::query::Query; use tantivy::{Index, ReloadPolicy, Searcher, Term}; use tokio::task::spawn_blocking; use tracing::*; +const SPLIT_FOOTER_CACHE_CAPACITY: usize = 500_000_000; + use crate::collector::{make_collector_for_split, make_merge_collector, GenericQuickwitCollector}; use crate::SearchError; fn global_split_footer_cache() -> &'static MemorySizedCache { static INSTANCE: OnceCell> = OnceCell::new(); - INSTANCE.get_or_init(|| MemorySizedCache::with_capacity_in_bytes(500_000_000)) + INSTANCE.get_or_init(|| MemorySizedCache::with_capacity_in_bytes(SPLIT_FOOTER_CACHE_CAPACITY)) } async fn get_split_footer_from_cache_or_fetch( @@ -97,8 +101,9 @@ pub(crate) async fn open_index( let hotcache_bytes = footer_data.split_off(footer_data.len() - hotcache_num_bytes); - let bundle = BundleStorage::new(index_storage, split_file, &footer_data)?; - let directory = StorageDirectory::new(Arc::new(bundle)); + let bundle_storage = BundleStorage::new(index_storage, split_file, &footer_data)?; + let bundle_storage_with_cache = wrap_storage_with_long_term_cache(Arc::new(bundle_storage)); + let directory = StorageDirectory::new(bundle_storage_with_cache); let caching_directory = CachingDirectory::new_with_unlimited_capacity(Arc::new(directory)); let hot_directory = HotDirectory::open(caching_directory, hotcache_bytes)?; let index = Index::open(hot_directory)?; diff --git a/quickwit-serve/src/lib.rs b/quickwit-serve/src/lib.rs index 95b5e37df0e..76d542bcba2 100644 --- a/quickwit-serve/src/lib.rs +++ b/quickwit-serve/src/lib.rs @@ -23,14 +23,12 @@ mod error; mod grpc; mod grpc_adapter; mod http_handler; -mod quickwit_cache; mod rest; use std::io::Write; use std::net::SocketAddr; use std::sync::Arc; -use quickwit_cache::QuickwitCache; use quickwit_cluster::cluster::{read_or_create_host_key, Cluster}; use quickwit_cluster::service::ClusterServiceImpl; use quickwit_metastore::MetastoreUriResolver; @@ -40,7 +38,6 @@ use quickwit_search::{ }; use quickwit_storage::{ LocalFileStorageFactory, RegionProvider, S3CompatibleObjectStorageFactory, StorageUriResolver, - StorageWithCacheFactory, }; use quickwit_telemetry::payload::{ServeEvent, TelemetryEvent}; use termcolor::{self, Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; @@ -84,10 +81,7 @@ fn display_help_message( /// - s3+localstack:// /// - file:// uris. fn storage_uri_resolver() -> StorageUriResolver { - let s3_storage = StorageWithCacheFactory::new( - Arc::new(S3CompatibleObjectStorageFactory::default()), - Arc::new(QuickwitCache::default()), - ); + let s3_storage = S3CompatibleObjectStorageFactory::default(); StorageUriResolver::builder() .register(LocalFileStorageFactory::default()) .register(s3_storage) diff --git a/quickwit-storage/src/cache/mod.rs b/quickwit-storage/src/cache/mod.rs index c237de8b670..6e630126415 100644 --- a/quickwit-storage/src/cache/mod.rs +++ b/quickwit-storage/src/cache/mod.rs @@ -19,17 +19,37 @@ mod in_ram_slice_cache; mod memory_sized_cache; +mod quickwit_cache; mod storage_with_cache; use std::ops::Range; use std::path::{Path, PathBuf}; +use std::sync::Arc; use async_trait::async_trait; +use once_cell::sync::OnceCell; pub use self::in_ram_slice_cache::SliceCache; pub use self::memory_sized_cache::MemorySizedCache; -pub use self::storage_with_cache::StorageWithCacheFactory; -use crate::OwnedBytes; +use crate::cache::quickwit_cache::QuickwitCache; +use crate::cache::storage_with_cache::StorageWithCache; +use crate::{OwnedBytes, Storage}; + +/// Wraps the given directory with a slice cache that is actually global +/// to quickwit. +/// +/// FIXME The current approach is quite horrible in that: +/// - it uses a global +/// - it relies on the idea that all of the files we attempt to cache +/// have universally unique names. It happens to be true today, but this might be very error prone +/// in the future. +pub fn wrap_storage_with_long_term_cache(storage: Arc) -> Arc { + static SINGLETON: OnceCell> = OnceCell::new(); + let cache = SINGLETON + .get_or_init(|| Arc::new(QuickwitCache::default())) + .clone(); + Arc::new(StorageWithCache { storage, cache }) +} /// The `Cache` trait is the abstraction used to describe the caching logic /// used in front of a storage. See `StorageWithCache`. diff --git a/quickwit-serve/src/quickwit_cache.rs b/quickwit-storage/src/cache/quickwit_cache.rs similarity index 92% rename from quickwit-serve/src/quickwit_cache.rs rename to quickwit-storage/src/cache/quickwit_cache.rs index 77260964f2f..03f636bea99 100644 --- a/quickwit-serve/src/quickwit_cache.rs +++ b/quickwit-storage/src/cache/quickwit_cache.rs @@ -22,20 +22,16 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use async_trait::async_trait; -use quickwit_common::HOTCACHE_FILENAME; -use quickwit_storage::{Cache, OwnedBytes, SliceCache}; -const FULL_SLICE: Range = 0..usize::MAX; +use crate::{Cache, OwnedBytes, SliceCache}; -/// Hotcache cache capacity is hardcoded to 500 MB. -/// Once the capacity is reached, a LRU strategy is used. -const HOTCACHE_CACHE_CAPACITY: usize = 500_000_000; +const FULL_SLICE: Range = 0..usize::MAX; /// Fast field cache capacity is hardcoded to 3GB. /// Once the capacity is reached, a LRU strategy is used. const FAST_CACHE_CAPACITY: usize = 3_000_000_000; -pub struct QuickwitCache { +pub(crate) struct QuickwitCache { router: Vec<(&'static str, Arc)>, } @@ -48,10 +44,6 @@ impl From)>> for QuickwitCache { impl Default for QuickwitCache { fn default() -> Self { let mut quickwit_cache = QuickwitCache::empty(); - quickwit_cache.add_route( - HOTCACHE_FILENAME, - Arc::new(SimpleCache::with_capacity_in_bytes(HOTCACHE_CACHE_CAPACITY)), - ); quickwit_cache.add_route( ".fast", Arc::new(SimpleCache::with_capacity_in_bytes(FAST_CACHE_CAPACITY)), @@ -156,9 +148,8 @@ mod tests { use std::path::Path; use std::sync::Arc; - use quickwit_storage::{Cache, MockCache, OwnedBytes}; - use super::QuickwitCache; + use crate::{Cache, MockCache, OwnedBytes}; #[tokio::test] async fn test_quickwit_cache_get_all() { diff --git a/quickwit-storage/src/cache/storage_with_cache.rs b/quickwit-storage/src/cache/storage_with_cache.rs index e7c212c0403..80c751eb8d2 100644 --- a/quickwit-storage/src/cache/storage_with_cache.rs +++ b/quickwit-storage/src/cache/storage_with_cache.rs @@ -23,12 +23,12 @@ use std::sync::Arc; use async_trait::async_trait; -use crate::{Cache, OwnedBytes, Storage, StorageFactory, StorageResult}; +use crate::{Cache, OwnedBytes, Storage, StorageResult}; /// Use with care, StorageWithCache is read-only. -struct StorageWithCache { - storage: Arc, - cache: Arc, +pub(crate) struct StorageWithCache { + pub storage: Arc, + pub cache: Arc, } #[async_trait] @@ -84,38 +84,6 @@ impl Storage for StorageWithCache { } } -/// A StorageFactory that wraps all Storage that are produced with a cache. -/// -/// The cache is shared with all of the storage instances. -pub struct StorageWithCacheFactory { - storage_factory: Arc, - cache: Arc, -} - -impl StorageWithCacheFactory { - /// Creates a new StorageFactory with the given cache. - pub fn new(storage_factory: Arc, cache: Arc) -> Self { - StorageWithCacheFactory { - storage_factory, - cache, - } - } -} - -impl StorageFactory for StorageWithCacheFactory { - fn protocol(&self) -> String { - self.storage_factory.protocol() - } - - fn resolve(&self, uri: &str) -> crate::StorageResult> { - let storage = self.storage_factory.resolve(uri)?; - Ok(Arc::new(StorageWithCache { - storage, - cache: self.cache.clone(), - })) - } -} - #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/quickwit-storage/src/lib.rs b/quickwit-storage/src/lib.rs index 04eb57fb791..42972596ff6 100644 --- a/quickwit-storage/src/lib.rs +++ b/quickwit-storage/src/lib.rs @@ -64,7 +64,7 @@ pub use self::storage_resolver::{ }; #[cfg(feature = "testsuite")] pub use self::tests::storage_test_suite; -pub use crate::cache::{Cache, MemorySizedCache, SliceCache, StorageWithCacheFactory}; +pub use crate::cache::{wrap_storage_with_long_term_cache, Cache, MemorySizedCache, SliceCache}; pub use crate::error::{StorageError, StorageErrorKind, StorageResolverError, StorageResult}; #[cfg(any(test, feature = "testsuite"))]