From 6dc9de4212c4da0667a5d74f9db34e305104900e Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Fri, 10 May 2024 17:39:21 +0200 Subject: [PATCH 1/2] feat: clear gateway subdirs --- .../src/gateway/mod.rs | 89 ++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index 30c9c3206..4159d9557 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -53,6 +53,23 @@ impl Default for Gateway { } } +/// A selection of subdirectories. +#[derive(Default, Clone, Debug)] +pub enum SubdirSelection { + #[default] + All, + Some(Vec), +} + +impl SubdirSelection { + pub fn contains(&self, subdir: &str) -> bool { + match self { + SubdirSelection::All => true, + SubdirSelection::Some(subdirs) => subdirs.contains(&subdir.to_string()), + } + } +} + impl Gateway { /// Constructs a simple gateway with the default configuration. Use [`Gateway::builder`] if you /// want more control over how the gateway is constructed. @@ -87,6 +104,17 @@ impl Gateway { specs.into_iter().map(Into::into).collect(), ) } + + /// Clears any in-memory cache for the given channel. + /// + /// Any subsequent query will re-fetch any required data from the source. + /// + /// This method does not clear any on-disk cache. 
+ pub fn clear_repodata_cache(&self, channel: &Channel, subdirs: SubdirSelection) { + self.inner + .subdirs + .retain(|key, _| key.0 != *channel || !subdirs.contains(key.1.as_str())); + } } struct GatewayInner { @@ -289,13 +317,16 @@ enum PendingOrFetched { #[cfg(test)] mod test { + use crate::fetch::CacheAction; use crate::gateway::Gateway; use crate::utils::simple_channel_server::SimpleChannelServer; - use crate::GatewayError; + use crate::{GatewayError, Reporter, SourceConfig}; + use dashmap::DashSet; use rattler_conda_types::{Channel, ChannelConfig, PackageName, Platform}; use rstest::rstest; use std::path::{Path, PathBuf}; use std::str::FromStr; + use std::sync::Arc; use std::time::Instant; use url::Url; @@ -403,4 +434,60 @@ mod test { let total_records: usize = records.iter().map(|r| r.len()).sum(); assert_eq!(total_records, 84242); } + + #[tokio::test] + async fn test_clear_cache() { + let local_channel = remote_conda_forge().await; + + // Create a gateway with a custom channel configuration that disables caching. + let gateway = Gateway::builder() + .with_channel_config(super::ChannelConfig { + default: SourceConfig { + cache_action: CacheAction::NoCache, + ..Default::default() + }, + ..Default::default() + }) + .finish(); + + #[derive(Default)] + struct Downloads { + urls: DashSet, + } + let downloads = Arc::new(Downloads::default()); + impl Reporter for Arc { + fn on_download_complete(&self, url: &Url, _index: usize) { + self.urls.insert(url.clone()); + } + } + + // Construct a simple query + let query = gateway + .query( + vec![local_channel.channel()], + vec![Platform::Linux64, Platform::NoArch], + vec![PackageName::from_str("python").unwrap()].into_iter(), + ) + .with_reporter(downloads.clone()); + + // Run the query once. We expect some activity. + query.clone().execute().await.unwrap(); + assert!(downloads.urls.len() > 0, "there should be some urls"); + downloads.urls.clear(); + + // Run the query a second time. 
+ query.clone().execute().await.unwrap(); + assert!( + downloads.urls.is_empty(), + "there should be NO new url fetches" + ); + + // Now clear the cache and run the query again. + gateway.clear_repodata_cache(&local_channel.channel(), Default::default()); + query.clone().execute().await.unwrap(); + assert!( + downloads.urls.len() > 0, + "after clearing the cache there should be new urls fetched" + ); + } } From 014550f6dc318aececf41f898ca0b33494c064d2 Mon Sep 17 00:00:00 2001 From: Bas Zalmstra Date: Sat, 11 May 2024 11:12:49 +0200 Subject: [PATCH 2/2] feat: python bindings --- .../src/gateway/mod.rs | 7 ++- crates/rattler_repodata_gateway/src/lib.rs | 4 +- py-rattler/Cargo.lock | 60 +++---------------- py-rattler/rattler/repo_data/gateway.py | 29 +++++++++ py-rattler/src/repo_data/gateway.rs | 23 ++++++- 5 files changed, 67 insertions(+), 56 deletions(-) diff --git a/crates/rattler_repodata_gateway/src/gateway/mod.rs b/crates/rattler_repodata_gateway/src/gateway/mod.rs index 4159d9557..92d094168 100644 --- a/crates/rattler_repodata_gateway/src/gateway/mod.rs +++ b/crates/rattler_repodata_gateway/src/gateway/mod.rs @@ -22,6 +22,7 @@ use file_url::url_to_path; use local_subdir::LocalSubdirClient; use rattler_conda_types::{Channel, MatchSpec, Platform}; use reqwest_middleware::ClientWithMiddleware; +use std::collections::HashSet; use std::{ path::PathBuf, sync::{Arc, Weak}, @@ -56,12 +57,16 @@ impl Default for Gateway { /// A selection of subdirectories. #[derive(Default, Clone, Debug)] pub enum SubdirSelection { + /// Select all subdirectories #[default] All, - Some(Vec), + + /// Select these specific subdirectories + Some(HashSet), } impl SubdirSelection { + /// Returns `true` if the given subdirectory is part of the selection. 
pub fn contains(&self, subdir: &str) -> bool { match self { SubdirSelection::All => true, diff --git a/crates/rattler_repodata_gateway/src/lib.rs b/crates/rattler_repodata_gateway/src/lib.rs index e86eaf556..87be23295 100644 --- a/crates/rattler_repodata_gateway/src/lib.rs +++ b/crates/rattler_repodata_gateway/src/lib.rs @@ -71,4 +71,6 @@ pub use reporter::Reporter; mod gateway; #[cfg(feature = "gateway")] -pub use gateway::{ChannelConfig, Gateway, GatewayBuilder, GatewayError, RepoData, SourceConfig}; +pub use gateway::{ + ChannelConfig, Gateway, GatewayBuilder, GatewayError, RepoData, SourceConfig, SubdirSelection, +}; diff --git a/py-rattler/Cargo.lock b/py-rattler/Cargo.lock index 97c9e24ef..a8b4508fc 100644 --- a/py-rattler/Cargo.lock +++ b/py-rattler/Cargo.lock @@ -505,25 +505,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crossbeam-deque" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-utils" version = "0.8.19" @@ -806,6 +787,7 @@ version = "0.1.0" dependencies = [ "itertools", "percent-encoding", + "typed-path", "url", ] @@ -2556,6 +2538,7 @@ dependencies = [ "strum", "thiserror", "tracing", + "typed-path", "url", ] @@ -2601,7 +2584,6 @@ dependencies = [ "purl", "rattler_conda_types", "rattler_digest", - "rayon", "serde", "serde_json", "serde_repr", @@ -2701,7 +2683,6 @@ dependencies = [ "rattler_conda_types", "rattler_digest", "rattler_networking", - "rayon", "reqwest 0.12.3", "reqwest-middleware", "rmp-serde", @@ -2712,7 +2693,6 @@ dependencies = [ "tempfile", "thiserror", "tokio", - 
"tokio-rayon", "tokio-util", "tracing", "url", @@ -2767,26 +2747,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "rayon" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - [[package]] name = "redox_syscall" version = "0.4.1" @@ -3679,16 +3639,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-rayon" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cf33a76e0b1dd03b778f83244137bd59887abf25c0e87bc3e7071105f457693" -dependencies = [ - "rayon", - "tokio", -] - [[package]] name = "tokio-rustls" version = "0.24.1" @@ -3828,6 +3778,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "typed-path" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6069e2cc1d241fd4ff5fa067e8882996fcfce20986d078696e05abccbcf27b43" + [[package]] name = "typenum" version = "1.17.0" diff --git a/py-rattler/rattler/repo_data/gateway.py b/py-rattler/rattler/repo_data/gateway.py index b1b01f9aa..33721b152 100644 --- a/py-rattler/rattler/repo_data/gateway.py +++ b/py-rattler/rattler/repo_data/gateway.py @@ -173,6 +173,35 @@ async def query( # Convert the records into python objects return [[RepoDataRecord._from_py_record(record) for record in records] for records in py_records] + def clear_repodata_cache( + self, channel: Channel | str, subdirs: Optional[List[Platform | PlatformLiteral]] = None + ) -> None: + """ + Clears any 
in-memory cache for the given channel. + + Any subsequent query will re-fetch any required data from the source. + + This method does not clear any on-disk cache. + + Arguments: + channel: The channel to clear the cache for. + subdirs: A selection of subdirectories to clear, if `None` is specified + all subdirectories of the channel are cleared. + + Examples + -------- + >>> gateway = Gateway() + >>> gateway.clear_repodata_cache("conda-forge", ["linux-64"]) + >>> gateway.clear_repodata_cache("robostack") + >>> + """ + self._gateway.clear_repodata_cache( + channel._channel if isinstance(channel, Channel) else Channel(channel)._channel, + {subdir._inner if isinstance(subdir, Platform) else Platform(subdir)._inner for subdir in subdirs} + if subdirs is not None + else None, + ) + def __repr__(self) -> str: """ Returns a representation of the Gateway. diff --git a/py-rattler/src/repo_data/gateway.rs b/py-rattler/src/repo_data/gateway.rs index 9cf8fbf1e..ae632ba27 100644 --- a/py-rattler/src/repo_data/gateway.rs +++ b/py-rattler/src/repo_data/gateway.rs @@ -7,8 +7,8 @@ use pyo3::exceptions::PyValueError; use pyo3::{pyclass, pymethods, FromPyObject, PyAny, PyResult, Python}; use pyo3_asyncio::tokio::future_into_py; use rattler_repodata_gateway::fetch::CacheAction; -use rattler_repodata_gateway::{ChannelConfig, Gateway, SourceConfig}; -use std::collections::HashMap; +use rattler_repodata_gateway::{ChannelConfig, Gateway, SourceConfig, SubdirSelection}; +use std::collections::{HashMap, HashSet}; use std::path::PathBuf; #[pyclass] @@ -30,6 +30,21 @@ impl From for PyGateway { } } +impl<'source> FromPyObject<'source> for Wrap { + fn extract(ob: &'source PyAny) -> PyResult { + let parsed = match ob.extract::>>()? 
{ + Some(platforms) => SubdirSelection::Some( + platforms + .into_iter() + .map(|p| p.inner.as_str().to_owned()) + .collect(), + ), + None => SubdirSelection::All, + }; + Ok(Wrap(parsed)) + } +} + #[pymethods] impl PyGateway { #[new] @@ -60,6 +75,10 @@ impl PyGateway { }) } + pub fn clear_repodata_cache(&self, channel: &PyChannel, subdirs: Wrap) { + self.inner.clear_repodata_cache(&channel.inner, subdirs.0); + } + pub fn query<'a>( &self, py: Python<'a>,