Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: clear gateway subdirs #650

Merged
merged 2 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
94 changes: 93 additions & 1 deletion crates/rattler_repodata_gateway/src/gateway/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use file_url::url_to_path;
use local_subdir::LocalSubdirClient;
use rattler_conda_types::{Channel, MatchSpec, Platform};
use reqwest_middleware::ClientWithMiddleware;
use std::collections::HashSet;
use std::{
path::PathBuf,
sync::{Arc, Weak},
Expand Down Expand Up @@ -53,6 +54,27 @@ impl Default for Gateway {
}
}

/// A selection of subdirectories.
///
/// Used to restrict an operation to either every subdirectory of a channel
/// or to an explicit set of subdirectory names (e.g. `"linux-64"`).
#[derive(Default, Clone, Debug)]
pub enum SubdirSelection {
    /// Select all subdirectories
    #[default]
    All,

    /// Select these specific subdirectories
    Some(HashSet<String>),
}

impl SubdirSelection {
    /// Returns `true` if the given subdirectory is part of the selection.
    ///
    /// [`SubdirSelection::All`] contains every subdirectory, including the
    /// empty string. [`SubdirSelection::Some`] contains only the names stored
    /// in its set; the comparison is an exact, case-sensitive string match.
    pub fn contains(&self, subdir: &str) -> bool {
        match self {
            SubdirSelection::All => true,
            // `HashSet<String>` can be queried with a `&str` directly, so no
            // temporary `String` has to be allocated for the lookup.
            SubdirSelection::Some(subdirs) => subdirs.contains(subdir),
        }
    }
}

impl Gateway {
/// Constructs a simple gateway with the default configuration. Use [`Gateway::builder`] if you
/// want more control over how the gateway is constructed.
Expand Down Expand Up @@ -87,6 +109,17 @@ impl Gateway {
specs.into_iter().map(Into::into).collect(),
)
}

/// Drops the in-memory cache entries of the given channel.
///
/// Only entries whose channel equals `channel` and whose subdirectory is part
/// of `subdirs` are removed; everything else is kept. Any subsequent query
/// will re-fetch any required data from the source.
///
/// This method does not clear any on-disk cache.
pub fn clear_repodata_cache(&self, channel: &Channel, subdirs: SubdirSelection) {
    self.inner.subdirs.retain(|key, _| {
        // Entries that belong to another channel are always kept.
        if key.0 != *channel {
            return true;
        }

        // Within the requested channel, keep only the subdirectories that
        // were not selected for clearing.
        !subdirs.contains(key.1.as_str())
    });
}
}

struct GatewayInner {
Expand Down Expand Up @@ -289,13 +322,16 @@ enum PendingOrFetched<T> {

#[cfg(test)]
mod test {
use crate::fetch::CacheAction;
use crate::gateway::Gateway;
use crate::utils::simple_channel_server::SimpleChannelServer;
use crate::GatewayError;
use crate::{GatewayError, Reporter, SourceConfig};
use dashmap::DashSet;
use rattler_conda_types::{Channel, ChannelConfig, PackageName, Platform};
use rstest::rstest;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use std::time::Instant;
use url::Url;

Expand Down Expand Up @@ -403,4 +439,60 @@ mod test {
let total_records: usize = records.iter().map(|r| r.len()).sum();
assert_eq!(total_records, 84242);
}

/// Verifies that `Gateway::clear_repodata_cache` makes the gateway fetch
/// repodata again:
///
/// 1. the first query triggers downloads,
/// 2. an identical second query triggers none,
/// 3. after clearing the cache for the channel, the query triggers downloads again.
#[tokio::test]
async fn test_clear_cache() {
    let local_channel = remote_conda_forge().await;

    // Create a gateway with a custom channel configuration that disables caching.
    let gateway = Gateway::builder()
        .with_channel_config(super::ChannelConfig {
            default: SourceConfig {
                cache_action: CacheAction::NoCache,
                ..Default::default()
            },
            ..Default::default()
        })
        .finish();

    // Reporter that records every URL for which a download completed.
    #[derive(Default)]
    struct Downloads {
        urls: DashSet<Url>,
    }
    let downloads = Arc::new(Downloads::default());
    impl Reporter for Arc<Downloads> {
        fn on_download_complete(&self, url: &Url, _index: usize) {
            self.urls.insert(url.clone());
        }
    }

    // Construct a simple query
    let query = gateway
        .query(
            vec![local_channel.channel()],
            vec![Platform::Linux64, Platform::NoArch],
            vec![PackageName::from_str("python").unwrap()].into_iter(),
        )
        .with_reporter(downloads.clone());

    // Run the query once. We expect some activity.
    query.clone().execute().await.unwrap();
    assert!(!downloads.urls.is_empty(), "there should be some urls");
    downloads.urls.clear();

    // Run the query a second time.
    query.clone().execute().await.unwrap();
    assert!(
        downloads.urls.is_empty(),
        "there should be NO new url fetches"
    );

    // Now clear the cache and run the query again.
    gateway.clear_repodata_cache(&local_channel.channel(), Default::default());
    query.clone().execute().await.unwrap();
    assert!(
        !downloads.urls.is_empty(),
        "after clearing the cache there should be new urls fetched"
    );
}
}
4 changes: 3 additions & 1 deletion crates/rattler_repodata_gateway/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,6 @@ pub use reporter::Reporter;
mod gateway;

#[cfg(feature = "gateway")]
pub use gateway::{ChannelConfig, Gateway, GatewayBuilder, GatewayError, RepoData, SourceConfig};
pub use gateway::{
ChannelConfig, Gateway, GatewayBuilder, GatewayError, RepoData, SourceConfig, SubdirSelection,
};
60 changes: 8 additions & 52 deletions py-rattler/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions py-rattler/rattler/repo_data/gateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,35 @@ async def query(
# Convert the records into python objects
return [[RepoDataRecord._from_py_record(record) for record in records] for records in py_records]

def clear_repodata_cache(
    self, channel: Channel | str, subdirs: Optional[List[Platform | PlatformLiteral]] = None
) -> None:
    """
    Drops the in-memory cache held for the given channel.

    Any subsequent query will re-fetch any required data from the source.

    This method does not clear any on-disk cache.

    Arguments:
        channel: The channel to clear the cache for, either as a `Channel`
                 or as a string that is converted with `Channel(channel)`.
        subdirs: A selection of subdirectories to clear, if `None` is specified
                 all subdirectories of the channel are cleared.

    Examples
    --------
    >>> gateway = Gateway()
    >>> gateway.clear_repodata_cache("conda-forge", ["linux-64"])
    >>> gateway.clear_repodata_cache("robostack")
    >>>
    """
    # Normalise the channel argument to a `Channel` before unwrapping it.
    channel_obj = channel if isinstance(channel, Channel) else Channel(channel)

    # `None` is forwarded unchanged; otherwise every entry is normalised to a
    # `Platform` and its wrapped object is collected into a set.
    selection = (
        None
        if subdirs is None
        else {(subdir if isinstance(subdir, Platform) else Platform(subdir))._inner for subdir in subdirs}
    )

    self._gateway.clear_repodata_cache(channel_obj._channel, selection)

def __repr__(self) -> str:
"""
Returns a representation of the Gateway.
Expand Down
23 changes: 21 additions & 2 deletions py-rattler/src/repo_data/gateway.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use pyo3::exceptions::PyValueError;
use pyo3::{pyclass, pymethods, FromPyObject, PyAny, PyResult, Python};
use pyo3_asyncio::tokio::future_into_py;
use rattler_repodata_gateway::fetch::CacheAction;
use rattler_repodata_gateway::{ChannelConfig, Gateway, SourceConfig};
use std::collections::HashMap;
use rattler_repodata_gateway::{ChannelConfig, Gateway, SourceConfig, SubdirSelection};
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;

#[pyclass]
Expand All @@ -30,6 +30,21 @@ impl From<Gateway> for PyGateway {
}
}

/// Extracts a [`SubdirSelection`] from a Python object.
///
/// The object is extracted as `Option<HashSet<PyPlatform>>`: `None` becomes
/// [`SubdirSelection::All`], and a set of platforms becomes
/// [`SubdirSelection::Some`] holding each platform's string form.
impl<'source> FromPyObject<'source> for Wrap<SubdirSelection> {
    fn extract(ob: &'source PyAny) -> PyResult<Self> {
        let selection = ob
            .extract::<Option<HashSet<PyPlatform>>>()?
            .map_or(SubdirSelection::All, |platforms| {
                let names = platforms
                    .into_iter()
                    .map(|platform| platform.inner.as_str().to_owned())
                    .collect();
                SubdirSelection::Some(names)
            });
        Ok(Wrap(selection))
    }
}

#[pymethods]
impl PyGateway {
#[new]
Expand Down Expand Up @@ -60,6 +75,10 @@ impl PyGateway {
})
}

pub fn clear_repodata_cache(&self, channel: &PyChannel, subdirs: Wrap<SubdirSelection>) {
self.inner.clear_repodata_cache(&channel.inner, subdirs.0);
}

pub fn query<'a>(
&self,
py: Python<'a>,
Expand Down