Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: switch to compressed mapping #1335

Merged
merged 16 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1,504 changes: 1,139 additions & 365 deletions examples/conda_mapping/pixi.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions examples/conda_mapping/pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ python = "~=3.11.0"
scipy = "~=1.11.4"
boltons = "*"
jupyter-ros = { version = "*", channel = "robostack" }
jupyter-amphion = {version = "*", channel = "robostack"}

[pypi-dependencies]
black = { version = "~=23.10", extras = ["jupyter"] }
Expand Down
3 changes: 2 additions & 1 deletion examples/conda_mapping/robostack_mapping.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{
"jupyter-ros": "my-name-from-mapping"
"jupyter-ros": "my-name-from-mapping",
"jupyter-amphion": null
}
34 changes: 28 additions & 6 deletions examples/conda_mapping/test_conda_mapping.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,40 @@
import yaml

# This test verifies that we generate the right purls for our packages.
# We use one remote mapping for the conda-forge channel
# and one local mapping for the robostack channel.

PACKAGE_NAME_TO_TEST = {"boltons": "my-boltons-name", "jupyter-ros": "my-name-from-mapping"}

# For packages that are present in the local mapping,
# we verify that the source=project-defined-mapping qualifier is present in the purl,
# so the purl should look like this:
# pkg:pypi/my-boltons-name?source=project-defined-mapping

PACKAGE_NAME_TO_TEST = {
"boltons": "my-boltons-name?source=project-defined-mapping",
"jupyter-ros": "my-name-from-mapping?source=project-defined-mapping"
}



# We test that mapping a conda package name to null
# marks that conda package as not being a pypi package,
# so no purls are added for it, e.g.
# "jupyter-amphion": null
PACKAGE_NAME_SHOULD_BE_NULL = ("jupyter-amphion",)

if __name__ == "__main__":
# this will test if we map correctly our packages
# we have one remote mapping for conda-forge
# and one local mapping for robostack

if __name__ == "__main__":
nichmor marked this conversation as resolved.
Show resolved Hide resolved
with open("pixi.lock") as pixi_lock:
lock = yaml.safe_load(pixi_lock)

expected_packages = [
package for package in lock["packages"] if package["name"] in PACKAGE_NAME_TO_TEST
]

assert len(expected_packages) == 2
expected_null_packages = [
package for package in lock["packages"] if package["name"] in PACKAGE_NAME_SHOULD_BE_NULL
]

for package in expected_packages:
package_name = package["name"]
Expand All @@ -29,3 +47,7 @@
expected_purl = f"pkg:pypi/{PACKAGE_NAME_TO_TEST[package_name]}"

assert purls[0] == expected_purl


for package in expected_null_packages:
assert "purls" not in package
958 changes: 501 additions & 457 deletions examples/pypi/pixi.lock

Large diffs are not rendered by default.

38 changes: 4 additions & 34 deletions src/lock_file/package_identifier.rs
nichmor marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::{project::manifest::python::PyPiPackageName, pypi_mapping};
use crate::project::manifest::python::PyPiPackageName;
use pep508_rs::{Requirement, VersionOrUrl};
use rattler_conda_types::{PackageUrl, RepoDataRecord};
use std::{collections::HashSet, str::FromStr};
Expand Down Expand Up @@ -32,51 +32,21 @@ impl PypiPackageIdentifier {
result: &mut Vec<Self>,
) -> Result<(), ConversionError> {
// Check the PURLs for a python package.
let mut has_pypi_purl = false;
for purl in record.package_record.purls.iter() {
if let Some(entry) = Self::try_from_purl(purl, &record.package_record.version.as_str())?
if let Some(entry) =
Self::convert_from_purl(purl, &record.package_record.version.as_str())?
{
result.push(entry);
has_pypi_purl = true;
}
}

// If there is no pypi purl, but the package is a conda-forge package, we just assume that
// the name of the package is equivalent to the name of the python package.
if !has_pypi_purl && pypi_mapping::is_conda_forge_record(record) {
// Convert the conda package names to pypi package names. If the conversion fails we
// just assume that its not a valid python package.
let name = PackageName::from_str(record.package_record.name.as_source()).ok();
let version =
pep440_rs::Version::from_str(&record.package_record.version.as_str()).ok();
if let (Some(name), Some(version)) = (name, version) {
result.push(PypiPackageIdentifier {
name: PyPiPackageName::from_normalized(name),
version,
url: record.url.clone(),
// TODO: We can't really tell which python extras are enabled in a conda package.
extras: Default::default(),
})
}
}

Ok(())
}

// /// Given a list of conda package records, extract the python packages that will be installed
// /// when these conda packages are installed.
// pub fn from_records(records: &[RepoDataRecord]) -> Result<Vec<Self>, ConversionError> {
// let mut result = Vec::new();
// for record in records {
// Self::from_record_into(record, &mut result)?;
// }
// Ok(result)
// }

/// Tries to construct an instance from a generic PURL.
///
/// The `fallback_version` is used if the PURL does not contain a version.
pub fn try_from_purl(
pub fn convert_from_purl(
package_url: &PackageUrl,
fallback_version: &str,
) -> Result<Option<Self>, ConversionError> {
Expand Down
70 changes: 45 additions & 25 deletions src/pypi_mapping/custom_pypi_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@ use async_once_cell::OnceCell;
use crate::pypi_mapping::MappingLocation;

use super::{
prefix_pypi_name_mapping::{self},
build_pypi_purl_from_package_record, is_conda_forge_record, prefix_pypi_name_mapping,
MappingMap, Reporter,
};

pub async fn fetch_mapping_from_url(
pub async fn fetch_mapping_from_url<T>(
client: &ClientWithMiddleware,
url: &Url,
) -> miette::Result<HashMap<String, String>> {
) -> miette::Result<T>
where
T: serde::de::DeserializeOwned,
{
let response = client
.get(url.clone())
.send()
Expand All @@ -34,8 +37,7 @@ pub async fn fetch_mapping_from_url(
));
}

let mapping_by_name: HashMap<String, String> =
response.json().await.into_diagnostic().context(format!(
let mapping_by_name: T = response.json().await.into_diagnostic().context(format!(
"failed to parse pypi name mapping located at {}. Please make sure that it's a valid json",
url
))?;
Expand All @@ -46,11 +48,11 @@ pub async fn fetch_mapping_from_url(
pub async fn fetch_custom_mapping(
client: &ClientWithMiddleware,
mapping_url: &MappingMap,
) -> miette::Result<&'static HashMap<String, HashMap<String, String>>> {
static MAPPING: OnceCell<HashMap<String, HashMap<String, String>>> = OnceCell::new();
) -> miette::Result<&'static HashMap<String, HashMap<String, Option<String>>>> {
static MAPPING: OnceCell<HashMap<String, HashMap<String, Option<String>>>> = OnceCell::new();
MAPPING
.get_or_try_init(async {
let mut mapping_url_to_name: HashMap<String, HashMap<String, String>> =
let mut mapping_url_to_name: HashMap<String, HashMap<String, Option<String>>> =
Default::default();

for (name, url) in mapping_url.iter() {
Expand Down Expand Up @@ -83,10 +85,12 @@ pub async fn fetch_custom_mapping(
let contents = std::fs::read_to_string(path)
.into_diagnostic()
.context(format!("mapping on {path:?} could not be loaded"))?;
let data: HashMap<String, String> = serde_json::from_str(&contents)
.unwrap_or_else(|_| {
panic!("Failed to parse JSON mapping located at {path:?}")
});
let data: HashMap<String, Option<String>> = serde_json::from_str(&contents)
.into_diagnostic()
.context(format!(
"Failed to parse JSON mapping located at {}",
path.display()
))?;

mapping_url_to_name.insert(name.to_string(), data);
}
Expand Down Expand Up @@ -149,7 +153,7 @@ pub async fn amend_pypi_purls(
/// a conda-forge package.
fn amend_pypi_purls_for_record(
record: &mut RepoDataRecord,
custom_mapping: &'static HashMap<String, HashMap<String, String>>,
custom_mapping: &'static HashMap<String, HashMap<String, Option<String>>>,
) -> miette::Result<()> {
// If the package already has a pypi name we can stop here.
if record
Expand All @@ -161,27 +165,43 @@ fn amend_pypi_purls_for_record(
return Ok(());
}

// If this package is a conda-forge package or user specified a custom channel mapping
// we can try to guess the pypi name from the conda name
if custom_mapping.contains_key(&record.channel) {
if let Some(mapped_channel) = custom_mapping.get(&record.channel) {
if let Some(mapped_name) =
mapped_channel.get(record.package_record.name.as_normalized())
{
record.package_record.purls.push(
PackageUrl::new(String::from("pypi"), mapped_name)
.expect("valid pypi package url"),
);
let mut not_a_pypi = false;

// we verify if we have package channel and name in user provided mapping
if let Some(mapped_channel) = custom_mapping.get(&record.channel) {
if let Some(mapped_name) = mapped_channel.get(record.package_record.name.as_normalized()) {
// we have a pypi name for it so we record a purl
if let Some(name) = mapped_name {
let purl = PackageUrl::builder(String::from("pypi"), name.to_string())
.with_qualifier("source", "project-defined-mapping")
.expect("valid qualifier");

record
.package_record
.purls
.push(purl.build().expect("valid pypi package url"));
} else {
not_a_pypi = true;
}
}
}

// if we don't have it and its channel is conda-forge
// we assume that it's the pypi package
if !not_a_pypi && record.package_record.purls.is_empty() && is_conda_forge_record(record) {
// Convert the conda package names to pypi package names. If the conversion fails we
// just assume that its not a valid python package.
if let Some(purl) = build_pypi_purl_from_package_record(&record.package_record) {
record.package_record.purls.push(purl);
}
}
nichmor marked this conversation as resolved.
Show resolved Hide resolved

Ok(())
}

pub fn _amend_only_custom_pypi_purls(
conda_packages: &mut [RepoDataRecord],
custom_mapping: &'static HashMap<String, HashMap<String, String>>,
custom_mapping: &'static HashMap<String, HashMap<String, Option<String>>>,
) -> miette::Result<()> {
for record in conda_packages.iter_mut() {
amend_pypi_purls_for_record(record, custom_mapping)?;
Expand Down
38 changes: 34 additions & 4 deletions src/pypi_mapping/mod.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use std::{collections::HashMap, path::PathBuf, str::FromStr, sync::Arc};

use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache, HttpCacheOptions};
use rattler_conda_types::RepoDataRecord;
use rattler_conda_types::{PackageRecord, PackageUrl, RepoDataRecord};
use reqwest_middleware::ClientBuilder;
use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
use url::Url;

use crate::config::get_cache_dir;

mod custom_pypi_mapping;
pub mod custom_pypi_mapping;
pub mod prefix_pypi_name_mapping;

pub trait Reporter: Send + Sync {
Expand All @@ -19,19 +19,34 @@ pub trait Reporter: Send + Sync {

pub type ChannelName = String;

type MappingMap = HashMap<ChannelName, MappingLocation>;
pub type MappingMap = HashMap<ChannelName, MappingLocation>;

#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum MappingLocation {
Path(PathBuf),
Url(Url),
}

/// The source of the pypi name mapping.
///
/// It can be user-defined (`Custom`) or come from prefix.dev (`Prefix`).
pub enum MappingSource {
    /// User-defined mapping, carried as a `MappingMap`.
    Custom { mapping: MappingMap },
    /// Mapping from prefix.dev.
    Prefix,
}

impl MappingSource {
/// Return the custom `MappingMap`
/// for `MappingSource::Custom`
pub fn custom(&self) -> Option<MappingMap> {
nichmor marked this conversation as resolved.
Show resolved Hide resolved
match self {
MappingSource::Custom { mapping } => Some(mapping.clone()),
_ => None,
}
}
}

pub async fn amend_pypi_purls(
client: reqwest::Client,
mapping_source: &MappingSource,
Expand Down Expand Up @@ -78,3 +93,18 @@ pub fn is_conda_forge_record(record: &RepoDataRecord) -> bool {
/// Returns `true` when the path component of `url` starts with `/conda-forge`.
///
/// This is a plain prefix check on the path, so any path beginning with that
/// text matches.
pub fn is_conda_forge_url(url: &Url) -> bool {
    let path = url.path();
    path.starts_with("/conda-forge")
}

/// Build a purl for a `PackageRecord`
/// it will return a purl in this format
/// `pkg:pypi/aiofiles`
pub fn build_pypi_purl_from_package_record(package_record: &PackageRecord) -> Option<PackageUrl> {
nichmor marked this conversation as resolved.
Show resolved Hide resolved
let name = pep508_rs::PackageName::from_str(package_record.name.as_source()).ok();
let version = pep440_rs::Version::from_str(&package_record.version.as_str()).ok();
if let (Some(name), Some(_)) = (name, version) {
let purl = PackageUrl::builder(String::from("pypi"), name.to_string());
let built_purl = purl.build().expect("valid pypi package url");
return Some(built_purl);
}

None
}