diff --git a/etc/scripts/generate_validators.py b/etc/scripts/generate_validators.py index d3dddcb..7d91050 100644 --- a/etc/scripts/generate_validators.py +++ b/etc/scripts/generate_validators.py @@ -44,7 +44,7 @@ ] } """ -from packageurl import PackageURL + from pathlib import Path import json @@ -76,32 +76,140 @@ Validate each type according to the PURL spec type definitions """ -class TypeValidator: +class BasePurlType: + """ + Base class for all PURL type classes + """ + + type: str + """The type string for this Package-URL type.""" + + type_name: str + """The name for this PURL type.""" + + description: str + """The description of this PURL type.""" + + use_repository: bool = False + """true if this PURL type use a public package repository.""" + + default_repository_url: str + """The default public repository URL for this PURL type""" + + namespace_requirement: str + """"States if this namespace is required, optional, or prohibited.""" + + allowed_qualifiers: dict = {"repository_url", "arch"} + """Set of allowed qualifier keys for this PURL type.""" + + namespace_case_sensitive: bool = True + """true if namespace is case sensitive. If false, the canonical form must be lowercased.""" + + name_case_sensitive: bool = True + """true if name is case sensitive. If false, the canonical form must be lowercased.""" + + version_case_sensitive: bool = True + """true if version is case sensitive. If false, the canonical form must be lowercased.""" + + purl_pattern: str + """A regex pattern that matches valid purls of this type.""" + @classmethod def validate(cls, purl, strict=False): + """ + Validate a PackageURL instance or string. 
+ Yields ValidationMessage and performs strict validation if strict=True + """ + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + + if not purl: + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message="No purl provided", + ) + return + + from packageurl import PackageURL + + if not isinstance(purl, PackageURL): + try: + purl = PackageURL.from_string(purl, normalize_purl=False) + except Exception as e: + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Invalid purl {purl!r} string: {e}", + ) + return + if not strict: purl = cls.normalize(purl) + yield from cls._validate_namespace(purl) + yield from cls._validate_name(purl) + yield from cls._validate_version(purl) + if strict: + yield from cls._validate_qualifiers(purl) + + messages = cls.validate_using_type_rules(purl, strict=strict) + if messages: + yield from messages + + @classmethod + def _validate_namespace(cls, purl): + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + if cls.namespace_requirement == "prohibited" and purl.namespace: - yield f"Namespace is prohibited for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Namespace is prohibited for purl type: {cls.type!r}", + ) elif cls.namespace_requirement == "required" and not purl.namespace: - yield f"Namespace is required for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Namespace is required for purl type: {cls.type!r}", + ) - if ( + # TODO: Check pending CPAN PR and decide if we want to upgrade the type definition schema + if purl.type == "cpan": + if purl.namespace and purl.namespace != purl.namespace.upper(): + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Namespace must be uppercase for purl type: {cls.type!r}", + ) + elif ( not cls.namespace_case_sensitive and purl.namespace and purl.namespace.lower() 
!= purl.namespace ): - yield f"Namespace is not lowercased for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Namespace is not lowercased for purl type: {cls.type!r}", + ) + @classmethod + def _validate_name(cls, purl): if not cls.name_case_sensitive and purl.name and purl.name.lower() != purl.name: - yield f"Name is not lowercased for purl type: {cls.type!r}" + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Name is not lowercased for purl type: {cls.type!r}", + ) + @classmethod + def _validate_version(cls, purl): if not cls.version_case_sensitive and purl.version and purl.version.lower() != purl.version: - yield f"Version is not lowercased for purl type: {cls.type!r}" + from packageurl import ValidationMessage + from packageurl import ValidationSeverity - yield from cls.validate_type(purl, strict=strict) + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Version is not lowercased for purl type: {cls.type!r}", + ) @classmethod def normalize(cls, purl): @@ -130,12 +238,16 @@ def normalize(cls, purl): ) @classmethod - def validate_type(cls, purl, strict=False): - if strict: - yield from cls.validate_qualifiers(purl=purl) + def validate_using_type_rules(cls, purl, strict=False): + """ + Validate using any additional type specific rules. + Yield validation messages. + Subclasses can override this method to add type specific validation rules. + """ + return iter([]) @classmethod - def validate_qualifiers(cls, purl): + def _validate_qualifiers(cls, purl): if not purl.qualifiers: return @@ -145,9 +257,15 @@ def validate_qualifiers(cls, purl): disallowed = purl_qualifiers_keys - allowed_qualifiers_set if disallowed: - yield ( - f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. 
" - f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}" + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + + yield ValidationMessage( + severity=ValidationSeverity.INFO, + message=( + f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. " + f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}" + ), ) ''' @@ -185,10 +303,10 @@ def generate_validators(): type_def = json.loads(type.read_text()) _type = type_def["type"] - standard_validator_class = "TypeValidator" + standard_validator_class = "BasePurlType" class_prefix = _type.capitalize() - class_name = f"{class_prefix}{standard_validator_class}" + class_name = f"{class_prefix}TypeDefinition" validators_by_type[_type] = class_name name_normalization_rules=type_def["name_definition"].get("normalization_rules") or [] allowed_qualifiers = [defintion.get("key") for defintion in type_def.get("qualifiers_definition") or []] diff --git a/spec b/spec index ce67457..c398646 160000 --- a/spec +++ b/spec @@ -1 +1 @@ -Subproject commit ce6745797a85a3121f2f1aef718d52f26d3f6a84 +Subproject commit c398646bb2d642ccdd43bfbf5923cf650d69dc6a diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 9d6e3e6..3bfae87 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -24,10 +24,13 @@ from __future__ import annotations +import dataclasses import re import string from collections import namedtuple from collections.abc import Mapping +from dataclasses import dataclass +from enum import Enum from typing import TYPE_CHECKING from typing import Any from typing import Optional @@ -58,6 +61,19 @@ """ +class ValidationSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + INFO = "info" + + +@dataclass +class ValidationMessage: + severity: ValidationSeverity + message: str + to_dict = dataclasses.asdict + + def quote(s: AnyStr) -> str: """ Return a percent-encoded unicode string, except for colon :, given an `s` @@ 
-188,12 +204,15 @@ def normalize_name( "apk", "bitnami", "hex", + "pub", ): name_str = name_str.lower() if ptype == "pypi": name_str = name_str.replace("_", "-").lower() if ptype == "hackage": name_str = name_str.replace("_", "-") + if ptype == "pub": + name_str = re.sub(r"[^a-z0-9]", "_", name_str.lower()) return name_str or None @@ -521,24 +540,41 @@ def to_string(self, encode: bool | None = True) -> str: return "".join(purl) - def validate(self, strict: bool = False) -> list[str]: + def validate(self, strict: bool = False) -> list["ValidationMessage"]: """ Validate this PackageURL object and return a list of validation error messages. """ - from packageurl.validate import VALIDATORS_BY_TYPE - - if self: - try: - validator_class = VALIDATORS_BY_TYPE.get(self.type) - if not validator_class: - return [f"Given type: {self.type} can not be validated"] - messages = list(validator_class.validate(self, strict)) # type: ignore[no-untyped-call] - return messages - except NoRouteAvailable: - return [f"Given type: {self.type} can not be validated"] + from packageurl.validate import DEFINITIONS_BY_TYPE + + validator_class = DEFINITIONS_BY_TYPE.get(self.type) + if not validator_class: + return [ + ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Unexpected purl type: expected {self.type!r}", + ) + ] + return list(validator_class.validate(purl=self, strict=strict)) # type: ignore[no-untyped-call] + + @classmethod + def validate_string(cls, purl: str, strict: bool = False) -> list["ValidationMessage"]: + """ + Validate a PURL string and return a list of validation error messages. 
+ """ + try: + purl_obj = cls.from_string(purl, normalize_purl=not strict) + assert isinstance(purl_obj, PackageURL) + return purl_obj.validate(strict=strict) + except ValueError as e: + return [ + ValidationMessage( + severity=ValidationSeverity.ERROR, + message=str(e), + ) + ] @classmethod - def from_string(cls, purl: str) -> Self: + def from_string(cls, purl: str, normalize_purl: bool = True) -> Self: """ Return a PackageURL object parsed from a string. Raise ValueError on errors. @@ -622,14 +658,18 @@ def from_string(cls, purl: str) -> Self: if not name: raise ValueError(f"purl is missing the required name component: {purl!r}") - type_, namespace, name, version, qualifiers, subpath = normalize( - type_, - namespace, - name, - version, - qualifiers_str, - subpath, - encode=False, + if normalize_purl: + type_, namespace, name, version, qualifiers, subpath = normalize( + type_, + namespace, + name, + version, + qualifiers_str, + subpath, + encode=False, + ) + else: + qualifiers = normalize_qualifiers(qualifiers_str, encode=False) or {} + return cls( + type_, namespace, name, version, qualifiers, subpath, normalize_purl=normalize_purl ) - - return cls(type_, namespace, name, version, qualifiers, subpath) diff --git a/src/packageurl/validate.py b/src/packageurl/validate.py index 87a6e10..45cf146 100644 --- a/src/packageurl/validate.py +++ b/src/packageurl/validate.py @@ -27,35 +27,140 @@ """ -class TypeValidator: +class BasePurlType: + """ + Base class for all PURL type classes + """ + + type: str + """The type string for this Package-URL type.""" + + type_name: str + """The name for this PURL type.""" + + description: str + """The description of this PURL type.""" + + use_repository: bool = False + """true if this PURL type use a public package repository.""" + + default_repository_url: str + """The default public repository URL for this PURL type""" + + namespace_requirement: str + """"States if this namespace is required, optional, or prohibited.""" + + 
allowed_qualifiers: dict = {"repository_url", "arch"} + """Set of allowed qualifier keys for this PURL type.""" + + namespace_case_sensitive: bool = True + """true if namespace is case sensitive. If false, the canonical form must be lowercased.""" + + name_case_sensitive: bool = True + """true if name is case sensitive. If false, the canonical form must be lowercased.""" + + version_case_sensitive: bool = True + """true if version is case sensitive. If false, the canonical form must be lowercased.""" + + purl_pattern: str + """A regex pattern that matches valid purls of this type.""" + @classmethod def validate(cls, purl, strict=False): + """ + Validate a PackageURL instance or string. + Yields ValidationMessage and performs strict validation if strict=True + """ + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + + if not purl: + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message="No purl provided", + ) + return + + from packageurl import PackageURL + + if not isinstance(purl, PackageURL): + try: + purl = PackageURL.from_string(purl, normalize_purl=False) + except Exception as e: + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Invalid purl {purl!r} string: {e}", + ) + return + if not strict: purl = cls.normalize(purl) + yield from cls._validate_namespace(purl) + yield from cls._validate_name(purl) + yield from cls._validate_version(purl) + if strict: + yield from cls._validate_qualifiers(purl) + + messages = cls.validate_using_type_rules(purl, strict=strict) + if messages: + yield from messages + + @classmethod + def _validate_namespace(cls, purl): + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + if cls.namespace_requirement == "prohibited" and purl.namespace: - yield f"Namespace is prohibited for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Namespace is prohibited for purl type: 
{cls.type!r}", + ) elif cls.namespace_requirement == "required" and not purl.namespace: - yield f"Namespace is required for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Namespace is required for purl type: {cls.type!r}", + ) + # TODO: Check pending CPAN PR and decide if we want to upgrade the type definition schema if purl.type == "cpan": if purl.namespace and purl.namespace != purl.namespace.upper(): - yield f"Namespace must be uppercase for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Namespace must be uppercase for purl type: {cls.type!r}", + ) elif ( not cls.namespace_case_sensitive and purl.namespace and purl.namespace.lower() != purl.namespace ): - yield f"Namespace is not lowercased for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Namespace is not lowercased for purl type: {cls.type!r}", + ) + @classmethod + def _validate_name(cls, purl): if not cls.name_case_sensitive and purl.name and purl.name.lower() != purl.name: - yield f"Name is not lowercased for purl type: {cls.type!r}" + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Name is not lowercased for purl type: {cls.type!r}", + ) + + @classmethod + def _validate_version(cls, purl): if not cls.version_case_sensitive and purl.version and purl.version.lower() != purl.version: - yield f"Version is not lowercased for purl type: {cls.type!r}" + from packageurl import ValidationMessage + from packageurl import ValidationSeverity - yield from cls.validate_type(purl, strict=strict) + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Version is not lowercased for purl type: {cls.type!r}", + ) @classmethod def normalize(cls, purl): @@ -84,12 +189,16 @@ def normalize(cls, purl): ) @classmethod - def 
validate_type(cls, purl, strict=False): - if strict: - yield from cls.validate_qualifiers(purl=purl) + def validate_using_type_rules(cls, purl, strict=False): + """ + Validate using any additional type specific rules. + Yield validation messages. + Subclasses can override this method to add type specific validation rules. + """ + return iter([]) @classmethod - def validate_qualifiers(cls, purl): + def _validate_qualifiers(cls, purl): if not purl.qualifiers: return @@ -99,13 +208,19 @@ def validate_qualifiers(cls, purl): disallowed = purl_qualifiers_keys - allowed_qualifiers_set if disallowed: - yield ( - f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. " - f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}" + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + + yield ValidationMessage( + severity=ValidationSeverity.INFO, + message=( + f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. " + f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}" + ), ) -class AlpmTypeValidator(TypeValidator): +class AlpmTypeDefinition(BasePurlType): type = "alpm" type_name = "Arch Linux package" description = """Arch Linux packages and other users of the libalpm/pacman package manager.""" @@ -119,7 +234,7 @@ class AlpmTypeValidator(TypeValidator): purl_pattern = "pkg:alpm/.*" -class ApkTypeValidator(TypeValidator): +class ApkTypeDefinition(BasePurlType): type = "apk" type_name = "APK-based packages" description = """Alpine Linux APK-based packages""" @@ -133,7 +248,7 @@ class ApkTypeValidator(TypeValidator): purl_pattern = "pkg:apk/.*" -class BitbucketTypeValidator(TypeValidator): +class BitbucketTypeDefinition(BasePurlType): type = "bitbucket" type_name = "Bitbucket" description = """Bitbucket-based packages""" @@ -147,7 +262,7 @@ class BitbucketTypeValidator(TypeValidator): purl_pattern = "pkg:bitbucket/.*" -class BitnamiTypeValidator(TypeValidator): +class 
BitnamiTypeDefinition(BasePurlType): type = "bitnami" type_name = "Bitnami" description = """Bitnami-based packages""" @@ -161,7 +276,7 @@ class BitnamiTypeValidator(TypeValidator): purl_pattern = "pkg:bitnami/.*" -class CargoTypeValidator(TypeValidator): +class CargoTypeDefinition(BasePurlType): type = "cargo" type_name = "Cargo" description = """Cargo packages for Rust""" @@ -175,7 +290,7 @@ class CargoTypeValidator(TypeValidator): purl_pattern = "pkg:cargo/.*" -class CocoapodsTypeValidator(TypeValidator): +class CocoapodsTypeDefinition(BasePurlType): type = "cocoapods" type_name = "CocoaPods" description = """CocoaPods pods""" @@ -189,7 +304,7 @@ class CocoapodsTypeValidator(TypeValidator): purl_pattern = "pkg:cocoapods/.*" -class ComposerTypeValidator(TypeValidator): +class ComposerTypeDefinition(BasePurlType): type = "composer" type_name = "Composer" description = """Composer PHP packages""" @@ -203,7 +318,7 @@ class ComposerTypeValidator(TypeValidator): purl_pattern = "pkg:composer/.*" -class ConanTypeValidator(TypeValidator): +class ConanTypeDefinition(BasePurlType): type = "conan" type_name = "Conan C/C++ packages" description = """Conan C/C++ packages. 
The purl is designed to closely resemble the Conan-native /@/ syntax for package references as specified in https://docs.conan.io/en/1.46/cheatsheet.html#package-terminology""" @@ -217,7 +332,7 @@ class ConanTypeValidator(TypeValidator): purl_pattern = "pkg:conan/.*" -class CondaTypeValidator(TypeValidator): +class CondaTypeDefinition(BasePurlType): type = "conda" type_name = "Conda" description = """conda is for Conda packages""" @@ -231,7 +346,7 @@ class CondaTypeValidator(TypeValidator): purl_pattern = "pkg:conda/.*" -class CpanTypeValidator(TypeValidator): +class CpanTypeDefinition(BasePurlType): type = "cpan" type_name = "CPAN" description = """CPAN Perl packages""" @@ -245,15 +360,26 @@ class CpanTypeValidator(TypeValidator): purl_pattern = "pkg:cpan/.*" @classmethod - def validate_type(cls, purl, strict=False): + def validate_using_type_rules(cls, purl, strict=False): + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + if purl.namespace and "::" in purl.name: - yield f"Name must not contain '::' when Namespace is absent for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Name must not contain '::' when Namespace is present for purl type: {cls.type!r}", + ) if not purl.namespace and "-" in purl.name: - yield f"Name must not contain '-' when Namespace is absent for purl type: {cls.type!r}" - yield from super().validate_type(purl, strict) + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Name must not contain '-' when Namespace is absent for purl type: {cls.type!r}", + ) + messages = super().validate_using_type_rules(purl, strict) + if messages: + yield from messages -class CranTypeValidator(TypeValidator): +class CranTypeDefinition(BasePurlType): type = "cran" type_name = "CRAN" description = """CRAN R packages""" @@ -267,7 +393,7 @@ class CranTypeValidator(TypeValidator): purl_pattern = "pkg:cran/.*" -class DebTypeValidator(TypeValidator): 
+class DebTypeDefinition(BasePurlType): type = "deb" type_name = "Debian package" description = """Debian packages, Debian derivatives, and Ubuntu packages""" @@ -281,7 +407,7 @@ class DebTypeValidator(TypeValidator): purl_pattern = "pkg:deb/.*" -class DockerTypeValidator(TypeValidator): +class DockerTypeDefinition(BasePurlType): type = "docker" type_name = "Docker image" description = """for Docker images""" @@ -295,7 +421,7 @@ class DockerTypeValidator(TypeValidator): purl_pattern = "pkg:docker/.*" -class GemTypeValidator(TypeValidator): +class GemTypeDefinition(BasePurlType): type = "gem" type_name = "RubyGems" description = """RubyGems""" @@ -309,7 +435,7 @@ class GemTypeValidator(TypeValidator): purl_pattern = "pkg:gem/.*" -class GenericTypeValidator(TypeValidator): +class GenericTypeDefinition(BasePurlType): type = "generic" type_name = "Generic Package" description = """The generic type is for plain, generic packages that do not fit anywhere else such as for "upstream-from-distro" packages. 
In particular this is handy for a plain version control repository such as a bare git repo in combination with a vcs_url.""" @@ -323,7 +449,7 @@ class GenericTypeValidator(TypeValidator): purl_pattern = "pkg:generic/.*" -class GithubTypeValidator(TypeValidator): +class GithubTypeDefinition(BasePurlType): type = "github" type_name = "GitHub" description = """GitHub-based packages""" @@ -337,7 +463,7 @@ class GithubTypeValidator(TypeValidator): purl_pattern = "pkg:github/.*" -class GolangTypeValidator(TypeValidator): +class GolangTypeDefinition(BasePurlType): type = "golang" type_name = "Go package" description = """Go packages""" @@ -351,7 +477,7 @@ class GolangTypeValidator(TypeValidator): purl_pattern = "pkg:golang/.*" -class HackageTypeValidator(TypeValidator): +class HackageTypeDefinition(BasePurlType): type = "hackage" type_name = "Haskell package" description = """Haskell packages""" @@ -365,13 +491,21 @@ class HackageTypeValidator(TypeValidator): purl_pattern = "pkg:hackage/.*" @classmethod - def validate_type(cls, purl, strict=False): + def validate_using_type_rules(cls, purl, strict=False): + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + if "_" in purl.name: - yield f"Name contains underscores but should be kebab-case for purl type: {cls.type!r}" - yield from super().validate_type(purl, strict) + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Name cannot contain underscores for purl type:{cls.type!r}", + ) + messages = super().validate_using_type_rules(purl, strict) + if messages: + yield from messages -class HexTypeValidator(TypeValidator): +class HexTypeDefinition(BasePurlType): type = "hex" type_name = "Hex" description = """Hex packages""" @@ -385,7 +519,7 @@ class HexTypeValidator(TypeValidator): purl_pattern = "pkg:hex/.*" -class HuggingfaceTypeValidator(TypeValidator): +class HuggingfaceTypeDefinition(BasePurlType): type = "huggingface" type_name = "HuggingFace models" description 
= """Hugging Face ML models""" @@ -399,7 +533,7 @@ class HuggingfaceTypeValidator(TypeValidator): purl_pattern = "pkg:huggingface/.*" -class LuarocksTypeValidator(TypeValidator): +class LuarocksTypeDefinition(BasePurlType): type = "luarocks" type_name = "LuaRocks" description = """Lua packages installed with LuaRocks""" @@ -413,7 +547,7 @@ class LuarocksTypeValidator(TypeValidator): purl_pattern = "pkg:luarocks/.*" -class MavenTypeValidator(TypeValidator): +class MavenTypeDefinition(BasePurlType): type = "maven" type_name = "Maven" description = """PURL type for Maven JARs and related artifacts.""" @@ -427,7 +561,7 @@ class MavenTypeValidator(TypeValidator): purl_pattern = "pkg:maven/.*" -class MlflowTypeValidator(TypeValidator): +class MlflowTypeDefinition(BasePurlType): type = "mlflow" type_name = "" description = """MLflow ML models (Azure ML, Databricks, etc.)""" @@ -441,7 +575,7 @@ class MlflowTypeValidator(TypeValidator): purl_pattern = "pkg:mlflow/.*" -class NpmTypeValidator(TypeValidator): +class NpmTypeDefinition(BasePurlType): type = "npm" type_name = "Node NPM packages" description = """PURL type for npm packages.""" @@ -455,7 +589,7 @@ class NpmTypeValidator(TypeValidator): purl_pattern = "pkg:npm/.*" -class NugetTypeValidator(TypeValidator): +class NugetTypeDefinition(BasePurlType): type = "nuget" type_name = "NuGet" description = """NuGet .NET packages""" @@ -469,7 +603,7 @@ class NugetTypeValidator(TypeValidator): purl_pattern = "pkg:nuget/.*" -class OciTypeValidator(TypeValidator): +class OciTypeDefinition(BasePurlType): type = "oci" type_name = "OCI image" description = """For artifacts stored in registries that conform to the OCI Distribution Specification https://github.com/opencontainers/distribution-spec including container images built by Docker and others""" @@ -483,7 +617,7 @@ class OciTypeValidator(TypeValidator): purl_pattern = "pkg:oci/.*" -class PubTypeValidator(TypeValidator): +class PubTypeDefinition(BasePurlType): type = "pub" 
type_name = "Pub" description = """Dart and Flutter pub packages""" @@ -497,15 +631,27 @@ class PubTypeValidator(TypeValidator): purl_pattern = "pkg:pub/.*" @classmethod - def validate_type(cls, purl, strict=False): - if any(not (c.islower() or c.isdigit() or c == "_") for c in purl.name): - yield f"Name contains invalid characters but should only contain lowercase letters, digits, or underscores for purl type: {cls.type!r}" + def validate_using_type_rules(cls, purl, strict=False): + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + + if not all(c.isalnum() or c == "_" for c in purl.name): + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Name contains invalid characters but should only contain letters, digits, or underscores for purl type: {cls.type!r}", + ) + if " " in purl.name: - yield f"Name contains spaces but should use underscores instead for purl type: {cls.type!r}" - yield from super().validate_type(purl, strict) + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Name contains spaces but should use underscores instead for purl type: {cls.type!r}", + ) + messages = super().validate_using_type_rules(purl, strict) + if messages: + yield from messages -class PypiTypeValidator(TypeValidator): +class PypiTypeDefinition(BasePurlType): type = "pypi" type_name = "PyPI" description = """Python packages""" @@ -519,13 +665,21 @@ class PypiTypeValidator(TypeValidator): purl_pattern = "pkg:pypi/.*" @classmethod - def validate_type(cls, purl, strict=False): + def validate_using_type_rules(cls, purl, strict=False): + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + if "_" in purl.name: - yield f"Name cannot contain `_` for purl type:{cls.type!r}" - yield from super().validate_type(purl, strict) + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Name cannot contain underscores for purl type:{cls.type!r}", + ) + 
messages = super().validate_using_type_rules(purl, strict) + if messages: + yield from messages -class QpkgTypeValidator(TypeValidator): +class QpkgTypeDefinition(BasePurlType): type = "qpkg" type_name = "QNX package" description = """QNX packages""" @@ -539,7 +693,7 @@ class QpkgTypeValidator(TypeValidator): purl_pattern = "pkg:qpkg/.*" -class RpmTypeValidator(TypeValidator): +class RpmTypeDefinition(BasePurlType): type = "rpm" type_name = "RPM" description = """RPM packages""" @@ -553,7 +707,7 @@ class RpmTypeValidator(TypeValidator): purl_pattern = "pkg:rpm/.*" -class SwidTypeValidator(TypeValidator): +class SwidTypeDefinition(BasePurlType): type = "swid" type_name = "Software Identification (SWID) Tag" description = """PURL type for ISO-IEC 19770-2 Software Identification (SWID) tags.""" @@ -567,7 +721,7 @@ class SwidTypeValidator(TypeValidator): purl_pattern = "pkg:swid/.*" -class SwiftTypeValidator(TypeValidator): +class SwiftTypeDefinition(BasePurlType): type = "swift" type_name = "Swift packages" description = """Swift packages""" @@ -581,37 +735,37 @@ class SwiftTypeValidator(TypeValidator): purl_pattern = "pkg:swift/.*" -VALIDATORS_BY_TYPE = { - "alpm": AlpmTypeValidator, - "apk": ApkTypeValidator, - "bitbucket": BitbucketTypeValidator, - "bitnami": BitnamiTypeValidator, - "cargo": CargoTypeValidator, - "cocoapods": CocoapodsTypeValidator, - "composer": ComposerTypeValidator, - "conan": ConanTypeValidator, - "conda": CondaTypeValidator, - "cpan": CpanTypeValidator, - "cran": CranTypeValidator, - "deb": DebTypeValidator, - "docker": DockerTypeValidator, - "gem": GemTypeValidator, - "generic": GenericTypeValidator, - "github": GithubTypeValidator, - "golang": GolangTypeValidator, - "hackage": HackageTypeValidator, - "hex": HexTypeValidator, - "huggingface": HuggingfaceTypeValidator, - "luarocks": LuarocksTypeValidator, - "maven": MavenTypeValidator, - "mlflow": MlflowTypeValidator, - "npm": NpmTypeValidator, - "nuget": NugetTypeValidator, - "oci": 
OciTypeValidator, - "pub": PubTypeValidator, - "pypi": PypiTypeValidator, - "qpkg": QpkgTypeValidator, - "rpm": RpmTypeValidator, - "swid": SwidTypeValidator, - "swift": SwiftTypeValidator, +DEFINITIONS_BY_TYPE = { + "alpm": AlpmTypeDefinition, + "apk": ApkTypeDefinition, + "bitbucket": BitbucketTypeDefinition, + "bitnami": BitnamiTypeDefinition, + "cargo": CargoTypeDefinition, + "cocoapods": CocoapodsTypeDefinition, + "composer": ComposerTypeDefinition, + "conan": ConanTypeDefinition, + "conda": CondaTypeDefinition, + "cpan": CpanTypeDefinition, + "cran": CranTypeDefinition, + "deb": DebTypeDefinition, + "docker": DockerTypeDefinition, + "gem": GemTypeDefinition, + "generic": GenericTypeDefinition, + "github": GithubTypeDefinition, + "golang": GolangTypeDefinition, + "hackage": HackageTypeDefinition, + "hex": HexTypeDefinition, + "huggingface": HuggingfaceTypeDefinition, + "luarocks": LuarocksTypeDefinition, + "maven": MavenTypeDefinition, + "mlflow": MlflowTypeDefinition, + "npm": NpmTypeDefinition, + "nuget": NugetTypeDefinition, + "oci": OciTypeDefinition, + "pub": PubTypeDefinition, + "pypi": PypiTypeDefinition, + "qpkg": QpkgTypeDefinition, + "rpm": RpmTypeDefinition, + "swid": SwidTypeDefinition, + "swift": SwiftTypeDefinition, } diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index de036f4..a78d23d 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -24,28 +24,42 @@ import json import os +from dataclasses import dataclass +from typing import Any +from typing import Dict +from typing import List +from typing import Optional import pytest from packageurl import PackageURL -current_dir = os.path.dirname(__file__) -root_dir = os.path.abspath(os.path.join(current_dir, "..")) -spec_file_path = os.path.join(root_dir, "spec", "tests", "spec", "specification-test.json") -with open(spec_file_path, "r", encoding="utf-8") as f: - test_cases = json.load(f) +@dataclass +class PurlTestCase: + description: str + test_type: str + input: Any 
+ expected_output: Optional[Any] = None + expected_failure: bool = False + test_group: Optional[str] = None -tests = test_cases["tests"] -parse_tests = [t for t in tests if t["test_type"] == "parse"] -build_tests = [t for t in tests if t["test_type"] == "build"] +def load_test_case(case_dict: dict) -> PurlTestCase: + return PurlTestCase( + description=case_dict["description"], + test_type=case_dict["test_type"], + input=case_dict["input"], + expected_output=case_dict.get("expected_output"), + expected_failure=case_dict.get("expected_failure", False), + test_group=case_dict.get("test_group"), + ) -def load_spec_files(spec_dir): +def load_spec_files(spec_dir: str) -> Dict[str, List[PurlTestCase]]: """ - Load all JSON files from the given directory into a dictionary. - Key = filename, Value = parsed JSON content + Load all JSON files from the given directory into a dictionary of test cases. + Key = filename, Value = list of PurlTestCase objects """ spec_data = {} for filename in os.listdir(spec_dir): @@ -54,78 +68,86 @@ def load_spec_files(spec_dir): with open(filepath, "r", encoding="utf-8") as f: try: data = json.load(f) - spec_data[filename] = data["tests"] + spec_data[filename] = [load_test_case(tc) for tc in data["tests"]] except json.JSONDecodeError as e: print(f"Error parsing {filename}: {e}") return spec_data +current_dir = os.path.dirname(__file__) +root_dir = os.path.abspath(os.path.join(current_dir, "..")) +spec_file_path = os.path.join(root_dir, "spec", "tests", "spec", "specification-test.json") + +with open(spec_file_path, "r", encoding="utf-8") as f: + test_cases = json.load(f) + +all_tests = [load_test_case(tc) for tc in test_cases["tests"]] +parse_tests = [t for t in all_tests if t.test_type == "parse"] +build_tests = [t for t in all_tests if t.test_type == "build"] + SPEC_DIR = os.path.join(os.path.dirname(__file__), "..", "spec", "tests", "types") spec_dict = load_spec_files(SPEC_DIR) flattened_cases = [] for filename, cases in spec_dict.items(): 
# Fixture loading: the general specification test suite.
current_dir = os.path.dirname(__file__)
root_dir = os.path.abspath(os.path.join(current_dir, ".."))
spec_file_path = os.path.join(root_dir, "spec", "tests", "spec", "specification-test.json")

with open(spec_file_path, "r", encoding="utf-8") as f:
    test_cases = json.load(f)

all_tests = [load_test_case(tc) for tc in test_cases["tests"]]
parse_tests = [t for t in all_tests if t.test_type == "parse"]
build_tests = [t for t in all_tests if t.test_type == "build"]

# Fixture loading: the per-type test files, flattened into one
# (filename, description, case) tuple per case for parametrization.
SPEC_DIR = os.path.join(os.path.dirname(__file__), "..", "spec", "tests", "types")
spec_dict = load_spec_files(SPEC_DIR)

flattened_cases = [
    (filename, case.description, case)
    for filename, cases in spec_dict.items()
    for case in cases
]


@pytest.mark.parametrize(
    "case",
    parse_tests,
    ids=lambda tc: tc.description,
)
def test_parse(case: PurlTestCase):
    """
    Parse the input purl string and compare its canonical string form, or
    expect an exception when the case is marked as an expected failure.
    """
    if not case.expected_failure:
        parsed = PackageURL.from_string(case.input)
        assert parsed.to_string() == case.expected_output
        return

    with pytest.raises(Exception):
        PackageURL.from_string(case.input)


@pytest.mark.parametrize(
    "case",
    build_tests,
    ids=lambda tc: tc.description,
)
def test_build(case: PurlTestCase):
    """
    Build a purl from the input components and compare its string form, or
    expect an exception when the case is marked as an expected failure.
    """
    component_names = ("type", "namespace", "name", "version", "qualifiers", "subpath")
    # Components absent from the input are passed as None.
    kwargs = {component: case.input.get(component) for component in component_names}

    if not case.expected_failure:
        built = PackageURL(**kwargs)
        assert built.to_string() == case.expected_output
        return

    with pytest.raises(Exception):
        PackageURL(**kwargs).to_string()
@pytest.mark.parametrize(
    "filename,description,case",
    flattened_cases,
    # pytest invokes a callable ``ids`` once per individual argument value,
    # never with the whole (filename, description, case) tuple, so the ids
    # are spelled out explicitly here, one per parameter set.
    ids=[f"{filename}-{description}" for filename, description, _ in flattened_cases],
)
def test_package_type_case(filename, description, case: PurlTestCase):
    """
    Run one per-type spec case, expecting an exception when the case is
    marked as an expected failure.
    """
    if case.expected_failure:
        with pytest.raises(Exception):
            run_test_case(case)
    else:
        run_test_case(case)
subpath=inp.get("subpath"), ) - assert purl.to_string() == case["expected_output"] + assert purl.to_string() == case.expected_output - elif test_type == "validation": - input_data = case["input"] - test_group = case.get("test_group") + elif case.test_type == "validation": + test_group = case.test_group if test_group not in ("base", "advanced"): raise Exception(test_group) - strict = True - if test_group == "advanced": - strict = False - purl = PackageURL( - type=input_data["type"], - namespace=input_data["namespace"], - name=input_data["name"], - version=input_data["version"], - qualifiers=input_data.get("qualifiers"), - subpath=input_data.get("subpath"), - normalize_purl=not strict, - ) - messages = purl.validate(strict=strict) - if case.get("expected_messages"): - assert messages == case["expected_messages"] + strict = test_group == "base" + messages = PackageURL.validate_string(purl=case.input, strict=strict) + messages = [message.to_dict() for message in messages] + if case.expected_output: + assert messages == case.expected_output else: assert not messages