From a6e8b68d13d06b709839e68f74bc09157f828675 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 3 Sep 2025 12:39:54 +0530 Subject: [PATCH 1/7] Adjust validation function Signed-off-by: Tushar Goel --- etc/scripts/generate_validators.py | 5 ++--- src/packageurl/validate.py | 23 ++++++++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/etc/scripts/generate_validators.py b/etc/scripts/generate_validators.py index d3dddcb..e3d824f 100644 --- a/etc/scripts/generate_validators.py +++ b/etc/scripts/generate_validators.py @@ -44,7 +44,7 @@ ] } """ -from packageurl import PackageURL + from pathlib import Path import json @@ -131,8 +131,7 @@ def normalize(cls, purl): @classmethod def validate_type(cls, purl, strict=False): - if strict: - yield from cls.validate_qualifiers(purl=purl) + yield @classmethod def validate_qualifiers(cls, purl): diff --git a/src/packageurl/validate.py b/src/packageurl/validate.py index 87a6e10..74943ee 100644 --- a/src/packageurl/validate.py +++ b/src/packageurl/validate.py @@ -55,7 +55,9 @@ def validate(cls, purl, strict=False): if not cls.version_case_sensitive and purl.version and purl.version.lower() != purl.version: yield f"Version is not lowercased for purl type: {cls.type!r}" - yield from cls.validate_type(purl, strict=strict) + messages = cls.validate_type(purl, strict=strict) + if messages: + yield from messages @classmethod def normalize(cls, purl): @@ -85,8 +87,7 @@ def normalize(cls, purl): @classmethod def validate_type(cls, purl, strict=False): - if strict: - yield from cls.validate_qualifiers(purl=purl) + return @classmethod def validate_qualifiers(cls, purl): @@ -250,7 +251,9 @@ def validate_type(cls, purl, strict=False): yield f"Name must not contain '::' when Namespace is absent for purl type: {cls.type!r}" if not purl.namespace and "-" in purl.name: yield f"Name must not contain '-' when Namespace is absent for purl type: {cls.type!r}" - yield from super().validate_type(purl, strict) + messages = super().validate_type(purl, strict) + if messages: + yield from messages class CranTypeValidator(TypeValidator): @@ -368,7 +371,9 @@ class HackageTypeValidator(TypeValidator): def validate_type(cls, purl, strict=False): if "_" in purl.name: yield f"Name contains underscores but should be kebab-case for purl type: {cls.type!r}" - yield from super().validate_type(purl, strict) + messages = super().validate_type(purl, strict) + if messages: + yield from messages class HexTypeValidator(TypeValidator): @@ -502,7 +507,9 @@ def validate_type(cls, purl, strict=False): yield f"Name contains invalid characters but should only contain lowercase letters, digits, or underscores for purl type: {cls.type!r}" if " " in purl.name: yield f"Name contains spaces but should use underscores instead for purl type: {cls.type!r}" - yield from super().validate_type(purl, strict) + messages = super().validate_type(purl, strict) + if messages: + yield from messages class PypiTypeValidator(TypeValidator): @@ -522,7 +529,9 @@ class PypiTypeValidator(TypeValidator): def validate_type(cls, purl, strict=False): if "_" in purl.name: yield f"Name cannot contain `_` for purl type:{cls.type!r}" - yield from super().validate_type(purl, strict) + messages = super().validate_type(purl, strict) + if messages: + yield from messages class QpkgTypeValidator(TypeValidator): From 1ac0235cb729ec48303290971e030014e1c52931 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 3 Sep 2025 12:40:41 +0530 Subject: [PATCH 2/7] Adjust validation function Signed-off-by: Tushar Goel --- etc/scripts/generate_validators.py | 2 +- spec | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/etc/scripts/generate_validators.py b/etc/scripts/generate_validators.py index e3d824f..23abbec 100644 --- a/etc/scripts/generate_validators.py +++ b/etc/scripts/generate_validators.py @@ -131,7 +131,7 @@ def normalize(cls, purl): @classmethod def validate_type(cls, purl, strict=False): - yield + return @classmethod def validate_qualifiers(cls, purl): diff --git a/spec b/spec index ce67457..9ccd853 160000 --- a/spec +++ b/spec @@ -1 +1 @@ -Subproject commit ce6745797a85a3121f2f1aef718d52f26d3f6a84 +Subproject commit 9ccd8532b11c2f76dabc9231cfe12b882e7ecc18 From 8317634705e6b15e66a28ee65d371158f1855001 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 5 Sep 2025 14:07:31 +0530 Subject: [PATCH 3/7] Adjust test structure Signed-off-by: Tushar Goel --- spec | 2 +- src/packageurl/__init__.py | 26 ++++++----- src/packageurl/validate.py | 91 +++++++++++++++++++++++++++++++------- tests/test_purl_spec.py | 23 +++++----- 4 files changed, 102 insertions(+), 40 deletions(-) diff --git a/spec b/spec index 9ccd853..d474c72 160000 --- a/spec +++ b/spec @@ -1 +1 @@ -Subproject commit 9ccd8532b11c2f76dabc9231cfe12b882e7ecc18 +Subproject commit d474c724fa5115a8487f6e709e5527546b37da11 diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 9d6e3e6..cc8f715 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -538,7 +538,7 @@ def validate(self, strict: bool = False) -> list[str]: return [f"Given type: {self.type} can not be validated"] @classmethod - def from_string(cls, purl: str) -> Self: + def from_string(cls, purl: str, normalize_purl: bool = True) -> Self: """ Return a PackageURL object parsed from a string. Raise ValueError on errors. @@ -622,14 +622,18 @@ def from_string(cls, purl: str) -> Self: if not name: raise ValueError(f"purl is missing the required name component: {purl!r}") - type_, namespace, name, version, qualifiers, subpath = normalize( - type_, - namespace, - name, - version, - qualifiers_str, - subpath, - encode=False, + if normalize_purl: + type_, namespace, name, version, qualifiers, subpath = normalize( + type_, + namespace, + name, + version, + qualifiers_str, + subpath, + encode=False, + ) + else: + qualifiers = normalize_qualifiers(qualifiers_str, encode=False) or {} + return cls( + type_, namespace, name, version, qualifiers, subpath, normalize_purl=normalize_purl ) - - return cls(type_, namespace, name, version, qualifiers, subpath) diff --git a/src/packageurl/validate.py b/src/packageurl/validate.py index 74943ee..a1311bd 100644 --- a/src/packageurl/validate.py +++ b/src/packageurl/validate.py @@ -26,6 +26,21 @@ Validate each type according to the PURL spec type definitions """ +from enum import Enum +from dataclasses import dataclass + + +class ValidationSeverity(Enum): + ERROR = "error" + WARNING = "warning" + INFO = "info" + + +@dataclass +class ValidationMessage: + severity: ValidationSeverity + message: str + class TypeValidator: @classmethod @@ -34,26 +49,45 @@ def validate(cls, purl, strict=False): purl = cls.normalize(purl) if cls.namespace_requirement == "prohibited" and purl.namespace: - yield f"Namespace is prohibited for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Namespace is prohibited for purl type: {cls.type!r}", + ) elif cls.namespace_requirement == "required" and not purl.namespace: - yield f"Namespace is required for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Namespace is required for purl type: {cls.type!r}", + ) if purl.type == "cpan": if purl.namespace and purl.namespace != purl.namespace.upper(): - yield f"Namespace must be uppercase for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Namespace must be uppercase for purl type: {cls.type!r}", + ) + # TODO: Check pending CPAN PR and decide if we want to upgrade the type definition schema elif ( not cls.namespace_case_sensitive and purl.namespace and purl.namespace.lower() != purl.namespace ): - yield f"Namespace is not lowercased for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Namespace is not lowercased for purl type: {cls.type!r}", + ) if not cls.name_case_sensitive and purl.name and purl.name.lower() != purl.name: - yield f"Name is not lowercased for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Name is not lowercased for purl type: {cls.type!r}", + ) if not cls.version_case_sensitive and purl.version and purl.version.lower() != purl.version: - yield f"Version is not lowercased for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Version is not lowercased for purl type: {cls.type!r}", + ) messages = cls.validate_type(purl, strict=strict) if messages: @@ -87,7 +121,8 @@ def normalize(cls, purl): @classmethod def validate_type(cls, purl, strict=False): - return + if strict: + yield from cls.validate_qualifiers(purl) @classmethod def validate_qualifiers(cls, purl): @@ -100,9 +135,12 @@ def validate_qualifiers(cls, purl): disallowed = purl_qualifiers_keys - allowed_qualifiers_set if disallowed: - yield ( - f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. " - f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}" + yield ValidationMessage( + severity=ValidationSeverity.INFO, + message=( + f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. " + f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}" + ), ) @@ -248,9 +286,15 @@ class CpanTypeValidator(TypeValidator): @classmethod def validate_type(cls, purl, strict=False): if purl.namespace and "::" in purl.name: - yield f"Name must not contain '::' when Namespace is absent for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Name must not contain '::' when Namespace is present for purl type: {cls.type!r}", + ) if not purl.namespace and "-" in purl.name: - yield f"Name must not contain '-' when Namespace is absent for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Name must not contain '-' when Namespace is absent for purl type: {cls.type!r}", + ) messages = super().validate_type(purl, strict) if messages: yield from messages @@ -370,7 +414,10 @@ class HackageTypeValidator(TypeValidator): @classmethod def validate_type(cls, purl, strict=False): if "_" in purl.name: - yield f"Name contains underscores but should be kebab-case for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Name cannot contain underscores for purl type:{cls.type!r}", + ) messages = super().validate_type(purl, strict) if messages: yield from messages @@ -503,10 +550,17 @@ class PubTypeValidator(TypeValidator): @classmethod def validate_type(cls, purl, strict=False): - if any(not (c.islower() or c.isdigit() or c == "_") for c in purl.name): - yield f"Name contains invalid characters but should only contain lowercase letters, digits, or underscores for purl type: {cls.type!r}" + if not all(c.isalnum() or c == "_" for c in purl.name): + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Name contains invalid characters but should only contain letters, digits, or underscores for purl type: {cls.type!r}", + ) + if " " in purl.name: - yield f"Name contains spaces but should use underscores instead for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Name contains spaces but should use underscores instead for purl type: {cls.type!r}", + ) messages = super().validate_type(purl, strict) if messages: yield from messages @@ -528,7 +582,10 @@ class PypiTypeValidator(TypeValidator): @classmethod def validate_type(cls, purl, strict=False): if "_" in purl.name: - yield f"Name cannot contain `_` for purl type:{cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Name cannot contain underscores for purl type:{cls.type!r}", + ) messages = super().validate_type(purl, strict) if messages: yield from messages diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index de036f4..118f92f 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -160,17 +160,18 @@ def run_test_case(case, test_type, desc): strict = True if test_group == "advanced": strict = False - purl = PackageURL( - type=input_data["type"], - namespace=input_data["namespace"], - name=input_data["name"], - version=input_data["version"], - qualifiers=input_data.get("qualifiers"), - subpath=input_data.get("subpath"), - normalize_purl=not strict, - ) + purl = PackageURL.from_string(input_data, normalize_purl=False) messages = purl.validate(strict=strict) - if case.get("expected_messages"): - assert messages == case["expected_messages"] + messages = list(change_messages_to_json(messages)) + if case.get("expected_output"): + assert messages == case["expected_output"] else: assert not messages + + +def change_messages_to_json(messages): + for message in messages: + yield { + "severity": message.severity.value, + "message": message.message, + } From b728a750983d08ef95aa98ea52c03b927d04bd5f Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 5 Sep 2025 14:16:36 +0530 Subject: [PATCH 4/7] Add normalization for pub packages Signed-off-by: Tushar Goel --- src/packageurl/__init__.py | 3 +++ src/packageurl/validate.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index cc8f715..ff093a5 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -188,12 +188,15 @@ def normalize_name( "apk", "bitnami", "hex", + "pub", ): name_str = name_str.lower() if ptype == "pypi": name_str = name_str.replace("_", "-").lower() if ptype == "hackage": name_str = name_str.replace("_", "-") + if ptype == "pub": + name_str = re.sub(r"[^a-z0-9]", "_", name_str.lower()) return name_str or None diff --git a/src/packageurl/validate.py b/src/packageurl/validate.py index a1311bd..c973ac1 100644 --- a/src/packageurl/validate.py +++ b/src/packageurl/validate.py @@ -552,13 +552,13 @@ class PubTypeValidator(TypeValidator): def validate_type(cls, purl, strict=False): if not all(c.isalnum() or c == "_" for c in purl.name): yield ValidationMessage( - severity=ValidationSeverity.ERROR, + severity=ValidationSeverity.WARNING, message=f"Name contains invalid characters but should only contain letters, digits, or underscores for purl type: {cls.type!r}", ) if " " in purl.name: yield ValidationMessage( - severity=ValidationSeverity.ERROR, + severity=ValidationSeverity.WARNING, message=f"Name contains spaces but should use underscores instead for purl type: {cls.type!r}", ) messages = super().validate_type(purl, strict) From 723a68cbecb93cb1c81c4cdaccc311999ef92a31 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 5 Sep 2025 14:17:37 +0530 Subject: [PATCH 5/7] Update spec Signed-off-by: Tushar Goel --- spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec b/spec index d474c72..c398646 160000 --- a/spec +++ b/spec @@ -1 +1 @@ -Subproject commit d474c724fa5115a8487f6e709e5527546b37da11 +Subproject commit c398646bb2d642ccdd43bfbf5923cf650d69dc6a From c1a12913dcebce14ffe7370a24f298dd6e0c071e Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Sun, 7 Sep 2025 20:41:12 +0530 Subject: [PATCH 6/7] Use objects instead of dictionaries for testing Signed-off-by: Tushar Goel --- src/packageurl/__init__.py | 41 ++++-- src/packageurl/validate.py | 248 ++++++++++++++++++++++++------------- tests/test_purl_spec.py | 168 +++++++++++++------------ 3 files changed, 278 insertions(+), 179 deletions(-) diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index ff093a5..2a147f0 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -524,21 +524,38 @@ def to_string(self, encode: bool | None = True) -> str: return "".join(purl) - def validate(self, strict: bool = False) -> list[str]: + def validate(self, strict: bool = False) -> list: """ Validate this PackageURL object and return a list of validation error messages. """ - from packageurl.validate import VALIDATORS_BY_TYPE - - if self: - try: - validator_class = VALIDATORS_BY_TYPE.get(self.type) - if not validator_class: - return [f"Given type: {self.type} can not be validated"] - messages = list(validator_class.validate(self, strict)) # type: ignore[no-untyped-call] - return messages - except NoRouteAvailable: - return [f"Given type: {self.type} can not be validated"] + from packageurl.validate import DEFINITIONS_BY_TYPE + from packageurl.validate import ValidationMessage + from packageurl.validate import ValidationSeverity + + validator_class = DEFINITIONS_BY_TYPE.get(self.type) + if not validator_class: + return [ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Unexpected purl type: expected {self.type!r}", + )] + return list(validator_class.validate(purl=self, strict=strict)) # type: ignore[no-untyped-call] + + @classmethod + def validate_string(cls, purl: str, strict: bool = False) -> list: + """ + Validate a PURL string and return a list of validation error messages. + """ + from packageurl.validate import ValidationMessage + from packageurl.validate import ValidationSeverity + + try: + purl = cls.from_string(purl, normalize_purl=not strict) + except ValueError as e: + return [ValidationMessage( + severity=ValidationSeverity.ERROR, + message=str(e), + )] + return purl.validate(strict=strict) @classmethod def from_string(cls, purl: str, normalize_purl: bool = True) -> Self: diff --git a/src/packageurl/validate.py b/src/packageurl/validate.py index c973ac1..ed221e0 100644 --- a/src/packageurl/validate.py +++ b/src/packageurl/validate.py @@ -28,9 +28,10 @@ from enum import Enum from dataclasses import dataclass +import dataclasses -class ValidationSeverity(Enum): +class ValidationSeverity(str, Enum): ERROR = "error" WARNING = "warning" INFO = "info" @@ -40,14 +41,85 @@ class ValidationSeverity(Enum): class ValidationMessage: severity: ValidationSeverity message: str + to_dict = dataclasses.asdict +class BasePurlType: + """ + Base class for all PURL type classes + """ + type: str + """The type string for this Package-URL type.""" + + type_name: str + """The name for this PURL type.""" + + description: str + """The description of this PURL type.""" + + use_repository: bool = False + """true if this PURL type use a public package repository.""" + + default_repository_url: str + """The default public repository URL for this PURL type""" + + namespace_requirement: str + """"States if this namespace is required, optional, or prohibited.""" + + allowed_qualifiers: dict = {"repository_url", "arch"} + """Set of allowed qualifier keys for this PURL type.""" + + namespace_case_sensitive: bool = True + """true if namespace is case sensitive. If false, the canonical form must be lowercased.""" + + name_case_sensitive: bool = True + """true if name is case sensitive. If false, the canonical form must be lowercased.""" + + version_case_sensitive: bool = True + """true if version is case sensitive. If false, the canonical form must be lowercased.""" + + purl_pattern: str + """A regex pattern that matches valid purls of this type.""" -class TypeValidator: @classmethod def validate(cls, purl, strict=False): + """ + Validate a PackageURL instance or string. + Yields ValidationMessage and performs strict validation if strict=True + """ + if not purl: + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message="No purl provided", + ) + return + + from packageurl import PackageURL + + if not isinstance(purl, PackageURL): + try: + purl = PackageURL.from_string(purl, normalize_purl=False) + except Exception as e: + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Invalid purl {purl!r} string: {e}", + ) + return + if not strict: purl = cls.normalize(purl) + + yield from cls._validate_namespace(purl) + yield from cls._validate_name(purl) + yield from cls._validate_version(purl) + if strict: + yield from cls._validate_qualifiers(purl) + messages = cls.validate_using_type_rules(purl, strict=strict) + if messages: + yield from messages + + @classmethod + def _validate_namespace(cls, purl): if cls.namespace_requirement == "prohibited" and purl.namespace: yield ValidationMessage( severity=ValidationSeverity.ERROR, @@ -60,13 +132,13 @@ def validate(cls, purl, strict=False): message=f"Namespace is required for purl type: {cls.type!r}", ) + # TODO: Check pending CPAN PR and decide if we want to upgrade the type definition schema if purl.type == "cpan": if purl.namespace and purl.namespace != purl.namespace.upper(): yield ValidationMessage( severity=ValidationSeverity.WARNING, message=f"Namespace must be uppercase for purl type: {cls.type!r}", ) - # TODO: Check pending CPAN PR and decide if we want to upgrade the type definition schema elif ( not cls.namespace_case_sensitive and purl.namespace @@ -76,23 +148,23 @@ def validate(cls, purl, strict=False): severity=ValidationSeverity.WARNING, message=f"Namespace is not lowercased for purl type: {cls.type!r}", ) - + + @classmethod + def _validate_name(cls, purl): if not cls.name_case_sensitive and purl.name and purl.name.lower() != purl.name: yield ValidationMessage( severity=ValidationSeverity.WARNING, message=f"Name is not lowercased for purl type: {cls.type!r}", ) - + + @classmethod + def _validate_version(cls, purl): if not cls.version_case_sensitive and purl.version and purl.version.lower() != purl.version: yield ValidationMessage( severity=ValidationSeverity.WARNING, message=f"Version is not lowercased for purl type: {cls.type!r}", ) - messages = cls.validate_type(purl, strict=strict) - if messages: - yield from messages - @classmethod def normalize(cls, purl): from packageurl import PackageURL @@ -120,12 +192,16 @@ def normalize(cls, purl): ) @classmethod - def validate_type(cls, purl, strict=False): - if strict: - yield from cls.validate_qualifiers(purl) + def validate_using_type_rules(cls, purl, strict=False): + """ + Validate using any additional type specific rules. + Yield validation messages. + Subclasses can override this method to add type specific validation rules. + """ + return iter([]) @classmethod - def validate_qualifiers(cls, purl): + def _validate_qualifiers(cls, purl): if not purl.qualifiers: return @@ -144,7 +220,7 @@ def validate_qualifiers(cls, purl): ) -class AlpmTypeValidator(TypeValidator): +class AlpmTypeDefinition(BasePurlType): type = "alpm" type_name = "Arch Linux package" description = """Arch Linux packages and other users of the libalpm/pacman package manager.""" @@ -158,7 +234,7 @@ class AlpmTypeValidator(TypeValidator): purl_pattern = "pkg:alpm/.*" -class ApkTypeValidator(TypeValidator): +class ApkTypeDefinition(BasePurlType): type = "apk" type_name = "APK-based packages" description = """Alpine Linux APK-based packages""" @@ -172,7 +248,7 @@ class ApkTypeValidator(TypeValidator): purl_pattern = "pkg:apk/.*" -class BitbucketTypeValidator(TypeValidator): +class BitbucketTypeDefinition(BasePurlType): type = "bitbucket" type_name = "Bitbucket" description = """Bitbucket-based packages""" @@ -186,7 +262,7 @@ class BitbucketTypeValidator(TypeValidator): purl_pattern = "pkg:bitbucket/.*" -class BitnamiTypeValidator(TypeValidator): +class BitnamiTypeDefinition(BasePurlType): type = "bitnami" type_name = "Bitnami" description = """Bitnami-based packages""" @@ -200,7 +276,7 @@ class BitnamiTypeValidator(TypeValidator): purl_pattern = "pkg:bitnami/.*" -class CargoTypeValidator(TypeValidator): +class CargoTypeDefinition(BasePurlType): type = "cargo" type_name = "Cargo" description = """Cargo packages for Rust""" @@ -214,7 +290,7 @@ class CargoTypeValidator(TypeValidator): purl_pattern = "pkg:cargo/.*" -class CocoapodsTypeValidator(TypeValidator): +class CocoapodsTypeDefinition(BasePurlType): type = "cocoapods" type_name = "CocoaPods" description = """CocoaPods pods""" @@ -228,7 +304,7 @@ class CocoapodsTypeValidator(TypeValidator): purl_pattern = "pkg:cocoapods/.*" -class ComposerTypeValidator(TypeValidator): +class ComposerTypeDefinition(BasePurlType): type = "composer" type_name = "Composer" description = """Composer PHP packages""" @@ -242,7 +318,7 @@ class ComposerTypeValidator(TypeValidator): purl_pattern = "pkg:composer/.*" -class ConanTypeValidator(TypeValidator): +class ConanTypeDefinition(BasePurlType): type = "conan" type_name = "Conan C/C++ packages" description = """Conan C/C++ packages. The purl is designed to closely resemble the Conan-native /@/ syntax for package references as specified in https://docs.conan.io/en/1.46/cheatsheet.html#package-terminology""" @@ -256,7 +332,7 @@ class ConanTypeValidator(TypeValidator): purl_pattern = "pkg:conan/.*" -class CondaTypeValidator(TypeValidator): +class CondaTypeDefinition(BasePurlType): type = "conda" type_name = "Conda" description = """conda is for Conda packages""" @@ -270,7 +346,7 @@ class CondaTypeValidator(TypeValidator): purl_pattern = "pkg:conda/.*" -class CpanTypeValidator(TypeValidator): +class CpanTypeDefinition(BasePurlType): type = "cpan" type_name = "CPAN" description = """CPAN Perl packages""" @@ -284,7 +360,7 @@ class CpanTypeValidator(TypeValidator): purl_pattern = "pkg:cpan/.*" @classmethod - def validate_type(cls, purl, strict=False): + def validate_using_type_rules(cls, purl, strict=False): if purl.namespace and "::" in purl.name: yield ValidationMessage( severity=ValidationSeverity.ERROR, @@ -295,12 +371,12 @@ def validate_type(cls, purl, strict=False): severity=ValidationSeverity.ERROR, message=f"Name must not contain '-' when Namespace is absent for purl type: {cls.type!r}", ) - messages = super().validate_type(purl, strict) + messages = super().validate_using_type_rules(purl, strict) if messages: yield from messages -class CranTypeValidator(TypeValidator): +class CranTypeDefinition(BasePurlType): type = "cran" type_name = "CRAN" description = """CRAN R packages""" @@ -314,7 +390,7 @@ class CranTypeValidator(TypeValidator): purl_pattern = "pkg:cran/.*" -class DebTypeValidator(TypeValidator): +class DebTypeDefinition(BasePurlType): type = "deb" type_name = "Debian package" description = """Debian packages, Debian derivatives, and Ubuntu packages""" @@ -328,7 +404,7 @@ class DebTypeValidator(TypeValidator): purl_pattern = "pkg:deb/.*" -class DockerTypeValidator(TypeValidator): +class DockerTypeDefinition(BasePurlType): type = "docker" type_name = "Docker image" description = """for Docker images""" @@ -342,7 +418,7 @@ class DockerTypeValidator(TypeValidator): purl_pattern = "pkg:docker/.*" -class GemTypeValidator(TypeValidator): +class GemTypeDefinition(BasePurlType): type = "gem" type_name = "RubyGems" description = """RubyGems""" @@ -356,7 +432,7 @@ class GemTypeValidator(TypeValidator): purl_pattern = "pkg:gem/.*" -class GenericTypeValidator(TypeValidator): +class GenericTypeDefinition(BasePurlType): type = "generic" type_name = "Generic Package" description = """The generic type is for plain, generic packages that do not fit anywhere else such as for "upstream-from-distro" packages. In particular this is handy for a plain version control repository such as a bare git repo in combination with a vcs_url.""" @@ -370,7 +446,7 @@ class GenericTypeValidator(TypeValidator): purl_pattern = "pkg:generic/.*" -class GithubTypeValidator(TypeValidator): +class GithubTypeDefinition(BasePurlType): type = "github" type_name = "GitHub" description = """GitHub-based packages""" @@ -384,7 +460,7 @@ class GithubTypeValidator(TypeValidator): purl_pattern = "pkg:github/.*" -class GolangTypeValidator(TypeValidator): +class GolangTypeDefinition(BasePurlType): type = "golang" type_name = "Go package" description = """Go packages""" @@ -398,7 +474,7 @@ class GolangTypeValidator(TypeValidator): purl_pattern = "pkg:golang/.*" -class HackageTypeValidator(TypeValidator): +class HackageTypeDefinition(BasePurlType): type = "hackage" type_name = "Haskell package" description = """Haskell packages""" @@ -412,18 +488,18 @@ class HackageTypeValidator(TypeValidator): purl_pattern = "pkg:hackage/.*" @classmethod - def validate_type(cls, purl, strict=False): + def validate_using_type_rules(cls, purl, strict=False): if "_" in purl.name: yield ValidationMessage( severity=ValidationSeverity.WARNING, message=f"Name cannot contain underscores for purl type:{cls.type!r}", ) - messages = super().validate_type(purl, strict) + messages = super().validate_using_type_rules(purl, strict) if messages: yield from messages -class HexTypeValidator(TypeValidator): +class HexTypeDefinition(BasePurlType): type = "hex" type_name = "Hex" description = """Hex packages""" @@ -437,7 +513,7 @@ class HexTypeValidator(TypeValidator): purl_pattern = "pkg:hex/.*" -class HuggingfaceTypeValidator(TypeValidator): +class HuggingfaceTypeDefinition(BasePurlType): type = "huggingface" type_name = "HuggingFace models" description = """Hugging Face ML models""" @@ -451,7 +527,7 @@ class HuggingfaceTypeValidator(TypeValidator): purl_pattern = "pkg:huggingface/.*" -class LuarocksTypeValidator(TypeValidator): +class LuarocksTypeDefinition(BasePurlType): type = "luarocks" type_name = "LuaRocks" description = """Lua packages installed with LuaRocks""" @@ -465,7 +541,7 @@ class LuarocksTypeValidator(TypeValidator): purl_pattern = "pkg:luarocks/.*" -class MavenTypeValidator(TypeValidator): +class MavenTypeDefinition(BasePurlType): type = "maven" type_name = "Maven" description = """PURL type for Maven JARs and related artifacts.""" @@ -479,7 +555,7 @@ class MavenTypeValidator(TypeValidator): purl_pattern = "pkg:maven/.*" -class MlflowTypeValidator(TypeValidator): +class MlflowTypeDefinition(BasePurlType): type = "mlflow" type_name = "" description = """MLflow ML models (Azure ML, Databricks, etc.)""" @@ -493,7 +569,7 @@ class MlflowTypeValidator(TypeValidator): purl_pattern = "pkg:mlflow/.*" -class NpmTypeValidator(TypeValidator): +class NpmTypeDefinition(BasePurlType): type = "npm" type_name = "Node NPM packages" description = """PURL type for npm packages.""" @@ -507,7 +583,7 @@ class NpmTypeValidator(TypeValidator): purl_pattern = "pkg:npm/.*" -class NugetTypeValidator(TypeValidator): +class NugetTypeDefinition(BasePurlType): type = "nuget" type_name = "NuGet" description = """NuGet .NET packages""" @@ -521,7 +597,7 @@ class NugetTypeValidator(TypeValidator): purl_pattern = "pkg:nuget/.*" -class OciTypeValidator(TypeValidator): +class OciTypeDefinition(BasePurlType): type = "oci" type_name = "OCI image" description = """For artifacts stored in registries that conform to the OCI Distribution Specification https://github.com/opencontainers/distribution-spec including container images built by Docker and others""" @@ -535,7 +611,7 @@ class OciTypeValidator(TypeValidator): purl_pattern = "pkg:oci/.*" -class PubTypeValidator(TypeValidator): +class PubTypeDefinition(BasePurlType): type = "pub" type_name = "Pub" description = """Dart and Flutter pub packages""" @@ -549,7 +625,7 @@ class PubTypeValidator(TypeValidator): purl_pattern = "pkg:pub/.*" @classmethod - def validate_type(cls, purl, strict=False): + def validate_using_type_rules(cls, purl, strict=False): if not all(c.isalnum() or c == "_" for c in purl.name): yield ValidationMessage( severity=ValidationSeverity.WARNING, @@ -561,12 +637,12 @@ def validate_type(cls, purl, strict=False): severity=ValidationSeverity.WARNING, message=f"Name contains spaces but should use underscores instead for purl type: {cls.type!r}", ) - messages = super().validate_type(purl, strict) + messages = super().validate_using_type_rules(purl, strict) if messages: yield from messages -class PypiTypeValidator(TypeValidator): +class PypiTypeDefinition(BasePurlType): type = "pypi" type_name = "PyPI" description = """Python packages""" @@ -580,18 +656,18 @@ class PypiTypeValidator(TypeValidator): purl_pattern = "pkg:pypi/.*" @classmethod - def validate_type(cls, purl, strict=False): + def validate_using_type_rules(cls, purl, strict=False): if "_" in purl.name: yield ValidationMessage( severity=ValidationSeverity.WARNING, message=f"Name cannot contain underscores for purl type:{cls.type!r}", ) - messages = super().validate_type(purl, strict) + messages = super().validate_using_type_rules(purl, strict) if messages: yield from messages -class QpkgTypeValidator(TypeValidator): +class QpkgTypeDefinition(BasePurlType): type = "qpkg" type_name = "QNX package" description = """QNX packages""" @@ -605,7 +681,7 @@ class QpkgTypeValidator(TypeValidator): purl_pattern = "pkg:qpkg/.*" -class RpmTypeValidator(TypeValidator): +class RpmTypeDefinition(BasePurlType): type = "rpm" type_name = "RPM" description = """RPM packages""" @@ -619,7 +695,7 @@ class RpmTypeValidator(TypeValidator): purl_pattern = "pkg:rpm/.*" -class SwidTypeValidator(TypeValidator): +class SwidTypeDefinition(BasePurlType): type = "swid" type_name = "Software Identification (SWID) Tag" description = """PURL type for ISO-IEC 19770-2 Software Identification (SWID) tags.""" @@ -633,7 +709,7 @@ class SwidTypeValidator(TypeValidator): purl_pattern = "pkg:swid/.*" -class SwiftTypeValidator(TypeValidator): +class SwiftTypeDefinition(BasePurlType): type = "swift" type_name = "Swift packages" description = """Swift packages""" @@ -647,37 +723,37 @@ class SwiftTypeValidator(TypeValidator): purl_pattern = "pkg:swift/.*" -VALIDATORS_BY_TYPE = { - "alpm": AlpmTypeValidator, - "apk": ApkTypeValidator, - "bitbucket": BitbucketTypeValidator, - "bitnami": BitnamiTypeValidator, - "cargo": CargoTypeValidator, - "cocoapods": CocoapodsTypeValidator, - "composer": ComposerTypeValidator, - "conan": ConanTypeValidator, - "conda": CondaTypeValidator, - "cpan": CpanTypeValidator, - "cran": CranTypeValidator, - "deb": DebTypeValidator, - "docker": DockerTypeValidator, - "gem": GemTypeValidator, - "generic": GenericTypeValidator, - "github": GithubTypeValidator, - "golang": GolangTypeValidator, - "hackage": HackageTypeValidator, - "hex": HexTypeValidator, - "huggingface": HuggingfaceTypeValidator, - "luarocks": LuarocksTypeValidator, - "maven": MavenTypeValidator, - "mlflow": MlflowTypeValidator, - "npm": NpmTypeValidator, - "nuget": NugetTypeValidator, - "oci": OciTypeValidator, - "pub": PubTypeValidator, - "pypi": PypiTypeValidator, - "qpkg": QpkgTypeValidator, - "rpm": RpmTypeValidator, - "swid": SwidTypeValidator, - "swift": SwiftTypeValidator, +DEFINITIONS_BY_TYPE = { + "alpm": AlpmTypeDefinition, + "apk": ApkTypeDefinition, + "bitbucket": BitbucketTypeDefinition, + "bitnami": BitnamiTypeDefinition, + "cargo": CargoTypeDefinition, + "cocoapods": CocoapodsTypeDefinition, + "composer": ComposerTypeDefinition, + "conan": ConanTypeDefinition, + "conda": CondaTypeDefinition, + "cpan": CpanTypeDefinition, + "cran": CranTypeDefinition, + "deb": DebTypeDefinition, + "docker": DockerTypeDefinition, + "gem": GemTypeDefinition, + "generic": GenericTypeDefinition, + "github": GithubTypeDefinition, + "golang": GolangTypeDefinition, + "hackage": HackageTypeDefinition, + "hex": HexTypeDefinition, + "huggingface": HuggingfaceTypeDefinition, + "luarocks": LuarocksTypeDefinition, + "maven": MavenTypeDefinition, + "mlflow": MlflowTypeDefinition, + "npm": NpmTypeDefinition, + "nuget": NugetTypeDefinition, + "oci": OciTypeDefinition, + "pub": PubTypeDefinition, + "pypi": PypiTypeDefinition, + "qpkg": QpkgTypeDefinition, + "rpm": RpmTypeDefinition, + "swid": SwidTypeDefinition, + "swift": SwiftTypeDefinition, } diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index 118f92f..d7a4130 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -24,28 +24,38 @@ import json import os +from dataclasses import dataclass +from typing import Any, Dict, Optional, List import pytest - from packageurl import PackageURL -current_dir = os.path.dirname(__file__) -root_dir = os.path.abspath(os.path.join(current_dir, "..")) -spec_file_path = os.path.join(root_dir, "spec", "tests", "spec", "specification-test.json") -with open(spec_file_path, "r", encoding="utf-8") as f: - test_cases = json.load(f) +@dataclass +class PurlTestCase: + description: str + test_type: str + input: Any + expected_output: Optional[Any] = None + expected_failure: bool = False + test_group: Optional[str] = None -tests = test_cases["tests"] -parse_tests = [t for t in tests if t["test_type"] == "parse"] -build_tests = [t for t in tests if t["test_type"] == "build"] +def load_test_case(case_dict: dict) -> PurlTestCase: + return PurlTestCase( + description=case_dict["description"], + test_type=case_dict["test_type"], + input=case_dict["input"], + expected_output=case_dict.get("expected_output"), + expected_failure=case_dict.get("expected_failure", False), + test_group=case_dict.get("test_group"), + ) -def load_spec_files(spec_dir): +def load_spec_files(spec_dir: str) -> Dict[str, List[PurlTestCase]]: """ - Load all JSON files from the given directory into a dictionary. - Key = filename, Value = parsed JSON content + Load all JSON files from the given directory into a dictionary of test cases. + Key = filename, Value = list of PurlTestCase objects """ spec_data = {} for filename in os.listdir(spec_dir): @@ -54,78 +64,86 @@ def load_spec_files(spec_dir): with open(filepath, "r", encoding="utf-8") as f: try: data = json.load(f) - spec_data[filename] = data["tests"] + spec_data[filename] = [load_test_case(tc) for tc in data["tests"]] except json.JSONDecodeError as e: print(f"Error parsing {filename}: {e}") return spec_data +current_dir = os.path.dirname(__file__) +root_dir = os.path.abspath(os.path.join(current_dir, "..")) +spec_file_path = os.path.join(root_dir, "spec", "tests", "spec", "specification-test.json") + +with open(spec_file_path, "r", encoding="utf-8") as f: + test_cases = json.load(f) + +all_tests = [load_test_case(tc) for tc in test_cases["tests"]] +parse_tests = [t for t in all_tests if t.test_type == "parse"] +build_tests = [t for t in all_tests if t.test_type == "build"] + SPEC_DIR = os.path.join(os.path.dirname(__file__), "..", "spec", "tests", "types") spec_dict = load_spec_files(SPEC_DIR) flattened_cases = [] for filename, cases in spec_dict.items(): for case in cases: - flattened_cases.append((filename, case["description"], case)) + flattened_cases.append((filename, case.description, case)) @pytest.mark.parametrize( - "description, input_str, expected_output, expected_failure", - [ - (t["description"], t["input"], t["expected_output"], t["expected_failure"]) - for t in parse_tests - ], + "case", + parse_tests, + ids=lambda c: c.description, ) -def test_parse(description, input_str, expected_output, expected_failure): - if expected_failure: +def test_parse(case: PurlTestCase): + if case.expected_failure: with pytest.raises(Exception): - PackageURL.from_string(input_str) + PackageURL.from_string(case.input) else: - result = PackageURL.from_string(input_str) - assert result.to_string() == expected_output + result = PackageURL.from_string(case.input) + assert result.to_string() == case.expected_output @pytest.mark.parametrize( - "description, input_dict, expected_output, expected_failure", - [ - (t["description"], t["input"], t["expected_output"], t["expected_failure"]) - for t in build_tests - ], + "case", + build_tests, + ids=lambda c: c.description, ) -def test_build(description, input_dict, expected_output, expected_failure): +def test_build(case: PurlTestCase): kwargs = { - "type": input_dict.get("type"), - "namespace": input_dict.get("namespace"), - "name": input_dict.get("name"), - "version": input_dict.get("version"), - "qualifiers": input_dict.get("qualifiers"), - "subpath": input_dict.get("subpath"), + "type": case.input.get("type"), + "namespace": case.input.get("namespace"), + "name": case.input.get("name"), + "version": case.input.get("version"), + "qualifiers": case.input.get("qualifiers"), + "subpath": case.input.get("subpath"), } - if expected_failure: + if case.expected_failure: with pytest.raises(Exception): PackageURL(**kwargs).to_string() else: purl = PackageURL(**kwargs) - assert purl.to_string() == expected_output - + assert purl.to_string() == case.expected_output -@pytest.mark.parametrize("filename,description,test_case", flattened_cases) -def test_package_type_case(filename, description, test_case): - test_type = test_case["test_type"] - expected_failure = test_case.get("expected_failure", False) - if expected_failure: +@pytest.mark.parametrize( + "filename,description,case", + flattened_cases, + ids=lambda v: v[1] if isinstance(v, tuple) else str(v), +) +def test_package_type_case(filename, description, case: PurlTestCase): + if case.expected_failure: with pytest.raises(Exception): - run_test_case(test_case, test_type, description) + run_test_case(case) else: - run_test_case(test_case, test_type, description) + run_test_case(case) -def run_test_case(case, test_type, desc): - if test_type == "parse": - purl = PackageURL.from_string(case["input"]) - expected = case["expected_output"] +def run_test_case(case: PurlTestCase): + if case.test_type == "parse": + purl = PackageURL.from_string(case.input) + expected = case.expected_output assert purl.type == expected["type"] assert purl.namespace == expected["namespace"] assert purl.name == expected["name"] @@ -136,42 +154,30 @@ def run_test_case(case, test_type, desc): assert not purl.qualifiers assert purl.subpath == expected["subpath"] - elif test_type == "roundtrip": - purl = PackageURL.from_string(case["input"]) - assert purl.to_string() == case["expected_output"] + elif case.test_type == "roundtrip": + purl = PackageURL.from_string(case.input) + assert purl.to_string() == case.expected_output - elif test_type == "build": - input_data = case["input"] + elif case.test_type == "build": + inp = case.input purl = PackageURL( - type=input_data["type"], - namespace=input_data["namespace"], - name=input_data["name"], - version=input_data["version"], - qualifiers=input_data.get("qualifiers"), - subpath=input_data.get("subpath"), + type=inp["type"], + namespace=inp["namespace"], + name=inp["name"], + version=inp["version"], + qualifiers=inp.get("qualifiers"), + subpath=inp.get("subpath"), ) - assert purl.to_string() == case["expected_output"] + assert purl.to_string() == case.expected_output - elif test_type == "validation": - input_data = case["input"] - test_group = case.get("test_group") + elif case.test_type == "validation": + test_group = case.test_group if test_group not in ("base", "advanced"): raise Exception(test_group) - strict = True - if test_group == "advanced": - strict = False - purl = PackageURL.from_string(input_data, normalize_purl=False) - messages = purl.validate(strict=strict) - messages = list(change_messages_to_json(messages)) - if case.get("expected_output"): - assert messages == case["expected_output"] + strict = test_group == "base" + messages = PackageURL.validate_string(purl=case.input, strict=strict) + messages = [message.to_dict() for message in messages] + if case.expected_output: + assert messages == case.expected_output else: assert not messages - - -def change_messages_to_json(messages): - for message in messages: - yield { - "severity": message.severity.value, - "message": message.message, - } From e1a32f072330ac3cf6bbb7872be4903c9f511cd6 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Sun, 7 Sep 2025 20:57:06 +0530 Subject: [PATCH 7/7] Update generate validators Signed-off-by: Tushar Goel --- etc/scripts/generate_validators.py | 151 ++++++++++++++++++++++++++--- src/packageurl/__init__.py | 52 ++++++---- src/packageurl/validate.py | 56 ++++++----- tests/test_purl_spec.py | 6 +- 4 files changed, 208 insertions(+), 57 deletions(-) diff --git a/etc/scripts/generate_validators.py b/etc/scripts/generate_validators.py index 23abbec..7d91050 100644 --- a/etc/scripts/generate_validators.py +++ b/etc/scripts/generate_validators.py @@ -76,32 +76,140 @@ Validate each type according to the PURL spec type definitions """ -class TypeValidator: +class BasePurlType: + """ + Base class for all PURL type classes + """ + + type: str + """The type string for this Package-URL type.""" + + type_name: str + """The name for this PURL type.""" + + description: str + """The description of this PURL type.""" + + use_repository: bool = False + """true if this PURL type use a public package repository.""" + + default_repository_url: str + """The default public repository URL for this PURL type""" + + namespace_requirement: str + """"States if this namespace is required, optional, or prohibited.""" + + allowed_qualifiers: dict = {"repository_url", "arch"} + """Set of allowed qualifier keys for this PURL type.""" + + namespace_case_sensitive: bool = True + """true if namespace is case sensitive. If false, the canonical form must be lowercased.""" + + name_case_sensitive: bool = True + """true if name is case sensitive. If false, the canonical form must be lowercased.""" + + version_case_sensitive: bool = True + """true if version is case sensitive. If false, the canonical form must be lowercased.""" + + purl_pattern: str + """A regex pattern that matches valid purls of this type.""" + @classmethod def validate(cls, purl, strict=False): + """ + Validate a PackageURL instance or string. + Yields ValidationMessage and performs strict validation if strict=True + """ + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + + if not purl: + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message="No purl provided", + ) + return + + from packageurl import PackageURL + + if not isinstance(purl, PackageURL): + try: + purl = PackageURL.from_string(purl, normalize_purl=False) + except Exception as e: + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Invalid purl {purl!r} string: {e}", + ) + return + if not strict: purl = cls.normalize(purl) + yield from cls._validate_namespace(purl) + yield from cls._validate_name(purl) + yield from cls._validate_version(purl) + if strict: + yield from cls._validate_qualifiers(purl) + + messages = cls.validate_using_type_rules(purl, strict=strict) + if messages: + yield from messages + + @classmethod + def _validate_namespace(cls, purl): + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + if cls.namespace_requirement == "prohibited" and purl.namespace: - yield f"Namespace is prohibited for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Namespace is prohibited for purl type: {cls.type!r}", + ) elif cls.namespace_requirement == "required" and not purl.namespace: - yield f"Namespace is required for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Namespace is required for purl type: {cls.type!r}", + ) - if ( + # TODO: Check pending CPAN PR and decide if we want to upgrade the type definition schema + if purl.type == "cpan": + if purl.namespace and purl.namespace != purl.namespace.upper(): + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Namespace must be uppercase for purl type: {cls.type!r}", + ) + elif ( not cls.namespace_case_sensitive and purl.namespace and purl.namespace.lower() != purl.namespace ): - yield f"Namespace is not lowercased for purl type: {cls.type!r}" + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Namespace is not lowercased for purl type: {cls.type!r}", + ) + @classmethod + def _validate_name(cls, purl): if not cls.name_case_sensitive and purl.name and purl.name.lower() != purl.name: - yield f"Name is not lowercased for purl type: {cls.type!r}" + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Name is not lowercased for purl type: {cls.type!r}", + ) + + @classmethod + def _validate_version(cls, purl): if not cls.version_case_sensitive and purl.version and purl.version.lower() != purl.version: - yield f"Version is not lowercased for purl type: {cls.type!r}" + from packageurl import ValidationMessage + from packageurl import ValidationSeverity - yield from cls.validate_type(purl, strict=strict) + yield ValidationMessage( + severity=ValidationSeverity.WARNING, + message=f"Version is not lowercased for purl type: {cls.type!r}", + ) @classmethod def normalize(cls, purl): @@ -130,11 +238,16 @@ def normalize(cls, purl): ) @classmethod - def validate_type(cls, purl, strict=False): - return + def validate_using_type_rules(cls, purl, strict=False): + """ + Validate using any additional type specific rules. + Yield validation messages. + Subclasses can override this method to add type specific validation rules. + """ + return iter([]) @classmethod - def validate_qualifiers(cls, purl): + def _validate_qualifiers(cls, purl): if not purl.qualifiers: return @@ -144,9 +257,15 @@ def validate_qualifiers(cls, purl): disallowed = purl_qualifiers_keys - allowed_qualifiers_set if disallowed: - yield ( - f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. " - f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}" + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + + yield ValidationMessage( + severity=ValidationSeverity.INFO, + message=( + f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. " + f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}" + ), ) ''' @@ -184,10 +303,10 @@ def generate_validators(): type_def = json.loads(type.read_text()) _type = type_def["type"] - standard_validator_class = "TypeValidator" + standard_validator_class = "BasePurlType" class_prefix = _type.capitalize() - class_name = f"{class_prefix}{standard_validator_class}" + class_name = f"{class_prefix}TypeDefinition" validators_by_type[_type] = class_name name_normalization_rules=type_def["name_definition"].get("normalization_rules") or [] allowed_qualifiers = [defintion.get("key") for defintion in type_def.get("qualifiers_definition") or []] diff --git a/src/packageurl/__init__.py b/src/packageurl/__init__.py index 2a147f0..3bfae87 100644 --- a/src/packageurl/__init__.py +++ b/src/packageurl/__init__.py @@ -24,10 +24,13 @@ from __future__ import annotations +import dataclasses import re import string from collections import namedtuple from collections.abc import Mapping +from dataclasses import dataclass +from enum import Enum from typing import TYPE_CHECKING from typing import Any from typing import Optional @@ -58,6 +61,19 @@ """ +class ValidationSeverity(str, Enum): + ERROR = "error" + WARNING = "warning" + INFO = "info" + + +@dataclass +class ValidationMessage: + severity: ValidationSeverity + message: str + to_dict = dataclasses.asdict + + def quote(s: AnyStr) -> str: """ Return a percent-encoded unicode string, except for colon :, given an `s` @@ -524,38 +540,38 @@ def to_string(self, encode: bool | None = True) -> str: return "".join(purl) - def validate(self, strict: bool = False) -> list: + def validate(self, strict: bool = False) -> list["ValidationMessage"]: """ Validate this PackageURL object and return a list of validation error messages. """ from packageurl.validate import DEFINITIONS_BY_TYPE - from packageurl.validate import ValidationMessage - from packageurl.validate import ValidationSeverity validator_class = DEFINITIONS_BY_TYPE.get(self.type) if not validator_class: - return [ValidationMessage( - severity=ValidationSeverity.ERROR, - message=f"Unexpected purl type: expected {self.type!r}", - )] + return [ + ValidationMessage( + severity=ValidationSeverity.ERROR, + message=f"Unexpected purl type: expected {self.type!r}", + ) + ] return list(validator_class.validate(purl=self, strict=strict)) # type: ignore[no-untyped-call] - + @classmethod - def validate_string(cls, purl: str, strict: bool = False) -> list: + def validate_string(cls, purl: str, strict: bool = False) -> list["ValidationMessage"]: """ Validate a PURL string and return a list of validation error messages. """ - from packageurl.validate import ValidationMessage - from packageurl.validate import ValidationSeverity - try: - purl = cls.from_string(purl, normalize_purl=not strict) + purl_obj = cls.from_string(purl, normalize_purl=not strict) + assert isinstance(purl_obj, PackageURL) + return purl_obj.validate(strict=strict) except ValueError as e: - return [ValidationMessage( - severity=ValidationSeverity.ERROR, - message=str(e), - )] - return purl.validate(strict=strict) + return [ + ValidationMessage( + severity=ValidationSeverity.ERROR, + message=str(e), + ) + ] @classmethod def from_string(cls, purl: str, normalize_purl: bool = True) -> Self: diff --git a/src/packageurl/validate.py b/src/packageurl/validate.py index ed221e0..45cf146 100644 --- a/src/packageurl/validate.py +++ b/src/packageurl/validate.py @@ -26,27 +26,12 @@ Validate each type according to the PURL spec type definitions """ -from enum import Enum -from dataclasses import dataclass -import dataclasses - - -class ValidationSeverity(str, Enum): - ERROR = "error" - WARNING = "warning" - INFO = "info" - - -@dataclass -class ValidationMessage: - severity: ValidationSeverity - message: str - to_dict = dataclasses.asdict class BasePurlType: """ Base class for all PURL type classes """ + type: str """The type string for this Package-URL type.""" @@ -86,13 +71,16 @@ def validate(cls, purl, strict=False): Validate a PackageURL instance or string. Yields ValidationMessage and performs strict validation if strict=True """ + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + if not purl: yield ValidationMessage( severity=ValidationSeverity.ERROR, message="No purl provided", ) return - + from packageurl import PackageURL if not isinstance(purl, PackageURL): @@ -103,11 +91,11 @@ def validate(cls, purl, strict=False): severity=ValidationSeverity.ERROR, message=f"Invalid purl {purl!r} string: {e}", ) - return + return if not strict: purl = cls.normalize(purl) - + yield from cls._validate_namespace(purl) yield from cls._validate_name(purl) yield from cls._validate_version(purl) @@ -117,9 +105,12 @@ def validate(cls, purl, strict=False): messages = cls.validate_using_type_rules(purl, strict=strict) if messages: yield from messages - + @classmethod def _validate_namespace(cls, purl): + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + if cls.namespace_requirement == "prohibited" and purl.namespace: yield ValidationMessage( severity=ValidationSeverity.ERROR, @@ -148,18 +139,24 @@ def _validate_namespace(cls, purl): severity=ValidationSeverity.WARNING, message=f"Namespace is not lowercased for purl type: {cls.type!r}", ) - + @classmethod def _validate_name(cls, purl): if not cls.name_case_sensitive and purl.name and purl.name.lower() != purl.name: + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + yield ValidationMessage( severity=ValidationSeverity.WARNING, message=f"Name is not lowercased for purl type: {cls.type!r}", ) - + @classmethod def _validate_version(cls, purl): if not cls.version_case_sensitive and purl.version and purl.version.lower() != purl.version: + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + yield ValidationMessage( severity=ValidationSeverity.WARNING, message=f"Version is not lowercased for purl type: {cls.type!r}", @@ -211,6 +208,9 @@ def _validate_qualifiers(cls, purl): disallowed = purl_qualifiers_keys - allowed_qualifiers_set if disallowed: + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + yield ValidationMessage( severity=ValidationSeverity.INFO, message=( @@ -361,6 +361,9 @@ class CpanTypeDefinition(BasePurlType): @classmethod def validate_using_type_rules(cls, purl, strict=False): + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + if purl.namespace and "::" in purl.name: yield ValidationMessage( severity=ValidationSeverity.ERROR, @@ -489,6 +492,9 @@ class HackageTypeDefinition(BasePurlType): @classmethod def validate_using_type_rules(cls, purl, strict=False): + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + if "_" in purl.name: yield ValidationMessage( severity=ValidationSeverity.WARNING, @@ -626,6 +632,9 @@ class PubTypeDefinition(BasePurlType): @classmethod def validate_using_type_rules(cls, purl, strict=False): + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + if not all(c.isalnum() or c == "_" for c in purl.name): yield ValidationMessage( severity=ValidationSeverity.WARNING, @@ -657,6 +666,9 @@ class PypiTypeDefinition(BasePurlType): @classmethod def validate_using_type_rules(cls, purl, strict=False): + from packageurl import ValidationMessage + from packageurl import ValidationSeverity + if "_" in purl.name: yield ValidationMessage( severity=ValidationSeverity.WARNING, diff --git a/tests/test_purl_spec.py b/tests/test_purl_spec.py index d7a4130..a78d23d 100644 --- a/tests/test_purl_spec.py +++ b/tests/test_purl_spec.py @@ -25,9 +25,13 @@ import json import os from dataclasses import dataclass -from typing import Any, Dict, Optional, List +from typing import Any +from typing import Dict +from typing import List +from typing import Optional import pytest + from packageurl import PackageURL