Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 136 additions & 18 deletions etc/scripts/generate_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
]
}
"""
from packageurl import PackageURL

from pathlib import Path
import json

Expand Down Expand Up @@ -76,32 +76,140 @@
Validate each type according to the PURL spec type definitions
"""

class TypeValidator:
class BasePurlType:
"""
Base class for all PURL type classes
"""

# Declarative description of a PURL type. Attributes without a value are
# declarations only; concrete type classes are expected to supply them.

type: str
"""The type string for this Package-URL type."""

type_name: str
"""The name for this PURL type."""

description: str
"""The description of this PURL type."""

use_repository: bool = False
"""true if this PURL type uses a public package repository."""

default_repository_url: str
"""The default public repository URL for this PURL type."""

namespace_requirement: str
"""States if this namespace is required, optional, or prohibited."""

# Annotated as ``set`` (not ``dict``): the value is a set literal and it is
# used for set difference when qualifiers are validated.
allowed_qualifiers: set = {"repository_url", "arch"}
"""Set of allowed qualifier keys for this PURL type."""

namespace_case_sensitive: bool = True
"""true if namespace is case sensitive. If false, the canonical form must be lowercased."""

name_case_sensitive: bool = True
"""true if name is case sensitive. If false, the canonical form must be lowercased."""

version_case_sensitive: bool = True
"""true if version is case sensitive. If false, the canonical form must be lowercased."""

purl_pattern: str
"""A regex pattern that matches valid purls of this type."""

@classmethod
def validate(cls, purl, strict=False):
    """
    Validate a PackageURL instance or a purl string against this type.

    Yield ValidationMessage objects. A falsy ``purl`` or a string that cannot
    be parsed yields a single ERROR message and stops. When ``strict`` is
    false the purl is normalized before the checks run; when ``strict`` is
    true the qualifier keys are checked as well.
    """
    # Imported lazily, inside the method, as in the rest of this class.
    from packageurl import PackageURL
    from packageurl import ValidationMessage
    from packageurl import ValidationSeverity

    if not purl:
        yield ValidationMessage(
            severity=ValidationSeverity.ERROR,
            message="No purl provided",
        )
        return

    if not isinstance(purl, PackageURL):
        raw_purl = purl
        try:
            # Parse without normalization so the checks see the input as given.
            purl = PackageURL.from_string(raw_purl, normalize_purl=False)
        except Exception as error:
            yield ValidationMessage(
                severity=ValidationSeverity.ERROR,
                message=f"Invalid purl {raw_purl!r} string: {error}",
            )
            return

    if not strict:
        purl = cls.normalize(purl)

    component_checks = [
        cls._validate_namespace,
        cls._validate_name,
        cls._validate_version,
    ]
    if strict:
        component_checks.append(cls._validate_qualifiers)

    for check in component_checks:
        yield from check(purl)

    # Type specific rules come last; tolerate an override that returns None.
    extra_messages = cls.validate_using_type_rules(purl, strict=strict)
    if extra_messages:
        yield from extra_messages

@classmethod
def _validate_namespace(cls, purl):
    """
    Yield ValidationMessage objects describing namespace problems of ``purl``.

    An ERROR is yielded when a namespace is present but prohibited, or missing
    but required, according to ``cls.namespace_requirement``. A WARNING is
    yielded when the namespace case does not match what the type expects.
    Only ValidationMessage objects are yielded, never bare strings.
    """
    from packageurl import ValidationMessage
    from packageurl import ValidationSeverity

    if cls.namespace_requirement == "prohibited" and purl.namespace:
        yield ValidationMessage(
            severity=ValidationSeverity.ERROR,
            message=f"Namespace is prohibited for purl type: {cls.type!r}",
        )

    elif cls.namespace_requirement == "required" and not purl.namespace:
        yield ValidationMessage(
            severity=ValidationSeverity.ERROR,
            message=f"Namespace is required for purl type: {cls.type!r}",
        )

    # TODO: Check pending CPAN PR and decide if we want to upgrade the type definition schema
    if purl.type == "cpan":
        # cpan is special-cased: its namespace is expected in uppercase.
        if purl.namespace and purl.namespace != purl.namespace.upper():
            yield ValidationMessage(
                severity=ValidationSeverity.WARNING,
                message=f"Namespace must be uppercase for purl type: {cls.type!r}",
            )
    elif (
        not cls.namespace_case_sensitive
        and purl.namespace
        and purl.namespace.lower() != purl.namespace
    ):
        yield ValidationMessage(
            severity=ValidationSeverity.WARNING,
            message=f"Namespace is not lowercased for purl type: {cls.type!r}",
        )

@classmethod
def _validate_name(cls, purl):
    """
    Yield a WARNING ValidationMessage when the name of ``purl`` is not
    lowercased although this type declares names as case insensitive.

    Only ValidationMessage objects are yielded, never bare strings.
    """
    if not cls.name_case_sensitive and purl.name and purl.name.lower() != purl.name:
        # Imported only when there is something to report.
        from packageurl import ValidationMessage
        from packageurl import ValidationSeverity

        yield ValidationMessage(
            severity=ValidationSeverity.WARNING,
            message=f"Name is not lowercased for purl type: {cls.type!r}",
        )

@classmethod
def _validate_version(cls, purl):
    """
    Yield a WARNING ValidationMessage when the version of ``purl`` is not
    lowercased although this type declares versions as case insensitive.

    Only ValidationMessage objects are yielded, never bare strings. Type
    specific rules are not run from here: this method has no ``strict``
    argument to forward to them.
    """
    if not cls.version_case_sensitive and purl.version and purl.version.lower() != purl.version:
        # Imported only when there is something to report.
        from packageurl import ValidationMessage
        from packageurl import ValidationSeverity

        yield ValidationMessage(
            severity=ValidationSeverity.WARNING,
            message=f"Version is not lowercased for purl type: {cls.type!r}",
        )

@classmethod
def normalize(cls, purl):
Expand Down Expand Up @@ -130,12 +238,16 @@ def normalize(cls, purl):
)

@classmethod
def validate_type(cls, purl, strict=False):
if strict:
yield from cls.validate_qualifiers(purl=purl)
def validate_using_type_rules(cls, purl, strict=False):
    """
    Hook for additional, type specific validation rules.

    The base implementation has no extra rules and returns an empty iterator.
    Subclasses can override this method to produce validation messages for
    ``purl``; ``strict`` is passed through by the caller.
    """
    no_messages = ()
    return iter(no_messages)

@classmethod
def validate_qualifiers(cls, purl):
def _validate_qualifiers(cls, purl):
if not purl.qualifiers:
return

Expand All @@ -145,9 +257,15 @@ def validate_qualifiers(cls, purl):
disallowed = purl_qualifiers_keys - allowed_qualifiers_set

if disallowed:
yield (
f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. "
f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}"
from packageurl import ValidationMessage
from packageurl import ValidationSeverity

yield ValidationMessage(
severity=ValidationSeverity.INFO,
message=(
f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. "
f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}"
),
)
'''

Expand Down Expand Up @@ -185,10 +303,10 @@ def generate_validators():
type_def = json.loads(type.read_text())

_type = type_def["type"]
standard_validator_class = "TypeValidator"
standard_validator_class = "BasePurlType"

class_prefix = _type.capitalize()
class_name = f"{class_prefix}{standard_validator_class}"
class_name = f"{class_prefix}TypeDefinition"
validators_by_type[_type] = class_name
name_normalization_rules=type_def["name_definition"].get("normalization_rules") or []
allowed_qualifiers = [defintion.get("key") for defintion in type_def.get("qualifiers_definition") or []]
Expand Down
86 changes: 63 additions & 23 deletions src/packageurl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@

from __future__ import annotations

import dataclasses
import re
import string
from collections import namedtuple
from collections.abc import Mapping
from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING
from typing import Any
from typing import Optional
Expand Down Expand Up @@ -58,6 +61,19 @@
"""


class ValidationSeverity(str, Enum):
    """
    Severity attached to a validation message.

    Members are also plain strings, so they compare equal to their value.
    """

    # Ordered from most to least severe.
    ERROR = "error"
    WARNING = "warning"
    INFO = "info"


@dataclass
class ValidationMessage:
    """
    A single validation finding: a severity and a human readable message.
    """

    severity: ValidationSeverity
    message: str

    def to_dict(self, *, dict_factory=dict):
        """
        Return the fields of this message as a mapping, built by
        ``dataclasses.asdict``.
        """
        return dataclasses.asdict(self, dict_factory=dict_factory)


def quote(s: AnyStr) -> str:
"""
Return a percent-encoded unicode string, except for colon :, given an `s`
Expand Down Expand Up @@ -188,12 +204,15 @@ def normalize_name(
"apk",
"bitnami",
"hex",
"pub",
):
name_str = name_str.lower()
if ptype == "pypi":
name_str = name_str.replace("_", "-").lower()
if ptype == "hackage":
name_str = name_str.replace("_", "-")
if ptype == "pub":
name_str = re.sub(r"[^a-z0-9]", "_", name_str.lower())
return name_str or None


Expand Down Expand Up @@ -521,24 +540,41 @@ def to_string(self, encode: bool | None = True) -> str:

return "".join(purl)

def validate(self, strict: bool = False) -> list["ValidationMessage"]:
    """
    Validate this PackageURL object and return a list of validation messages.

    The type definition class is looked up by ``self.type``. When none is
    registered, a single ERROR ValidationMessage is returned. Otherwise the
    messages produced by that class's ``validate`` are collected into a list.
    ``strict`` is forwarded unchanged.
    """
    # Imported here rather than at module level.
    # NOTE(review): presumably to avoid a circular import - confirm.
    from packageurl.validate import DEFINITIONS_BY_TYPE

    validator_class = DEFINITIONS_BY_TYPE.get(self.type)
    if not validator_class:
        return [
            ValidationMessage(
                severity=ValidationSeverity.ERROR,
                message=f"Unexpected purl type: expected {self.type!r}",
            )
        ]
    return list(validator_class.validate(purl=self, strict=strict))  # type: ignore[no-untyped-call]

@classmethod
def validate_string(cls, purl: str, strict: bool = False) -> list["ValidationMessage"]:
    """
    Parse the ``purl`` string and validate the resulting object.

    Return a list of validation messages. The string is normalized while
    parsing unless ``strict`` is true. A ValueError raised while parsing or
    validating is reported as a single ERROR message instead of propagating.
    """
    try:
        parsed = cls.from_string(purl, normalize_purl=not strict)
        assert isinstance(parsed, PackageURL)
        messages = parsed.validate(strict=strict)
    except ValueError as error:
        messages = [
            ValidationMessage(
                severity=ValidationSeverity.ERROR,
                message=str(error),
            )
        ]
    return messages

@classmethod
def from_string(cls, purl: str) -> Self:
def from_string(cls, purl: str, normalize_purl: bool = True) -> Self:
"""
Return a PackageURL object parsed from a string.
Raise ValueError on errors.
Expand Down Expand Up @@ -622,14 +658,18 @@ def from_string(cls, purl: str) -> Self:
if not name:
raise ValueError(f"purl is missing the required name component: {purl!r}")

type_, namespace, name, version, qualifiers, subpath = normalize(
type_,
namespace,
name,
version,
qualifiers_str,
subpath,
encode=False,
if normalize_purl:
type_, namespace, name, version, qualifiers, subpath = normalize(
type_,
namespace,
name,
version,
qualifiers_str,
subpath,
encode=False,
)
else:
qualifiers = normalize_qualifiers(qualifiers_str, encode=False) or {}
return cls(
type_, namespace, name, version, qualifiers, subpath, normalize_purl=normalize_purl
)

return cls(type_, namespace, name, version, qualifiers, subpath)
Loading
Loading