Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate ExternalPackageRef #439

90 changes: 86 additions & 4 deletions src/spdx/validation/external_package_ref_validator.py
Expand Up @@ -8,11 +8,22 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from typing import List

from spdx.model.package import ExternalPackageRef
from spdx.validation.validation_message import ValidationMessage
from spdx.model.package import ExternalPackageRef, ExternalPackageRefCategory
from spdx.validation.uri_validators import validate_url, validate_uri
from spdx.validation.validation_message import ValidationMessage, ValidationContext, SpdxElementType

# Locator formats, one pattern per reference type. Each pattern is anchored with ^...$ and is
# applied to the locator of an ExternalPackageRef by validate_external_package_ref.

# "cpe:/" prefix (the letters "pe" in either case), an optional part letter a/h/o,
# then up to six colon-separated components.
CPE22TYPE_REGEX = r'^c[pP][eE]:/[AHOaho]?(:[A-Za-z0-9._\-~%]*){0,6}$'
# "cpe:2.3:" followed by the part (a, h, o, * or -), five attribute values, one language
# value and four more attribute values, all colon-separated.
CPE23TYPE_REGEX = r'^cpe:2\.3:[aho\*\-](:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&\'\(\)\+,\/:;<=>@\[\]\^`\{\|}~]))+(\?*|\*?))|[\*\-])){5}(:(([a-zA-Z]{2,3}(-([a-zA-Z]{2}|[0-9]{3}))?)|[\*\-]))(:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&\'\(\)\+,\/:;<=>@\[\]\^`\{\|}~]))+(\?*|\*?))|[\*\-])){4}$'
# Two or three non-empty, colon-free parts separated by ":".
MAVEN_CENTRAL_REGEX = r'^[^:]+:[^:]+(:[^:]+)?$'
# Exactly one "@" with non-empty text on both sides.
NPM_REGEX = r'^[^@]+@[^@]+$'
# Exactly one "/" with non-empty text on both sides.
NUGET_REGEX = r'^[^/]+/[^/]+$'
# Exactly one "#" with non-empty text on both sides.
BOWER_REGEX = r'^[^#]+#[^#]+$'
# Loose purl shape: "pkg:" followed by at least two non-empty parts separated by "/";
# the "@...", "?..." and "#..." parts are optional.
PURL_REGEX = r'^pkg:.+(\/.+)?\/.+(@.+)?(\?.+)?(#.+)?$'
# "swh:1:", an object type (snp, rel, rev, dir or cnt), then 40 hex digits.
SWH_REGEX = r'^swh:1:(snp|rel|rev|dir|cnt):[0-9a-fA-F]{40}$'
# "gitoid:", an object type (blob, tree, commit or tag), then either "sha1:" with 40 hex
# digits or "sha256:" with 64 hex digits.
GITOID_REGEX = r'^gitoid:(blob|tree|commit|tag):(sha1:[0-9a-fA-F]{40}|sha256:[0-9a-fA-F]{64})$'


def validate_external_package_refs(external_package_refs: List[ExternalPackageRef], parent_id: str) -> List[
Expand All @@ -25,5 +36,76 @@ def validate_external_package_refs(external_package_refs: List[ExternalPackageRe


def validate_external_package_ref(external_package_ref: ExternalPackageRef, parent_id: str) -> List[ValidationMessage]:
    """Check that the reference type and locator of an ExternalPackageRef fit its category.

    For the categories SECURITY, PACKAGE_MANAGER and PERSISTENT_ID the reference type must be
    one of the types handled below, and the locator must satisfy the format of that type
    (a regex, a URL check or a URI check). For the category OTHER every reference type is
    accepted and the locator only has to be free of spaces.

    :param external_package_ref: the reference to validate
    :param parent_id: id of the owning element; stored in the ValidationContext of each message
    :return: a list with at most one ValidationMessage; empty if no problem was found
    """
    context = ValidationContext(parent_id=parent_id, element_type=SpdxElementType.EXTERNAL_PACKAGE_REF,
                                full_element=external_package_ref)

    category = external_package_ref.category
    locator = external_package_ref.locator
    reference_type = external_package_ref.reference_type

    if category == ExternalPackageRefCategory.SECURITY:
        security_regexes = {"cpe22Type": CPE22TYPE_REGEX, "cpe23Type": CPE23TYPE_REGEX}
        if reference_type in security_regexes:
            return validate_against_regex(locator, security_regexes[reference_type], reference_type, context)
        if reference_type in ("advisory", "fix", "url"):
            # A truthy result from validate_url is treated as "not a valid URL".
            if validate_url(locator):
                return [ValidationMessage(
                    f'externalPackageRef locator of type "{reference_type}" must be a valid URL, but is: {locator}',
                    context)]
            return []
        if reference_type == "swid":
            # The prefix check includes the colon so that other schemes which merely start
            # with "swid" (e.g. "swidfoo:") are rejected as well.
            if validate_uri(locator) or not locator.startswith("swid:"):
                return [ValidationMessage(
                    f'externalPackageRef locator of type "swid" must be a valid URI with scheme swid, but is: {locator}',
                    context)]
            return []

        return [ValidationMessage(
            f"externalPackageRef type in category SECURITY must be one of [cpe22Type, cpe23Type, advisory, fix, url, swid], but is: {reference_type}",
            context)]

    if category == ExternalPackageRefCategory.PACKAGE_MANAGER:
        package_manager_regexes = {
            "maven-central": MAVEN_CENTRAL_REGEX,
            "npm": NPM_REGEX,
            "nuget": NUGET_REGEX,
            "bower": BOWER_REGEX,
            "purl": PURL_REGEX,
        }
        if reference_type in package_manager_regexes:
            return validate_against_regex(locator, package_manager_regexes[reference_type], reference_type, context)

        return [ValidationMessage(
            f"externalPackageRef type in category PACKAGE_MANAGER must be one of [maven-central, npm, nuget, bower, purl], but is: {reference_type}",
            context)]

    if category == ExternalPackageRefCategory.PERSISTENT_ID:
        persistent_id_regexes = {"swh": SWH_REGEX, "gitoid": GITOID_REGEX}
        if reference_type in persistent_id_regexes:
            return validate_against_regex(locator, persistent_id_regexes[reference_type], reference_type, context)

        return [ValidationMessage(
            f"externalPackageRef type in category PERSISTENT_ID must be one of [swh, gitoid], but is: {reference_type}",
            context)]

    if category == ExternalPackageRefCategory.OTHER:
        # NOTE(review): the message speaks of the "type" although the locator is what is
        # checked; the wording is kept because the tests pin this exact text.
        if " " in locator:
            return [ValidationMessage(
                f"externalPackageRef type in category OTHER must contain no spaces, but is: {locator}",
                context)]
        return []

    # No branch matched the category: return an empty list rather than falling off the end
    # of the function, so callers always receive a list as the signature promises.
    return []



def validate_against_regex(string_to_validate: str, regex: str, type_name: str,
                           context: ValidationContext) -> List[ValidationMessage]:
    """Check that a locator matches the pattern of its reference type.

    :param string_to_validate: the locator to check
    :param regex: the pattern the whole locator has to match
    :param type_name: reference type name, used only in the message text
    :param context: validation context attached to the message
    :return: an empty list if the locator matches, otherwise a list with one ValidationMessage
    """
    # fullmatch rather than match: with match, a pattern ending in "$" also succeeds just
    # before a trailing newline, so a locator such as "name@1.0\n" would be accepted.
    if re.fullmatch(regex, string_to_validate):
        return []

    return [ValidationMessage(
        f'externalPackageRef locator of type "{type_name}" must conform with the regex {regex}, but is: {string_to_validate}',
        context)]
140 changes: 131 additions & 9 deletions tests/spdx/validation/test_external_package_ref_validator.py
Expand Up @@ -13,25 +13,147 @@

import pytest

from spdx.model.package import ExternalPackageRef, ExternalPackageRefCategory
from spdx.validation.external_package_ref_validator import validate_external_package_ref
from spdx.validation.validation_message import ValidationMessage, ValidationContext, SpdxElementType
from tests.spdx.fixtures import external_package_ref_fixture


@pytest.mark.parametrize(
    "category, reference_type, locator",
    [
        (ExternalPackageRefCategory.SECURITY, "cpe22Type", "cpe:/o:canonical:ubuntu_linux:10.04:-:lts"),
        (ExternalPackageRefCategory.SECURITY, "cpe23Type", "cpe:2.3:o:canonical:ubuntu_linux:10.04:-:lts:*:*:*:*:*"),
        (ExternalPackageRefCategory.SECURITY, "advisory", "https://nvd.nist.gov/vuln/detail/CVE-2020-28498"),
        (ExternalPackageRefCategory.SECURITY, "fix", "https://github.com/indutny/elliptic/commit/441b7428"),
        (ExternalPackageRefCategory.SECURITY, "url",
         "https://github.com/christianlundkvist/blog/blob/master/2020_05_26_secp256k1_twist_attacks/secp256k1_twist_attacks.md"),
        (ExternalPackageRefCategory.SECURITY, "swid", "swid:2df9de35-0aff-4a86-ace6-f7dddd1ade4c"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "maven-central", "org.apache.tomcat:tomcat:9.0.0.M4"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "npm", "http-server@0.3.0"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "nuget", "Microsoft.AspNet.MVC/5.0.0"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "bower", "modernizr#2.6.2"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "purl",
         "pkg:docker/debian@sha256:2f04d3d33b6027bb74ecc81397abe780649ec89f1a2af18d7022737d0482cefe"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "purl",
         "pkg:bitbucket/birkenfeld/pygments-main@244fd47e07d1014f0aed9c"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "purl",
         "pkg:deb/debian/curl@7.50.3-1?arch=i386&distro=jessie"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "purl",
         "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "purl", "pkg:gem/jruby-launcher@1.1.2?platform=java"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "purl", "pkg:gem/ruby-advisory-db-check@0.12.4"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "purl",
         "pkg:github/package-url/purl-spec@244fd47e07d1004f0aed9c"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "purl",
         "pkg:golang/google.golang.org/genproto#googleapis/api/annotations"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "purl",
         "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?repository_url=repo.spring.io%2Frelease"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "purl", "pkg:npm/%40angular/animation@12.3.1"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "purl", "pkg:nuget/EnterpriseLibrary.Common@6.0.1304"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "purl",
         "pkg:rpm/fedora/curl@7.50.3-1.fc25?arch=i386&distro=fedora-25"),
        (ExternalPackageRefCategory.PERSISTENT_ID, "swh", "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"),
        (ExternalPackageRefCategory.PERSISTENT_ID, "swh", "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505"),
        (ExternalPackageRefCategory.PERSISTENT_ID, "swh", "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d"),
        (ExternalPackageRefCategory.PERSISTENT_ID, "swh", "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f"),
        (ExternalPackageRefCategory.PERSISTENT_ID, "swh", "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453"),
        (ExternalPackageRefCategory.PERSISTENT_ID, "gitoid",
         "gitoid:blob:sha1:261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64"),
        (ExternalPackageRefCategory.PERSISTENT_ID, "gitoid",
         "gitoid:blob:sha256:3557f7eb43c621c71483743d4b37059bb80933e7f71277c0c3b3846159d1f61c"),
        (ExternalPackageRefCategory.OTHER, "some idstring", "#//string-withOUT!Spaces\\?"),
    ])
def test_valid_external_package_ref(category, reference_type, locator):
    """A well-formed locator for each supported reference type yields no validation messages."""
    external_package_ref = ExternalPackageRef(category, reference_type, locator, "externalPackageRef comment")
    validation_messages: List[ValidationMessage] = validate_external_package_ref(external_package_ref, "parent_id")

    assert validation_messages == []


@pytest.mark.parametrize(
    "category, reference_type, locator, expected_message",
    [
        (ExternalPackageRefCategory.SECURITY, "cpe22Typo", "cpe:/o:canonical:ubuntu_linux:10.04:-:lts",
         "externalPackageRef type in category SECURITY must be one of [cpe22Type, cpe23Type, advisory, fix, url, swid], but is: cpe22Typo"),
        (ExternalPackageRefCategory.PACKAGE_MANAGER, "nugat", "cpe:/o:canonical:ubuntu_linux:10.04:-:lts",
         "externalPackageRef type in category PACKAGE_MANAGER must be one of [maven-central, npm, nuget, bower, purl], but is: nugat"),
        (ExternalPackageRefCategory.PERSISTENT_ID, "git-oid", "cpe:/o:canonical:ubuntu_linux:10.04:-:lts",
         "externalPackageRef type in category PERSISTENT_ID must be one of [swh, gitoid], but is: git-oid"),
    ])
def test_invalid_external_package_ref_types(category, reference_type, locator, expected_message):
    """A reference type that is not known for its category yields exactly one message naming the allowed types."""
    external_package_ref = ExternalPackageRef(category, reference_type, locator, "externalPackageRef comment")
    parent_id = "SPDXRef-Package"
    validation_messages: List[ValidationMessage] = validate_external_package_ref(external_package_ref, parent_id)

    expected = ValidationMessage(expected_message,
                                 ValidationContext(parent_id=parent_id,
                                                   element_type=SpdxElementType.EXTERNAL_PACKAGE_REF,
                                                   full_element=external_package_ref))

    assert validation_messages == [expected]


# Patterns interpolated into the expected messages of the locator tests below.
# NOTE(review): these are identical copies of the constants defined in
# external_package_ref_validator.py; importing them from there would avoid drift.
CPE22TYPE_REGEX = r'^c[pP][eE]:/[AHOaho]?(:[A-Za-z0-9._\-~%]*){0,6}$'
CPE23TYPE_REGEX = r'^cpe:2\.3:[aho\*\-](:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&\'\(\)\+,\/:;<=>@\[\]\^`\{\|}~]))+(\?*|\*?))|[\*\-])){5}(:(([a-zA-Z]{2,3}(-([a-zA-Z]{2}|[0-9]{3}))?)|[\*\-]))(:(((\?*|\*?)([a-zA-Z0-9\-\._]|(\\[\\\*\?!"#$$%&\'\(\)\+,\/:;<=>@\[\]\^`\{\|}~]))+(\?*|\*?))|[\*\-])){4}$'
MAVEN_CENTRAL_REGEX = r'^[^:]+:[^:]+(:[^:]+)?$'
NPM_REGEX = r'^[^@]+@[^@]+$'
NUGET_REGEX = r'^[^/]+/[^/]+$'
BOWER_REGEX = r'^[^#]+#[^#]+$'
PURL_REGEX = r'^pkg:.+(\/.+)?\/.+(@.+)?(\?.+)?(#.+)?$'
SWH_REGEX = r'^swh:1:(snp|rel|rev|dir|cnt):[0-9a-fA-F]{40}$'
GITOID_REGEX = r'^gitoid:(blob|tree|commit|tag):(sha1:[0-9a-fA-F]{40}|sha256:[0-9a-fA-F]{64})$'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would import these regex constants from external_package_ref_validator.py, both to avoid duplication and to prevent a mismatch if one of the regexes needs to change in the future.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done



@pytest.mark.parametrize("category, reference_type, locator, expected_message",
[(ExternalPackageRefCategory.SECURITY, "cpe22Type", "cpe:o:canonical:ubuntu_linux:10.04:-:lts",
f'externalPackageRef locator of type "cpe22Type" must conform with the regex {CPE22TYPE_REGEX}, but is: cpe:o:canonical:ubuntu_linux:10.04:-:lts'),
(ExternalPackageRefCategory.SECURITY, "cpe23Type",
"cpe:2.3:/o:canonical:ubuntu_linux:10.04:-:lts:*:*:*:*:*",
f'externalPackageRef locator of type "cpe23Type" must conform with the regex {CPE23TYPE_REGEX}, but is: cpe:2.3:/o:canonical:ubuntu_linux:10.04:-:lts:*:*:*:*:*'),
(ExternalPackageRefCategory.SECURITY, "advisory", "http://locatorurl",
f'externalPackageRef locator of type "advisory" must be a valid URL, but is: http://locatorurl'),
(ExternalPackageRefCategory.SECURITY, "fix", "http://fixurl",
f'externalPackageRef locator of type "fix" must be a valid URL, but is: http://fixurl'),
(ExternalPackageRefCategory.SECURITY, "url", "http://url",
f'externalPackageRef locator of type "url" must be a valid URL, but is: http://url'),
(ExternalPackageRefCategory.SECURITY, "swid", "2df9de35-0aff-4a86-ace6-f7dddd1ade4c",
f'externalPackageRef locator of type "swid" must be a valid URI with scheme swid, but is: 2df9de35-0aff-4a86-ace6-f7dddd1ade4c'),
(ExternalPackageRefCategory.PACKAGE_MANAGER, "maven-central",
"org.apache.tomcat:tomcat:tomcat:9.0.0.M4",
f'externalPackageRef locator of type "maven-central" must conform with the regex {MAVEN_CENTRAL_REGEX}, but is: org.apache.tomcat:tomcat:tomcat:9.0.0.M4'),
(ExternalPackageRefCategory.PACKAGE_MANAGER, "npm", "http-server:0.3.0",
f'externalPackageRef locator of type "npm" must conform with the regex {NPM_REGEX}, but is: http-server:0.3.0'),
(ExternalPackageRefCategory.PACKAGE_MANAGER, "nuget", "Microsoft.AspNet.MVC@5.0.0",
f'externalPackageRef locator of type "nuget" must conform with the regex {NUGET_REGEX}, but is: Microsoft.AspNet.MVC@5.0.0'),
(ExternalPackageRefCategory.PACKAGE_MANAGER, "bower", "modernizr:2.6.2",
f'externalPackageRef locator of type "bower" must conform with the regex {BOWER_REGEX}, but is: modernizr:2.6.2'),
(ExternalPackageRefCategory.PACKAGE_MANAGER, "purl", "pkg:npm@12.3.1",
f'externalPackageRef locator of type "purl" must conform with the regex {PURL_REGEX}, but is: pkg:npm@12.3.1'),
(ExternalPackageRefCategory.PERSISTENT_ID, "swh",
"swh:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
f'externalPackageRef locator of type "swh" must conform with the regex {SWH_REGEX}, but is: swh:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2'),
(ExternalPackageRefCategory.PERSISTENT_ID, "gitoid",
"gitoid:blob:sha1:3557f7eb43c621c71483743d4b37059bb80933e7f71277c0c3b3846159d1f61c",
f'externalPackageRef locator of type "gitoid" must conform with the regex {GITOID_REGEX}, but is: gitoid:blob:sha1:3557f7eb43c621c71483743d4b37059bb80933e7f71277c0c3b3846159d1f61c'),
(ExternalPackageRefCategory.PERSISTENT_ID, "gitoid",
"gitoid:blob:sha256:261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64",
f'externalPackageRef locator of type "gitoid" must conform with the regex {GITOID_REGEX}, but is: gitoid:blob:sha256:261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64'),
(ExternalPackageRefCategory.OTHER, "id string", "locator string",
"externalPackageRef type in category OTHER must contain no spaces, but is: locator string"),
])
@pytest.mark.skip(
"add tests once external package ref validation is implemented: https://github.com/spdx/tools-python/issues/373")
def test_invalid_external_package_ref(external_package_ref, expected_message):
def test_invalid_external_package_ref_locators(category, reference_type, locator, expected_message):
external_package_ref = ExternalPackageRef(category, reference_type, locator, "externalPackageRef comment")
parent_id = "SPDXRef-Package"
validation_messages: List[ValidationMessage] = validate_external_package_ref(external_package_ref, parent_id)

Expand Down