
Commit

Merge pull request #3783 from nexB/fix-cargo-workspaces
Improve cargo package detection support
AyanSinhaMahapatra committed Jun 10, 2024
2 parents 8e9dc46 + 60c580e commit 8f5daf7
Showing 40 changed files with 8,145 additions and 2,265 deletions.
19 changes: 14 additions & 5 deletions CHANGELOG.rst
@@ -34,14 +34,23 @@ v33.0.0 (next next, roadmap)
of these in other summary plugins.
See https://github.com/nexB/scancode-toolkit/issues/1745

- Improve cargo package detection support with various improvements
  and bugfixes:
  - Fix the parser crashing on cargo workspaces
  - Fix a bug in dependency parsing (we were not returning any dependencies)
  - Also support getting dependency versions from the workspace
  - Support more attributes from cargo manifests
  - Better handle workspace data through the extra_data attribute
See https://github.com/nexB/scancode-toolkit/pull/3783

- We now support parsing the Swift manifest JSON dump and the ``Package.resolved`` file https://github.com/nexB/scancode-toolkit/issues/2657.
  - Run the commands below on your local Swift project before running the scan.
  - ::

      swift package dump-package > Package.swift.json

  - ::

      swift package resolve

v32.1.0 (next, roadmap)
----------------------------
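To make the workspace handling in the cargo.py changes below concrete, here is a minimal, hypothetical sketch: the manifests are invented for illustration and only the `toml` package from PyPI is assumed. It shows the shape of data that the updated parse() stores in extra_data and that assemble() later resolves.

import toml

# Invented top-level Cargo.toml for a cargo workspace: shared fields live
# under [workspace.package] and member crates are listed under members.
WORKSPACE_MANIFEST = """
[workspace]
members = ["crates/member-crate"]

[workspace.package]
version = "1.2.3"
license = "MIT"

[workspace.dependencies]
serde = "1.0"
"""

# Invented member Cargo.toml: fields marked {workspace = true} are inherited.
MEMBER_MANIFEST = """
[package]
name = "member-crate"
version = { workspace = true }
license = { workspace = true }

[dependencies]
serde = { workspace = true }
"""

workspace = toml.loads(WORKSPACE_MANIFEST)["workspace"]
member = toml.loads(MEMBER_MANIFEST)["package"]

# Inherited values load as plain dicts such as {'workspace': True}; the updated
# parse() drops them from the member package data and records a 'workspace'
# marker in extra_data, and assemble() then fills in the real values (here
# version 1.2.3 and the MIT license) from workspace['package'].
inherited = {k for k, v in member.items() if isinstance(v, dict) and "workspace" in v}
print(sorted(inherited))                # ['license', 'version']
print(workspace["package"]["version"])  # 1.2.3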
146 changes: 119 additions & 27 deletions src/packagedcode/cargo.py
@@ -7,10 +7,11 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
import os
import re
import sys

import saneyaml
import toml
from packageurl import PackageURL

@@ -20,6 +21,22 @@
Handle Rust cargo crates
"""

TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE_CARGO', False)


def logger_debug(*args):
pass


logger = logging.getLogger(__name__)

if TRACE:
logging.basicConfig(stream=sys.stdout)
logger.setLevel(logging.DEBUG)

def logger_debug(*args):
return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))


class CargoBaseHandler(models.DatafileHandler):
@classmethod
@@ -29,7 +46,7 @@ def assemble(cls, package_data, resource, codebase, package_adder):
support cargo workspaces where we have multiple packages from
a repository and some shared information present at top-level.
"""
workspace = package_data.extra_data.get("workspace", {})
workspace = package_data.extra_data.get('workspace', {})
workspace_members = workspace.get("members", [])
workspace_package_data = workspace.get("package", {})
attributes_to_copy = [
@@ -39,10 +56,13 @@ def assemble(cls, package_data, resource, codebase, package_adder):
]
if "license" in workspace_package_data:
for attribute in attributes_to_copy:
package_data.extra_data[attribute] = 'workspace'
workspace_package_data[attribute] = getattr(package_data, attribute)

workspace_root_path = resource.parent(codebase).path
if workspace_package_data and workspace_members:

# TODO: support glob patterns found in cargo workspaces
for workspace_member_path in workspace_members:
workspace_directory_path = os.path.join(workspace_root_path, workspace_member_path)
workspace_directory = codebase.get_resource(path=workspace_directory_path)
@@ -56,9 +76,13 @@ def assemble(cls, package_data, resource, codebase, package_adder):
if not resource.package_data:
continue

if TRACE:
logger_debug(f"Resource manifest to update: {resource.path}")

updated_package_data = cls.update_resource_package_data(
package_data=workspace_package_data,
old_package_data=resource.package_data.pop(),
workspace=workspace,
workspace_package_data=workspace_package_data,
resource_package_data=resource.package_data.pop(),
mapping=CARGO_ATTRIBUTE_MAPPING,
)
resource.package_data.append(updated_package_data)
@@ -79,20 +103,61 @@ def assemble(cls, package_data, resource, codebase, package_adder):
)

@classmethod
def update_resource_package_data(cls, package_data, old_package_data, mapping=None):
def update_resource_package_data(cls, workspace, workspace_package_data, resource_package_data, mapping=None):

for attribute in old_package_data.keys():
extra_data = resource_package_data["extra_data"]
for attribute in resource_package_data.keys():
if attribute in mapping:
replace_by_attribute = mapping.get(attribute)
old_package_data[attribute] = package_data.get(replace_by_attribute)
if not replace_by_attribute in extra_data:
continue

extra_data.pop(replace_by_attribute)
replace_by_value = workspace_package_data.get(replace_by_attribute)
if replace_by_value:
resource_package_data[attribute] = replace_by_value
elif attribute == "parties":
old_package_data[attribute] = list(get_parties(
person_names=package_data.get("authors"),
resource_package_data[attribute] = list(get_parties(
person_names=workspace_package_data.get("authors", []),
party_role='author',
))

return old_package_data

if "authors" in extra_data:
extra_data.pop("authors")

extra_data_copy = extra_data.copy()
for key, value in extra_data_copy.items():
if value == 'workspace':
extra_data.pop(key)

if key in workspace_package_data:
workspace_value = workspace_package_data.get(key)
if workspace_value and key in mapping:
replace_by_attribute = mapping.get(key)
extra_data[replace_by_attribute] = workspace_value

# refresh purl if version updated from workspace
if "version" in workspace_package_data:
resource_package_data["purl"] = PackageURL(
type=cls.default_package_type,
name=resource_package_data["name"],
namespace=resource_package_data["namespace"],
version=resource_package_data["version"],
).to_string()

workspace_dependencies = dependency_mapper(dependencies=workspace.get('dependencies', {}))
deps_by_purl = {}
for dependency in workspace_dependencies:
deps_by_purl[dependency.purl] = dependency

for dep_mapping in resource_package_data['dependencies']:
workspace_dependency = deps_by_purl.get(dep_mapping['purl'], None)
if workspace_dependency and workspace_dependency.extracted_requirement:
dep_mapping['extracted_requirement'] = workspace_dependency.extracted_requirement

if 'workspace' in dep_mapping["extra_data"]:
dep_mapping['extra_data'].pop('workspace')

return resource_package_data


class CargoTomlHandler(CargoBaseHandler):
@@ -105,16 +170,21 @@ class CargoTomlHandler(CargoBaseHandler):

@classmethod
def parse(cls, location, package_only=False):
package_data = toml.load(location, _dict=dict)
core_package_data = package_data.get('package', {})
workspace = package_data.get('workspace', {})
package_data_toml = toml.load(location, _dict=dict)
workspace = package_data_toml.get('workspace', {})
core_package_data = package_data_toml.get('package', {})
extra_data = {}
if workspace:
extra_data['workspace'] = workspace

package_data = core_package_data.copy()
for key, value in package_data.items():
if isinstance(value, dict) and 'workspace' in value:
core_package_data.pop(key)
extra_data[key] = 'workspace'

name = core_package_data.get('name')
version = core_package_data.get('version')
if isinstance(version, dict) and "workspace" in version:
version = None
extra_data["version"] = "workspace"

description = core_package_data.get('description') or ''
description = description.strip()
@@ -132,22 +202,28 @@ def parse(cls, location, package_only=False):

# cargo dependencies are complex and can be overridden at multiple levels
dependencies = []
for key, value in core_package_data.items():
for key, value in package_data_toml.items():
if key.endswith('dependencies'):
dependencies.extend(dependency_mapper(dependencies=value, scope=key))

# TODO: add file refs:
# - readme, include and exclude
# TODO: other URLs
# - documentation

vcs_url = core_package_data.get('repository')
homepage_url = core_package_data.get('homepage')
repository_homepage_url = name and f'https://crates.io/crates/{name}'
repository_download_url = name and version and f'https://crates.io/api/v1/crates/{name}/{version}/download'
api_data_url = name and f'https://crates.io/api/v1/crates/{name}'
if workspace:
extra_data["workspace"] = workspace

extra_data_mappings = {
"documentation": "documentation_url",
"rust-version": "rust_version",
"edition": "rust_edition",
}
for cargo_attribute, extra_attribute in extra_data_mappings.items():
value = core_package_data.get(cargo_attribute)
if value:
extra_data[extra_attribute] = value

package_data = dict(
datasource_id=cls.datasource_id,
@@ -156,6 +232,7 @@ def parse(cls, location, package_only=False):
version=version,
primary_language=cls.default_primary_language,
description=description,
keywords=keywords,
parties=parties,
extracted_license_statement=extracted_license_statement,
vcs_url=vcs_url,
@@ -171,6 +248,7 @@ def parse(cls, location, package_only=False):

CARGO_ATTRIBUTE_MAPPING = {
# Fields in PackageData model: Fields in cargo
"version": "version",
"homepage_url": "homepage",
"vcs_url": "repository",
"keywords": "categories",
@@ -179,6 +257,9 @@ def parse(cls, location, package_only=False):
"license_detections": "license_detections",
"declared_license_expression": "declared_license_expression",
"declared_license_expression_spdx": "declared_license_expression_spdx",
# extra data fields (reverse mapping)
"edition": "rust_edition",
"rust-version": "rust_version",
}


@@ -237,25 +318,36 @@ def dependency_mapper(dependencies, scope='dependencies'):
"""
is_runtime = not scope.endswith(('dev-dependencies', 'build-dependencies'))
for name, requirement in dependencies.items():
extra_data = {}
extracted_requirement = None
if isinstance(requirement, str):
# plain version requirement
is_optional = False
extracted_requirement = requirement

elif isinstance(requirement, dict):
# complex requirement, with more than version are harder to handle
# so we just dump
# complex requirement: extract the version if available and dump
# everything else in extra_data. Here {workspace = true} means the
# dependency version should be inherited from the workspace
is_optional = requirement.pop('optional', False)
requirement = saneyaml.dump(requirement)
if 'version' in requirement:
extracted_requirement = requirement.get('version')

if requirement:
extra_data = requirement

yield models.DependentPackage(
purl=PackageURL(
type='cargo',
name=name,
).to_string(),
extracted_requirement=requirement,
extracted_requirement=extracted_requirement,
scope=scope,
is_runtime=is_runtime,
is_optional=is_optional,
is_resolved=False,
extra_data=extra_data,
)


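The reworked dependency_mapper shown above now pulls the version out of complex requirement tables and keeps the remaining keys in extra_data, instead of dumping the whole table as the extracted requirement. A minimal usage sketch follows; it assumes a scancode-toolkit environment where packagedcode.cargo is importable, and the dependency table is made up for illustration.

from packagedcode.cargo import dependency_mapper

dependencies = {
    'serde': '1.0',                                # plain version requirement
    'anyhow': {'workspace': True},                 # version inherited from the workspace
    'rand': {'version': '0.8', 'optional': True},  # complex requirement with extras
}

for dep in dependency_mapper(dependencies=dependencies, scope='dependencies'):
    # extracted_requirement is '1.0', None and '0.8' respectively; the
    # workspace marker and any leftover keys end up in extra_data.
    print(dep.purl, dep.extracted_requirement, dep.is_optional, dep.extra_data)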
