Skip to content

Commit

Permalink
Address feedback on #3462
Browse files Browse the repository at this point in the history
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
  • Loading branch information
AyanSinhaMahapatra committed Jul 24, 2023
1 parent 35d8f6b commit 8697b3f
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 37 deletions.
22 changes: 14 additions & 8 deletions docs/source/reference/available_package_parsers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@ parsers in scancode-toolkit during documentation builds.
- Datasource ID
- Primary Language
- Documentation URL
* - JAR Java Archive
- ``*.jar``
- None
- ``java_jar``
- None
- https://en.wikipedia.org/wiki/JAR_(file_format)
* - AboutCode ABOUT file
- ``*.ABOUT``
- ``about``
Expand Down Expand Up @@ -306,13 +300,13 @@ parsers in scancode-toolkit during documentation builds.
- ``debian_source_metadata_tarball``
- None
- https://manpages.debian.org/unstable/dpkg-dev/deb.5.en.html
* - None
* - macOS disk image file
- ``*.dmg``
``*.sparseimage``
- ``dmg``
- ``apple_dmg``
- None
- None
- https://en.wikipedia.org/wiki/Apple_Disk_Image
* - Java EAR application.xml
- ``*/META-INF/application.xml``
- ``ear``
Expand Down Expand Up @@ -437,6 +431,12 @@ parsers in scancode-toolkit during documentation builds.
- ``ant_ivy_xml``
- Java
- https://ant.apache.org/ivy/history/latest-milestone/ivyfile.html
* - JAR Java Archive
- ``*.jar``
- ``jar``
- ``java_jar``
- None
- https://en.wikipedia.org/wiki/JAR_(file_format)
* - Java JAR MANIFEST.MF
- ``*/META-INF/MANIFEST.MF``
- ``jar``
Expand Down Expand Up @@ -555,6 +555,12 @@ parsers in scancode-toolkit during documentation builds.
- ``opam_file``
- Ocaml
- https://opam.ocaml.org/doc/Manual.html#Common-file-format
* - Java OSGi MANIFEST.MF
- None
- ``osgi``
- ``java_osgi_manifest``
- Java
- https://docs.oracle.com/javase/tutorial/deployment/jar/manifestindex.html
* - Dart pubspec lockfile
- ``*pubspec.lock``
- ``pubspec``
Expand Down
12 changes: 6 additions & 6 deletions src/licensedcode/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,10 @@ class DetectionCategory(Enum):
EXTRA_WORDS = 'extra-words'
UNKNOWN_MATCH = 'unknown-match'
LICENSE_CLUES = 'license-clues'
LOW_QUALITY_MATCHES = 'license-clues'
LOW_QUALITY_MATCH_FRAGMENTS = 'low-quality-matches'
IMPERFECT_COVERAGE = 'imperfect-match-coverage'
FALSE_POSITVE = 'possible-false-positive'
UNDETECTED_LICENSE = 'undetected-license'
MATCH_FRAGMENTS = 'match-fragments'
LOW_RELEVANCE = 'low-relevance'


Expand All @@ -124,6 +123,7 @@ class DetectionRule(Enum):
"""
UNKNOWN_MATCH = 'unknown-match'
LICENSE_CLUES = 'license-clues'
LOW_QUALITY_MATCH_FRAGMENTS = 'low-quality-matches'
FALSE_POSITIVE = 'possible-false-positive'
NOT_LICENSE_CLUES = 'not-license-clues-as-more-detections-present'
UNKNOWN_REFERENCE_TO_LOCAL_FILE = 'unknown-reference-to-local-file'
Expand Down Expand Up @@ -1374,12 +1374,12 @@ def get_detected_license_expression(
detection_log.append(DetectionRule.LICENSE_CLUES.value)
return detection_log, combined_expression

elif analysis == DetectionCategory.LOW_QUALITY_MATCHES.value:
elif analysis == DetectionCategory.LOW_QUALITY_MATCH_FRAGMENTS.value:
if TRACE_ANALYSIS:
logger_debug(f'analysis {DetectionCategory.LICENSE_CLUES.value}')
# TODO: we are temporarily returning these as license clues, and not
# in detections but ideally we should return synthetic unknowns for these
detection_log.append(DetectionRule.LOW_QUALITY_MATCHES.value)
detection_log.append(DetectionRule.LOW_QUALITY_MATCH_FRAGMENTS.value)
return detection_log, combined_expression

else:
Expand Down Expand Up @@ -1501,7 +1501,7 @@ def get_ambiguous_license_detections_by_type(unique_license_detections):

for detection in unique_license_detections:
if not detection.license_expression:
ambi_license_detections[DetectionCategory.MATCH_FRAGMENTS.value] = detection
ambi_license_detections[DetectionCategory.LOW_QUALITY_MATCH_FRAGMENTS.value] = detection

elif is_undetected_license_matches(license_matches=detection.matches):
ambi_license_detections[DetectionCategory.UNDETECTED_LICENSE.value] = detection
Expand Down Expand Up @@ -1567,7 +1567,7 @@ def analyze_detection(license_matches, package_license=False):
return DetectionCategory.UNKNOWN_MATCH.value

elif not package_license and is_low_quality_matches(license_matches=license_matches):
return DetectionCategory.LOW_QUALITY_MATCHES.value
return DetectionCategory.LOW_QUALITY_MATCH_FRAGMENTS.value

# Case where at least one of the matches have `match_coverage`
# below IMPERFECT_MATCH_COVERAGE_THR
Expand Down
4 changes: 2 additions & 2 deletions src/licensedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1193,7 +1193,7 @@ def load_rules(
rules_data_dir=rules_data_dir,
with_checks=True,
is_builtin=True,
ignore_deprecated=True,
with_depreacted=False,
):
"""
Return an iterable of rules loaded from rule files in ``rules_data_dir``.
Expand All @@ -1217,7 +1217,7 @@ def load_rules(

try:
rule = Rule.from_file(rule_file=rule_file)
if rule.is_deprecated and ignore_deprecated:
if not with_depreacted and rule.is_deprecated:
continue
else:
yield rule
Expand Down
8 changes: 4 additions & 4 deletions src/packagedcode/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,12 @@
# yet the purpose and semantics are rather different here

# TODO: parse me!!!
# TODO: add missing URLs and descriptions


class JavaJarHandler(models.NonAssemblableDatafileHandler):
datasource_id = 'java_jar'
# NOTE: there are a few rare cases where a .zip can be a JAR.
path_patterns = ('*.jar',)
default_package_type = 'jar'
filetypes = ('zip archive', 'java archive',)
description = 'JAR Java Archive'
documentation_url = 'https://en.wikipedia.org/wiki/JAR_(file_format)'
Expand Down Expand Up @@ -270,8 +269,9 @@ class AppleDmgHandler(models.NonAssemblableDatafileHandler):
datasource_id = 'apple_dmg'
default_package_type = 'dmg'
path_patterns = ('*.dmg', '*.sparseimage',)
description = ''
documentation_url = ''
description = 'macOS disk image file'
# See also https://en.wikipedia.org/wiki/Sparse_image
documentation_url = 'https://en.wikipedia.org/wiki/Apple_Disk_Image'


class IsoImageHandler(models.NonAssemblableDatafileHandler):
Expand Down
3 changes: 3 additions & 0 deletions src/packagedcode/readme.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

'licence': 'extracted_license_statement',
'license': 'extracted_license_statement',
# NOTE: these files sometimes also have a "License File" field
}


Expand All @@ -50,6 +51,8 @@ class ReadmeHandler(models.NonAssemblableDatafileHandler):
'*/README.google',
'*/README.thirdparty',
)
description = ''
documentation_url = ''

@classmethod
def parse(cls, location):
Expand Down
8 changes: 3 additions & 5 deletions src/summarycode/todo.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,6 @@ def get_package_identifier(package_data, file_path):


def get_unknown_purl(package_type):
if not package_type:
package_type = "unknown"
purl = PackageURL(type=package_type, name="unknown")
return purl.to_string()

Expand Down Expand Up @@ -337,7 +335,7 @@ class ReviewComments(Enum):
"been matched to rules having unknown as their license key, and these "
"needs to be reviewed."
)
MATCH_FRAGMENTS = (
LOW_QUALITY_MATCH_FRAGMENTS = (
"Fragments of license text were detected which are not proper license detections "
"and likely has misleading license expression, but this has some clues about licenses, "
"which needs review."
Expand Down Expand Up @@ -391,8 +389,8 @@ def get_review_comments(detection_log):
if LicenseDetectionCategory.UNKNOWN_MATCH.value in detection_log:
review_comments[LicenseDetectionCategory.UNKNOWN_MATCH.value] = ReviewComments.UNKNOWN_MATCH.value

if LicenseDetectionCategory.MATCH_FRAGMENTS.value in detection_log:
review_comments[LicenseDetectionCategory.MATCH_FRAGMENTS.value] = ReviewComments.MATCH_FRAGMENTS.value
if LicenseDetectionCategory.LOW_QUALITY_MATCH_FRAGMENTS.value in detection_log:
review_comments[LicenseDetectionCategory.LOW_QUALITY_MATCH_FRAGMENTS.value] = ReviewComments.LOW_QUALITY_MATCH_FRAGMENTS.value

if LicenseDetectionCategory.LOW_RELEVANCE.value in detection_log:
review_comments[LicenseDetectionCategory.LOW_RELEVANCE.value] = ReviewComments.LOW_RELEVANCE.value
Expand Down
22 changes: 11 additions & 11 deletions tests/packagedcode/data/plugin/help.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
--------------------------------------------
Package type: None
datasource_id: java_jar
documentation URL: https://en.wikipedia.org/wiki/JAR_(file_format)
primary language: None
description: JAR Java Archive
path_patterns: '*.jar'
--------------------------------------------
Package type: about
datasource_id: about_file
documentation URL: https://aboutcode-toolkit.readthedocs.io/en/latest/specification.html
Expand Down Expand Up @@ -323,9 +316,9 @@ Package type: deb
--------------------------------------------
Package type: dmg
datasource_id: apple_dmg
documentation URL:
documentation URL: https://en.wikipedia.org/wiki/Apple_Disk_Image
primary language: None
description:
description: macOS disk image file
path_patterns: '*.dmg', '*.sparseimage'
--------------------------------------------
Package type: ear
Expand Down Expand Up @@ -468,6 +461,13 @@ Package type: ivy
description: Ant IVY dependency file
path_patterns: '*/ivy.xml'
--------------------------------------------
Package type: jar
datasource_id: java_jar
documentation URL: https://en.wikipedia.org/wiki/JAR_(file_format)
primary language: None
description: JAR Java Archive
path_patterns: '*.jar'
--------------------------------------------
Package type: jar
datasource_id: java_jar_manifest
documentation URL: https://docs.oracle.com/javase/tutorial/deployment/jar/manifestindex.html
Expand Down Expand Up @@ -715,9 +715,9 @@ Package type: pypi
--------------------------------------------
Package type: readme
datasource_id: readme
documentation URL: None
documentation URL:
primary language: None
description: None
description:
path_patterns: '*/README.android', '*/README.chromium', '*/README.facebook', '*/README.google', '*/README.thirdparty'
--------------------------------------------
Package type: rpm
Expand Down
10 changes: 10 additions & 0 deletions tests/packagedcode/test_package_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,16 @@ def test_package_data_datasource_id_are_unique(self):
), f'Duplicated datasource_id: {pdh!r} with {seen[pdhid]!r}'
seen[pdh.datasource_id] = pdh

def test_package_data_handlers_have_package_type(self):
    """
    Check that every handler in ALL_DATAFILE_HANDLERS declares a
    non-empty ``default_package_type``.
    """
    for pdh in ALL_DATAFILE_HANDLERS:
        pdh_type = pdh.default_package_type
        # Name the offending handler so a failure is actionable without
        # re-running the test under a debugger.
        assert pdh_type, f'Missing default_package_type: {pdh!r}'


def test_package_data_file_patterns_are_tuples(self):
"""
Check that all file patterns are tuples, as if they are
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
}
],
"detection_log": [
"license-clues"
"low-quality-matches"
],
"identifier": "borceux-3c39742c-edef-82b7-0cdd-fc4d9ff8b044"
}
Expand Down

0 comments on commit 8697b3f

Please sign in to comment.