Skip to content

Commit

Permalink
Do not return empty strings in license data
Browse files (browse the repository at this point in the history)
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
  • Loading branch information
AyanSinhaMahapatra committed Oct 20, 2022
1 parent 6e14d8a commit 044f60d
Show file tree
Hide file tree
Showing 42 changed files with 702 additions and 904 deletions.
2 changes: 1 addition & 1 deletion docs/source/cli-reference/output-format.rst
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ following options.
"text_url": "http://fedoraproject.org/wiki/Licensing:MIT#Old_Style",
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:mit-old-style",
"spdx_license_key": null,
"spdx_url": "",
"spdx_url": null,
"start_line": 9,
"end_line": 15,
"matched_rule": {
Expand Down
2 changes: 1 addition & 1 deletion docs/source/cli-reference/synopsis.rst
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ A sample JSON output for an individual file will look like::
"text_url": "http://fedoraproject.org/wiki/Licensing:MIT#Old_Style",
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:mit-old-style",
"spdx_license_key": null,
"spdx_url": "",
"spdx_url": null,
"start_line": 9,
"end_line": 15,
"matched_rule": {
Expand Down
80 changes: 40 additions & 40 deletions src/licensedcode/plugin_license.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
import os
import posixpath
from functools import partial

Expand All @@ -20,28 +22,22 @@

from scancode.api import SCANCODE_LICENSEDB_URL

TRACE = False
TRACE = os.environ.get('SCANCODE_DEBUG_LICENSE_PLUGIN', False)


def logger_debug(*args): pass
def logger_debug(*args):
pass


logger = logging.getLogger(__name__)

if TRACE:
use_print = True
if use_print:
prn = print
else:
import logging
import sys
logger = logging.getLogger(__name__)
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
logging.basicConfig(stream=sys.stdout)
logger.setLevel(logging.DEBUG)
prn = logger.debug
import sys
logging.basicConfig(stream=sys.stdout)
logger.setLevel(logging.DEBUG)

def logger_debug(*args):
return prn(' '.join(isinstance(a, str) and a or repr(a) for a in args))

return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))

@scan_impl
class LicenseScanner(ScanPlugin):
Expand Down Expand Up @@ -156,28 +152,37 @@ def process_codebase(self, codebase, unknown_licenses, **kwargs):
cle.extra_data['additional_license_plugins'] = cche.additional_license_plugins
has_additional_licenses = True

if unknown_licenses:
if codebase.has_single_resource:
return
if TRACE and has_additional_licenses:
logger_debug(
f'add_referenced_filenames_license_matches: additional_licenses',
f'has_additional_licenses: {has_additional_licenses}\n',
f'additional_license_directory: {cche.additional_license_directory}\n',
f'additional_license_plugins : {cche.additional_license_plugins}'
)

for resource in codebase.walk(topdown=False):
# follow license references to other files
if TRACE:
license_expressions_before = list(resource.license_expressions)
if codebase.has_single_resource and not codebase.root.is_file:
return

modified = False
for resource in codebase.walk(topdown=False):
# follow license references to other files
if TRACE:
license_expressions_before = list(resource.license_expressions)

if unknown_licenses:
modified = add_referenced_filenames_license_matches(resource, codebase)

if has_additional_licenses and resource.is_file and resource.license_detections:
add_builtin_license_flag(resource, licenses)
if has_additional_licenses and resource.is_file and resource.licenses:
add_builtin_license_flag(resource, licenses)

if TRACE and modified:
license_expressions_after = list(resource.license_expressions)
logger_debug(
f'add_referenced_filenames_license_matches: Modfied:',
f'{resource.path} with license_expressions:\n'
f'before: {license_expressions_before}\n'
f'after : {license_expressions_after}'
)
if TRACE and modified:
license_expressions_after = list(resource.license_expressions)
logger_debug(
f'add_referenced_filenames_license_matches: Modfied:',
f'{resource.path} with license_expressions:\n'
f'before: {license_expressions_before}\n'
f'after : {license_expressions_after}'
)


def add_builtin_license_flag(resource, licenses):
Expand All @@ -186,22 +191,17 @@ def add_builtin_license_flag(resource, licenses):
additional licenses present in the cache, either through an additional
license directory or additional license plugins.
"""
for detection in resource.license_detections:
matches = detection['matches']
for match in matches:
add_builtin_value(license_match=match, licenses=licenses)

for match in resource.license_clues:
for match in resource.licenses:
add_builtin_value(license_match=match, licenses=licenses)


def add_builtin_value(license_match, licenses):
license_key = license_match['key']
lic = licenses.get(license_key)
if lic.is_builtin:
license_match['is_builtin'] = True
license_match['matched_rule']['is_builtin'] = True
else:
license_match['is_builtin'] = False
license_match['matched_rule']['is_builtin'] = False


def add_referenced_filenames_license_matches(resource, codebase):
Expand Down
15 changes: 6 additions & 9 deletions src/scancode/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,19 +249,17 @@ def _licenses_data_from_match(
result['is_exception'] = lic.is_exception
result['is_unknown'] = lic.is_unknown
result['owner'] = lic.owner
result['homepage_url'] = lic.homepage_url
result['text_url'] = lic.text_urls[0] if lic.text_urls else None
# if the license is not builtin these should all be empty
if lic.is_builtin:
result['homepage_url'] = lic.homepage_url
result['text_url'] = lic.text_urls[0] if lic.text_urls else ''
result['reference_url'] = license_url_template.format(lic.key)
result['scancode_text_url'] = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
result['scancode_data_url'] = SCANCODE_LICENSE_DATA_URL.format(lic.key)
else:
result['homepage_url'] = ''
result['text_url'] = ''
result['reference_url'] = ''
result['scancode_text_url'] = ''
result['scancode_data_url'] = ''
result['reference_url'] = None
result['scancode_text_url'] = None
result['scancode_data_url'] = None
spdx_key = lic.spdx_license_key
result['spdx_license_key'] = spdx_key

Expand All @@ -273,7 +271,7 @@ def _licenses_data_from_match(
spdx_key = lic.spdx_license_key.rstrip('+')
spdx_url = SPDX_LICENSE_URL.format(spdx_key)
else:
spdx_url = ''
spdx_url = None
result['spdx_url'] = spdx_url
result['start_line'] = match.start_line
result['end_line'] = match.end_line
Expand All @@ -293,7 +291,6 @@ def _licenses_data_from_match(
matched_rule['matched_length'] = match.len()
matched_rule['match_coverage'] = match.coverage()
matched_rule['rule_relevance'] = match.rule.relevance
# FIXME: for sanity this should always be included?????
if include_text:
result['matched_text'] = matched_text
return detected_licenses
Expand Down
32 changes: 16 additions & 16 deletions tests/formattedcode/data/csv/flatten_scan/full.json
Original file line number Diff line number Diff line change
Expand Up @@ -258,11 +258,11 @@
"short_name": "JBoss EULA",
"category": "Proprietary Free",
"owner": "JBoss Community",
"homepage_url": "",
"homepage_url": null,
"text_url": "http://repository.jboss.org/licenses/jbossorg-eula.txt",
"reference_url": "https://scancode-licensedb.aboutcode.org/jboss-eula",
"spdx_license_key": "",
"spdx_url": "",
"spdx_url": null,
"start_line": 3,
"end_line": 108,
"matched_rule": {
Expand Down Expand Up @@ -1296,10 +1296,10 @@
"category": "Public Domain",
"owner": "Unspecified",
"homepage_url": "http://www.linfo.org/publicdomain.html",
"text_url": "",
"text_url": null,
"reference_url": "https://scancode-licensedb.aboutcode.org/public-domain",
"spdx_license_key": "",
"spdx_url": "",
"spdx_url": null,
"start_line": 1649,
"end_line": 1649,
"matched_rule": {
Expand All @@ -1322,10 +1322,10 @@
"category": "Public Domain",
"owner": "Unspecified",
"homepage_url": "http://www.linfo.org/publicdomain.html",
"text_url": "",
"text_url": null,
"reference_url": "https://scancode-licensedb.aboutcode.org/public-domain",
"spdx_license_key": "",
"spdx_url": "",
"spdx_url": null,
"start_line": 1692,
"end_line": 1692,
"matched_rule": {
Expand Down Expand Up @@ -1478,10 +1478,10 @@
"category": "Permissive",
"owner": "OpenSSL",
"homepage_url": "http://openssl.org/source/license.html",
"text_url": "",
"text_url": null,
"reference_url": "https://scancode-licensedb.aboutcode.org/openssl",
"spdx_license_key": "",
"spdx_url": "",
"spdx_url": null,
"start_line": 4,
"end_line": 7,
"matched_rule": {
Expand Down Expand Up @@ -1555,10 +1555,10 @@
"category": "Permissive",
"owner": "OpenSSL",
"homepage_url": "http://openssl.org/source/license.html",
"text_url": "",
"text_url": null,
"reference_url": "https://scancode-licensedb.aboutcode.org/openssl",
"spdx_license_key": "",
"spdx_url": "",
"spdx_url": null,
"start_line": 4,
"end_line": 7,
"matched_rule": {
Expand Down Expand Up @@ -2562,11 +2562,11 @@
"short_name": "Ada linking exception to GPL 2.0 or later",
"category": "Copyleft Limited",
"owner": "Dmitriy Anisimkov",
"homepage_url": "",
"text_url": "",
"homepage_url": null,
"text_url": null,
"reference_url": "https://scancode-licensedb.aboutcode.org/ada-linking-exception",
"spdx_license_key": "",
"spdx_url": "",
"spdx_url": null,
"start_line": 20,
"end_line": 25,
"matched_rule": {
Expand Down Expand Up @@ -3268,11 +3268,11 @@
"short_name": "CMR License",
"category": "Permissive",
"owner": "CMR - Christian Michelsen Research AS",
"homepage_url": "",
"text_url": "",
"homepage_url": null,
"text_url": null,
"reference_url": "https://scancode-licensedb.aboutcode.org/cmr-no",
"spdx_license_key": "",
"spdx_url": "",
"spdx_url": null,
"start_line": 9,
"end_line": 15,
"matched_rule": {
Expand Down
32 changes: 16 additions & 16 deletions tests/formattedcode/data/csv/flatten_scan/full.json-expected
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,11 @@
"license__short_name": "JBoss EULA",
"license__category": "Proprietary Free",
"license__owner": "JBoss Community",
"license__homepage_url": "",
"license__homepage_url": null,
"license__text_url": "http://repository.jboss.org/licenses/jbossorg-eula.txt",
"license__reference_url": "https://scancode-licensedb.aboutcode.org/jboss-eula",
"license__spdx_license_key": "",
"license__spdx_url": "",
"license__spdx_url": null,
"start_line": 3,
"end_line": 108,
"matched_rule__identifier": "jboss-eula.LICENSE",
Expand Down Expand Up @@ -1090,10 +1090,10 @@
"license__category": "Public Domain",
"license__owner": "Unspecified",
"license__homepage_url": "http://www.linfo.org/publicdomain.html",
"license__text_url": "",
"license__text_url": null,
"license__reference_url": "https://scancode-licensedb.aboutcode.org/public-domain",
"license__spdx_license_key": "",
"license__spdx_url": "",
"license__spdx_url": null,
"start_line": 1649,
"end_line": 1649,
"matched_rule__identifier": "public-domain.LICENSE",
Expand All @@ -1113,10 +1113,10 @@
"license__category": "Public Domain",
"license__owner": "Unspecified",
"license__homepage_url": "http://www.linfo.org/publicdomain.html",
"license__text_url": "",
"license__text_url": null,
"license__reference_url": "https://scancode-licensedb.aboutcode.org/public-domain",
"license__spdx_license_key": "",
"license__spdx_url": "",
"license__spdx_url": null,
"start_line": 1692,
"end_line": 1692,
"matched_rule__identifier": "public-domain.LICENSE",
Expand Down Expand Up @@ -1244,10 +1244,10 @@
"license__category": "Permissive",
"license__owner": "OpenSSL",
"license__homepage_url": "http://openssl.org/source/license.html",
"license__text_url": "",
"license__text_url": null,
"license__reference_url": "https://scancode-licensedb.aboutcode.org/openssl",
"license__spdx_license_key": "",
"license__spdx_url": "",
"license__spdx_url": null,
"start_line": 4,
"end_line": 7,
"matched_rule__identifier": "openssl_8.RULE",
Expand Down Expand Up @@ -1309,10 +1309,10 @@
"license__category": "Permissive",
"license__owner": "OpenSSL",
"license__homepage_url": "http://openssl.org/source/license.html",
"license__text_url": "",
"license__text_url": null,
"license__reference_url": "https://scancode-licensedb.aboutcode.org/openssl",
"license__spdx_license_key": "",
"license__spdx_url": "",
"license__spdx_url": null,
"start_line": 4,
"end_line": 7,
"matched_rule__identifier": "openssl_8.RULE",
Expand Down Expand Up @@ -2131,11 +2131,11 @@
"license__short_name": "Ada linking exception to GPL 2.0 or later",
"license__category": "Copyleft Limited",
"license__owner": "Dmitriy Anisimkov",
"license__homepage_url": "",
"license__text_url": "",
"license__homepage_url": null,
"license__text_url": null,
"license__reference_url": "https://scancode-licensedb.aboutcode.org/ada-linking-exception",
"license__spdx_license_key": "",
"license__spdx_url": "",
"license__spdx_url": null,
"start_line": 20,
"end_line": 25,
"matched_rule__identifier": "ada-linking-exception.LICENSE",
Expand Down Expand Up @@ -2718,11 +2718,11 @@
"license__short_name": "CMR License",
"license__category": "Permissive",
"license__owner": "CMR - Christian Michelsen Research AS",
"license__homepage_url": "",
"license__text_url": "",
"license__homepage_url": null,
"license__text_url": null,
"license__reference_url": "https://scancode-licensedb.aboutcode.org/cmr-no",
"license__spdx_license_key": "",
"license__spdx_url": "",
"license__spdx_url": null,
"start_line": 9,
"end_line": 15,
"matched_rule__identifier": "cmr-no.LICENSE",
Expand Down
Loading

0 comments on commit 044f60d

Please sign in to comment.