Skip to content

Commit

Permalink
Address feedback and fix tests
Browse files (browse the repository at this point in the history)
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
  • Loading branch information
AyanSinhaMahapatra committed Apr 10, 2023
1 parent d556e4c commit 607497d
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 15 deletions.
53 changes: 38 additions & 15 deletions src/licensedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@
from licensedcode._vendor import attr
from license_expression import ExpressionError
from license_expression import Licensing
from saneyaml import load as yaml_load
from saneyaml import dump as yaml_dump
from saneyaml import load as saneyaml_load
from saneyaml import dump as saneyaml_dump
from yaml import load as yaml_load
from yaml import dump as yaml_dump
from yaml import CSafeLoader

from commoncode.fileutils import file_base_name
from commoncode.fileutils import file_name
Expand Down Expand Up @@ -554,7 +557,7 @@ def spdx_keys(self):
yield key

@staticmethod
def validate(licenses, verbose=False, no_dupe_urls=False):
def validate(licenses, verbose=False, no_dupe_urls=False, thorough=False):
"""
Check that the ``licenses``, a mapping of {key: License}, are valid.
Return dictionaries of infos, errors and warnings mapping a license key
Expand Down Expand Up @@ -659,14 +662,23 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
if not len(all_licenses) == len(set(all_licenses)):
warn('Some duplicated URLs')

# local text consistency
text = lic.text

data = {"text": text}
# We are testing whether we can dump as yaml and load from yaml
# without failing (i.e. whether the text is yaml safe)
yaml_string = yaml_dump(data, indent=4)
loaded_yaml = yaml_load(yaml_string)
if thorough:
# local text consistency
text = lic.text

data = {"text": text}
# We are testing whether we can dump as yaml and load from yaml
# without failing (i.e. whether the text is yaml safe)
# Using saneyaml
try:
yaml_string = saneyaml_dump(data, indent=4)
loaded_yaml = saneyaml_load(yaml_string)
except Exception:
errors['GLOBAL'].append(
f'Error invalid YAML text at: {lic.key}, failed during saneyaml.load()'
)
# This fails because saneyaml_dump adds a line break that is missing at the end of the original text
# assert text == loaded_yaml["text"]

license_itokens = tuple(index_tokenizer(text))
if not license_itokens:
Expand Down Expand Up @@ -750,9 +762,9 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
def get_yaml_safe_text(text):

data = {"text": text}
yaml_string = yaml_dump(data, indent=4)
yaml_string = saneyaml_dump(data, indent=4)
try:
loaded_yaml = yaml_load(yaml_string)
loaded_yaml = saneyaml_load(yaml_string)
except Exception:
text = text.replace('\n\n', '\n \n')
return text
Expand Down Expand Up @@ -1028,7 +1040,7 @@ def _validate_all_rules(rules, licenses_by_key):
errors = defaultdict(list)

for rule in rules:
for err_msg in rule.validate(licensing):
for err_msg in rule.validate(licensing, thorough=True):
errors[err_msg].append(rule)
return errors

Expand Down Expand Up @@ -1717,7 +1729,7 @@ def has_unknown(self):
# license flag instead
return self.license_expression and 'unknown' in self.license_expression

def validate(self, licensing=None):
def validate(self, licensing=None, thorough=False):
"""
Validate this rule using the provided ``licensing`` Licensing and yield
one error message for each type of error detected.
Expand Down Expand Up @@ -1811,6 +1823,17 @@ def validate(self, licensing=None):
if len(set(self.referenced_filenames)) != len(self.referenced_filenames):
yield 'referenced_filenames cannot contain duplicates.'

if thorough:
text = self.text
data = {"text": text}
# We are testing whether we can dump as yaml and load from yaml
# without failing (i.e. whether the text is yaml safe)
try:
yaml_string = saneyaml_dump(data, indent=4)
loaded_yaml = saneyaml_load(yaml_string)
except Exception:
yield (f'Error invalid YAML text at: {self.identifier}, failed during saneyaml.load()')

def license_keys(self, unique=True):
"""
Return a list of license keys for this rule.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ headers:
platform_version: '#67-Ubuntu SMP Mon Mar 13 14:22:10 UTC 2023'
python_version: "3.8.10 (default, Mar 13 2023, 10:26:41) \n[GCC 9.4.0]"
spdx_license_list_version: '3.20'
additional_license_directory: /home/ayansinha/nexB/write_access/scancode-extra/tests/licensedcode/data/additional_licenses/additional_dir
additional_license_plugins:
- /home/ayansinha/nexB/write_access/scancode-extra/venv/lib/python3.8/site-packages/licenses_to_install1
- /home/ayansinha/nexB/write_access/scancode-extra/venv/lib/python3.8/site-packages/licenses_to_install2
files_count: 4
summary:
declared_license_expression: apache-2.0 AND (apache-2.0 OR mit)
Expand Down Expand Up @@ -85,6 +89,7 @@ packages:
code_view_url:
vcs_url:
copyright:
holder:
declared_license_expression: apache-2.0
declared_license_expression_spdx: Apache-2.0
license_detections:
Expand Down Expand Up @@ -1137,6 +1142,7 @@ files:
code_view_url:
vcs_url:
copyright:
holder:
declared_license_expression: apache-2.0
declared_license_expression_spdx: Apache-2.0
license_detections:
Expand Down
4 changes: 4 additions & 0 deletions tests/licensedcode/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def test_validate_license_library_data(self):
errors, warnings, infos = models.License.validate(
licenses=models.load_licenses(with_deprecated=False),
verbose=False,
thorough=True,
)
assert errors == {}
assert warnings == {}
Expand Down Expand Up @@ -185,6 +186,9 @@ def test_rule_from_license_have_text_file_and_data_file_are_computed_correctly(s
class TestRule(FileBasedTesting):
test_data_dir = TEST_DATA_DIR

def test_validate_license_rules_data(self):
rules = list(models.get_rules(validate=True))

def test_create_rule_ignore_punctuation(self):
test_rule = create_rule_from_text_and_expression(text='A one. A two. A three.')
expected = ['one', 'two', 'three']
Expand Down

0 comments on commit 607497d

Please sign in to comment.