Skip to content

Commit

Permalink
Merge pull request #2343 from VanL/fix-is-empty-logic
Browse files Browse the repository at this point in the history
Modify is_empty to fix logic error
  • Loading branch information
pombredanne committed Feb 27, 2021
2 parents 498defa + c79e2f5 commit f030eb3
Show file tree
Hide file tree
Showing 10 changed files with 609 additions and 16 deletions.
45 changes: 32 additions & 13 deletions src/cluecode/plugin_filter_clues.py
Expand Up @@ -90,7 +90,7 @@ def filter_ignorable_resource_clues(resource, rules_by_id):
if hasattr(resource, 'holders'):
resource.holders = filtered.holders
if hasattr(resource, 'copyrights'):
resource.copyrights = filtered.copyrights
resource.copyrights = filtered.copyrights
return resource


Expand Down Expand Up @@ -191,6 +191,10 @@ def is_empty(clues):
if clues:
return not any([
clues.copyrights, clues.holders, clues.authors, clues.urls, clues.emails])
else:
# A falsy or None "clues" object must be reported as empty. Without this branch
# the function fell through and returned None, which callers interpreted as
# False (i.e., the object is *not* empty).
return True


def filter_ignorable_clues(detections, rules_by_id):
Expand All @@ -205,7 +209,7 @@ def filter_ignorable_clues(detections, rules_by_id):
no_detected_ignorables = not detections.copyrights and not detections.authors

ignorables = collect_ignorables(detections.licenses, rules_by_id)

no_ignorables = not detections.licenses or is_empty(ignorables)

if TRACE:
Expand All @@ -220,36 +224,45 @@ def filter_ignorable_clues(detections, rules_by_id):
attributes=detections.emails,
ignorables=ignorables.emails.union(
detections.copyrights_as_ignorable,
detections.authors_as_ignorable),
value_key='email'))
detections.authors_as_ignorable,
),
value_key='email',
))

# discard redundant urls if ignorable or in a detected copyright or author
urls = list(filter_values(
attributes=detections.urls,
ignorables=ignorables.urls.union(
detections.copyrights_as_ignorable,
detections.authors_as_ignorable),
value_key='url', strip='/'))
detections.authors_as_ignorable,
),
value_key='url',
strip='/',
))

# discard redundant authors if ignorable or in detected holders or copyrights
authors = list(filter_values(
attributes=detections.authors,
ignorables=ignorables.authors.union(
detections.copyrights_as_ignorable,
detections.holders_as_ignorable),
value_key='value'))
detections.holders_as_ignorable,
),
value_key='value',
))

# discard redundant holders if ignorable
holders = list(filter_values(
attributes=detections.holders,
ignorables=ignorables.holders,
value_key='value'))
value_key='value',
))

# discard redundant copyrights if ignorable
copyrights = list(filter_values(
attributes=detections.copyrights,
ignorables=ignorables.copyrights,
value_key='value'))
value_key='value',
))

return Detections(
copyrights=copyrights,
Expand Down Expand Up @@ -277,22 +290,26 @@ def filter_values(attributes, ignorables, value_key='value', strip=''):
el = item['end_line']
val = item[value_key].strip(strip)
ignored = False

if TRACE:
logger_debug(' filter_values: ignorables:', ignorables)

for ign in ignorables:
if TRACE: logger_debug(' filter_values: ign:', ign)
if (ls in ign.lines_range or el in ign.lines_range) and val in ign.value:
ignored = True
if TRACE: logger_debug(' filter_values: skipped')
break

if not ignored:
yield item


def collect_ignorables(license_matches, rules_by_id):
"""
Collect and return an ignorable Clues object built from `license_matches` matched licenses
which is the list of "licenses" objects returned in JSON results.
Collect and return an ignorable Clues object built from `license_matches`
matched licenses which is the list of "licenses" objects returned in JSON
results.
The value of each ignorable list of clues is a set of (set of
line numbers, set of ignorable values). The return value is a mapping
Expand Down Expand Up @@ -356,6 +373,9 @@ def collect_ignorables(license_matches, rules_by_id):
if ign_urls:
urls.add(Ignorable(lines_range=lines_range, value=ign_urls))

if TRACE:
logger_debug(' collect_ignorables: rule:', rule)

ignorables = Ignorables(
copyrights=frozenset(copyrights),
holders=frozenset(holders),
Expand All @@ -365,7 +385,6 @@ def collect_ignorables(license_matches, rules_by_id):
)

if TRACE:
logger_debug(' collect_ignorables: rule:', rule)
logger_debug(' collect_ignorables: ignorables:', ignorables)

return ignorables
17 changes: 17 additions & 0 deletions src/licensedcode/data/rules/pygres-2.2_2.RULE
@@ -0,0 +1,17 @@
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose, without fee, and without a written
agreement is hereby granted, provided that the above copyright notice and
this paragraph and the following two paragraphs appear in all copies or in
any new file that contains a substantial portion of this file.

IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE
AUTHOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE
AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
ENHANCEMENTS, OR MODIFICATIONS.

3 changes: 3 additions & 0 deletions src/licensedcode/data/rules/pygres-2.2_2.yml
@@ -0,0 +1,3 @@
license_expression: pygres-2.2
is_license_text: yes
notes: from http://shell.vex.net/viewvc.cgi/pygresql/trunk/module/pgmodule.c?view=markup&pathrev=431
70 changes: 70 additions & 0 deletions tests/cluecode/data/plugin_filter_clues/files/LICENSE
@@ -0,0 +1,70 @@

Copyright (c) The Eclipse Foundation
https://eclipse.org
contact foo@eclipse.org
author: John Doe

is licensed under the
Apache Software License, Version 1.1, which is reproduced below.

/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.

Copyright (c) The Eclipse Foundation
https://eclipse.org
contact foo@eclipse.org
author: John Doe

*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.ibm.com. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
25 changes: 25 additions & 0 deletions tests/cluecode/data/plugin_filter_clues/files/LICENSE2
@@ -0,0 +1,25 @@
* $Id$
* PyGres, version 2.2 A Python interface for PostgreSQL database. Written by
* D'Arcy John Cain, (darcy@druid.net). Based heavily on code written by
* Pascal Andre, andre@chimay.via.ecp.fr. Copyright (c) 1996, Pascal Andre
* (andre@avia.ecp.fr).
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose, without fee, and without a written
* agreement is hereby granted, provided that the above copyright notice and
* this paragraph and the following two paragraphs appear in all copies or in
* any new file that contains a substantial portion of this file.
*
* IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
* SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE
* AUTHOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE
* AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
* ENHANCEMENTS, OR MODIFICATIONS.
*
* Further modifications copyright 1997, 1998, 1996 by D'Arcy John. Cain
* (darcy@druid.net) subject to the same terms and conditions as above.
47 changes: 47 additions & 0 deletions tests/cluecode/data/plugin_filter_clues/files/LICENSE3
@@ -0,0 +1,47 @@
PCRE LICENCE
------------

PCRE is a library of functions to support regular expressions whose
syntax and semantics are as close as possible to those of the Perl 5
language.

Written by: Philip Hazel <ph10@cam.ac.uk>
University of Cambridge Computing Service, Cambridge, England.
Phone: +44 1223 334714.
Copyright (c) 1997-2001 University of Cambridge

Permission is granted to anyone to use this software for any purpose on
any computer system, and to redistribute it freely, subject to the
following restrictions:

1. This software is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

2. The origin of this software must not be misrepresented, either by
explicit claim or by omission. In practice, this means that if you use
PCRE in software which you distribute to others, commercially or
otherwise, you must put a sentence like this
"Regular expression support is provided by the PCRE library package,
which is open source software, written by Philip Hazel, and copyright by
the University of Cambridge, England"

somewhere reasonably visible in your documentation and in any relevant
files or online help data or similar.

A reference to the ftp site for the source, that is, to
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/
should also be given in the documentation.

3. Altered versions must be plainly marked as such, and must not be
misrepresented as being the original software.

4. If PCRE is embedded in any software that is released under the GNU
General Purpose Licence (GPL), or Lesser General Purpose Licence (LGPL),
then the terms of that licence shall supersede any condition above with
which it is incompatible.

The documentation for PCRE, supplied in the "doc" directory, is
distributed under the same terms as the software itself.

End PCRE LICENCE

0 comments on commit f030eb3

Please sign in to comment.