Skip to content

Commit

Permalink
Merge pull request #2974 from nexB/2972-summary-consider-copyrights
Browse files Browse the repository at this point in the history
Consider only copyrights in summary #2972
  • Loading branch information
pombredanne committed Jun 10, 2022
2 parents 2c49c4c + 91a2094 commit a94d414
Show file tree
Hide file tree
Showing 820 changed files with 1,321 additions and 929 deletions.
74 changes: 37 additions & 37 deletions src/summarycode/copyright_tallies.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,49 +485,50 @@ def filter_junk(texts):


COMMON_NAMES = {
'3dfxinteractiveinc.': '3dfx Interactive, Inc.',
'3dfxinteractiveinc.': '3dfx Interactive',

'cern': 'CERN - European Organization for Nuclear Research',

'ciscosystemsinc': 'Cisco Systems, Inc.',
'ciscosystems': 'Cisco Systems, Inc.',
'cisco': 'Cisco Systems, Inc.',
'ciscosystemsinc': 'Cisco Systems',
'ciscosystems': 'Cisco Systems',
'cisco': 'Cisco Systems',

'daisy': 'Daisy Ltd.',
'daisy': 'Daisy',
'daisyltd': 'Daisy',

'fsf': 'Free Software Foundation, Inc.',
'freesoftwarefoundation': 'Free Software Foundation, Inc.',
'freesoftwarefoundationinc': 'Free Software Foundation, Inc.',
'thefreesoftwarefoundation': 'Free Software Foundation, Inc.',
'thefreesoftwarefoundationinc': 'Free Software Foundation, Inc.',
'fsf': 'Free Software Foundation',
'freesoftwarefoundation': 'Free Software Foundation',
'freesoftwarefoundationinc': 'Free Software Foundation',
'thefreesoftwarefoundation': 'Free Software Foundation',
'thefreesoftwarefoundationinc': 'Free Software Foundation',

'hp': 'Hewlett-Packard, Inc.',
'hewlettpackard': 'Hewlett-Packard, Inc.',
'hewlettpackardco': 'Hewlett-Packard, Inc.',
'hpcompany': 'Hewlett-Packard, Inc.',
'hpdevelopmentcompanylp': 'Hewlett-Packard, Inc.',
'hpdevelopmentcompany': 'Hewlett-Packard, Inc.',
'hewlettpackardcompany': 'Hewlett-Packard, Inc.',
'hp': 'Hewlett-Packard',
'hewlettpackard': 'Hewlett-Packard',
'hewlettpackardco': 'Hewlett-Packard',
'hpcompany': 'Hewlett-Packard',
'hpdevelopmentcompanylp': 'Hewlett-Packard',
'hpdevelopmentcompany': 'Hewlett-Packard',
'hewlettpackardcompany': 'Hewlett-Packard',

'theandroidopensourceproject': 'The Android Open Source Project, Inc.',
'androidopensourceproject': 'The Android Open Source Project, Inc.',
'theandroidopensourceproject': 'Android Open Source Project',
'androidopensourceproject': 'Android Open Source Project',

'ibm': 'IBM Corporation',
'ibm': 'IBM',

'redhat': 'Red Hat, Inc.',
'redhatinc': 'Red Hat, Inc.',
'redhat': 'Red Hat',
'redhatinc': 'Red Hat',

'softwareinthepublicinterest': 'Software in the Public Interest, Inc.',
'spiinc': 'Software in the Public Interest, Inc.',
'softwareinthepublicinterest': 'Software in the Public Interest',
'spiinc': 'Software in the Public Interest',

'suse': 'SuSE, Inc.',
'suseinc': 'SuSE, Inc.',
'suse': 'SuSE',
'suseinc': 'SuSE',

'sunmicrosystems': 'Sun Microsystems, Inc.',
'sunmicrosystemsinc': 'Sun Microsystems, Inc.',
'sunmicro': 'Sun Microsystems, Inc.',
'sunmicrosystems': 'Sun Microsystems',
'sunmicrosystemsinc': 'Sun Microsystems',
'sunmicro': 'Sun Microsystems',

'thaiopensourcesoftwarecenter': 'Thai Open Source Software Center Ltd.',
'thaiopensourcesoftwarecenter': 'Thai Open Source Software Center',

'apachefoundation': 'The Apache Software Foundation',
'apachegroup': 'The Apache Software Foundation',
Expand All @@ -540,20 +541,19 @@ def filter_junk(texts):

'regentsoftheuniversityofcalifornia': 'The Regents of the University of California',

# 'mit': 'the Massachusetts Institute of Technology',

'borland': 'Borland Corp.',
'borland': 'Borland',
'borlandcorp': 'Borland',

'microsoft': 'Microsoft',
'microsoftcorp': 'Microsoft',
'microsoftinc': 'Microsoft',
'microsoftcorporation': 'Microsoft',

'google': 'Google Inc.',
'googlellc': 'Google Inc.',
'googleinc': 'Google Inc.',
'google': 'Google',
'googlellc': 'Google',
'googleinc': 'Google',

'intel': 'Intel Corporation',
'intel': 'Intel',
}

# Remove everything except letters and numbers
Expand Down
41 changes: 25 additions & 16 deletions src/summarycode/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from cluecode.copyrights import CopyrightDetector
from packagedcode.utils import combine_expressions
from packagedcode import models
from summarycode.copyright_tallies import canonical_holder
from summarycode.score import compute_license_score
from summarycode.score import get_field_values_from_codebase_resources
from summarycode.score import unique
Expand Down Expand Up @@ -167,7 +168,7 @@ def get_declared_holders(codebase, holders_tallies):
codebase, 'holders', key_files_only=True
)
entry_by_key_file_holders = {
fingerprints.generate(entry['holder']): entry
fingerprints.generate(canonical_holder(entry['holder'])): entry
for entry in key_file_holders
if entry['holder']
}
Expand Down Expand Up @@ -212,25 +213,24 @@ def get_primary_language(programming_language_tallies):

def get_origin_info_from_top_level_packages(top_level_packages, codebase):
"""
Return a 3-tuple containing the strings of declared license expression,
copyright holder, and primary programming language from a
Return a 3-tuple containing the declared license expression string, a list
of copyright holders, and the primary programming language string from a
``top_level_packages`` list of detected top-level packages mapping and a
``codebase``.
"""
if not top_level_packages:
return '', '', ''
return '', [], ''

license_expressions = []
programming_languages = []
copyrights = []
parties = []

for package_mapping in top_level_packages:
package = models.Package.from_dict(package_mapping)
# we are only interested in key packages
if not is_key_package(package, codebase):
continue

top_level_packages = [
models.Package.from_dict(package_mapping)
for package_mapping in top_level_packages
]
key_file_packages = [p for p in top_level_packages if is_key_package(p, codebase)]
for package in key_file_packages:
license_expression = package.license_expression
if license_expression:
license_expressions.append(license_expression)
Expand All @@ -243,8 +243,6 @@ def get_origin_info_from_top_level_packages(top_level_packages, codebase):
if copyright_statement:
copyrights.append(copyright_statement)

parties.extend(package.parties or [])

# Combine license expressions
unique_license_expressions = unique(license_expressions)
combined_declared_license_expression = combine_expressions(
Expand All @@ -263,9 +261,20 @@ def get_origin_info_from_top_level_packages(top_level_packages, codebase):
declared_holders = []
if holders:
declared_holders = holders
elif parties:
declared_holders = [party.name for party in parties or []]

else:
# If the package data does not contain an explicit copyright, check the
# key files where the package data was detected from and see if there
# are any holder detections that can be used.
for package in key_file_packages:
for datafile_path in package.datafile_paths:
key_file_resource = codebase.get_resource(path=datafile_path)
if not key_file_resource:
continue
holders = [h['holder'] for h in key_file_resource.holders]
declared_holders.extend(holders)
# Normalize holder names before collecting them
# This allows us to properly remove declared holders from `other_holders` later
declared_holders = [canonical_holder(h) for h in declared_holders]
declared_holders = unique(declared_holders)

# Programming language
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ holders:
- Free Software Foundation, Inc.
- the Free Software Foundation
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 2
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/afferogplv1-AfferoGPLv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ holders:
holders_summary:
- value: Affero Inc.
count: 2
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/afferogplv3-AfferoGPLv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
4 changes: 2 additions & 2 deletions tests/cluecode/data/copyrights/android_c-c.c.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ holders:
- The Android Open Source Project
- Colin Percival
holders_summary:
- value: Colin Percival
- value: Android Open Source Project
count: 1
- value: The Android Open Source Project, Inc.
- value: Colin Percival
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ holders_summary:
count: 2
- value: Board of Trustees of the University of Illinois
count: 1
- value: Cisco Systems, Inc.
- value: Cisco Systems
count: 1
- value: Eric Haines
count: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ holders:
holders_summary:
- value: Bigelow & Holmes
count: 1
- value: Sun Microsystems, Inc.
- value: Sun Microsystems
count: 1
4 changes: 2 additions & 2 deletions tests/cluecode/data/copyrights/colin_android-bsdiff_c.c.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ holders:
- The Android Open Source Project
- Colin Percival
holders_summary:
- value: Colin Percival
- value: Android Open Source Project
count: 1
- value: The Android Open Source Project, Inc.
- value: Colin Percival
count: 1
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/complex_notice-NOTICE.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ holders:
holders_summary:
- value: David Schultz
count: 9
- value: Sun Microsystems, Inc.
- value: Sun Microsystems
count: 4
- value: Mike Barcroft
count: 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ holders:
holders_summary:
- value: IBM Corporation
count: 3
- value: Sun Microsystems, Inc.
- value: Sun Microsystems
count: 3
- value: Lotus Development Corporation
count: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ holders:
- Daisy Ltd.
- Daisy
holders_summary:
- value: Daisy Ltd.
- value: Daisy
count: 2
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/copytest/aosp.txt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Android Open Source Project
holders_summary:
- value: The Android Open Source Project, Inc.
- value: Android Open Source Project
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ holders:
- Hewlett-Packard Co.
- Hewlett-Packard Co.
holders_summary:
- value: Hewlett-Packard, Inc.
- value: Hewlett-Packard
count: 3
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ holders:
- Free Software Foundation, Inc.
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 11
- value: Colin Plumb
count: 2
Expand Down
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/ecosv2_0-eCosv.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Red Hat, Inc.
holders_summary:
- value: Red Hat, Inc.
- value: Red Hat
count: 1
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/ed-ed.copyright.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ holders_summary:
count: 1
- value: Antonio Diaz Diaz
count: 1
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
- value: James Troup
count: 1
4 changes: 2 additions & 2 deletions tests/cluecode/data/copyrights/esmertec_java-java.java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ holders:
- Esmertec AG
- The Android Open Source Project
holders_summary:
- value: Esmertec AG
- value: Android Open Source Project
count: 1
- value: The Android Open Source Project, Inc.
- value: Esmertec AG
count: 1
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/fsf_py-999_py.py.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/gfdlv1_2-GFDLv.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
2 changes: 1 addition & 1 deletion tests/cluecode/data/copyrights/gfdlv1_3-GFDLv.3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ copyrights:
holders:
- Free Software Foundation, Inc.
holders_summary:
- value: Free Software Foundation, Inc.
- value: Free Software Foundation
count: 1
Loading

0 comments on commit a94d414

Please sign in to comment.