In [1]:
import os
import re
from enum import Enum, auto

In [2]:
file_spec_c = {
    'regex_first_comment': r'^\s*/\*\n((.|\n)*?) \*/',
    'linestart_comment': ' *',
}

file_spec_py = {
    'regex_first_comment': r'^\s*(?:#\!.*(?:\n#.*)*)?\s*((?:#[^\!].*\n)+)',
    'linestart_comment': '#',
}

file_spec_mk = file_spec_py
file_spec_kconfig = file_spec_py

file_specs_ext = {
    '.c': file_spec_c,
    '.h': file_spec_c,
    '.py': file_spec_py,
    '.mk': file_spec_mk,
    '.config': file_spec_kconfig,
    '.md': None,
}

file_specs_name = {
    'Makefile': file_spec_mk,
    'Kconfig': file_spec_kconfig,
}

In [3]:
licence_map = {
    'LGPL-2.1-only': "This file is subject to the terms and conditions of the GNU Lesser General Public License v2.1. See the file LICENSE in the top level directory for more details.",
}

licence_map = dict((v, k) for k, v in licence_map.items())

In [4]:
class LicenceReplaceResult(Enum):
    ERROR_UNRECOGNIZED_FILETYPE = auto()
    ERROR_NO_STARTING_COMMENT = auto()
    ERROR_NO_COPYRIGHT_FOUND = auto()
    ERROR_COMMENT_PARSING_ERROR = auto()
    ERROR_UNKNOWN_LICENCE_TEXT = auto()
    WARNING_ALREADY_CONVERTED = auto()
    SUCCES_CONVERTED = auto()

In [38]:
def convert(path):

    # if path is folder, recursively go over subfolders
    if os.path.isdir(path):
        results = []
        for file in os.listdir(path):
            results += convert(os.path.join(path, file))
        return results

    # silently ignore anything that is not a file
    if not os.path.isfile(path):
        return []

    # recognize file type
    file_name, file_ext = os.path.splitext(os.path.basename(path))
    if file_ext in file_specs_ext.keys():
        file_spec = file_specs_ext[file_ext]
    elif file_name in file_specs_name.keys():
        file_spec = file_specs_name[file_name]
    else:
        return [(path, LicenceReplaceResult.ERROR_UNRECOGNIZED_FILETYPE)]

    # file types to explicitly ignore
    if file_spec is None:
        return []
    
    # read file
    with open(path, "r") as f:
        content = f.read()

    # locate first comment in file
    m = re.search(file_spec['regex_first_comment'], content)
    if m is None:
        return [(path, LicenceReplaceResult.ERROR_NO_STARTING_COMMENT)]

    # parse raw comment
    c = m[1].replace('\n'+file_spec['linestart_comment'], '\n').strip()

    # check if file has been already converted
    if 'SPDX-License-Identifier:' in c:
        return [(path, LicenceReplaceResult.WARNING_ALREADY_CONVERTED)]

    # expect two paragraphs with copyright first, licence second
    parts = c.split('\n\n')
    print(parts)
    if len(parts) == 2:
        copyrights = []
        for c in re.finditer(r'Copyright \([Cc]\) (.*)', parts[0]):
            copyrights += [cc.strip() for cc in c[1].split('\n')]
        if len(copyrights) == 0:
            return [(path, LicenceReplaceResult.ERROR_NO_COPYRIGHT_FOUND)]
        print(copyrights)

        licence_text = parts[1].replace('\n', '').strip()
    else:
        return [(path, LicenceReplaceResult.ERROR_COMMENT_PARSING_ERROR)]
        return

    # map licence text
    if licence_text not in licence_map:
        return [(path, LicenceReplaceResult.ERROR_UNKNOWN_LICENCE_TEXT)]
    licence = licence_map[licence_text]

    # rewrite comment
    content = (content[:m.start(1)]
        + file_spec['linestart_comment'] + f' SPDX-FileCopyrightText: {copyright}\n'
        + file_spec['linestart_comment'] + '\n'
        + file_spec['linestart_comment'] + f' SPDX-License-Identifier: {licence}\n'
        + content[m.end(1):])

    # write file
    # with open(path, "w") as f:
    #     f.write(content)

    return [(path, LicenceReplaceResult.SUCCES_CONVERTED)]

In [7]:
def res_filter(res, restype):
    return [r for r in res if r[1] is restype]

def res_stats(res):
    for restype in LicenceReplaceResult:
        print(restype, len(res_filter(res, restype)))
    
def res_files(res, restype):
    print(restype, [r[0] for r in res_filter(res, restype)])

In [53]:
res = convert('tests/sys')

res_stats(res)

# res_files(res, LicenceReplaceResult.WARNING_ALREADY_CONVERTED)
res_files(res, LicenceReplaceResult.ERROR_NO_COPYRIGHT_FOUND)
res_files(res, LicenceReplaceResult.ERROR_COMMENT_PARSING_ERROR)

LicenceReplaceResult.ERROR_UNRECOGNIZED_FILETYPE 13
LicenceReplaceResult.ERROR_NO_STARTING_COMMENT 282
LicenceReplaceResult.ERROR_NO_COPYRIGHT_FOUND 1
LicenceReplaceResult.ERROR_COMMENT_PARSING_ERROR 5
LicenceReplaceResult.ERROR_UNKNOWN_LICENCE_TEXT 0
LicenceReplaceResult.SUCCES_CONVERTED 0
LicenceReplaceResult.ERROR_NO_COPYRIGHT_FOUND ['tests/sys/puf_sram/tests/puf_sram_if.py']
LicenceReplaceResult.ERROR_COMMENT_PARSING_ERROR ['tests/sys/xtimer_usleep/tests/01-run.py', 'tests/sys/struct_tm_utility/main.c', 'tests/sys/pthread_rwlock/main.c', 'tests/sys/pipe/main.c', 'tests/sys/posix_sleep/tests/01-run.py']


In [39]:
convert('tests/sys/xtimer_usleep/tests/01-run.py')

['# Copyright (C) 2017 Francisco Acosta <francisco.acosta@inria.fr>\n Copyright (c) 2017 Freie Universität Berlin1224\n               BLUB\n               Blaadlfjsa;dfka;dflkan;dslkfans;dlfka;ldsn;lsnv;ladjf;laf', ' This file is subject to the terms and conditions of the GNU Lesser\n General Public License v2.1. See the file LICENSE in the top level\n directory for more details.']
['', '']


[('tests/sys/xtimer_usleep/tests/01-run.py',
  <LicenceReplaceResult.SUCCES_CONVERTED: 7>)]