Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add logical types generator code #2

Merged
merged 11 commits into from
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions model/make_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0

import fire
import json
import os
from collections import defaultdict
from graphlib import TopologicalSorter

# Default directory holding the model snapshot and the generated class files
OUT_DIR = 'generated'
# Default filename of the snapshot written by parse_model
MODEL_SNAPSHOT = 'modelTypes.json'

# Model types to be serialized as references, not inlined
typerefs = {
    "Element": "SpdxId"
}

# Map from xsd types to Python built-in or model-defined types
typemap = {
    'xsd:anyURI': 'AnyUri',
    'xsd:integer': 'int',
    'xsd:string': 'str'
}

# Type definitions to patch missing classes in model.
# Entries mirror the structure produced by parse_model.load_model;
# "_generated": True marks them as synthesized here, not read from the repo.
model_patches = {
    'AnyUri': {
        'Summary': {},
        'Description': {},
        'Metadata': {
            'name': 'AnyUri',
            'SubclassOf': 'none',
            '_modelRef': 'https://rdf.spdx.org/v3/Core/AnyUri',
            '_profile': 'Core',
            '_category': 'Classes',
            '_file': 'AnyUri.md',
            '_html': '',
            '_generated': True
        },
        'Properties': {}
    },
    'SpdxId': {
        'Summary': {},
        'Description': {},
        'Metadata': {
            'name': 'SpdxId',
            'SubclassOf': 'none',
            '_modelRef': 'https://rdf.spdx.org/v3/Core/SpdxId',
            '_profile': 'Core',
            '_category': 'Classes',
            '_file': 'SpdxId.md',
            '_html': '',
            '_generated': True
        },
        'Properties': {}
    }
}


def write_tools_class(model, mtypes, out):
    """Write a markdown class-stub file for one model type definition.

    :param model: str - name of the type (key into mtypes)
    :param mtypes: dict - full model snapshot, including the "_commit" entry
    :param out: str - output directory; the file goes to out/<profile>/<name>.md
    """
    meta = mtypes[model]['Metadata']
    # Guard against unknown metadata keys sneaking in from the model source
    for m in meta:
        assert m in {'name', 'SubclassOf', 'Instantiability',
                     '_modelRef', '_profile', '_category', '_file', '_html', '_generated', '_root_class'}

    pdir = os.path.join(out, meta['_profile'])
    os.makedirs(pdir, exist_ok=True)
    class_name = meta['name']
    with open(os.path.join(pdir, meta['name']) + '.md', 'w') as fp:
        # Link the abbreviated commit hash to its GitHub page for traceability
        commit = f'[{mtypes["_commit"]["url"].split("/")[-1][:7]}]({mtypes["_commit"]["html_url"]})'
        fp.write(f'## [{class_name}]({meta["_html"]})\nModel: {commit} {mtypes["_commit"]["date"]}\n```\n')
        if meta['_category'] == 'Classes':
            fp.write(f'class {class_name}:\n')
            for k, v in mtypes[model]['Properties'].items():
                ptype = typemap.get(v['type'], v['type'])
                rc = mtypes.get(ptype, {}).get('Metadata', {}).get('_root_class', '')
                ptype = typerefs.get(rc, ptype)  # Use SpdxId for all Element subclasses
                ptype = 'SpdxId' if k == 'spdxId' else ptype  # Patch until Element fixed in model
                prop = f'{k}: {ptype} = None'
                opt = ' optional' if str(v['minCount']) == '0' else ''
                # Bug fix: show the property's actual minCount in the cardinality
                # annotation.  The original inverted the condition and hard-coded
                # '1' whenever maxCount != '1', emitting contradictory output
                # such as "optional Set[1..*]" for minCount-0 properties.
                pmin = v['minCount'] if (pmax := v['maxCount']) != '1' else '1'
                mult = f'Set[{pmin}..{pmax}]' if pmax != '1' else ''
                fp.write(f' {prop:50} #{opt} {mult}\n')
        elif meta['_category'] == 'Vocabularies':
            fp.write(f'class {class_name}(Enum):\n')
            for n, v in enumerate(mtypes[model]['Entries'], start=1):
                fp.write(f' {v} = {n}\n')
        fp.write('```\n')


def subclass(td):
    """Return the bare (un-namespaced) superclass name from a type definition."""
    superclass_ref = td['Metadata'].get('SubclassOf', '')
    # Keep only the final path segment; rpartition handles refs with no '/'
    return superclass_ref.rpartition('/')[2]


# Fill in or update properties from superclass
def build_td(tname, model_types):
    """Complete one type definition in place and return it.

    Applies default cardinality constraints, records the root class for
    reference types, and propagates/validates properties inherited from the
    superclass.  Superclasses must already have been processed, so callers
    invoke this in reverse topological order.

    :param tname: str - type name (key into model_types); unknown names return {}
    :param model_types: dict - full model snapshot; entries are mutated in place
    :return: dict - the updated type definition, or {} if tname is unknown
    """
    td = model_types.get(tname, {})
    if td:
        sd = model_types.get(subclass(td), {})  # superclass definition, if any
        # Mark reference roots (e.g. Element) so subclasses serialize as SpdxId.
        # (Replaces the original conditional-expression-as-statement idiom.)
        if tname in typerefs:
            td['Metadata']['_root_class'] = tname
        # Inherit the superclass's _root_class marker, if present
        td['Metadata'].update({k: v for k, v in sd.get('Metadata', {}).items() if k == '_root_class'})
        if 'Properties' in td:
            # Apply default property constraints
            for k, tdp in td['Properties'].items():
                p = {'minCount': 0, 'maxCount': '*'}
                p.update(tdp)
                if p['maxCount'] != '*':
                    assert int(p['minCount']) <= int(p['maxCount'])
                td['Properties'][k] = p
            # Propagate properties to subclasses
            if sd:
                for k, p in sd.get('Properties', {}).items():
                    if k in td['Properties'] and p != td['Properties'][k]:
                        tdp = td['Properties'][k]  # subclass may restrict, never relax
                        assert tdp['type'] == p['type'], f'Property type mismatch: {tname} {tdp["type"]} != {p["type"]}'
                        assert int(tdp['minCount']) >= int(p['minCount']),\
                            f'Cannot relax constraint: {tname} minCount {p["minCount"]} -> {tdp["minCount"]}'
                        if p['maxCount'] != '*':
                            assert int(tdp['maxCount']) <= int(p['maxCount']),\
                                f'Cannot relax constraint: {tname} maxCount {p["maxCount"]} -> {tdp["maxCount"]}'
                        p = tdp
                    td['Properties'][k] = p
    return td


def make_types(model: str = MODEL_SNAPSHOT, out: str = OUT_DIR) -> None:
    """Generate markdown class stubs from a model snapshot.

    :param model: str - snapshot filename (created by "parse_model")
    :param out: str - directory containing the snapshot; stubs are written
        beneath it, one subdirectory per profile
    """
    # Load model snapshot created by "parse_model"
    with open(os.path.join(out, model)) as fp:
        model_types = json.load(fp)
    model_types.update(model_patches)  # add classes missing from the model

    # Check consistency ("_"-prefixed keys hold snapshot metadata, not types)
    for k, v in model_types.items():
        if not k.startswith('_'):
            assert k == v['Metadata']['name'], f'{k} name mismatch'

    # Build the superclass -> subclasses tree.
    # (Plain loops replace the original list comprehensions that were used
    # purely for their side effects.)
    refs = defaultdict(list)
    for k, t in model_types.items():
        if not k.startswith('_'):
            refs[subclass(t)].append(t['Metadata']['name'])

    # Update model_types to full type definitions after subclassing;
    # superclasses must be processed first, hence reverse topological order.
    for tname in reversed(list(TopologicalSorter(refs).static_order())):
        build_td(tname, model_types)

    print('Subclass tree:')
    for k, v in refs.items():
        print(f'{k:>30}: [{", ".join(v)}]')

    # Write class files for each type definition
    for tname in model_types:
        if not tname.startswith('_'):
            write_tools_class(tname, model_types, out)


# Command-line entry point: fire exposes make_types' parameters as CLI flags
if __name__ == '__main__':
    fire.Fire(make_types)
132 changes: 132 additions & 0 deletions model/parse_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0

import fire
import json
import os
import re
from io import TextIOWrapper
from typing import TextIO
from urllib.request import urlopen, Request
from urllib.parse import urlparse

# GitHub contents-API URL of the spdx-3-model "model" directory
SPDX_MODEL = 'https://api.github.com/repos/spdx/spdx-3-model/contents/model'
# Output directory prefix; make_classes appends the model commit timestamp
OUTDIR = 'generated'


class WebDirEntry:
    """
    Fake os.DirEntry type for GitHub filesystem

    Provides the attributes list_dir/open_file rely on:
    name - entry name; path - URL to list (dirs) or download (files);
    url - GitHub API URL of the entry; html - GitHub web-page URL.
    """
    def __init__(self, name: str, path: str, url: str, html: str):
        self.name = name
        self.path = path
        self.url = url
        self.html = html


def list_dir(dirpath: str) -> dict:
    """
    Return a dict listing the files and directories in a directory on local filesystem or GitHub repo.

    :param dirpath: str - a filesystem path or GitHub API URL
    :return: dict {files: [DirEntry*], dirs: [DirEntry*]}
    Local Filesystem: Each list item is an os.DirEntry structure containing name and path attributes
    GitHub Filesystem: Each list item has name, path, and url (download URL) attributes
    """

    files, dirs = [], []
    u = urlparse(dirpath)
    if all([u.scheme, u.netloc]):
        # NOTE(review): AUTH is only defined under the __main__ guard, so this
        # branch raises NameError when the module is imported - confirm intent.
        with urlopen(Request(dirpath, headers=AUTH)) as d:
            for dl in json.loads(d.read().decode()):
                # Directories link to their API URL (listable); files to raw download
                url = 'url' if dl['type'] == 'dir' else 'download_url'
                entry = WebDirEntry(dl['name'], dl[url], dl['url'], dl['html_url'])
                (dirs if dl['type'] == 'dir' else files).append(entry)
    else:
        with os.scandir(dirpath) as dlist:
            for entry in dlist:
                # Use DirEntry's cached type check instead of os.path.isdir(entry)
                (dirs if entry.is_dir() else files).append(entry)
    return {'files': files, 'dirs': dirs}


def open_file(fileentry: os.DirEntry) -> TextIO:
    """Open a local or GitHub directory entry for reading as UTF-8 text."""
    parts = urlparse(fileentry.path)
    is_remote = all([parts.scheme, parts.netloc])
    if is_remote:
        # Wrap the HTTP byte stream so callers get a text file object
        return TextIOWrapper(urlopen(Request(fileentry.path, headers=AUTH)), encoding='utf8')
    return open(fileentry.path, 'r', encoding='utf8')


def load_model(fp):
    """Parse a model markdown file into a nested dict.

    "## Section" headings open a (fresh) top-level dict; "- key: value" lines
    add entries to the current section; a bare "- name" line opens a nested
    dict, which indented "  - key: value" lines then fill.
    """
    parsed = {}
    section = None   # dict for the current "## ..." heading
    detail = None    # dict for the current bare "- name" item, if any
    for line in fp:
        if heading := re.match(r'^\s*##\s*(.+?)(\s*)$', line):
            section = parsed[heading.group(1)] = {}
            detail = None
        elif pair := re.match(r'^[-*]\s*([-/\w]+):\s*(.*?)\s*$', line):
            section[pair.group(1)] = pair.group(2)
        elif item := re.match(r'^[-*]\s*([-/\w]+)\s*$', line):
            detail = section[item.group(1)] = {}
        elif sub := re.match(r'^\s+[-*]\s*([-/\w]+):\s*(.*?)\s*$', line):
            detail[sub.group(1)] = sub.group(2)
    return parsed


def make_classes(model: str = SPDX_MODEL, out: str = OUTDIR) -> None:
    """Crawl the spdx-3-model repo (or a local copy) and write a JSON snapshot.

    Walks profile directories, parses each Classes/Vocabularies markdown file
    with load_model, and dumps the collected type definitions plus commit info
    to <out>_<timestamp>/modelTypes.json for consumption by make_types.
    """
    # get model latest commit date
    # NOTE(review): the commit query is built from the SPDX_MODEL constant,
    # not the `model` argument - confirm that is intentional for local runs.
    m = re.match(r'^(.*/spdx/spdx-3-model)/contents/(.+?)\s*$', SPDX_MODEL)
    q = f'{m.group(1)}/commits?path={m.group(2)}'
    commit = json.load(TextIOWrapper(urlopen(Request(q, headers=AUTH)), encoding='utf8'))[0]
    # Suffix the output dir with the commit timestamp (colons/dashes removed)
    out += '_' + commit['commit']['committer']['date'].replace(':', '').replace('-', '')
    os.makedirs(out, exist_ok=True)

    model_refs = {}  # type name -> model reference URI, for duplicate detection
    model_types = {
        '_commit': {
            'url': commit['url'],
            'html_url': commit['html_url'],
            'date': commit['commit']['committer']['date']},
        '_defaults': {}}
    # Top level: one directory per profile, no loose files expected
    e1 = list_dir(model)
    assert len(e1['files']) == 0
    for d1 in e1['dirs']:
        print(f'{d1.name}')
        e2 = list_dir(d1.path)
        # Each profile has exactly one file: its defaults/metadata markdown
        assert len(e2['files']) == 1
        model_types['_defaults'][d1.name] = load_model(open_file(e2['files'][0]))
        for d2 in e2['dirs']:
            # print(f'. {d2.name}')
            e3 = list_dir(d2.path)
            assert len(e3['dirs']) == 0
            assert d2.name in {'Classes', 'Individuals', 'Properties', 'Vocabularies'}
            # Only class and vocabulary definitions become generated types
            if d2.name in {'Classes', 'Vocabularies'}:
                for f3 in e3['files']:
                    if not f3.name.startswith('_'):
                        model = load_model(open_file(f3))
                        meta = model['Metadata']
                        if meta['name'] in model_refs:
                            # Same type name seen in two profiles; later one wins
                            m = model_types[meta['name']]['Metadata']
                            print(f"###### Duplicate: {meta['name']} in {m['_profile']}/{m['_file']}, {d1.name}/{f3.name}")
                        # Namespaced reference: <profile id>/<type name>
                        ref = '/'.join((model_types['_defaults'][d1.name]['Metadata']['id'], meta['name']))
                        model_refs[meta['name']] = ref
                        # Record provenance used later by make_types
                        meta['_modelRef'] = ref
                        meta['_profile'] = d1.name
                        meta['_category'] = d2.name
                        meta['_file'] = f3.name
                        meta['_html'] = f3.html
                        model_types[meta['name']] = model
                    else:
                        print('###### Ignored:', f3.name)

    # "- 2" excludes the _commit and _defaults bookkeeping entries
    print(f'\n{len(model_types) - 2} Types in model')
    with open(os.path.join(out, 'modelTypes.json'), 'w') as fp:
        json.dump(model_types, fp, indent=2)


if __name__ == '__main__':
    # GitHub API token from the environment.  NOTE: AUTH is a module-level
    # global but is only defined when run as a script; importing callers of
    # list_dir/open_file/make_classes must set parse_model.AUTH themselves.
    AUTH = {'Authorization': f'token {os.environ["GitHubToken"]}'}
    print(f'GitHub Token: ..{AUTH["Authorization"][-4:]}')
    fire.Fire(make_classes)
34 changes: 34 additions & 0 deletions model/testclass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass, fields
from generated.Core import CreationInfo, Element, Extension, ExternalIdentifier, ExternalMap, ExternalReference
from generated.Core import IntegrityMethod, NamespaceMap
from generated.Software import SBOMType


@dataclass
class Sbom:
    """Hand-written stand-in matching the generated Sbom class shape.

    Used by the __main__ block below to sanity-check the generated Core and
    Software classes.  Every field defaults to None; the model's multiplicity
    constraints are not enforced here.
    """
    sbomType: SBOMType = None
    context: str = None
    element: Element = None
    rootElement: Element = None
    namespaces: NamespaceMap = None
    imports: ExternalMap = None
    spdxId: str = None
    name: str = None
    summary: str = None
    description: str = None
    comment: str = None
    creationInfo: CreationInfo = None
    verifiedUsing: IntegrityMethod = None
    externalReference: ExternalReference = None
    externalIdentifier: ExternalIdentifier = None
    extension: Extension = None


if __name__ == '__main__':
    # Bug fix: instantiate the dataclass.  The original "sb = Sbom" bound the
    # class object itself, so "sb.sbomType = 'source'" set a class attribute
    # rather than an instance field.
    sb = Sbom()
    sb.sbomType = 'source'
    # Print each declared field name and its annotated type
    for f in fields(sb):
        print(f.name, f.type)