Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add logical types generator code #2

Merged
merged 11 commits into from
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions model/make_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0

import fire
import json
import os
from collections import defaultdict
from graphlib import TopologicalSorter

# Default directory holding the model snapshot and the generated class files
OUT_DIR = 'generated'
# Default filename of the snapshot written by parse_model
MODEL_SNAPSHOT = 'modelTypes.json'

# Model types to be serialized as references, not inlined
typerefs = {
    "Element": "SpdxId"
}

# Map from xsd types to Python built-in or model-defined types
typemap = {
    'xsd:anyURI': 'AnyUri',
    'xsd:integer': 'int',
    'xsd:string': 'str'
}

# Type definitions to patch missing classes in model.
# Entries mirror the structure produced by parse_model.load_model;
# "_generated": True marks them as synthesized here, not read from the repo.
model_patches = {
    'AnyUri': {
        'Summary': {},
        'Description': {},
        'Metadata': {
            'name': 'AnyUri',
            'SubclassOf': 'none',
            '_modelRef': 'https://rdf.spdx.org/v3/Core/AnyUri',
            '_profile': 'Core',
            '_category': 'Classes',
            '_file': 'AnyUri.md',
            '_html': '',
            '_generated': True
        },
        'Properties': {}
    },
    'SpdxId': {
        'Summary': {},
        'Description': {},
        'Metadata': {
            'name': 'SpdxId',
            'SubclassOf': 'none',
            '_modelRef': 'https://rdf.spdx.org/v3/Core/SpdxId',
            '_profile': 'Core',
            '_category': 'Classes',
            '_file': 'SpdxId.md',
            '_html': '',
            '_generated': True
        },
        'Properties': {}
    }
}


def write_tools_class(model, mtypes, out):
    """Write a markdown class-stub file for one model type definition.

    :param model: str - name of the type (key into mtypes)
    :param mtypes: dict - full model snapshot, including the "_commit" entry
    :param out: str - output directory; the file goes to out/<profile>/<name>.md
    """
    meta = mtypes[model]['Metadata']
    # Guard against unknown metadata keys sneaking in from the model source
    for m in meta:
        assert m in {'name', 'SubclassOf', 'Instantiability',
                     '_modelRef', '_profile', '_category', '_file', '_html', '_generated', '_root_class'}

    pdir = os.path.join(out, meta['_profile'])
    os.makedirs(pdir, exist_ok=True)
    class_name = meta['name']
    with open(os.path.join(pdir, meta['name']) + '.md', 'w') as fp:
        # Link the abbreviated commit hash to its GitHub page for traceability
        commit = f'[{mtypes["_commit"]["url"].split("/")[-1][:7]}]({mtypes["_commit"]["html_url"]})'
        fp.write(f'## [{class_name}]({meta["_html"]})\nModel: {commit} {mtypes["_commit"]["date"]}\n```\n')
        if meta['_category'] == 'Classes':
            fp.write(f'class {class_name}:\n')
            for k, v in mtypes[model]['Properties'].items():
                ptype = typemap.get(v['type'], v['type'])
                rc = mtypes.get(ptype, {}).get('Metadata', {}).get('_root_class', '')
                ptype = typerefs.get(rc, ptype)  # Use SpdxId for all Element subclasses
                ptype = 'SpdxId' if k == 'spdxId' else ptype  # Patch until Element fixed in model
                prop = f'{k}: {ptype} = None'
                opt = ' optional' if str(v['minCount']) == '0' else ''
                # Bug fix: show the property's actual minCount in the cardinality
                # annotation.  The original inverted the condition and hard-coded
                # '1' whenever maxCount != '1', emitting contradictory output
                # such as "optional Set[1..*]" for minCount-0 properties.
                pmin = v['minCount'] if (pmax := v['maxCount']) != '1' else '1'
                mult = f'Set[{pmin}..{pmax}]' if pmax != '1' else ''
                fp.write(f' {prop:50} #{opt} {mult}\n')
        elif meta['_category'] == 'Vocabularies':
            fp.write(f'class {class_name}(Enum):\n')
            for n, v in enumerate(mtypes[model]['Entries'], start=1):
                fp.write(f' {v} = {n}\n')
        fp.write('```\n')


def subclass(td):
    """Return the bare (un-namespaced) superclass name from a type definition."""
    superclass_ref = td['Metadata'].get('SubclassOf', '')
    # Keep only the final path segment; rpartition handles refs with no '/'
    return superclass_ref.rpartition('/')[2]


# Fill in or update properties from superclass
def build_td(tname, model_types):
    """Complete one type definition in place and return it.

    Applies default cardinality constraints, records the root class for
    reference types, and propagates/validates properties inherited from the
    superclass.  Superclasses must already have been processed, so callers
    invoke this in reverse topological order.

    :param tname: str - type name (key into model_types); unknown names return {}
    :param model_types: dict - full model snapshot; entries are mutated in place
    :return: dict - the updated type definition, or {} if tname is unknown
    """
    td = model_types.get(tname, {})
    if td:
        sd = model_types.get(subclass(td), {})  # superclass definition, if any
        # Mark reference roots (e.g. Element) so subclasses serialize as SpdxId.
        # (Replaces the original conditional-expression-as-statement idiom.)
        if tname in typerefs:
            td['Metadata']['_root_class'] = tname
        # Inherit the superclass's _root_class marker, if present
        td['Metadata'].update({k: v for k, v in sd.get('Metadata', {}).items() if k == '_root_class'})
        if 'Properties' in td:
            # Apply default property constraints
            for k, tdp in td['Properties'].items():
                p = {'minCount': 0, 'maxCount': '*'}
                p.update(tdp)
                if p['maxCount'] != '*':
                    assert int(p['minCount']) <= int(p['maxCount'])
                td['Properties'][k] = p
            # Propagate properties to subclasses
            if sd:
                for k, p in sd.get('Properties', {}).items():
                    if k in td['Properties'] and p != td['Properties'][k]:
                        tdp = td['Properties'][k]  # subclass may restrict, never relax
                        assert tdp['type'] == p['type'], f'Property type mismatch: {tname} {tdp["type"]} != {p["type"]}'
                        assert int(tdp['minCount']) >= int(p['minCount']),\
                            f'Cannot relax constraint: {tname} minCount {p["minCount"]} -> {tdp["minCount"]}'
                        if p['maxCount'] != '*':
                            assert int(tdp['maxCount']) <= int(p['maxCount']),\
                                f'Cannot relax constraint: {tname} maxCount {p["maxCount"]} -> {tdp["maxCount"]}'
                        p = tdp
                    td['Properties'][k] = p
    return td


def make_types(model: str = MODEL_SNAPSHOT, out: str = OUT_DIR) -> None:
    """Generate markdown class stubs from a model snapshot.

    :param model: str - snapshot filename (created by "parse_model")
    :param out: str - directory containing the snapshot; stubs are written
        beneath it, one subdirectory per profile
    """
    # Load model snapshot created by "parse_model"
    with open(os.path.join(out, model)) as fp:
        model_types = json.load(fp)
    model_types.update(model_patches)  # add classes missing from the model

    # Check consistency ("_"-prefixed keys hold snapshot metadata, not types)
    for k, v in model_types.items():
        if not k.startswith('_'):
            assert k == v['Metadata']['name'], f'{k} name mismatch'

    # Build the superclass -> subclasses tree.
    # (Plain loops replace the original list comprehensions that were used
    # purely for their side effects.)
    refs = defaultdict(list)
    for k, t in model_types.items():
        if not k.startswith('_'):
            refs[subclass(t)].append(t['Metadata']['name'])

    # Update model_types to full type definitions after subclassing;
    # superclasses must be processed first, hence reverse topological order.
    for tname in reversed(list(TopologicalSorter(refs).static_order())):
        build_td(tname, model_types)

    print('Subclass tree:')
    for k, v in refs.items():
        print(f'{k:>30}: [{", ".join(v)}]')

    # Write class files for each type definition
    for tname in model_types:
        if not tname.startswith('_'):
            write_tools_class(tname, model_types, out)


# Command-line entry point: fire exposes make_types' parameters as CLI flags
if __name__ == '__main__':
    fire.Fire(make_types)
132 changes: 132 additions & 0 deletions model/parse_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0

import fire
import json
import os
import re
from io import TextIOWrapper
from typing import TextIO
from urllib.request import urlopen, Request
from urllib.parse import urlparse

# GitHub contents-API URL of the spdx-3-model "model" directory
SPDX_MODEL = 'https://api.github.com/repos/spdx/spdx-3-model/contents/model'
# Output directory prefix; make_classes appends the model commit timestamp
OUTDIR = 'generated'


class WebDirEntry:
    """
    Fake os.DirEntry type for GitHub filesystem

    Provides the attributes list_dir/open_file rely on:
    name - entry name; path - URL to list (dirs) or download (files);
    url - GitHub API URL of the entry; html - GitHub web-page URL.
    """
    def __init__(self, name: str, path: str, url: str, html: str):
        self.name = name
        self.path = path
        self.url = url
        self.html = html


def list_dir(dirpath: str) -> dict:
    """
    Return a dict listing the files and directories in a directory on local filesystem or GitHub repo.

    :param dirpath: str - a filesystem path or GitHub API URL
    :return: dict {files: [DirEntry*], dirs: [DirEntry*]}
    Local Filesystem: Each list item is an os.DirEntry structure containing name and path attributes
    GitHub Filesystem: Each list item has name, path, and url (download URL) attributes
    """

    files, dirs = [], []
    u = urlparse(dirpath)
    if all([u.scheme, u.netloc]):
        # NOTE(review): AUTH is only defined under the __main__ guard, so this
        # branch raises NameError when the module is imported - confirm intent.
        with urlopen(Request(dirpath, headers=AUTH)) as d:
            for dl in json.loads(d.read().decode()):
                # Directories link to their API URL (listable); files to raw download
                url = 'url' if dl['type'] == 'dir' else 'download_url'
                entry = WebDirEntry(dl['name'], dl[url], dl['url'], dl['html_url'])
                (dirs if dl['type'] == 'dir' else files).append(entry)
    else:
        with os.scandir(dirpath) as dlist:
            for entry in dlist:
                # Use DirEntry's cached type check instead of os.path.isdir(entry)
                (dirs if entry.is_dir() else files).append(entry)
    return {'files': files, 'dirs': dirs}


def open_file(fileentry: os.DirEntry) -> TextIO:
    """Open a local or GitHub directory entry for reading as UTF-8 text."""
    parts = urlparse(fileentry.path)
    is_remote = all([parts.scheme, parts.netloc])
    if is_remote:
        # Wrap the HTTP byte stream so callers get a text file object
        return TextIOWrapper(urlopen(Request(fileentry.path, headers=AUTH)), encoding='utf8')
    return open(fileentry.path, 'r', encoding='utf8')


def load_model(fp):
    """Parse a model markdown file into a nested dict.

    "## Section" headings open a (fresh) top-level dict; "- key: value" lines
    add entries to the current section; a bare "- name" line opens a nested
    dict, which indented "  - key: value" lines then fill.
    """
    parsed = {}
    section = None   # dict for the current "## ..." heading
    detail = None    # dict for the current bare "- name" item, if any
    for line in fp:
        if heading := re.match(r'^\s*##\s*(.+?)(\s*)$', line):
            section = parsed[heading.group(1)] = {}
            detail = None
        elif pair := re.match(r'^[-*]\s*([-/\w]+):\s*(.*?)\s*$', line):
            section[pair.group(1)] = pair.group(2)
        elif item := re.match(r'^[-*]\s*([-/\w]+)\s*$', line):
            detail = section[item.group(1)] = {}
        elif sub := re.match(r'^\s+[-*]\s*([-/\w]+):\s*(.*?)\s*$', line):
            detail[sub.group(1)] = sub.group(2)
    return parsed


def make_classes(model: str = SPDX_MODEL, out: str = OUTDIR) -> None:
    """Crawl the spdx-3-model repo (or a local copy) and write a JSON snapshot.

    Walks profile directories, parses each Classes/Vocabularies markdown file
    with load_model, and dumps the collected type definitions plus commit info
    to <out>_<timestamp>/modelTypes.json for consumption by make_types.
    """
    # get model latest commit date
    # NOTE(review): the commit query is built from the SPDX_MODEL constant,
    # not the `model` argument - confirm that is intentional for local runs.
    m = re.match(r'^(.*/spdx/spdx-3-model)/contents/(.+?)\s*$', SPDX_MODEL)
    q = f'{m.group(1)}/commits?path={m.group(2)}'
    commit = json.load(TextIOWrapper(urlopen(Request(q, headers=AUTH)), encoding='utf8'))[0]
    # Suffix the output dir with the commit timestamp (colons/dashes removed)
    out += '_' + commit['commit']['committer']['date'].replace(':', '').replace('-', '')
    os.makedirs(out, exist_ok=True)

    model_refs = {}  # type name -> model reference URI, for duplicate detection
    model_types = {
        '_commit': {
            'url': commit['url'],
            'html_url': commit['html_url'],
            'date': commit['commit']['committer']['date']},
        '_defaults': {}}
    # Top level: one directory per profile, no loose files expected
    e1 = list_dir(model)
    assert len(e1['files']) == 0
    for d1 in e1['dirs']:
        print(f'{d1.name}')
        e2 = list_dir(d1.path)
        # Each profile has exactly one file: its defaults/metadata markdown
        assert len(e2['files']) == 1
        model_types['_defaults'][d1.name] = load_model(open_file(e2['files'][0]))
        for d2 in e2['dirs']:
            # print(f'. {d2.name}')
            e3 = list_dir(d2.path)
            assert len(e3['dirs']) == 0
            assert d2.name in {'Classes', 'Individuals', 'Properties', 'Vocabularies'}
            # Only class and vocabulary definitions become generated types
            if d2.name in {'Classes', 'Vocabularies'}:
                for f3 in e3['files']:
                    if not f3.name.startswith('_'):
                        model = load_model(open_file(f3))
                        meta = model['Metadata']
                        if meta['name'] in model_refs:
                            # Same type name seen in two profiles; later one wins
                            m = model_types[meta['name']]['Metadata']
                            print(f"###### Duplicate: {meta['name']} in {m['_profile']}/{m['_file']}, {d1.name}/{f3.name}")
                        # Namespaced reference: <profile id>/<type name>
                        ref = '/'.join((model_types['_defaults'][d1.name]['Metadata']['id'], meta['name']))
                        model_refs[meta['name']] = ref
                        # Record provenance used later by make_types
                        meta['_modelRef'] = ref
                        meta['_profile'] = d1.name
                        meta['_category'] = d2.name
                        meta['_file'] = f3.name
                        meta['_html'] = f3.html
                        model_types[meta['name']] = model
                    else:
                        print('###### Ignored:', f3.name)

    # "- 2" excludes the _commit and _defaults bookkeeping entries
    print(f'\n{len(model_types) - 2} Types in model')
    with open(os.path.join(out, 'modelTypes.json'), 'w') as fp:
        json.dump(model_types, fp, indent=2)


if __name__ == '__main__':
    # GitHub API token from the environment.  NOTE: AUTH is a module-level
    # global but is only defined when run as a script; importing callers of
    # list_dir/open_file/make_classes must set parse_model.AUTH themselves.
    AUTH = {'Authorization': f'token {os.environ["GitHubToken"]}'}
    print(f'GitHub Token: ..{AUTH["Authorization"][-4:]}')
    fire.Fire(make_classes)
34 changes: 34 additions & 0 deletions model/testclass.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass, fields
from generated.Core import CreationInfo, Element, Extension, ExternalIdentifier, ExternalMap, ExternalReference
from generated.Core import IntegrityMethod, NamespaceMap
from generated.Software import SBOMType


@dataclass
class Sbom:
    """Hand-written stand-in matching the generated Sbom class shape.

    Used by the __main__ block below to sanity-check the generated Core and
    Software classes.  Every field defaults to None; the model's multiplicity
    constraints are not enforced here.
    """
    sbomType: SBOMType = None
    context: str = None
    element: Element = None
    rootElement: Element = None
    namespaces: NamespaceMap = None
    imports: ExternalMap = None
    spdxId: str = None
    name: str = None
    summary: str = None
    description: str = None
    comment: str = None
    creationInfo: CreationInfo = None
    verifiedUsing: IntegrityMethod = None
    externalReference: ExternalReference = None
    externalIdentifier: ExternalIdentifier = None
    extension: Extension = None


if __name__ == '__main__':
    # Bug fix: instantiate the dataclass.  The original "sb = Sbom" bound the
    # class object itself, so "sb.sbomType = 'source'" set a class attribute
    # rather than an instance field.
    sb = Sbom()
    sb.sbomType = 'source'
    # Print each declared field name and its annotated type
    for f in fields(sb):
        print(f.name, f.type)