Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Issue 305] add new json parser #366

Merged
merged 22 commits into from Dec 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
296816b
[issue-305] add new parser
meretp Nov 25, 2022
cd095e2
[issue-305, refactor] add method to construct an object and raise SPD…
meretp Dec 14, 2022
f2f91fd
[issue-305, refactor] annotation_parser: extract methods to improve r…
meretp Dec 14, 2022
190cd5a
[issue-305, refactor] add methods to parse required/ optional fields …
meretp Dec 14, 2022
2834000
[issue-305, refactor] relationship_parser: extract dict to invert rel…
meretp Dec 14, 2022
6297673
[issue-305, refactor] add method to raise error if logger has message…
meretp Dec 14, 2022
4741a43
[issue-305, review] refactor methods in dict_parsing_functions.py, sm…
meretp Dec 15, 2022
080d848
[issue-305, refactor] json_parser
meretp Dec 15, 2022
5826922
[issue-305, reformat]
meretp Dec 19, 2022
e6332cb
[issue-305] add testcases and update license_expression parser
meretp Dec 19, 2022
1f6d5b6
[issue-305, refactor] delete duplicated check for error type
meretp Dec 20, 2022
fc980b1
[issue-305, review] fix messages, naming, type hints
meretp Dec 21, 2022
3fe3e11
[issue-305, review] refactor relationship_parser
meretp Dec 21, 2022
0be1780
[issue-305, review] refactor snippet_parser
meretp Dec 21, 2022
c5b8d3c
[issue-305, review] make naming consistent
meretp Dec 21, 2022
03cce38
[issue-305, review] add test for dict parsing functions and catch Val…
meretp Dec 21, 2022
2dcd125
[issue-305, review] add None handling for required fields
meretp Dec 21, 2022
50c3038
[issue-305, review] make error messages consistent, add test for json…
meretp Dec 28, 2022
562f288
[issue-305, review] add tests, change test data, naming of tests and …
meretp Dec 22, 2022
a722036
[issue-305, review] add method to parse fields that can be SpdxNone o…
meretp Dec 22, 2022
c8851d8
[issue-305, review] refactor parse_field_or_log_error
meretp Dec 22, 2022
347051a
[issue-305, review] reformat, type hints, fix typos, error messages
meretp Dec 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/model/typing/constructor_type_errors.py
Expand Up @@ -10,3 +10,6 @@ class ConstructorTypeErrors(TypeError):

def __init__(self, messages: List[str]):
self.messages = messages

def get_messages(self):
    """Return a new list holding the stored messages.

    The result is a shallow copy, so mutating it leaves ``self.messages`` unchanged.
    """
    return [*self.messages]
Empty file added src/parser/__init__.py
Empty file.
21 changes: 21 additions & 0 deletions src/parser/error.py
@@ -0,0 +1,21 @@
# Copyright (c) 2022 spdx contributors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List


class SPDXParsingError(Exception):
    """Exception carrying the list of messages gathered while parsing failed."""

    # The messages exactly as handed to the constructor (kept by reference).
    messages: List[str]

    def __init__(self, messages: List[str]):
        """Remember ``messages`` on the exception instance.

        :param messages: the error messages describing what could not be parsed.
        """
        self.messages = messages

    def get_messages(self):
        """Return a shallow copy of the stored messages as a new list."""
        return [*self.messages]
Empty file added src/parser/json/__init__.py
Empty file.
56 changes: 56 additions & 0 deletions src/parser/json/actor_parser.py
@@ -0,0 +1,56 @@
# Copyright (c) 2022 spdx contributors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from typing import Union, Pattern, Match, Optional

from src.model.actor import Actor, ActorType
from src.model.spdx_no_assertion import SpdxNoAssertion
from src.parser.error import SPDXParsingError
from src.parser.json.dict_parsing_functions import construct_or_raise_parsing_error


class ActorParser:

@staticmethod
def parse_actor(actor: str) -> Actor:
tool_re: Pattern = re.compile(r"^Tool:\s*(.+)", re.UNICODE)
person_re: Pattern = re.compile(r"^Person:\s*(([^(])+)(\((.*)\))?", re.UNICODE)
org_re: Pattern = re.compile(r"^Organization:\s*(([^(])+)(\((.*)\))?", re.UNICODE)
tool_match: Match = tool_re.match(actor)
person_match: Match = person_re.match(actor)
org_match: Match = org_re.match(actor)

if tool_match:
name: str = tool_match.group(1).strip()
creator = construct_or_raise_parsing_error(Actor, dict(actor_type=ActorType.TOOL, name=name))

elif person_match:
name: str = person_match.group(1).strip()
email: Optional[str] = ActorParser.get_email_or_none(person_match)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
email: Optional[str] = ActorParser.get_email_or_none(person_match)
email: Optional[str] = person_match.group(4).strip() or None

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This way, we can discard the get_email_or_none method

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wouldn't work. If person_match.group(4) is None (e.g. if the brackets for email are completely missing) this would lead to an AttributeError. I also thought that this could be handled in one line but didn't find a good way besides using the method.

creator = construct_or_raise_parsing_error(Actor, dict(actor_type=ActorType.PERSON, name=name, email=email))
elif org_match:
name: str = org_match.group(1).strip()
email: Optional[str] = ActorParser.get_email_or_none(org_match)
creator = construct_or_raise_parsing_error(Actor,
dict(actor_type=ActorType.ORGANIZATION, name=name, email=email))
else:
raise SPDXParsingError([f"Actor {actor} doesn't match any of person, organization or tool."])

return creator

@staticmethod
def get_email_or_none(match: Match) -> Optional[str]:
email_match = match.group(4)
if email_match and email_match.strip():
email = email_match.strip()
else:
email = None
return email
104 changes: 104 additions & 0 deletions src/parser/json/annotation_parser.py
@@ -0,0 +1,104 @@
# Copyright (c) 2022 spdx contributors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import datetime
from typing import Dict, Optional, List

from src.model.actor import Actor
from src.model.annotation import Annotation, AnnotationType
from src.parser.error import SPDXParsingError
from src.parser.json.actor_parser import ActorParser
from src.parser.json.dict_parsing_functions import datetime_from_str, construct_or_raise_parsing_error, \
parse_field_or_log_error, append_parsed_field_or_log_error, raise_parsing_error_if_logger_has_messages, \
parse_list_of_elements
from src.parser.logger import Logger


class AnnotationParser:
    """Turns the annotation and review entries of an SPDX JSON document into ``Annotation`` objects."""

    # Logger handed to the parsing helpers while walking the whole document.
    logger: Logger
    # Parser used for the "annotator" and "reviewer" fields.
    actor_parser: ActorParser

    def __init__(self):
        """Create the parser with a fresh ``Logger`` and ``ActorParser``."""
        self.logger = Logger()
        self.actor_parser = ActorParser()

    def parse_all_annotations(self, input_doc_dict: Dict) -> List[Annotation]:
        """Gather annotations from the document, its reviews, packages, files and snippets.

        The document's own "annotations" are handled first, then each entry of
        "revieweds" (parsed with the document's "SPDXID"), then the
        "annotations" of every element under "packages", "files" and
        "snippets", in that order.

        :param input_doc_dict: the dictionary of the whole SPDX document.
        :return: the list of parsed annotations.
        """
        collected = []
        # The document is treated like any other element carrying annotations.
        self.parse_annotations_from_object(collected, [input_doc_dict])

        document_spdx_id = input_doc_dict.get("SPDXID")
        for review_dict in input_doc_dict.get("revieweds", []):
            collected = append_parsed_field_or_log_error(
                self.logger, collected, review_dict,
                lambda entry: self.parse_review(entry, spdx_id=document_spdx_id))

        for element_key in ("packages", "files", "snippets"):
            self.parse_annotations_from_object(collected, input_doc_dict.get(element_key, []))

        raise_parsing_error_if_logger_has_messages(self.logger, "annotations")
        return collected

    def parse_annotations_from_object(self, annotations: List[Annotation], element_list: List[Dict]):
        """Extend ``annotations`` in place with the annotations of every element in ``element_list``.

        Each element's "SPDXID" is passed on to ``parse_annotation`` as the
        fallback id for that element's annotations.

        :param annotations: the list that receives the parsed annotations.
        :param element_list: dictionaries that may carry an "annotations" list.
        """
        for element in element_list:
            owner_spdx_id: Optional[str] = element.get("SPDXID")
            raw_annotations: List[Dict] = element.get("annotations", [])

            def parse_for_owner(annotation_dict):
                return self.parse_annotation(annotation_dict, spdx_id=owner_spdx_id)

            parsed = parse_field_or_log_error(self.logger, raw_annotations, parse_for_owner, [], True)
            annotations.extend(parsed)

    def parse_annotation(self, annotation_dict: Dict, spdx_id: Optional[str] = None) -> Annotation:
        """Build one ``Annotation`` from its JSON dictionary.

        :param annotation_dict: dictionary with "annotationType", "annotator",
            "annotationDate" and optionally "comment" and "SPDXID".
        :param spdx_id: id used when the dictionary has no truthy "SPDXID" of its own.
        :return: the constructed ``Annotation``.
        """
        annotation_logger = Logger()
        own_spdx_id = annotation_dict.get("SPDXID")
        target_spdx_id: Optional[str] = own_spdx_id if own_spdx_id else spdx_id

        parsed_type: Optional[AnnotationType] = parse_field_or_log_error(
            annotation_logger, annotation_dict.get("annotationType"), self.parse_annotation_type)
        parsed_annotator: Optional[Actor] = parse_field_or_log_error(
            annotation_logger, annotation_dict.get("annotator"), self.actor_parser.parse_actor)
        parsed_date: Optional[datetime] = parse_field_or_log_error(
            annotation_logger, annotation_dict.get("annotationDate"), datetime_from_str)
        comment: Optional[str] = annotation_dict.get("comment")

        raise_parsing_error_if_logger_has_messages(annotation_logger, "Annotation")
        constructor_arguments = {
            "spdx_id": target_spdx_id,
            "annotation_type": parsed_type,
            "annotator": parsed_annotator,
            "annotation_date": parsed_date,
            "annotation_comment": comment,
        }
        return construct_or_raise_parsing_error(Annotation, constructor_arguments)

    @staticmethod
    def parse_annotation_type(annotation_type: str) -> AnnotationType:
        """Look up the ``AnnotationType`` member named ``annotation_type``.

        :raises SPDXParsingError: if no member with that name exists.
        """
        try:
            parsed_type = AnnotationType[annotation_type]
        except KeyError:
            raise SPDXParsingError([f"Invalid AnnotationType: {annotation_type}"])
        return parsed_type

    def parse_review(self, review_dict: Dict, spdx_id: str) -> Annotation:
        """Build an ``Annotation`` of type ``REVIEW`` from an entry of "revieweds".

        :param review_dict: dictionary with "reviewer", "reviewDate" and optionally "comment".
        :param spdx_id: the id the resulting annotation is attached to.
        :return: the constructed ``Annotation``.
        """
        review_logger = Logger()
        reviewer: Optional[Actor] = parse_field_or_log_error(
            review_logger, review_dict.get("reviewer"), self.actor_parser.parse_actor)
        review_date: Optional[datetime] = parse_field_or_log_error(
            review_logger, review_dict.get("reviewDate"), datetime_from_str)
        review_comment: Optional[str] = review_dict.get("comment")

        raise_parsing_error_if_logger_has_messages(review_logger, "Annotation from revieweds")
        constructor_arguments = {
            "spdx_id": spdx_id,
            "annotation_type": AnnotationType.REVIEW,
            "annotator": reviewer,
            "annotation_date": review_date,
            "annotation_comment": review_comment,
        }
        return construct_or_raise_parsing_error(Annotation, constructor_arguments)
38 changes: 38 additions & 0 deletions src/parser/json/checksum_parser.py
@@ -0,0 +1,38 @@
# Copyright (c) 2022 spdx contributors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, List, Optional

from src.model.checksum import Checksum, ChecksumAlgorithm
from src.parser.error import SPDXParsingError
from src.parser.json.dict_parsing_functions import append_parsed_field_or_log_error, \
raise_parsing_error_if_logger_has_messages, json_str_to_enum_name, construct_or_raise_parsing_error
from src.parser.logger import Logger


class ChecksumParser:
    """Parses the JSON representation of a checksum into a ``Checksum`` object."""

    # Instance logger; parse_checksum is static and works with its own local Logger.
    logger: Logger

    def __init__(self):
        """Create the parser with a fresh ``Logger``."""
        self.logger = Logger()

    @staticmethod
    def parse_checksum(checksum_dict: Dict) -> Checksum:
        """Build a ``Checksum`` from a dictionary with "algorithm" and "checksumValue".

        The "algorithm" string (empty string when the key is absent) is passed
        through ``json_str_to_enum_name`` and looked up in ``ChecksumAlgorithm``.
        A failed lookup is recorded in a local ``Logger``, which is then handed
        to ``raise_parsing_error_if_logger_has_messages`` before construction.

        :param checksum_dict: the JSON dictionary describing the checksum.
        :return: the constructed ``Checksum``.
        """
        error_log = Logger()
        algorithm: str = json_str_to_enum_name(checksum_dict.get("algorithm", ""))

        checksum_algorithm = None
        try:
            checksum_algorithm = ChecksumAlgorithm[algorithm]
        except KeyError:
            error_log.append(f"Invalid ChecksumAlgorithm: {algorithm}")

        value: Optional[str] = checksum_dict.get("checksumValue")
        raise_parsing_error_if_logger_has_messages(error_log, "Checksum")
        return construct_or_raise_parsing_error(Checksum, {"algorithm": checksum_algorithm, "value": value})
122 changes: 122 additions & 0 deletions src/parser/json/creation_info_parser.py
@@ -0,0 +1,122 @@
# Copyright (c) 2022 spdx contributors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import datetime
from typing import Dict, Optional, List

from src.model.actor import Actor
from src.model.checksum import Checksum
from src.model.document import CreationInfo
from src.model.external_document_ref import ExternalDocumentRef
from src.model.version import Version
from src.parser.error import SPDXParsingError
from src.parser.json.actor_parser import ActorParser
from src.parser.json.checksum_parser import ChecksumParser
from src.parser.json.dict_parsing_functions import append_parsed_field_or_log_error, datetime_from_str, \
raise_parsing_error_if_logger_has_messages, construct_or_raise_parsing_error, parse_field_or_log_error, \
parse_field_or_no_assertion
from src.parser.logger import Logger


class CreationInfoParser:
    """Builds the document-level ``CreationInfo`` from an SPDX JSON document dictionary."""

    # Instance logger; the parse methods below each work with a local Logger.
    logger: Logger
    # Parser for the entries of "creators".
    actor_parser: ActorParser
    # Parser for the "checksum" of external document references.
    checksum_parser: ChecksumParser

    def __init__(self):
        """Create the parser together with its logger, actor parser and checksum parser."""
        self.logger = Logger()
        self.actor_parser = ActorParser()
        self.checksum_parser = ChecksumParser()

    def parse_creation_info(self, doc_dict: Dict) -> CreationInfo:
        """Build the ``CreationInfo`` of a document.

        Reads "spdxVersion", "SPDXID", "name", "documentNamespace",
        "dataLicense", "comment" and "externalDocumentRefs" from ``doc_dict``
        and "creators", "created", "comment" and "licenseListVersion" from its
        "creationInfo" entry.

        :param doc_dict: the dictionary of the whole SPDX document.
        :return: the constructed ``CreationInfo``.
        :raises SPDXParsingError: if "creationInfo" is missing or ``None``.
        """
        logger = Logger()
        name: Optional[str] = doc_dict.get("name")
        creation_info_dict: Optional[Dict] = doc_dict.get("creationInfo")

        # There are nested required properties. If creationInfo is not set, we cannot continue parsing.
        if creation_info_dict is None:
            logger.append("CreationInfo does not exist.")
            raise SPDXParsingError([f"Error while parsing document {name}: {logger.get_messages()}"])

        creators: List[Actor] = parse_field_or_log_error(
            logger, creation_info_dict.get("creators"), self.parse_creators)
        created: Optional[datetime] = parse_field_or_log_error(
            logger, creation_info_dict.get("created"), datetime_from_str)
        external_document_refs: List[ExternalDocumentRef] = parse_field_or_log_error(
            logger, doc_dict.get("externalDocumentRefs"), self.parse_external_document_refs)
        license_list_version: Optional[Version] = parse_field_or_log_error(
            logger, creation_info_dict.get("licenseListVersion"), self.parse_version)

        raise_parsing_error_if_logger_has_messages(logger, "Document")

        constructor_arguments = {
            "spdx_version": doc_dict.get("spdxVersion"),
            "spdx_id": doc_dict.get("SPDXID"),
            "name": name,
            "document_namespace": doc_dict.get("documentNamespace"),
            "creators": creators,
            "created": created,
            "license_list_version": license_list_version,
            "document_comment": doc_dict.get("comment"),
            "creator_comment": creation_info_dict.get("comment"),
            "data_license": doc_dict.get("dataLicense"),
            "external_document_refs": external_document_refs,
        }
        return construct_or_raise_parsing_error(CreationInfo, constructor_arguments)

    def parse_creators(self, creators_list_from_dict: List[str]) -> List[Actor]:
        """Parse every creator string of ``creators_list_from_dict``.

        Each string goes through ``parse_field_or_no_assertion`` with the actor
        parser as the parsing function.

        :param creators_list_from_dict: the raw "creators" entries.
        :return: the list of parsed creators.
        """
        creators_logger = Logger()

        def parse_single_creator(raw_creator):
            return parse_field_or_no_assertion(raw_creator, self.actor_parser.parse_actor)

        parsed_creators = []
        for raw_creator_str in creators_list_from_dict:
            parsed_creators = append_parsed_field_or_log_error(
                creators_logger, parsed_creators, raw_creator_str, parse_single_creator)

        raise_parsing_error_if_logger_has_messages(creators_logger)
        return parsed_creators

    @staticmethod
    def parse_version(version_str: str) -> Version:
        """Convert ``version_str`` with ``Version.from_string``.

        :raises SPDXParsingError: if ``Version.from_string`` raises ``ValueError``.
        """
        try:
            parsed_version = Version.from_string(version_str)
        except ValueError as err:
            raise SPDXParsingError([f"Error while parsing version {version_str}: {err.args[0]}"])
        return parsed_version

    def parse_external_document_refs(self, external_document_ref_dicts: List[Dict]) -> List[ExternalDocumentRef]:
        """Parse each dictionary of ``external_document_ref_dicts`` with ``parse_external_document_ref``.

        :param external_document_ref_dicts: the raw "externalDocumentRefs" entries.
        :return: one result per input dictionary, in input order.
        """
        refs_logger = Logger()
        parsed_refs = [
            parse_field_or_log_error(refs_logger, ref_dict, self.parse_external_document_ref)
            for ref_dict in external_document_ref_dicts
        ]
        raise_parsing_error_if_logger_has_messages(refs_logger)
        return parsed_refs

    def parse_external_document_ref(self, external_document_ref_dict: Dict) -> ExternalDocumentRef:
        """Build one ``ExternalDocumentRef`` from its JSON dictionary.

        :param external_document_ref_dict: dictionary with "checksum",
            "externalDocumentId" and "spdxDocument".
        :return: the constructed ``ExternalDocumentRef``.
        """
        ref_logger = Logger()
        parsed_checksum: Optional[Checksum] = parse_field_or_log_error(
            ref_logger, external_document_ref_dict.get("checksum"), self.checksum_parser.parse_checksum)
        document_ref_id: Optional[str] = external_document_ref_dict.get("externalDocumentId")
        spdx_document_uri: Optional[str] = external_document_ref_dict.get("spdxDocument")

        raise_parsing_error_if_logger_has_messages(ref_logger, "ExternalDocumentRef")
        constructor_arguments = {
            "document_ref_id": document_ref_id,
            "checksum": parsed_checksum,
            "document_uri": spdx_document_uri,
        }
        return construct_or_raise_parsing_error(ExternalDocumentRef, constructor_arguments)