spdx · meretp · Dec 28, 2022 · Nov 25, 2022 · Dec 14, 2022 · Dec 14, 2022
diff --git a/src/model/typing/constructor_type_errors.py b/src/model/typing/constructor_type_errors.py
@@ -10,3 +10,6 @@ class ConstructorTypeErrors(TypeError):
 
     def __init__(self, messages: List[str]):
         self.messages = messages
+
+    def get_messages(self):
+        """Return a new list holding the messages stored on this error."""
+        return [*self.messages]
diff --git a/src/parser/__init__.py b/src/parser/__init__.py
diff --git a/src/parser/error.py b/src/parser/error.py
@@ -0,0 +1,21 @@
+# Copyright (c) 2022 spdx contributors
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import List
+
+
+class SPDXParsingError(Exception):
+    """Exception that carries a list of parsing error messages."""
+    messages: List[str]
+
+    def __init__(self, messages: List[str]):
+        # The list is stored as passed in; no copy is made here.
+        self.messages = messages
+
+    def get_messages(self):
+        """Return a new list containing the stored messages."""
+        return [*self.messages]
diff --git a/src/parser/json/__init__.py b/src/parser/json/__init__.py
diff --git a/src/parser/json/actor_parser.py b/src/parser/json/actor_parser.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2022 spdx contributors
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import re
+from typing import Union, Pattern, Match, Optional
+
+from src.model.actor import Actor, ActorType
+from src.model.spdx_no_assertion import SpdxNoAssertion
+from src.parser.error import SPDXParsingError
+from src.parser.json.dict_parsing_functions import construct_or_raise_parsing_error
+
+
+class ActorParser:
+    """Parser for SPDX actor strings such as ``Person: Jane Doe (jane@example.com)``."""
+
+    # The patterns are compiled once, at class creation, rather than on every parse_actor call.
+    # Group 1 is the actor name. In the person and organization patterns group 4 is the text
+    # inside the optional trailing parentheses; it is None when the parentheses are absent.
+    _TOOL_RE: Pattern = re.compile(r"^Tool:\s*(.+)", re.UNICODE)
+    _PERSON_RE: Pattern = re.compile(r"^Person:\s*(([^(])+)(\((.*)\))?", re.UNICODE)
+    _ORG_RE: Pattern = re.compile(r"^Organization:\s*(([^(])+)(\((.*)\))?", re.UNICODE)
+
+    @staticmethod
+    def parse_actor(actor: str) -> Actor:
+        """Parse an actor string into an Actor.
+
+        The string has to start with ``Tool:``, ``Person:`` or ``Organization:``. For persons and
+        organizations an email may follow the name in parentheses; a missing or blank email is
+        passed on as None.
+
+        :param actor: the raw actor string
+        :return: the object produced by construct_or_raise_parsing_error for Actor
+        :raises SPDXParsingError: if the string matches none of the three actor kinds
+        """
+        tool_match: Optional[Match] = ActorParser._TOOL_RE.match(actor)
+        if tool_match:
+            # Tools carry no email, so none is passed to the constructor.
+            tool_name: str = tool_match.group(1).strip()
+            return construct_or_raise_parsing_error(Actor, dict(actor_type=ActorType.TOOL, name=tool_name))
+
+        # Persons and organizations share the same layout and differ only in prefix and actor type.
+        for pattern, actor_type in ((ActorParser._PERSON_RE, ActorType.PERSON),
+                                    (ActorParser._ORG_RE, ActorType.ORGANIZATION)):
+            match: Optional[Match] = pattern.match(actor)
+            if match:
+                name: str = match.group(1).strip()
+                # Group 4 is None when no "(email)" part is present, so it must not be stripped directly.
+                email: Optional[str] = ActorParser.get_email_or_none(match)
+                return construct_or_raise_parsing_error(Actor, dict(actor_type=actor_type, name=name, email=email))
+
+        raise SPDXParsingError([f"Actor {actor} doesn't match any of person, organization or tool."])
+
+    @staticmethod
+    def get_email_or_none(match: Match) -> Optional[str]:
+        """Return the stripped text of group 4 of ``match``, or None if that group is absent or blank."""
+        email_group = match.group(4)
+        if email_group is None:
+            return None
+        return email_group.strip() or None
diff --git a/src/parser/json/annotation_parser.py b/src/parser/json/annotation_parser.py
@@ -0,0 +1,104 @@
+# Copyright (c) 2022 spdx contributors
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from datetime import datetime
+from typing import Dict, Optional, List
+
+from src.model.actor import Actor
+from src.model.annotation import Annotation, AnnotationType
+from src.parser.error import SPDXParsingError
+from src.parser.json.actor_parser import ActorParser
+from src.parser.json.dict_parsing_functions import datetime_from_str, construct_or_raise_parsing_error, \
+    parse_field_or_log_error, append_parsed_field_or_log_error, raise_parsing_error_if_logger_has_messages, \
+    parse_list_of_elements
+from src.parser.logger import Logger
+
+
+class AnnotationParser:
+    """Collects the annotations of a document dictionary and of the elements nested in it."""
+    logger: Logger
+    actor_parser: ActorParser
+
+    def __init__(self):
+        self.actor_parser = ActorParser()
+        self.logger = Logger()
+
+    def parse_all_annotations(self, input_doc_dict: Dict) -> List[Annotation]:
+        """Gather annotations from the document, its "revieweds" and its packages, files and snippets.
+
+        Problems are collected on self.logger, which is handed to
+        raise_parsing_error_if_logger_has_messages before the list is returned.
+        """
+        def review_to_annotation(review_dict: Dict) -> Annotation:
+            # Reviews are attached to the SPDXID of the document itself.
+            return self.parse_review(review_dict, spdx_id=input_doc_dict.get("SPDXID"))
+
+        collected = []
+        self.parse_annotations_from_object(collected, [input_doc_dict])
+        for review in input_doc_dict.get("revieweds", []):
+            collected = append_parsed_field_or_log_error(self.logger, collected, review, review_to_annotation)
+        for element_key in ("packages", "files", "snippets"):
+            self.parse_annotations_from_object(collected, input_doc_dict.get(element_key, []))
+
+        raise_parsing_error_if_logger_has_messages(self.logger, "annotations")
+        return collected
+
+    def parse_annotations_from_object(self, annotations: List[Annotation], element_list: List[Dict]):
+        """Parse the "annotations" entry of every element and extend ``annotations`` in place with the result."""
+        for element in element_list:
+            owner_id: Optional[str] = element.get("SPDXID")
+            raw_annotations: List[Dict] = element.get("annotations", [])
+            # NOTE(review): the trailing [] and True are positional arguments of the helper, presumably
+            # the default value and a "field is a list" flag - confirm in dict_parsing_functions.
+            parsed = parse_field_or_log_error(
+                self.logger, raw_annotations, lambda y: self.parse_annotation(y, spdx_id=owner_id), [], True)
+            annotations.extend(parsed)
+
+    def parse_annotation(self, annotation_dict: Dict, spdx_id: Optional[str] = None) -> Annotation:
+        """Build an Annotation from one annotation dictionary.
+
+        An "SPDXID" entry in the dictionary takes precedence over the ``spdx_id`` argument.
+        """
+        logger = Logger()
+        target_id: Optional[str] = annotation_dict.get("SPDXID") or spdx_id
+
+        parsed_type: Optional[AnnotationType] = parse_field_or_log_error(
+            logger, annotation_dict.get("annotationType"), self.parse_annotation_type)
+        parsed_annotator: Optional[Actor] = parse_field_or_log_error(
+            logger, annotation_dict.get("annotator"), self.actor_parser.parse_actor)
+        parsed_date: Optional[datetime] = parse_field_or_log_error(
+            logger, annotation_dict.get("annotationDate"), datetime_from_str)
+        comment: Optional[str] = annotation_dict.get("comment")
+
+        raise_parsing_error_if_logger_has_messages(logger, "Annotation")
+        constructor_arguments = dict(spdx_id=target_id, annotation_type=parsed_type, annotator=parsed_annotator,
+                                     annotation_date=parsed_date, annotation_comment=comment)
+        return construct_or_raise_parsing_error(Annotation, constructor_arguments)
+
+    @staticmethod
+    def parse_annotation_type(annotation_type: str) -> AnnotationType:
+        """Look up ``annotation_type`` by name in AnnotationType; a KeyError becomes an SPDXParsingError."""
+        try:
+            parsed_type = AnnotationType[annotation_type]
+        except KeyError:
+            raise SPDXParsingError([f"Invalid AnnotationType: {annotation_type}"])
+        return parsed_type
+
+    def parse_review(self, review_dict: Dict, spdx_id: str) -> Annotation:
+        """Turn one entry of "revieweds" into an Annotation of type REVIEW for ``spdx_id``."""
+        logger = Logger()
+        reviewer: Optional[Actor] = parse_field_or_log_error(
+            logger, review_dict.get("reviewer"), self.actor_parser.parse_actor)
+        review_date: Optional[datetime] = parse_field_or_log_error(
+            logger, review_dict.get("reviewDate"), datetime_from_str)
+        review_comment: Optional[str] = review_dict.get("comment")
+
+        raise_parsing_error_if_logger_has_messages(logger, "Annotation from revieweds")
+        constructor_arguments = dict(spdx_id=spdx_id, annotation_type=AnnotationType.REVIEW, annotator=reviewer,
+                                     annotation_date=review_date, annotation_comment=review_comment)
+        return construct_or_raise_parsing_error(Annotation, constructor_arguments)
diff --git a/src/parser/json/checksum_parser.py b/src/parser/json/checksum_parser.py
@@ -0,0 +1,38 @@
+# Copyright (c) 2022 spdx contributors
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Dict, List, Optional
+
+from src.model.checksum import Checksum, ChecksumAlgorithm
+from src.parser.error import SPDXParsingError
+from src.parser.json.dict_parsing_functions import append_parsed_field_or_log_error, \
+    raise_parsing_error_if_logger_has_messages, json_str_to_enum_name, construct_or_raise_parsing_error
+from src.parser.logger import Logger
+
+
+class ChecksumParser:
+    """Parser that turns a checksum dictionary into a Checksum."""
+    logger: Logger
+
+    def __init__(self):
+        self.logger = Logger()
+
+    @staticmethod
+    def parse_checksum(checksum_dict: Dict) -> Checksum:
+        """Build a Checksum from the "algorithm" and "checksumValue" entries of ``checksum_dict``.
+
+        An algorithm name that is not a key of ChecksumAlgorithm is logged, and the local logger is then
+        handed to raise_parsing_error_if_logger_has_messages before the Checksum is constructed.
+        """
+        errors = Logger()
+        enum_name: str = json_str_to_enum_name(checksum_dict.get("algorithm", ""))
+        parsed_algorithm = None
+        try:
+            parsed_algorithm = ChecksumAlgorithm[enum_name]
+        except KeyError:
+            errors.append(f"Invalid ChecksumAlgorithm: {enum_name}")
+        value: Optional[str] = checksum_dict.get("checksumValue")
+
+        raise_parsing_error_if_logger_has_messages(errors, "Checksum")
+        return construct_or_raise_parsing_error(Checksum, dict(algorithm=parsed_algorithm, value=value))
diff --git a/src/parser/json/creation_info_parser.py b/src/parser/json/creation_info_parser.py
@@ -0,0 +1,122 @@
+# Copyright (c) 2022 spdx contributors
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from datetime import datetime
+from typing import Dict, Optional, List
+
+from src.model.actor import Actor
+from src.model.checksum import Checksum
+from src.model.document import CreationInfo
+from src.model.external_document_ref import ExternalDocumentRef
+from src.model.version import Version
+from src.parser.error import SPDXParsingError
+from src.parser.json.actor_parser import ActorParser
+from src.parser.json.checksum_parser import ChecksumParser
+from src.parser.json.dict_parsing_functions import append_parsed_field_or_log_error, datetime_from_str, \
+    raise_parsing_error_if_logger_has_messages, construct_or_raise_parsing_error, parse_field_or_log_error, \
+    parse_field_or_no_assertion
+from src.parser.logger import Logger
+
+
+class CreationInfoParser:
+    """Builds a CreationInfo from the top-level document dictionary."""
+    logger: Logger
+    actor_parser: ActorParser
+    checksum_parser: ChecksumParser
+
+    def __init__(self):
+        self.checksum_parser = ChecksumParser()
+        self.actor_parser = ActorParser()
+        self.logger = Logger()
+
+    def parse_creation_info(self, doc_dict: Dict) -> CreationInfo:
+        """Read the document-level and "creationInfo" entries of ``doc_dict`` into a CreationInfo.
+
+        :raises SPDXParsingError: immediately if ``doc_dict`` has no "creationInfo" entry
+        """
+        logger = Logger()
+        name: Optional[str] = doc_dict.get("name")
+        creation_info_dict: Optional[Dict] = doc_dict.get("creationInfo")
+
+        # There are nested required properties. If creationInfo is not set, we cannot continue parsing.
+        if creation_info_dict is None:
+            logger.append("CreationInfo does not exist.")
+            raise SPDXParsingError([f"Error while parsing document {name}: {logger.get_messages()}"])
+
+        # The four parse calls stay in this order so that logged messages keep their order.
+        creators: List[Actor] = parse_field_or_log_error(
+            logger, creation_info_dict.get("creators"), self.parse_creators)
+        created: Optional[datetime] = parse_field_or_log_error(
+            logger, creation_info_dict.get("created"), datetime_from_str)
+        external_document_refs: List[ExternalDocumentRef] = parse_field_or_log_error(
+            logger, doc_dict.get("externalDocumentRefs"), self.parse_external_document_refs)
+        license_list_version: Optional[Version] = parse_field_or_log_error(
+            logger, creation_info_dict.get("licenseListVersion"), self.parse_version)
+        raise_parsing_error_if_logger_has_messages(logger, "Document")
+
+        constructor_arguments = dict(spdx_version=doc_dict.get("spdxVersion"),
+                                     spdx_id=doc_dict.get("SPDXID"),
+                                     name=name,
+                                     document_namespace=doc_dict.get("documentNamespace"),
+                                     creators=creators,
+                                     created=created,
+                                     license_list_version=license_list_version,
+                                     document_comment=doc_dict.get("comment"),
+                                     creator_comment=creation_info_dict.get("comment"),
+                                     data_license=doc_dict.get("dataLicense"),
+                                     external_document_refs=external_document_refs)
+        return construct_or_raise_parsing_error(CreationInfo, constructor_arguments)
+
+    def parse_creators(self, creators_list_from_dict: List[str]) -> List[Actor]:
+        """Parse every creator string, passing each through parse_field_or_no_assertion and the actor parser."""
+        def parse_single_creator(creator: str):
+            return parse_field_or_no_assertion(creator, self.actor_parser.parse_actor)
+
+        logger = Logger()
+        parsed_creators = []
+        for creator_str in creators_list_from_dict:
+            parsed_creators = append_parsed_field_or_log_error(
+                logger, parsed_creators, creator_str, parse_single_creator)
+
+        raise_parsing_error_if_logger_has_messages(logger)
+        return parsed_creators
+
+    @staticmethod
+    def parse_version(version_str: str) -> Version:
+        """Convert ``version_str`` with Version.from_string; a ValueError becomes an SPDXParsingError."""
+        try:
+            version = Version.from_string(version_str)
+        except ValueError as err:
+            raise SPDXParsingError([f"Error while parsing version {version_str}: {err.args[0]}"])
+        return version
+
+    def parse_external_document_refs(self, external_document_ref_dicts: List[Dict]) -> List[ExternalDocumentRef]:
+        """Parse each dictionary with parse_external_document_ref and return the results in input order."""
+        logger = Logger()
+        # Every result is kept, exactly as returned by the helper, before the logger is checked.
+        parsed_refs = [
+            parse_field_or_log_error(logger, ref_dict, self.parse_external_document_ref)
+            for ref_dict in external_document_ref_dicts
+        ]
+
+        raise_parsing_error_if_logger_has_messages(logger)
+        return parsed_refs
+
+    def parse_external_document_ref(self, external_document_ref_dict: Dict) -> ExternalDocumentRef:
+        """Build an ExternalDocumentRef from its "checksum", "externalDocumentId" and "spdxDocument" entries."""
+        logger = Logger()
+        checksum: Optional[Checksum] = parse_field_or_log_error(
+            logger, external_document_ref_dict.get("checksum"), self.checksum_parser.parse_checksum)
+        ref_id: Optional[str] = external_document_ref_dict.get("externalDocumentId")
+        uri: Optional[str] = external_document_ref_dict.get("spdxDocument")
+
+        raise_parsing_error_if_logger_has_messages(logger, "ExternalDocumentRef")
+        constructor_arguments = dict(document_ref_id=ref_id, checksum=checksum, document_uri=uri)
+        return construct_or_raise_parsing_error(ExternalDocumentRef, constructor_arguments)