Skip to content

Commit

Permalink
[issue-558] add optional feature to generate a relationship graph
Browse files Browse the repository at this point in the history
Signed-off-by: Meret Behrens <meret.behrens@tngtech.com>
  • Loading branch information
meretp committed Apr 6, 2023
1 parent 0e1df0a commit 91bb744
Show file tree
Hide file tree
Showing 8 changed files with 295 additions and 15 deletions.
16 changes: 15 additions & 1 deletion README.md
Expand Up @@ -38,9 +38,11 @@ This library implements SPDX parsers, convertors, validators and handlers in Pyt

# Features

* API to create and manipulate SPDX v2.2 and v2.3 documents.
* API to create and manipulate SPDX v2.2 and v2.3 documents
* Parse, convert, create and validate SPDX files
* supported formats: Tag/Value, RDF, JSON, YAML, XML
* visualize the structure of an SPDX document by creating an `AGraph`. Note: This is an optional feature and requires
installing additional optional dependencies

# Planned features

Expand Down Expand Up @@ -78,6 +80,18 @@ instead of `bin`.

* For help use `pyspdxtools --help`

3. **GRAPH GENERATION** (optional feature)

* This feature generates a graph representing all elements in the SPDX document and their connections based on the provided
relationships. The graph can be rendered to a picture. Below is an example for the file `tests/spdx/data/formats/SPDXJSONExample-v2.3.spdx.json`:
![SPDXJSONExample-v2.3.spdx.png](assets/SPDXJSONExample-v2.3.spdx.png)
* Make sure you install the optional dependencies `networkx` and `pygraphviz`. To do so run `pip install ".[graph_generation]"`.
* Use `pyspdxtools -i <input_file> --graph -o <output_file>` where `<output_file>` is an output file name with valid format for `pygraphviz` (check
the documentation [here](https://pygraphviz.github.io/documentation/stable/reference/agraph.html#pygraphviz.AGraph.draw)).
* If you are using a source distribution, try running
`pyspdxtools -i tests/spdx/data/formats/SPDXJSONExample-v2.3.spdx.json --graph -o SPDXJSONExample-v2.3.spdx.png` to generate
a png with an overview of the structure of the example file.

## Library usage
1. **DATA MODEL**
* The `src.spdx.model` package constitutes the internal SPDX v2.3 data model (v2.2 is simply a subset of this).
Expand Down
Binary file added assets/SPDXJSONExample-v2.3.spdx.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions pyproject.toml
Expand Up @@ -30,6 +30,7 @@ dynamic = ["version"]
[project.optional-dependencies]
test = ["pytest"]
code_style = ["isort", "black", "flake8"]
graph_generation = ["pygraphviz", "networkx"]

[project.scripts]
pyspdxtools = "spdx.clitools.pyspdxtools:main"
Expand Down
32 changes: 26 additions & 6 deletions src/spdx/clitools/pyspdxtools.py
Expand Up @@ -18,6 +18,7 @@

import click

from spdx.graph_generation import export_graph_from_document
from spdx.model.document import Document
from spdx.parser.error import SPDXParsingError
from spdx.parser.parse_anything import parse_file
Expand All @@ -32,7 +33,8 @@
@click.option(
"--outfile",
"-o",
help="The file to write the converted document to (write a dash for output to stdout or omit for no conversion).",
help="The file to write the converted document to (write a dash for output to stdout or omit for no conversion). "
"If you add the option --graph to the command the generated graph will be written to this file.",
)
@click.option(
"--version",
Expand All @@ -41,7 +43,15 @@
default=None,
)
@click.option("--novalidation", is_flag=True, help="Don't validate the provided document.")
def main(infile: str, outfile: str, version: str, novalidation: bool):
@click.option(
"--graph",
is_flag=True,
default=False,
help="Generate a relationship graph from the input file. "
"The generated graph is saved to the file specified with --outfile. "
"Note: You need to install the optional dependencies 'networkx' and 'pygraphviz' for this feature.",
)
def main(infile: str, outfile: str, version: str, novalidation: bool, graph: bool):
"""
CLI-tool for validating SPDX documents and converting between RDF, TAG-VALUE, JSON, YAML and XML formats.
Formats are determined by the file endings.
Expand All @@ -50,9 +60,6 @@ def main(infile: str, outfile: str, version: str, novalidation: bool):
try:
document: Document = parse_file(infile)

if outfile == "-":
tagvalue_writer.write_document(document, sys.stdout)

if not novalidation:
if not version:
version = document.creation_info.spdx_version
Expand All @@ -72,7 +79,20 @@ def main(infile: str, outfile: str, version: str, novalidation: bool):
else:
logging.info("The document is valid.")

if outfile and outfile != "-":
if outfile == "-":
tagvalue_writer.write_document(document, sys.stdout)

elif graph:
try:
export_graph_from_document(document, outfile)
except ImportError:
logging.error(
"To be able to draw a relationship graph of the parsed document "
"you need to install 'networkx' and 'pygraphviz'. Run 'pip install \".[graph_generation]\"'."
)
sys.exit(1)

elif outfile:
write_file(document, outfile, validate=False)

except NotImplementedError as err:
Expand Down
20 changes: 13 additions & 7 deletions src/spdx/document_utils.py
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2022 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from typing import List, Union
from typing import Dict, List, Union

from spdx.model.document import Document
from spdx.model.file import File
Expand All @@ -17,9 +17,15 @@ def get_contained_spdx_element_ids(document: Document) -> List[str]:


def get_element_from_spdx_id(document: Document, spdx_id: str) -> Union[Package, File, Snippet, None]:
    """Return the package, file or snippet of ``document`` whose SPDX id equals ``spdx_id``.

    The lookup goes through ``get_contained_spdx_elements`` so that both functions agree on
    which element an id refers to. Returns ``None`` when no contained element has that id.
    """
    # dict.get yields None for unknown ids, which replaces the separate membership test
    # and second indexing step.
    return get_contained_spdx_elements(document).get(spdx_id)


def get_contained_spdx_elements(document: Document) -> Dict[str, Union[Package, File, Snippet]]:
    """Map the SPDX id of every package, file and snippet in ``document`` to the element itself.

    Elements are inserted packages first, then files, then snippets. If several elements
    share an id, the one inserted last is the one kept under that id.
    """
    elements_by_id = {}
    for element_group in (document.packages, document.files, document.snippets):
        for element in element_group:
            elements_by_id[element.spdx_id] = element
    return elements_by_id
76 changes: 76 additions & 0 deletions src/spdx/graph_generation.py
@@ -0,0 +1,76 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from typing import Dict, List, Union

from spdx.model.file import File
from spdx.model.package import Package
from spdx.model.snippet import Snippet

try:
from networkx import DiGraph
except ImportError:
DiGraph = None
from spdx.document_utils import get_contained_spdx_elements
from spdx.model.document import Document
from spdx.model.relationship import Relationship


def export_graph_from_document(document: Document, file_name: str) -> None:
    """Draw the relationship graph of ``document`` and write it to ``file_name``.

    The graph is built with ``generate_relationship_graph_from_spdx``, colored with
    ``_color_nodes``, converted to a pygraphviz graph and laid out with the ``dot`` program.

    Raises:
        ImportError: if networkx cannot be imported (it is imported lazily here because it
            is an optional dependency). Presumably the same applies to a missing
            pygraphviz during the conversion; confirm against the networkx documentation.
    """
    from networkx.drawing import nx_agraph

    relationship_graph = generate_relationship_graph_from_spdx(document)
    _color_nodes(relationship_graph)
    # the drawing itself is done by pygraphviz, so hand the networkx graph over to it
    nx_agraph.to_agraph(relationship_graph).draw(file_name, prog="dot")


def generate_relationship_graph_from_spdx(document: Document) -> DiGraph:
    """Build a directed graph of the elements in ``document`` and the relationships between them.

    Each relationship gets its own intermediate node keyed
    ``<spdx_element_id>_<RELATIONSHIP_TYPE_NAME>``, so a relationship is represented as the
    path ``element -> relationship node -> related element``. Nodes for the document and its
    contained elements carry an ``element`` attribute; ids that only appear in relationships
    get ``element=None``. Relationship nodes carry the relationship's ``comment`` instead.
    """
    from networkx import DiGraph

    graph = DiGraph()
    creation_info = document.creation_info
    graph.add_node(creation_info.spdx_id, element=creation_info)

    for element_id, element in get_contained_spdx_elements(document).items():
        graph.add_node(element_id, element=element)

    # group the relationships by the element they start from, keeping first-seen order
    relationships_by_source = {}
    for relationship in document.relationships:
        source_id = relationship.spdx_element_id
        if source_id not in relationships_by_source:
            relationships_by_source[source_id] = []
        relationships_by_source[source_id].append(relationship)

    for source_id, outgoing_relationships in relationships_by_source.items():
        if not graph.has_node(source_id):
            # an id without a matching element in the document (e.g. an external one): we have
            # no further information about it, but the "element" attribute still marks the node
            # as an element rather than a relationship
            graph.add_node(source_id, element=None)

        for relationship in outgoing_relationships:
            relationship_node_key = "_".join((relationship.spdx_element_id, relationship.relationship_type.name))
            graph.add_node(relationship_node_key, comment=relationship.comment)
            graph.add_edge(relationship.spdx_element_id, relationship_node_key)

            # SpdxNone / SpdxNoAssertion targets are objects, not strings, hence the conversion
            target_id = str(relationship.related_spdx_element_id)
            if not graph.has_node(target_id):
                # same as above: unknown target ids become element nodes without an element
                graph.add_node(target_id, element=None)
            graph.add_edge(relationship_node_key, target_id)

    return graph


def _color_nodes(graph: DiGraph) -> None:
for node in graph.nodes():
if "_" in node:
# nodes representing a RelationshipType are concatenated with the spdx_element_id,
# to only see the RelationshipType when rendering the graph to a picture we add
# a label to these nodes
graph.add_node(node, color="lightgreen", label=node.split("_", 1)[-1])
elif node == "SPDXRef-DOCUMENT":
graph.add_node(node, color="indianred2")
else:
graph.add_node(node, color="lightskyblue")
10 changes: 9 additions & 1 deletion tests/spdx/test_document_utils.py
Expand Up @@ -5,7 +5,7 @@

import pytest

from spdx.document_utils import get_contained_spdx_element_ids, get_element_from_spdx_id
from spdx.document_utils import get_contained_spdx_element_ids, get_contained_spdx_elements, get_element_from_spdx_id
from tests.spdx.fixtures import document_fixture, file_fixture, package_fixture, snippet_fixture


Expand All @@ -26,3 +26,11 @@ def test_get_element_from_spdx_id(variables):
assert get_element_from_spdx_id(document, file.spdx_id) == file
assert get_element_from_spdx_id(document, snippet.spdx_id) == snippet
assert get_element_from_spdx_id(document, "unknown_id") is None


def test_get_contained_spdx_elements(variables):
    """Every element from the fixture can be looked up in the mapping by its own SPDX id."""
    document, package, file, snippet = variables
    contained_elements = get_contained_spdx_elements(document)
    for expected_element in (package, file, snippet):
        assert contained_elements[expected_element.spdx_id] == expected_element
155 changes: 155 additions & 0 deletions tests/spdx/test_graph_generation.py
@@ -0,0 +1,155 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from pathlib import Path
from typing import List
from unittest import TestCase

import pytest

from spdx.graph_generation import generate_relationship_graph_from_spdx
from spdx.model.document import Document
from spdx.model.relationship import Relationship, RelationshipType
from spdx.parser.parse_anything import parse_file
from tests.spdx.fixtures import document_fixture, file_fixture, package_fixture

try:
import networkx # noqa: F401
import pygraphviz # noqa: F401
except ImportError:
pytest.skip("Skip this module as the tests need optional dependencies to run.", allow_module_level=True)


@pytest.mark.parametrize(
    "file_name, nodes_count, edges_count, relationship_node_keys",
    [
        ("SPDXJSONExample-v2.3.spdx.json", 22, 22, ["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"]),
        ("SPDXJSONExample-v2.2.spdx.json", 20, 19, ["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"]),
        ("SPDXRdfExample-v2.3.spdx.rdf.xml", 22, 22, ["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"]),
        ("SPDXRdfExample-v2.2.spdx.rdf.xml", 20, 17, ["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"]),
        ("SPDXTagExample-v2.3.spdx", 22, 22, ["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"]),
    ],
)
def test_generate_graph_from_spdx(
    file_name: str,
    nodes_count: int,
    edges_count: int,
    relationship_node_keys: List[str],
) -> None:
    """Parse an example file and check the size and some expected nodes of its graph."""
    formats_directory = Path(__file__).resolve().parent.parent / "spdx" / "data" / "formats"
    document = parse_file(str(formats_directory / file_name))

    graph = generate_relationship_graph_from_spdx(document)

    assert document.creation_info.spdx_id in graph.nodes()
    assert graph.number_of_nodes() == nodes_count
    assert graph.number_of_edges() == edges_count
    # the DESCRIBES relationship node of the document is expected in every example file
    for expected_node_key in ["SPDXRef-DOCUMENT_DESCRIBES", *relationship_node_keys]:
        assert expected_node_key in graph.nodes()


def test_complete_connected_graph() -> None:
    """The minimal document yields exactly its elements, relationship nodes and connecting edges."""
    expected_nodes = [
        "SPDXRef-DOCUMENT",
        "SPDXRef-Package-A",
        "SPDXRef-Package-B",
        "SPDXRef-File",
        "SPDXRef-DOCUMENT_DESCRIBES",
        "SPDXRef-Package-A_CONTAINS",
        "SPDXRef-Package-B_CONTAINS",
    ]
    expected_edges = [
        ("SPDXRef-DOCUMENT", "SPDXRef-DOCUMENT_DESCRIBES"),
        ("SPDXRef-DOCUMENT_DESCRIBES", "SPDXRef-Package-A"),
        ("SPDXRef-DOCUMENT_DESCRIBES", "SPDXRef-Package-B"),
        ("SPDXRef-Package-A", "SPDXRef-Package-A_CONTAINS"),
        ("SPDXRef-Package-A_CONTAINS", "SPDXRef-File"),
        ("SPDXRef-Package-B", "SPDXRef-Package-B_CONTAINS"),
        ("SPDXRef-Package-B_CONTAINS", "SPDXRef-File"),
    ]

    graph = generate_relationship_graph_from_spdx(_create_minimal_document())

    # assertCountEqual compares the contents regardless of order
    TestCase().assertCountEqual(graph.nodes(), expected_nodes)
    TestCase().assertCountEqual(graph.edges(), expected_edges)


def test_complete_unconnected_graph() -> None:
    """A package without any relationship still appears as a node but adds no edges."""
    expected_nodes = [
        "SPDXRef-DOCUMENT",
        "SPDXRef-Package-A",
        "SPDXRef-Package-B",
        "SPDXRef-File",
        "SPDXRef-DOCUMENT_DESCRIBES",
        "SPDXRef-Package-A_CONTAINS",
        "SPDXRef-Package-B_CONTAINS",
        "SPDXRef-Package-C",
    ]
    expected_edges = [
        ("SPDXRef-DOCUMENT", "SPDXRef-DOCUMENT_DESCRIBES"),
        ("SPDXRef-DOCUMENT_DESCRIBES", "SPDXRef-Package-A"),
        ("SPDXRef-DOCUMENT_DESCRIBES", "SPDXRef-Package-B"),
        ("SPDXRef-Package-A", "SPDXRef-Package-A_CONTAINS"),
        ("SPDXRef-Package-A_CONTAINS", "SPDXRef-File"),
        ("SPDXRef-Package-B", "SPDXRef-Package-B_CONTAINS"),
        ("SPDXRef-Package-B_CONTAINS", "SPDXRef-File"),
    ]
    document = _create_minimal_document()
    document.packages += [package_fixture(spdx_id="SPDXRef-Package-C", name="Package without connection to document")]

    graph = generate_relationship_graph_from_spdx(document)

    # assertCountEqual compares the contents regardless of order
    TestCase().assertCountEqual(graph.nodes(), expected_nodes)
    TestCase().assertCountEqual(graph.edges(), expected_edges)


def _create_minimal_document() -> Document:
    """Create a document with two packages that both contain the same single file.

    The document DESCRIBES both packages and each package CONTAINS the file; there are no
    snippets.
    """
    return document_fixture(
        packages=[
            package_fixture(spdx_id="SPDXRef-Package-A", name="Package-A"),
            package_fixture(spdx_id="SPDXRef-Package-B", name="Package-B"),
        ],
        files=[file_fixture(spdx_id="SPDXRef-File", name="File")],
        relationships=[
            Relationship("SPDXRef-DOCUMENT", RelationshipType.DESCRIBES, "SPDXRef-Package-A"),
            Relationship("SPDXRef-DOCUMENT", RelationshipType.DESCRIBES, "SPDXRef-Package-B"),
            Relationship("SPDXRef-Package-A", RelationshipType.CONTAINS, "SPDXRef-File"),
            Relationship("SPDXRef-Package-B", RelationshipType.CONTAINS, "SPDXRef-File"),
        ],
        snippets=[],
    )

0 comments on commit 91bb744

Please sign in to comment.