In [2]:
#| hide
import kglab

# Analyzing SPDX Example SBOM

SBOM Source: [spdx/spdx-spec/examples](https://github.com/spdx/spdx-spec/tree/development/v2.2.2/examples)

RDF Source: Generated using [pyspdxtools](https://github.com/spdx/tools-python)

## Import Graph

In [5]:
# Create an empty knowledge graph, then parse the SPDX RDF/XML document into it.
kg = kglab.KnowledgeGraph()
kg = kg.load_rdf("sboms/rdf/model.rdf.xml", format="xml")

## Querying

**Total Number of Triples**

In [17]:
# SPARQL: match every (?s ?p ?o) pattern and count the matches as ?count.
query = """
SELECT (COUNT(*) as ?count)
WHERE {
  ?s ?p ?o .
}
"""

# Run the query on the loaded graph and print the ?count binding of each row.
result = kg.query(query)
for solution in result:
    print("Total Triples:", solution["count"])


Total Triples: 306


**Number of Distinct Entities** (subjects that have an `rdf:type`)

In [9]:
# SPARQL: count the distinct subjects that carry a type (`a` is shorthand
# for rdf:type), exposed as ?count.
query = """
SELECT (COUNT(DISTINCT ?entity) as ?count)
WHERE {
  ?entity a ?type .
}
"""

# Run the query on the loaded graph and print the ?count binding of each row.
result = kg.query(query)
for solution in result:
    print("Distinct Entities:", solution["count"])


Distinct Entities: 56


**Number of Distinct Properties**

In [10]:
# SPARQL: count the distinct predicates appearing in any triple, as ?count.
query = """
SELECT (COUNT(DISTINCT ?property) as ?count)
WHERE {
  ?s ?property ?o .
}
"""

# Run the query on the loaded graph and print the ?count binding of each row.
result = kg.query(query)
for solution in result:
    print("Distinct Properties:", solution["count"])

Distinct Properties: 62


**Number of Entities per Type**

In [11]:
# SPARQL: group typed subjects by their type and count each group,
# listing the largest groups first.
query = """
SELECT ?type (COUNT(?entity) as ?count)
WHERE {
  ?entity a ?type .
}
GROUP BY ?type
ORDER BY DESC(?count)
"""

# One result row per type: print the type followed by its entity count.
result = kg.query(query)
for type_row in result:
    print(type_row["type"], ":", type_row["count"])


http://spdx.org/rdf/terms#Relationship : 11
http://spdx.org/rdf/terms#Checksum : 10
http://spdx.org/rdf/terms#ExtractedLicensingInfo : 5
http://spdx.org/rdf/terms#Annotation : 5
http://spdx.org/rdf/terms#File : 4
http://spdx.org/rdf/terms#Package : 4
http://spdx.org/rdf/terms#ExternalRef : 3
http://www.w3.org/2009/pointers#StartEndPointer : 2
http://www.w3.org/2009/pointers#ByteOffsetPointer : 2
http://www.w3.org/2009/pointers#LineCharPointer : 2
http://spdx.org/rdf/terms#DisjunctiveLicenseSet : 2
http://spdx.org/rdf/terms#Snippet : 1
http://spdx.org/rdf/terms#SpdxDocument : 1
http://spdx.org/rdf/terms#ExternalDocumentRef : 1
http://spdx.org/rdf/terms#PackageVerificationCode : 1
http://spdx.org/rdf/terms#ConjunctiveLicenseSet : 1
http://spdx.org/rdf/terms#CreationInfo : 1


**Top N Properties by Popularity**

In [12]:
# How many of the most frequently used predicates to list.
N = 10

# SPARQL: count the triples per predicate, order by descending count, and keep
# only the first N rows. The LIMIT value is filled in from N with %-formatting.
query = """
SELECT ?property (COUNT(?property) as ?count)
WHERE {
  ?s ?property ?o .
}
GROUP BY ?property
ORDER BY DESC(?count)
LIMIT %d
""" % (N,)

# One result row per predicate: print the predicate followed by its usage count.
result = kg.query(query)
for property_row in result:
    print(property_row["property"], ":", property_row["count"])


http://www.w3.org/1999/02/22-rdf-syntax-ns#type : 56
http://www.w3.org/2000/01/rdf-schema#comment : 14
http://spdx.org/rdf/terms#relationship : 11
http://spdx.org/rdf/terms#relatedSpdxElement : 11
http://spdx.org/rdf/terms#fileContributor : 11
http://spdx.org/rdf/terms#relationshipType : 11
http://spdx.org/rdf/terms#algorithm : 10
http://spdx.org/rdf/terms#checksum : 10
http://spdx.org/rdf/terms#checksumValue : 10
http://spdx.org/rdf/terms#licenseConcluded : 9


## Visualization

In [3]:


# Node styling passed to kglab's pyvis builder: one color/size entry per key.
# NOTE(review): the keys look like namespace prefixes (spdx, rdf, ptr) -- confirm
# against the kglab documentation that styles are matched by prefix.
VIS_STYLE = {
    "spdx": {"color": "orange", "size": 40},
    "rdf": {"color": "blue", "size": 30},
    "ptr": {"color": "red", "size": 20},
}

# Wrap the knowledge graph in a subgraph and build a notebook-mode pyvis graph
# from it using the styles above.
subgraph = kglab.SubgraphTensor(kg)
pyvis_graph = subgraph.build_pyvis_graph(notebook=True, style=VIS_STYLE)



In [4]:
# Configure the layout before rendering.
# NOTE(review): presumably selects pyvis's ForceAtlas2-based physics solver -- confirm.
pyvis_graph.force_atlas_2based()
# Render the graph under the given HTML file name; as the last expression of the
# cell, whatever show() returns is what the notebook displays.
pyvis_graph.show("tmp.fig03.html")

tmp.fig03.html
