In [78]:
#| hide
import kglab
import pandas as pd

# Analyzing SPDX Example SBOM

SBOM Source: [spdx/spdx-spec/examples](https://github.com/spdx/spdx-spec/tree/development/v2.2.2/examples)

RDF Source: Generated using [pyspdxtools](https://github.com/spdx/tools-python)

## Import Graph

In [61]:
kg.load_rdf("sboms/rdf/model.rdf.xml", format="xml")

<kglab.kglab.KnowledgeGraph at 0x7fe06bebf280>

## Querying

**Total Number of Triples**

In [17]:
query = """
SELECT (COUNT(*) as ?count)
WHERE {
  ?s ?p ?o .
}
"""

result = kg.query(query)
for row in result:
    print("Total Triples:", row["count"])


Total Triples: 306


**Number of Distinct Entities**

In [9]:
query = """
SELECT (COUNT(DISTINCT ?entity) as ?count)
WHERE {
  ?entity a ?type .
}
"""

result = kg.query(query)
for row in result:
    print("Distinct Entities:", row["count"])


Distinct Entities: 56


**Number of Distinct Properties**

In [10]:
query = """
SELECT (COUNT(DISTINCT ?property) as ?count)
WHERE {
  ?s ?property ?o .
}
"""

result = kg.query(query)
for row in result:
    print("Distinct Properties:", row["count"])

Distinct Properties: 62


**Number of Entities per Type**

In [11]:
query = """
SELECT ?type (COUNT(?entity) as ?count)
WHERE {
  ?entity a ?type .
}
GROUP BY ?type
ORDER BY DESC(?count)
"""

result = kg.query(query)
for row in result:
    print(row["type"], ":", row["count"])


http://spdx.org/rdf/terms#Relationship : 11
http://spdx.org/rdf/terms#Checksum : 10
http://spdx.org/rdf/terms#ExtractedLicensingInfo : 5
http://spdx.org/rdf/terms#Annotation : 5
http://spdx.org/rdf/terms#File : 4
http://spdx.org/rdf/terms#Package : 4
http://spdx.org/rdf/terms#ExternalRef : 3
http://www.w3.org/2009/pointers#StartEndPointer : 2
http://www.w3.org/2009/pointers#ByteOffsetPointer : 2
http://www.w3.org/2009/pointers#LineCharPointer : 2
http://spdx.org/rdf/terms#DisjunctiveLicenseSet : 2
http://spdx.org/rdf/terms#Snippet : 1
http://spdx.org/rdf/terms#SpdxDocument : 1
http://spdx.org/rdf/terms#ExternalDocumentRef : 1
http://spdx.org/rdf/terms#PackageVerificationCode : 1
http://spdx.org/rdf/terms#ConjunctiveLicenseSet : 1
http://spdx.org/rdf/terms#CreationInfo : 1


**Top N Properties by Popularity**

In [12]:
N = 10

query = """
SELECT ?property (COUNT(?property) as ?count)
WHERE {
  ?s ?property ?o .
}
GROUP BY ?property
ORDER BY DESC(?count)
LIMIT %d
""" % N

result = kg.query(query)
for row in result:
    print(row["property"], ":", row["count"])


http://www.w3.org/1999/02/22-rdf-syntax-ns#type : 56
http://www.w3.org/2000/01/rdf-schema#comment : 14
http://spdx.org/rdf/terms#relationship : 11
http://spdx.org/rdf/terms#relatedSpdxElement : 11
http://spdx.org/rdf/terms#fileContributor : 11
http://spdx.org/rdf/terms#relationshipType : 11
http://spdx.org/rdf/terms#algorithm : 10
http://spdx.org/rdf/terms#checksum : 10
http://spdx.org/rdf/terms#checksumValue : 10
http://spdx.org/rdf/terms#licenseConcluded : 9


In [19]:
measure = kglab.Measure()
measure.measure_graph(kg)

print("edges", measure.get_edge_count())
print("nodes", measure.get_node_count())

edges 306
nodes 99


### Files

Contained file schema

In [100]:
query = """
PREFIX spdx:<http://spdx.org/rdf/terms#>
SELECT DISTINCT ?property
WHERE {
  ?file rdf:type spdx:File .
  ?file ?property ?value .
}
"""


df = kg.query_as_df(query)
df

Unnamed: 0,property
0,rdf:type
1,spdx:licenseInfoInFile
2,spdx:fileContributor
3,spdx:licenseConcluded
4,spdx:checksum
5,spdx:fileName
6,spdx:copyrightText
7,spdx:fileType
8,spdx:relationship
9,spdx:annotation


In [146]:
query = """
SELECT 
(?file AS ?fileID)
?fileName
?fileType
?licenseInFile
(GROUP_CONCAT(?contributor; SEPARATOR=", ") AS ?contributors)
(GROUP_CONCAT(?licenseConcluded; SEPARATOR=", ") AS ?licenseConcluded)
?checksum
?relationship
?annotation
?comment
?licenseComments
?noticeText
WHERE {
  ?file rdf:type spdx:File .
  ?file spdx:fileName ?fileName .
  ?file spdx:fileContributor ?contributor .
  ?file spdx:licenseInfoInFile ?licenseInFile .
  ?file spdx:licenseConcluded ?licenseConcluded .
  ?file spdx:checksum ?checksum .
  ?file spdx:copyrightText ?copyrightText .
  ?file spdx:fileType ?fileType .
  OPTIONAL {?file spdx:relationship ?relationship .}
  OPTIONAL {?file spdx:annotation ?annotation . }
  OPTIONAL {?file rdfs:comment ?comment . }
  OPTIONAL {?file spdx:licenseComments ?licenseComments . }
  OPTIONAL {?file spdx:noticeText ?noticeText . }
}
GROUP BY ?file
"""

result = kg.query(query)

df = kg.query_as_df(query)
df.head(5)

Unnamed: 0,fileID,fileName,fileType,licenseInFile,contributors,licenseConcluded,checksum,relationship,annotation,comment,licenseComments,noticeText
0,<http://spdx.org/spdxdocs/spdx-example-444504E...,./src/org/spdx/parser/DOAPProject.java,spdx:fileType_source,<http://spdx.org/licenses/Apache-2.0>,"Open Logic Inc., Source Auditor Inc., SPDX Tec...","http://spdx.org/licenses/Apache-2.0, http://sp...",_:N28d080231b7741e49be559be668f1b8d,,,,,
1,<http://spdx.org/spdxdocs/spdx-example-444504E...,./package/foo.c,spdx:fileType_source,<http://spdx.org/spdxdocs/spdx-example-444504E...,"Modified by Paul Mundt lethal@linux-sh.org, Mo...","N88df131d279c47c4a7307cc8aae23a78, N88df131d27...",_:N463446d551064ca99daa369d9243524f,_:Ncc6fc35341df45df8fed3b6c31f08297,_:Na4325b02158b40fb9d06521c6d563bf5,The concluded license was taken from the packa...,The concluded license was taken from the packa...,Copyright (c) 2001 Aaron Lehmann aaroni@vitelu...
2,<http://spdx.org/spdxdocs/spdx-example-444504E...,./lib-source/jena-2.6.3-sources.jar,spdx:fileType_archive,<http://spdx.org/spdxdocs/spdx-example-444504E...,"Apache Software Foundation, Apache Software Fo...",http://spdx.org/spdxdocs/spdx-example-444504E0...,_:N4ac8db8d68ed47ec8d607140cf592534,_:N3e1acf9f51ca489eade5dc1452f7bf9e,,This file belongs to Jena,This license is used by Jena,
3,<http://spdx.org/spdxdocs/spdx-example-444504E...,./lib-source/commons-lang3-3.1-sources.jar,spdx:fileType_archive,<http://spdx.org/licenses/Apache-2.0>,"Apache Software Foundation, Apache Software Fo...","http://spdx.org/licenses/Apache-2.0, http://sp...",_:Nef4d6df3250d4e42bb4f99a95bf3a2c7,_:Nf475fec285554b63a018b722d6055602,,This file is used by Jena,,Apache Commons Lang\nCopyright 2001-2011 The A...


## Visualization

In [3]:


VIS_STYLE = {
    "spdx": {
        "color": "orange",
        "size": 40,
    },
    "rdf":{
        "color": "blue",
        "size": 30,
    },
    "ptr":{
        "color": "red",
        "size": 20,
    },
}

subgraph = kglab.SubgraphTensor(kg)
pyvis_graph = subgraph.build_pyvis_graph(notebook=True, style=VIS_STYLE)



In [4]:
pyvis_graph.force_atlas_2based()
pyvis_graph.show("tmp.fig03.html")

tmp.fig03.html
