In [None]:
#| hide
import nbdev; nbdev.nbdev_export()
import kglab
import pandas as pd

# Core Functions

This page defines the boilerplate functions used when analyzing SBOMs. To see these functions in action, please view the following pages.

#| hide
### Load KG for testing

In [None]:
#| hide
# Build the knowledge graph that the hidden test cells below query.
import kglab
kg = kglab.KnowledgeGraph()
# The path is relative to the working directory; the file is parsed as RDF/XML.
kg.load_rdf("sboms/rdf/model.rdf.xml", format="xml")

<kglab.kglab.KnowledgeGraph>

In [None]:
#| export
def show_metadata(kg, dataframe=False):
    """
    Summarize a knowledge graph: total triples, distinct entities (subjects), and distinct properties.

    - `kg`: `KnowledgeGraph()` - Knowledge graph to query from
    - `dataframe` : `bool` - Optionally, return result to dataframe

    With `dataframe` set, the result of `kg.query_as_df` is returned. Otherwise the
    three counts are printed for every result row and nothing is returned.
    """

    sparql = """
    SELECT 
        (COUNT(*) AS ?triples)
        (COUNT(DISTINCT ?entity) AS ?entities)
        (COUNT(DISTINCT ?property) AS ?properties)
    WHERE {
        ?subject ?property ?object .
        BIND(?subject AS ?entity) .
    }
    """

    if not dataframe:
        # (label, result column) pairs, in the order they are printed.
        report_lines = (
            ("Total Triples:", 'triples'),
            ("Distinct Entities:", 'entities'),
            ("Distinct Properties:", 'properties'),
        )
        for row in kg.query(sparql):
            for label, column in report_lines:
                print(label, row[column])
        return None

    return kg.query_as_df(sparql)

In [None]:
#| hide
# Hidden check: print the summary counts for the test graph.
show_metadata(kg)

Total Triples: 306
Distinct Entities: 56
Distinct Properties: 62


**Number of Entities per Type**

In [None]:
#| export
def show_entity_types(kg, dataframe=False):
    """
    Count how many entities carry each `rdf:type`, most frequent type first.

    - `kg`: `KnowledgeGraph()` - Knowledge graph to query from
    - `dataframe` : `bool` - Optionally, return result to dataframe

    With `dataframe` set, the result of `kg.query_as_df` is returned. Otherwise one
    `type : count` line is printed per result row and nothing is returned.
    """

    sparql = """
    SELECT ?type (COUNT(?entity) as ?count)
    WHERE {
        ?entity a ?type .
    }
    GROUP BY ?type
    ORDER BY DESC(?count)
    """

    if not dataframe:
        for row in kg.query(sparql):
            print(row["type"], row["count"], sep=" : ")
        return None

    return kg.query_as_df(sparql)


In [None]:
#| hide
# Hidden check: print the per-type entity counts for the test graph.
show_entity_types(kg)

http://spdx.org/rdf/terms#Relationship : 11
http://spdx.org/rdf/terms#Checksum : 10
http://spdx.org/rdf/terms#ExtractedLicensingInfo : 5
http://spdx.org/rdf/terms#Annotation : 5
http://spdx.org/rdf/terms#Package : 4
http://spdx.org/rdf/terms#File : 4
http://spdx.org/rdf/terms#ExternalRef : 3
http://spdx.org/rdf/terms#DisjunctiveLicenseSet : 2
http://www.w3.org/2009/pointers#StartEndPointer : 2
http://www.w3.org/2009/pointers#ByteOffsetPointer : 2
http://www.w3.org/2009/pointers#LineCharPointer : 2
http://spdx.org/rdf/terms#SpdxDocument : 1
http://spdx.org/rdf/terms#PackageVerificationCode : 1
http://spdx.org/rdf/terms#ConjunctiveLicenseSet : 1
http://spdx.org/rdf/terms#CreationInfo : 1
http://spdx.org/rdf/terms#ExternalDocumentRef : 1
http://spdx.org/rdf/terms#Snippet : 1


In [None]:
#| export
def show_top_n_props(kg, n, dataframe=False):
    """
    Rank properties by the number of triples that use them and keep the first `n`.

    - `kg`: `KnowledgeGraph()` - Knowledge graph to query from
    - `n`: `int` - Top n results to display (formatted into the query's `LIMIT` with `%d`)
    - `dataframe` : `bool` - Optionally, return result to dataframe

    With `dataframe` set, the result of `kg.query_as_df` is returned. Otherwise one
    `property : count` line is printed per result row and nothing is returned.
    """

    template = """
    SELECT ?property (COUNT(?property) as ?count)
    WHERE {
        ?s ?property ?o .
    }
    GROUP BY ?property
    ORDER BY DESC(?count)
    LIMIT %d
    """
    sparql = template % n

    if not dataframe:
        for row in kg.query(sparql):
            print(row["property"], row["count"], sep=" : ")
        return None

    return kg.query_as_df(sparql)

In [None]:
#| hide
# Hidden check: the ten most-used properties of the test graph, returned as a dataframe.
show_top_n_props(kg, 10, dataframe=True)

Unnamed: 0,property,count
0,rdf:type,56
1,rdfs:comment,14
2,spdx:relatedSpdxElement,11
3,spdx:fileContributor,11
4,spdx:relationship,11
5,spdx:relationshipType,11
6,spdx:checksumValue,10
7,spdx:checksum,10
8,spdx:algorithm,10
9,spdx:licenseConcluded,9


In [None]:
#| export
import kglab
def show_measures(kg:kglab.KnowledgeGraph): #Knowledge graph to query from
    """
    Display number of nodes and edges for knowledge graph

    Runs a `kglab.Measure` over `kg` and prints the edge count followed by the
    node count. Nothing is returned.
    """
    # The annotation names the class; calling `kglab.KnowledgeGraph()` there would
    # build a throwaway graph every time this definition is evaluated.
    measure = kglab.Measure()
    measure.measure_graph(kg)

    print("edges", measure.get_edge_count())
    print("nodes", measure.get_node_count())

## Files

Contained file schema

In [None]:
#| echo: false
# List every distinct property that appears on a node typed spdx:File.
query = """
PREFIX spdx:<http://spdx.org/rdf/terms#>
SELECT DISTINCT ?property
WHERE {
  ?file rdf:type spdx:File .
  ?file ?property ?value .
}
"""


df = kg.query_as_df(query)
df

Unnamed: 0,property
0,rdf:type
1,spdx:licenseInfoInFile
2,rdfs:comment
3,spdx:licenseConcluded
4,spdx:checksum
5,spdx:fileContributor
6,spdx:fileType
7,spdx:relationship
8,spdx:licenseComments
9,spdx:fileName


In [None]:
# Copy the file-related triples of `kg` into a small standalone graph and render it.
namespaces = {
    "spdx": "http://spdx.org/rdf/terms#"
}
subgraph = kglab.KnowledgeGraph(
    namespaces = namespaces,
)
# Every variable used in the CONSTRUCT template must be bound in WHERE, otherwise
# the triple is silently dropped. ?checksum was previously unbound, so no checksum
# triples were emitted; it is OPTIONAL so files without a checksum are still kept.
query = """
PREFIX spdx:<http://spdx.org/rdf/terms#>
CONSTRUCT {
    ?file rdf:type spdx:File .
    ?file spdx:fileName ?fileName .
    ?file spdx:fileContributor ?contributor .
    ?file spdx:licenseInfoInFile ?licenseInFile .
    ?file spdx:checksum ?checksum .
    ?file spdx:relationship ?relationship .
}
WHERE {
    ?file rdf:type spdx:File .
    ?file spdx:fileName ?fileName .
    ?file spdx:fileContributor ?contributor .
    ?file spdx:licenseInfoInFile ?licenseInFile .
    OPTIONAL {?file spdx:checksum ?checksum .}
    OPTIONAL {?file spdx:relationship ?relationship .}
}
"""

# Each result row of the CONSTRUCT query is unpacked as a (subject, predicate, object) triple.
for row in kg.query(query):
    s, p, o = row
    subgraph.add(s, p, o)

# Node styling; the keys look like namespace prefixes -- TODO confirm against kglab.
VIS_STYLE = {
    "spdx": {
        "color": "orange",
        "size": 40,
    },
    "rdf":{
        "color": "red",
        "size": 30,
    },
}
g = kglab.SubgraphTensor(subgraph)
pyvis_graph = g.build_pyvis_graph(notebook=True, style=VIS_STYLE)
pyvis_graph.force_atlas_2based()
# Use a file name of its own: the relationships figure further down also wrote
# "tmp.fig.html", so the two figures overwrote each other.
pyvis_graph.show("tmp.fig_files.html")

tmp.fig.html


In [None]:
# One row per file. A file with several contributors or concluded licenses matches
# the WHERE pattern once per combination, so the concatenations use DISTINCT to
# avoid repeating the same value (e.g. "IBM Corporation, IBM Corporation").
# The concluded-license pattern binds ?concludedLicense so that the aggregate alias
# ?licenseConcluded does not reuse a variable that is already bound in WHERE.
# NOTE(review): the remaining projected variables are neither grouped nor aggregated;
# the engine used here evidently accepts that, but a stricter one may not.
query = """
PREFIX spdx:<http://spdx.org/rdf/terms#>
SELECT 
(?file AS ?fileID)
?fileName
?fileType
?licenseInFile
(GROUP_CONCAT(DISTINCT ?contributor; SEPARATOR=", ") AS ?contributors)
(GROUP_CONCAT(DISTINCT ?concludedLicense; SEPARATOR=", ") AS ?licenseConcluded)
?checksum
?relationship
?annotation
?comment
?licenseComments
?noticeText
WHERE {
  ?file rdf:type spdx:File .
  ?file spdx:fileName ?fileName .
  ?file spdx:fileContributor ?contributor .
  ?file spdx:licenseInfoInFile ?licenseInFile .
  ?file spdx:licenseConcluded ?concludedLicense .
  ?file spdx:checksum ?checksum .
  ?file spdx:copyrightText ?copyrightText .
  ?file spdx:fileType ?fileType .
  OPTIONAL {?file spdx:relationship ?relationship .}
  OPTIONAL {?file spdx:annotation ?annotation . }
  OPTIONAL {?file rdfs:comment ?comment . }
  OPTIONAL {?file spdx:licenseComments ?licenseComments . }
  OPTIONAL {?file spdx:noticeText ?noticeText . }
}
GROUP BY ?file
"""

kg.query_as_df(query)

Unnamed: 0,fileID,fileName,fileType,licenseInFile,contributors,licenseConcluded,checksum,relationship,comment,licenseComments,noticeText,annotation
0,<http://spdx.org/spdxdocs/spdx-example-444504E...,./lib-source/jena-2.6.3-sources.jar,spdx:fileType_archive,<http://spdx.org/spdxdocs/spdx-example-444504E...,"Apache Software Foundation, Hewlett Packard Inc.",http://spdx.org/spdxdocs/spdx-example-444504E0...,_:N8d065bb23af54166a7807b66fd413fc3,_:N5169ddfc0196483381a7f89eed5068a7,This file belongs to Jena,This license is used by Jena,,
1,<http://spdx.org/spdxdocs/spdx-example-444504E...,./lib-source/commons-lang3-3.1-sources.jar,spdx:fileType_archive,<http://spdx.org/licenses/Apache-2.0>,Apache Software Foundation,http://spdx.org/licenses/Apache-2.0,_:Nc213ffa88a6d48e3b10d0d7c11b6566e,_:Nb4a82500dd3d4bacb47e5fcf9e144605,This file is used by Jena,,Apache Commons Lang\nCopyright 2001-2011 The A...,
2,<http://spdx.org/spdxdocs/spdx-example-444504E...,./src/org/spdx/parser/DOAPProject.java,spdx:fileType_source,<http://spdx.org/licenses/Apache-2.0>,"Source Auditor Inc., Black Duck Software In.c,...","http://spdx.org/licenses/Apache-2.0, http://sp...",_:N2964943906914ce68eb3b521dc1f556b,,,,,
3,<http://spdx.org/spdxdocs/spdx-example-444504E...,./package/foo.c,spdx:fileType_source,<http://spdx.org/spdxdocs/spdx-example-444504E...,"IBM Corporation, IBM Corporation, Modified by ...","Ndf409e5541114a25ba5e1201c1b27fe4, Ndf409e5541...",_:N0aa53b380112478d81ed8cafa425e2a4,_:Na577ab75d0fa424d882fda85780bcc3f,The concluded license was taken from the packa...,The concluded license was taken from the packa...,Copyright (c) 2001 Aaron Lehmann aaroni@vitelu...,_:N25b78b43825a43dfb6b13246a5c329cb


In [None]:
# Visualize the results of `query`, which this cell does not define itself: it uses
# whatever value `query` currently holds (the per-file SELECT when run top to bottom).
pyvis_graph = kg.visualize_query(query, notebook=True)

pyvis_graph.force_atlas_2based()
pyvis_graph.show("tmp.fig06.html")

tmp.fig06.html


In [None]:
# Pair each spdx:File node with the relationship nodes attached to it.
# NOTE(review): unlike the other queries, this one declares no PREFIX for spdx: --
# presumably the prefix is already bound on `kg`; confirm.
query = """
SELECT ?subject ?object
WHERE {
    ?subject rdf:type spdx:File .
    ?subject spdx:relationship ?object .
}
"""

files = kg.query_as_df(query)
files.head(5)

Unnamed: 0,subject,object
0,<http://spdx.org/spdxdocs/spdx-example-444504E...,_:N5169ddfc0196483381a7f89eed5068a7
1,<http://spdx.org/spdxdocs/spdx-example-444504E...,_:Nb4a82500dd3d4bacb47e5fcf9e144605
2,<http://spdx.org/spdxdocs/spdx-example-444504E...,_:Na577ab75d0fa424d882fda85780bcc3f


In [None]:
#import networkx as nx
#
#subgraph = kglab.SubgraphMatrix(kg, query)
#nx_graph = subgraph.build_nx_graph(nx.DiGraph(), bipartite=True)

In [None]:
#nx.density(nx_graph)

### Packages

In [None]:
# List every distinct property that appears on a node typed spdx:Package, ordered
# by the upper-cased property IRI. The SPARQL variable is named ?file but it binds
# packages here.
query = """
PREFIX spdx:<http://spdx.org/rdf/terms#>
SELECT DISTINCT ?property
WHERE {
  ?file rdf:type spdx:Package .
  ?file ?property ?value .
}
order by asc(UCASE(str(?property)))
"""


df = kg.query_as_df(query)
df

Unnamed: 0,property
0,spdx:annotation
1,spdx:attributionText
2,spdx:checksum
3,spdx:copyrightText
4,spdx:description
5,spdx:downloadLocation
6,spdx:externalRef
7,spdx:filesAnalyzed
8,spdx:licenseComments
9,spdx:licenseConcluded


In [None]:
# One row per package with every package-level property as an optional column.
# Fixes relative to the earlier version of this query:
#   * ?builtDate is now the variable actually bound (it was bound as ?buildDate,
#     so the selected ?builtDate could never have a value);
#   * the predicate is spelled spdx:filesAnalyzed (it was spdx:filesAnalyed, which
#     matches nothing);
#   * comments are read from rdfs:comment, the property the file query uses and the
#     one that shows up in the graph's property counts
#     -- NOTE(review): confirm no data uses spdx:comment;
#   * GROUP_CONCAT uses DISTINCT, because the OPTIONAL blocks multiply the matches
#     per package and would otherwise repeat each value.
query = """
PREFIX spdx:<http://spdx.org/rdf/terms#>
SELECT ?package
       (GROUP_CONCAT(DISTINCT ?annotation; SEPARATOR=", ") AS ?annotations)
       (GROUP_CONCAT(DISTINCT ?attributionText; SEPARATOR=", ") AS ?attributionTexts)
       ?builtDate
       (GROUP_CONCAT(DISTINCT ?checksum; SEPARATOR=", ") AS ?checksums)
       ?comment
       ?copyrightText
       ?description
       ?downloadLocation
       (GROUP_CONCAT(DISTINCT ?externalRef; SEPARATOR=", ") AS ?externalRefs)
       ?filesAnalyzed
       (GROUP_CONCAT(DISTINCT ?hasFile; SEPARATOR=", ") AS ?hasFiles)
       ?homepage
       ?licenseComments
       ?licenseConcluded
       ?licenseDeclared
       (GROUP_CONCAT(DISTINCT ?licenseInfoFromFile; SEPARATOR=", ") AS ?licenseInfoFromFiles)
       ?name
       ?originator
       ?packageFileName
       ?packageVerificationCode
       ?primaryPackagePurpose
       ?releaseDate
       ?sourceInfo
       ?summary
       ?supplier
       ?validUntilDate
       ?versionInfo
       (GROUP_CONCAT(DISTINCT ?relationship; SEPARATOR=", ") AS ?relationships)
WHERE {
      ?package rdf:type spdx:Package .
      OPTIONAL { ?package spdx:annotation ?annotation . }
      OPTIONAL { ?package spdx:attributionText ?attributionText . }
      OPTIONAL { ?package spdx:builtDate ?builtDate . }
      OPTIONAL { ?package spdx:checksum ?checksum . }
      OPTIONAL { ?package rdfs:comment ?comment . }
      OPTIONAL { ?package spdx:copyrightText ?copyrightText . }
      OPTIONAL { ?package spdx:description ?description . }
      OPTIONAL { ?package spdx:downloadLocation ?downloadLocation . }
      OPTIONAL { ?package spdx:externalRef ?externalRef . }
      OPTIONAL { ?package spdx:filesAnalyzed ?filesAnalyzed . }
      OPTIONAL { ?package spdx:hasFile ?hasFile . }
      OPTIONAL { ?package spdx:homepage ?homepage . }
      OPTIONAL { ?package spdx:licenseComments ?licenseComments . }
      OPTIONAL { ?package spdx:licenseConcluded ?licenseConcluded . }
      OPTIONAL { ?package spdx:licenseDeclared ?licenseDeclared . }
      OPTIONAL { ?package spdx:licenseInfoFromFile ?licenseInfoFromFile . }
      OPTIONAL { ?package spdx:name ?name . }
      OPTIONAL { ?package spdx:originator ?originator . }
      OPTIONAL { ?package spdx:packageFileName ?packageFileName . }
      OPTIONAL { ?package spdx:packageVerificationCode ?packageVerificationCode . }
      OPTIONAL { ?package spdx:primaryPackagePurpose ?primaryPackagePurpose . }
      OPTIONAL { ?package spdx:releaseDate ?releaseDate . }
      OPTIONAL { ?package spdx:sourceInfo ?sourceInfo . }
      OPTIONAL { ?package spdx:summary ?summary . }
      OPTIONAL { ?package spdx:supplier ?supplier . }
      OPTIONAL { ?package spdx:validUntilDate ?validUntilDate . }
      OPTIONAL { ?package spdx:versionInfo ?versionInfo . }
      OPTIONAL { ?package spdx:relationship ?relationship . }
}
GROUP BY ?package
"""

pkgs = kg.query_as_df(query)
pkgs.head(5)

Unnamed: 0,package,annotations,attributionTexts,checksums,copyrightText,downloadLocation,externalRefs,hasFiles,licenseConcluded,licenseDeclared,...,relationships,description,licenseComments,originator,packageFileName,packageVerificationCode,sourceInfo,summary,supplier,versionInfo
0,<http://spdx.org/spdxdocs/spdx-example-444504E...,,,,NOASSERTION,spdx:noassertion,,,spdx:noassertion,spdx:noassertion,...,,,,,,,,,,
1,<http://spdx.org/spdxdocs/spdx-example-444504E...,"N085f576a95364e2b8d4d6617c2a6369d, N085f576a95...",The GNU C Library is free software. See the f...,"Nfa7a3c76393b434ab6da973ca6b772c1, Nfa7a3c7639...",Copyright 2008-2010 John Smith,http://ftp.gnu.org/gnu/glibc/glibc-ports-2.15....,"Na3a0d075c07d497ba38131127c2107bc, Na3a0d075c0...",,_:Na7cac89f115c426c9e3dbdb9ae108827,_:N4a45b0d236794bd19a74d8601335c0c8,...,"N614c99865de6435c98bfa8258d56a422, N8bf969e757...",The GNU C Library defines functions that are s...,The license for this project changed with the ...,Organization: ExampleCodeInspect (contact@exam...,glibc-2.11.1.tar.gz,_:N6f4948534d724990a492ef5147e9e5a2,uses glibc-2_11-branch from git://sourceware.o...,GNU C library.,Person: Jane Doe (jane.doe@example.com),2.11.1
2,<http://spdx.org/spdxdocs/spdx-example-444504E...,,,N2e06250e2a3b4eff85b406d5fc0894c7,Copyright Saxonica Ltd,https://sourceforge.net/projects/saxon/files/S...,,,<http://spdx.org/licenses/MPL-1.0>,<http://spdx.org/licenses/MPL-1.0>,...,,The Saxon package is a collection of tools for...,Other versions available for a commercial license,,saxonB-8.8.zip,,,,,8.8
3,<http://spdx.org/spdxdocs/spdx-example-444504E...,,,,NOASSERTION,https://search.maven.org/remotecontent?filepat...,N2f98b14e529f4cd09664d8ae47a7136f,,spdx:noassertion,spdx:noassertion,...,,,,,,,,,,3.12.0


Construct a subgraph from this query

### Relationships

In [None]:
# List every distinct property that appears on a node typed spdx:Relationship.
# The SPARQL variable is named ?file but it binds relationship nodes here.
query = """
PREFIX spdx:<http://spdx.org/rdf/terms#>
SELECT DISTINCT ?property
WHERE {
  ?file rdf:type spdx:Relationship .
  ?file ?property ?value .
}
"""


df = kg.query_as_df(query)
df

Unnamed: 0,property
0,rdf:type
1,spdx:relationshipType
2,spdx:relatedSpdxElement


In [None]:
# Flatten each relationship into one row: the source element and its type, the
# relationship type, and the related element and its type. Elements or related
# elements without an rdf:type do not match and are left out.
query = """
PREFIX spdx:<http://spdx.org/rdf/terms#>
SELECT ?element ?elementType ?relationshipType ?relatedElement ?relatedElementType
WHERE {
  ?element spdx:relationship ?relationship .
  ?element rdf:type ?elementType .
  ?relationship spdx:relatedSpdxElement ?relatedElement .
  ?relationship spdx:relationshipType ?relationshipType .
  ?relatedElement rdf:type ?relatedElementType .
}
"""

#for row in kg.query(query):
#    print(row.asdict())
#    print()


df = kg.query_as_df(query)
df

Unnamed: 0,element,elementType,relationshipType,relatedElement,relatedElementType
0,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:File,spdx:relationshipType_contains,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package
1,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package,spdx:relationshipType_contains,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:File
2,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package,spdx:relationshipType_dynamicLink,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package
3,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package,spdx:relationshipType_contains,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:File
4,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package,spdx:relationshipType_contains,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:File
5,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:SpdxDocument,spdx:relationshipType_contains,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package
6,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:File,spdx:relationshipType_generatedFrom,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package
7,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:SpdxDocument,spdx:relationshipType_describes,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:File
8,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:SpdxDocument,spdx:relationshipType_describes,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package


In [None]:
# Copy the relationship triples of `kg` into a small standalone graph and render it.
# The "this" namespace is the IRI of the example SPDX document, hard-coded here.
namespaces = {
    "spdx": "http://spdx.org/rdf/terms#",
    "this": "http://spdx.org/spdxdocs/spdx-example-444504E0-4F89-41D3-9A0C-0305E82C3301#"
}
subgraph = kglab.KnowledgeGraph(
    namespaces = namespaces,
)
# The CONSTRUCT template repeats the WHERE pattern, so exactly the matched triples
# are copied.
query = """
PREFIX spdx:<http://spdx.org/rdf/terms#>
CONSTRUCT {
  ?element spdx:relationship ?relationship .
  ?element rdf:type ?elementType .
  ?relationship spdx:relatedSpdxElement ?relatedElement .
  ?relationship spdx:relationshipType ?relationshipType .
  ?relatedElement rdf:type ?relatedElementType .
}
WHERE {
  ?element spdx:relationship ?relationship .
  ?element rdf:type ?elementType .
  ?relationship spdx:relatedSpdxElement ?relatedElement .
  ?relationship spdx:relationshipType ?relationshipType .
  ?relatedElement rdf:type ?relatedElementType .
}
"""

# Each result row of the CONSTRUCT query is unpacked as a (subject, predicate, object) triple.
for row in kg.query(query):
    s, p, o = row
    subgraph.add(s, p, o)

# Node styling; the keys look like namespace prefixes -- TODO confirm against kglab.
VIS_STYLE = {
    "spdx": {
        "color": "orange",
        "size": 40,
    },
    "this":{
        "color": "green",
        "size": 30,
    },
}
g = kglab.SubgraphTensor(subgraph)
pyvis_graph = g.build_pyvis_graph(notebook=True, style=VIS_STYLE)
pyvis_graph.force_atlas_2based()
# Use a file name of its own: the file-subgraph figure earlier in the notebook also
# wrote "tmp.fig.html", so the two figures overwrote each other.
pyvis_graph.show("tmp.fig_relationships.html")

tmp.fig.html


## Visualization

In [None]:
# Build a pyvis graph for the whole knowledge graph `kg`.
# Node styling; the keys look like namespace prefixes -- TODO confirm against kglab.
VIS_STYLE = {
    "spdx": {
        "color": "orange",
        "size": 40,
    },
    "rdf":{
        "color": "blue",
        "size": 30,
    },
    "ptr":{
        "color": "red",
        "size": 20,
    },
}

# Rebinds `subgraph` (a KnowledgeGraph in earlier cells) to a SubgraphTensor over `kg`.
subgraph = kglab.SubgraphTensor(kg)
pyvis_graph = subgraph.build_pyvis_graph(notebook=True, style=VIS_STYLE)



In [None]:
# Apply the force-atlas layout to the current `pyvis_graph` and write it to an HTML file.
pyvis_graph.force_atlas_2based()
pyvis_graph.show("tmp.fig03.html")


tmp.fig03.html


In [None]:
#| default_exp core