In [1]:
#| hide
import nbdev; nbdev.nbdev_export()
import kglab
import pandas as pd
from pandas import DataFrame

# Core Functions

This page defines all boilerplate functions to be used when analyzing sboms.  To see these functions in action please view the following pages.

Many of the queries seen on this page were written with the help of ChatGPT.  To repeat this simply prompt ChatGPT as follows: 
`Write me a SPARQL query to select X, Y, and Z`

#| hide
### Load KG for testing

In [2]:
#| hide
import kglab
kg = kglab.KnowledgeGraph()
kg.load_rdf("sboms/rdf/model.rdf.xml", format="xml")

<kglab.kglab.KnowledgeGraph at 0x14ffcab50>

## Basic Queries

These functions perform basic queries and return metadata about a given knowledge graph

In [3]:
#| export
from kglab import KnowledgeGraph
def show_metadata(kg:KnowledgeGraph, #Knowledge graph to query from
                  dataframe: bool=False): #Optionally, return result to dataframe
    """
    Return total number of triples, distict entities, and properties to a pandas dataframe.
    """
    
    query = """
    SELECT 
        (COUNT(*) AS ?triples)
        (COUNT(DISTINCT ?entity) AS ?entities)
        (COUNT(DISTINCT ?property) AS ?properties)
    WHERE {
        ?subject ?property ?object .
        BIND(?subject AS ?entity) .
    }
    """

    if dataframe:
        return kg.query_as_df(query)

    for row in kg.query(query):
        print("Total Triples:", row['triples'])
        print("Distinct Entities:", row['entities'])
        print("Distinct Properties:", row['properties'])

In [4]:
#| hide
show_metadata(kg)

Total Triples: 306
Distinct Entities: 56
Distinct Properties: 62


In [5]:
#| export
from kglab import KnowledgeGraph
def show_entity_types(kg: KnowledgeGraph, # Knowledge graph to query from 
                      dataframe: bool=False): # Optionally, return result to dataframe
    """
    Show counts of entity types
    """
    
    query = """
    SELECT ?type (COUNT(?entity) as ?count)
    WHERE {
        ?entity a ?type .
    }
    GROUP BY ?type
    ORDER BY DESC(?count)
    """
    
    if dataframe:
        return kg.query_as_df(query)

    result = kg.query(query)
    for row in result:
        print(row["type"], ":", row["count"])


In [6]:
#| hide
show_entity_types(kg)

http://spdx.org/rdf/terms#Relationship : 11
http://spdx.org/rdf/terms#Checksum : 10
http://spdx.org/rdf/terms#ExtractedLicensingInfo : 5
http://spdx.org/rdf/terms#Annotation : 5
http://spdx.org/rdf/terms#Package : 4
http://spdx.org/rdf/terms#File : 4
http://spdx.org/rdf/terms#ExternalRef : 3
http://spdx.org/rdf/terms#DisjunctiveLicenseSet : 2
http://www.w3.org/2009/pointers#StartEndPointer : 2
http://www.w3.org/2009/pointers#ByteOffsetPointer : 2
http://www.w3.org/2009/pointers#LineCharPointer : 2
http://spdx.org/rdf/terms#SpdxDocument : 1
http://spdx.org/rdf/terms#PackageVerificationCode : 1
http://spdx.org/rdf/terms#ConjunctiveLicenseSet : 1
http://spdx.org/rdf/terms#CreationInfo : 1
http://spdx.org/rdf/terms#ExternalDocumentRef : 1
http://spdx.org/rdf/terms#Snippet : 1


In [7]:
#| export
from kglab import KnowledgeGraph
def show_top_n_props(kg: KnowledgeGraph,  # Knowledge graph to query from
                     n: int=10, # Top n results to display
                     dataframe: bool=False): # Optionally, return result to dataframe
    """
    Display the top N properties from a knowledge graph
    """
    
    query = """
    SELECT ?property (COUNT(?property) as ?count)
    WHERE {
        ?s ?property ?o .
    }
    GROUP BY ?property
    ORDER BY DESC(?count)
    LIMIT %d
    """ % n
    
    if dataframe:
        return kg.query_as_df(query)

    result = kg.query(query)
    for row in result:
        print(row["property"], ":", row["count"])

In [8]:
#| hide
show_top_n_props(kg, 10, dataframe=True)

Unnamed: 0,property,count
0,rdf:type,56
1,rdfs:comment,14
2,spdx:relatedSpdxElement,11
3,spdx:relationshipType,11
4,spdx:fileContributor,11
5,spdx:relationship,11
6,spdx:checksum,10
7,spdx:checksumValue,10
8,spdx:algorithm,10
9,spdx:licenseConcluded,9


In [9]:
#| export
from kglab import KnowledgeGraph
def show_measures(kg:KnowledgeGraph): #Knowledge graph to query from
    """
    Display number of nodes and edges for knowledge graph
    """
    measure = kglab.Measure()
    measure.measure_graph(kg)

    print("edges", measure.get_edge_count())
    print("nodes", measure.get_node_count())

## Files

These functions peform queries on a SBOM's files

In [10]:
#| export
def file_schema(kg:KnowledgeGraph): #Knowledge graph to query from
  """
  Display all properties for a file
  """
  query = """
  PREFIX spdx:<http://spdx.org/rdf/terms#>
  SELECT DISTINCT ?property
  WHERE {
    ?file rdf:type spdx:File .
    ?file ?property ?value .
  }
  order by asc(UCASE(str(?property)))
  """
  return kg.query_as_df(query)

In [11]:
#| hide
file_schema(kg)

Unnamed: 0,property
0,spdx:annotation
1,spdx:checksum
2,spdx:copyrightText
3,spdx:fileContributor
4,spdx:fileName
5,spdx:fileType
6,spdx:licenseComments
7,spdx:licenseConcluded
8,spdx:licenseInfoInFile
9,spdx:noticeText


In [12]:
#| export
from pandas import DataFrame
def get_files_data(kg:KnowledgeGraph #Knowledge graph to query from
                   ) -> DataFrame: #Return result to dataframe
    """
    Return all files and their properties
    """
    query = """
    PREFIX spdx:<http://spdx.org/rdf/terms#>
    SELECT 
    (?file AS ?fileID)
    ?fileName
    ?fileType
    ?licenseInFile
    (GROUP_CONCAT(?contributor; SEPARATOR=", ") AS ?contributors)
    (GROUP_CONCAT(?licenseConcluded; SEPARATOR=", ") AS ?licenseConcluded)
    ?checksum
    ?relationship
    ?annotation
    ?comment
    ?licenseComments
    ?noticeText
    WHERE {
    ?file rdf:type spdx:File .
    OPTIONAL {?file spdx:fileName ?fileName .}
    OPTIONAL {?file spdx:fileContributor ?contributor .}
    OPTIONAL {?file spdx:licenseInfoInFile ?licenseInFile .}
    OPTIONAL {?file spdx:licenseConcluded ?licenseConcluded .}
    OPTIONAL {?file spdx:checksum ?checksum .}
    OPTIONAL {?file spdx:copyrightText ?copyrightText .}
    OPTIONAL {?file spdx:fileType ?fileType .}
    OPTIONAL {?file spdx:relationship ?relationship .}
    OPTIONAL {?file spdx:annotation ?annotation . }
    OPTIONAL {?file rdfs:comment ?comment . }
    OPTIONAL {?file spdx:licenseComments ?licenseComments . }
    OPTIONAL {?file spdx:noticeText ?noticeText . }
    }
    GROUP BY ?file
    """

    return kg.query_as_df(query)

In [13]:
#| hide
get_files_data(kg)

Unnamed: 0,fileID,fileName,fileType,licenseInFile,contributors,licenseConcluded,checksum,relationship,comment,licenseComments,noticeText,annotation
0,<http://spdx.org/spdxdocs/spdx-example-444504E...,./lib-source/jena-2.6.3-sources.jar,spdx:fileType_archive,<http://spdx.org/spdxdocs/spdx-example-444504E...,"Apache Software Foundation, Hewlett Packard Inc.",http://spdx.org/spdxdocs/spdx-example-444504E0...,_:Ndb8cc695e6e94aa28363fd063688ccf4,_:N264c33c7e68a4e9685f838eb82e27ff5,This file belongs to Jena,This license is used by Jena,,
1,<http://spdx.org/spdxdocs/spdx-example-444504E...,./lib-source/commons-lang3-3.1-sources.jar,spdx:fileType_archive,<http://spdx.org/licenses/Apache-2.0>,Apache Software Foundation,http://spdx.org/licenses/Apache-2.0,_:N1f394eb95ea746a280c177d224676ee1,_:N4612429eff104780b94fef7b53a39bd2,This file is used by Jena,,Apache Commons Lang\nCopyright 2001-2011 The A...,
2,<http://spdx.org/spdxdocs/spdx-example-444504E...,./src/org/spdx/parser/DOAPProject.java,spdx:fileType_source,<http://spdx.org/licenses/Apache-2.0>,"Source Auditor Inc., Black Duck Software In.c,...","http://spdx.org/licenses/Apache-2.0, http://sp...",_:N1bf365df766b48f0bda19ae45ba56e6a,,,,,
3,<http://spdx.org/spdxdocs/spdx-example-444504E...,./package/foo.c,spdx:fileType_source,<http://spdx.org/spdxdocs/spdx-example-444504E...,"IBM Corporation, IBM Corporation, IBM Corporat...","Ne9dc5d03382e4282b18da58ef35c6032, Ne9dc5d0338...",_:N01ce0fb68f68498095be97cafec34f64,_:Nf5f96b834fc1444f8ff4b952e32bc189,The concluded license was taken from the packa...,The concluded license was taken from the packa...,Copyright (c) 2001 Aaron Lehmann aaroni@vitelu...,_:N337c0392cf814570bdc31ff12654e8ff


In [14]:
#| export
def get_files_graph(kg:KnowledgeGraph #Knowledge graph to query from
                    )-> KnowledgeGraph: #Return result to knowledge graph
    """
    Return a subgraph of the files and their properties
    """
    namespaces = {
        "spdx": "http://spdx.org/rdf/terms#"
    }
    subgraph = kglab.KnowledgeGraph(
        namespaces = namespaces,
    )
    query = """
    PREFIX spdx:<http://spdx.org/rdf/terms#>
    CONSTRUCT {
        ?file rdf:type spdx:File .
        ?file spdx:fileName ?fileName .
        ?file spdx:fileContributor ?contributor .
        ?file spdx:licenseInfoInFile ?licenseInFile .
        ?file spdx:checksum ?checksum .
        ?file spdx:relationship ?relationship .
    }
    WHERE {
        ?file rdf:type spdx:File .
        OPTIONAL {?file spdx:fileName ?fileName .}
        OPTIONAL {?file spdx:fileContributor ?contributor .}
        OPTIONAL {?file spdx:licenseInfoInFile ?licenseInFile .}
        OPTIONAL {?file spdx:relationship ?relationship .}
    }
    """

    for row in kg.query(query):
        s, p, o = row
        subgraph.add(s, p, o)

    return subgraph # Return the subgraph

## Packages



In [15]:
#| export
def package_schema(kg:KnowledgeGraph #Knowledge graph to query from
                  )-> DataFrame: #Return result to dataframe
  """
  Return the schema for the package
  """
  query = """
  PREFIX spdx:<http://spdx.org/rdf/terms#>
  SELECT DISTINCT ?property
  WHERE {
    ?package rdf:type spdx:Package .
    ?package ?property ?value .
  }
  order by asc(UCASE(str(?property)))
  """
  return kg.query_as_df(query)

In [16]:
#| hide
package_schema(kg)

Unnamed: 0,property
0,spdx:annotation
1,spdx:attributionText
2,spdx:checksum
3,spdx:copyrightText
4,spdx:description
5,spdx:downloadLocation
6,spdx:externalRef
7,spdx:filesAnalyzed
8,spdx:licenseComments
9,spdx:licenseConcluded


In [17]:
#| export
def get_package_data(kg:KnowledgeGraph #Knowledge graph to query from
                     ) -> DataFrame: #Return result to dataframe
    """
    Construct a dataframe of package data
    """
    query = """
    PREFIX spdx:<http://spdx.org/rdf/terms#>
    SELECT ?package
        (GROUP_CONCAT(?annotation; SEPARATOR=", ") AS ?annotations)
        (GROUP_CONCAT(?attributionText; SEPARATOR=", ") AS ?attributionTexts)
        ?builtDate
        (GROUP_CONCAT(?checksum; SEPARATOR=", ") AS ?checksums)
        ?comment
        ?copyrightText
        ?description
        ?downloadLocation
        (GROUP_CONCAT(?externalRef; SEPARATOR=", ") AS ?externalRefs)
        ?filesAnalyzed
        (GROUP_CONCAT(?hasFile; SEPARATOR=", ") AS ?hasFiles)
        ?homepage
        ?licenseComments
        ?licenseConcluded
        ?licenseDeclared
        (GROUP_CONCAT(?licenseInfoFromFile; SEPARATOR=", ") AS ?licenseInfoFromFiles)
        ?name
        ?originator
        ?packageFileName
        ?packageVerificationCode
        ?primaryPackagePurpose
        ?releaseDate
        ?sourceInfo
        ?summary
        ?supplier
        ?validUntilDate
        ?versionInfo
        (GROUP_CONCAT(?relationship; SEPARATOR=", ") AS ?relationships)
    WHERE {
        ?package rdf:type spdx:Package .
        OPTIONAL { ?package spdx:annotation ?annotation . }
        OPTIONAL { ?package spdx:attributionText ?attributionText . }
        OPTIONAL { ?package spdx:builtDate ?buildDate . }
        OPTIONAL { ?package spdx:checksum ?checksum . }
        OPTIONAL { ?package spdx:comment ?comment . }
        OPTIONAL { ?package spdx:copyrightText ?copyrightText . }
        OPTIONAL { ?package spdx:description ?description . }
        OPTIONAL { ?package spdx:downloadLocation ?downloadLocation . }
        OPTIONAL { ?package spdx:externalRef ?externalRef . }
        OPTIONAL { ?package spdx:filesAnalyed ?filesAnalyzed . }
        OPTIONAL { ?package spdx:hasFile ?hasFile . }
        OPTIONAL { ?package spdx:homepage ?homepage . }
        OPTIONAL { ?package spdx:licenseComments ?licenseComments . }
        OPTIONAL { ?package spdx:licenseConcluded ?licenseConcluded . }
        OPTIONAL { ?package spdx:licenseDeclared ?licenseDeclared . }
        OPTIONAL { ?package spdx:licenseInfoFromFile ?licenseInfoFromFile . }
        OPTIONAL { ?package spdx:name ?name . }
        OPTIONAL { ?package spdx:originator ?originator . }
        OPTIONAL { ?package spdx:packageFileName ?packageFileName . }
        OPTIONAL { ?package spdx:packageVerificationCode ?packageVerificationCode . }
        OPTIONAL { ?package spdx:primaryPackagePurpose ?primaryPackagePurpose . }
        OPTIONAL { ?package spdx:releaseDate ?releaseDate . }
        OPTIONAL { ?package spdx:sourceInfo ?sourceInfo . }
        OPTIONAL { ?package spdx:summary ?summary . }
        OPTIONAL { ?package spdx:supplier ?supplier . }
        OPTIONAL { ?package spdx:validUntilDate ?validUntilDate . }
        OPTIONAL { ?package spdx:versionInfo ?versionInfo . }
        OPTIONAL { ?package spdx:relationship ?relationship . }
    }
    GROUP BY ?package
    """

    return kg.query_as_df(query)

In [18]:
#| hide
get_package_data(kg)

Unnamed: 0,package,annotations,attributionTexts,checksums,copyrightText,downloadLocation,externalRefs,hasFiles,licenseConcluded,licenseDeclared,...,relationships,description,licenseComments,originator,packageFileName,packageVerificationCode,sourceInfo,summary,supplier,versionInfo
0,<http://spdx.org/spdxdocs/spdx-example-444504E...,,,,NOASSERTION,spdx:noassertion,,,spdx:noassertion,spdx:noassertion,...,,,,,,,,,,
1,<http://spdx.org/spdxdocs/spdx-example-444504E...,"N371007482a604f3f8a2e40880779aec4, N371007482a...",The GNU C Library is free software. See the f...,"N9ed2fcc009ee40ff90c1b4c5c3234bda, N9ed2fcc009...",Copyright 2008-2010 John Smith,http://ftp.gnu.org/gnu/glibc/glibc-ports-2.15....,"N4d0d6830e4c24d889228b1355d0db4eb, N4d0d6830e4...",,_:N2cb7e34bfbdb4d589f90069af40e065a,_:N13822fda1d9642fb86d8297d4beec50f,...,"Naf2d97f1cb674835bb550a6a939f7564, N07100ef8cc...",The GNU C Library defines functions that are s...,The license for this project changed with the ...,Organization: ExampleCodeInspect (contact@exam...,glibc-2.11.1.tar.gz,_:N716959472723448cb4c7dd192eebdebb,uses glibc-2_11-branch from git://sourceware.o...,GNU C library.,Person: Jane Doe (jane.doe@example.com),2.11.1
2,<http://spdx.org/spdxdocs/spdx-example-444504E...,,,Ne23f8541f17a4699b37cc176c8baeea4,Copyright Saxonica Ltd,https://sourceforge.net/projects/saxon/files/S...,,,<http://spdx.org/licenses/MPL-1.0>,<http://spdx.org/licenses/MPL-1.0>,...,,The Saxon package is a collection of tools for...,Other versions available for a commercial license,,saxonB-8.8.zip,,,,,8.8
3,<http://spdx.org/spdxdocs/spdx-example-444504E...,,,,NOASSERTION,https://search.maven.org/remotecontent?filepat...,Nc2393655838343878c4580cf57e8a8d3,,spdx:noassertion,spdx:noassertion,...,,,,,,,,,,3.12.0


In [19]:
#| export
def get_package_graph(kg:KnowledgeGraph #Knowledge graph to query from
                      ) -> KnowledgeGraph: #Return result to knowledge graph
    """
    Construct a subgraph of the package data
    """
    namespaces = {
        "spdx": "http://spdx.org/rdf/terms#"
    }   

    subgraph = kglab.KnowledgeGraph(
        namespaces = namespaces,
    )   

    query = """
    PREFIX spdx:<http://spdx.org/rdf/terms#>
    CONSTRUCT {
        ?package rdf:type spdx:Package .
        ?package spdx:annotation ?annotation .
        ?package spdx:attributionText ?attributionText .
        ?package spdx:builtDate ?buildDate .
        ?package spdx:checksum ?checksum .
        ?package spdx:comment ?comment .
        ?package spdx:copyrightText ?copyrightText .
        ?package spdx:description ?description .
        ?package spdx:downloadLocation ?downloadLocation .
        ?package spdx:externalRef ?externalRef .
        ?package spdx:filesAnalyed ?filesAnalyzed .
        ?package spdx:hasFile ?hasFile .
        ?package spdx:homepage ?homepage .
        ?package spdx:licenseComments ?licenseComments .
        ?package spdx:licenseConcluded ?licenseConcluded .
        ?package spdx:licenseDeclared ?licenseDeclared .
        ?package spdx:licenseInfoFromFile ?licenseInfoFromFile .
        ?package spdx:name ?name .
        ?package spdx:originator ?originator .
        ?package spdx:packageFileName ?packageFileName .
        ?package spdx:packageVerificationCode ?packageVerificationCode .
        ?package spdx:primaryPackagePurpose ?primaryPackagePurpose .
        ?package spdx:releaseDate ?releaseDate .
        ?package spdx:sourceInfo ?sourceInfo .
        ?package spdx:summary ?summary .
        ?package spdx:supplier ?supplier .
        ?package spdx:validUntilDate ?validUntilDate .
        ?package spdx:versionInfo ?versionInfo .
        ?package spdx:relationship ?relationship .
    }   
    WHERE {
        ?package rdf:type spdx:Package .
        OPTIONAL { ?package spdx:annotation ?annotation . }
        OPTIONAL { ?package spdx:attributionText ?attributionText . }
        OPTIONAL { ?package spdx:attributionText ?attributionText . }
        OPTIONAL { ?package spdx:builtDate ?buildDate . }
        OPTIONAL { ?package spdx:checksum ?checksum . }
        OPTIONAL { ?package spdx:comment ?comment . }
        OPTIONAL { ?package spdx:copyrightText ?copyrightText . }
        OPTIONAL { ?package spdx:description ?description . }
        OPTIONAL { ?package spdx:downloadLocation ?downloadLocation . }
        OPTIONAL { ?package spdx:externalRef ?externalRef . }
        OPTIONAL { ?package spdx:filesAnalyed ?filesAnalyzed . }
        OPTIONAL { ?package spdx:hasFile ?hasFile . }
        OPTIONAL { ?package spdx:homepage ?homepage . }
        OPTIONAL { ?package spdx:licenseComments ?licenseComments . }
        OPTIONAL { ?package spdx:licenseConcluded ?licenseConcluded . }
        OPTIONAL { ?package spdx:licenseDeclared ?licenseDeclared . }
        OPTIONAL { ?package spdx:licenseInfoFromFile ?licenseInfoFromFile . }
        OPTIONAL { ?package spdx:name ?name . }
        OPTIONAL { ?package spdx:originator ?originator . }
        OPTIONAL { ?package spdx:packageFileName ?packageFileName . }
        OPTIONAL { ?package spdx:packageVerificationCode ?packageVerificationCode . }
        OPTIONAL { ?package spdx:primaryPackagePurpose ?primaryPackagePurpose . }
        OPTIONAL { ?package spdx:releaseDate ?releaseDate . }
        OPTIONAL { ?package spdx:sourceInfo ?sourceInfo . }
        OPTIONAL { ?package spdx:summary ?summary . }
        OPTIONAL { ?package spdx:supplier ?supplier . }
        OPTIONAL { ?package spdx:validUntilDate ?validUntilDate . }
        OPTIONAL { ?package spdx:versionInfo ?versionInfo . }
        OPTIONAL { ?package spdx:relationship ?relationship . }
    }
    GROUP BY ?package
    """
    for row in kg.query(query):
        s, p, o = row
        subgraph.add(s, p, o)

    return subgraph # Return the subgraph

## Relationships

In [20]:
#| export
def relationship_schema(kg : KnowledgeGraph #Knowledge graph to query from
                        ) -> DataFrame: #Return result to dataframe
    """
    Return the schema for the relationships
    """

    query = """
    PREFIX spdx:<http://spdx.org/rdf/terms#>
    SELECT DISTINCT ?property
    WHERE {
    ?file rdf:type spdx:Relationship .
    ?file ?property ?value .
    }
    """


    return kg.query_as_df(query)


In [21]:
#| hide
relationship_schema(kg)

Unnamed: 0,property
0,rdf:type
1,spdx:relationshipType
2,spdx:relatedSpdxElement


In [22]:
#| export
def get_relationship_data(kg:KnowledgeGraph #Knowledge graph to query from
                          ) -> DataFrame: #Return result to dataframe
    
    query = """
    PREFIX spdx:<http://spdx.org/rdf/terms#>
    SELECT ?element ?elementType ?relationshipType ?relatedElement ?relatedElementType
    WHERE {
    ?element spdx:relationship ?relationship .
    ?element rdf:type ?elementType .
    ?relationship spdx:relatedSpdxElement ?relatedElement .
    ?relationship spdx:relationshipType ?relationshipType .
    ?relatedElement rdf:type ?relatedElementType .
    }
    """

    return kg.query_as_df(query)

In [23]:
#| hide
get_relationship_data(kg)

Unnamed: 0,element,elementType,relationshipType,relatedElement,relatedElementType
0,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:File,spdx:relationshipType_contains,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package
1,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package,spdx:relationshipType_contains,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:File
2,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package,spdx:relationshipType_dynamicLink,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package
3,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package,spdx:relationshipType_contains,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:File
4,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package,spdx:relationshipType_contains,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:File
5,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:SpdxDocument,spdx:relationshipType_contains,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package
6,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:File,spdx:relationshipType_generatedFrom,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package
7,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:SpdxDocument,spdx:relationshipType_describes,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:File
8,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:SpdxDocument,spdx:relationshipType_describes,<http://spdx.org/spdxdocs/spdx-example-444504E...,spdx:Package


In [24]:
#| export
def get_relationship_graph(kg: KnowledgeGraph #Knowledge graph to query from
                           ) -> KnowledgeGraph: #Return result to knowledge graph
    """
    Construct a subgraph of the relationship data
    """

    namespaces = {
    "spdx": "http://spdx.org/rdf/terms#",
    }
    subgraph = kglab.KnowledgeGraph(
        namespaces = namespaces,
    )
    query = """
    PREFIX spdx:<http://spdx.org/rdf/terms#>
    CONSTRUCT {
    ?element spdx:relationship ?relationship .
    ?element rdf:type ?elementType .
    ?relationship spdx:relatedSpdxElement ?relatedElement .
    ?relationship spdx:relationshipType ?relationshipType .
    ?relatedElement rdf:type ?relatedElementType .
    }
    WHERE {
    ?element spdx:relationship ?relationship .
    ?element rdf:type ?elementType .
    ?relationship spdx:relatedSpdxElement ?relatedElement .
    ?relationship spdx:relationshipType ?relationshipType .
    ?relatedElement rdf:type ?relatedElementType .
    }
    """

    for row in kg.query(query):
        s, p, o = row
        subgraph.add(s, p, o)

    return subgraph

In [25]:
#| hide
get_relationship_graph(kg)

<kglab.kglab.KnowledgeGraph at 0x16e1b5460>

## Visualization

Functions to visualize graph results

In [26]:
#| export
import kglab
def visualize_graph(kg:KnowledgeGraph, #Knowledge graph to query from
                    VIS_STYLE: str={} #Optional, visualization style
                    ):
    """
    Visualize the knowledge graph
    """
    if not VIS_STYLE:
        VIS_STYLE = {
            "spdx": {
                "color": "orange",
                "size": 40,
            },
            "rdf":{
                "color": "blue",
                "size": 30,
            },
            "ptr":{
                "color": "red",
                "size": 20,
            },
        }
    subgraph = kglab.SubgraphTensor(kg)
    return subgraph.build_pyvis_graph(notebook=True, style=VIS_STYLE)

In [27]:
#| hide

# Create a knowledge graph
kg = kglab.KnowledgeGraph()


# load the knowledge graph info
kg.load_rdf("sboms/rdf/model.rdf.xml", format="xml")

pyvis_graph = visualize_graph(kg)
pyvis_graph.force_atlas_2based()
pyvis_graph.show("tmp.html")

tmp.html


In [28]:
#| export
import kglab
from pyvis.network import Network

def visualize_relationship_graph(
    kg: KnowledgeGraph,     # Knowledge graph to query from
    hideTypeFile: str=False # Flag for showing SPDX:File type components in the graph
    ) -> Network:           # Return a Network object representing the SBOMs relationship graph

    """
    Construct a Network object for representing the SBOMs components relationship graph.
    The returned graph is ready to be visualized using `.show()`. 
    """
    
    def get_node_title(elmName: str, elmType: str, elmVersion: str, elemPurpose: str) -> str:
        """
        Create a node title. 
        The title will be the node hover text.
        """
        nodeTitle = f"{elmType}: {elmName}"
        if elmVersion:
            nodeTitle += f"\nVersion:{elmVersion}"
        if elemPurpose:
            nodeTitle += "\nPurpose: " + elemPurpose.split("purpose_")[1]
        return nodeTitle

    def get_node_label(elmName: str, elmVersion: str) -> str:
        """
        Create a node label. 
        The label will be the text under the node.
        """
        nodeLabel = elmName
        if elmVersion: nodeLabel += "==" + elmVersion
        return nodeLabel
    
       
    VIS_STYLE = { 
        'SpdxDocument': {
            "color": "#DE3163",
            "size": 20,
        },
        'Package': {
            "color": "#99ccff",
            "size": 20,
        },
        'File': {
            "color": "#FFBF00",
            "size": 15,
        },
    }
    
    SPDX_NS = "http://spdx.org/rdf/terms#"
    
    QUERY = """
    PREFIX spdx:<http://spdx.org/rdf/terms#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    
    SELECT
        ?element
        ?elementName
        ?elementType
        ?elementVersionInfo
        ?elementPrimaryPackagePurpose
        ?relatedElement
        ?relationshipType
        ?relatedElementName
        ?relatedElementType
        ?relatedElementVersionInfo
        ?relatedElementPrimaryPackagePurpose
        
    WHERE {
        ?element spdx:relationship ?relationship .
        ?element rdf:type ?elementType .
        ?relationship spdx:relatedSpdxElement ?relatedElement .
        ?relationship spdx:relationshipType ?relationshipType .
        ?relatedElement rdf:type ?relatedElementType .
        
        OPTIONAL { ?element spdx:name ?elementName . }
        OPTIONAL { ?element spdx:fileName ?elementName . }
        OPTIONAL { ?element spdx:primaryPackagePurpose ?elementPrimaryPackagePurpose . }
        OPTIONAL { ?relatedElement spdx:name ?relatedElementName . }
        OPTIONAL { ?relatedElement spdx:fileName ?relatedElementName . }
        OPTIONAL { ?element spdx:versionInfo ?elementVersionInfo .}
        OPTIONAL { ?relatedElement spdx:versionInfo ?relatedElementVersionInfo .}
        OPTIONAL { ?relatedElement spdx:primaryPackagePurpose ?relatedElementPrimaryPackagePurpose . }
    }
    """
    
    # run query
    query_result = kg.query(QUERY)

    # hide from query_result the components of the spdx:File type     
    if hideTypeFile:
        query_result = [ row for row in query_result if str(row.relatedElementType).split(SPDX_NS)[-1] != "File"]
        
    # create a graph of the relationships using Network
    relationship_graph = Network(notebook=True, directed=True, cdn_resources="remote")
    
    # update the graph of the relationships based on the query_result
    for row in query_result:
        
        # element
        elementName = str(row.elementName)
        elementType = str(row.elementType).split(SPDX_NS)[-1]
        elementVersionInfo = row.elementVersionInfo
        elementPrimaryPackagePurpose= row.elementPrimaryPackagePurpose
        
        # relationship
        relationshipTypeName = row.relationshipType.split("relationshipType_")[1]
        
        # relatedElement        
        relatedElementName = str(row.relatedElementName)
        relatedElementType = str(row.relatedElementType).split(SPDX_NS)[-1]
        relatedElementVersionInfo = row.relatedElementVersionInfo
        relatedElementPrimaryPackagePurpose = row.relatedElementPrimaryPackagePurpose

        ## update graph
        # element Node info
        elementNodeId = row.element 
        elementNodeLabel = get_node_label(elementName, elementVersionInfo)
        elementNodeTitle = get_node_title(elementName, elementType, 
                                          elementVersionInfo, elementPrimaryPackagePurpose)
        elementNodeColor = VIS_STYLE[elementType]['color']
        elementNodeSize = VIS_STYLE[elementType]['size']        
        
        # relatedElement Node info
        relatedElementNodeId = row.relatedElement 
        relatedElementNodeLabel = get_node_label(relatedElementName, relatedElementVersionInfo)
        relatedElementNodeTitle = get_node_title(relatedElementName, relatedElementType, 
                                                 relatedElementVersionInfo, relatedElementPrimaryPackagePurpose)
        relatedElementNodeColor = VIS_STYLE[relatedElementType]['color']
        relatedElementNodeSize = VIS_STYLE[relatedElementType]['size']            
        
        # add nodes (elementName, relatedElementName) to the graph
        relationship_graph.add_node(elementNodeId,
                                    label = elementNodeLabel,
                                    title = elementNodeTitle,
                                    color = elementNodeColor,
                                    size = elementNodeSize
                                   )
        relationship_graph.add_node(relatedElementNodeId,
                                    label = relatedElementNodeLabel,
                                    title = relatedElementNodeTitle,
                                    color = relatedElementNodeColor,
                                    size = relatedElementNodeSize
                                   )
        # and edge(relatedElementName) to the graph
        relationship_graph.add_edge(elementNodeId,
                                    relatedElementNodeId,
                                    title = relationshipTypeName,
                                    label = relationshipTypeName # text over the edge
                                   )
    return relationship_graph

In [29]:
#| export
from pandas import DataFrame
def display_relationship_graph_legend():    
    """
    Display the legend of the SBOMs components relationship graph that can be visualized by `visualize_relationship_graph()`.
    """    
    legend_df = DataFrame([['File', 'Yellow'], ['Package', 'Blue'], ['SPDXDocument', 'Red']], 
                  columns=['SPDX Type', 'Node Color'])

    display(legend_df)


#### How to visualize the relationship graph

You can use the function `visualize_relationship_graph(kg)` where `kg` is a knowledge graph and the returned graph is ready to be visualized using the method `.show()`.

You can also change the physics layout of the graph. In the example below, we are using the option `.force_atlas_2based()`. See the example below. 

In [30]:
# Create a knowledge graph
kg = kglab.KnowledgeGraph()

# load the knowledge graph info
kg.load_rdf("sboms/rdf/model.rdf.xml", format="xml")

# get the relationship graph to be visualized
graph = visualize_relationship_graph(kg)

# optional: set the physics layout of the network
graph.force_atlas_2based()
graph.set_edge_smooth('dynamic')

# show graph
graph.show("figs/fig00.relationship_full.html")

figs/fig00.relationship_full.html


In [31]:
display_relationship_graph_legend()

Unnamed: 0,SPDX Type,Node Color
0,File,Yellow
1,Package,Blue
2,SPDXDocument,Red


In [32]:
#| default_exp core