In [None]:
from urllib.parse import urlparse
import bald
import rdflib
from pandas import DataFrame
from rdflib.plugins.sparql.processor import SPARQLResult

In [None]:
# Helper functions

def nc2rdf(ncfilename, baseuri=None):
    """Load a netCDF resource with ``bald`` and return its RDF graph.

    Parameters
    ----------
    ncfilename
        Location of the netCDF resource; passed unchanged to
        ``bald.load_netcdf``.
    baseuri
        Optional base URI, forwarded to ``bald.load_netcdf`` as ``baseuri``.

    Returns
    -------
    Whatever ``rdfgraph()`` returns for the loaded root container.
    """
    return bald.load_netcdf(ncfilename, baseuri=baseuri).rdfgraph()

# Adapted from https://github.com/RDFLib/sparqlwrapper/issues/125
def sparql_results_to_df(results: SPARQLResult) -> DataFrame:
    """
    Build a `pandas.DataFrame` from the rows of an rdflib SPARQL query result.

    Each non-``None`` term in a row is converted with its ``toPython()``
    method; ``None`` entries are passed through unchanged. Column labels are
    the string form of each entry in ``results.vars``.
    See https://github.com/RDFLib/rdflib/issues/1179.
    """
    # Lazily convert every row to plain Python values.
    python_rows = (
        [term.toPython() if term is not None else None for term in row]
        for row in results
    )
    column_labels = list(map(str, results.vars))
    return DataFrame(data=python_rows, columns=column_labels)

## Load a single netCDF file from OPeNDAP

This example uses the eReefs GBR4 rivers data.

In [None]:
# URL of the eReefs GBR4 rivers netCDF file used as the first example dataset.
ncfile_gbr4_rivers = "https://dapds00.nci.org.au/thredds/dodsC/fx3/gbr4_2.0_rivers/gbr4_rivers_simple_2022-02-03.nc"

In [None]:
# Base URI passed to nc2rdf as `baseuri` when loading the GBR4 rivers file.
# The first SPARQL query declares the same URI as its `localcontext:` prefix.
base_uri = "http://localcontext/"

In [None]:
%%time
graph = nc2rdf(ncfile_gbr4_rivers,  baseuri=base_uri)

In [None]:
# Serialize the loaded graph in the "turtle" format so it can be inspected as text.
ttl = graph.serialize(format="turtle")

In [None]:
# Show the Turtle serialization produced in the previous cell.
print(ttl)

Now that the netCDF file is loaded as a netCDF-LD graph, we can run graph queries against it using SPARQL.

In [None]:
# Select every variable contained in a bald:Container together with its
# long_name and, when present, its standard_name. The `localcontext:` prefix
# is the same URI that is assigned to `base_uri` for the GBR4 rivers file.
sparql_query = """
PREFIX bald: <https://www.opengis.net/def/binary-array-ld/>
PREFIX localcontext: <http://localcontext/> 

SELECT DISTINCT ?var ?long_name ?standard_name
WHERE {
    ?container a bald:Container .
    ?container bald:contains ?var .
    ?var localcontext:long_name ?long_name .
    OPTIONAL {
       ?var localcontext:standard_name ?standard_name .
    }
}"""

# Run the query against the graph; the result is rendered in the next cell.
result = graph.query(sparql_query)

In [None]:
# Convert the SPARQL result into a DataFrame with the helper defined above,
# so that the notebook renders it as a table.
sparql_results_to_df(result)

## Load in another netCDF file and merge the graphs

Fetch another netCDF file (eReefs GBR1 data) and merge its graph with the graph of the previous netCDF file (eReefs GBR4 rivers data).
Note that these files come from two different models.

In [None]:
# URL of the eReefs GBR1 netCDF file, the second dataset to load.
ncfile_gbr1 = "https://dapds00.nci.org.au/thredds/dodsC/fx3/gbr1_2.0/gbr1_simple_2022-02-05.nc"

In [None]:
%%time
base_uri = "http://localcontext2/"
graph2 = nc2rdf(ncfile_gbr1,  baseuri=base_uri)

Merge the two graphs using RDFLib functions. Note that any other graph can be merged in the same way, e.g. other netCDF-LD graphs, Wikidata graphs, etc.

In [None]:
# Merge the two graphs. The combined result is bound back to the name `graph`,
# so from this cell onwards `graph` refers to the data of both files; re-run
# the GBR4 loading cell to get the single-file graph back.
graph = graph + graph2

In [None]:
# Serialize the merged graph in the "turtle" format and print it for inspection.
ttl = graph.serialize(format="turtle")
print(ttl)

In [None]:
# Run a similar SPARQL query against the merged graph. The two files were
# loaded under different base URIs, so the long_name / standard_name predicates
# are matched with a regex on the predicate URI rather than one prefixed name.
# ?container is also selected so each row shows which container it belongs to.
# Note: the localcontext / localcontext2 prefixes are declared but not used in
# the query body.

sparql_query = """
PREFIX bald: <https://www.opengis.net/def/binary-array-ld/>
PREFIX localcontext: <http://localcontext/> 
PREFIX localcontext2: <http://localcontext2/> 

SELECT DISTINCT ?container ?var ?long_name ?standard_name
WHERE {
    ?container a bald:Container .
    ?container bald:contains ?var .
    ?var ?longnameProp ?long_name .
    OPTIONAL {
       ?var ?stdnameProp ?standard_name .
       FILTER(regex(str(?stdnameProp), "standard_name") )
    }
    FILTER(regex(str(?longnameProp), "long_name") )
}
"""

# Execute the query and render the result as a DataFrame.
result = graph.query(sparql_query)
sparql_results_to_df(result)

In [None]:
# TODO: maybe look at https://stackoverflow.com/questions/39274216/visualize-an-rdflib-graph-in-python
#
# Disabled experiment: draw the RDF graph with networkx / matplotlib.
# Kept as `#` comments rather than a bare triple-quoted string, because a
# string literal that ends a cell is evaluated and echoed as the cell output.
# Enabling it needs networkx and matplotlib, which this notebook does not import.
#
# import rdflib
# from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
# import networkx as nx
# import matplotlib.pyplot as plt
#
# G = rdflib_to_networkx_multidigraph(graph)
# # Plot Networkx instance of RDF Graph
# pos = nx.spring_layout(G, scale=2)
# edge_labels = nx.get_edge_attributes(G, 'r')
# nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
# nx.draw(G, with_labels=False)

In [None]:
# pydot visualise test... doesn't quite work
#
# Disabled experiment: render the RDF graph to PNG via rdf2dot + pydotplus.
# Kept as `#` comments rather than a bare triple-quoted string, because a
# string literal that ends a cell is evaluated and echoed as the cell output.
# NOTE(review): `opts = {display}` builds a set holding the `display` function,
# not an options mapping - confirm what rdf2dot expects before re-enabling.
#
# import io
# import pydotplus
# from IPython.display import display, Image
# from rdflib.tools.rdf2dot import rdf2dot
#
# def visualize(g):
#     stream = io.StringIO()
#     rdf2dot(g, stream, opts = {display})
#     dg = pydotplus.graph_from_dot_data(stream.getvalue())
#     png = dg.create_png()
#     display(Image(png))
# visualize(graph)