In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [41]:
from rdflib import Graph, Literal, Namespace, RDF, URIRef, XSD, RDFS, SOSA, ConjunctiveGraph
from rdflib.plugins.stores import sparqlstore
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID as default
from SPARQLWrapper import SPARQLWrapper, POST, DIGEST
import requests
import json
from shapely.geometry import shape
import os
import xarray as xr
os.chdir("/workspaces/aqqa-kg-creation-dev/")
from scripts.utils import unix_ts_to_date_str
from scripts.production_scripts.observableProperties import variables_dict

In [42]:
# Define the namespaces
aqqa = Namespace("http://example.com/ontologies/aqqa#")
geo = Namespace("http://www.opengis.net/ont/geosparql#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
sf = Namespace("http://www.opengis.net/ont/sf#")

### Create RDF file for obervable Properties

In [43]:
# Create a graph
g = Graph()
g.bind("aqqa", aqqa)

In [44]:
# reading in observable properties into RDF Graph
for var in variables_dict:

    ent_obs_prop = URIRef(aqqa[f"{var}"])
    ent_has_unit = URIRef(aqqa["hasUnit"])
    ent_avg_period = URIRef(aqqa["averagingPeriod"])

    g.add((ent_obs_prop, RDF.type, SOSA.ObservableProperty))
    g.add((ent_obs_prop, RDFS.label, Literal(variables_dict[var]["label"])))
    g.add((ent_obs_prop, ent_has_unit, Literal(variables_dict[var]["hasUnit"])))
    g.add((ent_obs_prop, ent_avg_period, Literal(variables_dict[var]["averagingPeriod"])))
    g.add((ent_obs_prop, RDFS.comment, Literal(variables_dict[var]["comment"])))

In [46]:
query1 = """    
        SELECT ?s ?p ?o
        WHERE {
            ?s ?p ?o
        }
        LIMIT 10
       """

results1 = g.query(query1)
print("Query 1 Results:")
for row in results1:
    print(f"{row['s']} {row['p']} {row['o']} ")

Query 1 Results:
http://example.com/ontologies/aqqa#CO http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/ns/sosa/ObservableProperty 
http://example.com/ontologies/aqqa#CO http://www.w3.org/2000/01/rdf-schema#label CO 
http://example.com/ontologies/aqqa#PM10 http://example.com/ontologies/aqqa#hasUnit µg m-3 
http://example.com/ontologies/aqqa#O3 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/ns/sosa/ObservableProperty 
http://example.com/ontologies/aqqa#SO2 http://www.w3.org/2000/01/rdf-schema#label SO2 
http://example.com/ontologies/aqqa#CO http://example.com/ontologies/aqqa#hasUnit µg m-3 
http://example.com/ontologies/aqqa#PM2P5 http://www.w3.org/2000/01/rdf-schema#label PM2.5 
http://example.com/ontologies/aqqa#O3 http://www.w3.org/2000/01/rdf-schema#comment Ozone as averaged over 8 hours 
http://example.com/ontologies/aqqa#PM2P5 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/ns/sosa/ObservableProperty 
http://example.com/ontolog

In [47]:
g.serialize(destination="/mnt/data/processed/RDF/CAMS/CAMS_observable_properties.ttl")

<Graph identifier=N167ff7ec4ba74093bac4a9e9f4c42e71 (<class 'rdflib.graph.Graph'>)>

### Create RDF file for CAMS sensor

In [48]:
# Create a graph
g = Graph()
g.bind("aqqa", aqqa)

In [49]:
ent_cams_sensor = URIRef(aqqa[f"CAMS_AQ_reanalysis_ensemble_lvl_0"])
ent_has_provider = URIRef(aqqa["hasProvider"])

g.add((ent_cams_sensor, RDF.type, SOSA.Sensor))
g.add((ent_cams_sensor, ent_has_provider, Literal("Copernicus Atmosphere Monitoring Service (CAMS)")))

<Graph identifier=Nf6dd5591d7964cbcbc5472ddcaad43a0 (<class 'rdflib.graph.Graph'>)>

In [50]:
query1 = """    
        SELECT ?s ?p ?o
        WHERE {
            ?s ?p ?o
        }
        LIMIT 10
       """

results1 = g.query(query1)
print("Query 1 Results:")
for row in results1:
    print(f"{row['s']} {row['p']} {row['o']} ")

Query 1 Results:
http://example.com/ontologies/aqqa#CAMS_AQ_reanalysis_ensemble_lvl_0 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/ns/sosa/Sensor 
http://example.com/ontologies/aqqa#CAMS_AQ_reanalysis_ensemble_lvl_0 http://example.com/ontologies/aqqa#hasProvider Copernicus Atmosphere Monitoring Service (CAMS) 


In [51]:
g.serialize(destination="/mnt/data/processed/RDF/CAMS/CAMS_sensor_data.ttl")

<Graph identifier=Nf6dd5591d7964cbcbc5472ddcaad43a0 (<class 'rdflib.graph.Graph'>)>

### Create RDF file for monthly observations

In [34]:
# Create a graph
g = Graph()
g.bind("aqqa", aqqa)

In [53]:
# reading in observations into RDF Graph
year = "2020"
month = "01"
for var_name in variables_dict:
    path_to_nc_file = f"/mnt/data/processed/CAMS_AQ_AOI_AGG/{year}/{month}/cams_aq_{var_name.lower()}_{year}{month}.nc"
    ds = xr.open_dataset(path_to_nc_file) 
    measurement_var = ds.variables[var_name.lower()]
    time_var = ds.variables["time"]

    for t_i, time in enumerate(time_var):
        for row_i in range(measurement_var.data.shape[1]):
            for col_i in range(measurement_var.data.shape[2]):
                
                t = unix_ts_to_date_str(time.item())
                measurement_value = measurement_var.data[t_i, row_i, col_i]
                
                ent_obs = URIRef(aqqa[f"Cell_{row_i}_{col_i}_ts_{t}_var_{var_name}"])
                ent_cell = URIRef(aqqa[f"Cell_{row_i}_{col_i}"])   
                ent_geom_cell = URIRef(aqqa[f"GeomCell_{row_i}_{col_i}"])   
                ent_obs_prop = URIRef(aqqa[f"{var_name}"])
                ent_cams_sensor = URIRef(aqqa[f"CAMS_AQ_reanalysis_ensemble_lvl_0"])
    
                g.add((ent_obs, RDF.type, SOSA.Observation))
                g.add((ent_obs, SOSA.madeBySensor, ent_cams_sensor))
                g.add((ent_obs, SOSA.observedProperty, ent_obs_prop))
                g.add((ent_obs, SOSA.hasFeatureOfInterest, ent_cell))
                g.add((ent_obs, SOSA.hasSimpleResult, Literal(measurement_value)))
                g.add((ent_obs, SOSA.resultTime, Literal(t, datatype=XSD.date)))

In [54]:
g.serialize(destination=f"/mnt/data/processed/RDF/CAMS/observations/CAMS_observations_{year}_{month}.ttl")

<Graph identifier=Nf6dd5591d7964cbcbc5472ddcaad43a0 (<class 'rdflib.graph.Graph'>)>

### Some example queries

In [9]:
# Execute Query 1 (get variable, time and result of all observations)
query1 = """    
        SELECT ?var ?unit ?time ?measurement ?foI
        WHERE {
            ?s a sosa:Observation ;
               sosa:resultTime ?time ;
               sosa:hasSimpleResult ?measurement ;
               sosa:observedProperty [ rdfs:label "NO2"; aqqa:hasUnit ?unit ] ;
               sosa:hasFeatureOfInterest [ aqqa:hasID 1 ] .
            
            ?foI aqqa:hasID 1 .
            
            FILTER (?time >= "2020-01-02"^^xsd:date && ?time <= "2020-01-10"^^xsd:date)
        }
        LIMIT 10
       """

results1 = g.query(query1)
print("Query 1 Results:")
for row in results1:
    print(f"{row['time']} {row['measurement']} {row['unit']} {row['var']} {row['foI']}")

Query 1 Results:
2020-01-02 35.232746 µg m-3 None http://example.com/ontologies/aqqa#Cell_01
2020-01-03 35.272137 µg m-3 None http://example.com/ontologies/aqqa#Cell_01
2020-01-04 15.603108 µg m-3 None http://example.com/ontologies/aqqa#Cell_01
2020-01-05 22.24642 µg m-3 None http://example.com/ontologies/aqqa#Cell_01
2020-01-06 29.234375 µg m-3 None http://example.com/ontologies/aqqa#Cell_01
2020-01-07 35.990887 µg m-3 None http://example.com/ontologies/aqqa#Cell_01
2020-01-08 40.488934 µg m-3 None http://example.com/ontologies/aqqa#Cell_01
2020-01-09 40.826824 µg m-3 None http://example.com/ontologies/aqqa#Cell_01
2020-01-10 32.11849 µg m-3 None http://example.com/ontologies/aqqa#Cell_01
