In [42]:
%load_ext autoreload
%autoreload 2

In [106]:
from rdflib import Graph, Literal, Namespace, RDF, URIRef, XSD, RDFS, SOSA
import json
from shapely.geometry import shape
import os
import xarray as xr
os.chdir("/workspaces/aqqa-kg-creation-dev/")
from observableProperties import variables_dict
from src.utils import unix_ts_to_date_str

In [12]:
# Namespaces used when minting IRIs: the project ontology (aqqa), GeoSPARQL (geo)
# and XML Schema datatypes (xsd).
aqqa, geo, xsd = map(
    Namespace,
    (
        "http://example.com/ontologies/aqqa#",
        "http://www.opengis.net/ont/geosparql#",
        "http://www.w3.org/2001/XMLSchema#",
    ),
)

In [13]:
# Empty RDF graph that collects every triple produced by the cells below;
# the "aqqa" prefix is registered on it for the project namespace.
g = Graph()
g.bind(prefix="aqqa", namespace=aqqa)

In [14]:
# Load the reference raster cells from GeoJSON and collect, per feature,
# its raw geometry, its "index" property and the geometry rendered as WKT text.
ref_raster_geojson = "/mnt/data/processed/ref_raster.geojson"
with open(ref_raster_geojson, "r") as f:
    geojson_data = json.load(f)

features = geojson_data.get("features", [])

geometries = []
indexes = []
shapely_geometries = []  # holds WKT strings, not shapely objects
for feature in features:
    geometry = feature.get("geometry")
    geometries.append(geometry)
    indexes.append(feature.get("properties")["index"])
    shapely_geometries.append(shape(geometry).wkt)

In [34]:
# Add one sosa:FeatureOfInterest and one geo:Geometry per reference raster cell
ent_cell_id = URIRef(aqqa["hasID"])  # same predicate for every cell
for i, (index, geom) in enumerate(zip(indexes, shapely_geometries)):
    # Cell key: the first two index components concatenated without a separator
    cell_key = f"{index[0]}{index[1]}"
    ent_cell = URIRef(aqqa[f"Cell_{cell_key}"])
    ent_geom_cell = URIRef(aqqa[f"GeomCell_{cell_key}"])

    cell_triples = (
        # the cell itself, identified by its running position in the GeoJSON file
        (ent_cell, RDF.type, SOSA.FeatureOfInterest),
        (ent_cell, ent_cell_id, Literal(i)),
        (ent_cell, geo.hasGeometry, ent_geom_cell),
        # the geometry node carrying the WKT serialization
        (ent_geom_cell, RDF.type, geo.Geometry),
        (ent_geom_cell, geo.asWKT, Literal(geom, datatype=geo.wktLiteral)),
    )
    for triple in cell_triples:
        g.add(triple)

In [24]:
# Register every entry of variables_dict as a sosa:ObservableProperty and attach
# its label, unit, averaging period and comment as literals.
ent_has_unit = URIRef(aqqa["hasUnit"])
ent_avg_period = URIRef(aqqa["averagingPeriod"])
for var in variables_dict:
    props = variables_dict[var]
    ent_obs_prop = URIRef(aqqa[f"{var}"])

    g.add((ent_obs_prop, RDF.type, SOSA.ObservableProperty))
    # (predicate, key in the variable's metadata dict) pairs, added in order
    for predicate, key in (
        (RDFS.label, "label"),
        (ent_has_unit, "hasUnit"),
        (ent_avg_period, "averagingPeriod"),
        (RDFS.comment, "comment"),
    ):
        g.add((ent_obs_prop, predicate, Literal(props[key])))

In [111]:
# reading in observations into RDF Graph
# For every variable in variables_dict, open its monthly NetCDF file and add one
# sosa:Observation per (time step, row, column) element of the data array.
year = "2020"
month = "01"
for var_name in variables_dict:
    path_to_nc_file = f"/mnt/data/processed/CAMS_AQ_AOI_AGG/{year}/{month}/cams_aq_{var_name.lower()}_{year}{month}.nc"
    ent_obs_prop = URIRef(aqqa[f"{var_name}"])

    # The context manager closes the NetCDF file handle once the variable is processed.
    with xr.open_dataset(path_to_nc_file) as ds:
        measurement_var = ds.variables[var_name.lower()]
        time_var = ds.variables["time"]

        # Fetch the underlying array once instead of on every cell access.
        # Axis order is taken to be (time, row, col), as in the indexing below.
        measurement_data = measurement_var.data
        n_rows = measurement_data.shape[1]
        n_cols = measurement_data.shape[2]

        for t_i, time in enumerate(time_var):
            # The timestamp string depends only on the time step, so build it once.
            t = unix_ts_to_date_str(time.item())
            result_time = Literal(t, datatype=XSD.date)

            for row_i in range(n_rows):
                for col_i in range(n_cols):
                    # .item() turns the numpy scalar into a native Python number.
                    # NOTE(review): presumably needed so rdflib assigns a numeric
                    # datatype (e.g. for float32 data) - confirm against the file dtype.
                    measurement_value = measurement_data[t_i, row_i, col_i].item()

                    ent_obs = URIRef(aqqa[f"Cell_{row_i}{col_i}_ts_{t}_var_{var_name}"])
                    # Link to the cell at (row_i, col_i). Previously this used a stale
                    # `index` variable left over from another cell, which attached every
                    # observation to one and the same feature of interest.
                    # NOTE(review): assumes the GeoJSON "index" property is (row, col),
                    # so this IRI matches the feature-of-interest IRIs - confirm.
                    ent_cell = URIRef(aqqa[f"Cell_{row_i}{col_i}"])

                    g.add((ent_obs, RDF.type, SOSA.Observation))
                    g.add((ent_obs, SOSA.hasFeatureOfInterest, ent_cell))
                    g.add((ent_obs, SOSA.observedProperty, ent_obs_prop))
                    g.add((ent_obs, SOSA.hasSimpleResult, Literal(measurement_value)))
                    g.add((ent_obs, SOSA.resultTime, result_time))

In [113]:
#print(g.serialize())