In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON
from rdflib import Graph
import folium
from shapely import wkt

In [3]:
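# Remote Strabon SPARQL endpoint (disabled: the query cell below runs against the local rdflib graph instead).
#endpoint_url = "http://64.225.134.139:9999/Strabon/Query"
#sparql = SPARQLWrapper(endpoint_url)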

In [4]:
# Locations of the Turtle files that are merged into one knowledge graph.
path_to_gadm_aut = "/mnt/data/GADM/RDF/gadm_AUT_RDF_population.ttl"
path_to_gadm_deu = "/mnt/data/GADM/RDF/gadm_DEU_RDF_population.ttl"
path_to_cams_obs = "/mnt/data/CAMS/RDF/observations/2020/01/cams_aq_2020_01_co.ttl"
path_to_obs_props = "/mnt/data/AQ_observable_properties.ttl"
path_to_cams_ref_raster = "/mnt/data/CAMS/RDF/CAMS_reference_grid_gadm_connections.ttl"

# Parse every file into the same in-memory graph, in the order listed.
graph = Graph()
for turtle_path in (
    path_to_gadm_aut,
    path_to_gadm_deu,
    path_to_cams_obs,
    path_to_obs_props,
    path_to_cams_ref_raster,
):
    graph.parse(turtle_path, format='ttl')

# Display the populated graph as the cell result.
graph

<Graph identifier=Ndbd18ed6d1424ef98de8f059aaadde07 (<class 'rdflib.graph.Graph'>)>

In [60]:
# Read the SPARQL query text from the scripts directory.
# NOTE(review): the next cell assigns `query_string` again from an inline literal, so the
# text read here is superseded when the cells run top to bottom — confirm which one is intended.
path_to_sparql_query = "../../scripts/sparql_queries/get_pop_affected_by_treshold_exceedance.ttl"
# Explicit encoding so the query is decoded the same way regardless of the platform locale.
with open(path_to_sparql_query, 'r', encoding='utf-8') as query_file:
    query_string = query_file.read()

In [99]:
# SPARQL query: names of level-3 administrative units whose intersecting grid cells have
# an "O3" observation above 1 in January 2020 (first match only, because of LIMIT 1).
# The inner sub-select resolves the grid-cell / administrative-unit pairs; the outer
# pattern joins the observations onto those cells.
# The rdfs prefix is declared explicitly so the query is valid stand-alone SPARQL and does
# not rely on the query engine pre-binding it.
# NOTE(review): the only observation file parsed into the graph is the CO one
# (cams_aq_2020_01_co.ttl) while this query filters on the label "O3" — confirm the label
# matches the loaded data.
query_string = """
PREFIX sosa: <http://www.w3.org/ns/sosa/>
PREFIX geo: <http://www.opengis.net/ont/geosparql#> 
PREFIX gadm: <http://example.com/ontologies/gadm#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?gadm_name ?obs_result ?obs_time 
WHERE {
    {
        SELECT ?foi_ent ?cell_geom ?gadm_name
        WHERE {
            ?foi_ent a sosa:FeatureOfInterest ;
                geo:intersects ?gadm_ent ;
                geo:hasGeometry ?cell_geom_ent .
            ?cell_geom_ent geo:asWKT ?cell_geom .
            ?gadm_ent a gadm:AdministrativeUnit ;
                gadm:hasName ?gadm_name;
                gadm:hasNationalLevel 3 ;
        } 
    }

    ?obs_ent a sosa:Observation ;
            sosa:hasSimpleResult ?obs_result ; 
            sosa:resultTime ?obs_time ;
            sosa:hasFeatureOfInterest ?foi_ent ;
            sosa:observedProperty ?obs_prop_ent .
    ?obs_prop_ent a sosa:ObservableProperty ;
        rdfs:label "O3" .

    FILTER (YEAR(?obs_time) = 2020 && MONTH(?obs_time) >= 1 && MONTH(?obs_time) <= 1)
    FILTER (?obs_result > 1)
} LIMIT 1
"""

In [100]:
# Run the SPARQL query against the in-memory graph and print every result row.
# NOTE(review): `data` is initialised here but nothing in this cell appends to it —
# confirm whether the rows were meant to be collected.
data = []
for row in graph.query(query_string):
    print(row)

KeyboardInterrupt: 

#### Text to SPARQL using GPT-3.5-turbo

In [77]:
import os
import openai

In [80]:
# Take the API key from the OPENAI_KEY environment variable instead of hard-coding it;
# indexing os.environ raises KeyError when the variable is not set.
openai.api_key = os.environ["OPENAI_KEY"]

In [81]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the ``content`` field of the one user message.
        model: Model name forwarded to ``openai.ChatCompletion.create``.
        temperature: Sampling temperature forwarded to the same call.

    Returns:
        The ``"content"`` entry of the first choice's message.
    """
    chat_reply = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )
    first_choice = chat_reply.choices[0]
    return first_choice.message["content"]

In [91]:
# Few-shot prompt for the text-to-SPARQL task: instructions, two worked
# (natural language -> query template) examples, then the question to translate.
# Each example query is kept self-contained: every prefix it uses is declared and every
# opened brace is closed, so the model is not shown malformed SPARQL to imitate.
prompt = """
Your task is to transform natural language to a sparql query. 
There are the following labels for observable attributes: CO, NO2, O3, PM10, PM2P5, SO2
The full names of those attributes are: carbon_monoxide nitrogen_dioxide ozone particulate_matter_10um particulate_matter_2.5um sulphur_dioxide
For the query always the short version has to be used.
If you are not sure if you can construct the sparql query, respond <I am sorry, but I cannot create a sparql query from this question>
The structure of this query is purposely divided in subqueries to improve performance. Don't change this structure.

<Natural Language>: What was the <observable attribute> concentration in <location name> for <month> <year>?

<Sparql Query>: 
PREFIX sosa: <http://www.w3.org/ns/sosa/>
PREFIX geo: <http://www.opengis.net/ont/geosparql#> 
PREFIX gadm: <http://example.com/ontologies/gadm#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?obs_result ?obs_time ?foi_ent
WHERE {
    {
        SELECT ?foi_ent
        WHERE {
            ?foi_ent a sosa:FeatureOfInterest ;
                geo:intersects ?gadm_ent .
            ?gadm_ent a gadm:AdministrativeUnit ;
                gadm:hasName <location name> ;
                gadm:hasNationalLevel 3 ;
        } 
    }

    ?obs_ent a sosa:Observation ;
            sosa:hasSimpleResult ?obs_result ; 
            sosa:resultTime ?obs_time ;
            sosa:hasFeatureOfInterest ?foi_ent ;
            sosa:observedProperty ?obs_prop_ent .
    ?obs_prop_ent a sosa:ObservableProperty ;
        rdfs:label <observable attribute e.g. PM10> .

    FILTER (YEAR(?obs_time) = <year> && MONTH(?obs_time) = <month>)
}

<Natural Language>: Which municipalities exceeded the <observable attribute> threshold of <threshold value> for the year <year> and month <month>?

<Sparql Query>: 
PREFIX sosa: <http://www.w3.org/ns/sosa/>
PREFIX geo: <http://www.opengis.net/ont/geosparql#> 
PREFIX gadm: <http://example.com/ontologies/gadm#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?gadm_name ?obs_result ?obs_time 
WHERE {
    {
        SELECT ?foi_ent ?cell_geom ?gadm_name
        WHERE {
            ?foi_ent a sosa:FeatureOfInterest ;
                geo:intersects ?gadm_ent ;
                geo:hasGeometry ?cell_geom_ent .
            ?cell_geom_ent geo:asWKT ?cell_geom .
            ?gadm_ent a gadm:AdministrativeUnit ;
                gadm:hasName ?gadm_name;
                gadm:hasNationalLevel 3 ;
        } 
    }

    ?obs_ent a sosa:Observation ;
            sosa:hasSimpleResult ?obs_result ; 
            sosa:resultTime ?obs_time ;
            sosa:hasFeatureOfInterest ?foi_ent ;
            sosa:observedProperty ?obs_prop_ent .
    ?obs_prop_ent a sosa:ObservableProperty ;
        rdfs:label <observable attribute e.g. CO> .

    FILTER (YEAR(?obs_time) = <year> && MONTH(?obs_time) = <month>)
    FILTER (?obs_result > <threshold value>)
}

<Natural Language>: List all observations where the ozone value was above 40 during summer 2020.


"""
# Send the few-shot prompt to the chat model and show the text it returns.
response = get_completion(prompt)
print(response)

<Sparql Query>: 
PREFIX sosa: <http://www.w3.org/ns/sosa/>
PREFIX geo: <http://www.opengis.net/ont/geosparql#> 
PREFIX gadm: <http://example.com/ontologies/gadm#>

SELECT ?gadm_name ?obs_result ?obs_time 
WHERE {
    {
        SELECT ?foi_ent ?cell_geom ?gadm_name?pop21
        WHERE {
            ?foi_ent a sosa:FeatureOfInterest ;
                geo:intersects ?gadm_ent ;
                geo:hasGeometry ?cell_geom_ent .
            ?cell_geom_ent geo:asWKT ?cell_geom .
            ?gadm_ent a gadm:AdministrativeUnit ;
                gadm:hasName ?gadm_name;
                gadm:hasNationalLevel 3 ;
        } 
    }

    ?obs_ent a sosa:Observation ;
            sosa:hasSimpleResult ?obs_result ; 
            sosa:resultTime ?obs_time ;
            sosa:hasFeatureOfInterest ?foi_ent ;
            sosa:observedProperty ?obs_prop_ent .
    ?obs_prop_ent a sosa:ObservableProperty ;
        rdfs:label "O3" .

    FILTER (YEAR(?obs_time) = 2020 && MONTH(?obs_time) >= 6 && MONTH(?obs_time