In [1]:
import os
import sys

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from step import preprocessing as pp
from step import osm
from step.model import step, namespaces
from step.model import utility

import json
import gpxpy
import pandas as pnd
import numpy as np

import shapely
from shapely import wkt
from shapely.ops import polygonize_full, cascaded_union
import folium
import geojson
import re

import rdflib
from rdflib import URIRef, Namespace, Graph
from SPARQLWrapper import SPARQLWrapper2

%load_ext autoreload
%autoreload 2

In [2]:
file_path = r'gpx/49996348-1079694145.gpx'
gpx = gpxpy.parse(open(file_path, 'r'))

In [3]:
points_data = gpx.get_points_data()
points = [(pdata.point.latitude, pdata.point.longitude) for pdata in points_data]

In [4]:
tileset = r'http://{s}.tile.openstreetmap.se/hydda/full/{z}/{x}/{y}.png'
attribution = 'Tiles courtesy of <a href="http://openstreetmap.se/" target="_blank">OpenStreetMap Sweden</a> \
                &mdash; Map data &copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>'
#tileset = r'http://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png'
#attribution = '&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>'

_map = folium.Map(tiles=tileset, attr=attribution, max_zoom=25)

start_mark = folium.Marker([points[0][0], points[0][1]], icon=folium.Icon(color='green', icon='glyphicon glyphicon-play'))
start_mark.add_to(_map)

end_mark = folium.Marker([points[-1][0], points[-1][1]], icon=folium.Icon(color='red', icon='glyphicon glyphicon-stop'))
end_mark.add_to(_map)

line = folium.PolyLine(points, opacity=0.8)
line.add_to(_map)
_map.fit_bounds(line.get_bounds())

_map

## Parameters

In [5]:
buffer_size = 0.0002 # ~20 meters
minimum_length_ratio = 0.2 #delta_s
minimum_duration_ratio = 0.2 #delta_t

keys = ["leisure"]
expand_keys = True

expanded_keys = []
excluded_keys = ['name', 'comment', 'source', 'boundary']

## LinkedGeoData

In [6]:
# Search for tags in OSN
if 'osn_graph' not in locals():
    osn_graph = Graph()
    osn_graph.parse('osm_semantic_network.skos.rdf')

if 'lgdo_graph' not in locals():
    lgdo_graph = Graph()
    lgdo_graph.parse('lgdo_2014-07-26.n3', format='n3')

http://spatial.ucd.ie/lod/osn/term/k:source/v:tiger_import_#{date}n does not look like a valid URI, trying to serialize this will break.
http://spatial.ucd.ie/lod/osn/term/k:source/v:tiger_import_#{date}n does not look like a valid URI, trying to serialize this will break.
http://spatial.ucd.ie/lod/osn/term/k:source/v:tiger_import_#{date}n does not look like a valid URI, trying to serialize this will break.
http://spatial.ucd.ie/lod/osn/term/k:source/v:tiger_import_#{date}n does not look like a valid URI, trying to serialize this will break.
http://spatial.ucd.ie/lod/osn/term/k:source/v:tiger_import_#{date}n does not look like a valid URI, trying to serialize this will break.
http://spatial.ucd.ie/lod/osn/term/k:source/v:tiger_import_#{date}n does not look like a valid URI, trying to serialize this will break.
http://spatial.ucd.ie/lod/osn/term/k:source/v:tiger_import_#{date}n does not look like a valid URI, trying to serialize this will break.
http://spatial.ucd.ie/lod/osn/term/k:sour

In [7]:
if expand_keys and len(keys) > 0:
    expanded_keys = []
    keys_regex = "|".join(keys)
    regex = r'"http://spatial.ucd.ie/lod/osn/term/k:({0}).*"'.format(keys_regex)
    
    query = """
    PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
    PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
    PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>

    SELECT ?subject ?relatedTerm ?lgdConcept
    {
     ?subject skos:related ?relatedTerm .
     ?relatedTerm skos:exactMatch ?lgdConcept .
     
     FILTER (REGEX(STR(?subject), """ + regex +"""))
     FILTER (STRSTARTS(STR(?lgdConcept), "http://linkedgeodata.org/ontology"))
    }"""

    print(query)

    response = osn_graph.query(query)

    for row in response:
        uri = row[1]
        new_key = uri.split('k:')
        if len(new_key) > 1:
            new_key = new_key[-1]
        else:
            continue
        
        try:
            new_key = new_key.split('/')[0]
        except:
            pass
        
        if new_key not in keys:
            expanded_keys.append(new_key)

    expanded_keys = list(set(expanded_keys))

    print(len(expanded_keys), 'new terms')
    print(expanded_keys)


    PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
    PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
    PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>

    SELECT ?subject ?relatedTerm ?lgdConcept
    {
     ?subject skos:related ?relatedTerm .
     ?relatedTerm skos:exactMatch ?lgdConcept .
     
     FILTER (REGEX(STR(?subject), "http://spatial.ucd.ie/lod/osn/term/k:(leisure).*"))
     FILTER (STRSTARTS(STR(?lgdConcept), "http://linkedgeodata.org/ontology"))
    }
17 new terms
['amenity', 'boundary', 'route', 'fishing', 'building', 'barrier', 'highway', 'natural', 'man_made', 'landuse', 'playground', 'shelter', 'sport', 'tourism', 'shop', 'harbour', 'waterway']


In [8]:
gpx_simple = gpx.clone()
gpx_simple.simplify()

linestring_simple = "LINESTRING("

simple_points = gpx_simple.get_points_data()

for i, point_data in enumerate(simple_points):
    point = point_data.point
    linestring_simple += str(point.longitude) + " " + str(point.latitude)
    if i < len(simple_points)-1:
        linestring_simple += ", "

linestring_simple += ")"
linestring_simple

'LINESTRING(5.73425791 45.18210998, 5.735235225 45.1823674714, 5.7407547183 45.1846534569, 5.7429304089 45.1860455704, 5.742455759 45.1867867431, 5.7424284553 45.187952129, 5.7429561659 45.1890001696, 5.74367138 45.1889849224, 5.7451961052 45.1884058183, 5.7478446699 45.1883522866, 5.7494157359 45.1884815264, 5.7505612736 45.1889204948, 5.7495635222 45.1885502523, 5.7470312762 45.1884664258, 5.7454857658 45.1886651283, 5.7439751935 45.189106106, 5.7430106295 45.1889756432, 5.7426115612 45.1884896534, 5.7425259899 45.187664964, 5.742229581 45.1873207084, 5.7426253664 45.186550021, 5.7426946766 45.1858511243, 5.7422678227 45.1855394129)'

In [13]:
endpoint = 'http://linkedgeodata.org/vsparql'
sparql = SPARQLWrapper2(endpoint) 
sparql.setReturnFormat('json')

distance = 0.001 # 1 meter

query = """
PREFIX ogc: <http://www.opengis.net/ont/geosparql#>
PREFIX geom: <http://geovocab.org/geometry#>
PREFIX lgdo: <http://linkedgeodata.org/ontology/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT *
FROM <http://linkedgeodata.org> {
  ?subject
    a ?type ;
    geom:geometry [
      ogc:asWKT ?geometry
    ] .
    OPTIONAL { 
        ?subject rdfs:label ?label .
    }
    OPTIONAL { 
        ?subclass rdfs:subClassOf ?type .
        ?subject a ?subclass . 
    }
    OPTIONAL { 
        ?type rdfs:subClassOf ?superclass .
    }
    
    FILTER (bif:st_intersects (?geometry, 
    bif:st_geomFromText(" """ + linestring_simple + """ "), 
   """ + str(distance) + """))
}"""
#FILTER (?type IN (""" + concepts + """))

print(query)

sparql.setQuery(query)
results = sparql.query().convert()
dict_array = []
for dic in results.bindings:
    new_dict = {k: v.value for k, v in dic.items()}
    dict_array.append(new_dict)
    
df = pnd.DataFrame(dict_array)
df


PREFIX ogc: <http://www.opengis.net/ont/geosparql#>
PREFIX geom: <http://geovocab.org/geometry#>
PREFIX lgdo: <http://linkedgeodata.org/ontology/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT *
FROM <http://linkedgeodata.org> {
  ?subject
    a ?type ;
    geom:geometry [
      ogc:asWKT ?geometry
    ] .
    OPTIONAL { 
        ?subject rdfs:label ?label .
    }
    OPTIONAL { 
        ?subclass rdfs:subClassOf ?type .
        ?subject a ?subclass . 
    }
    OPTIONAL { 
        ?type rdfs:subClassOf ?superclass .
    }
    
    FILTER (bif:st_intersects (?geometry, 
    bif:st_geomFromText(" LINESTRING(5.73425791 45.18210998, 5.735235225 45.1823674714, 5.7407547183 45.1846534569, 5.7429304089 45.1860455704, 5.742455759 45.1867867431, 5.7424284553 45.187952129, 5.7429561659 45.1890001696, 5.74367138 45.1889849224, 5.7451961052 45.1884058183, 5.7478446699 45.1883522866, 5.7494157359 45.1884815264, 5.7505612736 45.1889204948, 5.7495635222 45.1885502523, 5.7470312762 45

EndPointInternalError: EndPointInternalError: endpoint returned code 500 and response.

## Overpass API

In [11]:
all_keys = keys + expanded_keys
all_keys

['leisure',
 'amenity',
 'boundary',
 'route',
 'fishing',
 'building',
 'barrier',
 'highway',
 'natural',
 'man_made',
 'landuse',
 'playground',
 'shelter',
 'sport',
 'tourism',
 'shop',
 'harbour',
 'waterway']

In [11]:
osm_features = osm.get_spatial_features(gpx, all_keys)


        [out:json];
        (way[~"."~"."]
        (poly:"45.18210998 5.73425791 45.1824220976 5.7353382904 45.1847253391 5.7408826096 45.186140418 5.7429230865 45.1868992116 5.7424419076 45.1880505119 5.7424473461 45.1889469214 5.7428454475 45.1890251298 5.7430898764 45.1889355192 5.7437873918 45.1884058183 5.7451961052 45.1883963889 5.7454810635 45.188360427 5.7481449371 45.1884815264 5.7494157359 45.1888904565 5.7504363 45.1885329393 5.7492650439 45.1884723829 5.7468814271 45.1887051998 5.7453555906 45.1891110944 5.743829997 45.1889185302 5.7429074605 45.1883845097 5.7425774415 45.1875742611 5.742458776 45.1872283783 5.7422837729 45.1864469882 5.7425883036 45.185734623 5.7426182096 45.1855394129 5.7422678227");
        rel(bw);

        relation[~"."~"."]
        (poly:"45.18210998 5.73425791 45.1824220976 5.7353382904 45.1847253391 5.7408826096 45.186140418 5.7429230865 45.1868992116 5.7424419076 45.1880505119 5.7424473461 45.1889469214 5.7428454475 45.1890251298 5.7430898764 45.1

In [12]:
print(len(osm_features))
osm_features

NameError: name 'osm_features' is not defined

In [13]:
if len(keys) == 0:
    osn_terms = []
    #invalid_chars = [' ', '<', '>', '|']
    
    for tags in osm_features['tags']:
            for item in tags.items():
                if item[0] not in excluded_keys and re.match('[a-z]*', item[0]) and re.match('[a-z]*', item[1]):
                    uri = "http://spatial.ucd.ie/lod/osn/term/k:{0}/v:{1}".format(item[0], item[1]).replace(' ', '_')
                    #uri = ''.join([i if i not in invalid_chars else '_' for i in uri])
                    osn_terms.append("<{0}>".format(uri))

    query = """
PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>

SELECT ?osnConcept ?lgdoConcept
{
 ?osnConcept skos:exactMatch ?lgdoConcept .
 
 FILTER (?osnConcept IN (""" + ", ".join(osn_terms) + """))
 FILTER (STRSTARTS(STR(?lgdoConcept), "http://linkedgeodata.org/ontology"^^xsd:string))
}
"""
    print(query)
    
    response = osn_graph.query(query)
    
    valid_tags = []
    lgd_concepts = []
    for row in response:
        osn_concept = row[0]
        osn_concept = 'k:{0}'.format(osn_concept.split('k:')[-1])
        valid_tags.append(osn_concept)
        lgd_concepts.append(str(row[1]).split('/')[-1])
    
    print(len(valid_tags), 'valid tags')
    print(valid_tags)
    print(lgd_concepts)
    
    #Exclude features that don't have the valid tags
    to_drop = []
    for tags, idx in zip(osm_features['tags'], osm_features.index):
        #print(idx, tags.items())
        feature_tags = ['k:{0}/v:{1}'.format(item[0], item[1]) for item in tags.items()]
        if not any([tag in valid_tags for tag in feature_tags]):
            to_drop.append(idx)
    
    print(len(osm_features))
    print(to_drop)
    osm_features = osm_features.drop(to_drop)
    print(len(osm_features))


PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>

SELECT ?osnConcept ?lgdoConcept
{
 ?osnConcept skos:exactMatch ?lgdoConcept .
 
 FILTER (?osnConcept IN (<http://spatial.ucd.ie/lod/osn/term/k:name:lt/v:Grenoblis>, <http://spatial.ucd.ie/lod/osn/term/k:name:lb/v:Grenobel>, <http://spatial.ucd.ie/lod/osn/term/k:addr:postcode/v:38000;38100>, <http://spatial.ucd.ie/lod/osn/term/k:name:oc/v:Grenoble>, <http://spatial.ucd.ie/lod/osn/term/k:ref:INSEE/v:38185>, <http://spatial.ucd.ie/lod/osn/term/k:type/v:boundary>, <http://spatial.ucd.ie/lod/osn/term/k:admin_level/v:8>, <http://spatial.ucd.ie/lod/osn/term/k:name:sr/v:Гренобл>, <http://spatial.ucd.ie/lod/osn/term/k:source:population/v:INSEE_2013>, <http://spatial.ucd.ie/lod/osn/term/k:population/v:155637>, <http://spatial.ucd.ie/lod/osn/term/k:name:ru/v:Гренобль>, <http://spatial.ucd.ie

## Map

In [14]:
linestring = "LINESTRING("

for i, point_data in enumerate(points_data):
    point = point_data.point
    linestring += str(point.longitude) + " " + str(point.latitude)
    if i < len(points_data)-1:
        linestring += ", "

linestring += ")"

trajectory = wkt.loads(linestring)

features = []
features_json = []
marker_data = []

to_drop = []
for item in osm_features.itertuples():
    try:
        feature0 = wkt.loads(item.geometry)
        json_feature0 = geojson.Feature(geometry=feature0, properties={"id": str(item.Index)})

        feature1 = feature0.buffer(buffer_size)
        json_feature1 = geojson.Feature(geometry=feature1)

        feature2 = polygonize_full(feature0)[0]
        json_feature2 = geojson.Feature(geometry=feature2)

        feature3 = cascaded_union([feature1, feature2])
        json_feature3 = geojson.Feature(geometry=feature3)

        #0: original geometry (linestring)
        #1: buffered 0 (polygon)
        #2: polygonized
        #3: union of 1 and 2
        # This is for visualization purposes.
        # A more efficient solution would be to buffer the polygonized line (feature2)
        features_json.append([json_feature0, json_feature1, json_feature2, json_feature3])
        features.append(feature3)

        rep_point = feature3.representative_point()
        popup_text = "{0}\n{1}".format(str(item.Index), repr(item.tags))
        if item.relation and len(item.relation) > 0:
            popup_text += " | part of: " + str(item.relation)
        marker_data.append(([rep_point.y, rep_point.x], popup_text))
    except Exception as e:
        #features_json.append([None, None, None, None])
        #features.append(None)
        #marker_data.append([None, None])
        print(e)
        print(item.Index)
        print(item.label)
        print(item.geometry)
        to_drop.append(item.Index)


print(len(osm_features))
osm_features = osm_features.drop(to_drop)
print(len(osm_features))

0
0


In [15]:
traj_map = folium.Map(tiles=tileset, attr=attribution, max_zoom=25)

for i, feat in enumerate(features_json):
    traj_map.choropleth(geo_str=feat[0], line_color='black', line_weight=3)
    #traj_map.choropleth(geo_str=feat[1], fill_color='gray', line_weight=1, fill_opacity=0.2)
    #traj_map.choropleth(geo_str=feat[2], fill_color='green', line_weight=1, fill_opacity=0.2)
    traj_map.choropleth(geo_str=feat[3], fill_color='red', line_weight=1, fill_opacity=.1)
    folium.Marker(location=marker_data[i][0], popup=marker_data[i][1]).add_to(traj_map)

start_mark.add_to(traj_map)
end_mark.add_to(traj_map)

line.add_to(traj_map)

traj_map.fit_bounds(line.get_bounds())

traj_map

In [16]:
feature_intervals = osm.compute_intersections(gpx, osm_features) 
print("{0} intersections retrieved.".format(len(feature_intervals)))
#feature_intervals

0 intersections retrieved.


## Group by id

In [17]:
groups = feature_intervals.groupby(level=0)
print(len(groups), 'unique features.')

0 unique features.


## Select features

In [18]:
selected = []
for group in groups:
    if group[1]['length_ratio'].sum() > minimum_length_ratio or group[1]['duration_ratio'].sum() > minimum_duration_ratio:
        #tag_present = False
        #for key in osm_features.loc[group[0]]['tags'].keys():
        #    if key in keys:
        #        tag_present = True
        #        continue

        #if tag_present:
            print('')
            print(group[0])
            print('relations:', osm_features.loc[group[0]]['relation'])
            print(osm_features.loc[group[0]]['label'], osm_features.loc[group[0]]['tags'])
            print("length ratio: {:.2f}%".format(group[1]['length_ratio'].sum()*100))
            print("duration ratio: {:.2f}%".format(group[1]['duration_ratio'].sum()*100))
            selected.append(group[0])

## Map of selected features

In [19]:
selected_map = folium.Map(tiles=tileset, attr=attribution, max_zoom=25)

for osm_index in selected:
    index = osm_features.index.get_loc(osm_index)
    selected_map.choropleth(geo_str=features_json[index][3], line_weight=4, 
                            line_opacity=.6, line_color='black', fill_color='red', fill_opacity=.1)
    #folium.Marker(location=marker_data[index][0], popup=marker_data[index][1]).add_to(selected_map)
    print(marker_data[index][1])
    
start_mark.add_to(selected_map)
end_mark.add_to(selected_map)
line.add_to(selected_map)

selected_map.fit_bounds(line._get_self_bounds())

selected_map

## Create STEP instances

In [20]:
#for key, value in osm_features.loc[group[0]]['tags'].iteritems():
#    if key in keys:
#        print '{0}k:{1}/v:{2}'.format(osn_namespace, key, value)
# building=yes
#  (leisure|natural|landuse)*=(water|riverbank|residential|park)*
# merge
# http://spatial.ucd.ie/lod/osn/term/
#'''
#PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

#SELECT DISTINCT * 
#{ 
# GRAPH <http://OSN> {
#  <http://spatial.ucd.ie/lod/osn/term/k:leisure/v:park> skos:related ?related .
# }
#}

In [21]:
OSNT = Namespace('http://spatial.ucd.ie/lod/osn/term/')
LGDR = Namespace('http://linkedgeodata.org/triplify/')
EXAMPLE = Namespace('http://example.com/resource/')

raw = utility.create_raw_trajectory(gpx)

In [22]:
foi_places = step.FeatureOfInterest("Places")

for group in groups:
    if group[0] in selected:
        intervals = group[1]
        feature_id = group[0]
        
        semantics_str = 'near'
        if osm_features.loc[feature_id].label:
            semantics_str += ' ' +osm_features.loc[feature_id].label
        place_semantics = step.QualitativeDescription(semantics_str)
        
        if osm_features.loc[feature_id].relation is None:
            element_type = 'relation'
        else:
            element_type = 'way'

        osm_label = str(osm_features.loc[feature_id]['label'])
        
        related_tags = []
        for key, value in osm_features.loc[feature_id]['tags'].items():
            if key in keys:
                related_tags.append(URIRef("{0}k:{1}/v:{2}".format(OSNT, key, value)))
                
        lgdo_uri = URIRef("{0}{1}{2}".format(LGDR, element_type, feature_id))
        
        contextual_element = step.ContextualElement(osm_label, lgdo_uri, related_tags)
        
        for item in intervals.itertuples():
            start_fix = raw.get_fix(points_data[item.start_index].point.time)
            end_fix = raw.get_fix(points_data[item.end_index].point.time)
            st_extent = step.SpatiotemporalExtent(start_fix, end_fix)

            episode = step.Episode(st_extent, place_semantics)
            episode.relate(contextual_element)

            foi_places.add_episode(episode)

#print(foi_places)

In [23]:
triples = foi_places.triplify()
#print(triples)

In [24]:
g = rdflib.Graph()
g.bind('step', namespaces.STEP)
g.bind('skos', namespaces.SKOS)
g.bind('geo', namespaces.GEO)
g.bind('time', namespaces.TIME)
g.bind('owl', namespaces.OWL)

for triple in triples:
    g.add(triple)
    
g.serialize("geographic_features_of_interest.rdf")