In [6]:
#Yi's Parser Code
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import urllib.request as req

def gpx_parser(file):
    """Parse a GPX file into its track name, waypoints and track points.

    The parser is positional: it expects the root's first child to be
    'metadata' (whose first child is 'name'), its last child to be 'trk'
    (children: 'name', 'trkseg'), and every child in between to be a 'wpt'
    whose children are 'ele', 'time', 'name' in that order.

    Args:
        file: a path or file object accepted by ``ET.parse``.

    Returns:
        A tuple ``(trk_name, trk_wpts, trk_trkpts)``:
        * ``trk_name`` - track name with spaces replaced by underscores;
        * ``trk_wpts`` - dict keyed by ``'wpt' + elevation`` with the fields
          'loc' (lat, lon strings), 'ele', 'time' and 'name';
        * ``trk_trkpts`` - dict keyed by ``'trkpt' + elevation`` with the
          fields 'loc', 'ele' and 'time'.
        When several points share an elevation, the later ones get a
        ``_2``, ``_3``, ... suffix so that no point is overwritten.

    Raises:
        AssertionError: if the metadata name and the track name differ.
    """
    root = ET.parse(file).getroot()

    # root[0].tag is 'metadata', root[0][0].tag is 'name'
    trk_name = root[0][0].text

    # root[-1].tag is 'trk', root[-1][0].tag is 'name'
    assert trk_name == root[-1][0].text, 'metadata name and trk name differ'

    # names must be one coherent token under TTL rules
    trk_name = trk_name.replace(" ", "_")

    def unique_key(prefix, ele, taken):
        # Elevation alone does not identify a point; suffix duplicates so a
        # later point never silently replaces an earlier one.
        key = prefix + ele
        serial = 2
        while key in taken:
            key = '%s%s_%d' % (prefix, ele, serial)
            serial += 1
        return key

    def ttl_time(text):
        # ':' and '+' are replaced so the timestamp can be used as a TTL name
        return text.replace(":", "_").replace("+", "p")

    trk_wpts = {}
    for x in root[1:-1]:
        # x.tag is 'wpt'; x[0] is 'ele', x[1] is 'time', x[2] is 'name'
        wpt_ele = x[0].text
        wpt = unique_key('wpt', wpt_ele, trk_wpts)
        trk_wpts[wpt] = {
            'loc': (x.attrib['lat'], x.attrib['lon']),
            'ele': wpt_ele,
            'time': ttl_time(x[1].text),
            'name': x[2].text.replace(" ", "_"),
        }

    trk_trkpts = {}
    # root[-1].tag is 'trk', root[-1][1].tag is 'trkseg'
    for x in root[-1][1]:
        # x.tag is 'trkpt'; x[0] is 'ele', x[1] is 'time'
        trkpt_ele = x[0].text
        trkpt = unique_key('trkpt', trkpt_ele, trk_trkpts)
        trk_trkpts[trkpt] = {
            'loc': (x.attrib['lat'], x.attrib['lon']),
            'ele': trkpt_ele,
            'time': ttl_time(x[1].text),
        }

    return trk_name, trk_wpts, trk_trkpts

def link2osm(gpx_file):
    """Download the OpenStreetMap extract surrounding a GPX track.

    Parses ``gpx_file`` with ``gpx_parser``, pads the bounding box of the
    track points by 0.005 degrees on every side, draws waypoints, track
    points and the box on the current matplotlib figure (the figure is not
    shown here), and saves the OSM API 0.6 ``map`` response for that box to
    ``<trk_name>.osm`` in the working directory.

    Args:
        gpx_file: path (or file object) of the GPX track.

    Returns:
        The name of the downloaded OSM file.

    Raises:
        ValueError: if the track has no track points (empty bounding box).
    """
    trk_name, trk_wpts, trk_trkpts = gpx_parser(gpx_file)

    # Waypoints are only plotted, so a track without waypoints is fine.
    wpt_lats = [float(wpt['loc'][0]) for wpt in trk_wpts.values()]
    wpt_lons = [float(wpt['loc'][1]) for wpt in trk_wpts.values()]

    trkpt_lats = [float(trkpt['loc'][0]) for trkpt in trk_trkpts.values()]
    trkpt_lons = [float(trkpt['loc'][1]) for trkpt in trk_trkpts.values()]

    # the naive surroundings: pad the track's bounding box on every side
    margin = 0.005
    minlat = min(trkpt_lats) - margin
    minlon = min(trkpt_lons) - margin
    maxlat = max(trkpt_lats) + margin
    maxlon = max(trkpt_lons) + margin

    osm_bounds = (minlat, minlon, maxlat, maxlon)
    print('osm bounds: ', osm_bounds)

    # closed rectangle tracing the bounding box
    bound_lats = [minlat, maxlat, maxlat, minlat, minlat]
    bound_lons = [minlon, minlon, maxlon, maxlon, minlon]

    series = (
        (wpt_lats, wpt_lons, 'red', 'wpt'),
        (trkpt_lats, trkpt_lons, 'blue', 'trkpt'),
        (bound_lats, bound_lons, 'black', 'bounds'),
    )
    for lats, lons, color, group in series:
        # '-o' only: the colour comes from the keyword, not the format string
        plt.plot(lats, lons, '-o', c=color, label=group)

    plt.title('gpx track: %s' % (trk_name,))
    plt.xlabel('lat')
    plt.ylabel('lon')
    plt.grid(True)
    plt.legend()

    #plt.show()

    # the OSM API expects bbox as left,bottom,right,top (lon/lat order)
    bbox = '%f,%f,%f,%f' % (minlon, minlat, maxlon, maxlat)
    osm_file = '%s.osm' % (trk_name,)

    req.urlretrieve('https://api.openstreetmap.org/api/0.6/map?bbox=%s' % (bbox,), osm_file)

    print('%s downloaded' % (osm_file,))

    return osm_file

def osm_parser(file):
    """Parse an OSM XML extract into bounds, nodes, ways and relations.

    Args:
        file: a path or file object accepted by ``ET.parse``. The root's
            first child must be the 'bounds' element.

    Returns:
        A tuple ``(osm_bounds, osm_nds, osm_ways, osm_rlts)``:
        * ``osm_bounds`` - (minlat, minlon, maxlat, maxlon) as strings;
        * ``osm_nds`` - dict keyed 'nd<id>' with 'id', 'ts', 'loc', 'name',
          'isinterested', 'ways' (ways the node is on) and 'rlts'
          (relations listing the node as a member);
        * ``osm_ways`` - dict keyed 'way<id>' with 'id', 'nds', 'name',
          'isinterested' and 'rlts';
        * ``osm_rlts`` - dict keyed 'rlt<id>' with 'id', 'ways', 'nds',
          'rlts' (member relations) and 'name'.
        A node or way is 'interested' when it carries an amenity, tourism
        or leisure tag; a node also inherits it from any way it lies on.

    Raises:
        AssertionError: if the first child of the root is not 'bounds'.
    """
    # definition of 'isinterested' is to be discussed...
    interest_keys = ('amenity', 'tourism', 'leisure')

    root = ET.parse(file).getroot()

    # root[0].tag is 'bounds'
    assert root[0].tag == 'bounds'
    bounds = root[0].attrib
    osm_bounds = (bounds['minlat'], bounds['minlon'], bounds['maxlat'], bounds['maxlon'])

    osm_nds = {}
    for x in root.findall('node'):
        nd_id = x.attrib['id']  # hasid
        nd_name = 'none'  # hasname
        nd_isinterested = 'no'  # isinterested

        for tag in x.findall('tag'):
            tag_k, tag_v = tag.attrib['k'], tag.attrib['v']
            if tag_k == 'name':
                nd_name = tag_v
            if tag_k in interest_keys:
                nd_isinterested = 'yes'

        osm_nds['nd' + nd_id] = {
            'id': nd_id,
            'ts': x.attrib['timestamp'].replace(":", "_"),  # hasts
            'loc': (x.attrib['lat'], x.attrib['lon']),  # hasloc
            'name': nd_name,
            'isinterested': nd_isinterested,
            'ways': [],  # ontheway, filled while reading the ways
            'rlts': [],  # intherlt, filled while reading the relations
        }

    osm_ways = {}
    for x in root.findall('way'):
        way_id = x.attrib['id']  # hasid
        way = 'way' + way_id
        way_nds = []  # hasnd
        way_name = 'none'  # hasname
        way_isinterested = 'no'  # isinterested

        for member in x.findall('nd'):
            nd = 'nd' + member.attrib['ref']
            way_nds.append(nd)
            # the key, nd, may be beyond the area
            if nd in osm_nds:
                osm_nds[nd]['ways'].append(way)

        for tag in x.findall('tag'):
            tag_k, tag_v = tag.attrib['k'], tag.attrib['v']
            if tag_k == 'name':
                # store the way's own name (this used to clobber nd_name)
                way_name = tag_v
            if tag_k in interest_keys:
                way_isinterested = 'yes'

        osm_ways[way] = {
            'id': way_id,
            'nds': way_nds,
            'name': way_name,
            'isinterested': way_isinterested,
            'rlts': [],  # intherlt, filled while reading the relations
        }

    # if the node is on the way that is interested,
    # then the node is interested
    for nd_info in osm_nds.values():
        if any(osm_ways[way]['isinterested'] == 'yes' for way in nd_info['ways']):
            nd_info['isinterested'] = 'yes'

    osm_rlts = {}
    for x in root.findall('relation'):
        rlt_id = x.attrib['id']  # hasid
        rlt = 'rlt' + rlt_id
        rlt_ways = []  # hasway
        rlt_nds = []  # hasnd
        rlt_rlts = []  # member relations
        rlt_name = ''

        for member in x.findall('member'):
            mtype = member.attrib['type']
            ref = member.attrib['ref']

            if mtype == 'way':
                way = 'way' + ref
                rlt_ways.append(way)
                # the key, way, may be beyond the area
                if way in osm_ways:
                    osm_ways[way]['rlts'].append(rlt)
            elif mtype == 'node':
                nd = 'nd' + ref
                rlt_nds.append(nd)
                # the key, nd, may be beyond the area
                if nd in osm_nds:
                    osm_nds[nd]['rlts'].append(rlt)
            elif mtype == 'relation':
                # keep `rlt` untouched: it is the key of the relation being
                # parsed and is still needed for later members and storage
                rlt_rlts.append('rlt' + ref)

        for tag in x.findall('tag'):
            if tag.attrib['k'] == 'name':
                rlt_name = tag.attrib['v']

        osm_rlts[rlt] = {'id': rlt_id, 'ways': rlt_ways, 'nds': rlt_nds, 'rlts': rlt_rlts, 'name': rlt_name}

    return osm_bounds, osm_nds, osm_ways, osm_rlts

def analyzer(gpx_file, nway = 9):
    """Scatter-plot the OSM nodes around a GPX track plus its first ways.

    Downloads the surrounding OSM extract through ``link2osm``, parses it
    with ``osm_parser`` and plots every node in green followed by at most
    ``nway`` ways in red, then shows the figure.

    Args:
        gpx_file: GPX track handed to ``link2osm``.
        nway: maximum number of ways drawn after the node cloud.
    """
    osm_file = link2osm(gpx_file)
    osm_bounds, osm_nds, osm_ways, _ = osm_parser(osm_file)

    print('osm bounds:', osm_bounds)

    # first series: every node of the extract
    all_lats = [float(info['loc'][0]) for info in osm_nds.values()]
    all_lons = [float(info['loc'][1]) for info in osm_nds.values()]
    series = [((all_lats, all_lons), 'green', 'nd')]

    print('there are %d ways.'%len(osm_ways))

    # one further series per way, made of the way's member nodes
    for way_key, way_info in osm_ways.items():
        members = way_info['nds']
        lats = [float(osm_nds[member]['loc'][0]) for member in members]
        lons = [float(osm_nds[member]['loc'][1]) for member in members]
        series.append(((lats, lons), 'red', way_key))

    for drawn, (points, colour, label) in enumerate(series):
        # index 0 is the node cloud, so this keeps nodes + nway ways
        if drawn > nway:
            break
        xs, ys = points

        plt.scatter(xs, ys, c  = colour, label = label)
        plt.title(osm_file)
        plt.xlabel('lat')
        plt.ylabel('lon')
        plt.grid(True)
        # only the node series is in the legend when it is drawn
        if label == 'nd':
            plt.legend()

    plt.show()

def around(loc, osm_nds, radius = 0.005):
    """Return the keys of interested nodes inside a square around ``loc``.

    Args:
        loc: (lat, lon) pair; each item is converted with ``float``.
        osm_nds: dict of node records, each with a 'loc' (lat, lon) pair
            and an 'isinterested' flag of 'yes' or 'no'.
        radius: half side length of the square, in degrees; the edges are
            inclusive.

    Returns:
        List of node keys, in the iteration order of ``osm_nds``.
    """
    centre_lat = float(loc[0])
    centre_lon = float(loc[1])

    # naive surrounding: an axis-aligned box of +/- radius around the centre
    south, north = centre_lat - radius, centre_lat + radius
    west, east = centre_lon - radius, centre_lon + radius

    hits = []
    for key, record in osm_nds.items():
        lat = float(record['loc'][0])
        lon = float(record['loc'][1])
        if not (south <= lat <= north):
            continue
        if not (west <= lon <= east):
            continue
        # we only care about surrounding nodes that are interested
        if record['isinterested'] == 'yes':
            hits.append(key)

    return hits

def _ttl_refs(names):
    """Render names as a Turtle object list (':a, :b'), or ':none ' if empty."""
    if not names:
        return ':none '
    return ', '.join(':%s' % (name,) for name in names)


def gen_rdf(gpx_file):
    """Write ``<trk_name>.ttl`` describing a GPX track and its OSM surroundings.

    The track is parsed with ``gpx_parser``; ``link2osm`` downloads the
    surrounding OSM extract (re-parsing the GPX file on the way) and
    ``osm_parser`` reads it. Every track point is linked to the interested
    OSM nodes returned by ``around``. The Turtle file then lists the track,
    its waypoints and track points, the surrounding nodes and the ways those
    nodes lie on. Empty object lists are written as ``:none``.

    Args:
        gpx_file: GPX track; must be usable by both ``gpx_parser`` and
            ``link2osm``.
    """
    trk_name, trk_wpts, trk_trkpts = gpx_parser(gpx_file)
    osm_file = link2osm(gpx_file)
    osm_bounds, osm_nds, osm_ways, osm_rlts = osm_parser(osm_file)

    trk_surround = []  # hassurround, for the whole track
    for trkpt_info in trk_trkpts.values():
        trkpt_info['surround'] = around(trkpt_info['loc'], osm_nds)
        trk_surround += trkpt_info['surround']

    # De-duplicate while keeping first-seen order, so the generated file is
    # the same on every run (a set would shuffle it between processes).
    trk_surround = list(dict.fromkeys(trk_surround))
    trk_surround_ways = list(dict.fromkeys(
        way for nd in trk_surround for way in osm_nds[nd]['ways']))

    # Turtle documents are UTF-8; OSM names are frequently non-ASCII.
    with open('%s.ttl' % (trk_name,), 'w', encoding='utf-8') as f:
        f.write('@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n')
        f.write('@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n')
        f.write('@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n')
        f.write('\n')
        f.write('@prefix : <http://www.topografix.com/GPX/1/1>. \n')
        f.write('\n')

        f.write(':%s a :GPXtrack .\n' % (trk_name,))
        f.write('\n')

        # Track-level lists. _ttl_refs() writes ':none' for an empty list
        # rather than leaving a truncated, invalid statement behind.
        f.write(':%s :haswpt ' % (trk_name,) + _ttl_refs(trk_wpts) + ' .\n')
        f.write('\n')
        f.write(':%s :hastrkpt ' % (trk_name,) + _ttl_refs(trk_trkpts) + ' .\n')
        f.write('\n')
        f.write(':%s :hassurround ' % (trk_name,) + _ttl_refs(trk_surround) + ' .\n')
        f.write('\n')

        for wpt, info in trk_wpts.items():
            # continuation lines are aligned under the first predicate
            pad = ' ' * (len(wpt) + 2)
            f.write(':%s :hasloc (:%s :%s) ;\n' % (wpt, info['loc'][0], info['loc'][1]))
            f.write(pad + ':hasele :%s ;\n' % (info['ele'],))
            f.write(pad + ':hastime :%s ;\n' % (info['time'],))
            f.write(pad + ':hasname :%s .\n' % (info['name'].replace(" ", "_"),))
            f.write('\n')

        for trkpt, info in trk_trkpts.items():
            pad = ' ' * (len(trkpt) + 2)
            f.write(':%s :hasloc (:%s :%s) ;\n' % (trkpt, info['loc'][0], info['loc'][1]))
            f.write(pad + ':hasele :%s ;\n' % (info['ele'],))
            f.write(pad + ':hastime :%s ;\n' % (info['time'],))
            f.write(pad + ':hassurround ' + _ttl_refs(info['surround']) + ' .\n')
            f.write('\n')

        for nd in trk_surround:
            info = osm_nds[nd]
            pad = ' ' * (len(nd) + 2)
            # spaces and apostrophes cannot appear in the name token
            nd_name = info['name'].replace(" ", "_").replace("'", "")
            f.write(':%s :hasloc (:%s :%s) ;\n' % (nd, info['loc'][0], info['loc'][1]))
            f.write(pad + ':hasid :%s ;\n' % (info['id'],))
            f.write(pad + ':hasts :%s ;\n' % (info['ts'].replace(":", "_"),))
            f.write(pad + ':hasname :%s ;\n' % (nd_name,))
            f.write(pad + ':isinterested :%s ;\n' % (info['isinterested'],))
            f.write(pad + ':ontheway ' + _ttl_refs(info['ways']) + ' ;\n')
            f.write(pad + ':intherlt ' + _ttl_refs(info['rlts']) + ' .\n')
            f.write('\n')

        for way in trk_surround_ways:
            info = osm_ways[way]
            pad = ' ' * (len(way) + 2)
            way_name = info['name'].replace(" ", "_").replace("'", "")
            f.write(':%s :hasid :%s ;\n' % (way, info['id']))
            f.write(pad + ':hasname :%s ;\n' % (way_name,))
            f.write(pad + ':isinterested :%s ;\n' % (info['isinterested'],))
            f.write(pad + ':hasnd ' + _ttl_refs(info['nds']) + ' ;\n')
            f.write(pad + ':intherlt ' + _ttl_refs(info['rlts']) + ' .\n')
            f.write('\n')

In [5]:
#run gen_rdf on all GPX track files
def download_all_GPX ():
    """Call ``gen_rdf`` once for each GPX track listed below, read from ./GPX_Tracks."""
    track_files = (
        '4sDDFdd4cjA.gpx',
        'btSeByOExEc.gpx',
        'kmrcRbHcMpg.gpx',
        'PO21QxqG2co.gpx',
        'pRAjjKqHwzQ.gpx',
        'rx1-4gf5lts.gpx',
        'tIRn_qJSB5s.gpx',
        'UAQjXL9WRKY.gpx',
    )
    for track_file in track_files:
        gen_rdf('./GPX_Tracks/' + track_file) #download files

In [7]:
#kense's DBpedia queries
from SPARQLWrapper import SPARQLWrapper
from bs4 import BeautifulSoup
import requests

def query_DBpedia (query):
    """Send ``query`` to the DBpedia SPARQL endpoint and return the converted response.

    The request is configured as a POST asking for the 'json' return
    format, with the query type set to SELECT.
    """
    endpoint = SPARQLWrapper("http://dbpedia.org/sparql/")
    endpoint.setQuery(query)
    endpoint.method = "POST"
    endpoint.setReturnFormat('json')
    # assigned after setQuery(), as in the original configuration order
    endpoint.queryType = "SELECT"

    return endpoint.query().convert()

def inquire_place (name):
    """Look up a place on DBpedia by its exact English label.

    Queries DBpedia for a ``dbo:Place`` whose ``rdfs:label`` equals ``name``
    (language tag ``en``). For the first match, the resource page is
    downloaded to ``./<label>.xml`` and scanned for extra properties.

    Args:
        name: English label of the place.

    Returns:
        An 8-tuple ``(name, lat, lng, dbpedia_url, ele, country, postalCode,
        comment)``. ``ele`` is the mean of the maximum and minimum elevation
        when both are present; details missing from the page are the string
        'none'. When DBpedia has no match, all eight items are ``None``, so
        the tuple has the same length in both cases.
    """
    # Escape backslashes and single quotes so a label such as
    # "Martha's Vineyard" cannot terminate the SPARQL string literal early.
    safe_name = name.replace('\\', '\\\\').replace("'", "\\'")

    #query DBpedia based on name
    query = (
        "PREFIX dbo: <http://dbpedia.org/ontology/> "
        "PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#> "
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
        "SELECT DISTINCT ?place ?label ?lat ?lng "
        "WHERE { ?place a dbo:Place . "
        "?place rdfs:label ?label "
        "FILTER (?label IN ( '%s'@en )) . "
        "?place geo:lat ?lat . "
        "?place geo:long ?lng .}"
    ) % (safe_name,)

    bindings = query_DBpedia(query)['results']['bindings']

    #no results
    if not bindings:
        return (None,) * 8

    #query results: only the first binding is used
    first = bindings[0]
    dbpedia_url = first['place']['value']
    lat = first['lat']['value']
    lng = first['lng']['value']
    name = first['label']['value']
    comment = 'none' #defaults none
    country = 'none'
    postalCode = 'none'
    max_ele, min_ele, ele = 'none', 'none', 'none'

    #download XML file; the handle is closed as soon as the copy is written
    response = requests.get(dbpedia_url)
    with open('./%s.xml' % (name,), 'wb') as xml_out:
        xml_out.write(response.content)

    #get comment, and some relevant info (parsed from the downloaded bytes,
    #which are the same bytes that were just saved)
    soup = BeautifulSoup(response.content, 'xml')

    #check spans
    for item in soup.find_all("span"):
        prop = item.get('property')
        if prop == 'dbo:abstract': #get comment
            if item.get('xml:lang') == "en":
                comment = item.get_text()
        if prop == 'dbo:postalCode': #get postalCode
            postalCode = item.get_text()
        if prop == "dbo:maximumElevation": #max elevation
            max_ele = item.get_text()
        if prop == "dbo:minimumElevation": #min elevation
            min_ele = item.get_text()

    #elevation calc, done once after both bounds have been collected
    if max_ele != 'none' and min_ele != 'none':
        ele = str((float(max_ele) + float(min_ele)) / 2)

    #check a's
    for item in soup.find_all("a"):
        if item.get('rel') == "dbo:country": #get country
            country = item.get_text()

    #output
    print('DBpedia entry found!')
    print('Downloaded XML file...')
    return (name, lat, lng, dbpedia_url, ele, country, postalCode, comment)

In [None]:
#need to do for EACH TTL file:
#display a graph (using yi's code)
#display relevant information from dbpedia (using kense's queries)

#download_all_GPX() #downloads all TTL files

#load TTL files into GraphDB and establish a query point

#query GraphDB for EACH ttl file, take a look at sparqlwrapper for ideas

#query all possible dbpedia for EACH ttl file

#display information as static pages