In [80]:
import time
import urllib.request

import geojson
import overpy
import shapefile
from geojson import Point, LineString, Polygon, MultiPolygon, MultiLineString
from geojsonio import display

In [69]:
# Shapefile of the study area; its bounding box limits every Overpass query.
inshp = "area/reg_buff.shp"
sf = shapefile.Reader(inshp)
bbox = sf.bbox  # get_osm() passes it to Overpass as (bbox[1], bbox[0], bbox[3], bbox[2])

# One entry per export, consumed by parse_osm(obj_name, schema, additional_tags):
#   [0] primary OSM key to query; also the prefix of the output file names
#   [1] schema: tag keys exported as feature properties (the first is renamed to
#       'type' on export, the third receives the value of a matching additional key)
#   [2] additional OSM keys whose ways are merged into the same export
#
# NOTE(review): 'natural' is listed twice (first and last entry).  Both runs write
# to the same natural_<Geometry>.geojson files, so the last entry (without
# 'waterway') overwrites the output of the first -- confirm which variant is
# intended and drop the other.
objects = [
    ['natural', ['natural', 'name', 'amenity'], ['geological', 'landcover', 'waterway']],
    ['building', ['building', 'name', 'amenity'], ['shop', 'tourism']],
    ['boundary', ['boundary', 'name', 'admin_level', 'place', 'koatuu', 'population'], []],
    ['landuse', ['landuse', 'name', 'amenity'], ['aeroway', 'leisure', 'tourism']],
    # 'traffic_sign' is the OSM key; the former spelling 'trafic_sign' matched nothing.
    ['highway', ['highway', 'name', 'amenity', 'int_ref', 'surface'], ['railway', 'traffic_sign', 'public_transport']],
    ['man_made', ['man_made', 'name', 'amenity'], ['power', 'office', 'shop', 'leisure', 'tourism', 'historic']],
    ['place', ['place', 'name', 'koatuu', 'population'], []],
    ['natural', ['natural', 'name', 'amenity'], ['geological', 'landcover']],
]

In [73]:
def convert_int(x):
    """Return ``int(x)`` when ``x`` converts to an integer, otherwise ``x`` itself.

    Only ``ValueError`` is handled, so a value such as ``'residential'`` or
    ``''`` is passed through unchanged; any other conversion error propagates.
    """
    try:
        converted = int(x)
    except ValueError:
        # Not an integer literal: keep the original value untouched.
        converted = x
    return converted
    
    
def get_osm(tag, delay=4, max_retries=None):
    """Download every OSM way inside the module-level ``bbox`` that carries ``tag``.

    The Overpass query selects the ways having the key ``tag`` within
    ``(bbox[1], bbox[0], bbox[3], bbox[2])`` and recurses down to their nodes.

    Args:
        tag: OSM key to filter ways on (e.g. ``'highway'``).
        delay: seconds to wait before repeating a failed query.
        max_retries: number of retries allowed after the first failure;
            ``None`` (default) keeps retrying until the query succeeds.

    Returns:
        The result object returned by ``overpy.Overpass().query``.

    Raises:
        Exception: the last query error, once ``max_retries`` is exhausted.
    """
    api = overpy.Overpass()
    # Build the query once, outside the retry loop, so that a mistake in building
    # it (e.g. ``bbox`` not defined yet) surfaces immediately instead of being
    # retried forever.
    query = """
                way({},{},{},{}) ["{}"];
                (._;>;);
                out body;
                """.format(bbox[1], bbox[0], bbox[3], bbox[2], tag)
    failures = 0
    while True:
        try:
            return api.query(query)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that interrupting
            # the kernel (KeyboardInterrupt) or SystemExit is not swallowed.
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            time.sleep(delay)  # time delay before the next query in case of many requests
    

def define_prj(name):
    """Write ``<name>.prj`` with the ESRI WKT definition of EPSG:4326.

    The projection text is downloaded from spatialreference.org on every call.

    Args:
        name: output path without the ``.prj`` extension.
    """
    url = "http://spatialreference.org/ref/epsg/4326/esriwkt/"
    # Use the response as a context manager so the connection is closed even if
    # reading or decoding fails.
    with urllib.request.urlopen(url) as response:
        prj = response.read().decode('utf-8')
    # Write with the same encoding the text was decoded with.
    with open(name + '.prj', "w", encoding='utf-8') as f:
        f.write(prj)
        

def format_coord(val, geom_type):
    """Shape a way's coordinate list for the given geojson geometry class.

    Args:
        val: list of ``[lon, lat]`` pairs belonging to one way.
        geom_type: ``Point``, ``LineString`` or ``Polygon``.

    Returns:
        The first pair for ``Point``, the list itself for ``LineString``, the
        list wrapped in an outer list for ``Polygon``; ``None`` for any other
        ``geom_type``.
    """
    if geom_type == Point:
        return val[0]
    if geom_type == LineString:
        return val
    if geom_type == Polygon:
        return [val]
    return None

In [98]:
def _way_coords(way):
    """Return the nodes of ``way`` as a list of ``[lon, lat]`` float pairs."""
    return [[float(node.lon), float(node.lat)] for node in way.nodes]


def _way_params(way, schema):
    """Return the tag values of ``way`` for each key in ``schema`` ('' when absent)."""
    return [convert_int(way.tags.get(field, '')) for field in schema]


def _geometry_type(coords):
    """Choose the geojson geometry class for one way's coordinate list."""
    if len(coords) == 1:
        return Point
    # Only a closed ring (first position == last, at least four positions) is
    # exported as a polygon.  Testing for *any* repeated coordinate would also
    # catch open ways that merely revisit a node and turn them into polygons
    # whose ring is not closed.
    if len(coords) >= 4 and coords[0] == coords[-1]:
        return Polygon
    return LineString


def parse_osm(obj_name, schema, additional_tags):
    """Download the ways tagged ``obj_name`` and export them as GeoJSON files.

    Ways are fetched with ``get_osm`` for ``obj_name`` and for every key in
    ``additional_tags``.  A way from an additional key whose geometry equals
    one already collected updates that object's first and third attribute;
    otherwise it is added as a new object.  The objects are then split by
    geometry type and each non-empty group is written to
    ``<obj_name>_<GeometryName>.geojson`` in the current directory.

    Args:
        obj_name: primary OSM key to query; also the output file name prefix.
        schema: tag keys exported as feature properties.  Names are cut to 10
            characters and the first one is renamed to ``'type'``.  When
            ``additional_tags`` is not empty it needs at least three entries,
            because slot 2 receives the additional key's value.
        additional_tags: further OSM keys whose ways are merged into the export.

    Returns:
        None. The result is the set of files written.
    """
    # download osm data with overpass api
    result = get_osm(obj_name)
    additional_result = [get_osm(atag) for atag in additional_tags]

    # parse osm data
    coords = [_way_coords(way) for way in result.ways]
    params = [_way_params(way, schema) for way in result.ways]

    # Merge the ways of the additional keys.  Each way is read together with
    # its own tags, so geometry and attributes can never get out of step (the
    # list of geometries is not modified while it is being walked).
    for atag, ares in zip(additional_tags, additional_result):
        for way in ares.ways:
            geometry = _way_coords(way)
            tag_value = way.tags.get(atag, '')
            try:
                k = coords.index(geometry)
            except ValueError:
                # no object with the same geometry found: add the object
                new_params = _way_params(way, schema)
                new_params[0] = atag        # 'type' column: the additional key
                new_params[2] = tag_value   # third column: that key's value
                coords.append(geometry)
                params.append(new_params)
            else:
                # an object with the same geometry exists: update its values
                params[k][0] = atag
                params[k][2] = tag_value

    # filter out different geometry types in the result request
    obj_dict = {Point: [], Polygon: [], LineString: []}
    for geometry, values in zip(coords, params):
        obj_dict[_geometry_type(geometry)].append((geometry, values))

    # export json
    fields = [field[:10] for field in schema]  # shorten field names to 10 characters
    fields[0] = 'type'                         # rename the first field to 'type'
    for geom, items in obj_dict.items():
        if not items:  # this geom type has no values for this osm tag
            continue
        outjson = {"type": "FeatureCollection", "features": []}
        for geometry, values in items:
            outjson['features'].append({
                'type': 'Feature',
                'geometry': geom(format_coord(geometry, geom)),
                'properties': dict(zip(fields, values)),
            })
        outname = obj_name + '_' + geom.__name__ + '.geojson'
        with open(outname, 'w') as outfile:
            geojson.dump(outjson, outfile)

In [None]:
# Run one export per configured OSM key: (primary key, schema, additional keys).
for obj_name, obj_schema, extra_tags in objects:
    parse_osm(obj_name, obj_schema, extra_tags)