In [1]:
import overpy
import shapefile
import urllib.request
import time
import geojson

In [2]:
# Study area: the bounding box of this shapefile limits every Overpass request.
inshp = "area/reg_buff.shp"
sf = shapefile.Reader(inshp)
bbox = sf.bbox   # get_osm() reads it as [west, south, east, north]

# One entry per output layer:
#   [primary OSM key, attribute schema (OSM keys exported as fields), additional OSM keys merged into the layer]
# NOTE(review): 'natural' is listed twice (first and last entry) with different additional keys;
# both runs write to the same natural_*.geojson file names - confirm which one is wanted.
objects = [['natural', ['natural', 'name', 'amenity'], ['geological', 'landcover', 'waterway']],
           ['building', ['building', 'name', 'amenity'], ['shop', 'tourism']],
           ['boundary', ['boundary', 'name', 'admin_level', 'place', 'koatuu', 'population'], []],
           ['landuse', ['landuse', 'name', 'amenity'], ['aeroway', 'leisure', 'tourism']],
           # 'traffic_sign' is the OSM key (was misspelled 'trafic_sign')
           ['highway', ['highway', 'name', 'amenity', 'int_ref', 'surface'], ['railway', 'traffic_sign', 'public_transport']],
           ['man_made', ['man_made', 'name', 'amenity'], ['power', 'office', 'shop', 'leisure', 'tourism', 'historic']],
           ['place', ['place', 'name', 'koatuu', 'population'], []],
           ['natural', ['natural', 'name', 'amenity'], ['geological', 'landcover']]]

In [10]:
def convert_int_none(x):
    '''
    Normalises a single attribute value for export.
    input: x - any value (typically a tag value string or None)
    output: '' when x is None; int(x) when x can be converted to an integer;
            otherwise x itself, unchanged
    Note: integer-like strings lose leading zeros ('007' becomes 7).
    '''
    if x is None:
        return ''
    try:
        return int(x)
    except (ValueError, TypeError):   # not a number (ValueError) or not convertible at all (TypeError)
        return x

    
def get_osm(tag, retry_delay=4):
    '''
    requests data from OSM using Overpass API, limited to the module-level bbox
    input: tag - text placed inside the [...] tag filter of the query (single quotes are replaced by double quotes)
           retry_delay - seconds to wait before repeating a failed request
    output: dictionary {'node' : overpy.Result, 'way': overpy.Result, 'rel': [....]}
            'rel' holds one overpy.Result per relation id returned by the bounding-box query
    raises: overpy.exception.OverpassBadRequest when the server rejects the query text;
            any non-Overpass, non-network error propagates instead of being retried
    '''
    api = overpy.Overpass()
    tag_filter = tag.replace("'", '"')

    def run_query(query_text):
        # Each request is retried on its own, so one failure does not restart the whole download.
        while True:
            try:
                return api.query(query_text)
            except overpy.exception.OverpassBadRequest:
                raise                       #a rejected query will not succeed on retry
            except (overpy.exception.OverPyException, OSError):
                time.sleep(retry_delay)     #time delay for the next query in case of many requests

    def query_bbox(rel_type):
        return run_query("""({}({},{},{},{}) [{}];);(._;>;);out;"""
                         .format(rel_type, bbox[1], bbox[0], bbox[3], bbox[2], tag_filter))

    result = dict()
    result['node'] = query_bbox('node')
    result['way'] = query_bbox('way')
    relations = query_bbox('rel')
    #every relation is downloaded separately so that its member ways stay grouped in one Result
    result['rel'] = [run_query("""relation({});(._;>;);out;""".format(ids))
                     for ids in relations.get_relation_ids()]
    return result
        
        
def get_params(obj, schema, additional_tags):
    '''
    returns attribute values for each complete object (e.g. node, way or a component-way in a relation)
    input: obj - object with a .tags mapping
           schema - list of tag keys; the result has one value per key, in the same order
           additional_tags - tag keys checked when the object has no 'amenity' value
    output: list of values passed through convert_int_none

    The additional-tag fallback runs only when the schema contains 'amenity'. Schemas without it
    (e.g. boundary, place) are returned as read, so a missing third field no longer blanks the type.
    '''
    val = [obj.tags.get(tag) for tag in schema]
    if 'amenity' in schema:
        amenity_idx = schema.index('amenity')
        if not val[amenity_idx]:          #if 'amenity' value does not exist, check if the object is from additional tags
            for atag in additional_tags:
                val[amenity_idx] = obj.tags.get(atag)   #amenity = value of the tag
                if val[amenity_idx]:
                    val[0] = atag         #type = name of the tag
                    break
            if not val[amenity_idx]:      #if (after the check) the object is still not from the additional tags
                # NOTE(review): this also blanks the primary tag value of objects that simply
                # have no amenity - confirm this is intended.
                val[amenity_idx] = ''
                val[0] = ''
    params = list(map(convert_int_none, val))   #try to convert to int and empty string/NULL
    return params


def all_unique(inlist):
    '''
    Checks whether no item occurs more than once
    input: inlist - iterable of items comparable with == (items may be unhashable, e.g. lists)
    output: True if every item is distinct, False as soon as a repeated item is found
    '''
    encountered = []
    for item in inlist:
        if item in encountered:
            return False
        encountered.append(item)
    return True
    
    
def format_coord(geom_key, val):
    '''
    Formats coordinates (latitude and longitude) to the needed format for json
    input: geom_key - string; 'node', 'way' or 'rel'
           val - list of coordinates
    output: 'node' - tuple ('Point', val)
            'way'  - tuple ('LineString', val) when no coordinate repeats, otherwise ('Polygon', [val])
            'rel'  - the list [val] only, without a geometry type
            any other key - None
    '''
    if geom_key == 'node':
        return 'Point', val
    elif geom_key == 'way':
        if all_unique(val):   #is line: no coordinate occurs twice
            return 'LineString', val
        else:
            return 'Polygon', [val]
    elif geom_key == 'rel':
        # NOTE(review): unlike the other branches this returns no geometry type - confirm callers expect that.
        return [val]

### Relations may contain nodes together with multipolygons. They are probably accidental. Filter them out!

In [7]:
def parse_osm(obj_name, schema, additional_tags):
    '''
    main func
    download osm data with overpass api and export it as geojson
    input: obj_name - primary OSM key; also the prefix of the output file names
           schema - list of tag keys exported as feature properties
           additional_tags - extra OSM keys whose objects are merged into the same output
    output: None; writes one file '<obj_name>_<geometry type>.geojson' (relative path) for each of
            Point, LineString, Polygon and MultiPolygon that has at least one object
    '''
    #get all results for this key/tag and expand them with results from additional tags
    res = get_osm(obj_name)
    for atag in additional_tags:
        ares = get_osm(atag)    #{'node' : overpy.Result, 'way': overpy.Result, 'rel': [....]} for each additional tag
        res['node'].expand(ares['node'])
        res['way'].expand(ares['way'])
        res['rel'].extend(ares['rel'])

    def lonlat(node):
        return [float(node.lon), float(node.lat)]

    #collect coordinates and attribute values per geometry type; both lists stay index-aligned
    objs = {geom_type: {'coords': [], 'params': []}
            for geom_type in ('Point', 'LineString', 'Polygon', 'MultiPolygon')}

    for node in res['node'].nodes:
        objs['Point']['coords'].append(lonlat(node))
        objs['Point']['params'].append(get_params(node, schema, additional_tags))

    for way in res['way'].ways:
        coord = [lonlat(node) for node in way.nodes]
        if all_unique(coord):      #if all points in the 'way' object are unique
            geom_type, geometry = 'LineString', coord
        else:                      #if there is a shared point
            # NOTE(review): a way that repeats a point without being closed also ends up here.
            geom_type, geometry = 'Polygon', [coord]
        objs[geom_type]['coords'].append(geometry)
        objs[geom_type]['params'].append(get_params(way, schema, additional_tags))

    for rres in res['rel']:        #here rres represents only one relation/MultiPolygon object
        # NOTE(review): every member way becomes its own single-ring polygon - confirm this is intended.
        objs['MultiPolygon']['coords'].append([[[lonlat(node) for node in way.nodes]] for way in rres.ways])
        objs['MultiPolygon']['params'].append(get_params(rres.relations[0], schema, additional_tags))

    #export json
    field_names = [field[:10] for field in schema]   #shorten field names to 10 characters
    field_names[0] = 'type'                          #rename the first field to 'type'
    for geom_type, layer in objs.items():
        if not layer['coords']:    #no objects of this geom type for this tag: no file is created
            continue
        features = [{'type': 'Feature',
                     'geometry': {
                         "type": geom_type,
                         "coordinates": geometry
                         },
                     'properties': dict(zip(field_names, params))
                     }
                    for geometry, params in zip(layer['coords'], layer['params'])]
        outjson = {"type": "FeatureCollection", "features": features}
        outname = obj_name + '_' + geom_type + '.geojson'
        with open(outname, 'w') as outfile:
            geojson.dump(outjson, outfile)

In [11]:
# Single-layer trial run: [primary key, attribute schema, additional keys]
example = ['boundary',
           ['boundary', 'name', 'admin_level', 'place', 'koatuu', 'population'],
           []]
parse_osm(*example)

In [None]:
# Download and export every configured layer: each entry unpacks to
# (primary key, attribute schema, additional keys).
for tag in objects:
    parse_osm(*tag)