In [None]:
# The idea of this script is to calculate size distributions of OSM features.
# Of interest is a distribution for each key-value pair that is
# listed on the wiki pages referenced in osm_groups.txt.
# For this we randomly query the whole OSM dataset to collect samples.

In [None]:
# https://gis.stackexchange.com/questions/127427/transforming-shapely-polygon-and-multipolygon-objects

In [39]:
import parse_osm
from shapely.geometry import Polygon, LineString, Point, MultiPolygon, MultiLineString, MultiPoint
import random
import pandas as pd
import pyproj


In [122]:
def random_bb(size, constrain=None):
    """Return a random axis-aligned square bounding box as a shapely Polygon.

    A lower-left corner is drawn uniformly in coordinate space and the box
    extends ``size`` units from it in positive x and positive y direction.

    Parameters
    ----------
    size : float
        Edge length of the box. It has to be in degrees, because the
        bounding box used for OSM is expressed in degrees.
    constrain : shapely (Multi)Polygon, optional
        Limits where the box may be placed: the lower-left corner is
        guaranteed to lie inside this geometry. Only that corner is
        constrained, the rest of the box may extend beyond the geometry.
        ``None`` or an empty geometry means "no constraint".

    Returns
    -------
    shapely.geometry.Polygon
        The four corners, starting at the lower-left one, counter-clockwise.

    Raises
    ------
    ValueError
        If ``constrain`` is non-empty but has zero area. Rejection sampling
        could never hit such a geometry and would loop forever.

    Notes
    -----
    Without a constraint x is drawn from the longitude range and y from the
    latitude range, shrunk by ``size`` so the whole box stays inside
    [-180, 180] x [-90, 90].
    NOTE(review): the example cell in this notebook passes a constraint whose
    x values look like latitudes -- confirm which axis order the consumer of
    the box expects.
    """
    # Explicit test instead of relying on the truthiness of a geometry object.
    has_constraint = constrain is not None and not constrain.is_empty

    if has_constraint:
        if constrain.area == 0:
            raise ValueError("constrain must have a non-zero area to sample a point inside it")

        # Shrink the possibility space down to the bounds of the constraint,
        # then draw candidates until one falls inside the geometry itself.
        minx, miny, maxx, maxy = constrain.bounds
        while True:
            ll_x, ll_y = random.uniform(minx, maxx), random.uniform(miny, maxy)
            if constrain.contains(Point(ll_x, ll_y)):
                break
    else:
        # Keep the upper-right corner inside the valid coordinate range.
        ll_x = random.uniform(-180, max(-180, 180 - size))
        ll_y = random.uniform(-90, max(-90, 90 - size))

    return Polygon([
        (ll_x, ll_y),
        (ll_x + size, ll_y),
        (ll_x + size, ll_y + size),
        (ll_x, ll_y + size),
    ])
    
    
def generate_tag_list(df):
    """Build one "<key> <value>" string per row of ``df``.

    ``df`` has to provide the columns ``key`` and ``value``; the returned
    list follows the row order of the frame.
    """
    return [f"{record['key']} {record['value']}" for _, record in df.iterrows()]


def filter_tags(row):
    """Tell whether at least one tag of ``row`` is listed in ``tag_list``.

    ``row['tags']`` is expected to be a mapping of tag key to tag value (or
    something falsy when the feature has no tags). Each pair is rendered as
    "<key> <value>" and looked up in the module-level ``tag_list``.
    Returns True on the first hit, otherwise False.
    """
    tags = row['tags']
    if not tags:
        return False
    return any(f"{name} {content}" in tag_list for name, content in tags.items())

def line_length(line):
    """Return the absolute geodesic length of ``line`` on the WGS84 ellipsoid.

    The value comes from ``pyproj.Geod.geometry_length``
    (presumably metres -- confirm against the pyproj documentation).
    """
    wgs84 = pyproj.Geod(ellps='WGS84')
    return abs(wgs84.geometry_length(line))

def poly_area(poly):
    """Return the absolute geodesic area of ``poly`` on the WGS84 ellipsoid.

    ``pyproj.Geod.geometry_area_perimeter`` returns an (area, perimeter)
    pair; only the area is used. ``abs`` removes the sign of that area.
    """
    wgs84 = pyproj.Geod(ellps='WGS84')
    area, _perimeter = wgs84.geometry_area_perimeter(poly)[0], None
    return abs(area)


def calcualte_size_for_tags(row):
    """Compute the size of one feature and attach it to each relevant tag.

    Parameters
    ----------
    row : mapping / pandas Series
        Must provide ``geometry`` (a dict of geometry kind -> geometry) and
        ``tags`` (a dict of tag key -> tag value).

    Returns
    -------
    dict
        ``{"<key> <value>": {"length": ..., "area": ...}, ...}`` with one
        entry per tag of the feature that is listed in the module-level
        ``tag_list``. ``length`` is only present if a geometry kind contains
        "line", ``area`` only if one contains "poly". Every entry gets its
        own copy of the size dict, so changing one entry never affects the
        others.
    """
    geometry, tags = row['geometry'], row['tags']

    sizes_dict = {}
    for geom_kind, geom in geometry.items():
        if 'line' in geom_kind:
            sizes_dict['length'] = line_length(geom)
        elif 'poly' in geom_kind:
            sizes_dict['area'] = poly_area(geom)

    # a geometry might have more than one relevant tag; we only keep
    # key/value combinations that are part of tag_list
    return_dict = {}
    for key, value in tags.items():
        key_val = f'{key} {value}'
        if key_val in tag_list:
            # copy: the original shared one dict object between all tags
            return_dict[key_val] = dict(sizes_dict)

    return return_dict

In [4]:
# Small rectangular test constraint; x carries the ~52.45 values and y the ~13.29 values
x_low, x_high = 52.450425727741, 52.458323725344
y_low, y_high = 13.286182880402, 13.299744129181
q_poly = Polygon([
    (x_low, y_low),
    (x_high, y_low),
    (x_high, y_high),
    (x_low, y_high),
])

# draw one random 0.005 degree box whose lower-left corner lies inside q_poly
r_bb = random_bb(0.005, q_poly)
list(r_bb.exterior.coords)

[(52.45099697006593, 13.291427987294623),
 (52.45599697006593, 13.291427987294623),
 (52.45599697006593, 13.296427987294624),
 (52.45099697006593, 13.296427987294624),
 (52.45099697006593, 13.291427987294623)]

In [None]:
# we use the generated polygon to query OSM for all features within the bounding box
osm_handle = parse_osm.disect_osm(parse_osm.json_from_osm(r_bb))

# now we load the predefined tags from pickle
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source
tag_df = pd.read_pickle('./kv_df_just_eng.pickle')
tag_list = generate_tag_list(tag_df)


def _select_tagged_non_nodes(feature_df):
    """Keep rows that a) carry a tag from tag_list and b) are not just a point (node)."""
    has_listed_tag = feature_df.apply(filter_tags, axis=1)
    return feature_df[has_listed_tag & (feature_df['type'] != 'node')]


# filter the queried features ...
selected_geom_df = _select_tagged_non_nodes(osm_handle.feature_df)
type_id_list = selected_geom_df[['type', 'id']].values.tolist()

# ... calculate the geometry for the selected features ...
for f_type, osm_id in type_id_list:
    osm_handle.generate_geometry(f_type, osm_id)

# ... and reselect them
# (presumably generate_geometry fills osm_handle.feature_df -- confirm in parse_osm)
selected_geom_df = _select_tagged_non_nodes(osm_handle.feature_df)

# for these selected features we calculate, where applicable, either area or length or both
# and then split them into their key value pairs
records = []
for _, feature in selected_geom_df[['geometry', 'tags']].iterrows():
    for key_val, sizes in calcualte_size_for_tags(feature).items():
        # split on the first blank only, so values that contain spaces stay intact
        key, value = key_val.split(' ', 1)
        records.append({
            'key': key,
            'value': value,
            'area': sizes.get('area'),
            'length': sizes.get('length'),
        })

# build the frame once; DataFrame.append in a loop is quadratic and was removed in pandas 2.0
results_df = pd.DataFrame(records, columns=['key', 'value', 'area', 'length'])
    

In [129]:
# Collect one record per (feature, relevant tag) and build the frame in one go;
# DataFrame.append in a loop is quadratic and was removed in pandas 2.0.
records = []
for _, feature in selected_geom_df[['geometry', 'tags']].iterrows():
    for key_val, sizes in calcualte_size_for_tags(feature).items():
        # split on the first blank only, so values that contain spaces stay intact
        key, value = key_val.split(' ', 1)
        records.append({
            'key': key,
            'value': value,
            'area': sizes.get('area'),
            'length': sizes.get('length'),
        })

results_df = pd.DataFrame(records, columns=['key', 'value', 'area', 'length'])
        

{'key': 'building', 'value': 'university', 'area': 12094.401025812374, 'length': None}
{'key': 'building', 'value': 'university', 'area': 6808.320428214967, 'length': None}
{'key': 'building', 'value': 'university', 'area': 5058.110836043023, 'length': None}
{'key': 'building', 'value': 'yes', 'area': 17964.531341615133, 'length': None}
{'key': 'building', 'value': 'yes', 'area': 10441.846917357296, 'length': None}
{'key': 'public_transport', 'value': 'stop_area', 'area': None, 'length': 21.484651486174325}
{'key': 'building', 'value': 'yes', 'area': 15632.958679543342, 'length': None}
{'key': 'building', 'value': 'university', 'area': 1464.4872720818967, 'length': None}
{'key': 'amenity', 'value': 'university', 'area': 304782.2710177484, 'length': None}
{'key': 'building', 'value': 'university', 'area': 577.9221063107252, 'length': None}
{'key': 'building', 'value': 'university', 'area': 901.8362741917372, 'length': None}
{'key': 'building', 'value': 'university', 'area': 3478.6536210

In [130]:
# show the collected sizes: one row per (feature, relevant tag) with the columns key, value, area, length
results_df

Unnamed: 0,key,value,area,length
0,building,university,12094.401026,
1,building,university,6808.320428,
2,building,university,5058.110836,
3,building,yes,17964.531342,
4,building,yes,10441.846917,
...,...,...,...,...
111,landuse,residential,6397.259522,
112,landuse,residential,20702.034009,
113,landuse,residential,18950.204090,
114,landuse,residential,9509.302187,


In [81]:
# inspect the selected relations: print the index label followed by the tag dict
relation_rows = selected_geom_df[selected_geom_df['type'] == 'relation']
for row in relation_rows.iterrows():
    print(row[0], row[1]['tags'], sep='\n')

4394
{'addr:city': 'Berlin', 'addr:country': 'DE', 'addr:housenumber': '14', 'addr:postcode': '14195', 'addr:street': 'Arnimallee', 'addr:suburb': 'Dahlem', 'building': 'university', 'department': 'physics', 'layer': '1', 'name': 'FB Physik', 'operator': 'Freie Universität Berlin', 'short_name': 'Physik', 'short_name:en': 'Physics', 'type': 'multipolygon', 'website': 'https://www.fu-berlin.de/einrichtungen/fachbereiche/fb/physik/'}
4395
{'addr:housenumber': '3', 'addr:street': 'Takustraße', 'building': 'university', 'building:levels': '4', 'roof:levels': '0', 'type': 'multipolygon'}
4396
{'addr:city': 'Berlin', 'addr:country': 'DE', 'addr:housenumber': '22', 'addr:postcode': '14195', 'addr:street': 'Arnimallee', 'addr:suburb': 'Dahlem', 'building': 'university', 'department': 'Institute für Chemie und Biochemie', 'name': 'FB Biologie, Chemie, Pharmazie', 'operator': 'Freie Universität Berlin', 'type': 'multipolygon'}
4399
{'building': 'yes', 'type': 'multipolygon'}
4402
{'building': 'y

In [83]:
# list the tag keys of the feature with index label 4403
# (the label comes from one particular query result and changes with the sampled box)
tags_4403 = selected_geom_df.loc[4403]['tags']
for el in tags_4403:
    print(el)

name
network
network:short
operator
operator:short
public_transport
ref:VBB
type
website


In [73]:
# spot check: length of the 'multiline' geometry of the feature with index label 4403
geometry_4403 = selected_geom_df.loc[4403]['geometry']
line_length(geometry_4403['multiline'])

21.484651486174325

In [25]:
import ipyleaflet
from ipyleaflet import Map, basemaps, basemap_to_tiles, GeoJSON, WKTLayer
from ipywidgets import Label

# osmid = test.feature_df[test.feature_df['type']=='relation'].sample().iloc[0]['id']
# test_multi = test.generate_geometry('relation',osmid)

# The centroid of r_bb is passed to the map as-is, while the polygon handed to the
# WKT layer gets its two coordinates swapped.
# NOTE(review): presumably r_bb stores (lat, lon), which is the order ipyleaflet wants
# for `center`, whereas WKT is read as (x=lon, y=lat) -- confirm.
map_center = r_bb.centroid.coords[0]
m = Map(basemap=basemaps.CartoDB.Positron, center=map_center, zoom=14)

# bounding box with swapped axes for display
swapped_bb = Polygon([(second, first) for first, second in r_bb.exterior.coords])
wlayer = WKTLayer(
    wkt_string=swapped_bb.wkt,
    # hover_style={"fillColor": "red"},
)

# text widget that shows the coordinates of the last click on the map
label = Label()
display(label)



def handle_interaction(**kwargs):
    """Map interaction callback: show the clicked position in ``label``.

    Only events whose ``type`` is 'click' are handled; the first two entries
    of the event's ``coordinates`` are written to ``label.value`` as
    "[(first,second)]". All other events are ignored.
    """
    if kwargs.get('type') != 'click':
        return
    position = kwargs.get("coordinates")
    label.value = f'[({position[0]},{position[1]})]'

# register the click callback so interactions on the map reach handle_interaction
m.on_interaction(handle_interaction)


# put the bounding-box layer on the map and render the map as the cell output
m.add_layer(wlayer)
m

Label(value='')

Map(center=[52.4530841802137, 13.296614789471116], controls=(ZoomControl(options=['position', 'zoom_in_text', …