forked from pysal/momepy
/
elements.py
1070 lines (926 loc) · 36.9 KB
/
elements.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# elements.py
# generating derived elements (street edge, block)
import geopandas as gpd
import libpysal
import warnings
import numpy as np
import pygeos
import pandas as pd
from scipy.spatial import Voronoi
from shapely.geometry.base import BaseGeometry
from shapely.ops import polygonize
from tqdm.auto import tqdm
# TODO: this should not be needed with shapely 2.0
from geopandas._vectorized import _pygeos_to_shapely
__all__ = [
"buffered_limit",
"Tessellation",
"Blocks",
"get_network_id",
"get_node_id",
"enclosures",
"get_network_ratio",
]
def buffered_limit(gdf, buffer=100):
"""
Define limit for :class:`momepy.Tessellation` as a buffer around buildings.
See :cite:`fleischmann2020` for details.
Parameters
----------
gdf : GeoDataFrame
GeoDataFrame containing building footprints
buffer : float
buffer around buildings limiting the extend of tessellation
Returns
-------
MultiPolygon
MultiPolygon or Polygon defining the study area
Examples
--------
>>> limit = mm.buffered_limit(buildings_df)
>>> type(limit)
shapely.geometry.polygon.Polygon
"""
return gdf.buffer(buffer).unary_union
class Tessellation:
"""
Generates tessellation.
Three versions of tessellation can be created:
1. Morphological tessellation around given buildings ``gdf`` within set ``limit``.
2. Proximity bands around given street network ``gdf`` within set ``limit``.
3. Enclosed tessellation based on given buildings ``gdf`` within ``enclosures``.
Pass either ``limit`` to create morphological tessellation or proximity bands or
``enclosures`` to create enclosed tessellation.
See :cite:`fleischmann2020` for details of implementation of morphological
tessellation and :cite:`araldi2019` for proximity bands.
Tessellation requires data of relatively high level of precision and there are three
particular patterns causing issues.\n
1. Features will collapse into empty polygon - these do not have tessellation
cell in the end.\n
2. Features will split into MultiPolygon - at some cases, features with narrow links
between parts split into two during 'shrinking'. In most cases that is not an issue
and resulting tessellation is correct anyway, but sometimes this result in a cell
being MultiPolygon, which is not correct.\n
3. Overlapping features - features which overlap even after 'shrinking' cause
invalid tessellation geometry.\n
All three types can be tested prior :class:`momepy.Tessellation` using
:class:`momepy.CheckTessellationInput`.
Parameters
----------
gdf : GeoDataFrame
GeoDataFrame containing building footprints or street network
unique_id : str
name of the column with unique id
limit : MultiPolygon or Polygon (default None)
MultiPolygon or Polygon defining the study area limiting
morphological tessellation or proximity bands
(otherwise it could go to infinity).
shrink : float (default 0.4)
distance for negative buffer to generate space between adjacent polygons
(if geometry type of gdf is (Multi)Polygon).
segment : float (default 0.5)
maximum distance between points after discretization
verbose : bool (default True)
if True, shows progress bars in loops and indication of steps
enclosures : GeoDataFrame (default None)
Enclosures geometry. Can be generated using :func:`momepy.enclosures`.
enclosure_id : str (default 'eID')
name of the enclosure_id containing unique identifer for each row in
``enclosures``. Applies only if ``enclosures`` are passed.
threshold : float (default 0.05)
The minimum threshold for a building to be considered within an enclosure.
Threshold is a ratio of building area which needs to be within an enclosure to
inlude it in the tessellation of that enclosure. Resolves sliver geometry
issues. Applies only if ``enclosures`` are passed.
use_dask : bool (default True)
Use parallelised algorithm based on ``dask.dataframe``. Requires dask.
Applies only if ``enclosures`` are passed.
n_chunks : None
Number of chunks to be used in parallelization. Ideal is one chunk per thread.
Applies only if ``enclosures`` are passed. Defualt automatically uses
n == dask.system.cpu_count.
Attributes
----------
tessellation : GeoDataFrame
GeoDataFrame containing resulting tessellation
For enclosed tessellation, gdf contains three columns:
- ``geometry``,
- ``unique_id`` matching with parental building,
- ``enclosure_id`` matching with enclosure integer index
gdf : GeoDataFrame
original GeoDataFrame
id : Series
Series containing used unique ID
limit : MultiPolygon or Polygon
limit
shrink : float
used shrink value
segment : float
used segment value
collapsed : list
list of unique_id's of collapsed features (if there are some)
Applies only if ``limit`` is passed.
multipolygons : list
list of unique_id's of features causing MultiPolygons (if there are some)
Applies only if ``limit`` is passed.
Examples
--------
>>> tess = mm.Tessellation(
... buildings_df, 'uID', limit=mm.buffered_limit(buildings_df)
... )
Inward offset...
Generating input point array...
Generating Voronoi diagram...
Generating GeoDataFrame...
Dissolving Voronoi polygons...
>>> tess.tessellation.head()
uID geometry
0 1 POLYGON ((1603586.677274485 6464344.667944215,...
1 2 POLYGON ((1603048.399497852 6464176.180701573,...
2 3 POLYGON ((1603071.342637536 6464158.863329805,...
3 4 POLYGON ((1603055.834005827 6464093.614718676,...
4 5 POLYGON ((1603106.417554705 6464130.215958447,...
>>> enclosures = mm.enclosures(streets, admin_boundary, [railway, rivers])
>>> encl_tess = mm.Tessellation(
... buildings_df, 'uID', enclosures=enclosures
... )
>>> encl_tess.tessellation.head()
uID geometry eID
0 109.0 POLYGON ((1603369.789 6464340.661, 1603368.754... 0
1 110.0 POLYGON ((1603368.754 6464340.097, 1603369.789... 0
2 111.0 POLYGON ((1603458.666 6464332.614, 1603458.332... 0
3 112.0 POLYGON ((1603462.235 6464285.609, 1603454.795... 0
4 113.0 POLYGON ((1603524.561 6464388.609, 1603532.241... 0
"""
def __init__(
self,
gdf,
unique_id,
limit=None,
shrink=0.4,
segment=0.5,
verbose=True,
enclosures=None,
enclosure_id="eID",
threshold=0.05,
use_dask=True,
n_chunks=None,
**kwargs,
):
self.gdf = gdf
self.id = gdf[unique_id]
self.limit = limit
self.shrink = shrink
self.segment = segment
self.enclosure_id = enclosure_id
if gdf.crs and gdf.crs.is_geographic:
raise ValueError(
"Geometry is in a geographic CRS. "
"Use 'GeoDataFrame.to_crs()' to re-project geometries to a "
"projected CRS before using Tessellation.\n",
)
if limit is not None and enclosures is not None:
raise ValueError(
"Both `limit` and `enclosures` cannot be passed together. "
"Pass `limit` for morphological tessellation or `enclosures` "
"for enclosed tessellation."
)
gdf = gdf.copy()
if enclosures is not None:
enclosures = enclosures.copy()
bounds = enclosures.total_bounds
centre_x = (bounds[0] + bounds[2]) / 2
centre_y = (bounds[1] + bounds[3]) / 2
gdf.geometry = gdf.geometry.translate(xoff=-centre_x, yoff=-centre_y)
enclosures.geometry = enclosures.geometry.translate(
xoff=-centre_x, yoff=-centre_y
)
self.tessellation = self._enclosed_tessellation(
gdf,
enclosures,
unique_id,
enclosure_id,
threshold,
use_dask,
n_chunks,
)
else:
if isinstance(limit, (gpd.GeoSeries, gpd.GeoDataFrame)):
limit = limit.unary_union
if isinstance(limit, BaseGeometry):
limit = pygeos.from_shapely(limit)
bounds = pygeos.bounds(limit)
centre_x = (bounds[0] + bounds[2]) / 2
centre_y = (bounds[1] + bounds[3]) / 2
gdf.geometry = gdf.geometry.translate(xoff=-centre_x, yoff=-centre_y)
# add convex hull buffered large distance to eliminate infinity issues
limit = (
gpd.GeoSeries(limit, crs=gdf.crs)
.translate(xoff=-centre_x, yoff=-centre_y)
.values.data[0]
)
self.tessellation = self._morphological_tessellation(
gdf, unique_id, limit, shrink, segment, verbose
)
self.tessellation["geometry"] = self.tessellation["geometry"].translate(
xoff=centre_x, yoff=centre_y
)
def _morphological_tessellation(
self, gdf, unique_id, limit, shrink, segment, verbose, check=True
):
objects = gdf
if shrink != 0:
print("Inward offset...") if verbose else None
mask = objects.type.isin(["Polygon", "MultiPolygon"])
objects.loc[mask, objects.geometry.name] = objects[mask].buffer(
-shrink, cap_style=2, join_style=2
)
objects = objects.reset_index(drop=True).explode()
objects = objects.set_index(unique_id)
print("Generating input point array...") if verbose else None
points, ids = self._dense_point_array(
objects.geometry.values.data, distance=segment, index=objects.index
)
hull = pygeos.convex_hull(limit)
bounds = pygeos.bounds(hull)
width = bounds[2] - bounds[0]
leng = bounds[3] - bounds[1]
hull = pygeos.buffer(hull, 2 * width if width > leng else 2 * leng)
hull_p, hull_ix = self._dense_point_array(
[hull], distance=pygeos.length(hull) / 100, index=[0]
)
points = np.append(points, hull_p, axis=0)
ids = ids + ([-1] * len(hull_ix))
print("Generating Voronoi diagram...") if verbose else None
voronoi_diagram = Voronoi(np.array(points))
print("Generating GeoDataFrame...") if verbose else None
regions_gdf = self._regions(voronoi_diagram, unique_id, ids, crs=gdf.crs)
print("Dissolving Voronoi polygons...") if verbose else None
morphological_tessellation = regions_gdf[[unique_id, "geometry"]].dissolve(
by=unique_id, as_index=False
)
morphological_tessellation = gpd.clip(
morphological_tessellation, gpd.GeoSeries(limit, crs=gdf.crs)
)
if check:
self._check_result(morphological_tessellation, gdf, unique_id=unique_id)
return morphological_tessellation
def _dense_point_array(self, geoms, distance, index):
"""
geoms - array of pygeos lines
"""
# interpolate lines to represent them as points for Voronoi
points = []
ids = []
if pygeos.get_type_id(geoms[0]) not in [1, 2, 5]:
lines = pygeos.boundary(geoms)
else:
lines = geoms
lengths = pygeos.length(lines)
for ix, line, length in zip(index, lines, lengths):
if length > distance: # some polygons might have collapsed
pts = pygeos.line_interpolate_point(
line,
np.linspace(0.1, length - 0.1, num=int((length - 0.1) // distance)),
) # .1 offset to keep a gap between two segments
points.append(pygeos.get_coordinates(pts))
ids += [ix] * len(pts)
points = np.vstack(points)
return points, ids
# here we might also want to append original coordinates of each line
# to get a higher precision on the corners
def _regions(self, voronoi_diagram, unique_id, ids, crs):
"""
Generate GeoDataFrame of Voronoi regions from scipy.spatial.Voronoi.
"""
vertices = pd.Series(voronoi_diagram.regions).take(voronoi_diagram.point_region)
polygons = []
for region in vertices:
if -1 not in region:
polygons.append(pygeos.polygons(voronoi_diagram.vertices[region]))
else:
polygons.append(None)
regions_gdf = gpd.GeoDataFrame(
{unique_id: ids}, geometry=polygons, crs=crs
).dropna()
regions_gdf = regions_gdf.loc[
regions_gdf[unique_id] != -1
] # delete hull-based cells
return regions_gdf
def _check_result(self, tesselation, orig_gdf, unique_id):
"""
Check whether result matches buildings and contains only Polygons.
"""
# check against input layer
ids_original = list(orig_gdf[unique_id])
ids_generated = list(tesselation[unique_id])
if len(ids_original) != len(ids_generated):
self.collapsed = set(ids_original).difference(ids_generated)
warnings.warn(
f"Tessellation does not fully match buildings. "
f"{len(self.collapsed)} element(s) collapsed "
f"during generation - unique_id: {self.collapsed}"
)
# check MultiPolygons - usually caused by error in input geometry
self.multipolygons = tesselation[tesselation.geometry.type == "MultiPolygon"][
unique_id
]
if len(self.multipolygons) > 0:
warnings.warn(
"Tessellation contains MultiPolygon elements. Initial objects should "
f"be edited. unique_id of affected elements: {list(self.multipolygons)}"
)
def _enclosed_tessellation(
self,
buildings,
enclosures,
unique_id,
enclosure_id="eID",
threshold=0.05,
use_dask=True,
n_chunks=None,
**kwargs,
):
"""Enclosed tessellation
Generate enclosed tessellation based on barriers defining enclosures and
building footprints.
Parameters
----------
buildings : GeoDataFrame
GeoDataFrame containing building footprints. Expects (Multi)Polygon
geometry.
enclosures : GeoDataFrame
Enclosures geometry. Can be generated using :func:`momepy.enclosures`.
unique_id : str
name of the column with unique id of buildings gdf
threshold : float (default 0.05)
The minimum threshold for a building to be considered within an enclosure.
Threshold is a ratio of building area which needs to be within an enclosure
to inlude it in the tessellation of that enclosure. Resolves sliver geometry
issues.
use_dask : bool (default True)
Use parallelised algorithm based on ``dask.dataframe``. Requires dask.
n_chunks : None
Number of chunks to be used in parallelization. Ideal is one chunk per
thread. Applies only if ``enclosures`` are passed. Defualt automatically
uses n == dask.system.cpu_count.
**kwargs
Keyword arguments passed to Tessellation algorithm (as ``shrink``
or ``segment``).
Returns
-------
tessellation : GeoDataFrame
gdf contains three columns:
geometry,
unique_id matching with parental building,
enclosure_id matching with enclosure integer index
Examples
--------
>>> enclosures = mm.enclosures(streets, admin_boundary, [railway, rivers])
>>> enclosed_tess = mm.enclosed_tessellation(buildings, enclosures)
"""
enclosures = enclosures.reset_index(drop=True)
enclosures["position"] = range(len(enclosures))
# determine which polygons should be split
inp, res = buildings.sindex.query_bulk(
enclosures.geometry, predicate="intersects"
)
unique, counts = np.unique(inp, return_counts=True)
splits = unique[counts > 1]
single = unique[counts == 1]
if use_dask:
try:
import dask.bag as db
from dask.system import cpu_count
except ImportError:
use_dask = False
warnings.warn(
"dask.dataframe could not be imported. Setting `use_dask=False`."
)
if use_dask:
if n_chunks is None:
n_chunks = cpu_count() - 1 if cpu_count() > 1 else 1
# initialize dask.bag
bag = db.from_sequence(splits, npartitions=n_chunks)
# generate enclosed tessellation using dask
new = bag.map(
self._tess,
enclosures,
buildings,
inp,
res,
threshold,
unique_id,
).compute()
else:
new = [
self._tess(
i,
enclosures,
buildings,
inp,
res,
threshold=threshold,
unique_id=unique_id,
**kwargs,
)
for i in splits
]
# finalise the result
clean_blocks = enclosures.drop(splits)
clean_blocks.loc[single, "uID"] = clean_blocks.loc[single, "position"].apply(
lambda ix: buildings.iloc[res[inp == ix][0]][unique_id]
)
tessellation = pd.concat(new)
return tessellation.append(clean_blocks.drop(columns="position")).reset_index(
drop=True
)
def _tess(
self,
ix,
enclosure,
buildings,
query_inp,
query_res,
threshold,
unique_id,
**kwargs,
):
poly = enclosure.geometry.values.data[ix]
blg = buildings.iloc[query_res[query_inp == ix]]
within = blg[
pygeos.area(pygeos.intersection(blg.geometry.values.data, poly))
> (pygeos.area(blg.geometry.values.data) * threshold)
].copy()
if len(within) > 1:
tess = self._morphological_tessellation(
within,
unique_id,
poly,
shrink=self.shrink,
segment=self.segment,
verbose=False,
check=False,
)
tess[self.enclosure_id] = enclosure[self.enclosure_id].iloc[ix]
return tess
return gpd.GeoDataFrame(
{self.enclosure_id: enclosure[self.enclosure_id].iloc[ix], unique_id: None},
geometry=[poly],
index=[0],
)
class Blocks:
"""
Generate blocks based on buildings, tesselation and street network.
Dissolves tessellation cells based on street-network based polygons.
Links resulting id to ``buildings`` and ``tesselation`` as attributes.
Parameters
----------
tessellation : GeoDataFrame
GeoDataFrame containing morphological tessellation
edges : GeoDataFrame
GeoDataFrame containing street network
buildings : GeoDataFrame
GeoDataFrame containing buildings
id_name : str
name of the unique blocks id column to be generated
unique_id : str
name of the column with unique id. If there is none, it could be generated
by :func:`momepy.unique_id`.
This should be the same for cells and buildings, id's should match.
Attributes
----------
blocks : GeoDataFrame
GeoDataFrame containing generated blocks
buildings_id : Series
Series derived from buildings with block ID
tessellation_id : Series
Series derived from morphological tessellation with block ID
tessellation : GeoDataFrame
GeoDataFrame containing original tessellation
edges : GeoDataFrame
GeoDataFrame containing original edges
buildings : GeoDataFrame
GeoDataFrame containing original buildings
id_name : str
name of the unique blocks id column
unique_id : str
name of the column with unique id
Examples
--------
>>> blocks = mm.Blocks(tessellation_df, streets_df, buildings_df, 'bID', 'uID')
Buffering streets...
Generating spatial index...
Difference...
Defining adjacency...
Defining street-based blocks...
Defining block ID...
Generating centroids...
Spatial join...
Attribute join (tesselation)...
Generating blocks...
Multipart to singlepart...
Attribute join (buildings)...
Attribute join (tesselation)...
>>> blocks.blocks.head()
bID geometry
0 1.0 POLYGON ((1603560.078648818 6464202.366899694,...
1 2.0 POLYGON ((1603457.225976106 6464299.454696888,...
2 3.0 POLYGON ((1603056.595487018 6464093.903488506,...
3 4.0 POLYGON ((1603260.943782872 6464141.327631323,...
4 5.0 POLYGON ((1603183.399594798 6463966.109982309,...
"""
def __init__(self, tessellation, edges, buildings, id_name, unique_id, **kwargs):
self.tessellation = tessellation
self.edges = edges
self.buildings = buildings
self.id_name = id_name
self.unique_id = unique_id
if id_name in buildings.columns:
raise ValueError(
"'{}' column cannot be in the buildings GeoDataFrame".format(id_name)
)
cut = gpd.overlay(
tessellation,
gpd.GeoDataFrame(geometry=edges.buffer(0.001)),
how="difference",
).explode()
W = libpysal.weights.Queen.from_dataframe(cut, silence_warnings=True)
cut["component"] = W.component_labels
buildings_c = buildings.copy()
buildings_c.geometry = buildings_c.representative_point() # make points
centroids_tempID = gpd.sjoin(
buildings_c,
cut[[cut.geometry.name, "component"]],
how="left",
op="intersects",
)
cells_copy = tessellation[[unique_id, tessellation.geometry.name]].merge(
centroids_tempID[[unique_id, "component"]], on=unique_id, how="left"
)
blocks = cells_copy.dissolve(by="component").explode().reset_index(drop=True)
blocks[id_name] = range(len(blocks))
blocks[blocks.geometry.name] = gpd.GeoSeries(
pygeos.polygons(blocks.exterior.values.data), crs=blocks.crs
)
blocks = blocks[[id_name, blocks.geometry.name]]
centroids_w_bl_ID2 = gpd.sjoin(buildings_c, blocks, how="left", op="intersects")
bl_ID_to_uID = centroids_w_bl_ID2[[unique_id, id_name]]
buildings_m = buildings[[unique_id]].merge(
bl_ID_to_uID, on=unique_id, how="left"
)
self.buildings_id = buildings_m[id_name]
cells_m = tessellation[[unique_id]].merge(
bl_ID_to_uID, on=unique_id, how="left"
)
self.tessellation_id = cells_m[id_name]
self.blocks = blocks
def get_network_id(left, right, network_id, min_size=100, verbose=True):
"""
Snap each element (preferably building) to the closest street network segment,
saves its id.
Adds network ID to elements.
Parameters
----------
left : GeoDataFrame
GeoDataFrame containing objects to snap
right : GeoDataFrame
GeoDataFrame containing street network with unique network ID.
If there is none, it could be generated by :func:`momepy.unique_id`.
network_id : str, list, np.array, pd.Series (default None)
the name of the streets dataframe column, ``np.array``, or ``pd.Series``
with network unique id.
min_size : int (default 100)
min_size should be a vaule such that if you build a box centered in each
building centroid with edges of size ``2*min_size``, you know a priori that at
least one
segment is intersected with the box.
verbose : bool (default True)
if True, shows progress bars in loops and indication of steps
Returns
-------
elements_nID : Series
Series containing network ID for elements
Examples
--------
>>> buildings_df['nID'] = momepy.get_network_id(buildings_df, streets_df, 'nID')
Generating centroids...
Generating rtree...
Snapping: 100%|██████████| 144/144 [00:00<00:00, 2718.98it/s]
>>> buildings_df['nID'][0]
1
See also
--------
momepy.get_network_ratio
momepy.get_node_id
"""
INFTY = 1000000000000
left = left.copy()
right = right.copy()
if not isinstance(network_id, str):
right["mm_nid"] = network_id
network_id = "mm_nid"
buildings_c = left.copy()
buildings_c[buildings_c.geometry.name] = buildings_c.centroid # make centroids
idx = right.sindex
# TODO: use sjoin nearest once done
result = []
for p in tqdm(
buildings_c.geometry,
total=buildings_c.shape[0],
desc="Snapping",
disable=not verbose,
):
pbox = (p.x - min_size, p.y - min_size, p.x + min_size, p.y + min_size)
hits = list(idx.intersection(pbox))
d = INFTY
nid = None
for h in hits:
new_d = p.distance(right.geometry.iloc[h])
if d >= new_d:
d = new_d
nid = right[network_id].iloc[h]
if nid is None:
result.append(np.nan)
else:
result.append(nid)
series = pd.Series(result, index=left.index)
if series.isnull().any():
warnings.warn(
"Some objects were not attached to the network. "
"Set larger min_size. {} affected elements".format(sum(series.isnull()))
)
return series
def get_node_id(
objects,
nodes,
edges,
node_id,
edge_id=None,
edge_keys=None,
edge_values=None,
verbose=True,
):
"""
Snap each building to closest street network node on the closest network edge.
Adds node ID to objects (preferably buildings). Gets ID of edge
(:func:`momepy.get_network_id` or :func:`get_network_ratio`)
, and determines which of its end points is closer to building centroid.
Pass either ``edge_id`` with a single value or ``edge_keys`` and ``edge_values``
with ratios.
Parameters
----------
objects : GeoDataFrame
GeoDataFrame containing objects to snap
nodes : GeoDataFrame
GeoDataFrame containing street nodes with unique node ID.
If there is none, it could be generated by :func:`momepy.unique_id`.
edges : GeoDataFrame
GeoDataFrame containing street edges with unique edge ID and IDs of start
and end points of each segment. Start and endpoints are default
outcome of :func:`momepy.nx_to_gdf`.
node_id : str, list, np.array, pd.Series
the name of the nodes dataframe column, ``np.array``, or ``pd.Series``
with unique id
edge_id : str (default None)
the name of the objects dataframe column
with unique edge id (like an outcome of :func:`momepy.get_network_id`)
edge_keys : str (default None)
name the name of the objects dataframe column with edgeID_keys
(like an outcome of :func:`momepy.get_network_ratio`)
edge_values : str (default None)
name the name of the objects dataframe column with edgeID_values
(like an outcome of :func:`momepy.get_network_ratio`)
verbose : bool (default True)
if True, shows progress bars in loops and indication of steps
Returns
-------
node_ids : Series
Series containing node ID for objects
"""
nodes = nodes.set_index(node_id)
if not isinstance(node_id, str):
nodes["mm_noid"] = node_id
node_id = "mm_noid"
results_list = []
if edge_id is not None:
edges = edges.set_index(edge_id)
centroids = objects.centroid
for eid, centroid in tqdm(
zip(objects[edge_id], centroids),
total=objects.shape[0],
disable=not verbose,
):
if np.isnan(eid):
results_list.append(np.nan)
else:
edge = edges.loc[eid]
startID = edge.node_start
start = nodes.loc[startID].geometry
sd = centroid.distance(start)
endID = edge.node_end
end = nodes.loc[endID].geometry
ed = centroid.distance(end)
if sd > ed:
results_list.append(endID)
else:
results_list.append(startID)
elif edge_keys is not None and edge_values is not None:
for edge_i, edge_r, geom in tqdm(
zip(objects[edge_keys], objects[edge_values], objects.geometry),
total=objects.shape[0],
disable=not verbose,
):
edge = edges.iloc[edge_i[edge_r.index(max(edge_r))]]
startID = edge.node_start
start = nodes.loc[startID].geometry
sd = geom.distance(start)
endID = edge.node_end
end = nodes.loc[endID].geometry
ed = geom.distance(end)
if sd > ed:
results_list.append(endID)
else:
results_list.append(startID)
series = pd.Series(results_list, index=objects.index)
return series
def get_network_ratio(df, edges, initial_buffer=500):
"""
Link polygons to network edges based on the proportion of overlap (if a cell
intersects more than one edge)
Useful if you need to link enclosed tessellation to street network. Ratios can
be used as weights when linking network-based values to cells. For a purely
distance-based link use :func:`momepy.get_network_id`.
Links are based on the integer position of edge (``iloc``).
Parameters
----------
df : GeoDataFrame
GeoDataFrame containing objects to snap (typically enclosed tessellation)
edges : GeoDataFrame
GeoDataFrame containing street network
initial_buffer : float
Initial buffer used to link non-intersecting cells.
Returns
-------
DataFrame
See also
--------
momepy.get_network_id
momepy.get_node_id
Examples
--------
>>> links = mm.get_network_ratio(enclosed_tessellation, streets)
>>> links.head()
edgeID_keys edgeID_values
0 [34] [1.0]
1 [0, 34] [0.38508998545027145, 0.6149100145497285]
2 [32] [1]
3 [0] [1.0]
4 [26] [1]
"""
# intersection-based join
buff = edges.buffer(0.01) # to avoid floating point error
inp, res = buff.sindex.query_bulk(df.geometry, predicate="intersects")
intersections = (
df.iloc[inp]
.reset_index(drop=True)
.intersection(buff.iloc[res].reset_index(drop=True))
)
mask = intersections.area > 0.0001
intersections = intersections[mask]
inp = inp[mask]
lengths = intersections.area
grouped = lengths.groupby(inp)
totals = grouped.sum()
ints_vect = []
for name, group in grouped:
ratios = group / totals.loc[name]
ints_vect.append({res[item[0]]: item[1] for item in ratios.iteritems()})
edge_dicts = pd.Series(ints_vect, index=totals.index)
# nearest neighbor join
nans = df.index.difference(edge_dicts.index)
buffered = df.iloc[nans].buffer(initial_buffer)
additional = []
for orig, geom in zip(df.iloc[nans].geometry, buffered.geometry):
query = edges.sindex.query(geom, predicate="intersects")
b = initial_buffer
while query.size == 0:
query = edges.sindex.query(geom.buffer(b), predicate="intersects")
b += initial_buffer
additional.append({edges.iloc[query].distance(orig).idxmin(): 1})
additional = pd.Series(additional, index=nans)
ratios = pd.concat([edge_dicts, additional]).sort_index()
result = pd.DataFrame()
result["edgeID_keys"] = ratios.apply(lambda d: list(d.keys()))
result["edgeID_values"] = ratios.apply(lambda d: list(d.values()))
result.index = df.index
return result
def enclosures(
primary_barriers,
limit=None,
additional_barriers=None,
enclosure_id="eID",
clip=False,
):
"""
Generate enclosures based on passed barriers.
Enclosures are areas enclosed from all sides by at least one type of
a barrier. Barriers are typically roads, railways, natural features
like rivers and other water bodies or coastline. Enclosures are a
result of polygonization of the ``primary_barrier`` and ``limit`` and its
subdivision based on additional_barriers.
Parameters
----------
primary_barriers : GeoDataFrame, GeoSeries
GeoDataFrame or GeoSeries containing primary barriers.
(Multi)LineString geometry is expected.
limit : GeoDataFrame, GeoSeries, shapely geometry (default None)
GeoDataFrame, GeoSeries or shapely geometry containing external limit
of enclosures,
i.e. the area which gets partitioned. If None is passed,
the internal area of ``primary_barriers`` will be used.
additional_barriers : GeoDataFrame
GeoDataFrame or GeoSeries containing additional barriers.
(Multi)LineString geometry is expected.
enclosure_id : str (default 'eID')
name of the enclosure_id (to be created).
clip : bool (default False)
if True, returns enclosures with representative point within the limit
(if given). Requires ``limit`` composed of Polygon or MultiPolygon geometries.
Returns
-------
enclosures : GeoDataFrame
GeoDataFrame containing enclosure geometries and enclosure_id
Examples
--------
>>> enclosures = mm.enclosures(streets, admin_boundary, [railway, rivers])
"""
if limit is not None:
if isinstance(limit, BaseGeometry):
limit = gpd.GeoSeries([limit])
if limit.geom_type.isin(["Polygon", "MultiPolygon"]).any():
limit_b = limit.boundary
else: