/
bbbike.py
483 lines (411 loc) · 22.2 KB
/
bbbike.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
"""
Read OpenStreetMap data extracts available from the BBBike free download server.
"""
from pydriosm.downloader import BBBikeDownloader
from pydriosm.reader._reader import _Reader
class BBBikeReader(_Reader):
    """
    Read `BBBike <https://download.bbbike.org/>`_ exports of OpenStreetMap data.

    The reading methods defined here are thin wrappers: each one forwards its arguments
    either to the same-named method of the parent class ``_Reader`` or to
    ``self.read_osm_var``, which is not defined in this class.
    """

    # NOTE(review): the path separator below is a hard-coded Windows backslash;
    # confirm that whatever consumes this value copes with it on POSIX systems.
    #: str: Default download directory.
    DEFAULT_DOWNLOAD_DIR = "osm_data\\bbbike"

    #: set: Valid file formats.
    FILE_FORMATS = {
        '.csv.xz',
        '.garmin-onroad-latin1.zip',
        '.garmin-onroad.zip',
        '.garmin-opentopo.zip',
        '.garmin-osm.zip',
        '.geojson.xz',
        '.gz',
        '.mapsforge-osm.zip',
        '.pbf',
        '.shp.zip',
        '.svg-osm.zip',
    }

    def __init__(self, data_dir=None, max_tmpfile_size=None):
        """
        :param data_dir: (a path or a name of) a directory where a data file is;
            if ``None`` (default), the choice of directory is left to the parent class
            (presumably based on :attr:`DEFAULT_DOWNLOAD_DIR` - to be confirmed against
            ``_Reader``)
        :type data_dir: str | None
        :param max_tmpfile_size: defaults to ``None``,
            see also :func:`gdal_configurations<pydriosm.settings.gdal_configurations>`
        :type max_tmpfile_size: int | None

        :ivar BBBikeDownloader downloader: instance of the class
            :py:class:`BBBikeDownloader<pydriosm.downloader.BBBikeDownloader>`
        :ivar str name: name of the data resource
        :ivar str url: url of the homepage to the BBBike free download server

        .. note::

            This initialiser assigns no attribute itself; the attributes listed above can
            only come from the parent initialiser. The example below reads the upper-case
            ``NAME``, so the exact attribute names should be checked against ``_Reader``.

        **Examples**::

            >>> from pydriosm.reader import BBBikeReader

            >>> bbr = BBBikeReader()
            >>> bbr.NAME
            'BBBike'
        """

        # The downloader *class* (not an instance) is handed to the parent initialiser.
        # noinspection PyTypeChecker
        super().__init__(
            downloader=BBBikeDownloader,
            data_dir=data_dir,
            max_tmpfile_size=max_tmpfile_size,
        )

    def read_osm_pbf(self, subregion_name, data_dir=None, readable=False, expand=False,
                     parse_geometry=False, parse_other_tags=False, parse_properties=False,
                     update=False, download=True, pickle_it=False, ret_pickle_path=False,
                     rm_pbf_file=False, chunk_size_limit=50, verbose=False, **kwargs):
        """
        Read a PBF (.osm.pbf) data file of a geographic (sub)region.

        All arguments are passed on, by keyword, to
        :meth:`_Reader.read_osm_pbf()<pydriosm.reader._Reader.read_osm_pbf>`,
        and its result is returned unchanged.

        :param subregion_name: name of a geographic (sub)region (case-insensitive)
            that is available on BBBike free download server
        :type subregion_name: str
        :param data_dir: directory where the .osm.pbf data file is located/saved;
            if ``None``, the default local directory
        :type data_dir: str | None
        :param readable: whether to parse each feature in the raw data, defaults to ``False``
        :type readable: bool
        :param expand: whether to expand dict-like data into separate columns,
            defaults to ``False``
        :type expand: bool
        :param parse_geometry: whether to represent the ``'geometry'`` field
            in a `shapely.geometry`_ format, defaults to ``False``
        :type parse_geometry: bool
        :param parse_other_tags: whether to represent a ``'other_tags'`` (of ``'properties'``)
            in a `dict`_ format, defaults to ``False``
        :type parse_other_tags: bool
        :param parse_properties: whether to represent the ``'properties'`` field
            in a tabular format, defaults to ``False``
        :type parse_properties: bool
        :param update: whether to check to update pickle backup (if available),
            defaults to ``False``
        :type update: bool
        :param download: whether to download/update the PBF data file of the given subregion,
            if it is not available at the specified path, defaults to ``True``
        :type download: bool
        :param pickle_it: whether to save the .pbf data as a pickle file, defaults to ``False``
        :type pickle_it: bool
        :param ret_pickle_path: (when ``pickle_it=True``)
            whether to return a path to the saved pickle file, defaults to ``False``
        :type ret_pickle_path: bool
        :param rm_pbf_file: whether to delete the downloaded .osm.pbf file,
            defaults to ``False``
        :type rm_pbf_file: bool
        :param chunk_size_limit: threshold (in MB) that triggers the use of chunk parser,
            defaults to ``50``;
            if the size of the .osm.pbf file (in MB) is greater than ``chunk_size_limit``,
            it will be parsed in a chunk-wise way
        :type chunk_size_limit: int | None
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool | int
        :param kwargs: [optional] parameters of the method
            :meth:`_Reader.read_osm_pbf()<pydriosm.reader._Reader.read_osm_pbf>`
        :return: dictionary of the .osm.pbf data;
            when ``pickle_it=True`` (see also ``ret_pickle_path``),
            return a tuple of the dictionary and a path to the pickle file
        :rtype: dict | tuple | None

        .. _`shapely.geometry`:
            https://shapely.readthedocs.io/en/latest/manual.html#geometric-objects
        .. _`dict`:
            https://docs.python.org/3/library/stdtypes.html#dict

        **Examples**::

            >>> from pydriosm.reader import BBBikeReader
            >>> from pyhelpers.dirs import delete_dir

            >>> bbr = BBBikeReader()

            >>> subrgn_name = 'Leeds'
            >>> dat_dir = "tests\\osm_data"

            >>> leeds_pbf_raw = bbr.read_osm_pbf(subrgn_name, data_dir=dat_dir, verbose=True)
            Downloading "Leeds.osm.pbf"
                to "tests\\osm_data\\leeds\\" ... Done.
            Reading "tests\\osm_data\\leeds\\Leeds.osm.pbf" ... Done.
            >>> type(leeds_pbf_raw)
            dict
            >>> list(leeds_pbf_raw.keys())
            ['points', 'lines', 'multilinestrings', 'multipolygons', 'other_relations']

            >>> pbf_raw_points = leeds_pbf_raw['points']
            >>> type(pbf_raw_points)
            list
            >>> type(pbf_raw_points[0])
            osgeo.ogr.Feature

            >>> # (Parsing the data in this example might take up to a few minutes.)
            >>> leeds_pbf_parsed = bbr.read_osm_pbf(
            ...     subrgn_name, data_dir=dat_dir, readable=True, expand=True,
            ...     parse_geometry=True, parse_other_tags=True, parse_properties=True,
            ...     verbose=True)
            Parsing "tests\\osm_data\\leeds\\Leeds.osm.pbf" ... Done.
            >>> list(leeds_pbf_parsed.keys())
            ['points', 'lines', 'multilinestrings', 'multipolygons', 'other_relations']

            >>> # Data of the 'multipolygons' layer
            >>> leeds_pbf_parsed_multipolygons = leeds_pbf_parsed['multipolygons']
            >>> leeds_pbf_parsed_multipolygons.head()
                  id                                           geometry  ... tourism other_tags
            0  10595  (POLYGON ((-1.5030223 53.6725382, -1.5034495 5...  ...    None       None
            1  10600  (POLYGON ((-1.5116994 53.6764287, -1.5099361 5...  ...    None       None
            2  10601  (POLYGON ((-1.5142403 53.6710831, -1.5143686 5...  ...    None       None
            3  10612  (POLYGON ((-1.5129341 53.6704885, -1.5131883 5...  ...    None       None
            4  10776  (POLYGON ((-1.5523801 53.7029081, -1.5524772 5...  ...    None       None
            [5 rows x 26 columns]

            >>> # Delete the example data and the test data directory
            >>> delete_dir(dat_dir, verbose=True)
            To delete the directory "tests\\osm_data\\" (Not empty)
            ? [No]|Yes: yes
            Deleting "tests\\osm_data\\" ... Done.

        .. seealso::

            - Examples for the method
              :meth:`GeofabrikReader.read_osm_pbf()<pydriosm.reader.GeofabrikReader.read_osm_pbf>`.
        """

        return super().read_osm_pbf(
            subregion_name=subregion_name,
            data_dir=data_dir,
            readable=readable,
            expand=expand,
            parse_geometry=parse_geometry,
            parse_properties=parse_properties,
            parse_other_tags=parse_other_tags,
            update=update,
            download=download,
            pickle_it=pickle_it,
            ret_pickle_path=ret_pickle_path,
            rm_pbf_file=rm_pbf_file,
            chunk_size_limit=chunk_size_limit,
            verbose=verbose,
            **kwargs,
        )

    def read_shp_zip(self, subregion_name, layer_names=None, feature_names=None, data_dir=None,
                     update=False, download=True, pickle_it=False, ret_pickle_path=False,
                     rm_extracts=False, rm_shp_zip=False, verbose=False, **kwargs):
        """
        Read a shapefile of a geographic (sub)region.

        All arguments are passed on, by keyword, to the parent class's ``read_shp_zip()``,
        and its result is returned unchanged.

        :param subregion_name: name of a geographic (sub)region (case-insensitive)
            that is available on BBBike free download server
        :type subregion_name: str
        :param layer_names: name of a .shp layer, e.g. 'railways', or names of multiple layers;
            if ``None`` (default), all available layers
        :type layer_names: str | list | None
        :param feature_names: name of a feature, e.g. 'rail', or names of multiple features;
            if ``None`` (default), all available features
        :type feature_names: str | list | None
        :param data_dir: directory where the .shp.zip data file is located/saved;
            if ``None``, the default directory
        :type data_dir: str | None
        :param update: whether to check to update pickle backup (if available),
            defaults to ``False``
        :type update: bool
        :param download: whether to download the .shp.zip file when it is not found locally
            (cf. the first two calls in the examples below), defaults to ``True``
        :type download: bool
        :param pickle_it: whether to save the .shp data as a pickle file, defaults to ``False``
        :type pickle_it: bool
        :param ret_pickle_path: (when ``pickle_it=True``)
            whether to return a path to the saved pickle file, defaults to ``False``
        :type ret_pickle_path: bool
        :param rm_extracts: whether to delete extracted files from the .shp.zip file,
            defaults to ``False``
        :type rm_extracts: bool
        :param rm_shp_zip: whether to delete the downloaded .shp.zip file,
            defaults to ``False``
        :type rm_shp_zip: bool
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool | int
        :param kwargs: [optional] further keyword arguments,
            forwarded unchanged to the parent class's ``read_shp_zip()``
        :return: dictionary of the shapefile data, with keys and values being layer names
            and tabular data (in the format of `geopandas.GeoDataFrame`_), respectively;
            when ``pickle_it=True`` (see also ``ret_pickle_path``),
            return a tuple of the dictionary and a path to the pickle file
        :rtype: dict | collections.OrderedDict | tuple | None

        .. _`geopandas.GeoDataFrame`: https://geopandas.org/reference.html#geodataframe

        **Examples**::

            >>> from pydriosm.reader import BBBikeReader
            >>> from pyhelpers.dirs import delete_dir

            >>> bbr = BBBikeReader()

            >>> subrgn_name = 'Birmingham'
            >>> dat_dir = "tests\\osm_data"

            >>> bham_shp = bbr.read_shp_zip(
            ...     subregion_name=subrgn_name, data_dir=dat_dir, download=False, verbose=True)
            The .shp.zip file for "Birmingham" is not found.

            >>> # Set `download=True`
            >>> bham_shp = bbr.read_shp_zip(
            ...     subregion_name=subrgn_name, data_dir=dat_dir, download=True, verbose=True)
            Downloading "Birmingham.osm.shp.zip"
                to "tests\\osm_data\\birmingham\\" ... Done.
            Extracting "tests\\osm_data\\birmingham\\Birmingham.osm.shp.zip"
                to "tests\\osm_data\\birmingham\\" ... Done.
            Reading the shapefile(s) at
                "tests\\osm_data\\birmingham\\Birmingham-shp\\shape\\" ... Done.
            >>> type(bham_shp)
            collections.OrderedDict
            >>> list(bham_shp.keys())
            ['buildings',
             'landuse',
             'natural',
             'places',
             'points',
             'railways',
             'roads',
             'waterways']

            >>> # Data of 'railways' layer
            >>> bham_railways_shp = bham_shp['railways']
            >>> bham_railways_shp.head()
                osm_id  ... shape_type
            0      740  ...          3
            1     2148  ...          3
            2  2950000  ...          3
            3  3491845  ...          3
            4  3981454  ...          3
            [5 rows x 5 columns]

            >>> # Read data of 'road' layer only from the original .shp.zip file
            >>> # (and delete all extracts)
            >>> lyr_name = 'roads'
            >>> bham_roads_shp = bbr.read_shp_zip(
            ...     subregion_name=subrgn_name, layer_names=lyr_name, data_dir=dat_dir,
            ...     rm_extracts=True, verbose=True)
            Reading "tests\\osm_data\\birmingham\\Birmingham-shp\\shape\\roads.shp" ... Done.
            Deleting the extracts "tests\\osm_data\\birmingham\\Birmingham-shp\\" ... Done.
            >>> type(bham_roads_shp)
            collections.OrderedDict
            >>> list(bham_roads_shp.keys())
            ['roads']
            >>> bham_roads_shp[lyr_name].head()
               osm_id  ... shape_type
            0      37  ...          3
            1      38  ...          3
            2      41  ...          3
            3      45  ...          3
            4      46  ...          3
            [5 rows x 9 columns]

            >>> # Read data of multiple layers and features from the original .shp.zip file
            >>> # (and delete all extracts)
            >>> lyr_names = ['railways', 'waterways']
            >>> feat_names = ['rail', 'canal']
            >>> bham_rw_rc_shp = bbr.read_shp_zip(
            ...     subregion_name=subrgn_name, layer_names=lyr_names, feature_names=feat_names,
            ...     data_dir=dat_dir, rm_extracts=True, rm_shp_zip=True, verbose=True)
            Extracting the following layer(s):
                'railways'
                'waterways'
                from "tests\\osm_data\\birmingham\\Birmingham.osm.shp.zip"
                  to "tests\\osm_data\\birmingham\\" ... Done.
            Reading the data at "tests\\osm_data\\birmingham\\Birmingham-shp\\shape\\" ... Done.
            Deleting the extracts "tests\\osm_data\\birmingham\\Birmingham-shp\\" ... Done.
            Deleting "tests\\osm_data\\birmingham\\Birmingham.osm.shp.zip" ... Done.
            >>> type(bham_rw_rc_shp)
            collections.OrderedDict
            >>> list(bham_rw_rc_shp.keys())
            ['railways', 'waterways']

            >>> # Data of the 'railways' layer
            >>> bham_rw_rc_shp_railways = bham_rw_rc_shp['railways']
            >>> bham_rw_rc_shp_railways[['type', 'name']].head()
               type                                             name
            0  rail                                  Cross-City Line
            1  rail                                  Cross-City Line
            2  rail  Derby to Birmingham (Proof House Junction) Line
            3  rail                  Birmingham to Peterborough Line
            4  rail          Water Orton to Park Lane Junction Curve

            >>> # Data of the 'waterways' layer
            >>> bham_rw_rc_shp_waterways = bham_rw_rc_shp['waterways']
            >>> bham_rw_rc_shp_waterways[['type', 'name']].head()
                 type                                              name
            2   canal                      Birmingham and Fazeley Canal
            8   canal                      Birmingham and Fazeley Canal
            9   canal  Birmingham Old Line Canal Navigations - Rotton P
            10  canal                               Oozells Street Loop
            11  canal                      Worcester & Birmingham Canal

            >>> # Delete the example data and the test data directory
            >>> delete_dir(dat_dir, verbose=True)
            To delete the directory "tests\\osm_data\\" (Not empty)
            ? [No]|Yes: yes
            Deleting "tests\\osm_data\\" ... Done.
        """

        return super().read_shp_zip(
            subregion_name=subregion_name,
            layer_names=layer_names,
            feature_names=feature_names,
            data_dir=data_dir,
            update=update,
            download=download,
            pickle_it=pickle_it,
            ret_pickle_path=ret_pickle_path,
            rm_extracts=rm_extracts,
            rm_shp_zip=rm_shp_zip,
            verbose=verbose,
            **kwargs,
        )

    def read_csv_xz(self, subregion_name, data_dir=None, download=False, verbose=False, **kwargs):
        """
        Read a compressed CSV (.csv.xz) data file of a geographic (sub)region.

        The work is delegated to ``self.read_osm_var``, which receives
        ``self.VAR.read_csv_xz`` as its first argument and ``".csv.xz"`` as the file format.

        :param subregion_name: name of a geographic (sub)region (case-insensitive)
            that is available on BBBike free download server
        :type subregion_name: str
        :param data_dir: directory where the .csv.xz data file is located/saved;
            if ``None`` (default), the default directory
        :type data_dir: str | None
        :param download: whether to try to download the requisite data file if it does not exist,
            defaults to ``False``
        :type download: bool
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool | int
        :param kwargs: [optional] further keyword arguments,
            forwarded unchanged to ``self.read_osm_var``
        :return: tabular data of the .csv.xz file
        :rtype: pandas.DataFrame | None

        .. _pydriosm-BBBikeReader-read_csv_xz:

        **Examples**::

            >>> from pydriosm.reader import BBBikeReader
            >>> from pyhelpers.dirs import delete_dir

            >>> bbr = BBBikeReader()

            >>> subrgn_name = 'Leeds'
            >>> dat_dir = "tests\\osm_data"

            >>> leeds_csv_xz = bbr.read_csv_xz(subrgn_name, dat_dir, verbose=True)
            The requisite data file "tests\\osm_data\\leeds\\Leeds.osm.csv.xz" does not exist.

            >>> # Set `download=True`
            >>> leeds_csv_xz = bbr.read_csv_xz(subrgn_name, dat_dir, verbose=True, download=True)
            Downloading "Leeds.osm.csv.xz"
                to "tests\\osm_data\\leeds\\" ... Done.
            Parsing the data ... Done.
            >>> leeds_csv_xz.head()
               type      id feature  note
            0  node  154915    None  None
            1  node  154916    None  None
            2  node  154921    None  None
            3  node  154922    None  None
            4  node  154923    None  None

            >>> # Delete the downloaded .csv.xz data file
            >>> delete_dir(dat_dir, verbose=True)
            To delete the directory "tests\\osm_data\\" (Not empty)
            ? [No]|Yes: yes
            Deleting "tests\\osm_data\\" ... Done.
        """

        return self.read_osm_var(
            self.VAR.read_csv_xz,
            subregion_name=subregion_name,
            osm_file_format=".csv.xz",
            data_dir=data_dir,
            download=download,
            verbose=verbose,
            **kwargs,
        )

    def read_geojson_xz(self, subregion_name, data_dir=None, parse_geometry=False, download=False,
                        verbose=False, **kwargs):
        """
        Read a .geojson.xz data file of a geographic (sub)region.

        The work is delegated to ``self.read_osm_var``, which receives
        ``self.VAR.read_geojson_xz`` as its first argument and ``".geojson.xz"`` as the
        file format; ``parse_geometry`` travels along with the extra keyword arguments.

        :param subregion_name: name of a geographic (sub)region (case-insensitive)
            that is available on BBBike free download server
        :type subregion_name: str
        :param data_dir: directory where the .geojson.xz data file is located/saved;
            if ``None`` (default), the default directory
        :type data_dir: str | None
        :param parse_geometry: whether to represent coordinates in a format of a geometric object,
            defaults to ``False``
        :type parse_geometry: bool
        :param download: whether to try to download the requisite data file if it does not exist,
            defaults to ``False``
        :type download: bool
        :param verbose: whether to print relevant information in console as the function runs,
            defaults to ``False``
        :type verbose: bool | int
        :param kwargs: [optional] further keyword arguments,
            forwarded (together with ``parse_geometry``) to ``self.read_osm_var``
        :return: tabular data of the .geojson.xz file
        :rtype: pandas.DataFrame | None

        .. _pydriosm-BBBikeReader-read_geojson_xz:

        **Examples**::

            >>> from pydriosm.reader import BBBikeReader
            >>> from pyhelpers.dirs import delete_dir

            >>> bbr = BBBikeReader()

            >>> subrgn_name = 'Leeds'
            >>> dat_dir = "tests\\osm_data"

            >>> leeds_geoj = bbr.read_geojson_xz(subrgn_name, dat_dir, verbose=True)
            The requisite data file "tests\\osm_data\\leeds\\Leeds.osm.geojson.xz" does not exist.

            >>> # Set `download=True`
            >>> leeds_geoj = bbr.read_geojson_xz(subrgn_name, dat_dir, verbose=True, download=True)
            Downloading "Leeds.osm.geojson.xz"
                to "tests\\osm_data\\leeds\\" ... Done.
            Parsing the data ... Done.
            >>> leeds_geoj.head()
                                                        geometry                          properties
            0  {'type': 'Point', 'coordinates': [-1.5558097, ...  {'highway': 'motorway_junction'...
            1  {'type': 'Point', 'coordinates': [-1.34293, 53...  {'highway': 'motorway_junction'...
            2  {'type': 'Point', 'coordinates': [-1.517335, 5...  {'highway': 'motorway_junction'...
            3  {'type': 'Point', 'coordinates': [-1.514124, 5...  {'highway': 'motorway_junction'...
            4  {'type': 'Point', 'coordinates': [-1.516511, 5...  {'highway': 'motorway_junction'...

            >>> # Set `parse_geometry` to be True
            >>> leeds_geoj_ = bbr.read_geojson_xz(subrgn_name, dat_dir, parse_geometry=True,
            ...                                   verbose=True)
            Parsing "tests\\osm_data\\leeds\\Leeds.osm.geojson.xz" ... Done.
            >>> leeds_geoj_['geometry'].head()
            0    POINT (-1.5560511 53.6879848)
            1        POINT (-1.34293 53.844618)
            2     POINT (-1.517335 53.7499667)
            3     POINT (-1.514124 53.7416937)
            4     POINT (-1.516511 53.7256632)
            Name: geometry, dtype: object

            >>> # Delete the download directory
            >>> delete_dir(dat_dir, verbose=True)
        """

        # `parse_geometry` is a named parameter, so it can never already be in `kwargs`;
        # it is appended after the caller's extra options.
        reader_options = {**kwargs, 'parse_geometry': parse_geometry}

        return self.read_osm_var(
            self.VAR.read_geojson_xz,
            subregion_name=subregion_name,
            osm_file_format=".geojson.xz",
            data_dir=data_dir,
            download=download,
            verbose=verbose,
            **reader_options,
        )