## Download data from OSM

In [1]:
# -N (timestamping) keeps this cell safe to re-run: wget replaces the local file only when the
# server copy is newer, instead of saving a numbered duplicate (israel.osm.pbf.1) that the
# conversion cell below would never pick up.
!wget -N https://download.openstreetmap.fr/extracts/asia/israel.osm.pbf
!wget -N https://download.openstreetmap.fr/extracts/asia/palestine.osm.pbf

--2021-10-18 13:24:07--  https://download.openstreetmap.fr/extracts/asia/israel.osm.pbf
Resolving download.openstreetmap.fr (download.openstreetmap.fr)... 213.36.253.212
Connecting to download.openstreetmap.fr (download.openstreetmap.fr)|213.36.253.212|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 78213987 (75M)
Saving to: ‘israel.osm.pbf’


2021-10-18 13:26:04 (658 KB/s) - ‘israel.osm.pbf’ saved [78213987/78213987]

--2021-10-18 13:26:04--  https://download.openstreetmap.fr/extracts/asia/palestine.osm.pbf
Resolving download.openstreetmap.fr (download.openstreetmap.fr)... 213.36.253.212
Connecting to download.openstreetmap.fr (download.openstreetmap.fr)|213.36.253.212|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 33084999 (32M)
Saving to: ‘palestine.osm.pbf’


2021-10-18 13:26:47 (753 KB/s) - ‘palestine.osm.pbf’ saved [33084999/33084999]



## Convert to gpkg

In [2]:
# -f GPKG names the output driver explicitly; -overwrite replaces layers left behind by an
# earlier run, so re-running this cell regenerates the GeoPackages instead of failing because
# the output files already exist.
!ogr2ogr -f GPKG -overwrite israel.osm.gpkg israel.osm.pbf
!ogr2ogr -f GPKG -overwrite palestine.osm.gpkg palestine.osm.pbf

0...10...20...30...40...50...60...70...80...90...100 - done.
0...10...20...30...40...50...60...70...80...90...100 - done.


In [2]:
import fiona
# Inspect which layers the ogr2ogr conversion wrote into the GeoPackage; the cells below
# read the 'points' and 'multipolygons' layers.
fiona.listlayers('israel.osm.gpkg')

['points', 'lines', 'multilinestrings', 'multipolygons', 'other_relations']

## Hebrew Labels

In [1]:
import dataflows as DF
import fiona
import pghstore

FILES = ['israel', 'palestine']
# One feature iterator per extract, taken from the 'points' layer of its GeoPackage.
sources = [
    fiona.open(f'{extract}.osm.gpkg', layer='points').filter()
    for extract in FILES
]
# OSM `place` values that get a label, mapped to the integer stored in the `rank` column.
RANKS = {'city': 0, 'town': 1, 'village': 2, 'hamlet': 3}


def _merge_other_tags(row):
    """Row step: parse the hstore text in `tags` and merge it into `properties` in place."""
    # The parameter must be called `row`; dataflows uses that name to treat this as a row step.
    row['properties'].update(pghstore.loads(row['tags']))


dp = DF.Flow(
    *sources,
    # Raw `other_tags` string of the feature ('' when missing or empty).
    DF.add_field('tags', 'string', lambda feat: feat['properties'].get('other_tags') or ''),
    _merge_other_tags,
    DF.add_field('place', 'string', lambda feat: feat['properties'].get('place')),
    DF.add_field('population', 'string', lambda feat: feat['properties'].get('population')),
    DF.add_field('orig_name', 'string', lambda feat: feat['properties'].get('name')),
    # Keep features that are a ranked place type or that carry a population value.
    DF.filter_rows(lambda feat: feat['place'] in RANKS or feat['population'] is not None),
    # Keep only features that have a Point geometry.
    DF.filter_rows(lambda feat: feat['geometry'] and feat['geometry']['type'] == 'Point'),
    # Prefer the Hebrew name; fall back to the default `name` tag.
    DF.add_field(
        'name', 'string',
        lambda feat: feat['properties'].get('name:he') or feat['orig_name'],
    ),
    # Rank is None for rows kept only because of their population value.
    DF.add_field('rank', 'integer', lambda feat: RANKS.get(feat['place'])),
    DF.select_fields(['place', 'name', 'geometry', 'rank']),
    # Merge the per-extract resources into a single `place_labels_he` resource.
    DF.concatenate(
        {field: [] for field in ['place', 'name', 'geometry', 'rank']},
        target={'name': 'place_labels_he', 'path': './place_labels_he.geojson'},
    ),
    # Replace the geometry mapping with its bare coordinate list.
    DF.set_type(
        'geometry',
        type='geopoint',
        transform=lambda geom: list(geom['coordinates']),
        resources=None,
    ),
    DF.dump_to_path('data/place_labels_he', force_format=False),
    # DF.printer(tablefmt='html'),  # uncomment to preview the rows inline
).process()

  for x in self.iterable:


#,place (string),name (string),geometry (geopoint),rank (integer)
1,city,אילת,"[34.9497949, 29.5569348]",0.0
2,city,ירושלים,"[35.2257626, 31.778824200000003]",0.0
3,village,יבנאל,"[35.5060043, 32.7026978]",2.0
4,town,קיסריה,"[34.9057861, 32.5114971]",1.0
5,city,נהריה,"[35.094557900000005, 33.006306]",0.0
6,village,שדות ים,"[34.8933053, 32.492159300000004]",2.0
7,city,אשדוד,"[34.6529922, 31.797731400000004]",0.0
8,city,חולון,"[34.780407600000004, 32.0193121]",0.0
9,town,אור יהודה,"[34.8523936, 32.0309712]",1.0
10,city,ראשון לציון,"[34.8101149, 31.9635712]",0.0


## Locations and Bounds

In [1]:
import dataflows as DF
import fiona
import pghstore
from shapely.geometry import shape
from shapely.ops import unary_union

FILES = ['israel', 'palestine']
# One feature iterator per extract, taken from the 'multipolygons' layer of its GeoPackage.
sources = [
    fiona.open(f'{extract}.osm.gpkg', layer='multipolygons').filter()
    for extract in FILES
]
# OSM `place` values kept by the pipeline below (it filters on membership in this mapping).
RANKS = {'city': 0, 'town': 1, 'village': 2, 'hamlet': 3}

def names(r):
    """Return the Hebrew names of a feature, falling back to its default name.

    Args:
        r: a row whose ``r['properties']`` is a mapping of OSM tag keys to values.

    Returns:
        A list of every non-empty value whose key carries the ``:he`` language
        suffix (such as ``name:he``), in mapping order. If there is none, a
        one-element list with the ``name`` value. ``None`` when the feature has
        no usable name at all.
    """
    props = r['properties']
    # Match the ':he' suffix rather than a bare 'he', which would also pick up unrelated keys
    # that merely end in those letters. Skip None/empty values: they are not names and would
    # break the '_'.join(...) that builds the `key` field from this list.
    hebrew = [value for key, value in props.items() if key.endswith(':he') and value]
    if hebrew:
        return hebrew
    fallback = props.get('name')
    return [fallback] if fallback else None
    
def bounds(r):
    """Return the bounding box covering every geometry collected on a row.

    ``r['geometry']`` is iterated as a collection of geometry mappings; each is
    converted with ``shape`` and all of them are merged with ``unary_union``.
    The result is the merged geometry's ``.bounds`` value.
    """
    merged = unary_union([shape(part) for part in r['geometry']])
    return merged.bounds

dp, _ = DF.Flow(
    *sources,
    # Raw `other_tags` string of the feature ('' when missing or empty).
    DF.add_field('tags', 'string', lambda feat: feat['properties'].get('other_tags') or ''),
    # Row step (parameter must be named `row`): merge the parsed hstore tags into `properties`.
    lambda row: row['properties'].update(pghstore.loads(row['tags'])),
    DF.add_field('place', 'string', lambda feat: feat['properties'].get('place')),
    DF.add_field('boundary', 'string', lambda feat: feat['properties'].get('boundary')),
    DF.add_field('population', 'string', lambda feat: feat['properties'].get('population')),
    # Keep only ranked place types that have a (Multi)Polygon geometry.
    DF.filter_rows(lambda feat: feat['place'] in RANKS),
    DF.filter_rows(lambda feat: feat['geometry'] and 'Polygon' in feat['geometry']['type']),
    # Drop features for which `names` found nothing.
    DF.add_field('name', 'array', names),
    DF.filter_rows(lambda feat: feat['name'] is not None),
    # The joined names form the key used to group features below.
    DF.add_field('key', 'string', lambda feat: '_'.join(feat['name'])),
    # Merge both extracts into one resource, referred to as 'concat' by the join below.
    DF.concatenate(
        {field: [] for field in ['place', 'name', 'key', 'geometry', 'population']},
    ),
    # One row per key; the geometries of all features sharing a key are gathered into a list.
    DF.join_with_self(
        'concat',
        ['key'],
        {
            'place': None,
            'population': None,
            'name': None,
            'key': None,
            'geometry': {'aggregate': 'array'},
        },
    ),
    DF.add_field('bounds', 'array', bounds),
    DF.select_fields(['key', 'place', 'name', 'population', 'bounds']),
    DF.update_resource(-1, name='place_bounds_he', path='place_bounds_he.csv'),
    # Attach the `es:*` field hints to the schema.
    DF.set_type('bounds', **{'es:index': False, 'es:itemType': 'number'}),
    DF.set_type('place', **{'es:keyword': True}),
    DF.set_type('name', **{'es:itemType': 'string'}),
    DF.set_type('key', **{'es:keyword': True}),
    DF.set_primary_key(['key']),
    DF.dump_to_zip('data/place_bounds_he.zip'),
    DF.printer(tablefmt='html'),
).process()
# Show the descriptor of the produced resource.
dp.resources[0].descriptor

  for x in self.iterable:


#,key (string),place (string),name (array),population (string),bounds (array)
1,A'raa'ra Bedouins,hamlet,"[""A'raa'ra Bedouins""]",,"[35.2614135, 31.8504099, 35.2653989, 31.8541037]"
2,Al Nada,hamlet,['Al Nada'],,"[34.2291621, 31.3284825, 34.230813, 31.3293784]"
3,Sebastia,village,['Sebastia'],,"[35.1923239, 32.2690899, 35.2024491, 32.2801727]"
4,Sudia Village,village,['Sudia Village'],,"[34.2329577, 31.3167983, 34.2362099, 31.3200123]"
5,Wadi Abu Hindi,hamlet,['Wadi Abu Hindi'],,"[35.3120845, 31.7564905, 35.3252783, 31.7606013]"
6,א-דווא,hamlet,['א-דווא'],,"[35.3718696, 32.1340848, 35.3765795, 32.1384365]"
7,א-זידאנה אל נסאסרה,hamlet,['א-זידאנה אל נסאסרה'],,"[34.7365106, 31.3658372, 34.742804, 31.3707942]"
8,א-זעיים בדואים,hamlet,['א-זעיים בדואים'],,"[35.2656578, 31.7929091, 35.2730082, 31.7989491]"
9,א-רהניה,hamlet,['א-רהניה'],,"[35.2420533, 31.6662813, 35.2447893, 31.6686151]"
10,אבו ג'ווייעד,village,"[""אבו ג'ווייעד""]",,"[35.0711662, 31.1791438, 35.0738098, 31.1817818]"


{'name': 'place_bounds_he',
 'path': 'place_bounds_he.csv',
 'schema': {'fields': [{'name': 'key',
    'type': 'string',
    'format': 'default',
    'es:keyword': True},
   {'name': 'place',
    'type': 'string',
    'format': 'default',
    'es:keyword': True},
   {'name': 'name',
    'type': 'array',
    'format': 'default',
    'es:itemType': 'string'},
   {'name': 'population', 'type': 'string', 'format': 'default'},
   {'name': 'bounds',
    'type': 'array',
    'es:index': False,
    'es:itemType': 'number'}],
  'primaryKey': ['key']},
 'profile': 'data-resource',
 'encoding': 'utf-8',
 'format': 'csv',
 'dialect': {'lineTerminator': '\r\n',
  'delimiter': ',',
  'doubleQuote': True,
  'quoteChar': '"',
  'skipInitialSpace': False}}

## Places without names

In [1]:
import dataflows as DF
import fiona
import pghstore
from shapely.geometry import shape
from shapely.ops import unary_union

FILES = ['israel', 'palestine']
# One feature iterator per extract, taken from the 'multipolygons' layer of its GeoPackage.
sources = [
    fiona.open(f'{extract}.osm.gpkg', layer='multipolygons').filter()
    for extract in FILES
]
# OSM `place` values examined by the pipeline below (it filters on membership in this mapping).
RANKS = {'city': 0, 'town': 1, 'village': 2, 'hamlet': 3}

def names(r):
    """Return the Hebrew names of a feature, falling back to its default name.

    Args:
        r: a row whose ``r['properties']`` is a mapping of OSM tag keys to values.

    Returns:
        A list of every non-empty value whose key carries the ``:he`` language
        suffix (such as ``name:he``), in mapping order. If there is none, a
        one-element list with the ``name`` value. ``None`` when the feature has
        no usable name at all.
    """
    props = r['properties']
    # Match the ':he' suffix rather than a bare 'he', which would also pick up unrelated keys
    # that merely end in those letters. Skip None/empty values so that a feature whose only
    # matching tag is empty is still reported as unnamed by the filter below.
    hebrew = [value for key, value in props.items() if key.endswith(':he') and value]
    if hebrew:
        return hebrew
    fallback = props.get('name')
    return [fallback] if fallback else None

# NOTE(review): `.results()[0][0]` keeps only the rows of the first resource (presumably the
# 'israel' source, since `sources` follows FILES order) — confirm that is intended.
r = DF.Flow(
    *sources,
    # Raw `other_tags` string of the feature ('' when missing or empty).
    DF.add_field('tags', 'string', lambda feat: feat['properties'].get('other_tags') or ''),
    # Row step (parameter must be named `row`): merge the parsed hstore tags into `properties`.
    lambda row: row['properties'].update(pghstore.loads(row['tags'])),
    DF.add_field('place', 'string', lambda feat: feat['properties'].get('place')),
    DF.add_field('osm_way_id', 'string', lambda feat: feat['properties'].get('osm_way_id')),
    # Keep only ranked place types that have a (Multi)Polygon geometry.
    DF.filter_rows(lambda feat: feat['place'] in RANKS),
    DF.filter_rows(lambda feat: feat['geometry'] and 'Polygon' in feat['geometry']['type']),
    # Keep exactly the features for which `names` found nothing.
    DF.add_field('name', 'array', names),
    DF.filter_rows(lambda feat: feat['name'] is None),
    DF.sort_rows('{place}'),
    DF.delete_fields(['geometry']),
    DF.printer(tablefmt='html'),
).results()[0][0]


  for x in self.iterable:


#,type (string),id (string),properties (object),tags (string),place (string),osm_way_id (string),name (array)
1,Feature,2899.0,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,city,34929243.0,
2,Feature,415441.0,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,city,637195949.0,
3,Feature,445798.0,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,city,675627797.0,
4,Feature,16057.0,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,120745730.0,
5,Feature,269566.0,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,432757889.0,
6,Feature,269568.0,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,432757891.0,
7,Feature,269569.0,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,432757892.0,
8,Feature,269570.0,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,432757893.0,
9,Feature,281869.0,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,452985072.0,
10,Feature,316774.0,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,502103410.0,


#,type (string),id (string),properties (object),tags (string),place (string),osm_way_id (string),name (array)
1,Feature,18321,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,265587469,
2,Feature,18331,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,265595164,
3,Feature,264508,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,612000771,
4,Feature,295934,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,hamlet,767112843,
5,Feature,18317,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,town,265587435,
6,Feature,18319,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,town,265587437,
7,Feature,14947,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,242723361,
8,Feature,16047,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,244087537,
9,Feature,18322,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,265587470,
10,Feature,18323,"{'admin_level': None, 'aeroway': None, 'amenity': None, 'barrier': None, 'boundary': None, 'building ...",,village,265587471,


In [None]:
# NOTE(review): scratch cell that was never executed (`In [None]`). In IPython `_` normally
# echoes the previous output, but the "Locations and Bounds" cell rebinds it via
# `dp, _ = ...`, so what this shows depends on run order — confirm whether it is still needed.
_