In [None]:
import re

import numpy as np
import pyrosm
from pyrosm.data import sources

## Check available data

In [None]:
# List the available data-source categories.
source_categories = sources.available.keys()
source_categories

In [None]:
# Names of the available city datasets.
# TODO: pass the contents of this to a selection input
available_cities = sources.cities.available
print(available_cities)

## Select City

In [None]:
# Fetch the extract for the chosen city and open it with pyrosm.
# NOTE: get_data downloads to tmp — TODO: think about whether the file should
# be kept somewhere more permanent.
city_name = "Bangkok"
fp = pyrosm.get_data(city_name)
osm = pyrosm.OSM(fp)

## Select features

### Road Network

In [None]:
# network_type options noted for get_network:
#   walking (default)
#   cycling
#   driving
#   driving+service (includes also public service vehicles)
network_type = "driving"
road_net = osm.get_network(network_type=network_type)
road_net.plot()

In [None]:
# Total length of the extracted road network
# (presumably metres, as computed by pyrosm — TODO confirm units).
# Series.sum() is vectorised and skips missing lengths, whereas the builtin
# sum() loops in Python and returns NaN as soon as one length is missing.
road_net["length"].sum()

### Buildings

In [None]:
buildings_raw = osm.get_buildings()
# Areas are only meaningful in a projected CRS that is valid for the target
# area. EPSG:27700 is the British National Grid and does not cover Bangkok, so
# estimate the UTM zone from the data's own extent instead of hard-coding it.
buildings = buildings_raw.to_crs(buildings_raw.estimate_utm_crs())

In [None]:
# Footprint area of every building, in squared units of the frame's current CRS.
buildings["area"] = buildings.geometry.area

In [None]:
# Area summary by building type. Maybe not that useful for buildings.
# NOTE: groupby(...).sum("area") does not select the "area" column — the first
# positional argument of sum() is `numeric_only`, so every numeric column was
# being summed. Select the column explicitly (double brackets keep a DataFrame).
buildings.groupby("building")[["area"]].sum()

### Landuse

In [None]:
land_raw = osm.get_landuse()
# Reproject to a metric CRS that is valid where the data lies. EPSG:27700
# (British National Grid) does not cover Bangkok, so estimate the UTM zone
# from the layer's extent rather than hard-coding a UK grid.
land = land_raw.to_crs(land_raw.estimate_utm_crs())

In [None]:
# Map of the land-use features, coloured by their `landuse` value.
land.plot(
    figsize=(10, 6),
    column="landuse",
    legend=True,
)

In [None]:
# Area of each land-use feature, in squared units of the frame's current CRS.
land["area"] = land.area
# Total area per land-use class. sum("area") would pass "area" as
# `numeric_only` and sum every numeric column, so select the column explicitly.
land.groupby("landuse")[["area"]].sum()

### Natural

In [None]:
nat_raw = osm.get_natural()
# This layer includes point observations; drop them (only rows whose geometry
# type is exactly "Point" are removed).
not_point = (nat_raw.geom_type != "Point").to_numpy(dtype=bool)
nat = nat_raw[not_point]
# Reproject to a metric CRS that is valid where the data lies. EPSG:27700
# (British National Grid) does not cover Bangkok, so estimate the UTM zone
# from the layer's extent rather than hard-coding a UK grid.
nat = nat.to_crs(nat.estimate_utm_crs())
nat["area"] = nat.area
nat.plot(column="natural", legend=True, figsize=(10, 6))

I notice some inconsistent tag spellings here, `scub` versus `scrub` for example.
Point observations need to be filtered out: we want shapes, not points.
Which classes could be useful as impedance to connectivity?
* rock - anything containing `rock` (`bare_rock` etc.), plus `cliff`, `shingle`, `sand`, `stone`, `scree`
* water - `bay`, `beach`, `coastline`, `spring`, `water`, `wetland`
* green area - anything containing `grass`, `tree` or `shrub`, plus `mud`, `heath`, `scrub`, `scub` (sic), `wood`, `forest`


In [None]:
# Reclassify: collapse the detailed `natural` values into three coarse groups
# (rock, water, green). A value matching none of the patterns is kept as-is.
rock_pat = re.compile(
    r"rock|cliff|shingle|sand|stone|scree|gorge|ridge|landslide|mountain"
)
water_pat = re.compile(r"bay|beach|coast|spring|water|wet|shoal|river|flood|reed")
green_pat = re.compile(
    r"grass|mud|heath|tree|shrub|scrub|scub|wood|forest|field|earth|meadow|lawn|fell"
)
# Patterns are tried in this order and the first hit decides the group.
reclass_rules = (("rock", rock_pat), ("water", water_pat), ("green", green_pat))


def _coarse_class(label):
    """Return the first group whose pattern occurs in ``label``, else ``label`` itself."""
    for group, pattern in reclass_rules:
        if pattern.search(label):
            return group
    return label


nat["reclassified_natural"] = [_coarse_class(label) for label in nat.natural]

In [None]:
# Number of features that ended up in each reclassified group.
nat["reclassified_natural"].value_counts()

In [None]:
# Map of the natural features, coloured by their reclassified group.
nat.plot(
    figsize=(10, 6),
    column="reclassified_natural",
    legend=True,
)

In [None]:
# Summary table: total area per reclassified group.
# sum("area") would pass "area" as `numeric_only` and sum every numeric
# column, so select the area column explicitly.
nat.groupby("reclassified_natural")[["area"]].sum()