In [None]:
import pandas as pd
import geopandas as gp
import matplotlib

In [None]:
# Build a single buffered outline of London from the local-authority boundaries.
# Steps: read the districts, reproject to WGS84 (EPSG:4326), tag every row with
# the same "city" value, and dissolve on that value so all districts merge into
# one geometry. The result is then moved to EPSG:27700 for buffering.
london_boundary_gdf = (
    gp.read_file("data/reference_geometries/london_lad.geojson")
    .to_crs(4326)
    .assign(city="london")[["city", "geometry"]]
    .dissolve("city")
    .reset_index()
    .to_crs(27700)
)

# Grow the outline by 1000 units of the projected CRS (EPSG:27700, British
# National Grid, is metre-based), then return to WGS84 for the joins below.
london_boundary_gdf.geometry = london_boundary_gdf.geometry.buffer(1000)
london_boundary_gdf = london_boundary_gdf.to_crs(4326)

In [None]:
# load an MSOA file which has the geography code
# The frame is explicitly reprojected to WGS84 (EPSG:4326) so that it shares a
# CRS with london_boundary_gdf, which the later spatial join relies on. If the
# file is already stored in EPSG:4326 the coordinates are left unchanged.
# Only the MSOA code (renamed to "geography code") and the geometry are kept.
msoa_gdf = (
    gp.read_file("data/reference_geometries/msoa_2011_geometries.geojson")
    .to_crs(4326)
    .rename(columns={"MSOA11CD": "geography code"})[["geography code", "geometry"]]
)
msoa_gdf

In [None]:
# Keep only the MSOAs whose geometry lies within the buffered London outline.
# The join also brings across the outline frame's other columns; only the
# helper column "index_right" is removed afterwards.
# NOTE: this overwrites msoa_gdf, so re-run the MSOA load cell before
# re-running this one.
msoa_within_london = msoa_gdf.sjoin(london_boundary_gdf, predicate="within")
msoa_gdf = msoa_within_london.drop(columns="index_right").reset_index(drop=True)
msoa_gdf

In [None]:
# Visual sanity check: show msoa_gdf on a map to confirm the spatial filter kept the expected areas.
msoa_gdf.explore()

In [None]:
# Read the multilinestring layer used for the route counts below.
# Judging by the path, this is an OSM extract for Greater London — confirm the source/version.
gdf = gp.read_file("data/osm/greater_london_multilinestrings.shp")
# Optional one-off export of the same layer as GeoJSON (left disabled):
# gdf.to_file("greater_london_multilinestrings.geojson", driver="GeoJSON")

In [None]:
# Select the features whose other_tags text contains "bus".
# NOTE(review): this is a substring match, so a tag value such as "business"
# would also be selected — confirm that is acceptable.
has_bus_tag = gdf.other_tags.astype(str).str.contains("bus")
bus_routes_gdf = gdf[has_bus_tag]

# Spatially join the selected features to the MSOAs and count the distinct
# osm_id values that touch each MSOA.
bus_route_counts_df = (
    bus_routes_gdf.sjoin(msoa_gdf)
    .groupby("geography code")["osm_id"]
    .nunique()
    .rename("num_bus_routes")
    .reset_index()
)
bus_route_counts_df

In [None]:
# Select the features whose other_tags text contains "train".
# NOTE(review): this is a substring match, so a tag value such as "training"
# would also be selected — confirm that is acceptable.
has_train_tag = gdf.other_tags.astype(str).str.contains("train")
train_routes_gdf = gdf[has_train_tag]

# Spatially join the selected features to the MSOAs and count the distinct
# osm_id values that touch each MSOA.
train_route_counts_df = (
    train_routes_gdf.sjoin(msoa_gdf)
    .groupby("geography code")["osm_id"]
    .nunique()
    .rename("num_train_routes")
    .reset_index()
)
train_route_counts_df

In [None]:
# Select the features whose other_tags text contains "cycle".
# NOTE(review): this is a substring match, so values such as "bicycle" and
# "motorcycle" are both selected — confirm that is acceptable.
has_cycle_tag = gdf.other_tags.astype(str).str.contains("cycle")
cycle_routes_gdf = gdf[has_cycle_tag]

# Spatially join the selected features to the MSOAs and count the distinct
# osm_id values that touch each MSOA.
cycle_route_counts_df = (
    cycle_routes_gdf.sjoin(msoa_gdf)
    .groupby("geography code")["osm_id"]
    .nunique()
    .rename("num_cycle_routes")
    .reset_index()
)
cycle_route_counts_df

In [None]:
# combine these together and save them as a stage file
# Every MSOA code is merged with the bus, train and cycle count tables. An MSOA
# that is missing from a count table gets NaN from the merge, which is replaced
# with 0. Because NaN forces the count columns to float, they are cast back to
# integers so the staged file holds whole-number counts (3 rather than 3.0).
count_columns = ["num_bus_routes", "num_train_routes", "num_cycle_routes"]
output_df = (
    msoa_gdf[["geography code"]]
    .merge(bus_route_counts_df, on="geography code", how="outer")
    .merge(train_route_counts_df, on="geography code", how="outer")
    .merge(cycle_route_counts_df, on="geography code", how="outer")
    .fillna(0)
    .astype(dict.fromkeys(count_columns, "int64"))
)
output_df

# Write to disk

In [None]:
%%time
output_df.to_csv("data/staged_files/s1_osm_mobility_routes.csv", index=False)