# Convert roads to CityJSON

This is a notebook that loads an existing CityJSON file and appends it with centrelines from a geopackage.

First we do some necessary imports and setup the filenames for input and output:

In [67]:
import geopandas
import json
from tqdm.notebook import tqdm

# Define the initial CityJSON file
citymodel = "montreal.json"

# Define the roads dataset
roads_file = "roads_3d.gpkg"

output = "montreal_final.json"
output_network = "montreal_with_network.json"

## Function definitions

This is a bunch of functions about converting CityJSON geometries to shapely:

In [1]:
from shapely.geometry import Polygon, MultiPolygon, MultiLineString

def surface_to_polygon(surface, vertices):
    """Return a Polygon from a CityJSON surface."""
    
    outer_ring = map(lambda i: vertices[i], surface[0])
    
    inner_rings = [map(lambda i: vertices[i], ring) for ring in surface[1:]]
    
    return Polygon(outer_ring, inner_rings)

def geom_to_multipolygon(geom, vertices):
    """Return a MultiPolygon from a CityJSON geometry.
    
    Only MultiSurfaces supported."""
    
    surfaces = []
    
    if geom["type"] == "MultiSurface":
        for surface in geom["boundaries"]:
            surfaces.append(surface_to_polygon(surface, vertices))
    
    return MultiPolygon(surfaces)

And this is the opposite (shapely to CityJSON geometry):

In [142]:
def shape_to_geom(shape, lod, vertices):
    """Converts a shapely geometry to a CityJSON geometry"""
    
    if shape.geom_type == "Point":
        geom_type = "MultiPoint"
        b, v = point_to_geom(shape, vertices)
    elif shape.geom_type == "LineString":
        geom_type = "MultiLineString"
        b, v = linestring_to_geom(shape, vertices)
        b = [b]
    elif shape.geom_type == "MultiLineString":
        geom_type = "MultiLineString"
        b = []
        v = vertices
        for line in shape.geoms:
            b_temp, v_temp = linestring_to_geom(shape, vertices)
            b.append(b_temp)
            v.extend(v_temp)
    
    geom = {
        "type": geom_type,
        "lod": lod,
        "boundaries": b
    }
    
    return geom, v

def point_to_geom(point, vertices):
    """Returns the boundary indices and new vertices of Point"""
    
    offset = len(vertices)
    
    indices = [offset]
    verts = [point.coords[0]]
    
    return (indices, verts)

def linestring_to_geom(line, vertices):
    """Returns the boundary indices and new vertices of a LineString
    
    This only returns the indices of one linestring, therefore it cannot be directly
    assigned to a geometry's boundary values.
    """
    
    offset = len(vertices)
    
    indices = [offset + i for i in range(len(line.coords))]
    verts = [list(c) for c in line.coords]
    
    return (indices, verts)

v = []
shape_to_geom(shapely.geometry.Point(0,0), "1.0", v)

({'type': 'MultiPoint', 'lod': '1.0', 'boundaries': [0]}, [(0.0, 0.0)])

## Data loading

Open the CityJSON file that contains the surfaces:

In [76]:
with open(citymodel) as file:
    cm = json.load(file)

len(list(cm["CityObjects"]))

28018

Load the centreslines from the geopackage:

In [5]:
roads = geopandas.read_file(roads_file)

len(roads)

761

## CityJSON road storage method #1

This code loops through the city objects and for each of them it finds the intersecting centrlines, cuts them based on the original geometry and, finally, stores the centrelines as a MultiLineString in the same object.

This method is:
- dictated by the input's areal representation geometry, and
- only outputs geometry (i.e. MultiSurfaces and MultiLineStrings)

Therefore, this method doesn't retain any network information (unless stored as attributes in semantic surfaces).

In [74]:
# TODO: Speed this up with an index
for objid, cobj in tqdm(cm["CityObjects"].items()):
    assert len(cobj["geometry"]) == 1
    
    citygeom = cobj["geometry"][0]
    
    geom = geom_to_multipolygon(citygeom, cm["vertices"])
    # We need to make the geometry valid
    geom = geom.buffer(0)
    
    intersecting_roads = roads[roads["geometry"].intersects(geom)]
#     parts = intersecting_roads["geometry"].intersection(geom)
    
    boundaries = []
    semantics = {
        "values": [],
        "surfaces": []
    }
    
    for idx, road in intersecting_roads.iterrows():
        lines = road["geometry"].intersection(geom)
        if lines.type == "LineString":
            lines = MultiLineString([lines])
            
        for line in lines.geoms:
            b, v = linestring_to_geom(line, cm["vertices"])

            boundaries.append(b)
            cm["vertices"].extend(v)

            semantics["values"].append(len(semantics["surfaces"]))

            atts = dict(road)
            del atts["geometry"]

            semantics["surfaces"].append(atts)
    
    new_geometry = {
        "type": "MultiLineString",
        "boundaries": boundaries,
        "semantics": semantics
    }
    
    cobj["geometry"].append(new_geometry)

  0%|          | 0/28018 [00:00<?, ?it/s]

KeyboardInterrupt: 

And here we store the output:

In [151]:
with open(output, "w") as out:
    json.dump(cm, out)

## CityJSON road storage method #2

This method introduces a network extension for CityJSON to store the linear network of roads. This is done through a `"+network"` attribute that holds the relationship information between nodes and edges of the network.

First, we split any multi-parts to single parts:

In [22]:
segments = roads.explode()

Then we make temporary u and v nodes using the lexigographic coordinates of the nodes:

In [30]:
import shapely
import pandas as pd

def fix_precision(geom, decimals=3):
    return shapely.wkt.loads(shapely.wkt.dumps(geom, rounding_precision=decimals))

def get_lexicographic_boundaries(f, geom_col="geometry"):
    round_geom = fix_precision(f[geom_col])
    
    try:
        return [str(round_geom.coords[0]), str(round_geom.coords[-1])]
    except Exception as e:
        print(f)
        raise e

tmp = segments.apply(get_lexicographic_boundaries, axis=1)
segments['u_node'] = [a[0] for a in tmp]
segments['v_node'] = [a[1] for a in tmp]

We assign an id to every unique set of coordinates:

In [33]:
nodes = set(list(pd.unique(segments['u_node'])) + list(pd.unique(segments['v_node'])))

node_map = {}
for uuid, n in enumerate(nodes):
    node_map[n] = uuid

len(node_map)

513

Finally, setting up the appropriate ids to the u and v nodes of every segment in the dataset:

In [36]:
def get_node_uuid(f):
    return [node_map[f['u_node']], node_map[f['v_node']]]

tmp = segments.apply(get_node_uuid, axis=1)
segments['u'] = [a[0] for a in tmp]
segments['v'] = [a[1] for a in tmp]

### Save the bloody thing again

First we load the citymodel again

In [93]:
with open(citymodel) as file:
    cm = json.load(file)

len(list(cm["CityObjects"]))

28018

In [94]:
cm["+network"] = {
    "nodes": {},
    "edges": {}
}

In [95]:
cm["+network"]["nodes"] = {i: {"geometry": str(shapely.geometry.Point(eval(c)))} for c, i in node_map.items()}

In [96]:
segments["geom_str"] = segments.apply(lambda f: str(f["geometry"]), axis=1)
cm["+network"]["edges"] = segments.set_index("ID_TRC")[["u", "v", "geom_str"]].to_dict(orient="index")

In [97]:
cm["+network"]

{'nodes': {0: {'geometry': 'POINT Z (298914.54 5042773.938 46.454)'},
  1: {'geometry': 'POINT Z (298652.566 5042369.143 48.235)'},
  2: {'geometry': 'POINT Z (298555.718 5042000 55.307)'},
  3: {'geometry': 'POINT Z (298632.232 5042746.054 47.067)'},
  4: {'geometry': 'POINT Z (299032.451 5042000 44.39)'},
  5: {'geometry': 'POINT Z (298836.826 5043536.751 48.35)'},
  6: {'geometry': 'POINT Z (298838.634 5042028.689 46.99)'},
  7: {'geometry': 'POINT Z (298584.128 5042265.107 52.884)'},
  8: {'geometry': 'POINT Z (298000 5043365.992 55.875)'},
  9: {'geometry': 'POINT Z (300000 5042869.988 29.11)'},
  10: {'geometry': 'POINT Z (299655.614 5042005.915 31.388)'},
  11: {'geometry': 'POINT Z (299972.203 5042016.489 25.195)'},
  12: {'geometry': 'POINT Z (299927.502 5042986.528 33.448)'},
  13: {'geometry': 'POINT Z (299144.587 5042736.547 45.393)'},
  14: {'geometry': 'POINT Z (298391.931 5043936.002 52.009)'},
  15: {'geometry': 'POINT Z (298970.474 5042468.614 46.332)'},
  16: {'geomet

In [79]:
for objid, cobj in tqdm(cm["CityObjects"].items()):
    assert len(cobj["geometry"]) == 1
    
    citygeom = cobj["geometry"][0]
    
    geom = geom_to_multipolygon(citygeom, cm["vertices"])
    # We need to make the geometry valid
    geom = geom.buffer(0)
    
    intersecting_roads = roads[roads["geometry"].intersects(geom)]
    
    cobj["+edges"] = list(intersecting_roads["ID_TRC"])

  0%|          | 0/28018 [00:00<?, ?it/s]

In [90]:
objid

'{fff92a18-6fb6-466c-9004-541b938a4020}'

In [80]:
with open(output_network, "w") as out:
    json.dump(cm, out)

## CityJSON roads storage method #3

With this method, we store nodes and edges of the network as CityObjects. Their attributes are enough to allow for someone to reconstruct the topology and they are easy to visualise. In order to make sense of any relations between the objects, we use `CityObjectGroup` objects.

In [148]:
geonodes = geopandas.GeoDataFrame([[i, shapely.geometry.Point(eval(c))] for c, i in node_map.items()], columns=["id", "geometry"], geometry="geometry")

def create_cityobject(feature, vertices, obj_type=None, lod=None, geom_col="geometry"):
    """Create a city object from a GeoSeries"""
    
    if not "type" in feature and obj_type is None:
        raise KeyError("City object type is not defined!")

    atts = dict(feature)
    del atts["geometry"]
    
    obj = {
        "type": feature["type"] if obj_type is None else obj_type,
        "attributes": atts,
        "geometry": []
    }
    
    geom, vertices = shape_to_geom(feature[geom_col], feature["lod"] if lod is None else lod, vertices)
    obj["geometry"].append(geom)
    
    return obj

create_cityobject(geonodes.iloc[0], cm["vertices"], obj_type="Road", lod="0.1")

{'type': 'Road',
 'attributes': {'id': 0},
 'geometry': [{'type': 'MultiPoint', 'lod': '0.1', 'boundaries': [662616]}]}