# Getting the most out of GeoPandas 1.0

Ten years after its first release, GeoPandas has reached version 1.0. This workshop showcases how to get the most out of the recent enhancements and write code that is ready for 2024 and beyond.

**Martin Fleischmann, Joris van den Bossche**

27/05/2024, Basel

## Setup

Follow the README to set up the environment correctly. You should have the following packages installed:

```
- geopandas 1.0
- pyarrow
- geodatasets
```

## What is GeoPandas?

**Easy, fast and scalable geospatial analysis in Python**

From the docs:

> The goal of GeoPandas is to make working with geospatial data in python easier. It combines the capabilities of pandas and shapely, providing geospatial operations in pandas and a high-level interface to multiple geometries to shapely. GeoPandas enables you to easily do operations in python that would otherwise require a spatial database such as PostGIS.

## Outline

Order to be determined

- [x] dot density mapping (sample)
- [x] union_all
- [x] voronoi and delaunay
- [x] polygonize and build area
- [x] force_2d and force_3d
- [x] dwithin joins
- [ ] sjoin with attribute
- [x] extract_unique_points
- [x] frechet_distance and hausdorff_distance
- [x] segmentize
- [x] hilbert_distance and sort_values
- [ ] save to parquet with bbox and bbox filtering
- [x] shared_paths and get_geometry

In [None]:
import geopandas
import pandas
import numpy
from matplotlib.colors import ListedColormap

from geodatasets import get_path

## Dot density mapping using `sample_points()`

In [None]:
# Load the Chicago health dataset, restricting the attribute columns that are
# read to the handful listed here.
selected_columns = ["Hisp14", "Blk14", "AS14", "Wht14", "TRACTCnt"]
chicago = geopandas.read_file(
    get_path("geoda chicago_health"),
    columns=selected_columns,
)
chicago.head()

Single layer

In [None]:
# Sample random points for each row of `chicago`: the number of points per row
# is the `Wht14` value floor-divided by 100, and `rng=42` seeds the random
# generator so the result is reproducible.
white = chicago.sample_points(chicago.Wht14 // 100, rng=42)
white.head()

In [None]:
# Draw the sampled points as tiny black markers, overlay the area boundaries
# as thin black lines on the same axes, then hide the axes decorations.
ax = white.plot(color="k", markersize=0.01, figsize=(12, 12))
chicago.boundary.plot(ax=ax, color="k", linewidth=0.2)
ax.set_axis_off()

Multiple layers

In [None]:
# Same sampling recipe as for `white` (one point per 100 of the column value,
# fixed seed), applied to the three remaining columns in turn.
hispanic, black, asian = (
    chicago.sample_points(chicago[column] // 100, rng=42)
    for column in ("Hisp14", "Blk14", "AS14")
)

The manual way

In [None]:
# The first layer creates the axes; every further layer is drawn onto it with
# its own colour.
ax = white.plot(color="#F2CF63", markersize=0.01, figsize=(12, 12))
for dots, hex_colour in (
    (hispanic, "#ADD9C9"),
    (black, "#D96459"),
    (asian, "#3B2F40"),
):
    dots.plot(color=hex_colour, markersize=0.01, ax=ax)
ax.set_axis_off()

As one GeoDataFrame

In [None]:
# One row per population group: all sampled points of a group are merged into
# a single geometry with `union_all()`.
layers = {"white": white, "hispanic": hispanic, "black": black, "asian": asian}
dot_density = geopandas.GeoDataFrame(
    {"population": list(layers)},
    geometry=[sampled.union_all() for sampled in layers.values()],
    crs=chicago.crs,
)
dot_density

In [None]:
# Same four colours as in the manual plot. The order assumes the categories
# are mapped alphabetically (asian, black, hispanic, white) — TODO confirm.
palette = ["#3B2F40", "#D96459", "#ADD9C9", "#F2CF63"]
cmap = ListedColormap(palette)
legend_options = {
    "loc": "upper right",
    "bbox_to_anchor": (1, 0.95),
    "frameon": False,
}
ax = dot_density.plot(
    column="population",
    cmap=cmap,
    legend=True,
    markersize=0.01,
    figsize=(12, 12),
    legend_kwds=legend_options,
)
chicago.boundary.plot(ax=ax, color="k", linewidth=0.1)
ax.set_axis_off()

In [None]:
# Interactive version: the boundaries create the map `m`, and the dot density
# layer is then added to that same map.
m = chicago.boundary.explore(
    tiles="Carto DB Positron", prefer_canvas=True, color="black"
)
dot_density.explore(
    column="population",
    cmap=cmap,
    legend=True,
    marker_kwds={"radius": 1},
    m=m,
)

## Union_all and coverage union

In [None]:
nyc = geopandas.read_file(get_path("geoda nyc education"), columns=["BoroCode"])
nyc

In [None]:
nyc.explore("BoroCode", tiles="Carto DB Positron")

Computing the union of all geometries, first with the default method and then with `method="coverage"`:

In [None]:
%timeit nyc.union_all()
%timeit nyc.union_all(method="coverage")

Dissolving geometries into boroughs.

In [None]:
# Merge all rows sharing the same `BoroCode` value into a single row per code.
boroughs = nyc.dissolve("BoroCode")
boroughs

In [None]:
boroughs.explore(tiles="CartoDB Positron")

In [None]:
%timeit nyc.dissolve("BoroCode")
%timeit nyc.dissolve("BoroCode", method="coverage")

## Voronoi and Delaunay

In [None]:
groceries = geopandas.read_file(get_path("geoda groceries"))
groceries.head()

In [None]:
groceries.explore(tiles="CartoDB Positron")

In [None]:
voronoi = groceries.voronoi_polygons()
voronoi

In [None]:
m = voronoi.explore(tiles="CartoDB Positron")
groceries.explore(m=m, color="red")

In [None]:
voronoi_edges = groceries.voronoi_polygons(only_edges=True)
voronoi_edges

In [None]:
m = voronoi_edges.explore(tiles="CartoDB Positron")
groceries.explore(m=m, color="red")

In [None]:
delaunay = groceries.delaunay_triangles()
delaunay

In [None]:
m = delaunay.explore(tiles="CartoDB Positron")
groceries.explore(m=m, color="red")

In [None]:
delaunay_edges = groceries.delaunay_triangles(only_edges=True)
delaunay_edges

In [None]:
m = delaunay_edges.explore(tiles="CartoDB Positron")
groceries.explore(m=m, color="red")

In [None]:
chicago.head()

In [None]:
# Delaunay triangulation of the `chicago` geometries, returned as edges only
# (`only_edges=True`) rather than as triangles.
chicago_delaunay = chicago.delaunay_triangles(only_edges=True)

# Show the triangulation edges with the area boundaries overlaid in red on the
# same map.
m = chicago_delaunay.explore(tiles="CartoDB Positron", prefer_canvas=True)
chicago.boundary.explore(m=m, color="red")


## Extract unique points

In [None]:
points = chicago.extract_unique_points()
points

In [None]:
points.explore(tiles="CartoDB Positron", prefer_canvas=True)

## Polygonize and build_area

In [None]:
delaunay_edges.head()

In [None]:
polygons = delaunay_edges.polygonize()
polygons.head()

In [None]:
polygons.explore(tiles="CartoDB Positron")

In [None]:
delaunay_edges.build_area().explore(tiles="CartoDB Positron")

In [None]:
# Combine the Voronoi edges with the outlines of buffers (distance 1000)
# around the grocery stores into one series with a fresh 0..n-1 index.
buffer_outlines = groceries.buffer(1000).boundary
linework = pandas.concat([voronoi_edges, buffer_outlines], ignore_index=True)

In [None]:
linework.explore(tiles="CartoDB Positron")

In [None]:
linework.build_area().explore(tiles="CartoDB Positron")

In [None]:
linework.polygonize().explore(tiles="CartoDB Positron")

## Force_2D and Force_3D

In [None]:
chicago.head()

In [None]:
chicago.has_z.any()

In [None]:
with_z = chicago.force_3d()
with_z.head()

In [None]:
with_z.has_z.all()

In [None]:
# Pass one Z value per row instead of a single constant: the values are
# 0, 1, ..., len(chicago) - 1 (presumably matched to rows by position —
# verify against the `force_3d` documentation).
with_z_range = chicago.force_3d(z=range(len(chicago)))
with_z_range.head()

In [None]:
without_z = with_z_range.force_2d()
without_z.head()

## Dwithin

In [None]:
cars = geopandas.read_file("data/cars.gpkg")
cars.head()

In [None]:
# Reproject `cars` to the CRS of `groceries` so both layers share the same
# coordinates (and distance units) for the distance-based join below.
cars = cars.to_crs(groceries.crs)

In [None]:
cars.explore(tiles="CartoDB Positron")

In [None]:
# Spatial join with the `dwithin` predicate and `distance=1000`: pairs each
# car with grocery stores within that distance. The distance is expressed in
# the units of the shared CRS (`cars` was reprojected to `groceries.crs`).
cars_near_groceries = cars.sjoin(groceries, predicate="dwithin", distance=1000)
cars_near_groceries

In [None]:
cars_near_groceries.explore("Chain", tiles="CartoDB Positron")

## Segmentize


In [None]:
delaunay.extract_unique_points().explore(tiles="CartoDB Positron")

In [None]:
# Densify the triangles: `segmentize(1000)` is expected to insert extra
# vertices so that no segment is longer than 1000 CRS units — see the
# `segmentize` documentation for the exact contract.
denser = delaunay.segmentize(1000)
denser.head()

In [None]:
dense_points = denser.extract_unique_points()
dense_points.explore(tiles="CartoDB Positron")

## Hilbert distance

In [None]:
# Wrap the GeoSeries in a GeoDataFrame with a `geometry` column and explode
# it, so that multi-part geometries are split into one row per part.
dense_points_df = dense_points.to_frame("geometry").explode()
dense_points_df

In [None]:
# Assign each row, by position, to one of 100 consecutive groups of
# near-equal size, labelled 0..99 (fewer groups only when there are fewer than
# 100 rows). Integer binning is used because repeating each label
# `n // 100 + 1` times and truncating never reaches the highest labels: for
# exactly 1000 rows it yields only the labels 0..90.
n_points = len(dense_points_df)
quantile_label = numpy.arange(n_points) * 100 // n_points

In [None]:
dense_points_df.explore(quantile_label, cmap="viridis", tiles="CartoDB Positron")

In [None]:
dense_points_df.dissolve(quantile_label).envelope.explore(tiles="CartoDB Positron")

In [None]:
# Sort the rows by their geometry. For a geometry column GeoPandas is
# documented to order by distance along a Hilbert curve (compare
# `hilbert_distance()`), so consecutive rows should be spatially close.
spatially_sorted = dense_points_df.sort_values("geometry")

In [None]:
spatially_sorted.explore(quantile_label, cmap="viridis", tiles="CartoDB Positron")

In [None]:
spatially_sorted.dissolve(quantile_label).envelope.explore(tiles="CartoDB Positron")

In [None]:
spatially_sorted.hilbert_distance()

TODO: Include Parquet IO after this

## Shared paths

In [None]:
# Build spatially compact partitions of 7 areas each: sort the areas by
# geometry (spatial ordering) and give every run of 7 consecutive rows the
# same label. The labels are derived from the actual row count instead of a
# hard-coded 11 * 7.
chicago_sorted = chicago.sort_values("geometry")
sorted_labels = pandas.Series(
    numpy.arange(len(chicago_sorted)) // 7, index=chicago_sorted.index
)
chicago_partitions = chicago_sorted.dissolve(sorted_labels.to_numpy())

# `partition_label` is used positionally against the *unsorted* `chicago`
# further down (`chicago_partitions.loc[partition_label]` combined with
# `align=False`), so keep the labels in the original row order of `chicago`
# rather than in sorted order. Otherwise each area would be paired with an
# unrelated partition.
partition_label = sorted_labels.loc[chicago.index].to_numpy()

In [None]:
# Buffer the grocery stores by 2000 (in their own CRS), reproject the buffers
# to the CRS of `chicago`, and keep only the boundary parts inside them.
shop_buffers = groceries.buffer(2000).to_crs(chicago.crs)
boundaries_near_shops = chicago.boundary.clip(shop_buffers)

In [None]:
m = chicago_partitions.explore(tiles="CartoDB Positron")
boundaries_near_shops.explore(m=m, color="red")

In [None]:
# Row-wise comparison (`align=False`): the boundary in row i of `chicago` is
# compared with the boundary of the partition selected by `partition_label[i]`.
partition_boundaries = chicago_partitions.loc[partition_label].boundary
shared = chicago.boundary.shared_paths(partition_boundaries, align=False)

In [None]:
shared

In [None]:
shared.explore(tiles="CartoDB Positron")

In [None]:
# Pull the first (index 0) and second (index 1) part out of every
# `shared_paths` result. The variable names reflect the assumption that part 0
# holds paths running in the same direction and part 1 those running in the
# opposite direction — confirm against the `shared_paths` documentation.
same_direction = shared.get_geometry(0)
opposite_direction = shared.get_geometry(1)
same_direction.head()

In [None]:
m = same_direction.explore(tiles="CartoDB Positron")
opposite_direction.explore(m=m, color="red")

## Hausdorff and Fréchet distance

In [None]:
rivers = geopandas.read_file(get_path("eea large rivers")).set_index("NAME")
rivers

In [None]:
rivers.explore()

In [None]:
# Simplify the river geometries with a tolerance of 10,000 (presumably in the
# units of the rivers' CRS — verify) to obtain a coarser version to compare
# against the originals.
simplified_rivers = rivers.simplify(10_000)

In [None]:
m = simplified_rivers.explore()
rivers.explore(m=m, color='red')

In [None]:
rivers.frechet_distance(simplified_rivers)

In [None]:
rivers.hausdorff_distance(simplified_rivers)

In [None]:
# Fréchet distance between each river and its simplified version, divided by
# the river's length so that rivers of different lengths can be compared.
frechet = rivers.frechet_distance(simplified_rivers)
frechet / rivers.length