In [None]:
# Standard libraries
import os

# Third-party libraries
import geopandas as gpd
import pandas as pd

# All data locations hang off the kernel's current working directory, so the
# notebook has to be started from the folder that contains "data".
DATA_DIR = os.path.join(os.getcwd(), "data")
# Path components are passed one by one so that os.path.join chooses the
# separator, rather than embedding "/" in a single component.
GEO_DIR = os.path.join(DATA_DIR, "external", "spatial")
OUT_DIR = os.path.join(DATA_DIR, "processed", "space")


In [None]:
# Lookup table that is joined onto every boundary layer through its "name"
# column further down.
codes_path = os.path.join(GEO_DIR, "country-codes.csv")
codes = pd.read_csv(codes_path)


In [None]:
def set_crs(gdf, epsg=27700):
    """Stamp a coordinate reference system onto ``gdf`` and hand the frame back.

    The default EPSG code, 27700, is OSGB36 / British National Grid. The call
    is forwarded to ``gdf.set_crs`` with ``allow_override=True`` and
    ``inplace=True``, and whatever that call returns is returned unchanged.

    NOTE(review): with geopandas this relabels the existing coordinates (no
    reprojection) and modifies ``gdf`` itself -- confirm that is intended.
    """
    options = {"epsg": epsg, "allow_override": True, "inplace": True}
    return gdf.set_crs(**options)


def rename_columns(df):
    """Normalise a boundary table to the CWUK standard field names.

    ``geo_region`` becomes ``name``, the ``region`` column is removed, and the
    rows come back ordered by ``name`` under a fresh 0..n-1 index. A new frame
    is returned; ``df`` is not modified.

    Raises ``KeyError`` (from pandas) when ``df`` has no ``region`` column.
    """
    renamed = df.rename(columns={"geo_region": "name"})
    trimmed = renamed.drop(columns=["region"])
    ordered = trimmed.sort_values(by="name", axis=0)
    return ordered.reset_index(drop=True)


def load_boundaries(filename):
    """Read one boundary layer from ``GEO_DIR`` and attach its codes.

    The GeoJSON file is read, given the default CRS of ``set_crs``, passed
    through ``rename_columns``, and finally joined to ``codes`` on ``name``.

    Parameters
    ----------
    filename : str
        Name of the GeoJSON file inside ``GEO_DIR``.

    Returns
    -------
    The joined frame, with the boundary layer on the left-hand side of the
    merge.
    """
    return (
        gpd.read_file(os.path.join(GEO_DIR, filename), driver="GeoJSON")
        .pipe(set_crs)
        .pipe(rename_columns)
        # Default (inner) join: a boundary whose name has no row in `codes`
        # is dropped from the result.
        .merge(codes, on="name")
    )


# The three UKCP18 layers share exactly the same preparation steps.
uk = load_boundaries("ukcp18-uk-land-country-united_kingdom-hires.geojson")
countries = load_boundaries("ukcp18-uk-land-country-hires.geojson")
regions = load_boundaries("ukcp18-uk-land-region-hires.geojson")


# Parent IDs
In the CWUK database, each spatial entity references its parent entity through a self-referential foreign key (`parentId`). An entity with no parent is given the string `null`.

In [None]:
def get_parent_id(child, parents):
    """Find the first parent whose polygon contains the child's point.

    A point is built from ``child.x_coord`` and ``child.y_coord`` and tested
    against every row of ``parents`` in iteration order. A parent carrying the
    same ``id`` as the child is never considered, so an entity cannot become
    its own parent.

    Returns the ``id`` of the first containing parent, or the string
    ``"null"`` when no parent contains the point.
    """
    location = gpd.points_from_xy(x=[child.x_coord], y=[child.y_coord])[0]
    containing_ids = (
        candidate.id
        for candidate in parents.itertuples()
        if candidate.id != child.id
        # buffer(0) presumably repairs invalid polygons before the
        # containment test -- TODO confirm.
        and candidate.geometry.buffer(0).contains(location)
    )
    return next(containing_ids, "null")


# The UK sits at the top of the hierarchy, so it has no parent.
uk["parentId"] = "null"
# Countries are matched against the UK outline. A country that the outline
# does not contain (the Channel Islands and the Isle of Man may be such cases)
# receives "null".
countries["parentId"] = countries.apply(get_parent_id, axis=1, args=(uk,))
# Regions are matched against the countries that contain them.
regions["parentId"] = regions.apply(get_parent_id, axis=1, args=(countries,))


In [None]:
# Create the output folder up front so that a fresh checkout can run the
# notebook end to end; the writers below fail when the directory is missing.
os.makedirs(OUT_DIR, exist_ok=True)

# Every layer is written twice: as GeoJSON including the geometry, and as a
# CSV of the attributes only (the row index is kept as the first CSV column).
for stem, layer in (("uk", uk), ("country", countries), ("region", regions)):
    layer.to_file(os.path.join(OUT_DIR, stem + ".geojson"), driver="GeoJSON")
    layer.drop(columns="geometry").to_csv(
        os.path.join(OUT_DIR, stem + ".csv"), index=True
    )
