In [15]:
# Standard libraries
import os

# Third-party libraries
import geopandas as gpd
import pandas as pd

# Project data folders, resolved against the directory the notebook is run from.
DATA_DIR = os.path.join(os.getcwd(), "data")
# Each path component is passed separately so os.path.join inserts the
# platform's own separator instead of a hard-coded "/".
GEO_DIR = os.path.join(DATA_DIR, "external", "spatial")  # spatial inputs read below
OUT_DIR = os.path.join(DATA_DIR, "processed", "space")  # files written by the export cell


In [16]:
# Lookup table that is merged onto every spatial layer through its "name" column.
codes = pd.read_csv(
    filepath_or_buffer=os.path.join(GEO_DIR, "country-codes.csv"),
)


In [17]:
def set_crs(gdf, epsg=27700):
    """Stamp a coordinate reference system onto ``gdf`` and return the result.

    Args:
        gdf: Object exposing a geopandas-style ``set_crs`` method.
        epsg: EPSG code to assign. The default, 27700, is
            OSGB36 / British National Grid.

    Returns:
        Whatever ``gdf.set_crs`` returns, so the call can be used in a
        ``.pipe`` chain.
    """
    crs_options = {
        "epsg": epsg,
        # Replace any CRS already recorded on the frame rather than raising.
        "allow_override": True,
        # The frame passed in is modified directly, not a copy of it.
        "inplace": True,
    }
    return gdf.set_crs(**crs_options)


def rename_columns(df):
    """Apply the CWUK standard field names to a layer.

    The ``geo_region`` column becomes ``name``, the ``region`` column is
    removed, and the rows are returned sorted by ``name`` with a fresh
    0..n-1 index.

    Args:
        df: Frame holding at least the ``geo_region`` and ``region`` columns.

    Returns:
        A new frame; ``df`` itself is left untouched.

    Raises:
        KeyError: If ``df`` has no ``region`` column to drop.
    """
    renamed = df.rename(columns={"geo_region": "name"})
    trimmed = renamed.drop(columns=["region"])
    ordered = trimmed.sort_values(by="name", axis=0)
    return ordered.reset_index(drop=True)


def _load_layer(filename):
    """Read one spatial layer from ``GEO_DIR`` and prepare it for export.

    The file is read with the GeoJSON driver, stamped with the default CRS
    of ``set_crs``, normalised by ``rename_columns`` and finally joined to
    ``codes`` on the ``name`` column.

    Args:
        filename: Name of a GeoJSON file located in ``GEO_DIR``.

    Returns:
        The prepared layer with the columns of ``codes`` appended.
    """
    return (
        gpd.read_file(os.path.join(GEO_DIR, filename), driver="GeoJSON")
        .pipe(set_crs)
        .pipe(rename_columns)
        # Default (inner) join: rows whose name has no entry in `codes` are dropped.
        .merge(codes, on="name")
    )


# The three layers share one pipeline; only the source file differs.
uk = _load_layer("ukcp18-uk-land-country-united_kingdom-hires.geojson")
countries = _load_layer("ukcp18-uk-land-country-hires.geojson")
regions = _load_layer("ukcp18-uk-land-region-hires.geojson")
countries

Unnamed: 0,name,x_coord,y_coord,geometry,id
0,England,436325.368279,296147.848719,"MULTIPOLYGON (((84009.402 5361.604, 84022.099 ...",eng
1,Isle of Man,234265.323626,484561.023312,"MULTIPOLYGON (((216561.496 466536.198, 216335....",iom
2,Northern Ireland,97015.507927,533949.890497,"MULTIPOLYGON (((45089.390 487652.021, 44926.19...",nir
3,Scotland,266651.514962,771689.385884,"MULTIPOLYGON (((336177.917 1014260.958, 336272...",sct
4,Wales,279739.337741,271715.748248,"MULTIPOLYGON (((322349.697 164637.902, 322360....",wls


In [18]:
def get_parent_id(child, parents):
    """Find the id of the first parent polygon containing the child's point.

    Args:
        child: Row exposing ``x_coord``, ``y_coord`` and ``id``.
        parents: Frame whose rows expose ``id`` and ``geometry``.

    Returns:
        The ``id`` of the first row of ``parents`` (in row order) whose
        geometry contains the point (``x_coord``, ``y_coord``), ignoring any
        row that has the same ``id`` as ``child``. The string ``"null"`` is
        returned when no row qualifies.
    """
    location = gpd.points_from_xy(x=[child.x_coord], y=[child.y_coord])[0]
    containing_ids = (
        candidate.id
        for candidate in parents.itertuples()
        # An area is never its own parent, so identical ids are skipped
        # before the geometry test.
        # NOTE(review): buffer(0) is presumably there to repair invalid
        # polygons before the containment test - confirm.
        if candidate.id != child.id
        and candidate.geometry.buffer(0).contains(location)
    )
    # The generator is lazy, so the search stops at the first match.
    return next(containing_ids, "null")


# The UK sits at the top of the hierarchy, so it has no parent.
uk["parentId"] = "null"
# Countries may or may not fall inside the UK (e.g. Channel Islands, Isle of Man, etc.).
countries["parentId"] = countries.apply(get_parent_id, axis=1, args=(uk,))
# Regions sit within countries.
regions["parentId"] = regions.apply(get_parent_id, axis=1, args=(countries,))


In [19]:
# Stack the three layers into one table. When an id occurs in more than one
# layer only its first occurrence is kept, so UK rows take precedence over
# country rows, which take precedence over region rows.
space = pd.concat(objs=[uk, countries, regions]).drop_duplicates(
    subset="id", keep="first"
).reset_index(drop=True)

In [20]:
def _export_layer(layer, stem):
    """Write ``layer`` to ``OUT_DIR`` as ``<stem>.geojson`` and ``<stem>.csv``.

    Args:
        layer: Frame with a ``geometry`` column, supporting ``to_file``.
        stem: File name without extension, shared by both outputs.
    """
    layer.to_file(os.path.join(OUT_DIR, f"{stem}.geojson"), driver="GeoJSON")
    # The CSV carries the attributes only, so the geometry column is dropped first.
    layer.drop(columns="geometry").to_csv(
        os.path.join(OUT_DIR, f"{stem}.csv"), index=False
    )


# Create the output folder up front so a fresh checkout does not fail on
# the first write.
os.makedirs(OUT_DIR, exist_ok=True)

_export_layer(uk, "uk")
_export_layer(countries, "country")
_export_layer(regions, "region")
_export_layer(space, "space")
