In [54]:
# Standard libraries
import os

# Third-party libraries
import geopandas as gpd
import pandas as pd

# Every path is resolved relative to the directory the notebook is run from.
DATA_DIR = os.path.join(os.getcwd(), "data")

# GEO_DIR holds the input boundary files; OUT_DIR receives the exported tables.
GEO_DIR, OUT_DIR = (
    os.path.join(DATA_DIR, subdir)
    for subdir in ("external/spatial", "processed/location")
)


In [55]:
def set_crs(gdf, epsg=27700):
    """Stamps a CRS onto a GeoDataFrame and returns what ``gdf.set_crs`` returns.

    Written to be used as a ``.pipe`` step.

    Parameters
    ----------
    gdf
        Object exposing a ``set_crs`` method (a GeoDataFrame in this notebook).
    epsg : int, default 27700
        EPSG code to assign; the default is OSGB36 / British National Grid.

    Notes
    -----
    ``allow_override=True`` is passed so that a CRS already attached to the
    frame is replaced, and ``inplace=True`` is passed so the frame handed in
    is itself modified.
    """
    crs_kwargs = {"epsg": epsg, "allow_override": True, "inplace": True}
    return gdf.set_crs(**crs_kwargs)


def rename_columns(df):
    """Enforces CWUK standard field names and row order.

    Renames ``geo_region`` to ``name``, removes the ``region`` column, sorts
    the rows by ``name`` and renumbers the index from zero. The input frame
    is left untouched; a new frame is returned.

    Raises
    ------
    KeyError
        If the frame has no ``region`` column to drop.
    """
    renamed = df.rename(columns={"geo_region": "name"})
    trimmed = renamed.drop(columns=["region"])
    ordered = trimmed.sort_values(by="name")
    return ordered.reset_index(drop=True)

def load_boundaries(filename, codes):
    """Reads one boundary GeoJSON from GEO_DIR and attaches its location codes.

    The file is read, passed through ``set_crs`` and ``rename_columns``, and
    then joined to ``codes`` on the ``name`` column.

    Parameters
    ----------
    filename : str
        Name of the GeoJSON file inside GEO_DIR.
    codes : pandas.DataFrame
        Lookup table with a ``name`` column to join on.

    Returns
    -------
    The merged frame. The join is an inner join, so rows whose ``name`` has
    no match in ``codes`` are not kept.
    """
    return (
        gpd.read_file(os.path.join(GEO_DIR, filename), driver="GeoJSON")
        .pipe(set_crs)
        .pipe(rename_columns)
        # Inner join (the pandas default), spelled out so it is not mistaken
        # for a left join.
        .merge(codes, on="name", how="inner")
    )


# Lookup of location ids / parent ids / types, keyed by name.
codes = pd.read_csv(os.path.join(GEO_DIR, "country-codes.csv"))

uk = load_boundaries(
    "ukcp18-uk-land-country-united_kingdom-hires.geojson", codes
)
countries = load_boundaries("ukcp18-uk-land-country-hires.geojson", codes)
regions = load_boundaries("ukcp18-uk-land-region-hires.geojson", codes)
codes

Unnamed: 0,id,name,parent_id,type
0,gb,United Kingdom,,country
1,gb-eng,England,gb,country
2,gb-nir,Northern Ireland,gb,country
3,gb-sct,Scotland,gb,country
4,gb-wls,Wales,gb,country
5,gb-iom,Isle of Man,gb,country
6,gb-emd,East Midlands,gb-eng,region
7,gb-esc,East Scotland,gb-sct,region
8,gb-een,East of England,gb-eng,region
9,gb-lnd,London,gb-eng,region


In [56]:
# Keep only true regions: drop every row whose id also appears in `countries`
is_country = regions["id"].isin(countries["id"])
regions = regions.loc[~is_country]
regions

Unnamed: 0,name,x_coord,y_coord,geometry,id,parent_id,type
0,East Midlands,481004.614478,337237.029489,"POLYGON ((534657.901 406164.796, 533900.897 40...",gb-emd,gb-eng,region
1,East Scotland,327936.681532,732569.478746,"MULTIPOLYGON (((320766.200 680581.505, 320697....",gb-esc,gb-sct,region
2,East of England,573749.53721,263770.9148,"POLYGON ((585951.803 181704.897, 585934.901 18...",gb-een,gb-eng,region
4,London,531331.013781,179645.800174,"POLYGON ((531788.999 200757.503, 531643.597 20...",gb-lnd,gb-eng,region
5,North East England,406477.705975,569475.671027,"MULTIPOLYGON (((429246.996 604972.298, 429197....",gb-nee,gb-eng,region
6,North Scotland,241176.638224,880259.99721,"MULTIPOLYGON (((172604.496 740747.502, 172569....",gb-nsc,gb-sct,region
7,North West England,351884.161496,461276.950523,"POLYGON ((364028.497 577614.499, 363932.904 57...",gb-nwe,gb-eng,region
9,South East England,503129.756097,154326.525438,"MULTIPOLYGON (((449149.997 96631.199, 448665.5...",gb-see,gb-eng,region
10,South West England,319466.448995,123179.884115,"MULTIPOLYGON (((84009.402 5361.605, 84022.099 ...",gb-swe,gb-eng,region
12,West Midlands,381687.443315,286831.222805,"POLYGON ((445790.299 252455.405, 445810.203 25...",gb-wmd,gb-eng,region


In [57]:
# Put the UK-wide row ahead of the national rows. This cell rebinds
# `countries` from itself, so without the de-duplication on `id` every re-run
# would stack one more copy of the UK row on top.
countries = (
    pd.concat([uk, countries])
    .drop_duplicates(subset="id")
    .reset_index(drop=True)
)
countries


Unnamed: 0,name,x_coord,y_coord,geometry,id,parent_id,type
0,United Kingdom,348890.323001,461087.31387,"MULTIPOLYGON (((336177.917 1014260.958, 336272...",gb,,country
1,England,436325.368279,296147.848719,"MULTIPOLYGON (((84009.402 5361.604, 84022.099 ...",gb-eng,gb,country
2,Isle of Man,234265.323626,484561.023312,"MULTIPOLYGON (((216561.496 466536.198, 216335....",gb-iom,gb,country
3,Northern Ireland,97015.507927,533949.890497,"MULTIPOLYGON (((45089.390 487652.021, 44926.19...",gb-nir,gb,country
4,Scotland,266651.514962,771689.385884,"MULTIPOLYGON (((336177.917 1014260.958, 336272...",gb-sct,gb,country
5,Wales,279739.337741,271715.748248,"MULTIPOLYGON (((322349.697 164637.902, 322360....",gb-wls,gb,country


In [58]:
# Stack every layer into a single table; an id that occurs in more than one
# layer is kept only at its first occurrence.
stacked = pd.concat([uk, countries, regions])
locations = stacked.drop_duplicates(subset="id").reset_index(drop=True)
locations

Unnamed: 0,name,x_coord,y_coord,geometry,id,parent_id,type
0,United Kingdom,348890.323001,461087.31387,"MULTIPOLYGON (((336177.917 1014260.958, 336272...",gb,,country
1,England,436325.368279,296147.848719,"MULTIPOLYGON (((84009.402 5361.604, 84022.099 ...",gb-eng,gb,country
2,Isle of Man,234265.323626,484561.023312,"MULTIPOLYGON (((216561.496 466536.198, 216335....",gb-iom,gb,country
3,Northern Ireland,97015.507927,533949.890497,"MULTIPOLYGON (((45089.390 487652.021, 44926.19...",gb-nir,gb,country
4,Scotland,266651.514962,771689.385884,"MULTIPOLYGON (((336177.917 1014260.958, 336272...",gb-sct,gb,country
5,Wales,279739.337741,271715.748248,"MULTIPOLYGON (((322349.697 164637.902, 322360....",gb-wls,gb,country
6,East Midlands,481004.614478,337237.029489,"POLYGON ((534657.901 406164.796, 533900.897 40...",gb-emd,gb-eng,region
7,East Scotland,327936.681532,732569.478746,"MULTIPOLYGON (((320766.200 680581.505, 320697....",gb-esc,gb-sct,region
8,East of England,573749.53721,263770.9148,"POLYGON ((585951.803 181704.897, 585934.901 18...",gb-een,gb-eng,region
9,London,531331.013781,179645.800174,"POLYGON ((531788.999 200757.503, 531643.597 20...",gb-lnd,gb-eng,region


In [59]:
# Make sure the output folder exists before writing: the writers below do not
# create missing parent directories.
os.makedirs(OUT_DIR, exist_ok=True)

# Each layer is written twice under the same file stem: a GeoJSON that keeps
# the geometry, and a CSV holding the attribute columns only.
exports = {
    "uk": uk,
    "country": countries,
    "region": regions,
    "location": locations,
}
for stem, layer in exports.items():
    layer.to_file(os.path.join(OUT_DIR, f"{stem}.geojson"), driver="GeoJSON")
    layer.drop(columns="geometry").to_csv(
        os.path.join(OUT_DIR, f"{stem}.csv"), index=False
    )
