In [2]:
# Standard libraries
import os

# Third-party libraries
import geopandas as gpd
import pandas as pd


In [3]:
# Root data folder: a "data" directory under the current working directory,
# so the notebook must be started from the directory that contains it.
DATA_DIR = os.path.join(os.getcwd(), "data")
# Folder the input GeoJSON layers and the country-codes CSV are read from.
GEO_DIR = os.path.join(DATA_DIR, "external/spatial")
# Folder the combined region GeoJSON and CSV are written to.
OUT_DIR = os.path.join(DATA_DIR, "processed/spatial")
# EPSG code passed to set_crs() for OSGB36 / British National Grid.
OSGB36 = 27700


In [4]:
def set_crs(gdf):
    """Return a copy of ``gdf`` labelled with the OSGB36 / British National Grid CRS.

    The CRS is assigned, not transformed: coordinates are left untouched and
    only the CRS metadata is set to EPSG code ``OSGB36``.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Frame whose CRS should be (re)declared.

    Returns
    -------
    geopandas.GeoDataFrame
        A new frame with the CRS set; the input frame is not modified.
    """
    # allow_override=True replaces any CRS already attached to the frame.
    # NOTE(review): presumably needed because the reader attaches a different
    # CRS to the GeoJSON inputs -- confirm against the source files.
    # No inplace=True: a helper used in a .pipe() chain should not also mutate
    # its argument.
    return gdf.set_crs(epsg=OSGB36, allow_override=True)



def rename_columns(df):
    """Apply the CWUK standard field names to a layer.

    Renames ``geo_region`` to ``name``, removes the ``region`` column, orders
    the rows by ``name`` and renumbers the index from zero.
    """
    renamed = df.rename(columns={"geo_region": "name"})
    trimmed = renamed.drop(columns=["region"])
    ordered = trimmed.sort_values(by="name", axis=0)
    # Discard the pre-sort index so row labels follow the new order.
    return ordered.reset_index(drop=True)

In [5]:
# Lookup table joined onto every layer by its "name" column.
codes = pd.read_csv(os.path.join(GEO_DIR, "country-codes.csv"))


def _load_layer(filename):
    """Read one GeoJSON layer from GEO_DIR, standardise it and attach codes.

    Parameters
    ----------
    filename : str
        File name of the GeoJSON layer, relative to ``GEO_DIR``.

    Returns
    -------
    The layer after ``set_crs`` and ``rename_columns``, merged with ``codes``
    on the ``name`` column.
    """
    return (
        gpd.read_file(os.path.join(GEO_DIR, filename), driver="GeoJSON")
        .pipe(set_crs)
        .pipe(rename_columns)
        # The (callable, keyword) tuple makes pipe pass the frame to pd.merge
        # as its `left` argument. pd.merge defaults to an inner join, so rows
        # whose name has no match in `codes` are dropped.
        .pipe((pd.merge, "left"), right=codes, on="name")
    )


# One call per layer; all three share the same preparation steps.
uk = _load_layer("ukcp18-uk-land-country-united_kingdom-hires.geojson")
countries = _load_layer("ukcp18-uk-land-country-hires.geojson")
admin = _load_layer("ukcp18-uk-land-region-hires.geojson")


ERROR 1: PROJ: proj_create_from_database: Open of /home/mikeblackett/anaconda3/envs/climate-watch-uk/share/proj failed


In [7]:
def get_parent_code(child, parents):
    """Look up the code of the parent polygon that contains the child's point.

    The point is built from ``child.x_coord`` / ``child.y_coord``. Rows of
    ``parents`` are checked in order and the first one that has a different
    code from ``child`` and whose geometry contains the point wins. The string
    'null' is returned when no row qualifies.
    """
    anchor = gpd.points_from_xy(x=[child.x_coord], y=[child.y_coord])[0]
    containing_codes = (
        candidate.code
        for candidate in parents.itertuples()
        # Skip the row describing the child itself before the geometry test.
        if candidate.code != child.code and candidate.geometry.contains(anchor)
    )
    return next(containing_codes, 'null')

# The UK layer is the top of the hierarchy, so it gets the same 'null'
# sentinel that get_parent_code returns when nothing contains the point.
uk["parentCode"] = 'null'
# Extra keyword arguments to apply() are forwarded to get_parent_code, so each
# row is matched against the layer one level up.
countries["parentCode"] = countries.apply(get_parent_code, axis=1, parents=uk)
admin["parentCode"] = admin.apply(get_parent_code, axis=1, parents=countries)
countries

Unnamed: 0,name,x_coord,y_coord,geometry,code,parentCode
0,Channel Islands,378641.915415,-63617.081409,"MULTIPOLYGON (((388778.830 -70524.923, 387476....",cha,
1,England,436325.368279,296147.848719,"MULTIPOLYGON (((84009.402 5361.604, 84022.099 ...",eng,gbr
2,Isle of Man,234265.323626,484561.023312,"MULTIPOLYGON (((216561.496 466536.198, 216335....",iom,
3,Northern Ireland,97015.507927,533949.890497,"MULTIPOLYGON (((45089.390 487652.021, 44926.19...",nir,gbr
4,Scotland,266651.514962,771689.385884,"MULTIPOLYGON (((336177.917 1014260.958, 336272...",sct,gbr
5,Wales,279739.337741,271715.748248,"MULTIPOLYGON (((322349.697 164637.902, 322360....",wls,gbr


In [36]:
# Stack the three layers, UK first, then countries, then admin regions.
regions = pd.concat([uk, countries, admin])
# A name that appears in more than one layer keeps only its first occurrence.
regions = regions.drop_duplicates(subset="name")
# Renumber the rows so the index is contiguous after stacking and de-duplication.
regions = regions.reset_index(drop=True)
regions

Unnamed: 0,name,x_coord,y_coord,geometry,code,parentCode
0,United Kingdom,348890.323001,461087.31387,"MULTIPOLYGON (((336177.917 1014260.958, 336272...",gbr,
1,Channel Islands,378641.915415,-63617.081409,"MULTIPOLYGON (((388778.830 -70524.923, 387476....",cha,
2,England,436325.368279,296147.848719,"MULTIPOLYGON (((84009.402 5361.604, 84022.099 ...",eng,gbr
3,Isle of Man,234265.323626,484561.023312,"MULTIPOLYGON (((216561.496 466536.198, 216335....",iom,
4,Northern Ireland,97015.507927,533949.890497,"MULTIPOLYGON (((45089.390 487652.021, 44926.19...",nir,gbr
5,Scotland,266651.514962,771689.385884,"MULTIPOLYGON (((336177.917 1014260.958, 336272...",sct,gbr
6,Wales,279739.337741,271715.748248,"MULTIPOLYGON (((322349.697 164637.902, 322360....",wls,gbr
7,East Scotland,327936.681532,732569.478746,"MULTIPOLYGON (((320766.200 680581.505, 320697....",esc,sct
8,East of England,573749.53721,263770.9148,"POLYGON ((585951.803 181704.897, 585934.901 18...",een,eng
9,London,531331.013781,179645.800174,"POLYGON ((531788.999 200757.503, 531643.597 20...",lnd,eng


In [37]:
# Make sure the output folder exists; neither writer below is relied on to
# create it, and it may be absent on a fresh checkout.
os.makedirs(OUT_DIR, exist_ok=True)

# Full layer (attributes and geometry) as GeoJSON.
regions.to_file(os.path.join(OUT_DIR, "region.geojson"), driver="GeoJSON")
# Attribute table only: the geometry column is dropped before writing the CSV.
regions.drop(columns='geometry').to_csv(os.path.join(OUT_DIR, "region.csv"), index=False)
