In [2]:
# NOTE(review): pd, np, Path, Optional and List are used further down without an
# explicit import, so they presumably come from this star import — confirm.
from notebook_helper import *
import geopandas
# `la` is never referenced by name below; presumably imported so that the `.la`
# DataFrame accessor used in later cells gets registered — TODO confirm.
from helpers import la
# Not defined in this notebook; presumably supplied by the star import above.
notebook_setup()

# Center points

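This notebook extracts the center points (centroids) of local authorities from shapefiles and derives center points for higher-level authorities by averaging the center points of the authorities they cover. It also compiles area and population figures for local authorities and sums them for higher-level authorities.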

In [3]:
# Read the GB and NI local authority boundaries, stack them into one frame keyed by
# "CODE", and record the centroid coordinates of every boundary.

# NOTE(review): EPSG:7405 is the compound "OSGB36 / British National Grid + ODN height"
# CRS; the plain 2D British National Grid is EPSG:27700 — confirm which is intended.
epsg = 7405 # british national grid

gb_shapefile = Path("data", "geo", "gb", "district_borough_unitary_region.shp")
ni_shapefile = Path("data", "geo", "ni", "OSNI_Open_Data_Largescale_Boundaries__Local_Government_Districts_2012.shp")

# Both sources are reprojected to the same CRS before being combined.
gdf = geopandas.read_file(gb_shapefile).to_crs(epsg=epsg)
gdf = gdf[["CODE", "geometry"]]

# The NI file names its code column "LGDCode"; align it with the GB naming.
ni = geopandas.read_file(ni_shapefile).to_crs(epsg=epsg)
ni = ni[["LGDCode", "geometry"]].rename(columns={"LGDCode": "CODE"})

gdf = pd.concat([gdf, ni])

# Compute the centroids once and split them into coordinate columns.
centroids = gdf["geometry"].centroid
gdf["x"] = centroids.x
gdf["y"] = centroids.y

In [4]:
# Run the boundary frame through the project `la` accessor, treating the existing codes
# as GSS codes (from_type="gss"). Later cells read a "local-authority-code" column,
# which is presumably what this call adds — TODO confirm against the helper.
gdf = gdf.la.create_code_column(from_type="gss")

In [5]:
def group_to_current(df, columns: Optional[List[str]] = None, aggfunc="mean") -> pd.DataFrame:
    """Aggregate rows onto the authority code that is current for each row.

    The "replaced_by" value is looked up through the `la` accessor (historical
    authorities included). Rows with a "replaced_by" value are grouped under that
    value; all other rows keep their own "local-authority-code".

    Args:
        df: frame with a "local-authority-code" column and the columns to aggregate.
        columns: list of columns to aggregate; defaults to every column of `df`.
        aggfunc: aggregation handed to `GroupBy.agg` (default "mean").

    Returns:
        One row per current code, with the code in "local-authority-code".
    """
    value_cols = list(df.columns) if columns is None else columns

    with_info = df.la.get_council_info(["replaced_by"], include_historical=True)
    successor = with_info["replaced_by"]
    # Fall back to the row's own code when nothing has replaced it.
    with_info["current-code"] = np.where(
        successor.isna(), with_info["local-authority-code"], successor
    )

    grouped = with_info[["current-code"] + value_cols].groupby("current-code").agg(aggfunc)
    grouped = grouped.rename_axis(index="local-authority-code")
    return grouped.reset_index()


def group_to_higher_level(df, columns: Optional[List[str]] = None, aggfunc="mean") -> pd.DataFrame:
    """Aggregate rows by their "overlapping-la" value.

    The "overlapping-la" value is looked up through the `la` accessor (historical
    authorities included); presumably it is the code of the higher-level authority
    covering each row — confirm against the helper. Rows without a value do not
    appear in the result, because `groupby` drops missing keys by default.

    Args:
        df: frame the `la` accessor can annotate, holding the columns to aggregate.
        columns: list of columns to aggregate; defaults to every column of `df`.
        aggfunc: aggregation handed to `GroupBy.agg` (default "mean").

    Returns:
        One row per "overlapping-la" value, exposed as "local-authority-code".
    """
    parent_key = "overlapping-la"
    value_cols = list(df.columns) if columns is None else columns

    with_parent = df.la.get_council_info([parent_key], include_historical=True)
    aggregated = with_parent[[parent_key] + value_cols].groupby(parent_key).agg(aggfunc)
    aggregated = aggregated.rename_axis(index="local-authority-code")
    return aggregated.reset_index()


def group_to_current_and_higher(*args, **kwargs) -> pd.DataFrame:
    """Stack the current-authority and higher-level aggregations into one frame.

    All arguments are forwarded unchanged to `group_to_current` and then to
    `group_to_higher_level`; the two results are concatenated in that order.
    """
    builders = (group_to_current, group_to_higher_level)
    return pd.concat([build(*args, **kwargs) for build in builders])

# Average the centroid coordinates (default aggfunc is "mean") onto current and
# higher-level authorities; x/y are still in the projected CRS at this point.
df = group_to_current_and_higher(gdf, ["x", "y"])

# Rebuild point geometries from the averaged coordinates and reproject them to WGS84.
df = geopandas.GeoDataFrame(df, geometry=geopandas.points_from_xy(df.x, df.y), crs=epsg).to_crs("WGS84")

# geopandas stores geographic coordinates as (x, y) = (longitude, latitude), so
# longitude comes from .x and latitude from .y (the two were previously swapped).
df["long"] = df["geometry"].x
df["lat"] = df["geometry"].y
df.drop(columns='geometry').to_csv(Path("data","la_xy.csv"), index=False)


In [6]:
# Build the area/population table: per-authority figures from the source CSV plus
# summed figures for each "overlapping-la" value.
df = pd.read_csv(Path("data", "pop_area_2021.csv"), thousands=r',')
# Keep only the first nine characters of each code. The vectorised .str slice passes
# missing values through instead of raising, unlike a per-row lambda.
df["gss-code"] = df["gss-code"].str[:9]
df = df.la.create_code_column(from_type="gss", source_col="gss-code", allow_none=True)


# Discard rows that did not receive a local-authority-code.
df = df.loc[df["local-authority-code"].notna()]

# Sum area and population up to the higher level.
hdf = group_to_higher_level(df, ["area", "pop_2020"], aggfunc="sum")

df = df[["local-authority-code", "area", "pop_2020"]]
# drop_duplicates keeps the first occurrence, so a row from the source table wins
# over an aggregated row carrying the same code.
df = pd.concat([df, hdf]).drop_duplicates("local-authority-code")
df.to_csv(Path("data","la_area_pop.csv"), index=False)