In [1]:
#HIDE
try:
    import setup
except:
    pass
from notebook_helper import *
notebook_setup()

import geopandas
from modules import la
from shapely import geometry

# Touching local authorities

Produces a list from the shapefile of authorities that are touching/overlapping/of higher geographies.

Code should be robust against future mergers, but will need to be re-run with new dates in mind.

pre-req: extract_center.ipynb

In [27]:
# load local authorities

epsg = 7405 # british national grid

gdf = geopandas.read_file(Path("data", "geo", "gb", "district_borough_unitary_region.shp")).to_crs(epsg=epsg)
gdf = gdf[["CODE", "geometry"]]
ni = geopandas.read_file(Path("data", "geo", "ni", "OSNI_Open_Data_Largescale_Boundaries__Local_Government_Districts_2012.shp")).to_crs(epsg=epsg)
ni = ni[["LGDCode", "geometry"]].rename(columns={"LGDCode": "CODE"})
gdf = pd.concat([gdf, ni])
gdf = gdf.la.code_from_gss("CODE")

gdf["local-authority-code"] = gdf["local-authority-code"].la.update_authority_codes()

Unnamed: 0,CODE,geometry,local-authority-code
0,E06000060,"POLYGON ((503946.104 190047.399, 503948.998 19...",BUC
1,E06000059,"POLYGON ((403126.502 121160.497, 403120.302 12...",DST
2,E08000001,"POLYGON ((366280.602 414615.697, 366295.699 41...",BOL
3,E08000032,"POLYGON ((397063.002 439322.305, 397071.602 43...",BRD
4,E08000002,"POLYGON ((375568.598 419017.997, 375776.002 41...",BUR


In [40]:
# calculate overlapping and touching points

def get_nearby_overlapping_la(v: geometry) -> list:
    """
    get points that touch intersect or overlap each other
    """
    t = gdf["geometry"].touches(v)
    inter = gdf["geometry"].intersects(v)
    over = gdf["geometry"].overlaps(v)
    is_self = gdf["geometry"] == v
    mask = (t|inter|over) & ~is_self
    codes = gdf["local-authority-code"][mask]
    return codes.tolist()


gdf["overlap"] = gdf["geometry"].apply(get_nearby_overlapping_la)

In [189]:


# combine these lists for higher geopgraphies
# this replicates the existing behaviour for county councils, but this can end up with quite a high number
df = gdf[["local-authority-code", "overlap"]]
county_df = df.la.to_multiple_higher(columns=["overlap"], aggfunc="sum")
county_df["overlap"] = county_df["overlap"].apply(lambda x: list(set(x)))

df = pd.concat([df, county_df])

# any higher geographies are automatically touching
df = df.la.add_info(["local-authority-type", "county-la", "combined-authority"])
df.loc[df["local-authority-type"].isin(["COMB", "SRA"]), "overlap"] = None
df["overlap"] = df["overlap"].apply(lambda x: x if x else [])
df["county-la"] = df["county-la"].apply(lambda x: [x] if x != np.nan else [])
df["combined-authority"] = df["combined-authority"].apply(lambda x: [x] if x != np.nan else [])
df["overlap"] = df["overlap"] + df["county-la"] + df["combined-authority"]


In [190]:
# reverse lookup (esepecially for higher geographies), anything touching in one direction should be in the other too

df = df.set_index("local-authority-code")
reverse_lookup = df["overlap"].explode().to_frame().reset_index()
reverse_lookup = reverse_lookup[~reverse_lookup["overlap"].isna()]
reverse_lookup = reverse_lookup.groupby(["overlap"]).agg({"local-authority-code": lambda x: x.tolist()})
reverse_lookup = reverse_lookup["local-authority-code"].to_dict()
df = df.reset_index()

def fix_item(v):
    if isinstance(v, float):
        return []
    return list(set([y for y in v if str(y) != 'nan']))

df["overlap"] += df["local-authority-code"].map(reverse_lookup)
df["overlap"] = df["overlap"].apply(fix_item)
df[df["local-authority-type"].isin(["COMB", "SRA"])]
df = df[["local-authority-code", "overlap"]]
df.head()

Unnamed: 0,local-authority-code,overlap
0,BUC,"[DAC, WNM, HRT, KEN, WNT, CHR, SLG, THE, WOK, ..."
1,DST,"[HRT, EDE, WIL, SSO, NEW, DEV, BPC, SOM]"
2,BOL,"[LAN, BBD, GMCA, BUR, CHO, WGN, SLF]"
3,BRD,"[CLD, PEN, CRA, KIR, WYCA, NYK, LEC, LDS, HAG]"
4,BUR,"[RCH, BBD, GMCA, ROS, LEC, BOL, SLF, MAN]"


In [193]:
touching_df = df.copy()
touching_df["overlap"] = touching_df["overlap"].apply("|".join)

touching_df.to_csv(Path("data", "outputs", "geographic_distance", "touching_las.csv"), index=False)

# Join with cluster information

Now we have all the neigboring information, we also want this in the distance approach used by other methods. 

So we calculate the distance, and then merge with this dataframe to only capture distances of neighbours.

This info may or may not be thrown away in the interface, but is consistent. 

In [2]:
df = pd.read_csv(Path("data", "outputs", "geographic_distance", "touching_las.csv"))
df["overlap"] = df["overlap"].str.split("|")
df = df.explode("overlap")
df.columns = ["local-authority-code_A", "local-authority-code_B"]
df.head()

Unnamed: 0,local-authority-code_A,local-authority-code_B
0,BUC,DAC
0,BUC,WNM
0,BUC,HRT
0,BUC,KEN
0,BUC,WNT


In [3]:
distance = (pd.read_csv(Path("data","la_xy.csv"))
            .space.self_distance("local-authority-code", ["x", "y"], normalize=True)
            .space.match_distance()
            .space.local_rankings()
            .sort_values("distance"))


distance.to_csv(
    Path("data", "outputs", "geographic_distance", "complete_distance_map.csv"), index=False)
distance.head()

Unnamed: 0,local-authority-code_A,local-authority-code_B,distance,match,position
82416,LND,GLA,0.02,99.6,1.0
56712,GLA,LND,0.02,99.6,1.0
68136,HMF,KEC,0.02,99.6,1.0
73848,KEC,HMF,0.02,99.6,1.0
11424,BIR,WMCA,0.02,99.4,1.0


In [4]:
# merge the two, and so only get the distance for neighbouring or intersecting areas
df = df.merge(distance)
df.to_csv(Path("data", "outputs", "geographic_distance", "distance_map.csv"), index=False)

# Create labels

In [3]:
df = la.get_la_df()
df = df[["local-authority-code", "region"]]
df = df.rename(columns={"region":"label"})
df.to_csv(Path("data", "outputs", "geographic_distance", "la_labels.csv"), index=False)

In [4]:
df = pd.Series(df["label"].unique()).to_frame()
df.columns = ["label"]
df

Unnamed: 0,label
0,Northern Ireland
1,Scotland
2,South East
3,Wales
4,North West
5,East Midlands
6,East of England
7,South West
8,London
9,West Midlands


In [5]:
def label_func(region):
    if region.split(" ")[0] in ["North", "South", "East", "West"]:
        region = "the " + region
    return f"Local authority in {region}"

df["desc"] = df["label"].apply(label_func)
df.to_csv(Path("data", "outputs", "geographic_distance", "label_desc.csv"), index=False)