In [1]:
# Notebook to build the ruc_distance data package: per-council cluster labels,
# label descriptions, and the council-to-council distance map.

from data_common.notebook import *
from data_common.dataset import get_dataset_df
from datetime import date

# Reference date handed to the council-info lookup (`as_of_date`) further down.
council_date = date(year=2023, month=4, day=2)


In [2]:
# Load the local-authority RUC table ("la_ruc.csv") via the shared dataset helper.
# NOTE(review): arguments look like (repo, package, release, file) — confirm
# against get_dataset_df; the release comes from settings["ruc_release"].
df = get_dataset_df(
    "uk_ruc",
    "uk_ruc",
    settings["ruc_release"],
    "la_ruc.csv",
)
df

Unnamed: 0,local-authority-code,official-name,highly-rural,rural,urban,ruc-cluster-label
0,ABC,"Armagh City, Banbridge and Craigavon Borough C...",4.07e-01,1.03e-01,0.49,Sparse and rural
1,ABD,Aberdeenshire Council,4.92e-01,2.12e-01,0.30,Sparse and rural
2,ABE,Aberdeen City Council,2.46e-03,6.00e-02,0.94,Urban
3,ADU,Adur District Council,0.00e+00,0.00e+00,1.00,Urban
4,AGB,Argyll and Bute Council,4.70e-01,3.52e-01,0.18,Sparse and rural
...,...,...,...,...,...,...
388,SCR,South Yorkshire Mayoral Combined Authority,2.04e-02,5.04e-02,0.93,Urban
389,TVCA,Tees Valley Combined Authority,2.85e-02,7.12e-02,0.90,Urban
390,WECA,West of England Combined Authority,4.89e-02,3.31e-02,0.92,Urban
391,WMCA,West Midlands Combined Authority,1.17e-03,6.76e-03,0.99,Urban


In [3]:
# Keep only the council code and its cluster label, exposing the latter
# under the shorter column name "label".
label_df = df.loc[:, ["local-authority-code", "ruc-cluster-label"]].rename(
    {"ruc-cluster-label": "label"}, axis="columns"
)

# Write the lookup into the ruc_distance package (row index omitted).
la_labels_path = Path("data", "packages", "ruc_distance", "la_labels.csv")
label_df.to_csv(la_labels_path, index=False)
label_df.head()

Unnamed: 0,local-authority-code,label
0,ABC,Sparse and rural
1,ABD,Sparse and rural
2,ABE,Urban
3,ADU,Urban
4,AGB,Sparse and rural


In [4]:
# List the distinct cluster labels; each one is given a description in the next cell.
label_df["label"].unique()

array(['Sparse and rural', 'Urban', 'Rural', 'Urban with rural areas'],
      dtype=object)

In [5]:
# One (label, desc) row per cluster label, built directly as a two-column frame.
descs = pd.DataFrame(
    [
        (
            "Sparse and rural",
            "Local authority predominately made up of large and low-density rural neighbourhoods",
        ),
        (
            "Urban",
            "Local authority predominately made up of urban neighbourhoods",
        ),
        (
            "Rural",
            "Local authority predominately made up of rural neighbourhoods",
        ),
        (
            "Urban with rural areas",
            "Local authority mostly made up of urban neighbourhoods, but with a significant number of rural neighbourhoods",
        ),
    ],
    columns=["label", "desc"],
)

# Write the descriptions into the ruc_distance package (row index omitted).
label_desc_path = Path("data", "packages", "ruc_distance", "label_desc.csv")
descs.to_csv(label_desc_path, index=False)
descs.head()

Unnamed: 0,label,desc
0,Sparse and rural,Local authority predominately made up of large...
1,Urban,Local authority predominately made up of urban...
2,Rural,Local authority predominately made up of rural...
3,Urban with rural areas,Local authority mostly made up of urban neighb...


In [6]:
# Columns removed before the distance step: the cluster label and council
# name, plus the raw population/area figures once density has been derived.
dropped_columns = ["ruc-cluster-label", "pop-2020", "area", "official-name"]
distance_map_path = Path("data", "packages", "ruc_distance", "distance_map.csv")

# Attach population and area as of `council_date`, derive density, then run the
# `.space` accessor steps (self distance -> match -> local rankings).
# NOTE(review): the semantics of the `.la` / `.space` accessors are defined
# outside this notebook — confirm what each step returns.
d = (
    df.la.get_council_info(["pop-2020", "area"], as_of_date=council_date)
    .assign(density=lambda councils: councils["pop-2020"] / councils["area"])
    .drop(columns=dropped_columns)
    .set_index("local-authority-code")
    .space.self_distance(normalize=True)
    .space.match_distance()
    .space.local_rankings()
    .sort_values(by=["local-authority-code_A", "distance"])
)

# Persist the sorted table into the ruc_distance package (row index omitted).
d.to_csv(distance_map_path, index=False)