In [1]:
# Notebook to produce the emissions_distance package files: authority labels, label descriptions and the distance map.

from data_common.notebook import *
from data_common.dataset import get_dataset_df

In [2]:
# Load `la_labels.csv` from the `uk_local_authority_emissions_data` package of
# the `la-emissions-data` dataset ("latest" is presumably the version selector —
# confirm against `get_dataset_df`).
df = get_dataset_df(
    "la-emissions-data",
    "uk_local_authority_emissions_data",
    "latest",
    "la_labels.csv",
)

# Keep only the authority code and its label, and write that mapping into the
# emissions_distance package (no index column in the CSV).
labels_csv = Path("data", "packages", "emissions_distance", "la_labels.csv")
label_df = df.loc[:, ["local-authority-code", "label"]]
label_df.to_csv(labels_csv, index=False)

# Preview the first rows as the cell output.
label_df.head()

Unnamed: 0,local-authority-code,label
0,GLA,Urban Mainstream
1,WMCA,Urban Mainstream
2,GMCA,Urban Mainstream
3,WYCA,Urban Mainstream
4,KEN,Urban Mainstream


In [3]:
# One row per distinct (label, desc) pair found in the loaded table.
label_pairs = df.loc[:, ["label", "desc"]]
desc = label_pairs.drop_duplicates()

# Write the label descriptions into the emissions_distance package
# (no index column in the CSV).
label_desc_csv = Path("data", "packages", "emissions_distance", "label_desc.csv")
desc.to_csv(label_desc_csv, index=False)

# Preview the first rows as the cell output.
desc.head()

Unnamed: 0,label,desc
0,Urban Mainstream,Below average for all emissions scores
7,Transport/Domestic,Above average transport/domestic score
31,Public sector,"Well above average public sector (government, ..."
46,Agriculture,"Above average agriculture, domestic score"
129,Industry/Commerical/Domestic,"Above average for industry/domestic/doemestic,..."


In [4]:
def density_transform(series: pd.Series, scale: float = 0.25) -> pd.Series:
    """
    Damp a density series by multiplying every value by ``scale``.

    Density is scaled down after it is normalized, so it presents as a
    pressure pushing apart dissimilar areas, but is far less significant
    than the emissions.

    Args:
        series: the density values to scale down.
        scale: multiplier applied to each value. Defaults to 0.25, the
            weighting this notebook has always used, so calling the function
            with only ``series`` behaves exactly as before.

    Returns:
        A new series equal to ``series * scale`` (same index as the input).
    """
    return series * scale


# Strip the descriptive columns and index the remaining columns by authority code.
features = df.drop(columns=["label", "desc"]).set_index("local-authority-code")

# Self-distance via the project's `space` accessor, with normalization on and
# the pop_density column passed through `density_transform`.
# NOTE(review): presumably this yields pairwise distances between authorities — confirm.
pairwise = features.space.self_distance(
    normalize=True,
    transform={"pop_density": density_transform},
)

# Apply the accessor's match_distance and local_rankings steps, then order the
# rows by the first authority and by distance within it.
matched = pairwise.space.match_distance()
ranked = matched.space.local_rankings()
d = ranked.sort_values(by=["local-authority-code_A", "distance"])

# Write the result into the emissions_distance package (no index column in the CSV).
distance_map_csv = Path("data", "packages", "emissions_distance", "distance_map.csv")
d.to_csv(distance_map_csv, index=False)