In [17]:
#HIDE
# Optional bootstrap: any failure while importing `setup` is silently ignored.
# NOTE(review): the bare `except` also swallows SystemExit/KeyboardInterrupt —
# confirm whether that breadth is intentional before narrowing it.
try:
    import setup
except:
    pass
# Star import: names used later without an explicit import in this notebook
# (`pd`, `Path`, `List`) are presumably provided here — TODO confirm.
from notebook_helper import *
notebook_setup()
# NOTE(review): `la` is never referenced by name below; presumably imported for its
# side effects (e.g. registering the `.space` / `.joint_space` accessors) — TODO confirm.
from modules import la
from functools import reduce  # folds the per-measure label frames into one joined frame
from itertools import product  # enumerates every combination of labels / descriptions


# Composite distance dataset

This notebook creates a new set of distance calculations based on all the individual distances.

Rather than averaging the distance between two councils across the different measures, the overall distance treats each measure of distance as a separate dimension, and calculates the distance between the councils in the resulting multi-dimensional space. 

All measures are currently assumed to be equal and are not weighted to make any one of them more important. As there is some overlap in results between them, in general this approach should push councils that are slightly similar on multiple approaches further up the rankings. Generally, this composite measure has greater agreement with its components than any single measure does with the others. 

In [18]:
# Where each measure's pairwise distance map lives under data/outputs:
# measure name -> (sub-folder, file name).
distance_sources = {
    "geographic": ("geographic_distance", "complete_distance_map.csv"),
    "emissions": ("emissions", "distance_map.csv"),
    "imd": ("imd", "distance_map.csv"),
    "ruc": ("ruc", "distance_map.csv"),
}

# `files` maps each measure name to its loaded distance table.
files = {
    measure: pd.read_csv(Path("data", "outputs", folder, filename))
    for measure, (folder, filename) in distance_sources.items()
}

In [19]:
# Build the composite distance table one named stage at a time so that each
# intermediate frame can be inspected.
# NOTE(review): the `.space` / `.joint_space` accessors are defined outside this
# notebook; the stage names below only mirror the method names being called.
joined_distances = pd.DataFrame().space.join_distance(files)
composite_distances = joined_distances.joint_space.composite_distance(normalize=False)
matched_distances = composite_distances.space.match_distance()
df = matched_distances.space.local_rankings()
df.head()


Unnamed: 0,local-authority-code_A,local-authority-code_B,distance,match,position
0,ABC,MEA,1.99,79.2,1.0
1,ABC,ANN,2.37,75.3,2.0
2,ABC,NMD,2.54,73.5,3.0
3,ABC,CCG,3.04,68.3,4.0
4,ABC,MUL,3.08,67.9,5.0


In [20]:
# Write the composite distance map. The output folder is created first because
# `to_csv` does not create missing parent directories and would raise on a fresh
# checkout where data/outputs/composite does not exist yet.
composite_distance_path = Path("data", "outputs", "composite", "distance_map.csv")
composite_distance_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(composite_distance_path, index=False)

## Create composite labels

In [21]:
# Sub-folder of data/outputs holding each measure's la_labels.csv.
# Order matters: it fixes the column order of the joined frame and therefore the
# order in which labels are concatenated later.
label_folders = {
    "ruc": "ruc",
    "emissions": "emissions",
    "imd": "imd",
    "geographic": "geographic_distance",
}

# One frame per measure, indexed by council code, with its "label" column
# renamed to the measure name so the frames can be joined side by side.
files = []
for measure, folder in label_folders.items():
    measure_labels = pd.read_csv(Path("data", "outputs", folder, "la_labels.csv"))
    measure_labels = measure_labels.set_index("local-authority-code")
    files.append(measure_labels.rename(columns={"label": measure}))

# Fold the frames together with successive index joins (left to right).
df = reduce(lambda left, right: left.join(right), files)

# Prefix the emissions label so it stays identifiable inside the combined label.
df["emissions"] = "Emissions: " + df["emissions"]

def f(x):
    """Concatenate the strings in ``x`` into one string, separated by "; "."""
    separator = "; "
    return separator.join(x)

# Collapse each row's per-measure labels into a single string, then turn the
# result into a two-column frame: the former index plus "label".
combined_labels = df.apply(f, axis=1)
ndf = combined_labels.to_frame().rename(columns={0: "label"}).reset_index()
ndf.head()

Unnamed: 0,local-authority-code,label
0,ABC,Sparse and rural; Emissions: Industry/domestic...
1,ABD,Sparse and rural; Emissions: Industry/domestic...
2,ABE,Urban; Emissions: Public sector; 4th IMD quint...
3,ADU,Urban; Emissions: Urban mainstream; 4th IMD qu...
4,AGB,Sparse and rural; Emissions: Industry/domestic...


In [22]:
# Write the composite labels. The output folder is created first because
# `to_csv` does not create missing parent directories and would raise on a fresh
# checkout where data/outputs/composite does not exist yet.
composite_labels_path = Path("data", "outputs", "composite", "la_labels.csv")
composite_labels_path.parent.mkdir(parents=True, exist_ok=True)
ndf.to_csv(composite_labels_path, index=False)

## Make label descriptions

Get all combinations of descriptions

In [23]:
# Sub-folder of data/outputs holding each measure's label_desc.csv.
# Order matters: labels and descriptions are combined in this order.
desc_folders = {
    "ruc": "ruc",
    "emissions": "emissions",
    "imd": "imd",
    "geographic": "geographic_distance",
}

files = {
    measure: pd.read_csv(Path("data", "outputs", folder, "label_desc.csv"))
    for measure, folder in desc_folders.items()
}

# Apply the same "Emissions: " prefix that the composite labels use.
emissions_desc = files["emissions"]
emissions_desc["label"] = "Emissions: " + emissions_desc["label"]

# Blank out the geographic descriptions; `process_description` drops empty
# entries, so they contribute nothing to the combined description.
files["geographic"]["desc"] = None

# Collect, per measure, the list of labels and the parallel list of descriptions.
labels = []
descriptions = []
for frame in files.values():
    labels.append(frame["label"].tolist())
    descriptions.append(frame["desc"].tolist())

# Every combination of one label per measure, joined into a composite label.
labels = pd.Series(product(*labels)).apply("; ".join)

def process_description(descriptions: List[str]) -> str:
    """Combine per-measure descriptions into one composite description.

    Args:
        descriptions: One description per measure (in practice a tuple produced
            by ``itertools.product``). Entries may be ``None``, NaN or empty when
            a measure has no description.

    Returns:
        The non-empty string descriptions joined with "; ", with every
        occurrence of "Councils " removed.
    """
    # Keep only real, non-empty strings. A plain truthiness test is not enough:
    # NaN (what pandas yields for an empty CSV cell) is truthy and would make
    # "; ".join raise a TypeError.
    usable = [x for x in descriptions if isinstance(x, str) and x]
    return "; ".join(usable).replace("Councils ", "")

# Every combination of one description per measure, in the same order as the
# label combinations, reduced to a single composite description each.
description_combos = pd.Series(product(*descriptions))
descriptions = description_combos.apply(process_description)

# Pair each composite label with its composite description.
df = pd.DataFrame({"label": labels, "desc": descriptions})
df.head()


Unnamed: 0,label,desc
0,Sparse and rural; Emissions: Industry/domestic...,Local authority predominately made up of large...
1,Sparse and rural; Emissions: Industry/domestic...,Local authority predominately made up of large...
2,Sparse and rural; Emissions: Industry/domestic...,Local authority predominately made up of large...
3,Sparse and rural; Emissions: Industry/domestic...,Local authority predominately made up of large...
4,Sparse and rural; Emissions: Industry/domestic...,Local authority predominately made up of large...


In [24]:
# Write the composite label descriptions. The output folder is created first
# because `to_csv` does not create missing parent directories and would raise on
# a fresh checkout where data/outputs/composite does not exist yet.
composite_desc_path = Path("data", "outputs", "composite", "label_desc.csv")
composite_desc_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(composite_desc_path, index=False)