In [64]:
import pandas as pd
import os
from pathlib import Path

# The project root is the directory that contains pyproject.toml. Walk up from
# the current working directory until it is found, so the relative data paths
# below resolve regardless of where the notebook was launched from.
while not (Path.cwd() / "pyproject.toml").is_file():
    if Path.cwd().parent == Path.cwd():
        # At the filesystem root chdir("..") is a no-op, so without this guard
        # the loop would spin forever when no pyproject.toml exists above us.
        raise FileNotFoundError(
            "pyproject.toml not found in the working directory or any parent"
        )
    os.chdir("..")

# Folder holding the overlap parquet files, relative to the project root.
overlap_package = Path("data", "packages", "geographic_overlaps")
# File-name template filled with two geography codes, e.g. ("PARL25", "LAD23").
# NOTE(review): this name shadows the built-in format(); it is kept because
# later cells call format.format(...).
format = "{0}_{1}_combo_overlap.parquet"

In [25]:
# How many PARL25 constituencies overlap more than one LAD23 local authority?

df = pd.read_parquet(
    overlap_package / format.format("PARL25", "LAD23"), engine="pyarrow"
)

# Flag rows whose population overlap share exceeds 5%, then keep only those.
df = df.assign(over_five=df["percentage_overlap_pop"].gt(0.05))
df = df.loc[df["over_five"]]

# One row per constituency, holding the count of its remaining LAD23 entries.
pt = df.pivot_table(
    index="PARL25",
    values="LAD23",
    aggfunc="count",
    fill_value=0,
)

# Tally constituencies by the number of local authorities they overlap.
new_counts = pt["LAD23"].value_counts()
new_counts

1    404
2    220
3     26
Name: LAD23, dtype: int64

In [26]:
# How many PARL10 constituencies overlap more than one LAD23 local authority?

df = pd.read_parquet(
    overlap_package / format.format("PARL10", "LAD23"), engine="pyarrow"
)

# Flag rows whose population overlap share exceeds 5%, then keep only those.
df = df.assign(over_five=df["percentage_overlap_pop"].gt(0.05))
df = df.loc[df["over_five"]]

# One row per constituency, holding the count of its remaining LAD23 entries.
pt = df.pivot_table(
    index="PARL10",
    values="LAD23",
    aggfunc="count",
    fill_value=0,
)

# Tally constituencies by the number of local authorities they overlap.
old_counts = pt["LAD23"].value_counts()
old_counts

1    478
2    155
3     16
4      1
Name: LAD23, dtype: int64

In [42]:
# Line the two tallies up side by side, keyed by number of local authorities.
# A count that is absent from one of the tallies becomes 0.
count_columns = {"2010_cons": old_counts, "2025_cons": new_counts}
combo = pd.DataFrame(count_columns)
combo = combo.fillna(0).astype(int)
combo

Unnamed: 0,2010_cons,2025_cons
1,478,404
2,155,220
3,16,26
4,1,0


In [46]:
# Express every count as a whole-number percentage string of 650.
# NOTE(review): 650 is presumably the total number of constituencies — confirm.
shares = combo.div(650)
percent_combo = shares.applymap(lambda share: f"{share * 100:.0f}%")
percent_combo

Unnamed: 0,2010_cons,2025_cons
1,74%,62%
2,24%,34%
3,2%,4%
4,0%,0%


In [50]:
# Put the raw counts next to their percentage equivalents; the percentage
# columns take a "%" suffix so they do not clash with the count columns.
final = combo.join(percent_combo, rsuffix="%")

# Name the index before turning it into a column. Renaming a column called
# "index" after reset_index() only works while the index is unnamed and
# silently does nothing when the index already carries a name.
final = final.rename_axis("overlap with 2023 LADs").reset_index()

final.to_csv(Path("data", "interim", "overlap.csv"), index=False)
final

Unnamed: 0,overlap with 2023 LADs,2010_cons,2025_cons,2010_cons%,2025_cons%
0,1,478,404,74%,62%
1,2,155,220,24%,34%
2,3,16,26,2%,4%
3,4,1,0,0%,0%


In [90]:
# minority population - people with a different MP to most of their area
#
# For each constituency set, sum overlap_pop separately for overlaps whose
# percentage_overlap_pop is below the threshold (True) and for the rest
# (False), then express both sums as a share of their combined total.

dfs = []

for parl in ["PARL10", "PARL25"]:
    # The file does not depend on the threshold, so read it once per
    # constituency set rather than once per threshold.
    # overlap_pop is renamed to the constituency-set code so that the
    # single-row pivot below is labelled with that code.
    overlaps = pd.read_parquet(
        overlap_package / format.format(parl, "LAD23"), engine="pyarrow"
    ).rename(columns={"overlap_pop": parl})

    for threshold in [0.5]:
        flagged = overlaps.assign(
            less_than_threshold=overlaps["percentage_overlap_pop"] < threshold
        )

        pt = (
            flagged.pivot_table(
                columns="less_than_threshold", values=parl, aggfunc="sum"
            )
            # Guarantee both columns exist even if a threshold leaves one
            # side empty; an empty side contributes a population of 0.
            .reindex(columns=[False, True], fill_value=0)
            .reset_index()
            .rename(columns={"index": "parl"})
        )

        pt["total"] = pt[True] + pt[False]
        pt["True_per"] = pt[True] / pt["total"]
        pt["False_per"] = pt[False] / pt["total"]
        # Put the threshold in the first column.
        pt.insert(0, "threshold", threshold)
        dfs.append(pt)

df = pd.concat(dfs).sort_values(["threshold", "parl"])  # type: ignore
df

less_than_threshold,threshold,parl,False,True,total,True_per,False_per
0,0.5,PARL10,61517372.63,5253884.51,66771257.14,0.078685,0.921315
0,0.5,PARL25,59174291.55,7605681.79,66779973.34,0.113892,0.886108


In [None]:
# The share of population in a "minority" overlap (True_per) rises from
# about 8% under PARL10 to about 11% under PARL25.
# Interesting, but not sure how to interpret it.