In [23]:
from pathlib import Path
import geopandas as gpd
import pandas as pd

In [24]:
REF_DATA = Path('../../data/reference')

In [25]:
pcon21 = gpd.read_file(REF_DATA / 'pcon.geojson')[["PCON21CD", "PCON21NM"]].set_index("PCON21NM")

The conventional names seem to have changed to omit a comma if a name of city included in constituency name. Need to clean these up

In [26]:
pcon21.index = pcon21.index.str.replace(r'(Birmingham|Brighton|Ealing|Enfield|Lewisham|Liverpool|Manchester|Plymouth|Sheffield|Southampton),', r'\1', regex=True)

In [27]:
pcon24 = gpd.read_file(REF_DATA / 'uk-constituencies-2024.geojson')[["PCON24CD", "PCON24NM"]]

In [28]:
mappings = pd.read_csv(REF_DATA / 'pcon_mapping.csv')

In [29]:
weights = mappings.groupby('PCON24CD')[['PCON22CD', 'weight']].apply(lambda x: x.set_index('PCON22CD').to_dict()['weight']).to_frame().rename(columns={0: 'weights'})

In [30]:
changes = pcon24.merge(
  pcon21,
  left_on="PCON24NM",
  right_index=True,
  how="outer"
).sort_values('PCON24CD').query('PCON24CD.notna()').set_index('PCON24CD').rename(columns={'PCON21CD': 'PCON10CD'}).merge(
  weights,
  left_index=True,
  right_index=True
)

In [31]:
data_dir = Path('../../data/reference/')

In [32]:
changes.to_json(data_dir / "uk-constituencies-2024-changes.json", orient="index", indent=2)

In [33]:
changes

Unnamed: 0_level_0,PCON24NM,PCON10CD,weights
PCON24CD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
E14001063,Aldershot,E14000530,"{'E14000530': 0.928, 'E14000844': 0.072}"
E14001064,Aldridge-Brownhills,E14000531,"{'E14000531': 0.826, 'E14001012': 0.173}"
E14001065,Altrincham and Sale West,E14000532,{'E14000532': 1.0}
E14001066,Amber Valley,E14000533,"{'E14000533': 0.998, 'E14000814': 0.002}"
E14001067,Arundel and South Downs,E14000534,"{'E14000534': 0.581, 'E14000576': 0.002, 'E140..."
...,...,...,...
W07000108,Swansea West,W07000047,"{'W07000047': 0.436, 'W07000048': 0.564}"
W07000109,Torfaen,W07000053,"{'W07000053': 0.932, 'W07000054': 0.068}"
W07000110,Vale of Glamorgan,W07000078,{'W07000078': 1.0}
W07000111,Wrexham,W07000044,"{'W07000044': 0.352, 'W07000062': 0.648}"


Let's take a look at the English changes

In [34]:
changes_e = changes.reset_index()[(changes.PCON10CD.str.startswith("E", na=False) | changes.reset_index().PCON24CD.str.startswith("E", na=False))]

  changes_e = changes.reset_index()[(changes.PCON10CD.str.startswith("E", na=False) | changes.reset_index().PCON24CD.str.startswith("E", na=False))]


In [35]:
import pandas as pd
pd.Series(
    index=[
      'equivalence_found',
      'new',
      'retired',
      'count_21',
      'count_24',
    ],
    data=[
      (changes_e.PCON24CD.notna() & changes_e.PCON10CD.notna()).sum(),
      (changes_e.PCON24CD.notna() & changes_e.PCON10CD.isna()).sum(),
      (changes_e.PCON24CD.isna() & changes_e.PCON10CD.notna()).sum(),
      (changes_e.PCON10CD.notna()).sum(),
      (changes_e.PCON24CD.notna()).sum(),
    ]
)

equivalence_found    0
new                  0
retired              0
count_21             0
count_24             0
dtype: int64

In [36]:
changes_e[changes_e.PCON10CD.isna()]

Unnamed: 0,PCON24CD,PCON24NM,PCON10CD,weights


There has been a net increase of ten English constituencies, with two coming from Scotland and eight from Wales. Northern Irish constituency counts are the same.

In [37]:
pd.concat([
  pcon21.PCON21CD.str.slice(0, 1).reset_index().rename(columns={'PCON21CD': 'Nation'}).groupby('Nation').count(),
  pcon24.PCON24CD.str.slice(0, 1).reset_index().rename(columns={'PCON24CD': 'Nation'}).groupby('Nation').count()
], axis=1)

Unnamed: 0_level_0,PCON21NM,index
Nation,Unnamed: 1_level_1,Unnamed: 2_level_1
E,533,543
N,18,18
S,59,57
W,40,32
