# Merge PPFI and IMD (LSOA 2021 + LAD 2024) and export GeoJSON

This notebook prepares combined datasets for mapping and analysis by:
- Loading PPFI outputs (LSOA-level deciles + LAD-level scores)
- Loading IMD 2025 at both LSOA 2021 and LAD 2024
- Harmonising area identifiers (LSOA21CD, LAD24CD) and renaming columns
- Merging PPFI + IMD at:
  - LSOA 2021 level (England & Wales shapefile)
  - LAD 2024 level (UK LAD 2024 shapefile)
- Attaching geometry and exporting:
  - ppfi_imd_lsoa_england.geojson
  - ppfi_imd_lad_england.geojson

Outputs are filtered to England only (codes starting with E) for downstream use.

In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
# import lsoa and lad shapefiles
lad_shapes_2024 = gpd.read_file('LAD_MAY_2024_UK_BGC.shp')
lsoa_shapes = gpd.read_file('LSOA_2021_EW_BSC_V4.shp')

In [3]:
# ppfi
ppfi_lsoa = pd.read_csv('hasp_ppfi_v2.1.csv')
ppfi_lad = pd.read_csv('ppfi_final_lad.csv')

In [4]:
# imd
imd25 = pd.read_csv('File_7_IoD2025_All_Ranks_Scores_Deciles_Population_Denominators.csv')
imd_lad = pd.read_csv('IoD-2025-custom_data_download-LAD.csv')

In [5]:
imd_lad.columns

Index(['Local Authority District code (2024)',
       'Local Authority District name (2024)',
       'Index of Multiple Deprivation (IMD) Rank', 'Income Rank',
       'Employment Rank', 'Education Skills and Training Rank',
       'Health Deprivation and Disability Rank', 'Crime Rank',
       'Barriers to Housing and Services Rank', 'Living Environment Rank',
       'Income Deprivation Affecting Children Index (IDACI) Rank',
       'Income Deprivation Affecting Older People (IDAOPI) Rank'],
      dtype='object')

In [6]:
imd_lad = imd_lad.rename(columns={
        'Local Authority District code (2024)': 'LAD24CD',
        'Local Authority District name (2024)':'LAD24NM',
    'Index of Multiple Deprivation (IMD) Rank':'imd_rank',
    'Income Rank':'income_rank',
    'Employment Rank':'employment_rank',
    'Education Skills and Training Rank':'education_rank',
    'Health Deprivation and Disability Rank':'health_rank',
    'Crime Rank':'crime_rank',
    'Barriers to Housing and Services Rank':'barriers_rank',
    'Living Environment Rank':'living_env_rank',})

In [7]:
# renaming LSOA imd columns for ease
imd25 = imd25.rename(columns={
    'Index of Multiple Deprivation (IMD) Rank (where 1 is most deprived)': 'imd_rank',
    'Index of Multiple Deprivation (IMD) Decile (where 1 is most deprived 10% of LSOAs)': 'imd_decile',
    'Income Decile (where 1 is most deprived 10% of LSOAs)': 'imd_income_decile',
    'Employment Decile (where 1 is most deprived 10% of LSOAs)': 'imd_employment_decile',
    'Education, Skills and Training Decile (where 1 is most deprived 10% of LSOAs)': 'imd_education_decile',
    'Health Deprivation and Disability Decile (where 1 is most deprived 10% of LSOAs)': 'imd_health_decile',
    'Crime Decile (where 1 is most deprived 10% of LSOAs)': 'imd_crime_decile',
    'Barriers to Housing and Services Decile (where 1 is most deprived 10% of LSOAs)': 'imd_barriers_decile',
    'Living Environment Decile (where 1 is most deprived 10% of LSOAs)': 'imd_living_env_decile',
    'LSOA code (2021)':'LSOA21CD',
    'LSOA name (2021)':'LSOA21NM',})

In [8]:
# subsetting to only needed imd LSOA and LAD columns
imd25 = imd25[[
    'LSOA21CD', 'LSOA21NM',
    'imd_rank',
    'imd_decile',
    'imd_income_decile',
    'imd_employment_decile',
    'imd_education_decile',
    'imd_health_decile',
    'imd_crime_decile',
    'imd_barriers_decile',
    'imd_living_env_decile',]].copy()

imd_lad=imd_lad[[
    'LAD24CD','LAD24NM','imd_rank','income_rank','employment_rank','health_rank', 'education_rank','crime_rank','barriers_rank','living_env_rank',]].copy()

In [9]:
imd_lad.head()

Unnamed: 0,LAD24CD,LAD24NM,imd_rank,income_rank,employment_rank,health_rank,education_rank,crime_rank,barriers_rank,living_env_rank
0,E06000053,Isles of Scilly,221,296,296,296,293,296,13,1
1,E06000063,Cumberland,98,158,107,67,89,155,126,94
2,E06000009,Blackpool,1,10,1,1,9,2,254,21
3,E07000119,Fylde,188,191,149,109,213,218,215,135
4,E07000128,Wyre,150,149,99,69,178,180,242,120


In [10]:
# align the columns
ppfi_lsoa.rename(columns={'lsoa21cd':'LSOA21CD'},inplace=True)
ppfi_lad.rename(columns={'lad24cd':'LAD24CD'},inplace=True)

In [11]:
# subset ppfi lsoa and lad
ppfi_lsoa = ppfi_lsoa[[
    'LSOA21CD',
    'pp_dec_combined',
    'pp_dec_domain_supermarket_proximity',
    'pp_dec_domain_supermarket_accessibility',
    'pp_dec_domain_ecommerce_access',
    'pp_dec_domain_socio_demographic',
    'pp_dec_domain_nonsupermarket_proximity',
    'pp_dec_domain_food_for_families',
    'pp_dec_domain_fuel_poverty']].copy()
ppfi_lad = ppfi_lad[[
    'LAD24CD',
    'combined',
    'domain_supermarket_proximity',
    'domain_supermarket_accessibility',
    'domain_ecommerce_access',
    'domain_socio_demographic',
    'domain_nonsupermarket_proximity',
    'domain_food_for_families',
    'domain_fuel_poverty']].copy()

In [12]:
# merge IMD + PPFI lsoa level
ppfi_imd_lsoa = ppfi_lsoa.merge(imd25, on='LSOA21CD', how='inner')

In [13]:
# merge imd/ppfi lsoa with geometry
ppfi_imd_lsoa_gdf = ppfi_imd_lsoa.merge(lsoa_shapes, on='LSOA21CD', how='inner')

In [14]:
# merge IMD + PPFI lad level
ppfi_imd_lad = ppfi_lad.merge(imd_lad, on='LAD24CD', how='left')

In [15]:
ppfi_imd_lad.head()

Unnamed: 0,LAD24CD,combined,domain_supermarket_proximity,domain_supermarket_accessibility,domain_ecommerce_access,domain_socio_demographic,domain_nonsupermarket_proximity,domain_food_for_families,domain_fuel_poverty,LAD24NM,imd_rank,income_rank,employment_rank,health_rank,education_rank,crime_rank,barriers_rank,living_env_rank
0,E06000001,95,253,245,3,13,232,7,84,Hartlepool,30.0,32.0,8.0,5.0,80.0,24.0,261.0,224.0
1,E06000002,83,234,270,1,8,220,10,23,Middlesbrough,21.0,19.0,12.0,13.0,27.0,4.0,156.0,133.0
2,E06000003,74,198,256,6,52,199,67,88,Redcar and Cleveland,62.0,65.0,27.0,35.0,105.0,49.0,236.0,187.0
3,E06000004,116,219,247,22,82,218,33,156,Stockton on Tees,119.0,114.0,86.0,40.0,147.0,96.0,267.0,257.0
4,E06000005,104,225,241,14,75,244,16,104,Darlington,103.0,110.0,80.0,54.0,122.0,73.0,272.0,136.0


In [16]:
# merge imd/ppfi lsoa with geometry
ppfi_imd_lad_gdf = ppfi_imd_lad.merge(lad_shapes_2024, on='LAD24CD', how='left')

In [17]:
ppfi_imd_lsoa_gdf = gpd.GeoDataFrame(ppfi_imd_lsoa_gdf, geometry="geometry",crs='4326')
ppfi_imd_lad_gdf = gpd.GeoDataFrame(ppfi_imd_lad_gdf, geometry="geometry",crs='4326')

In [18]:
ppfi_imd_lad_england = ppfi_imd_lad_gdf[
    ppfi_imd_lad_gdf["LAD24CD"].str.startswith("E")
].copy()

ppfi_imd_lsoa_england = ppfi_imd_lsoa_gdf[ppfi_imd_lsoa_gdf['LSOA21CD'].str.startswith('E')].copy()

In [19]:
ppfi_imd_lad_england.head()

Unnamed: 0,LAD24CD,combined,domain_supermarket_proximity,domain_supermarket_accessibility,domain_ecommerce_access,domain_socio_demographic,domain_nonsupermarket_proximity,domain_food_for_families,domain_fuel_poverty,LAD24NM_x,...,barriers_rank,living_env_rank,LAD24NM_y,LAD24NMW,BNG_E,BNG_N,LONG,LAT,GlobalID,geometry
0,E06000001,95,253,245,3,13,232,7,84,Hartlepool,...,261.0,224.0,Hartlepool,,447161,531473,-1.27017,54.6761,8d37e356-5471-457e-879d-d687d4dcd9b5,"MULTIPOLYGON (((450164.5321 525723.4669, 45016..."
1,E06000002,83,234,270,1,8,220,10,23,Middlesbrough,...,156.0,133.0,Middlesbrough,,451141,516887,-1.21099,54.5447,30eaef71-04d1-4d40-a8da-8a4e2e7e0891,"MULTIPOLYGON (((446860 517200.3, 446854.8978 5..."
2,E06000003,74,198,256,6,52,199,67,88,Redcar and Cleveland,...,236.0,187.0,Redcar and Cleveland,,464330,519596,-1.00656,54.5675,a345a607-6a7e-41bd-a268-81e3741ddc76,"MULTIPOLYGON (((451747.3969 520561.1004, 45178..."
3,E06000004,116,219,247,22,82,218,33,156,Stockton on Tees,...,267.0,257.0,Stockton-on-Tees,,444940,518179,-1.30664,54.5569,d1db01d0-100b-4619-b606-f1208ff4c8cb,"MULTIPOLYGON (((446997.146 517642.744, 446963...."
4,E06000005,104,225,241,14,75,244,16,104,Darlington,...,272.0,136.0,Darlington,,428029,515648,-1.56835,54.5353,c2cc315b-a5c5-497c-8aa4-2994a4d4820b,"POLYGON ((436388.0024 522354.1971, 436406.3971..."


In [20]:
ppfi_imd_lsoa_england.to_file('ppfi_imd_lsoa_england.geojson', driver='GeoJSON')
ppfi_imd_lad_england.to_file('ppfi_imd_lad_england.geojson', driver='GeoJSON')