# LSOA to TAZ Population Mapping

### Initialize Script
- Load Packages
- Set Input/Output Folder
- Load Zone and study area

In [2]:
import os
from pathlib import Path
import pandas as pd
import geopandas as gpd
import numpy as np
from fiona.crs import from_epsg

# Current working directory
basepath = os.path.dirname(os.getcwd())

# Read list of stations within model coverage area
raw_data = os.path.join(basepath, '01 Raw Data' )

In [3]:
study_area_boundary = gpd.read_file(f'{basepath}/03 Output/02 Shapefiles/study_area_boundary_zone_zone.SHP').to_crs(crs="EPSG:27700")
study_area_boundary

Unnamed: 0,NO,geometry
0,1,"POLYGON ((416250.434 281688.953, 416277.887 28..."


In [4]:
### Read LSOA shapefile and spatially filter it to csmt study area
lsoa_shp = gpd.read_file(f'{raw_data}/08 Pop data/LSOA_2021_EW_BGC.shp').to_crs(crs="EPSG:27700")
lsoa_filtered = gpd.sjoin(lsoa_shp, study_area_boundary, how='inner', predicate='intersects')[['LSOA21CD','geometry']].reset_index(drop=True)
lsoa_filtered

Unnamed: 0,LSOA21CD,geometry
0,E01009522,"POLYGON ((432119.974 280203.614, 432116.500 28..."
1,E01009523,"POLYGON ((427962.594 284962.406, 428250.313 28..."
2,E01009524,"POLYGON ((431920.188 282904.187, 431918.050 28..."
3,E01009525,"POLYGON ((432565.013 282816.038, 432539.615 28..."
4,E01009526,"POLYGON ((431551.426 282609.757, 431832.553 28..."
...,...,...
428,E01035028,"POLYGON ((453213.094 279580.500, 453507.500 27..."
429,E01035029,"POLYGON ((454350.688 277919.690, 454996.828 27..."
430,E01035096,"POLYGON ((440768.204 294251.877, 440812.900 29..."
431,E01035399,"POLYGON ((456037.783 273192.973, 456222.151 27..."


In [5]:
# Load zone  shapefile
zone_gdf = gpd.read_file(f'{raw_data}/07 Connectors/v18/zonecentroid_08032024_w demand3_zone.SHP').to_crs(crs="EPSG:27700")

## Filter csmt zones to study area which consists of AoDM and FMA 
study_area_zones = zone_gdf[(zone_gdf['MODEL_AREA']=='AoDM')|(zone_gdf['MODEL_AREA']=='FMA')].copy()
study_area_zones = study_area_zones.reset_index(drop=True)
study_area_zones

Unnamed: 0,NO,MODEL_AREA,XCOORD,YCOORD,BUS_DEMA~1,RAIL_DEM~2,geometry
0,1278,AoDM,435567.578000,283621.288000,5.182623,0.0,"POLYGON ((435367.042 283911.939, 435418.135 28..."
1,1281,AoDM,432823.554000,283635.298000,15.085163,0.0,"POLYGON ((433104.042 283543.939, 432973.042 28..."
2,1282,AoDM,433001.681936,283334.630000,7.992603,0.0,"POLYGON ((433311.093 283598.949, 433359.274 28..."
3,1283,AoDM,432666.955000,282852.641000,27.328212,0.0,"POLYGON ((433030.300 283197.731, 433114.793 28..."
4,1284,AoDM,435449.343656,282968.816762,4.699933,0.0,"POLYGON ((435166.593 283395.178, 435170.588 28..."
...,...,...,...,...,...,...,...
475,19015,FMA,439734.000000,283278.000000,0.693223,0.0,"POLYGON ((442053.744 282323.329, 442112.279 28..."
476,19016,FMA,438788.000000,284631.000000,0.196429,0.0,"POLYGON ((438301.042 285714.158, 438901.977 28..."
477,21861,FMA,429403.000000,275752.000000,1.399242,0.0,"POLYGON ((433150.315 274749.928, 433196.349 27..."
478,21862,FMA,434745.000000,275074.000000,3.813430,0.0,"POLYGON ((433801.967 275621.428, 433826.543 27..."


### Map LSOA to CSMT
- Since neither csmt zones or lsoa boundaries contains the other, neither one-to-one or one-to-many mapping are feasible solution. 
- Many-to-many mapping is used to create intersection gdf

In [6]:
# Initialize an empty GeoDataFrame to store the intersections
intersection_gdf = gpd.GeoDataFrame()
crs = 'EPSG:27700'
study_area_zones['geometry'] = study_area_zones['geometry'].buffer(0)
lsoa_filtered['geometry'] = lsoa_filtered['geometry'].buffer(0)
# Iterate over each pair of geometries and calculate the intersections
for idx1, geometry1 in study_area_zones.iterrows():
    for idx2, geometry2 in lsoa_filtered.iterrows():
        intersection = geometry1['geometry'].intersection(geometry2['geometry'])
        
        # Check if the intersection is not empty
        if not intersection.is_empty:
            intersection_gdf = pd.concat([intersection_gdf,(gpd.GeoDataFrame({'zone': geometry1['NO'],
                                                                              'model_area': geometry1['MODEL_AREA'],
                                                                              'LSOA21CD':geometry2['LSOA21CD'],
                                                                              'geometry': [intersection]},crs=crs))])

intersection_gdf.drop_duplicates(inplace=True)
intersection_gdf['geometry'] = intersection_gdf['geometry'].apply(lambda geom: geom.buffer(0) if geom.geom_type == 'LineString' else geom)
# intersection_gdf.to_file(f'{basepath}/03 Output/05 Pop Map/intersection_map.shp',encoding='utf-8')
intersection_gdf.head()

Unnamed: 0,zone,model_area,LSOA21CD,geometry
0,1278,AoDM,E01009584,"MULTIPOLYGON (((435982.583 283582.537, 435973...."
0,1278,AoDM,E01009604,"POLYGON ((435552.325 283343.834, 435420.247 28..."
0,1278,AoDM,E01009605,"MULTIPOLYGON (((436136.504 283376.157, 435964...."
0,1278,AoDM,E01032536,"MULTIPOLYGON (((435514.042 283959.939, 435538...."
0,1278,AoDM,E01032537,"POLYGON ((435418.135 283978.940, 435476.470 28..."
...,...,...,...,...
0,21863,FMA,E01031278,"MULTIPOLYGON (((435158.353 269800.842, 435087...."
0,21863,FMA,E01031280,"MULTIPOLYGON (((435148.230 272961.126, 435070...."
0,21863,FMA,E01031284,"MULTIPOLYGON (((430289.307 270019.739, 430172...."
0,21863,FMA,E01031300,"POLYGON ((431178.023 272080.676, 431174.911 27..."


In [7]:
### Calculate size of overlap intersection polygons

intersection_gdf['int_area'] = intersection_gdf.geometry.area
lsoa_filtered['lsoa_area'] = lsoa_filtered.geometry.area
merged_gdf = pd.merge(intersection_gdf, lsoa_filtered, how='inner', on='LSOA21CD')
merged_gdf['overlap_per'] = (merged_gdf['int_area'] / merged_gdf['lsoa_area']) * 100

### Remove redundant attributes
disagg_gdf = merged_gdf[['zone','model_area','LSOA21CD','int_area','overlap_per']].copy()
disagg_gdf.drop_duplicates(inplace=True)
disagg_gdf.head()


Unnamed: 0,zone,model_area,LSOA21CD,geometry_x,int_area,geometry_y,lsoa_area,overlap_per
0,1278,AoDM,E01009584,"MULTIPOLYGON (((435982.583 283582.537, 435973....",8.000967,"POLYGON ((437328.039 283661.330, 437745.808 28...",1.802186e+06,0.000444
1,1297,AoDM,E01009584,"MULTIPOLYGON (((436802.514 282711.042, 436838....",5.506898,"POLYGON ((437328.039 283661.330, 437745.808 28...",1.802186e+06,0.000306
2,1301,AoDM,E01009584,"MULTIPOLYGON (((436262.666 283035.929, 436360....",9.926433,"POLYGON ((437328.039 283661.330, 437745.808 28...",1.802186e+06,0.000551
3,1303,AoDM,E01009584,"POLYGON ((437576.070 282398.401, 437577.039 28...",0.706873,"POLYGON ((437328.039 283661.330, 437745.808 28...",1.802186e+06,0.000039
4,1307,AoDM,E01009584,"MULTIPOLYGON (((437657.140 282767.019, 437576....",31.432140,"POLYGON ((437328.039 283661.330, 437745.808 28...",1.802186e+06,0.001744
...,...,...,...,...,...,...,...,...
2411,14173,AoDM,E01034743,"POLYGON ((433587.450 279206.002, 433464.000 27...",37161.271682,"POLYGON ((433443.000 279399.999, 433380.110 27...",2.909349e+05,12.773055
2412,14174,AoDM,E01034743,"POLYGON ((433220.102 279219.863, 433122.359 27...",58911.020152,"POLYGON ((433443.000 279399.999, 433380.110 27...",2.909349e+05,20.248869
2413,14181,AoDM,E01034743,"POLYGON ((432993.453 279051.116, 432970.867 27...",6.941856,"POLYGON ((433443.000 279399.999, 433380.110 27...",2.909349e+05,0.002386
2414,14183,AoDM,E01034743,"POLYGON ((433111.864 278899.809, 433042.013 27...",40.813402,"POLYGON ((433443.000 279399.999, 433380.110 27...",2.909349e+05,0.014028


### Read LSOA population data from ONS

In [9]:
### Read population data 
lsoa_pop = pd.read_csv(f'{raw_data}/08 Pop data/sape23dt11mid2020lsoapopulationdensity.csv',
                       skiprows=4, 
                       skipfooter=1, 
                       engine='python')
lsoa_pop.rename(columns={'LSOA Code':'LSOA21CD'},inplace=True)
lsoa_pop.head()

Unnamed: 0,LSOA21CD,LSOA Name,Mid-2020 population,Area Sq Km,People per Sq Km
0,E01011949,Hartlepool 009A,1944,0.52,3746
1,E01011950,Hartlepool 008A,1298,0.13,9796
2,E01011951,Hartlepool 007A,1208,0.21,5791
3,E01011952,Hartlepool 002A,1724,0.46,3715
4,E01011953,Hartlepool 002B,2026,0.88,2294


In [10]:
### Merge population data to intersection gdf

disagg_gdf_pop = pd.merge(disagg_gdf, lsoa_pop[['LSOA21CD','Mid-2020 population',]], how='inner', on='LSOA21CD')
disagg_gdf_pop['Mid-2020 population'] = disagg_gdf_pop['Mid-2020 population'].str.replace(',', '').astype(float)
disagg_gdf_pop['pop'] = (disagg_gdf_pop['overlap_per']*disagg_gdf_pop['Mid-2020 population']/100).round(0)
disagg_gdf_pop.head()


Unnamed: 0,zone,model_area,LSOA21CD,int_area,overlap_per,Mid-2020 population,pop
0,1278,AoDM,E01009584,8.000967,0.000444,1891.0,0.0
1,1297,AoDM,E01009584,5.506898,0.000306,1891.0,0.0
2,1301,AoDM,E01009584,9.926433,0.000551,1891.0,0.0
3,1303,AoDM,E01009584,0.706873,3.9e-05,1891.0,0.0
4,1307,AoDM,E01009584,31.43214,0.001744,1891.0,0.0


In [11]:
### groupby csmt zones to aggregate overlap percentage and population by zone

zonal_pop = disagg_gdf_pop.groupby(['zone','model_area']).agg(pop = ('pop','sum'),overlap_per = ('overlap_per','sum')).reset_index()
zonal_pop['overlap_per'] = zonal_pop['overlap_per'].round(0)
zonal_pop['pop'] = zonal_pop['pop'].round(0)
zonal_pop.to_csv('zonal_population_AoDM_FMA.csv')



Unnamed: 0,zone,model_area,pop,overlap_per
0,1278,AoDM,2050.0,100.0
1,1281,AoDM,1749.0,100.0
2,1282,AoDM,1692.0,100.0
3,1283,AoDM,2080.0,100.0
4,1284,AoDM,1472.0,100.0


In [14]:
### Add geometry for mapping it in GIS file and verify visually
zonal_pop_geom = pd.merge(zonal_pop,study_area_zones[['NO','geometry']],left_on='zone',right_on='NO', how='left')
zonal_pop_geom.head()
zonal_pop_geom = gpd.GeoDataFrame(zonal_pop_geom, geometry='geometry')
zonal_pop_geom.to_file(f'{basepath}/03 Output/05 Pop Map/zone_pop_map.shp',encoding='utf-8')

  zonal_pop_geom.to_file(f'{basepath}/03 Output/05 Pop Map/zone_pop_map.shp',encoding='utf-8')
