### Population data mapping

In [3]:
import os
from pathlib import Path
import pandas as pd
import geopandas as gpd
import numpy as np
from fiona.crs import from_epsg
from tqdm import tqdm
from shapely.geometry import Point

# Current working directory
basepath = Path.cwd().parent.parent.parent

# Read list of stations within model coverage area
raw_data = os.path.join(basepath, '01 Raw Data' )

In [4]:
### Load CSMT zone centroids
# Point layer of CSMT zone centroids (v20 connectors folder)
zone_centroid_shp = f'{raw_data}/07 Connectors/v20/zone_centroids_zone_centroid.SHP'
csmt_zones = gpd.read_file(zone_centroid_shp)

# Keep the layer's native CRS in `crs` and print it for reference
crs = csmt_zones.crs
print(crs)

csmt_zones.head()

PROJCS["British_National_Grid_TOWGS",GEOGCS["OSGB36",DATUM["Ordnance_Survey_of_Great_Britain_1936",SPHEROID["Airy 1830",6377563.396,299.3249646,AUTHORITY["EPSG","7001"]],TOWGS84[446.4,-125.2,542.1,0.15,0.247,0.842,-20.49],AUTHORITY["EPSG","6277"]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",49],PARAMETER["central_meridian",-2],PARAMETER["scale_factor",0.999601272],PARAMETER["false_easting",400000],PARAMETER["false_northing",-100000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]


Unnamed: 0,NO,MODEL_AREA,BUS_SUM,RAIL_SUM,geometry
0,101,External,0.0,85.22,POINT (483442.337 358139.545)
1,201,External,0.0,178.37,POINT (571805.254 263684.340)
2,301,External,0.0,497.37,POINT (548177.927 143853.334)
3,401,External,0.0,50.82,POINT (434860.371 494161.164)
4,505,External,0.0,221.27,POINT (352895.029 461898.387)


### Not required code (legacy CASM-to-CSMT zone mapping, kept commented out for reference)

In [5]:
# ### Load CASM Zoning Structure
# # Load zone  shapefile
# casm_zones = gpd.read_file(f'{raw_data}/15 CASM/CASM Zone_zone.SHP')
# print(casm_zones.crs)
# casm_zones.rename(columns={'NO':'zone_no','NAME':'name','XCOORD':'x_coord','YCOORD':'y_coord'},inplace=True)
# casm_zones = casm_zones[['zone_no','name','x_coord','y_coord','geometry']]
# casm_zones['zone_area'] = casm_zones.geometry.area
# casm_zones.head()

In [6]:
# #Map CASM Zones to CSMT Zones
# # Initialize an empty GeoDataFrame to store the intersections
# intersection_gdf = gpd.GeoDataFrame()
# csmt_zones['geometry'] = csmt_zones['geometry'].buffer(0)
# casm_zones['geometry'] = casm_zones['geometry'].buffer(0)
# # Iterate over each pair of geometries and calculate the intersections
# for idx1, geometry1 in csmt_zones.iterrows():
#     for idx2, geometry2 in casm_zones.iterrows():
#         intersection = geometry1['geometry'].intersection(geometry2['geometry'])
        
#         # Check if the intersection is not empty
#         if not intersection.is_empty:
#             intersection_gdf = pd.concat([intersection_gdf,(gpd.GeoDataFrame({'csmt_zone': geometry1['zone_no'],
#                                                                               'model_area': geometry1['model_area'],
#                                                                               'x_coord':geometry1['x_coord'],
#                                                                               'y_coord': geometry1['y_coord'],
#                                                                               'casm_zone':geometry2['zone_no'],
#                                                                               'casm_zone_area':geometry2['zone_area'],
#                                                                               'geometry': [intersection]},crs=crs))])

# intersection_gdf.drop_duplicates(inplace=True)
# intersection_gdf['geometry'] = intersection_gdf['geometry'].apply(lambda geom: geom.buffer(0) if geom.geom_type == 'LineString' else geom)
# intersection_gdf['int_area'] = intersection_gdf.geometry.area
# intersection_gdf['overlap_per'] = (intersection_gdf['int_area'] / intersection_gdf['casm_zone_area'] ) * 100
# intersection_gdf.head()

In [7]:
# csmt_zone_map=intersection_gdf[['casm_zone','csmt_zone','model_area','overlap_per']].sort_values(by='casm_zone')
# csmt_zone_map.head()

In [8]:
# ### Read CASM bus matrices (AM / IP / PM)
# casm_mtx = pd.read_csv(f'{raw_data}/15 CASM/CASM Bus matrices.csv',
#                        engine='python')
# casm_mtx = casm_mtx[['From','To','AM','IP','PM']]
# casm_mtx.head()
# casm_mtx.info()

In [9]:
# exp_df = []
# for idx, od in tqdm(casm_mtx.iterrows(), total = len(casm_mtx)):
#     csmt_o = csmt_zone_map[csmt_zone_map['casm_zone']==od['From']]
#     csmt_d = csmt_zone_map[csmt_zone_map['casm_zone']==od['To']]

#     for id1, org in csmt_o.iterrows():
#         for id2, dest in csmt_d.iterrows():
#             df = {}
#             df['org'] = org['csmt_zone']
#             df['dest'] = dest['csmt_zone']
#             overlap = org['overlap_per']*dest['overlap_per']/10000
#             df['am'] = overlap*od['AM']
#             df['ip'] = overlap*od['IP']
#             df['pm'] = overlap*od['PM']

#             exp_df.append(df)
# expanded_df = pd.DataFrame(exp_df)
# expanded_df.head()

### Sector mapping

In [10]:
### Read sectors
# Build the path from `raw_data` so the raw-data folder name is spelled in one
# place only. The previous literal used '01 Raw data', which differs in case
# from `raw_data` ('01 Raw Data') and would point at a different directory on a
# case-sensitive filesystem.
sectors_shp = f'{raw_data}/10 Shapefile Sectors/LSOA_Zone_Incl_Sectors_ExportFeatures.shp'

# Merge all features sharing a 'Sector' value into a single geometry per sector,
# then turn the 'Sector' index created by dissolve back into an ordinary column.
sectors = (
    gpd.read_file(sectors_shp)[['Sector', 'geometry']]
    .dissolve(by='Sector')
    .reset_index(drop=False)
)
print(sectors.crs)
sectors.head()

EPSG:27700


Unnamed: 0,Sector,geometry
0,Birmingham City Centre,"POLYGON ((406327.028 285730.499, 406326.517 28..."
1,Coventry Central,"POLYGON ((433883.000 279241.000, 433881.248 27..."
2,Coventry North East,"POLYGON ((435843.429 278946.109, 435846.191 27..."
3,Coventry North West,"POLYGON ((432538.656 279600.018, 432535.964 27..."
4,Coventry South East,"POLYGON ((433776.781 275611.094, 433736.642 27..."


In [11]:
# Save the dissolved sector polygons as a shapefile
sectors_out_shp = f'{basepath}/03 Output/10 Sectors/sectors.shp'
sectors.to_file(sectors_out_shp, encoding='utf-8')

In [15]:
# Preview of the centroid layer.
# NOTE(review): the recorded output shows lon/lat coordinates, i.e. this cell was
# last executed after the reprojection to EPSG:4326 in the following cell. On a
# fresh top-to-bottom run it displays the layer in its original projected CRS.
csmt_zones.head()

Unnamed: 0,NO,MODEL_AREA,BUS_SUM,RAIL_SUM,geometry
0,101,External,0.0,85.22,POINT (-0.75482 53.11392)
1,201,External,0.0,178.37,POINT (0.51505 52.24452)
2,301,External,0.0,497.37,POINT (0.11834 51.17473)
3,401,External,0.0,50.82,POINT (-1.46530 54.34182)
4,505,External,0.0,221.27,POINT (-2.72097 54.05088)


In [13]:
# Bring both layers into WGS84 (EPSG:4326) so the spatial join compares
# geometries in one common CRS; the two source layers report different CRS
# definitions when loaded.
wgs84 = "epsg:4326"
sectors = sectors.to_crs(wgs84)
csmt_zones = csmt_zones.to_crs(wgs84)

# Left join: every zone centroid is kept and receives the attributes of the
# sector polygon it lies within (no match leaves 'Sector' empty).
centroid_sector_join = gpd.sjoin(csmt_zones, sectors, how="left", predicate='within')

# Keep zone number, model area, geometry and sector only, under lower-case names
csmt_with_sector = (
    centroid_sector_join
    .drop(columns=['index_right', 'BUS_SUM', 'RAIL_SUM'])
    .rename(columns={'NO': 'zoneno', 'MODEL_AREA': 'model_area', 'Sector': 'sector'})
)



Unnamed: 0,zoneno,model_area,geometry,sector
0,101,External,POINT (-0.75482 53.11392),North and Scotland
1,201,External,POINT (0.51505 52.24452),East
2,301,External,POINT (0.11834 51.17473),South & London
3,401,External,POINT (-1.46530 54.34182),North and Scotland
4,505,External,POINT (-2.72097 54.05088),North and Scotland


In [27]:
### Manual sector assignment for zone 601
# Zone 601 is treated as missing a sector after the spatial join, so it is
# assigned by hand here.
is_zone_601 = csmt_with_sector['zoneno'] == 601
csmt_with_sector.loc[is_zone_601, 'sector'] = 'North and Scotland'
csmt_with_sector.head()

Unnamed: 0,zoneno,model_area,geometry,sector
0,101,External,POINT (-0.75482 53.11392),North and Scotland
1,201,External,POINT (0.51505 52.24452),East
2,301,External,POINT (0.11834 51.17473),South & London
3,401,External,POINT (-1.46530 54.34182),North and Scotland
4,505,External,POINT (-2.72097 54.05088),North and Scotland


In [28]:
# Sanity check: after the manual patch every zone should have a non-null sector
csmt_with_sector.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 846 entries, 0 to 845
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   zoneno      846 non-null    int64   
 1   model_area  846 non-null    object  
 2   geometry    846 non-null    geometry
 3   sector      846 non-null    object  
dtypes: geometry(1), int64(1), object(2)
memory usage: 33.0+ KB


In [16]:
### Load CSMT zone polygons
# Polygon geometry of the CSMT zones (the centroid layer was loaded earlier)
zone_polygon_shp = f'{raw_data}/07 Connectors/v20/zones_geometry_zone.SHP'
csmt_zones_polygon = gpd.read_file(zone_polygon_shp)

# Show the layer's CRS for reference
print(csmt_zones_polygon.crs)
csmt_zones_polygon.head()


PROJCS["British_National_Grid_TOWGS",GEOGCS["OSGB36",DATUM["Ordnance_Survey_of_Great_Britain_1936",SPHEROID["Airy 1830",6377563.396,299.3249646,AUTHORITY["EPSG","7001"]],TOWGS84[446.4,-125.2,542.1,0.15,0.247,0.842,-20.49],AUTHORITY["EPSG","6277"]],PRIMEM["Greenwich",0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",49],PARAMETER["central_meridian",-2],PARAMETER["scale_factor",0.999601272],PARAMETER["false_easting",400000],PARAMETER["false_northing",-100000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]


Unnamed: 0,NO,MODEL_AREA,XCOORD,YCOORD,geometry
0,101,External,483442.337,358139.545,"MULTIPOLYGON (((401565.070 368872.379, 401380...."
1,201,External,571805.254,263684.34,"MULTIPOLYGON (((501963.011 305786.877, 502524...."
2,301,External,548177.927,143853.334,"MULTIPOLYGON (((488290.273 149918.883, 488410...."
3,401,External,434860.371,494161.164,"POLYGON ((380989.322 636877.374, 381197.296 63..."
4,505,External,352895.029,461898.387,"MULTIPOLYGON (((331957.783 391954.992, 331916...."


In [17]:
# Inspect the last rows of the polygon layer as well
csmt_zones_polygon.tail()

Unnamed: 0,NO,MODEL_AREA,XCOORD,YCOORD,geometry
841,19015,FMA,439734.0,283278.0,"POLYGON ((442002.364 282450.696, 442020.319 28..."
842,19016,FMA,438788.0,284631.0,"POLYGON ((438301.042 285714.158, 438901.977 28..."
843,21861,FMA,429403.0,275752.0,"POLYGON ((433150.315 274749.928, 433196.349 27..."
844,21862,FMA,434745.0,275074.0,"POLYGON ((433708.371 275537.959, 433751.647 27..."
845,21863,FMA,432277.2963,271461.1357,"POLYGON ((433580.214 275423.669, 433708.371 27..."


In [29]:
# Attach each zone's polygon geometry to its sector assignment. The left join
# keeps every row of the sector table even when no polygon shares its zone number.
zone_sector_attrs = csmt_with_sector[['zoneno', 'sector', 'model_area']]
zone_polygons = csmt_zones_polygon[['NO', 'geometry']]
csmt_sector_correspondence_shp = zone_sector_attrs.merge(
    zone_polygons,
    how='left',
    left_on='zoneno',
    right_on='NO',
)
csmt_sector_correspondence_shp.head()

Unnamed: 0,zoneno,sector,model_area,NO,geometry
0,101,North and Scotland,External,101,"MULTIPOLYGON (((401565.070 368872.379, 401380...."
1,201,East,External,201,"MULTIPOLYGON (((501963.011 305786.877, 502524...."
2,301,South & London,External,301,"MULTIPOLYGON (((488290.273 149918.883, 488410...."
3,401,North and Scotland,External,401,"POLYGON ((380989.322 636877.374, 381197.296 63..."
4,505,North and Scotland,External,505,"MULTIPOLYGON (((331957.783 391954.992, 331916...."


In [30]:
# 'NO' was only needed as the join key and duplicates 'zoneno', so discard it
csmt_sector_correspondence_shp = csmt_sector_correspondence_shp.drop('NO', axis=1)
csmt_sector_correspondence_shp.head()

Unnamed: 0,zoneno,sector,model_area,geometry
0,101,North and Scotland,External,"MULTIPOLYGON (((401565.070 368872.379, 401380...."
1,201,East,External,"MULTIPOLYGON (((501963.011 305786.877, 502524...."
2,301,South & London,External,"MULTIPOLYGON (((488290.273 149918.883, 488410...."
3,401,North and Scotland,External,"POLYGON ((380989.322 636877.374, 381197.296 63..."
4,505,North and Scotland,External,"MULTIPOLYGON (((331957.783 391954.992, 331916...."


In [31]:
# Wrap the merged table in a GeoDataFrame tagged with the polygon layer's CRS,
# then renumber the rows from zero.
polygon_crs = csmt_zones_polygon.crs
csmt_sector_shp = gpd.GeoDataFrame(
    csmt_sector_correspondence_shp,
    geometry='geometry',
    crs=polygon_crs,
)
csmt_sector_shp = csmt_sector_shp.reset_index(drop=True)
csmt_sector_shp.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 846 entries, 0 to 845
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   zoneno      846 non-null    int64   
 1   sector      846 non-null    object  
 2   model_area  846 non-null    object  
 3   geometry    846 non-null    geometry
dtypes: geometry(1), int64(1), object(2)
memory usage: 26.6+ KB


In [23]:
# Preview the zone polygons together with their sector and model area
csmt_sector_shp.head()

Unnamed: 0,zoneno,sector,model_area,geometry
0,101,North and Scotland,External,"MULTIPOLYGON (((401565.070 368872.379, 401380...."
1,201,East,External,"MULTIPOLYGON (((501963.011 305786.877, 502524...."
2,301,South & London,External,"MULTIPOLYGON (((488290.273 149918.883, 488410...."
3,401,North and Scotland,External,"POLYGON ((380989.322 636877.374, 381197.296 63..."
4,505,North and Scotland,External,"MULTIPOLYGON (((331957.783 391954.992, 331916...."


In [32]:
# Export the zone polygons with their sector assignment as a shapefile
correspondence_shp_path = f'{basepath}/03 Output/10 Sectors/csmtzone_sector_correspondence.shp'
csmt_sector_shp.to_file(correspondence_shp_path, encoding='utf-8')

In [33]:

# Geometry-free copy of the zone -> sector correspondence for tabular use
correspondence_cols = ['zoneno', 'sector', 'model_area']
csmt_sector_correspondence_csv = csmt_with_sector[correspondence_cols].copy()

# to_csv is called with its defaults, so the DataFrame index is written as the
# first (unnamed) column of the file.
correspondence_csv_path = f'{basepath}/03 Output/10 Sectors/csmt_zone_correspondence.csv'
csmt_sector_correspondence_csv.to_csv(correspondence_csv_path)

### Extra code (exploratory: sector-to-sector aggregation of the CSMT matrix; requires `csmt_mtx`)

In [26]:
# csmt_with_sector = csmt_with_sector.to_crs("epsg:4326")
# csmt_with_sector.head()

In [62]:
# NOTE(review): the recorded output of this cell is stale. It lists the columns
# zone_no / x_coord / y_coord / org_sector, whereas the spatial-join cell above
# now produces zoneno / model_area / geometry / sector.
csmt_with_sector.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 846 entries, 0 to 845
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   zone_no     846 non-null    int64   
 1   model_area  846 non-null    object  
 2   x_coord     846 non-null    float64 
 3   y_coord     846 non-null    float64 
 4   geometry    846 non-null    geometry
 5   org_sector  845 non-null    object  
dtypes: float64(2), geometry(1), int64(1), object(2)
memory usage: 46.3+ KB


In [63]:
# NOTE(review): `csmt_mtx` is not created by any cell in the visible notebook,
# so this fails with NameError on a fresh kernel. Presumably it is the zone-to-zone
# matrix (org, dest, am, ip, pm) from a removed cell -- TODO restore or load it.
csmt_mtx.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 715716 entries, 0 to 715715
Data columns (total 5 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   org     715716 non-null  int64  
 1   dest    715716 non-null  int64  
 2   am      715716 non-null  float64
 3   ip      715716 non-null  float64
 4   pm      715716 non-null  float64
dtypes: float64(3), int64(2)
memory usage: 27.3 MB


In [64]:
# Sector look-up by zone number. `csmt_with_sector` currently exposes the columns
# 'zoneno' and 'sector'; the names 'zone_no' / 'org_sector' used here before no
# longer exist and would raise a KeyError on a fresh run.
# A separate frame is used so `csmt_with_sector` itself is left untouched and the
# cells above remain re-runnable.
zone_sector_lookup = csmt_with_sector[['zoneno', 'sector']]

# Column names are chosen so the merged frame keeps the layout
# org, dest, am, ip, pm, zone_no_x, org_sector, zone_no_y, dest_sector.
org_lookup = zone_sector_lookup.rename(columns={'zoneno': 'zone_no_x', 'sector': 'org_sector'})
dest_lookup = zone_sector_lookup.rename(columns={'zoneno': 'zone_no_y', 'sector': 'dest_sector'})

# Left joins keep every matrix row; a zone without a sector yields an empty sector.
# NOTE(review): `csmt_mtx` is not defined in the visible notebook -- it must be
# created or loaded before this cell runs.
csmt_mtx_w_sector = (
    csmt_mtx
    .merge(org_lookup, left_on='org', right_on='zone_no_x', how='left')
    .merge(dest_lookup, left_on='dest', right_on='zone_no_y', how='left')
)
csmt_mtx_w_sector.head()

Unnamed: 0,org,dest,am,ip,pm,zone_no_x,org_sector,zone_no_y,dest_sector
0,101,101,3.9e-05,0.0001430057,0.0002887233,101,North and Scotland,101,North and Scotland
1,101,201,0.001026,5.159339e-07,2.399924e-05,101,North and Scotland,201,East
2,101,301,0.000125,1.133165,1.196709e-06,101,North and Scotland,301,South & London
3,101,401,3.8e-05,8.910812e-10,3.895725e-11,101,North and Scotland,401,North and Scotland
4,101,505,0.001382,0.002173914,0.0001097749,101,North and Scotland,505,North and Scotland


In [65]:
# Total am / ip / pm values for every origin-sector / destination-sector pair
sector_pair = ['org_sector', 'dest_sector']
period_cols = ['am', 'ip', 'pm']
csmt_sec_grouped = csmt_mtx_w_sector.groupby(sector_pair)[period_cols].sum().reset_index()


org_sector     Birmingham City CentreBirmingham City CentreBi...
dest_sector    Birmingham City CentreCoventry CentralCoventry...
am                                                  55169.916368
ip                                                  53664.884489
pm                                                 103725.558925
dtype: object

In [66]:
# Preview the sector-to-sector totals
csmt_sec_grouped.head()

Unnamed: 0,org_sector,dest_sector,am,ip,pm
0,Birmingham City Centre,Birmingham City Centre,186.021045,282.694802,525.900634
1,Birmingham City Centre,Coventry Central,5.68107,17.483235,18.578631
2,Birmingham City Centre,Coventry North East,3.950018,41.130488,16.866361
3,Birmingham City Centre,Coventry North West,10.359408,37.93445,71.683459
4,Birmingham City Centre,Coventry South East,9.033409,29.423569,11.448233


In [67]:
# Write the sector-to-sector totals to CSV (index included, as per to_csv defaults)
sec_grouped_csv_path = f'{basepath}/03 Output/08 CASM in CSMT/csmt_sec_grouped.csv'
csmt_sec_grouped.to_csv(sec_grouped_csv_path)