# Generate Connectors

### Initialize Script
- Load Packages
- Set Input/Output Folder
- Load Zone and check the attributes and datatypes

In [2]:
### Load Packages
import geopandas as gpd
from shapely.geometry import Point, LineString
from fiona.crs import from_epsg
import os
import pandas as pd
import numpy as np
import math


In [3]:
### Set input/output path

# Project root: the parent folder of the notebook's working directory
basepath = os.path.split(os.getcwd())[0]

# Raw-data (input) and output folders sit directly under the project root
raw_data, output_path = (
    os.path.join(basepath, folder) for folder in ('01 Raw Data', '03 Output')
)

In [4]:
# Load zone centroid shapefile and rename the demand columns to the lower-case
# names used by the connector cells
zone_centroids = (
    gpd.read_file(f'{raw_data}/07 Connectors/v20/zone_centroids_zone_centroid.SHP')
    .rename(columns={'BUS_SUM': 'bus_sum', 'RAIL_SUM': 'rail_sum'})
)

# CRS of the zone layer; reused for every layer created later in the notebook
crs = zone_centroids.crs

# Fail early, with a readable message, if an attribute read by the connector cells is missing
required_columns = ['NO', 'MODEL_AREA', 'bus_sum', 'rail_sum']
missing_columns = [col for col in required_columns if col not in zone_centroids.columns]
if missing_columns:
    raise KeyError(f'Zone centroid shapefile is missing expected columns: {missing_columns}')

# A bare expression in the middle of a cell is discarded, so the zone counts are
# printed explicitly and the preview is left as the cell's final expression.
print(zone_centroids['MODEL_AREA'].value_counts())
zone_centroids.head()

Unnamed: 0,NO,MODEL_AREA,bus_sum,rail_sum,geometry
0,101,External,0.0,85.22,POINT (483442.337 358139.545)
1,201,External,0.0,178.37,POINT (571805.254 263684.340)
2,301,External,0.0,497.37,POINT (548177.927 143853.334)
3,401,External,0.0,50.82,POINT (434860.371 494161.164)
4,505,External,0.0,221.27,POINT (352895.029 461898.387)


### Initial Checks
- Load nodes shapefile for Bus and Rail Stops
- Load the busiest rail and bus stops for the external zones. These were identified from the PT supply as the stops with the highest count of served vehicle journeys
- Check that all of these stops exist in the node shapefiles. This check is necessary whenever a new HAM network file is received


In [22]:
# Check Busiest bus stops

# Bus stop node shapefile
nodes_bus = gpd.read_file(f'{raw_data}/07 Connectors/v21/bus_stop_node_node.SHP', crs=crs)

# CSV file containing the busiest bus stops for external zones
busiest_bus_stops_df = pd.read_csv(f'{raw_data}/07 Connectors/v21/Busiest Bus Stops for Ext Zones.csv')

# Flag every CSV row whose 'Stop Node' has a matching 'NO' in nodes_bus,
# then keep only the rows without a match
bus_stop_is_known = busiest_bus_stops_df['Stop Node'].isin(nodes_bus['NO'])
missing_nodes = busiest_bus_stops_df[~bus_stop_is_known]

print("Nodes in 'busiest_bus_stops_df' that do not exist in 'nodes_bus':")
print(missing_nodes)

Nodes in 'busiest_bus_stops_df' that do not exist in 'nodes_bus':
Empty DataFrame
Columns: [Zone, Stop Node]
Index: []


In [15]:
# Check Busiest Rail nodes

# Rail stop node shapefile
nodes_rail = gpd.read_file(f'{raw_data}/07 Connectors/v21/train_stop_node_node.SHP',crs=crs)

# CSV file containing the busiest rail stops for external zones
busiest_rail_stops_df = pd.read_csv(f'{raw_data}/07 Connectors/v21/Busiest Rail Stops for Ext Zones.csv')

# Flag every CSV row whose 'Stop Node' has a matching 'NO' in nodes_rail,
# then keep only the rows without a match
rail_stop_is_known = busiest_rail_stops_df['Stop Node'].isin(nodes_rail['NO'])
missing_nodes_2 = busiest_rail_stops_df[~rail_stop_is_known]

print("Nodes in 'busiest_rail_stops_df' that do not exist in 'nodes_rail':")
print(missing_nodes_2)

Nodes in 'busiest_rail_stops_df' that do not exist in 'nodes_rail':
Empty DataFrame
Columns: [Zone, Stop Node]
Index: []


### Generate Bus Connectors
Internal zones are connected spatially to nearby stops; external zones get a bespoke connector linking the zone to its major stop.

In [23]:
from collections import Counter

# Coordinate reference system of the zone layer, applied to every frame created here
crs = zone_centroids.crs

# Maximum number of bus connectors per zone
# NOTE(review): not referenced by the visible cells - confirm whether it is still needed
max_bus_connectors = 4

# Busiest bus stops for the external zones (CSV) and the bus stop node layer
busiest_bus_stops_df = pd.read_csv(f'{raw_data}/07 Connectors/v21/Busiest Bus Stops for Ext Zones.csv')
nodes_bus = gpd.read_file(f'{raw_data}/07 Connectors/v21/bus_stop_node_node.SHP', crs=crs)

# Empty frames that the connector loop fills: one buffer polygon per zone and
# one line per generated connector.
# The zone centroid geometry is used as loaded; it is not rebuilt from XCOORD / YCOORD.
buffer_zone = gpd.GeoDataFrame(columns=['ZONENO', 'buffer'], geometry='buffer',crs=crs)
bus_connector_lines = gpd.GeoDataFrame(columns=['ZONENO', 'NODENO','geometry'], crs=crs)


# Define a function to calculate the angle between two points
def calculate_angle(point1, point2):
    """Return the direction from point1 to point2 as an angle in radians.

    Both arguments only need `x` and `y` attributes. The angle is measured with
    `math.atan2` from the positive x axis; negative results are shifted by a full
    turn so that the returned value is not negative.
    """
    delta_x = point2.x - point1.x
    delta_y = point2.y - point1.y
    theta = math.atan2(delta_y, delta_x)
    if theta < 0:
        theta += 2 * math.pi
    return theta


# Function to find the nearest node from a list of nodes
def find_nearest_node(point, nodes):
    """Return the row of `nodes` whose geometry lies closest to `point`.

    `nodes` is walked with `iterrows()` and each row's `geometry` is measured with
    `point.distance(...)`. On a tie the earliest row wins, because a later row must
    be strictly closer to replace it. Returns None when `nodes` has no rows.
    """
    best_row, best_distance = None, float('inf')
    measured = ((point.distance(row.geometry), row) for _, row in nodes.iterrows())
    for distance, row in measured:
        if distance < best_distance:
            best_row, best_distance = row, distance
    return best_row

# Initialize counters for each quadrant (zone number -> connectors created in that quadrant)
quadrant_counters = {i: Counter() for i in range(4)}

# External zones listed in the busiest-stops CSV are not connected spatially here.
# Build the lookup once instead of rebuilding a list for every zone.
bespoke_bus_zones = set(busiest_bus_stops_df['Zone'].tolist())

# Connector rows are collected as plain dicts and appended to the GeoDataFrame once
# after the loop; calling pd.concat per connector re-copies the whole frame each time.
bus_connector_records = []


def _closest_bus_connector(zone_no, centroid):
    """Build the connector record from a zone centroid to the spatially closest bus node.

    Parameters
    ----------
    zone_no : zone number written to the ZONENO column.
    centroid : centroid geometry of the zone.

    Returns
    -------
    dict with the keys ZONENO, NODENO (as int) and geometry (the connecting LineString).
    """
    closest_node_bus = nodes_bus.iloc[np.argmin(nodes_bus.distance(centroid))]
    bline = LineString([centroid.coords[0], closest_node_bus['geometry'].coords[0]])
    return {'ZONENO': zone_no, 'NODENO': int(closest_node_bus['NO']), 'geometry': bline}


for index, row in zone_centroids.iterrows():
    model_area = row['MODEL_AREA']
    zone_no = row['NO']
    centroid_point = row['geometry']
    b_wk_trips = row['bus_sum']
    buffer_distance = 0 if model_area == 'External' else 400

    # Buffer around zone centroids
    zone_buffer = centroid_point.buffer(buffer_distance)
    buffer_zone.loc[index] = [zone_no, zone_buffer]

    if model_area == 'External':
        # External zone with positive demand and no bespoke stop: connect the closest node
        if (b_wk_trips > 0) and (zone_no not in bespoke_bus_zones):
            bus_connector_records.append(_closest_bus_connector(zone_no, centroid_point))
        continue

    # Nodes that fall inside the buffer of this (non-external) zone
    bnodes_within_buffer = nodes_bus[nodes_bus.geometry.within(zone_buffer)]

    if len(bnodes_within_buffer) == 0:
        # No node in range: connect the closest node only when there is demand;
        # zones without positive demand get no connector from this loop
        if b_wk_trips > 0:
            bus_connector_records.append(_closest_bus_connector(zone_no, centroid_point))
        continue

    # Group the nodes in range by quadrant, based on their angle around the centroid
    quadrants = {i: [] for i in range(4)}
    for _, node in bnodes_within_buffer.iterrows():
        angle = calculate_angle(centroid_point, node.geometry)
        # "% 4" keeps an angle that lands exactly on a full turn in quadrant 0
        quadrant = int(math.floor(angle / (math.pi / 2))) % 4
        quadrants[quadrant].append(node)

    # Connect the nearest node of every non-empty quadrant (at most four connectors per zone)
    for quadrant, quadrant_nodes in quadrants.items():
        if not quadrant_nodes:
            continue
        node = find_nearest_node(centroid_point, pd.DataFrame(quadrant_nodes))
        if node is None:
            continue

        bline = LineString([centroid_point.coords[0], node.geometry.coords[0]])
        bus_connector_records.append({'ZONENO': zone_no,
                                      'NODENO': int(node['NO']),
                                      'geometry': bline})

        # Increment the counter for the quadrant
        quadrant_counters[quadrant][zone_no] += 1


# Append every collected connector in a single concat
if bus_connector_records:
    bus_connector_lines = pd.concat(
        [bus_connector_lines,
         gpd.GeoDataFrame(bus_connector_records, geometry='geometry', crs=crs)],
        ignore_index=True)


# Display the count of connectors generated in each quadrant
for quadrant, counter in quadrant_counters.items():
    print(f"Connectors in Quadrant {quadrant}: {len(counter)}")




# Create one bespoke connector per row of busiest_bus_stops_df (zone -> listed stop node)
bespoke_bus_records = []
for _, row in busiest_bus_stops_df.iterrows():
    # Extract relevant information from the CSV file
    zone_no = row['Zone']
    node_no = row['Stop Node']

    # Geometry of the zone centroid and of the stop node
    zone_match = zone_centroids[zone_centroids['NO'] == zone_no].geometry
    node_match = nodes_bus[nodes_bus['NO'] == node_no].geometry

    # A missing or duplicated zone / node cannot be turned into a line; report it
    # explicitly instead of failing inside LineString with an unclear message
    if len(zone_match) != 1 or len(node_match) != 1:
        raise ValueError(
            f"Zone {zone_no} / stop node {node_no}: expected exactly one zone centroid and "
            f"one bus node, found {len(zone_match)} and {len(node_match)}")

    # Connector line between zone centroid and node
    connector_line = LineString([zone_match.iloc[0].coords[0], node_match.iloc[0].coords[0]])
    bespoke_bus_records.append({'ZONENO': zone_no, 'NODENO': node_no, 'geometry': connector_line})

# Append the bespoke connectors to bus_connector_lines in a single concat
if bespoke_bus_records:
    bus_connector_lines = pd.concat(
        [bus_connector_lines,
         gpd.GeoDataFrame(bespoke_bus_records, geometry='geometry', crs=crs)],
        ignore_index=True)


# Clean index and integer identifiers for the final connector table
bus_connector_lines = bus_connector_lines.reset_index(drop=True)
bus_connector_lines['ZONENO'] = bus_connector_lines['ZONENO'].astype('int64')
bus_connector_lines['NODENO'] = bus_connector_lines['NODENO'].astype('int64')
bus_connector_lines.head()



Connectors in Quadrant 0: 336
Connectors in Quadrant 1: 335
Connectors in Quadrant 2: 345
Connectors in Quadrant 3: 350


Unnamed: 0,ZONENO,NODENO,geometry
0,1004,1355722,"LINESTRING (452320.000 319469.000, 450144.625 ..."
1,1006,1136308,"LINESTRING (461113.000 298827.000, 458815.428 ..."
2,1011,86764,"LINESTRING (382701.000 276788.000, 390431.002 ..."
3,1212,1354499,"LINESTRING (456880.617 297446.404, 454964.617 ..."
4,1278,1209077,"LINESTRING (435567.578 283621.288, 435641.171 ..."


In [24]:
# Summarise the bus connector table: row count, non-null counts and column dtypes
bus_connector_lines.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1832 entries, 0 to 1831
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   ZONENO    1832 non-null   int64   
 1   NODENO    1832 non-null   int64   
 2   geometry  1832 non-null   geometry
dtypes: geometry(1), int64(2)
memory usage: 43.1 KB


In [25]:
# Write the bus connectors to the output folder; the path is built from output_path
# (as for the rail connectors) rather than repeating the '03 Output' folder name
bus_connector_lines.to_file(f'{output_path}/01 Connectors/bus_connector_lines_v21.SHP',encoding='utf-8',crs=crs)

### Rail connectors
- Bespoke connectors for all external zones, connecting them to major internal stop points and then to the closest external stop points


In [17]:

# Busiest rail stops for the external zones (CSV) and the rail stop node layer
busiest_rail_stops_df = pd.read_csv(f'{raw_data}/07 Connectors/v21/Busiest Rail Stops for Ext Zones.csv')
nodes_rail = gpd.read_file(f'{raw_data}/07 Connectors/v21/train_stop_node_node.SHP',crs=crs)

# Empty frames that the connector loop fills: one buffer polygon per zone and
# one line per generated connector
buffer_zone = gpd.GeoDataFrame(columns=['ZONENO', 'buffer'], geometry='buffer',crs=crs)
rail_connector_lines = gpd.GeoDataFrame(columns=['ZONENO', 'NODENO','geometry'],crs=crs)



# External zones listed in the busiest-stops CSV are skipped by this loop.
# Build the lookup once instead of rebuilding a list for every zone.
bespoke_rail_zones = set(busiest_rail_stops_df['Zone'].tolist())

# Connector rows are collected as plain dicts and appended to the GeoDataFrame once
# after the loop; calling pd.concat per zone re-copies the whole frame each time.
rail_connector_records = []

# Rail demand ('rail_sum') is not consulted: every zone handled here gets at least one connector.
for index, row in zone_centroids.iterrows():
    model_area = row['MODEL_AREA']
    zone_no = row['NO']
    centroid_point = row['geometry']
    buffer_distance = 0 if model_area == 'External' else 800

    # Buffer around zone centroids
    zone_buffer = centroid_point.buffer(buffer_distance)
    buffer_zone.loc[index] = [zone_no, zone_buffer]

    if model_area == 'External':
        if zone_no in bespoke_rail_zones:
            continue
        # External zones are never matched by buffer; they go straight to the closest node
        rnodes_within_buffer = None
    else:
        # Nodes that fall inside the buffer of this (non-external) zone
        rnodes_within_buffer = nodes_rail[nodes_rail.geometry.within(zone_buffer)]

    if rnodes_within_buffer is None or len(rnodes_within_buffer) == 0:
        # Nothing in range: connect the spatially closest node
        target_nodes = [nodes_rail.iloc[np.argmin(nodes_rail.distance(centroid_point))]]
    else:
        # Otherwise connect every node inside the buffer
        target_nodes = [node_rail for _, node_rail in rnodes_within_buffer.iterrows()]

    for node_rail in target_nodes:
        rline = LineString([centroid_point.coords[0], node_rail['geometry'].coords[0]])
        rail_connector_records.append({'ZONENO': zone_no,
                                       'NODENO': int(node_rail['NO']),
                                       'geometry': rline})

# Append every collected connector in a single concat
if rail_connector_records:
    rail_connector_lines = pd.concat(
        [rail_connector_lines,
         gpd.GeoDataFrame(rail_connector_records, geometry='geometry', crs=crs)],
        ignore_index=True)

### Connectors for external zones



# Create one bespoke connector per row of busiest_rail_stops_df (zone -> listed stop node)
bespoke_rail_records = []
for _, row in busiest_rail_stops_df.iterrows():
    # Extract relevant information from the CSV file
    zone_no = row['Zone']
    node_no = row['Stop Node']

    # Geometry of the zone centroid and of the stop node
    zone_match = zone_centroids[zone_centroids['NO'] == zone_no].geometry
    node_match = nodes_rail[nodes_rail['NO'] == node_no].geometry

    # A missing or duplicated zone / node cannot be turned into a line; report it
    # explicitly instead of failing inside LineString with an unclear message
    if len(zone_match) != 1 or len(node_match) != 1:
        raise ValueError(
            f"Zone {zone_no} / stop node {node_no}: expected exactly one zone centroid and "
            f"one rail node, found {len(zone_match)} and {len(node_match)}")

    # Connector line between zone centroid and node
    connector_line = LineString([zone_match.iloc[0].coords[0], node_match.iloc[0].coords[0]])
    bespoke_rail_records.append({'ZONENO': zone_no, 'NODENO': node_no, 'geometry': connector_line})

# Append the bespoke connectors to rail_connector_lines in a single concat
if bespoke_rail_records:
    rail_connector_lines = pd.concat(
        [rail_connector_lines,
         gpd.GeoDataFrame(bespoke_rail_records, geometry='geometry', crs=crs)],
        ignore_index=True)


# Clean index and integer identifiers for the final connector table
rail_connector_lines = rail_connector_lines.reset_index(drop=True)
rail_connector_lines['ZONENO'] = rail_connector_lines['ZONENO'].astype('int64')
rail_connector_lines['NODENO'] = rail_connector_lines['NODENO'].astype('int64')
rail_connector_lines.head()


Unnamed: 0,ZONENO,NODENO,geometry
0,1200,102193,"LINESTRING (453320.625 303074.091, 454100.000 ..."
1,1201,102193,"LINESTRING (452238.231 303493.442, 454100.000 ..."
2,1202,102288,"LINESTRING (454623.628 302729.287, 459388.000 ..."
3,1203,102275,"LINESTRING (455851.817 301580.526, 458780.000 ..."
4,1204,102193,"LINESTRING (454666.893 298874.568, 454100.000 ..."


In [18]:
# Summarise the rail connector table: row count, non-null counts and column dtypes
rail_connector_lines.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 852 entries, 0 to 851
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   ZONENO    852 non-null    int64   
 1   NODENO    852 non-null    int64   
 2   geometry  852 non-null    geometry
dtypes: geometry(1), int64(2)
memory usage: 20.1 KB


In [19]:
# Write the rail connectors to the '01 Connectors' folder under output_path
rail_connector_lines.to_file(f'{output_path}/01 Connectors/rail_connector_lines_v21.shp',encoding='utf-8',crs=crs)