### Calculating the Demand using Grid2demand Library

In [8]:
from __future__ import absolute_import
from grid2demand import GRID2DEMAND
import pandas as pd
import numpy as np
from ast import literal_eval
from shapely.geometry import Point, LineString
import geopandas as gpd
import os
from ast import literal_eval

### Calculate Demand

In [9]:
# Folder containing the network CSVs read by GRID2DEMAND (the recorded log shows
# node.csv and poi.csv being read, and zone.csv detected as an optional file).
# NOTE(review): the saved output of this cell ends with `KeyError: 0` right after
# poi.csv reading starts, so none of the steps after `load_network` ran in the
# recorded execution — confirm the poi.csv schema before relying on the outputs.
input_dir = "./data/1m"
gd = GRID2DEMAND(input_dir)

# Load the network. `load_network` is accessed without parentheses and its
# `.values()` are unpacked positionally into (node_dict, poi_dict).
# NOTE(review): presumably a property returning a two-entry dict — confirm
# against the installed grid2demand version.
node_dict, poi_dict = gd.load_network.values()

# Build zones from the node dictionary using 20 blocks along x and 20 along y.
zone_dict = gd.net2zone(node_dict, num_x_blocks=20,num_y_blocks=20)

# Alternative: generate zones from a fixed cell size (10 km wide, 10 km high)
# zone_dict = gd.net2zone(node_dict, cell_width=10, cell_height=10, unit="km")

# Alternative: if you have your own zone.csv (TAZs), generate zones from it instead
# zone_dict = gd.taz2zone()


# Synchronize geometry info between zone, node and poi
#       add zone_id to node and poi dictionaries
#       also add node_list and poi_list to zone dictionary
updated_dict = gd.sync_geometry_between_zone_and_node_poi(zone_dict, node_dict, poi_dict)

# The result is unpacked positionally, so the order of its values matters:
# (zones, nodes, POIs).
zone_dict_update, node_dict_update, poi_dict_update = updated_dict.values()

# Generate a trip rate for each POI. An empty trip_rate_file is passed here;
# presumably this makes the library fall back to its built-in rates — TODO confirm.

poi_trip_rate = gd.gen_poi_trip_rate(poi_dict_update, trip_rate_file="", trip_purpose=1)

# Generate node production/attraction for each node based on poi_trip_rate

node_prod_attr = gd.gen_node_prod_attr(node_dict_update, poi_trip_rate)

# Calculate zone production and attraction based on node production and attraction

zone_prod_attr = gd.calc_zone_prod_attr(node_prod_attr, zone_dict_update)

# Calculate zone-to-zone OD distance matrix

zone_od_distance_matrix = gd.calc_zone_od_distance_matrix(zone_dict_update)

# Run the gravity model on zone production/attraction and the OD distance matrix

df_demand = gd.run_gravity_model(zone_prod_attr, zone_od_distance_matrix)
# Generate agent-based demand from the gravity-model result
df_agent = gd.gen_agent_based_demand(node_prod_attr, zone_prod_attr, df_demand=df_demand)

# Show the package settings used for this run
print(gd.pkg_settings)

# Output demand, agent, zone, zone_od_dist_table, zone_od_dist_matrix files.
# NOTE(review): these are bare attribute accesses, not calls; they only write
# files if `save_*` are properties with side effects in this grid2demand
# version — confirm.
gd.save_demand
gd.save_agent
gd.save_zone
gd.save_zone_od_dist_table
gd.save_zone_od_dist_matrix


  : Checking input directory...
  : input dir ./data/1m, traverse files by type: csv
  : Optional files: ['zone.csv'] are found in ./data/1m.
  : Optional files could be used in the following steps.
  : Input directory is valid.

  : Loading default package settings...
  : Package settings loaded successfully.

INFO Begin to run function: read_network …
  : input dir ./data/1m, traverse files by type: csv
INFO Begin to run function: read_node …
  : Parallel creating Nodes using Pool with 12 CPUs. Please wait...
  : Reading node.csv with specified columns: ['node_id', 'x_coord', 'y_coord', 'activity_type', 'is_boundary', 'poi_id']                 
    and chunksize 10000 for iterations...
  : Successfully loaded node.csv: 2550 Nodes loaded.
INFO Finished running function: read_node, total: 1s

INFO Begin to run function: read_poi …
  : Reading poi.csv with specified columns: ['poi_id', 'building', 'centroid', 'area', 'geometry']                 
    and chunksize 10000 for iterations...

KeyError: 0

#### Finding the closest node

In [10]:
def haversine(lon1, lat1, lon2, lat2):
    """Return the haversine (great-circle) distance in kilometres.

    All four arguments are longitudes/latitudes in degrees; they are converted
    to radians with ``np.radians`` before the formula is applied, so scalars
    and NumPy-compatible arrays are both accepted. The Earth radius used is
    6371 km.
    """
    earth_radius_km = 6371
    lam1, phi1, lam2, phi2 = (np.radians(value) for value in (lon1, lat1, lon2, lat2))

    half_dlat = (phi2 - phi1) / 2
    half_dlon = (lam2 - lam1) / 2

    # Haversine term: sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlon/2)
    a = np.sin(half_dlat) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(half_dlon) ** 2
    central_angle = 2 * np.arcsin(np.sqrt(a))
    return central_angle * earth_radius_km

# Read the node table first, then the zone table. The zone file's
# `node_id_list` column is stored as text, so it is parsed back into a Python
# object with `literal_eval` while loading.
nodes_df = pd.read_csv('./data/1m/node.csv', low_memory=False)
zones_df = pd.read_csv(
    './data/1m/zone.csv',
    low_memory=False,
    converters={'node_id_list': literal_eval},
)

def find_closest_node_id(node_id_list, centroid_x, centroid_y):
    """Return the POI node in ``node_id_list`` that is nearest to a zone centroid.

    Reads the module-level ``nodes_df`` table and measures distance with
    ``haversine``, passing x coordinates as longitude and y coordinates as
    latitude.

    Args:
        node_id_list: Iterable of candidate node ids (the nodes of one zone).
        centroid_x: x coordinate of the zone centroid.
        centroid_y: y coordinate of the zone centroid.

    Returns:
        The candidate id, exactly as it appears in ``node_id_list``, whose
        coordinates are closest to the centroid, or ``None`` when none of the
        candidates is a node with ``activity_type == 'poi'`` in ``nodes_df``.
        On equal distances the candidate that comes first in ``node_id_list``
        wins.
    """
    candidate_ids = list(node_id_list)
    if not candidate_ids:
        return None

    # Select the relevant rows in ONE vectorised pass instead of re-filtering
    # the POI subset and scanning the whole table once per candidate id.
    is_candidate_poi = (nodes_df['activity_type'] == 'poi') & nodes_df['node_id'].isin(candidate_ids)
    poi_rows = nodes_df.loc[is_candidate_poi, ['node_id', 'x_coord', 'y_coord']]

    # node_id -> (x, y). setdefault keeps the FIRST row for a duplicated id,
    # which is the row the previous `.iloc[0]` lookup used.
    coords_by_id = {}
    for nid, x_coord, y_coord in poi_rows.itertuples(index=False, name=None):
        coords_by_id.setdefault(nid, (x_coord, y_coord))

    min_distance = float('inf')
    closest_node_id = None
    # Walk the candidates in their original order so tie-breaking is unchanged.
    for node_id in candidate_ids:
        coords = coords_by_id.get(node_id)
        if coords is None:
            continue  # not a POI node (or not present in nodes_df)
        distance = haversine(centroid_x, centroid_y, coords[0], coords[1])
        if distance < min_distance:
            min_distance = distance
            closest_node_id = node_id
    return closest_node_id

def _closest_node_for_zone(zone_row):
    """Closest POI node id for one zone row; None unless its production is > 0."""
    if not (zone_row['production'] > 0):
        return None
    return find_closest_node_id(
        zone_row['node_id_list'],
        zone_row['centroid_x'],
        zone_row['centroid_y'],
    )


# Attach the closest POI node to every zone (row-wise) and persist the zone table
zones_df['closest_node_id'] = zones_df.apply(_closest_node_for_zone, axis=1)
zones_df.to_csv('./data/1m/updated_zones_with_closest_node.csv', index=False)

# Keep only the node rows that were selected as some zone's closest node,
# and export them as the centroid-node table
selected_node_ids = zones_df['closest_node_id']
is_centroid_node = nodes_df['node_id'].isin(selected_node_ids)
centroid_nodes_df = nodes_df[is_centroid_node]
centroid_nodes_df.to_csv('./data/1m/centroid_nodes.csv', index=False)

print("Closest node is now located in the folder")

Closest node is now located in the folder


#### Strip the Unused POI

In [11]:
# Load the node and link tables together with the centroid nodes chosen per zone.
nodes = pd.read_csv('./data/1m/node.csv', low_memory=False)
links = pd.read_csv('./data/1m/link.csv', low_memory=False)
# centroid_nodes.csv is written to ./data/1m by the closest-node step, so it
# must be read from that same folder (previously this pointed at ./data/0.2m,
# i.e. a different dataset).
centroid_nodes = pd.read_csv('./data/1m/centroid_nodes.csv')

# Convert node_id in centroid_nodes to a set for fast lookup
centroid_node_ids = set(centroid_nodes['node_id'])

# Keep every non-POI node, and only those POI nodes that serve as a zone centroid
nodes_filtered = nodes[(nodes['node_id'].isin(centroid_node_ids)) | (nodes['activity_type'] != 'poi')]

# NOTE: node.csv is overwritten in place
nodes_filtered.to_csv('./data/1m/node.csv', index=False)

# Ids of the nodes that survived the filter
valid_node_ids = set(nodes_filtered['node_id'])

# Keep only links whose both endpoints still exist; this drops the links
# attached to the removed POI nodes
links_filtered = links[(links['from_node_id'].isin(valid_node_ids)) & (links['to_node_id'].isin(valid_node_ids))]

# NOTE: link.csv is overwritten in place
links_filtered.to_csv('./data/1m/link.csv', index=False)

print( "Used POI and Connector are now stripped out")

Used POI and Connector are now stripped out


#### Relabelling nodes and links prioritizing closest node

In [12]:
# Load the node, link and zone tables
node_df = pd.read_csv('./data/1m/node.csv',  low_memory=False)
link_df = pd.read_csv('./data/1m/link.csv',  low_memory=False)
zone_df = pd.read_csv('./data/1m/updated_zones_with_closest_node.csv', low_memory=False)

# Closest (centroid) node ids in order of first appearance in the zone table;
# zones without a closest node (null) are skipped
closest_nodes = zone_df['closest_node_id'].dropna().unique()

# Sort the remaining nodes so the order after the prioritized ones is predictable
all_nodes_sorted = sorted(node_df['node_id'].unique())

# Set for O(1) membership tests; testing `in` against the NumPy array would
# scan the whole array once per node. Equal numeric ids hash alike regardless
# of int/float type, so the result is the same as the array comparison.
prioritized_ids = set(closest_nodes.tolist())

# New ordering: closest nodes first, followed by the rest, excluding already prioritized ones
new_ordered_nodes = list(closest_nodes) + [node for node in all_nodes_sorted if node not in prioritized_ids]

# Create a mapping of old node IDs to new sequential IDs, starting from 1
new_node_mapping = {old_id: new_id for new_id, old_id in enumerate(new_ordered_nodes, start=1)}

# Apply the new labeling: the original id columns are kept, the new ids go
# into separate columns
node_df['actual_node_id'] = node_df['node_id'].map(new_node_mapping)
link_df['fromID'] = link_df['from_node_id'].map(new_node_mapping)
link_df['toID'] = link_df['to_node_id'].map(new_node_mapping)

# Save the updated tables (node.csv and link.csv are overwritten in place)
node_df.to_csv('./data/1m/node.csv', index=False)
link_df.to_csv('./data/1m/link.csv', index=False)

print( "Link and Nodes have been repriotized")

Link and Nodes have been repriotized


#### Relabelling the demand

In [13]:
# Load the zone-to-zone demand table
df = pd.read_csv('./data/1m/demand.csv')

# Build ONE mapping from every zone id that occurs as an origin or as a
# destination. Using only d_zone_id would leave any origin zone that never
# appears as a destination unmapped (NaN) in o_zone_new_id.
# (The variable keeps its original name although it now covers both columns.)
all_zone_ids = pd.concat([df['o_zone_id'], df['d_zone_id']]).dropna().unique()
sorted_unique_o_zones = sorted(all_zone_ids)
mapping = {old_id: new_id for new_id, old_id in enumerate(sorted_unique_o_zones, start=1)}

# Apply the mapping to create new columns for o_zone_id and d_zone_id
df['o_zone_new_id'] = df['o_zone_id'].map(mapping)
df['d_zone_new_id'] = df['d_zone_id'].map(mapping)

# Save the relabelled demand to a separate file (demand.csv itself is left untouched)
df.to_csv('./data/1m/demand_modified.csv', index=False)

print( "The demand has been relabeled")

The demand has been relabeled


#### Calculating the capacity 

In [14]:
data = pd.read_csv('./data/1m/link.csv', low_memory=False)

# link.csv is read and overwritten in place, so multiplying `capacity` directly
# would compound the lane factor on every re-run of this cell. Remember the
# per-lane value the first time and always recompute from it.
if 'capacity_per_lane' not in data.columns:
    data['capacity_per_lane'] = data['capacity']

# Total capacity = per-lane capacity * number of lanes, for every link whose
# road type is not 'poi'
not_poi = data['Road Type'] != 'poi'
data.loc[not_poi, 'capacity'] = data.loc[not_poi, 'capacity_per_lane'] * data.loc[not_poi, 'Number of Lanes']

# Connector links get a fixed capacity of 9999 regardless of lanes
data.loc[data['Road Type'] == 'connector', 'capacity'] = 9999
data.to_csv('./data/1m/link.csv', index=False)

print( "The Capacity have been calculated. Move to the next file to generate the tntp file")

The Capacity have been calculated. Move to the next file to generate the tntp file
