In [7]:
import pandas as pd
import warnings
# Silence all warnings before the plotting / geo libraries are imported
# (same relative order as before: filter first, then matplotlib and geopandas).
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import geopandas as gpd

# pandas display settings used for every table rendered in this notebook:
# column-width setting of 0, up to 200 rows / 500 columns, a 1000-character
# wide console, and floats printed with five decimals.
for option_name, option_value in (
    ('display.max_colwidth', 0),
    ('display.max_rows', 200),
    ('display.max_columns', 500),
    ('display.width', 1000),
    ('display.float_format', '{:.5f}'.format),
):
    pd.set_option(option_name, option_value)

In [13]:
# %pip install folium matplotlib mapclassify
# %pip is a magic command executed within the notebook kernel

# Load INRIX data

In [23]:
from shapely.geometry import LineString
from geopandas import GeoDataFrame

# Read the INRIX TMC identification table and keep only the columns used below.
# .copy() makes df_road an independent frame, so adding the geometry column
# never writes into a slice of the frame returned by read_csv.
df_road = pd.read_csv("data/TMC_Identification.csv", sep=',', header=0)
df_road = df_road[['tmc_code','intersection','start_latitude','start_longitude','end_latitude','end_longitude','miles']].copy()
print('total number of road segments:',df_road.shape[0])

# One straight start -> end LineString per segment; shapely expects (x, y),
# i.e. (longitude, latitude). Iterating the four coordinate columns with zip
# avoids building a Series per row as DataFrame.apply(axis=1) does, and also
# works when the table has no rows.
df_road['geometry'] = [
    LineString([(start_lon, start_lat), (end_lon, end_lat)])
    for start_lon, start_lat, end_lon, end_lat in zip(
        df_road['start_longitude'],
        df_road['start_latitude'],
        df_road['end_longitude'],
        df_road['end_latitude'],
    )
]

# Create a GeoDataFrame for road network: coordinates are declared as
# EPSG:4326 (lon/lat) and then projected to EPSG:3857.
gdf_road = GeoDataFrame(df_road, geometry='geometry', crs="EPSG:4326")
gdf_road = gdf_road.to_crs("EPSG:3857")
gdf_road = gdf_road[['tmc_code','start_latitude','start_longitude','end_latitude','end_longitude','geometry']]
gdf_road.head(2)

total number of road segments: 4460


Unnamed: 0,tmc_code,start_latitude,start_longitude,end_latitude,end_longitude,geometry
0,112P13033,30.28172,-97.74193,30.28232,-97.74207,"LINESTRING (-10880581.877 3539813.967, -10880597.461 3539891.312)"
1,112+08908,30.11066,-97.41049,30.11082,-97.37323,"LINESTRING (-10843686.145 3517782.1, -10839538.38 3517802.69)"


In [28]:
# Visual sanity check of the INRIX segment geometries built in the previous cell.
# NOTE(review): presumably needs the optional packages listed in the
# commented-out %pip install line (folium, matplotlib, mapclassify) — confirm.
gdf_road.explore()

In [25]:
import numpy as np 

# Function to calculate bearing between two points (start and end of a road segment)
def calculate_bearing(start_lat, start_lon, end_lat, end_lon):
    """Return the initial bearing from a start point to an end point.

    Parameters
    ----------
    start_lat, start_lon : latitude / longitude of the start point, in degrees.
    end_lat, end_lon : latitude / longitude of the end point, in degrees.
        Every argument is passed straight to NumPy functions, so scalars and
        array-likes are both accepted.

    Returns
    -------
    Bearing in degrees, measured clockwise from north and wrapped into
    [0, 360): 0 is north, 90 east, 180 south, 270 west.
    """
    # Work in radians throughout
    phi_start, phi_end = np.radians(start_lat), np.radians(end_lat)
    d_lambda = np.radians(end_lon) - np.radians(start_lon)

    # East and north components of the direction towards the end point
    east = np.sin(d_lambda) * np.cos(phi_end)
    north = np.cos(phi_start) * np.sin(phi_end) - np.sin(phi_start) * np.cos(phi_end) * np.cos(d_lambda)

    # arctan2 yields (-180, 180] degrees; shift and wrap into [0, 360)
    return (np.degrees(np.arctan2(east, north)) + 360) % 360
# The bearing value represents the direction from the starting point to the destination, measured clockwise from true north (0° is north, 90° is east, etc.).

# Bearing of every INRIX segment, from its start/end coordinates.
# calculate_bearing is built entirely from NumPy functions, so the four
# coordinate columns are passed whole instead of row by row through
# apply(axis=1); the result is a Series aligned on gdf_road's index.
gdf_road['segment_bearing'] = calculate_bearing(
    gdf_road['start_latitude'],
    gdf_road['start_longitude'],
    gdf_road['end_latitude'],
    gdf_road['end_longitude'],
)

# Load OSM

In [30]:
# Read the exported OSM edge table; its first two columns form the index.
edges_osm = pd.read_csv('./output/osm_edges.csv', index_col=[0,1])

# preprocessing OSM network data
# Move the index levels back into columns and parse the WKT strings into
# geometries, declared as EPSG:4326.
edges_osm_need = edges_osm.reset_index()
edges_osm_need = gpd.GeoDataFrame(
    edges_osm_need.assign(geometry=gpd.GeoSeries.from_wkt(edges_osm_need['geometry'])),
    geometry='geometry',
    crs="EPSG:4326",
)

# Edge identifier: string forms of osmid, u and v joined with no separator.
# NOTE(review): without a separator two different (osmid, u, v) triples could
# in principle produce the same string — confirm this is acceptable.
edges_osm_need['osm_id'] = edges_osm_need['osmid'].astype(str).str.cat(
    [edges_osm_need['u'].astype(str), edges_osm_need['v'].astype(str)]
)

# Drop the columns not used further, rename highway -> road_type and project
# to EPSG:3857.
edges_osm_need = (
    edges_osm_need
    .drop(columns=['u','v','osmid','key', 'ref', 'reversed', 'bridge', 'access','tunnel'])
    .rename(columns={"highway": "road_type"})
    .to_crs("EPSG:3857")
)
print(edges_osm_need.shape[0])
edges_osm_need.head(2)

145430


Unnamed: 0,name,road_type,oneway,length,bearing,geometry,lanes,maxspeed,width,junction,osm_id
0,Heritage Drive,residential,False,88.55,341.4,"LINESTRING (-10908067.216 3529765.696, -10908099.955 3529862.906)",,,,,15287110151371439151462706
1,Hideaway Cove,residential,False,116.618,60.4,"LINESTRING (-10908067.216 3529765.696, -10908027.363 3529780.251, -10908014.562 3529785.79, -10907990.85 3529800.731, -10907966.138 3529820.439, -10907950.887 3529831.903)",,,,,15277135151371439151371447


In [31]:
# edges_osm_need.head(50000).explore()

# Spatial join between INRIX and OSM

In [None]:
%%time
# Before spatial join, find the closest direction
# First, from each inrix link, find the OSM links with similar angle: The abs(delta angle) smaller than 5 or greater than 355
# Then, for each inrix link, always use the start point's closest OSM link as matching target

gdf_road_merged_copy = gdf_road_merged.copy().to_crs("EPSG:3857")
gdf_road_merged_copy['osm_id'] = np.NAN

for idx, road_row in gdf_road_merged_copy.iterrows():
    if idx % 100 == 0:
        print(f"Processing {idx}/{len(gdf_road_merged_copy)}")

    # Filter edges with similar directions
    segment_bearing = road_row['segment_bearing']
    possible_matches = edges_osm_need[
        (abs(edges_osm_need['bearing'] - segment_bearing) < 5) |
        (abs(edges_osm_need['bearing'] - segment_bearing) > 360 - 5)
    ]
    
    # Calculate distances (vectorized for the filtered matches)
    possible_matches['edge_distance'] = possible_matches.geometry.apply(
        lambda geom: road_row.geometry.distance(geom)
    )

    # Keep matches within 5 meters
    nearby_matches = possible_matches[possible_matches['edge_distance'] < 5]

    # Assign the nearest segment's osm_id if matches are found
    if not nearby_matches.empty:
        nearest_match_id = nearby_matches.loc[nearby_matches['edge_distance'].idxmin(), 'osm_id']
        gdf_road_merged_copy.loc[idx, 'osm_id'] = nearest_match_id