In [11]:
import os
import json
import time
import pytz
from datetime import datetime
import requests
import osmnx as ox
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import LineString
import networkx as nx
import hashlib
import json
import pandas as pd
import numpy as np

In [12]:
def generate_shape_key(shape):
    """Return a hex fingerprint for a JSON-serialisable shape.

    The shape is serialised with sorted keys, so two dicts holding the same
    content in a different key order produce the same key. MD5 is used only
    as a fingerprint here, not for anything security related.
    """
    canonical_json = json.dumps(shape, sort_keys=True)
    digest = hashlib.md5(canonical_json.encode())
    return digest.hexdigest()

def extract_road_segments(roadData):
    """Build one LineString per link of a road record.

    Reads ``roadData['location']['shape']['links']``; each entry of a link's
    ``points`` contributes one ``(lng, lat)`` vertex, in the order given.

    Returns:
        A list with one LineString per link (empty when there are no links).
    """
    links = roadData['location']['shape']['links']
    return [
        LineString([(vertex['lng'], vertex['lat']) for vertex in link['points']])
        for link in links
    ]

def is_good_direction_match(here_geom, osm_geom, angle_threshold=45):
    """Tell whether two line geometries point along roughly the same axis.

    Each geometry is reduced to the vector from its first to its last
    coordinate. The angle between the two vectors is folded into [0, 90]
    degrees, so opposite headings count as aligned.

    Args:
        here_geom: Geometry exposing ``.coords``.
        osm_geom: Geometry exposing ``.coords``.
        angle_threshold: Largest accepted folded angle, in degrees.

    Returns:
        True when the folded angle is at most ``angle_threshold``; False
        otherwise, including when either end-to-end vector has zero length
        (fewer than two coordinates, or identical first and last points).
    """
    def _end_to_end(geom):
        pts = list(geom.coords)
        if len(pts) >= 2:
            return np.array(pts[-1]) - np.array(pts[0])
        return np.array([0, 0])

    here_vec = _end_to_end(here_geom)
    osm_vec = _end_to_end(osm_geom)
    here_len = np.linalg.norm(here_vec)
    osm_len = np.linalg.norm(osm_vec)

    # A degenerate vector has no direction to compare.
    if here_len == 0 or osm_len == 0:
        return False

    # Clip guards arccos against values nudged outside [-1, 1] by rounding.
    cosine = np.clip(np.dot(here_vec, osm_vec) / (here_len * osm_len), -1, 1)
    angle = np.degrees(np.arccos(cosine))
    folded = min(angle, 180 - angle)
    return folded <= angle_threshold


def match_here_to_osm(gdf_here, gdf_osm, traffic_data, osm_id_col='osmid', buffer_radius=25, angle_threshold=45):
    """Map each road shape key to the OSM ids of nearby, similarly oriented edges.

    For every geometry in ``gdf_here`` a buffer of ``buffer_radius`` is built,
    candidate OSM geometries are fetched from a spatial index, and a candidate
    is kept when it intersects the buffer and passes
    ``is_good_direction_match``.

    Args:
        gdf_here: Frame whose ``geometry`` holds the road geometries to match.
            Row ``i`` is keyed by ``traffic_data['results'][i]``, so rows must
            be in the same order as the results.
            NOTE(review): if this frame is built from a per-link list, it can
            have more rows than there are results — confirm the alignment.
        gdf_osm: Frame with OSM edge geometries and an id column.
        traffic_data: Dict with a ``'results'`` list; each entry provides
            ``['location']['shape']``, which is hashed into the road key.
        osm_id_col: Name of the id column in ``gdf_osm``. A cell may hold a
            single id or a list of ids.
        buffer_radius: Buffer distance in the coordinate units of the
            geometries. NOTE(review): 25 presumably means metres, which
            requires a projected CRS — confirm.
        angle_threshold: Passed through to ``is_good_direction_match``.

    Returns:
        Dict mapping road key to a list of matched OSM ids. Keys with no
        match are omitted. The order of ids within a list is unspecified.
    """
    from shapely.strtree import STRtree

    # Resolve the columns once; they do not change while iterating.
    osm_geometries = gdf_osm.geometry
    osm_ids = gdf_osm[osm_id_col]
    here_results = traffic_data['results']

    tree = STRtree(osm_geometries.values)

    roadKey_to_osmids = {}

    for i, here_geom in enumerate(gdf_here.geometry):
        road_key = generate_shape_key(here_results[i]['location']['shape'])

        here_buffer = here_geom.buffer(buffer_radius)
        # The results are used as positional indices below, which matches
        # Shapely 2.x (query returns indices, not geometries).
        candidates = tree.query(here_buffer)
        matched_osmids = set()

        for idx in candidates:
            candidate_geom = osm_geometries.iloc[idx]
            if (
                here_buffer.intersects(candidate_geom)
                and is_good_direction_match(here_geom, candidate_geom, angle_threshold)
            ):
                osmid = osm_ids.iloc[idx]
                # An id cell can hold several ids at once; flatten those.
                if isinstance(osmid, list):
                    matched_osmids.update(osmid)
                else:
                    matched_osmids.add(osmid)

        if matched_osmids:
            roadKey_to_osmids[road_key] = list(matched_osmids)

    return roadKey_to_osmids



In [13]:
# City whose data this notebook processes; also the name of the working folder.
PATH = "ISTANBUL"
FIGURES_PATH = os.path.join(PATH, "figures")
# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(FIGURES_PATH, exist_ok=True)

# SECURITY: a live key used to be hard-coded on this line. Treat that key as
# leaked and rotate it. The key is now read from the HERE_API_KEY environment
# variable and is an empty string when unset, so cells that only read cached
# data still run.
API_KEY = os.environ.get("HERE_API_KEY", "")

# Bounding box per city, in decimal degrees.
CITY_COORDINATES = {
    "LONDON"    : {"north_latitude": 51.5465, "east_longitude": -0.0570, "south_latitude": 51.4534, "west_longitude": -0.1772},
    "NY"        : {"north_latitude": 40.74469, "east_longitude": -73.96906, "south_latitude": 40.69736, "west_longitude": -74.02099},
    "ISTANBUL"  : {"north_latitude": 41.07178, "east_longitude": 29.03601, "south_latitude": 41.03709, "west_longitude": 29.00262},
}

# Folder holding the saved traffic snapshots for the selected city.
DATA_FOLDER = os.path.join(PATH, "realFlowData")

# Time zone of each supported city, keyed like CITY_COORDINATES.
CITY_TIMEZONES = {
    "LONDON": pytz.timezone("Europe/London"),
    "NY": pytz.timezone("America/New_York"),
    "ISTANBUL" : pytz.timezone("Europe/Istanbul")
}

def convert_timestamp(turkish_time_str, city):
    """Re-express an Istanbul wall-clock timestamp in another city's local time.

    Args:
        turkish_time_str: Timestamp formatted as ``%Y%m%d_%H%M%S``
            (e.g. ``"20240131_235959"``), interpreted as Europe/Istanbul time.
        city: Key of ``CITY_TIMEZONES`` naming the target city.

    Returns:
        The same instant in the target city's time zone, formatted as
        ``%Y-%m-%d %H:%M:%S``.

    Raises:
        ValueError: If ``turkish_time_str`` does not match the expected format.
        KeyError: If ``city`` is not in ``CITY_TIMEZONES``.
    """
    naive_local = datetime.strptime(turkish_time_str, "%Y%m%d_%H%M%S")
    # Attach the Istanbul zone to the naive value before converting.
    istanbul_aware = CITY_TIMEZONES["ISTANBUL"].localize(naive_local)

    target_zone = CITY_TIMEZONES[city]
    return istanbul_aware.astimezone(target_zone).strftime("%Y-%m-%d %H:%M:%S")

# os.listdir returns entries in arbitrary order. Sorting makes the choice of
# the "first" snapshot reproducible across machines and runs.
dataCollection = sorted(os.listdir(DATA_FOLDER))
if not dataCollection:
    raise FileNotFoundError(f"No traffic snapshots found in {DATA_FOLDER!r}")

convertedTimedDataList = []

# Load the first snapshot only; it supplies the road shapes.
with open(os.path.join(DATA_FOLDER, dataCollection[0]), 'r', encoding='utf-8') as f:
    traffic_data = json.load(f)

# Shape key -> road record. Each record is the same dict object as in
# traffic_data['results'] and gains a 'roadSegment' entry below.
roadKeyTpRoadDataDict = {}

# Flat list of every link geometry. It has one entry per link, so it can be
# longer than traffic_data['results'].
roadSegments = []
for roadData in traffic_data['results']:
    key = generate_shape_key(roadData['location']['shape'])
    roadKeyTpRoadDataDict[key] = roadData

    roadSegment = extract_road_segments(roadData)
    roadData['roadSegment'] = roadSegment
    roadSegments.extend(roadSegment)

# Bounding box of the city being processed.
selected = CITY_COORDINATES[PATH]

In [14]:
import pickle

# NOTE(review): roadKey_to_osmids, G and sub_G are not defined in any cell
# visible here. This cell relies on state from elsewhere; confirm it survives
# Restart Kernel -> Run All.
# All three objects are looked up before any file is opened, so a missing
# variable fails without leaving a truncated pickle on disk.
pickle_outputs = {
    "roadKey_to_osmid.pkl": roadKey_to_osmids,
    "osmnx_graph.pkl": G,
    "osmnx_subgraph.pkl": sub_G,
}

# Pickle files can execute code when loaded; only load these from a trusted source.
for pickle_name, payload in pickle_outputs.items():
    with open(os.path.join(PATH, pickle_name), "wb") as f:
        pickle.dump(payload, f)