In [None]:
import os
import json
import time
import pytz
import requests
import osmnx as ox
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import LineString
import networkx as nx
import hashlib
import datetime
import numpy as np

import os
import pickle
import datetime
import numpy as np


In [None]:
def generate_shape_key(shape):
    """Return a deterministic MD5 hex digest that identifies a road shape.

    The shape is serialised to JSON with dictionary keys sorted, so two equal
    shapes whose dicts were built in a different key order yield the same key.
    The digest is returned as a 32-character lowercase hex string.
    """
    canonical_json = json.dumps(shape, sort_keys=True)
    digest = hashlib.md5(canonical_json.encode())
    return digest.hexdigest()

def process_traffic_snapshot(traffic_data, roadKey_to_osmid, osmid_to_capacity):
    """
    Compute the mean capacity-normalised flow for one traffic snapshot.

    For every segment in ``traffic_data['results']`` the segment shape is hashed
    with ``generate_shape_key`` and looked up in ``roadKey_to_osmid``. For each
    matched OSM id the flow is estimated as ``capacity * (jamFactor / 10)`` and
    then divided by that capacity.

    OSM ids without a usable capacity (missing from ``osmid_to_capacity`` or
    stored as 0/None) are skipped. Previously they produced a 0/0 division and
    turned the whole snapshot average into NaN.

    Parameters
    ----------
    traffic_data : dict
        Snapshot with a ``'results'`` list; each entry must provide
        ``['location']['shape']`` and ``['currentFlow']['jamFactor']``.
    roadKey_to_osmid : dict
        Maps a shape key to a single OSM id or to a list of OSM ids.
    osmid_to_capacity : dict
        Maps an OSM id to its capacity.

    Returns
    -------
    float
        Mean of ``flow / capacity`` over all matched OSM ids that have a
        capacity, or ``0.0`` when there is none.

    Raises
    ------
    KeyError
        If a segment lacks the ``location.shape`` or ``currentFlow.jamFactor``
        entries.
    """
    normalised_flows = []

    for seg in traffic_data['results']:
        shape_data = seg['location']['shape']
        jam_factor = seg['currentFlow']['jamFactor']

        # The key must be built the same way as the keys of roadKey_to_osmid.
        key = generate_shape_key(shape_data)

        # A key may map to one OSM id or to a list of them; unify to a list.
        osmids_for_key = roadKey_to_osmid.get(key, [])
        if not isinstance(osmids_for_key, list):
            osmids_for_key = [osmids_for_key]

        for osm_id in osmids_for_key:
            capacity = osmid_to_capacity.get(osm_id, 0)
            if not capacity:
                # No capacity known for this OSM id: dividing would give NaN.
                continue
            flow_value = (jam_factor / 10.0) * capacity
            normalised_flows.append(flow_value / capacity)

    if not normalised_flows:
        return 0.0
    return float(sum(normalised_flows) / len(normalised_flows))
    
import datetime
import re

def parse_timestamp_from_filename(filename):
    """
    Turn a traffic snapshot filename into an ISO-like timestamp string.

    The filename must contain 'traffic_data_', 'traffic__' or 'traffic_'
    followed by 'YYYYMMDD_HHMMSS.json', for example
    'traffic_data_20250214_230741.json' -> '2025-02-14T23:07:41Z'.

    The trailing 'Z' is appended verbatim; no timezone conversion is done here.

    Raises ValueError when the filename does not contain that pattern.
    """
    filename_pattern = r"(?:traffic_data_|traffic__|traffic_)(\d{8})_(\d{6})\.json"
    found = re.search(filename_pattern, filename)
    if found is None:
        raise ValueError(f"Filename format not recognized: {filename}")

    # Date digits followed by time digits, parsed in one go.
    digits = "".join(found.groups())
    parsed = datetime.datetime.strptime(digits, "%Y%m%d%H%M%S")
    return parsed.isoformat() + 'Z'



In [None]:
# Road class name -> row/column index used by the tables below.
highwayMap = {
    road_class: index
    for index, road_class in enumerate([
        'motorway',
        'highway',
        'primary',
        'secondary',
        'tertiary',
        'unclassified',
        'residential',
        'living_street',
    ])
}

# 8x8 table with one row and one column per road class index.
# NOTE(review): M is not referenced in the visible cells; confirm its purpose.
M = [
    [100, 50, 20, 17, 10,  8,  8,  5],
    [ 55, 55, 22, 20, 15, 10, 10,  5],
    [ 25, 25, 25, 23, 20, 15, 12,  5],
    [ 25, 25, 25, 25, 23, 20, 13,  5],
    [ 22, 22, 22, 22, 22, 21, 15,  5],
    [ 21, 21, 21, 21, 21, 21, 17,  6],
    [ 21, 21, 21, 21, 21, 21, 17,  6],
    [ 18, 18, 18, 18, 18, 18, 18, 18],
]

# Per road class multiplier; edge capacity is computed as length * theta[class index].
# Its values are the same as the first row of M.
theta = [100, 50, 20, 17, 10, 8, 8, 5]

In [None]:
# City folder; every input file of this cell lives underneath it.
PATH = "ISTANBUL"
REAL_FLOW_DATA_FOLDER = os.path.join(PATH, "realFlowData")

# Locations of the pickled lookup table and graphs inside the city folder.
roadKey_to_osmid_path, graph_path, subgraph_path = (
    os.path.join(PATH, pickle_name)
    for pickle_name in ("roadKey_to_osmid.pkl", "osmnx_graph.pkl", "osmnx_subgraph.pkl")
)

# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
# Shape key -> OSM id(s) lookup table.
with open(roadKey_to_osmid_path, "rb") as f:
    roadKey_to_osmid = pickle.load(f)

# Road network subgraph whose edges are annotated further below.
with open(subgraph_path, "rb") as f:
    subG = pickle.load(f)

# Names of all entries in the snapshot folder.
flowData = os.listdir(REAL_FLOW_DATA_FOLDER)

# Normalise every edge's 'highway' tag to a key of highwayMap and derive the
# edge capacity as length * theta[road class index].
for edge in subG.edges:
    # Iterating subG.edges yields edge keys (tuples), which have no `.attrib`;
    # the attribute dict has to be fetched through the edge view. The previous
    # `edge.attrib[...]` always raised and a bare `except` silently turned
    # every edge into "residential".
    attrs = subG.edges[edge]
    highwayName = attrs.get("highway")

    # A tag may be stored as a list of values; use the first one. This must
    # happen before any string method is called on the value.
    if isinstance(highwayName, list):
        highwayName = highwayName[0] if highwayName else None

    if isinstance(highwayName, str):
        # "<class>_link" ramps are treated like their parent class.
        highwayName = highwayName.replace("_link", "")

        # NOTE(review): motorway is folded into "primary" as in the original
        # rules, so the 'motorway' entry of highwayMap is never selected.
        if highwayName.startswith(("trunk", "motorway")):
            highwayName = "primary"
        elif highwayName.startswith(("living_street", "unclassified")):
            highwayName = "residential"

    # Missing, non-string or unknown classes fall back to "residential".
    if not isinstance(highwayName, str) or highwayName not in highwayMap:
        highwayName = "residential"

    attrs['highway'] = highwayName
    attrs["capacity"] = attrs["length"] * theta[highwayMap[highwayName]]

# City name -> pytz timezone object, built from the zone names.
timezone_map = {
    city: pytz.timezone(zone_name)
    for city, zone_name in [
        ("ISTANBUL", "Europe/Istanbul"),
        ("LONDON", "Europe/London"),
        ("NY", "America/New_York"),
        ("TOKYO", "Asia/Tokyo"),
        ("TOKYO2", "Asia/Tokyo"),
        ("LONDON2", "Europe/London"),
        ("BERLIN", "Europe/Berlin"),
    ]
}

print("Generating all flow data, date->avgFlow")

# OSM id -> capacity of an edge carrying that id.
# When several edges share an OSM id, the edge visited last wins.
osmid_to_capacity = {}

for u, v, k, data in subG.edges(keys=True, data=True):
    edge_osmid = data.get('osmid')
    capacity = data.get('capacity', 0)

    if edge_osmid is None:
        continue

    # An edge may carry a single OSM id or a list of them.
    for osm in (edge_osmid if isinstance(edge_osmid, list) else [edge_osmid]):
        osmid_to_capacity[osm] = capacity

# Timestamp string -> average normalised flow of that snapshot.
all_flow_data = {}

for filename in flowData:
    timestamp_str = parse_timestamp_from_filename(filename)
    snapshot_path = os.path.join(REAL_FLOW_DATA_FOLDER, filename)

    # Snapshots that are not valid JSON are reported and skipped.
    try:
        with open(snapshot_path, "r") as f:
            td = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from {filename}: {e}")
        continue

    avg_flow = process_traffic_snapshot(td, roadKey_to_osmid, osmid_to_capacity)
    all_flow_data[timestamp_str] = avg_flow

# Series in timestamp order; the key format sorts chronologically as text.
sorted_keys = sorted(all_flow_data)
sorted_values = np.array([all_flow_data[key] for key in sorted_keys])

date_format = '%Y-%m-%dT%H:%M:%SZ'
sorted_datetimes = [
    datetime.datetime.strptime(key, date_format) for key in sorted_keys
]

# NOTE(review): both names refer to timezone_map[PATH], so the conversion
# below localises each naive datetime and returns it in that same zone.
istanbulTimeZone = timezone_map[PATH]
timeZone = timezone_map[PATH]
convertedTimezones = []

for dt in sorted_datetimes:
    ist_time = istanbulTimeZone.localize(dt)
    utc_time = ist_time.astimezone(pytz.utc)
    local_time = utc_time.astimezone(timeZone)
    convertedTimezones.append(local_time)

In [None]:
import os
import pytz
import datetime
import re
from tqdm import tqdm

# City folders to process.
PATHS = ["ISTANBUL"]


# City name -> pytz timezone object, built from the zone names.
timezone_map = {
    city: pytz.timezone(zone_name)
    for city, zone_name in [
        ("ISTANBUL", "Europe/Istanbul"),
        ("LONDON", "Europe/London"),
        ("NY", "America/New_York"),
        ("TOKYO", "Asia/Tokyo"),
        ("TOKYO2", "Asia/Tokyo"),
        ("LONDON2", "Europe/London"),
        ("BERLIN", "Europe/Berlin"),
    ]
}

# One pair of filename buckets ("morning" / "night") per city.
city_file_lists = dict(
    (city, {"morning": [], "night": []}) for city in PATHS
)

def parse_timestamp_from_filename(filename):
    """Return the timestamp embedded in a snapshot filename as 'YYYYMMDDTHHMMSSZ'.

    The filename must contain 'traffic_data_', 'traffic__' or 'traffic_'
    followed by 'YYYYMMDD_HHMMSS.json'. Returns None when it does not.

    NOTE(review): an earlier cell defines a function with this same name that
    raises ValueError and returns a dashed ISO-like string; this definition
    replaces it once the cell has run.
    """
    filename_pattern = r"(?:traffic_data_|traffic__|traffic_)(\d{8})_(\d{6})\.json"
    found = re.search(filename_pattern, filename)
    if found is None:
        return None

    # Groups are the YYYYMMDD and HHMMSS digit runs, in that order.
    date_part, time_part = found.groups()
    return f"{date_part}T{time_part}Z"

for PATH in tqdm(PATHS):
    # Snapshot folder of this city and the names of its entries.
    REAL_FLOW_DATA_FOLDER = os.path.join(PATH, "realFlowData")
    flowData = os.listdir(REAL_FLOW_DATA_FOLDER)

    for filename in flowData:
        # Entries whose name carries no recognisable timestamp are skipped.
        timestamp_str = parse_timestamp_from_filename(filename)
        if not timestamp_str:
            continue

        # The filename timestamp is assumed to be Istanbul wall-clock time.
        istanbul_time = datetime.datetime.strptime(timestamp_str, "%Y%m%dT%H%M%SZ")
        istanbul_tz = pytz.timezone("Europe/Istanbul")
        istanbul_time = istanbul_tz.localize(istanbul_time)

        # Go through UTC to reach the local time of the city being processed.
        utc_time = istanbul_time.astimezone(pytz.utc)
        local_time = utc_time.astimezone(timezone_map[PATH])
        local_hour = local_time.hour

        # Hours 09..18 count as "morning"; everything else is "night".
        period = "morning" if 9 <= local_hour < 19 else "night"
        city_file_lists[PATH][period].append(filename)

# Now city_file_lists contains only the filenames categorized correctly


In [None]:
import os
import pytz
import datetime
import re
from tqdm import tqdm

# City folders to process.
PATHS = ["ISTANBUL"]

# City name -> pytz timezone object, built from the zone names.
timezone_map = {
    city: pytz.timezone(zone_name)
    for city, zone_name in [
        ("ISTANBUL", "Europe/Istanbul"),
        ("LONDON", "Europe/London"),
        ("NY", "America/New_York"),
    ]
}

# Per city: a "days" dict that the loop below fills with one entry per local date.
city_file_lists = dict((city, {"days": {}}) for city in PATHS)

def parse_timestamp_from_filename(filename):
    """Extract 'YYYYMMDDTHHMMSSZ' from a traffic snapshot filename.

    Matches 'traffic_data_', 'traffic__' or 'traffic_' followed by
    'YYYYMMDD_HHMMSS.json' anywhere in the name. Returns None when the
    filename does not contain that pattern.
    """
    hit = re.search(r"(?:traffic_data_|traffic__|traffic_)(\d{8})_(\d{6})\.json", filename)

    if hit is None:
        return None
    # group(1) is the date (YYYYMMDD), group(2) the time (HHMMSS).
    return "{}T{}Z".format(hit.group(1), hit.group(2))

for PATH in tqdm(PATHS):
    # Snapshot folder of this city and the names of its entries.
    REAL_FLOW_DATA_FOLDER = os.path.join(PATH, "realFlowData")
    flowData = os.listdir(REAL_FLOW_DATA_FOLDER)

    for filename in flowData:
        # Entries whose name carries no recognisable timestamp are skipped.
        timestamp_str = parse_timestamp_from_filename(filename)
        if not timestamp_str:
            continue

        # The filename timestamp is assumed to be Istanbul wall-clock time.
        istanbul_time = datetime.datetime.strptime(timestamp_str, "%Y%m%dT%H%M%SZ")
        istanbul_tz = pytz.timezone("Europe/Istanbul")
        istanbul_time = istanbul_tz.localize(istanbul_time)

        # Go through UTC to reach the local time of the city being processed.
        utc_time = istanbul_time.astimezone(pytz.utc)
        local_time = utc_time.astimezone(timezone_map[PATH])
        local_hour = local_time.hour
        local_date = local_time.strftime("%Y-%m-%d")

        # Create the buckets of that local date on first use.
        day_buckets = city_file_lists[PATH]["days"].setdefault(
            local_date, {"morning": [], "night": []}
        )

        # Hours 09..18 count as "morning"; everything else is "night".
        period = "morning" if 9 <= local_hour < 19 else "night"
        day_buckets[period].append(filename)

    # The whole dictionary (all cities) is pickled into this city's folder.
    # NOTE(review): the message below prints only the bare file name, not the
    # folder it was written to.
    pickle_filename = "city_file_lists.pkl"
    with open(os.path.join(PATH, pickle_filename), "wb") as f:
        pickle.dump(city_file_lists, f)

    print(f"Saved city_file_lists to {pickle_filename}")


# Now city_file_lists contains files categorized by city → date → morning/night.


In [None]:
import pickle

# Name of the output pickle; being a relative path, it is resolved against
# the current working directory.
pickle_filename = "city_file_lists.pkl"

# Write the categorised filename dictionary to that file.
with open(pickle_filename, "wb") as f:
    pickle.dump(city_file_lists, f)

print(f"Saved city_file_lists to {pickle_filename}")
