In [None]:
# Widen the notebook's main container to 85% of the browser window.
# `display` is imported from the public `IPython.display` module; importing it
# from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:85% !important; }</style>"))

In [None]:
import os
import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# import geopandas as gpd
import time
import osmnx as ox
# import folium
import pickle
from shapely.geometry import Point, LineString, Polygon
from leuvenmapmatching.matcher.distance import DistanceMatcher
from leuvenmapmatching.map.inmem import InMemMap

import warnings
warnings.filterwarnings('ignore')

## Network Generation and Preprocessing

In [None]:
## Study area definition with Coordinates.
# Point = (Longitude, Latitude)
# The four corners below are joined, in the order given, into the study-area polygon.

p1_buffer = (127.052866, 37.521370)
p2_buffer = (127.065862, 37.493310)
p3_buffer = (126.997581, 37.471532)
p4_buffer = (126.985552, 37.501859)

area_buffer = Polygon([p1_buffer, p2_buffer, p3_buffer, p4_buffer])

# Download the road network inside the polygon, keeping only ways whose
# "highway" tag matches primary, secondary or tertiary (see custom_filter).
graph = ox.graph_from_polygon(area_buffer, network_type='drive', truncate_by_edge=True, clean_periphery=True, custom_filter='["highway"~"primary|secondary|tertiary"]')

# Project the graph (no target CRS given, so osmnx chooses one - presumably a
# local metric projection; TODO confirm), then merge nearby intersection nodes.
# NOTE(review): tolerance=22 is in the projected CRS's units (presumably metres).
graph_proj = ox.project_graph(graph)
graph_consolidated = ox.consolidate_intersections(graph_proj, rebuild_graph=True, tolerance=22, dead_ends=False)

graph_proj4326 = ox.project_graph(graph_consolidated, to_crs='EPSG:4326') # Geographic coordinate reference system (EPSG:4326)
# Split the re-projected graph into node and edge GeoDataFrames for the cells below.
graph_nodes, graph_edges = ox.graph_to_gdfs(graph_proj4326, nodes=True, edges=True)

# Report the number of edges and plot the consolidated (projected) graph.
print(len(graph_edges))
ox.plot.plot_graph(graph_consolidated)

In [None]:
# Expose the node coordinates under the names used when the matcher map is built.
graph_nodes['lon'] = graph_nodes['x']
graph_nodes['lat'] = graph_nodes['y']

# Move the node id out of the index so it is available as the 'osmid' column.
graph_nodes.reset_index(inplace=True)

# Sequential node ids 0..N-1, assigned as a whole column.
# The previous element-wise `graph_nodes['osmid_new'][i] = i` was chained
# assignment: it only worked through a cached view and is a silent no-op under
# pandas Copy-on-Write, which would leave the column full of NaN.
# The dtype stays float so the ids keep the values they had before.
graph_nodes['osmid_new'] = np.arange(len(graph_nodes), dtype=float)

graph_nodes.set_index('osmid_new', drop=False, inplace=True)

####################################################################################################

graph_edges = graph_edges.copy()
graph_edges.reset_index(drop=False, inplace=True)
# graph_edges.drop(['tunnel', 'lanes', 'access', 'ref', 'maxspeed', 'bridge'], axis=1, inplace=True)

# Attach the sequential id of each edge's start node (u -> u_new) ...
nodes_list = graph_nodes[['osmid', 'osmid_new']].rename(columns={'osmid': 'u', 'osmid_new': 'u_new'})
graph_edges = pd.merge(graph_edges, nodes_list, how='left', on='u')

# ... and of its end node (v -> v_new).
nodes_list = graph_nodes[['osmid', 'osmid_new']].rename(columns={'osmid': 'v', 'osmid_new': 'v_new'})
graph_edges = pd.merge(graph_edges, nodes_list, how='left', on='v')

In [None]:
# In-memory road map handed to the map matcher.
map_con = InMemMap('Arbitrary_Area', use_rtree=False, index_edges=True)

# Register every node under its sequential id with a (lat, lon) coordinate pair.
for node_id, node_lat, node_lon in zip(graph_nodes.index, graph_nodes['lat'], graph_nodes['lon']):
    map_con.add_node(node_id, (node_lat, node_lon))

# Register every edge as a (start node, end node) pair of sequential ids.
for start_node, end_node in zip(graph_edges['u_new'], graph_edges['v_new']):
    map_con.add_edge(start_node, end_node)

In [None]:
# One row per edge: the (u_new, v_new) node pair as 'Link', plus name, length
# and geometry. The row position is kept as the 'EdgeID' column.
link_pairs = pd.Series(
    list(zip(graph_edges['u_new'], graph_edges['v_new'])),
    index=graph_edges.index,
)
network_edges = (
    graph_edges
    .assign(Link=link_pairs)[['Link', 'name', 'length', 'geometry']]
    .reset_index(drop=False)
    .rename(columns={'index': 'EdgeID'})
)
network_edges

In [None]:
# Lookup table pairing each Link (u_new, v_new) with its EdgeID.
network_EdgeID = network_edges.loc[:, ['Link', 'EdgeID']]
network_EdgeID

## Functions

In [None]:
def name_call(day, category, order, extension):
    """Return the path of an input file inside the per-day ``./DTGoutputDD/`` folder.

    Parameters
    ----------
    day : int
        Day number; zero-padded to two digits in both the folder and file name.
    category : str
        File category, e.g. ``'data'`` or ``'traj'``.
    order : int
        Sequence number of the file within the day; zero-padded to three digits.
    extension : str
        File extension without the dot, e.g. ``'pkl'`` or ``'pickle'``.

    Returns
    -------
    str
        ``./DTGoutputDD/<category>_DD_NNN.<extension>``
    """
    day_tag = f'{day:02}'
    order_tag = f'{order:03}'
    return f'./DTGoutput{day_tag}/{category!s}_{day_tag}_{order_tag}.{extension!s}'

In [None]:
def name_save(day, category, order, extension, root='./Mapmatching_output', prefix='18-04-'):
    """Return the path of an output file, creating its per-day folder if needed.

    Parameters
    ----------
    day : int
        Day number; zero-padded to two digits in both the folder and file name.
    category : str
        Output category, e.g. ``'in_flow'``, ``'out_flow'`` or ``'speed'``.
    order : int
        Sequence number of the file within the day; zero-padded to three digits.
    extension : str
        File extension without the dot.
    root : str, optional
        Base output folder. Defaults to the location that was previously hard-coded.
    prefix : str, optional
        Text placed before the day number in the per-day folder name.
        Defaults to the value that was previously hard-coded.

    Returns
    -------
    str
        ``<root>/<prefix>DD/<category>_DD_NNN.<extension>``

    Side effects
    ------------
    Creates ``<root>/<prefix>DD`` (and any missing parents) on disk.
    """
    model_object_file_path = f'{root}/{prefix}{day:02}'
    # exist_ok=True removes the check-then-create race of
    # `if not os.path.exists(...): os.makedirs(...)`.
    os.makedirs(model_object_file_path, exist_ok=True)
    return f'{model_object_file_path}/{category!s}_{day:02}_{order:03}.{extension!s}'

In [None]:
def Applying_Mapmatching(df, area, traj_dict, edge_ids=None):
    """Map-match every trajectory and attach the matched link to each observation.

    Parameters
    ----------
    df : pandas.DataFrame
        Observations; must contain ``TripID`` and ``TripID_order`` columns, where
        ``TripID_order`` has the form ``'<TripID>_<k>'`` with ``k`` starting at 1.
    area : map object
        Road map passed straight to ``DistanceMatcher``.
    traj_dict : dict
        Maps a TripID to the path handed to ``matcher.match``.
    edge_ids : pandas.DataFrame, optional
        Lookup table with ``Link`` and ``EdgeID`` columns. Defaults to the
        notebook-level ``network_EdgeID``.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` that received a matched ``Link``, with ``Link`` and
        ``EdgeID`` columns added.

    Raises
    ------
    ValueError
        If no trajectory produced any matched state. (Previously this surfaced
        as an opaque ``KeyError`` from the merge.)
    """
    if edge_ids is None:
        edge_ids = network_EdgeID

    matcher = DistanceMatcher(area, max_dist=300, min_prob_norm=0.5, obs_noise=50)

    # Number of observations per trip, computed once instead of filtering the
    # whole frame for every trip. A plain dict keeps lookups purely key-based.
    trip_sizes = df['TripID'].value_counts().to_dict()

    matched_orders = []
    matched_links = []
    for trip_id, path in traj_dict.items():
        states, _ = matcher.match(path)
        if not states:
            continue  # nothing matched for this trip
        n_obs = trip_sizes.get(trip_id, 0)
        # The k-th matched state belongs to the k-th observation of the trip.
        # Observations beyond the last matched state get no link and are
        # dropped below, exactly as the NaN padding did before.
        for k, link in zip(range(1, n_obs + 1), states):
            matched_orders.append(str(trip_id) + '_' + str(k))
            matched_links.append(link)

    if not matched_orders:
        raise ValueError('No trajectory could be map-matched.')

    # Build the lookup frame once; concatenating inside the loop was quadratic.
    samples = pd.DataFrame({'TripID_order': matched_orders, 'Link': matched_links})

    dataframe = pd.merge(df, samples, how='left', on='TripID_order')
    dataframe = dataframe.dropna(subset=['Link']).reset_index(drop=True)
    dataframe = pd.merge(dataframe, edge_ids, how='left', on='Link')

    return dataframe

In [None]:
def Network_Speed(dataframe):
    """Return the mean of ``Speed`` for each distinct ``Link`` in ``dataframe``.

    The result is a Series named ``Speed`` indexed by ``Link``.
    (Original note: average speed per link over a 10-minute window.)
    """
    speed_by_link = dataframe.groupby('Link')['Speed']
    return speed_by_link.mean()

In [None]:
def Connectivity_Adj(dataframe, edge_number):
    """Build the edge-to-edge connectivity adjacency matrix.

    Entry ``[e, f]`` is 1 when edge ``f`` ends at the start node of edge ``e``
    or starts at the end node of edge ``e``, and 0 otherwise. If both hold
    (for example ``f`` is the reverse of ``e``) the entry is 0. The diagonal
    is always 0.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One row per edge with a ``Link`` column holding ``(start_node, end_node)``
        pairs and an integer ``EdgeID`` column. EdgeIDs are assumed unique and
        smaller than ``edge_number``.
    edge_number : int
        Size of the square matrix.

    Returns
    -------
    torch.Tensor
        ``(edge_number, edge_number)`` float tensor of zeros and ones.

    Notes
    -----
    Rows are read by position and written by their ``EdgeID`` value, so the
    function no longer depends on the frame's index labels equalling ``EdgeID``.
    The comparison is vectorised; the previous nested Python loop did
    ``edge_number ** 2`` individual ``.iloc`` lookups.
    """
    connect_adj = torch.zeros(edge_number, edge_number)
    if len(dataframe) == 0:
        return connect_adj

    links = dataframe['Link'].tolist()
    start_nodes = np.array([link[0] for link in links])
    end_nodes = np.array([link[1] for link in links])
    edge_ids = torch.as_tensor(dataframe['EdgeID'].to_numpy(dtype=np.int64))

    # precedes[p, i]: edge i ends where edge p starts.
    precedes = end_nodes[None, :] == start_nodes[:, None]
    # follows[p, i]: edge i starts where edge p ends.
    follows = start_nodes[None, :] == end_nodes[:, None]
    # Exactly one of the two must hold; both together is the excluded case.
    linked = torch.from_numpy(np.logical_xor(precedes, follows)).to(connect_adj.dtype)

    connect_adj[edge_ids[:, None], edge_ids[None, :]] = linked
    connect_adj[edge_ids, edge_ids] = 0

    return connect_adj

In [None]:
def Generating_FlowAdj(dataframe, edge_number):
    """Count edge-to-edge transitions of every trip into inflow/outflow matrices.

    For each trip, consecutive observations (in the row order of ``dataframe``)
    whose ``EdgeID`` differs count as one transition from the earlier edge to
    the later one.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Matched observations with ``TripID`` and integer ``EdgeID`` columns.
    edge_number : int
        Size of the square matrices. The argument is now honoured; it used to
        be overwritten with ``len(network_edges)`` from the notebook's globals.

    Returns
    -------
    (torch.Tensor, torch.Tensor)
        ``a_inflow[to, from]`` and ``a_outflow[from, to]`` transition counts,
        each of shape ``(edge_number, edge_number)``.

    Raises
    ------
    ValueError
        If a transition involves a missing ``EdgeID``.
    """
    a_inflow = torch.zeros(edge_number, edge_number)
    a_outflow = torch.zeros(edge_number, edge_number)

    from_parts = []
    to_parts = []
    # Group once instead of filtering the full frame for every trip;
    # sort=False keeps the rows of each trip in their original order.
    for _, trip_edges in dataframe.groupby('TripID', sort=False)['EdgeID']:
        ids = trip_edges.to_numpy()
        if len(ids) < 2:
            continue
        changed = ids[1:] != ids[:-1]  # staying on the same edge is not a transition
        from_parts.append(ids[:-1][changed])
        to_parts.append(ids[1:][changed])

    if not from_parts:
        return a_inflow, a_outflow

    from_ids = np.concatenate(from_parts)
    to_ids = np.concatenate(to_parts)
    if len(from_ids) == 0:
        return a_inflow, a_outflow
    if pd.isna(from_ids).any() or pd.isna(to_ids).any():
        raise ValueError('EdgeID is missing for at least one matched observation.')

    from_idx = torch.as_tensor(from_ids.astype(np.int64))
    to_idx = torch.as_tensor(to_ids.astype(np.int64))
    ones = torch.ones(len(from_idx))

    # accumulate=True adds up repeated (from, to) pairs instead of overwriting.
    a_inflow.index_put_((to_idx, from_idx), ones, accumulate=True)   ## In tensor: row = to, column = from
    a_outflow.index_put_((from_idx, to_idx), ones, accumulate=True)  ## In tensor: row = from, column = to
    return a_inflow, a_outflow

## Task

In [None]:
## Target day
# Day number passed as `day` to name_call / name_save in the processing loop,
# where it selects the input folder and the output folder and file names.
daycount = 2

In [None]:
# Static connectivity between edges; used below to mask the counted flows.
number = len(network_EdgeID)
adj_con = Connectivity_Adj(network_EdgeID, number)

start = time.time()

# One input file pair per slot of the target day.
# NOTE(review): 144 slots presumably means 24 h x 6 ten-minute intervals - confirm.
for i in range(0, 144):
    # Name definition...
    dataname = name_call(day=daycount, category='data', order=i, extension='pkl') # category = [data, traj], extension = [pkl, pickle]
    trajname = name_call(day=daycount, category='traj', order=i, extension='pickle') # category = [data, traj], extension = [pkl, pickle]
    inflow_name = name_save(day=daycount, category='in_flow', order=i, extension='pkl')
    outflow_name = name_save(day=daycount, category='out_flow', order=i, extension='pkl')
    network_speed_name = name_save(day=daycount, category='speed', order=i, extension='pkl')

    # Data Loading...
    # NOTE: pickle can execute arbitrary code on load; only read files you produced yourself.
    try:
        data1 = pd.read_pickle(dataname)
        with open(trajname, 'rb') as f:
            data2 = pickle.load(f)
    except FileNotFoundError: # If the file is missing, skip to the next iteration.
        continue
    except Exception as err:
        # Still best-effort, but the reason is reported, and KeyboardInterrupt /
        # SystemExit are no longer swallowed as they were by the bare `except:`.
        print(f'[{i:03}] skipped - could not load input: {err!r}')
        continue

    # Map Matching...
    try:
        matched_dataframe = Applying_Mapmatching(data1, map_con, data2)
    except Exception as err:
        print(f'[{i:03}] skipped - map matching failed: {err!r}')
        continue

    # Average Speed Dataframe
    average_speed = Network_Speed(matched_dataframe)
    network_speed = pd.merge(network_edges, average_speed, how='left', on='Link')

    # Working Flow Adjacency Matrix...
    inflow_adj, outflow_adj = Generating_FlowAdj(matched_dataframe, number)
    inflow_revised = torch.mul(inflow_adj, adj_con) # Hadamard multiplication — reset incorrectly counted flows
    outflow_revised = torch.mul(outflow_adj, adj_con) # Hadamard multiplication — reset incorrectly counted flows

    # Save Output...
    torch.save(inflow_revised, inflow_name) ## save to pickle
    torch.save(outflow_revised, outflow_name) ## save to pickle
    network_speed.to_pickle(network_speed_name) ## save to pickle

print(f'Task completed...\nElapsed time: {time.time()-start}')