### Post-process the FMM output produced by task3_fmm.py, for use in Task 4 and Task 5

In [22]:
import csv
import geopandas as gp

#set network name
network_name = "porto_big" #porto_whole1/porto_big
#set to relevant Graph G network files that were saved using FMM author's method
nodes_shp = gp.GeoDataFrame.from_file(network_name + "/nodes.shp")
edges_shp = gp.GeoDataFrame.from_file(network_name + "/edges.shp")


output_file = network_name + '/'+ network_name + "_output.txt"
input_file = network_name + '/trips_time.txt'

#Both files are ';'-separated text with one header line followed by one line per trajectory
with open(output_file) as f:
    lines = f.readlines()
with open(input_file) as f:
    lines_inp = f.readlines()


#IDs (first field) of the trajectories whose output line carries a non-empty cpath (third field)
trajectories_mapmatched = []
for output_row in lines[1:]:
    output_fields = output_row.replace("\n", "").split(";")
    if output_fields[2]:
        trajectories_mapmatched.append(int(output_fields[0]))
#print(trajectories_mapmatched)
#print("Total trajectory matched: ", len(trajectories_mapmatched))

#Identify the trajectories that failed map matching (missing from the output or with an empty cpath).
#The number of trajectories is taken from the input file instead of a hard-coded 1000, so the
#last trajectory is no longer skipped and other dataset sizes work as well.
#NOTE: assumes trajectory IDs are the 1-based positions of the input rows (the csv writer
#below indexes the raw GPS list with ID - 1) -- confirm against trips_time.txt
total_trajectories = len(lines_inp) - 1  #minus the header line
matched_ids = set(trajectories_mapmatched)  #set gives O(1) membership tests
trajectories_failed = [
    trajectory_id
    for trajectory_id in range(1, total_trajectories + 1)
    if trajectory_id not in matched_ids
]
#print("Total trajectory failed: ", len(trajectories_failed))
#print(trajectories_failed)

def generate_nodes_edges_from_cpath(cpath, edges=None):
    """Translate an FMM cpath into node, edge-index and road osmid sequences.

    Args:
        cpath: Iterable of edge indices used to look up rows in ``edges``.
        edges: Table supporting ``edges[column][edge]`` lookups for the
            columns ``'osmid'``, ``'u'`` and ``'v'``. Defaults to the
            module-level ``edges_shp`` when omitted.

    Returns:
        A tuple ``(nodeOsmidList, edgeIndexList, roadOsmidList)``:
        * the visited node ids, where an edge's start node is only added
          when it differs from the previously appended node;
        * the edge indices exactly as given in ``cpath``;
        * the road osmid lists, with consecutive duplicates collapsed.

    Raises:
        ValueError: If an ``osmid`` entry cannot be parsed as an integer or
            as a bracketed, comma-separated list of integers.
    """
    if edges is None:
        edges = edges_shp
    # Look the columns up once instead of on every iteration.
    osmid_column = edges['osmid']
    start_column = edges['u']
    end_column = edges['v']

    nodeOsmidList = []
    roadOsmidList = []
    edgeIndexList = []
    prevRoadOsmid = None
    for edge in cpath:
        # An osmid is either a single id ("123") or a stringified list
        # ("[1, 2]"); str() also accepts values that are not strings, and
        # int() tolerates the whitespace left after splitting on ','.
        osmid_text = str(osmid_column[edge]).strip().strip('][')
        roadID = [int(token) for token in osmid_text.split(',')]
        if roadID != prevRoadOsmid:
            roadOsmidList.append(roadID)
        prevRoadOsmid = roadID

        edgeIndexList.append(edge)

        prevNode = int(start_column[edge])
        nextNode = int(end_column[edge])
        # Skip the start node when it merely repeats the previous end node.
        if not nodeOsmidList or nodeOsmidList[-1] != prevNode:
            nodeOsmidList.append(prevNode)
        nodeOsmidList.append(nextNode)
    return nodeOsmidList, edgeIndexList, roadOsmidList

nodes_route_list = []
edges_route_list = []
roads_route_list = []
#Convert the cpath (third ';' field) of every matched output line into
#a node osmid route, an edge index route and a road osmid route
for output_row in lines[1:]:
    output_fields = output_row.replace("\n", "").split(";")
    if not output_fields[2]:
        continue  #Only work on lines that have a valid mapmatched output
    cpath_list = [int(n) for n in output_fields[2].split(",")]
    nodes_route, edges_route, roads_route = generate_nodes_edges_from_cpath(cpath_list)
    nodes_route_list.append(nodes_route)
    edges_route_list.append(edges_route)
    roads_route_list.append(roads_route)
print("Successfully process nodes, roads and edge indice route")
#Each label is now paired with the lists it names (the edge and road counts were swapped before)
print("Total nodes/road osmid routes processed", len(nodes_route_list), len(roads_route_list))
print("Total edge indice (map id) routes processed", len(edges_route_list))

wsg84_routes_list = []
#Turn the LINESTRING geometry (fourth ';' field) of every matched output line
#into a list of [float, float] coordinate pairs
for output_row in lines[1:]:
    output_fields = output_row.rstrip("\n").split(";")
    if not output_fields[2]:
        continue  #no cpath, so this trajectory was not matched
    coordinate_text = output_fields[3].replace("LINESTRING", "").strip(")(")
    wsg84_points_route = []
    for pair_text in coordinate_text.split(","):
        pair = pair_text.split(" ")
        wsg84_points_route.append([float(pair[0]), float(pair[1])])
    wsg84_routes_list.append(wsg84_points_route)
print("Successfully process wsg84 points route")
print("Total wsg84 routes processed", len(wsg84_routes_list))

#Mark each data instance as a failed or successful FMM mapped trajectory:
#matched IDs come first, followed by the failed ones
full_id_order = trajectories_mapmatched + trajectories_failed

#Parse the raw GPS LINESTRING (second ';' field) of every input line so it can be saved
inp_data = lines_inp[1:]
raw_gps_points = []
for trip_row in inp_data:
    coordinate_text = trip_row.split(";")[1].replace("LINESTRING", "").strip(")(")
    raw_route = []
    for pair_text in coordinate_text.split(","):
        pair = pair_text.split(" ")
        raw_route.append([float(pair[0]), float(pair[1])])
    raw_gps_points.append(raw_route)
print("Successfully process raw gps wsg84 points route")
print("Total raw gps wsg84 routes processed", len(raw_gps_points))

#Save the post-processed output as a 7-column csv file:
#col1: trajectory ID (zero-based: FMM ID - 1), col2: original raw GPS point list, col3: match_success,
#col4: fmm-mapped node indice sequence, col5: fmm-mapped edge indice sequence,
#col6: fmm-mapped WSG84 points sequence, col7: overflow of col6 when it is too long for one cell
#The road osmid sequence (roads_route_list) is intentionally not written to the file.
header = ['trajectory_id','raw_gps_trajectories', 'match_success','fmm-mapped node indice sequence(osmid)', 'fmm-mapped edge indice sequence(map edge id)(equivalent to cpath from FMM)',
          'fmm-mapped WSG84 points sequence', 'fmm-mapped WSG84 points 2nd sequence']

#Maximum number of characters of the stringified WSG84 route kept in the first cell.
#NOTE(review): presumably chosen to stay under the per-cell character limit of
#spreadsheet tools -- confirm. Routes longer than twice this value still overflow col7.
MAX_CELL_CHARS = 32758

with open(network_name + '/postprocessed_output.csv', 'w',  encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)

    #Matched trajectories: th is the position in the *_route_list results, i the FMM ID
    for th, i in enumerate(trajectories_mapmatched):
        trajectory_ID = i - 1
        wsg84_points_sequence = str(wsg84_routes_list[th])
        if len(wsg84_points_sequence) > MAX_CELL_CHARS:
            print("warning long wsg84 route detected! Splitting data into 2 cells for trajectory ID", i)
        #Slicing yields (whole string, "") for short routes, so one row layout covers both cases
        writer.writerow([
            trajectory_ID,
            raw_gps_points[trajectory_ID],
            "True",
            nodes_route_list[th],
            edges_route_list[th],
            wsg84_points_sequence[:MAX_CELL_CHARS],
            wsg84_points_sequence[MAX_CELL_CHARS:],
        ])

    #Failed trajectories: only the ID and the raw GPS points are available
    for i in trajectories_failed:
        trajectory_ID = i - 1
        writer.writerow([trajectory_ID, raw_gps_points[trajectory_ID], "False", "", "", "", ""])

Successfully process nodes, roads and edge indice route
Total nodes/road osmid routes processed 944 944
Total edge indice (map id) routes processed 944
Successfully process wsg84 points route
Total wsg84 routes processed 944
Successfully process raw gps wsg84 points route
Total raw gps wsg84 routes processed 1000
