# sampling nodes
ノード数が非常に多いため、マップをグリッドに分割し、セルごとに上限数までランダムにサンプリングして件数を減らす。
- input: Trimed_Node.csv, Raw_Node.csv(WayID の付与に使用)
- output: Node.csv

In [None]:
import pandas as pd
import os
from pathlib import Path
pwd = Path(os.getcwd())

import sys
sys.path.append(str(pwd.parent))
sys.path.append(str(pwd.parent / "config"))

from util import haversine
from setting import LocalRegionBound, SideLengthMeter
import random
from tqdm import tqdm

# Maximum number of nodes kept per grid cell when sampling.
SAMPLING_THREASHOLD = 50

# LocalRegionBound is unpacked here as (west, east, south, north).
MapWestBound = LocalRegionBound[0]
MapEastBound = LocalRegionBound[1]
MapSouthBound = LocalRegionBound[2]
MapNorthBound = LocalRegionBound[3]

# Midpoint of the region. The previous code used (max - min) / 2, which is
# half the *span* rather than the average coordinate, so the east-west width
# below was measured along a latitude close to 0 instead of the region's own
# latitude.
# NOTE(review): if other parts of the project still derive the grid from the
# old half-span values, their grid counts will differ from these - confirm.
AverageLongitude = (MapEastBound + MapWestBound) / 2
AverageLatitude = (MapNorthBound + MapSouthBound) / 2

# haversine is called as (lon1, lat1, lon2, lat2); its result is presumably in
# metres because it is divided by SideLengthMeter - TODO confirm in util.
# The "+ 1" inside int() rounds the cell count up so the grid covers the
# whole region.
NumGridWidth = int(
    haversine(
        MapWestBound,
        AverageLatitude,
        MapEastBound,
        AverageLatitude
    ) / SideLengthMeter + 1
)
NumGridHeight = int(
    haversine(
        AverageLongitude,
        MapSouthBound,
        AverageLongitude,
        MapNorthBound
    ) / SideLengthMeter + 1
)

In [None]:
class Grid:
    """A single cell of the map grid and the state attached to it.

    The constructor stores its arguments as-is (no copies are made), so the
    caller must pass a fresh container per instance when the cells are not
    supposed to share state.
    """

    def __init__(self, ID, Nodes, Neighbor, RebalanceNumber, IdleVehicles, VehiclesArrivetime, Orders):
        # Values supplied by the caller.
        self.ID = ID
        self.Nodes = Nodes
        self.Neighbor = Neighbor
        self.RebalanceNumber = RebalanceNumber
        self.IdleVehicles = IdleVehicles
        self.VehiclesArrivetime = VehiclesArrivetime
        self.Orders = Orders
        # Counters that always start at zero.
        self.PerRebalanceIdleVehicles = 0
        self.LaterRebalanceIdleVehicles = 0
        self.PerMatchIdleVehicles = 0

    def Reset(self):
        """Empty the vehicle/order containers in place and zero the counters.

        ID, Nodes, Neighbor and LaterRebalanceIdleVehicles are left untouched.
        """
        self.RebalanceNumber = 0
        for container in (self.IdleVehicles, self.VehiclesArrivetime, self.Orders):
            container.clear()
        self.PerRebalanceIdleVehicles = self.PerMatchIdleVehicles = 0

    def ArriveClusterUpDate(self, vehicle):
        """Append ``vehicle`` to IdleVehicles and pop it from VehiclesArrivetime."""
        self.IdleVehicles.append(vehicle)
        self.VehiclesArrivetime.pop(vehicle)

    def Example(self):
        """Print the cell's fields; neighbours are shown by their ID only."""
        print("ID:", self.ID)
        print("Nodes:", self.Nodes)
        print("Neighbor:[", *(cell.ID for cell in self.Neighbor), "]")
        remaining_fields = (
            ("RebalanceNumber:", self.RebalanceNumber),
            ("IdleVehicles:", self.IdleVehicles),
            ("VehiclesArrivetime:", self.VehiclesArrivetime),
            ("Orders:", self.Orders),
        )
        for label, value in remaining_fields:
            print(label, value)
        print()

In [None]:
# Extent of the region along each axis, in the same units as the map bounds.
total_width, total_height = (
    MapEastBound - MapWestBound,
    MapNorthBound - MapSouthBound,
)

# Total number of cells in the grid.
num_grid = NumGridWidth * NumGridHeight

# Extent of a single cell along each axis.
interval_width, interval_height = (
    total_width / NumGridWidth,
    total_height / NumGridHeight,
)

In [None]:
# Show the total number of grid cells (NumGridWidth * NumGridHeight).
print(num_grid)

In [None]:
# One empty Grid per cell; every cell receives its own fresh containers.
all_grid = [Grid(cell_id, [], [], 0, [], {}, []) for cell_id in range(num_grid)]

# Load the trimmed nodes, keeping only the ID and coordinate columns and
# dropping rows that are exact duplicates on those three columns.
node_df = pd.read_csv("../data/Trimed_Node.csv")
node_df = node_df.loc[:, ["NodeID", "Longitude", "Latitude"]].drop_duplicates()
node_location = node_df[["Longitude", "Latitude"]].values

# Map (longitude, latitude) -> NodeID. When several rows share the same
# coordinates, the last one wins.
node_set = {
    (longitude, latitude): node_id
    for (longitude, latitude), node_id in zip(node_location, node_df["NodeID"])
}

In [None]:
# Longitude bounds of grid column 0 (the westernmost column).
LeftBound = (MapWestBound + 0 * interval_width)
RightBound = (MapWestBound + (0+1) * interval_width)

In [None]:
def _find_interval_index(value, lower_bound, step, count):
    """Return the index of the interval that contains ``value``, or None.

    Interval ``k`` covers ``(lower_bound + k * step, lower_bound + (k + 1) * step]``.
    The first interval additionally includes ``lower_bound`` itself, so a
    point lying exactly on the lower edge of the grid is still assigned to a
    cell instead of being reported as out of bounds.
    """
    if count > 0 and value == lower_bound:
        return 0
    for k in range(count):
        if lower_bound + k * step < value <= lower_bound + (k + 1) * step:
            return k
    return None


# (row step, column step) for the eight surrounding cells, in the order in
# which neighbours are appended: up, down, left, right, left-up, left-down,
# right-up, right-down. Rows grow northwards, columns grow eastwards.
_NEIGHBOR_OFFSETS = (
    (1, 0),
    (-1, 0),
    (0, -1),
    (0, 1),
    (1, -1),
    (-1, -1),
    (1, 1),
    (-1, 1),
)

NodeID2NodesLocation = {}

# Assign every node to the grid cell that contains its coordinates.
#------------------------------------------------------
for (longitude, latitude), node_id in tqdm(node_set.items()):
    column = _find_interval_index(longitude, MapWestBound, interval_width, NumGridWidth)
    row = _find_interval_index(latitude, MapSouthBound, interval_height, NumGridHeight)

    if column is None or row is None:
        raise ValueError(
            f"node {node_id} at (longitude={longitude}, latitude={latitude}) "
            "lies outside the grid bounds"
        )

    # Cells are stored row by row, so the flat index is row * width + column.
    all_grid[NumGridWidth * row + column].Nodes.append((node_id, (longitude, latitude)))
#------------------------------------------------------

# Build the NodeID -> (longitude, latitude) lookup from the per-cell lists.
for cell in all_grid:
    for node_id, location in cell.Nodes:
        NodeID2NodesLocation[node_id] = location

#Add neighbors to each grid
#------------------------------------------------------
for cell in all_grid:
    row, column = divmod(cell.ID, NumGridWidth)
    for row_step, column_step in _NEIGHBOR_OFFSETS:
        neighbor_row = row + row_step
        neighbor_column = column + column_step
        # Skip offsets that would fall outside the grid.
        if 0 <= neighbor_row < NumGridHeight and 0 <= neighbor_column < NumGridWidth:
            cell.Neighbor.append(all_grid[neighbor_row * NumGridWidth + neighbor_column])


In [None]:
# Keep every node of a cell holding fewer than SAMPLING_THREASHOLD nodes;
# otherwise keep SAMPLING_THREASHOLD nodes chosen by random.sample.
sampled_nodes = []
for grid in all_grid:
    nodes = grid.Nodes
    picked = (
        nodes
        if len(nodes) < SAMPLING_THREASHOLD
        else random.sample(nodes, SAMPLING_THREASHOLD)
    )
    sampled_nodes.extend(picked)

In [None]:
# Each sampled entry has the form (node_id, (longitude, latitude)).
sampled_node_ids = [node_id for node_id, _ in sampled_nodes]
sampled_longitudes = [location[0] for _, location in sampled_nodes]
sampled_latitudes = [location[1] for _, location in sampled_nodes]

sampled_node_df = pd.DataFrame({
    "NodeID": sampled_node_ids,
    "Longitude": sampled_longitudes,
    "Latitude": sampled_latitudes,
})
# Print a summary (row count, column dtypes) of the sampled frame.
sampled_node_df.info()

In [None]:
# Attach the WayID recorded in the raw node file to every sampled node.
node_way_df = pd.read_csv("../data/Raw_Node.csv")
# NodeID -> WayID lookup; if a NodeID occurs more than once, the last row wins.
way_id_map = dict(zip(node_way_df["NodeID"], node_way_df["WayID"]))
sampled_node_df["WayID"] = sampled_node_df["NodeID"].map(way_id_map)

In [None]:
# Write the sampled nodes with a fixed column order and without the index.
output_columns = ["NodeID", "WayID", "Longitude", "Latitude"]
sampled_node_df[output_columns].to_csv("../data/Node.csv", index=False)