In [1]:
import numpy as np
import pandas as pd
from datetime import datetime

import networkx as nx
from networkx import algorithms
import multiprocessing

In [55]:
class Node():
    def __init__(self, inter,ID):
        self.inter = inter
        self.id = ID

class Path():
    def __init__(self, nodelist):
        self.nodeList = nodelist
        self.len = len(nodelist)
    
    def append(self, node):
        self.nodeList.append(node)
        self.len += 1
    
    def travelTimePath(self):
        t = 0
        for i in range(self.len-1):
            t += travelTime(self.nodeList[i], self.nodeList[i+1])
        return t
        
class Trip():
    def __init__(self, oneTripData):
        self.node_o = Node(oneTripData['ointer'], str(oneTripData.name) + 'o')
        self.node_d = Node(oneTripData['dinter'], str(oneTripData.name) + 'd')
        self.ointer = self.node_o.inter
        self.dinter = self.node_d.inter
        self.oTime = oneTripData['otime']
        self.dTime = oneTripData['dtime']
        self.id = oneTripData.name


def travelTime(node_1, node_2):
    return distanceMatrixDict[(node_1.inter, node_2.inter)]['time']

def isSharable(Trip_1, Trip_2, timePenalty = 600, distancePenalty = 1, coveragePenalty = 1):
    if (Trip_1.oTime > Trip_2.oTime):
        trip_1 = Trip_2
        trip_2 = Trip_1
    else:
        trip_1 = Trip_1
        trip_2 = Trip_2
    # case 1
    if (trip_1.dTime > trip_2.dTime):
        if(trip_1.oTime + travelTime(trip_1.node_o, trip_2.node_o) < trip_2.oTime):
            path1 = Path([trip_1.node_o, trip_2.node_o, trip_2.node_d, trip_1.node_d])
            if(trip_1.dTime-timePenalty < trip_1.oTime+path1.travelTimePath() < trip_1.dTime):
                path2 = Path([trip_1.node_o, trip_2.node_o, trip_2.node_d])
                if(trip_1.dTime-timePenalty < trip_1.oTime+path2.travelTimePath() < trip_1.dTime):
                    if(path1.travelTimePath() < distancePenalty*travelTime(trip_1.node_o, trip_1.node_d)):
                        return True
                    else:
                        return False
                else:
                    return False
            else:
                return False
        else:
            return False
    else:
        if (trip_2.oTime < trip_1.dTime < trip_2.dTime):
            if(trip_1.oTime + travelTime(trip_1.node_o, trip_2.node_o) < trip_2.oTime):
                path1 = Path([trip_1.node_o, trip_2.node_o, trip_1.node_d])
                if(trip_1.dTime-timePenalty < trip_1.oTime+path1.travelTimePath() < trip_1.dTime):
                    path2 = Path([trip_1.node_o, trip_2.node_o, trip_1.node_d, trip_2.node_d])
                    if(trip_2.dTime-timePenalty < trip_1.oTime+path2.travelTimePath() < trip_2.dTime):
                        if(path1.travelTimePath() < distancePenalty*travelTime(trip_1.node_o, trip_1.node_d)):
                            path3 = Path([trip_2.node_o, trip_1.node_d, trip_2.node_d])
                            if(path3.travelTimePath() < distancePenalty*travelTime(trip_2.node_o, trip_2.node_d)):
                                if(travelTime(trip_1.node_o, trip_2.node_o) < coveragePenalty*travelTime(trip_1.node_o, trip_1.node_d)):
                                    return True
                            else:
                                return False
                        else:
                            return False
                    else:
                        return False
                else:
                    return False
            else:
                return False
        else:
            return False
    return False

class TripEdge():
    def __init__(self, pre_node, suc_node, weight):
        self.node_1 = pre_node
        self.node_2 = suc_node
        self.w = weight
        self.edge = (self.node_1, self.node_2, self.w)
        
def addTrip(i, tripData):
    trip_list = [Trip(tripData.iloc[i])]
    return trip_list
    
def generateTripList(tripData):
    cores = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes = cores-2)
    trip_list = []
    for trips in pool.starmap(addTrip, zip(range(len(tripData)), [tripData]*len(tripData))):
        trip_list += trips
    pool.close()
    return trip_list

def addEdge(i,trip_list, max_edge = 10):
    trip_edges = []
    cnt = 0
    for j in range(i+1, len(trip_list)):
        if isSharable(trip_list[i], trip_list[j]):
            weight = 1
            if trip_list[i].oTime > trip_list[j].oTime:
                trip_edges.append((trip_list[j].node_o.id,trip_list[i].node_d.id))
            else:
                trip_edges.append((trip_list[i].node_o.id,trip_list[j].node_d.id))
            cnt += 1
        if cnt == max_edge:
            break
    return trip_edges

def generateTripEdgeList(trip_list):
    cores = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes = cores-2)
    trip_edge_list = []
    for trip_edge in pool.starmap(addEdge,zip(range(len(trip_list)-1),[trip_list]*(len(trip_list)-1))):
        trip_edge_list += trip_edge
    pool.close()
    return trip_edge_list
                
class TripGraph():
    def __init__(self, trip_edge_list, trip_list):
        self.trip_edge_list = trip_edge_list
        self.num_trips = len(trip_list)
        self.g = nx.Graph()
        self.g.add_nodes_from([trip.node_d.id for trip in trip_list])
        self.g.add_nodes_from([trip.node_o.id for trip in trip_list])
        self.g.add_edges_from(self.trip_edge_list,weight=1)
        
        
    def addTripEdge(self, trip_edge):
        self.trip_edge_list.append(trip_edge)
        self.num_trips += 1

    def maxMatching(self):
        graphs = list(nx.connected_component_subgraphs(self.g))
        self.matches = {}
        for graph in graphs:
            self.matches.update(algorithms.matching.max_weight_matching(graph))
        self.number_match = len(self.matches)
        return (self.number_match, self.matches)
    
    def minFleeting(self):
        self.minFleet = self.num_trips - self.number_match/2
        return self.minFleet

In [39]:
start = datetime.now()

matrixPath = r'./od_time.xlsx'
tripPath = r'./trip_list.xlsx'
tripData = pd.read_excel(tripPath)
tripData = tripData.set_index('trip_rowid')
distanceMatrix = pd.read_excel(matrixPath)
distanceMatrix = distanceMatrix.sort_values(by = 'time')
distanceMatrix = distanceMatrix.sort_values(by = 'o')
distanceMatrix = distanceMatrix.set_index(list(distanceMatrix.columns[:2]))
distanceMatrixDict = distanceMatrix.to_dict(orient='index')

In [66]:
interval = 6000
minimumFleeting = 0
# for i in range(int(86400/interval)):
tripdata = tripData[tripData['otime'] < interval*(0+1)]
tripdata = tripdata[interval*0 < tripdata['otime']]
print('=='*20)
pre = datetime.now()
print('Data Preprocessing Finished in: %.2f s' % (pre-start).total_seconds())
trip_list = generateTripList(tripData)
print('=='*20)
tripTime = datetime.now()
print('Trip List Generated in: %.2f s' % (tripTime-pre).total_seconds())
trip_edge_list = generateTripEdgeList(trip_list)
print('=='*20)
edgeTime = datetime.now()
print('Trip Edge List Generated in: %.2f s' % (edgeTime-tripTime).total_seconds())
trip_graph = TripGraph(trip_edge_list, trip_list)
graphTime = datetime.now()
print('=='*20)
print('Trip Graph Initialized in: %.2f s' % (graphTime-edgeTime).total_seconds())
(num_matches, matching) = trip_graph.maxMatching()
minFleeting = trip_graph.minFleeting()
minimumFleeting += minFleeting
print('=='*20)
print('The Optimal Result Found is: %d' % minFleeting)
end = datetime.now()
print('=='*20)
print('The Time Spent was: %.2f s' % (end-start).total_seconds())

Data Preprocessing Finished in: 4471.79 s
Trip List Generated in: 8.48 s
Trip Edge List Generated in: 3486.40 s
Trip Graph Initialized in: 1.86 s
The Optimal Result Found is: 309077
The Time Spent was: 16665.67 s


In [67]:
minimumFleeting

309077.5

In [68]:
num_matches

38003

In [71]:
%load_ext line_profiler

In [None]:
%lprun -f generateTripEdgeList generateTripEdgeList(trip_list)

31

In [14]:
trip_graph.g.add_nodes_from([trip.node_d for trip in trip_list])

In [15]:
trip_graph.g.add_nodes_from([trip.node_o for trip in trip_list])

In [16]:
len(trip_graph.g.nodes)

1078447

In [20]:
a = [match.id for match in matching.keys()]
b = [match.id for match in matching.values()]

In [27]:
(212008/2 + (328079-212008))/328079

0.6768948942175512