In [0]:
import pandas as pd
import matplotlib.pyplot as plt
import requests
from h3 import h3
import json
from urllib.request import URLError, Request, urlopen
from itertools import combinations
from itertools import permutations
from dateutil import parser
from datetime import datetime, timedelta
import math
import networkx as nx

In [0]:
# Load the LaGuardia drop-off trips and keep only the columns used downstream.
columns = ['tpep_pickup_datetime', 'tpep_dropoff_datetime', 'passenger_count',
           'trip_distance', 'pickup_longitude', 'pickup_latitude',
           'dropoff_longitude', 'dropoff_latitude']
df = pd.read_csv('Data/LaGuardia_Dropoff_Final.csv')[columns].rename(
    columns={'tpep_pickup_datetime': 'pickup_time',
             'tpep_dropoff_datetime': 'dropoff_time'})
df['pickup_time'] = pd.to_datetime(df['pickup_time'])
df['dropoff_time'] = pd.to_datetime(df['dropoff_time'])

# Restrict to the study window BEFORE the per-row H3 encoding so that the
# encoding is not computed for rows that are about to be discarded.
start_date = '2016-01-29 08:00:00'  # Start date with time
end_date = '2016-01-30 23:59:59'  # End date with time
in_window = (df['pickup_time'] >= start_date) & (df['dropoff_time'] <= end_date)
# .copy() gives an independent frame, so the column assignments below (and in
# later cells) do not raise SettingWithCopyWarning. Index labels are kept.
df = df[in_window].copy()

# H3 cell ids: pickups at resolution 8, drop-offs at resolution 10.
# A list comprehension (rather than DataFrame.apply) also works when the
# window selects no rows.
df['pickup_h3'] = [
    h3.geo_to_h3(lat, lng, 8)
    for lat, lng in zip(df['pickup_latitude'], df['pickup_longitude'])
]
df['dropoff_h3'] = [
    h3.geo_to_h3(lat, lng, 10)
    for lat, lng in zip(df['dropoff_latitude'], df['dropoff_longitude'])
]
df.head(10)

In [0]:
def ceil_dt(dt, delta):
    """Round ``dt`` up to the next multiple of ``delta``.

    Multiples are counted from ``datetime.min``; a ``dt`` that already sits
    exactly on a multiple is returned unchanged.

    Args:
        dt: naive ``datetime`` to round.
        delta: ``timedelta`` giving the step size.

    Returns:
        The smallest ``datetime.min + k * delta`` that is not before ``dt``.
    """
    elapsed = dt - datetime.min
    steps = math.ceil(elapsed / delta)
    return datetime.min + steps * delta

pool_time_window = 10  # Pooling window length in minutes; change to tune
# Bucket every trip into the pooling window that ends at or after its pickup.
df['pool_window'] = df['pickup_time'].apply(
    lambda x: ceil_dt(x.to_pydatetime(), timedelta(minutes=pool_time_window)))
# Trip duration in seconds, computed as drop-off minus pick-up. Subtracting
# the other way round gives negative timedeltas, for which `.dt.seconds`
# reports 86400 minus the real duration. `total_seconds()` also does not wrap
# for trips longer than a day.
df['duration'] = (df['dropoff_time'] - df['pickup_time']).dt.total_seconds()
# Tolerated extra travel time: 20% of the solo trip duration.
df['delay'] = df['duration'] * 0.20

In [0]:
# Build `df_distance`: driving distance and travel time between the pickup H3
# cells of every ordered pair of trips, obtained from a GraphHopper routing
# server on localhost. The result is indexed by (pickup_h3, dropoff_h3), where
# "dropoff_h3" holds the pickup cell of the second trip of the pair.
GRAPHHOPPER_ROUTE_URL = 'http://localhost:8989/route'
ROUTE_TIMEOUT_SECONDS = 10
METERS_PER_MILE = 1609.344

# Iterate over row values directly. The frame was filtered, so its index
# labels are not valid positions for `.iloc`.
pickup_points = df[['pickup_h3', 'pickup_latitude', 'pickup_longitude']]
routes = {}  # (origin cell, destination cell) -> (miles, seconds)
failed_requests = 0

for origin, destination in permutations(pickup_points.itertuples(index=False), 2):
    cell_pair = (origin.pickup_h3, destination.pickup_h3)
    # Route each cell pair once (the first trip pair seen represents it). This
    # keeps the index unique, so `.loc[(a, b)]` lookups return scalars, and it
    # avoids repeating the same HTTP request.
    if cell_pair in routes:
        continue
    query = [
        ('point', '{},{}'.format(origin.pickup_latitude, origin.pickup_longitude)),
        ('point', '{},{}'.format(destination.pickup_latitude, destination.pickup_longitude)),
        ('vehicle', 'car'),
    ]
    try:
        response = requests.get(GRAPHHOPPER_ROUTE_URL, params=query,
                                timeout=ROUTE_TIMEOUT_SECONDS)
        response.raise_for_status()
        best_path = response.json()['paths'][0]
        meters, millis = best_path['distance'], best_path['time']
    except (requests.RequestException, ValueError, KeyError, IndexError):
        # Best effort: a pair that cannot be routed is left out of the table.
        failed_requests += 1
        continue
    # Distance in miles. Duration in seconds, the same unit as df['duration'],
    # which get_all_pairs adds to these values. (The previous "minutes.seconds"
    # encoding, e.g. 5.30 for 5 min 30 s, could not be summed.)
    routes[cell_pair] = (meters / METERS_PER_MILE, millis / 1000)

if failed_requests:
    print('Skipped {} cell pairs that could not be routed'.format(failed_requests))

df_distance = pd.DataFrame(
    [(src, dst, miles, seconds) for (src, dst), (miles, seconds) in routes.items()],
    columns=['pickup_h3', 'dropoff_h3', 'distance', 'duration'],
).set_index(['pickup_h3', 'dropoff_h3'])
df_distance.head(10)

In [0]:
class Node:
    """One trip, wrapped so it can be used as a vertex of a graph."""

    def __init__(self, idx, data):
        """Copy the trip fields from ``data`` onto the node.

        Args:
            idx: identifier stored as ``self.id``.
            data: object exposing the trip fields as attributes
                (``pickup_latitude``, ``pickup_longitude``, ``pickup_h3``,
                ``dropoff_latitude``, ``dropoff_longitude``, ``dropoff_h3``,
                ``pickup_time``, ``dropoff_time``, ``trip_distance``,
                ``duration``, ``delay``).
        """
        # Locations are (latitude, longitude, h3 cell) triples.
        pickup = (data.pickup_latitude, data.pickup_longitude, data.pickup_h3)
        dropoff = (data.dropoff_latitude, data.dropoff_longitude, data.dropoff_h3)

        self.id = idx
        self.pickup_location = pickup
        self.dropoff_location = dropoff
        self.pickup_time = data.pickup_time
        self.dropoff_time = data.dropoff_time
        self.distance = data.trip_distance
        self.duration = data.duration
        self.delay = data.delay
        # TODO: further attributes still to be decided.

In [0]:
def get_all_pairs(node_a, node_b, trip_type):
    """Evaluate both visiting orders for sharing one ride between two trips.

    Leg distances and durations between H3 cells are read from the
    module-level ``df_distance`` table; a missing cell pair raises ``KeyError``.

    Args:
        node_a: first trip (``Node``).
        node_b: second trip (``Node``).
        trip_type: ``1`` for trips leaving LGA (LGA -> a -> b and
            LGA -> b -> a); any other value for trips heading to LGA
            (a -> b -> LGA and b -> a -> LGA).

    Returns:
        A pair of 4-tuples, one per visiting order, each
        ``(total_distance, total_duration, duration_for_a, duration_for_b)``.
    """

    def leg(origin_cell, dest_cell):
        # Distance and duration of one hop, from a single df_distance lookup.
        entry = df_distance.loc[(origin_cell, dest_cell)]
        return entry['distance'], entry['duration']

    a_pickup, a_dropoff = node_a.pickup_location[2], node_a.dropoff_location[2]
    b_pickup, b_dropoff = node_b.pickup_location[2], node_b.dropoff_location[2]

    if trip_type == 1:
        # Order 1: LGA -> a -> b (a leaves after the first leg, b rides it all).
        # TODO: fall back to GraphHopper when a leg is not in df_distance.
        lga_a_dist, lga_a_dur = leg(a_pickup, a_dropoff)
        a_to_b_dist, a_to_b_dur = leg(a_dropoff, b_dropoff)
        # Order 2: LGA -> b -> a (b leaves after the first leg, a rides it all).
        lga_b_dist, lga_b_dur = leg(b_pickup, b_dropoff)
        b_to_a_dist, b_to_a_dur = leg(b_dropoff, a_dropoff)

        first_total_dist = lga_a_dist + a_to_b_dist
        first_total_dur = lga_a_dur + a_to_b_dur
        second_total_dist = lga_b_dist + b_to_a_dist
        second_total_dur = lga_b_dur + b_to_a_dur

        first_order = (first_total_dist, first_total_dur, lga_a_dur, first_total_dur)
        second_order = (second_total_dist, second_total_dur, second_total_dur, lga_b_dur)
    else:
        # Order 1: a -> b -> LGA (a rides it all, b only its own trip).
        a_to_b_dist, a_to_b_dur = leg(a_pickup, b_pickup)
        # Order 2: b -> a -> LGA (b rides it all, a only its own trip).
        b_to_a_dist, b_to_a_dur = leg(b_pickup, a_pickup)

        first_total_dist = a_to_b_dist + node_b.distance
        first_total_dur = a_to_b_dur + node_b.duration
        second_total_dist = b_to_a_dist + node_a.distance
        second_total_dur = b_to_a_dur + node_a.duration

        first_order = (first_total_dist, first_total_dur, first_total_dur, node_b.duration)
        second_order = (second_total_dist, second_total_dur, node_a.duration, second_total_dur)

    return (first_order, second_order)

In [0]:
def calculate_edge_weight(node_a, node_b):
    """Return the distance saved by pooling two trips into one ride.

    Both visiting orders from ``get_all_pairs(node_a, node_b, 2)`` are
    considered. An order is feasible when its total distance does not exceed
    the sum of the two solo trip distances and each rider's travel time stays
    within that rider's solo duration plus allowed delay.

    Args:
        node_a: first trip (``Node``).
        node_b: second trip (``Node``).

    Returns:
        ``node_a.distance + node_b.distance`` minus the shortest feasible
        shared distance, or ``float('inf')`` when neither order is feasible.
        ``inf`` is the sentinel that ``get_rsg`` checks for; previously the
        infeasible case produced ``-inf``, which slipped past that check.
    """
    solo_distance = node_a.distance + node_b.distance
    feasible_distances = []
    for path in get_all_pairs(node_a, node_b, 2):
        shared_distance, _, a_travel_time, b_travel_time = path
        distance_constraint = shared_distance <= solo_distance
        delay_constraint = (a_travel_time <= node_a.duration + node_a.delay
                            and b_travel_time <= node_b.duration + node_b.delay)
        # TODO: add the social constraint too.
        if distance_constraint and delay_constraint:
            feasible_distances.append(shared_distance)

    if not feasible_distances:
        return float('inf')
    return solo_distance - min(feasible_distances)

In [0]:
def get_rsg(G):
    """Turn a graph of trip nodes into a ride-sharing graph, in place.

    For every unordered pair of nodes, an edge weighted by the distance saved
    (``calculate_edge_weight``) is added when that saving is a finite number.

    Args:
        G: graph whose nodes are the trips of one pooling window.

    Returns:
        The same graph object ``G``, with the edges added.
    """
    for node_a, node_b in combinations(G, 2):
        distance_saved = calculate_edge_weight(node_a, node_b)
        # Infeasible pairings show up as +inf or -inf (inf minus a finite
        # number), so testing only for +inf would let -inf edges through.
        if math.isfinite(distance_saved):
            G.add_edge(node_a, node_b, weight=distance_saved)
    return G

In [0]:
# One graph per pooling window; every trip of the window becomes a Node.
# The trips come from `df`, the frame `df_distance` was built from, so the
# cell lookups made while weighting edges refer to the same trips. (The name
# `sample` used here before is not defined anywhere in this notebook.)
graphs = []
for _, trips in df.groupby('pool_window'):
    trips = trips.reset_index()
    G = nx.Graph()
    G.add_nodes_from(Node(idx, row) for idx, row in trips.iterrows())
    graphs.append(G)

# Start of the code
weight_matches = []
for g in graphs:
    # get_rsg adds the weighted edges to `g` itself and returns it.
    ride_sharing_graph = get_rsg(g)
    # Maximum-weight matching, preferring matchings with the most pairs.
    match = nx.max_weight_matching(ride_sharing_graph, maxcardinality=True)
    g_match = nx.Graph()
    g_match.add_edges_from(match)
    weight_matches.append(g_match)

In [0]:
# Show the pairs selected for the first pooling window with their edge data.
print("Selected edges by maximum weight algorithm")
if weight_matches:
    # graphs[0] is the ride-sharing graph of the first window (get_rsg adds
    # its edges in place). `ride_sharing_graph` is whatever the matching loop
    # processed last, which is a different window unless there is only one.
    first_window_graph = graphs[0]
    for u, v in weight_matches[0].edges:
        print(u.id, v.id, first_window_graph.get_edge_data(u, v))