In [27]:
import pandas as pd
import matplotlib.pyplot as plt
import requests
from h3 import h3
import json
from urllib.request import URLError, Request, urlopen
from itertools import combinations
from itertools import permutations
from dateutil import parser
from datetime import datetime, timedelta
import math
import networkx as nx

In [2]:
df = pd.read_csv('Data/LaGuardia_Dropoff_Final.csv')
columns = ['tpep_pickup_datetime', 'tpep_dropoff_datetime','passenger_count',\
           'trip_distance', 'pickup_longitude','pickup_latitude','dropoff_longitude', 'dropoff_latitude']
df = df[columns]
df.rename(columns={'tpep_pickup_datetime':'pickup_time',
       'tpep_dropoff_datetime':'dropoff_time'},inplace=True)
df['pickup_time'] = pd.to_datetime(df['pickup_time'])
df['dropoff_time'] = pd.to_datetime(df['dropoff_time'])
df['pickup_h3'] = df.apply(lambda x: h3.geo_to_h3(x['pickup_latitude'], x['pickup_longitude'], 8), axis=1)
df['dropoff_h3'] = df.apply(lambda x: h3.geo_to_h3(x['dropoff_latitude'], x['dropoff_longitude'], 10), axis=1)


start_date = '2016-01-29 08:00:00' # Start date with time
end_date = '2016-01-30 23:59:59' # End date with time
df=df[(df['pickup_time'] >= start_date) & (df['dropoff_time'] <= end_date)]

# Installation steps


### 1.Install the latest JRE and get GraphHopper Server as zip from <a href=https://graphhopper.com/public/releases/graphhopper-web-0.10.3-bin.zip>Graphhopper API</a>. Unzip it.
### 2.Copy this OSM file into the SAME unzipped directory: <a href=https://download.geofabrik.de/north-america/us/new-york-latest.osm.pbf >new-york-latest.osm.pbf</a>
### 3.Start GraphHopper Maps via: ava -jar graphhopper-web-0.10.3-with-dep.jar jetty.resourcebase=webapp config=config-example.properties datareader.file=new-york-latest.osm.pbf. 
### 3.Test to see if its running after you see 'Started server at HTTP 8989' by going to http://localhost:8989/ and you should see a map of New York.
### 4.Keep this running when executing our program because this is the API

In [124]:
#Check how graphhopper works
a,b,c,d = df['pickup_latitude'][0],df['pickup_longitude'][0],df['dropoff_latitude'][0],df['dropoff_longitude'][0]
request_str = 'http://localhost:8989/route?point=' + str(a) + '%2C' + str(b) + '&point=' + str(
            c) + '%2C' + str(d) + '&vehicle=car'
request = Request(request_str)
res=requests.get(request_str)
print("Distance = {}".format(json.loads(res.text)['paths'][0]['distance']))
print("Time = {}".format(json.loads(res.text)['paths'][0]['time']))

Distance = 14964.969
Time = 892052


# To calculate pool_window.
## The dataframe will be divided into pools.We run algorithm for each of these pools

In [3]:
def ceil_dt(dt, delta):
    return datetime.min + math.ceil((dt - datetime.min) / delta) * delta

pool_time_window = 10 # Change pool time window
df['pool_window'] = df['pickup_time'].apply(lambda x: ceil_dt(x.to_pydatetime(), timedelta(minutes=pool_time_window)))
df['duration'] = (df['pickup_time']-df['dropoff_time']).dt.seconds
df['delay'] = df['duration'].apply(lambda x: x*0.20)

## This is expected to be calculated and stored in a csv file. Later loaded into a dataframe before running the algorithm.The columns that should be stored in the csv files are same as following dataframe.

In [40]:
df_distance =  pd.DataFrame(columns = ['pickup_h3','dropoff_h3','distance','duration'])

# The nodes of the graph

In [59]:
class Node:
    def __init__(self,idx,data):
        self.id = idx
        self.pickup_location = (data.pickup_latitude,data.pickup_longitude,data.pickup_h3)
        self.dropoff_location = (data.dropoff_latitude,data.dropoff_longitude,data.dropoff_h3)
        self.pickup_time = data.pickup_time
        self.dropoff_time = data.dropoff_time
        self.distance = data.trip_distance
        self.duration = data.duration
        self.delay = data.delay
        #TBD..

# Lets try with a sample of 4 trips going to LGA

In [223]:
sample=df[:4]
sample

Unnamed: 0,pickup_time,dropoff_time,passenger_count,trip_distance,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,pickup_h3,dropoff_h3,pool_window,duration,delay
0,2016-01-29 09:18:54,2016-01-29 10:08:47,2.0,12.93,-74.000771,40.76231,-73.865028,40.77058,882a107251fffff,8a2a100f525ffff,2016-01-29 09:20:00,83407,16681.4
1,2016-01-29 09:19:22,2016-01-29 10:01:30,5.0,8.76,-73.977409,40.753002,-73.861877,40.76844,882a100d67fffff,8a2a100e250ffff,2016-01-29 09:20:00,83872,16774.4
2,2016-01-29 09:19:23,2016-01-29 09:51:46,1.0,10.3,-73.985771,40.760078,-73.861649,40.768291,882a100d65fffff,8a2a100e250ffff,2016-01-29 09:20:00,84457,16891.4
3,2016-01-29 09:19:48,2016-01-29 09:49:40,1.0,10.0,-73.975052,40.787968,-73.865036,40.770565,882a100883fffff,8a2a100f525ffff,2016-01-29 09:20:00,84608,16921.6


## Creating a toy distance df for the above sample dataframe.Note: Since the 'Data/LaGuardia_Dropoff_Final.csv' corresponds to LGA as destination we need only the distance between the pickup of individual trips.

## For trips going from LGA we would be needing distance between dropoffs

In [225]:
a,b,d,e=[],[],[],[]
from random import random
# creating toy distance to run algorithm
for node_a,node_b in list(permutations(sample.index,2)):
    a.append(sample.iloc[node_a]['pickup_h3'])
    b.append(sample.iloc[node_b]['pickup_h3'])
    d.append(round(random(),2))
    e.append(round(random(),2))
df_distance['pickup_h3'] = a
df_distance['dropoff_h3'] = b
df_distance['distance'] = d
df_distance['duration'] = e
df_distance = df_distance.set_index(['pickup_h3','dropoff_h3'])
df_distance

Unnamed: 0_level_0,Unnamed: 1_level_0,distance,duration
pickup_h3,dropoff_h3,Unnamed: 2_level_1,Unnamed: 3_level_1
882a107251fffff,882a100d67fffff,0.22,0.17
882a107251fffff,882a100d65fffff,0.36,0.91
882a107251fffff,882a100883fffff,0.25,0.5
882a100d67fffff,882a107251fffff,0.74,0.19
882a100d67fffff,882a100d65fffff,0.53,0.75
882a100d67fffff,882a100883fffff,0.89,0.37
882a100d65fffff,882a107251fffff,0.12,0.26
882a100d65fffff,882a100d67fffff,0.07,0.79
882a100d65fffff,882a100883fffff,0.01,0.92
882a100883fffff,882a107251fffff,0.49,0.51


In [226]:
def get_all_pairs(node_a,node_b,trip_type):
    if trip_type == 1:
        #Combination LGA--> a -->b
        #if no distance call graphhopper 
        LGA_a_dist = df_distance.loc[(node_a.pickup_location[2],node_a.dropoff_location[2])]['distance']
        a_b_dist = df_distance.loc[(node_a.dropoff_location[2],node_b.dropoff_location[2])]['distance']
        LGA_a_dur = df_distance.loc[(node_a.pickup_location[2],node_a.dropoff_location[2])]['duration']
        a_b_dur = df_distance.loc[(node_a.dropoff_location[2],node_b.dropoff_location[2])]['duration']
        
        #Combination LGA--> b -->a
        LGA_b_dist = df_distance.loc[(node_b.pickup_location[2],node_b.dropoff_location[2])]['distance']
        b_a_dist = df_distance.loc[(node_b.dropoff_location[2],node_a.dropoff_location[2])]['distance']
        LGA_b_dur = df_distance.loc[(node_b.pickup_location[2],node_b.dropoff_location[2])]['duration']
        b_a_dur = df_distance.loc[(node_b.dropoff_location[2],node_a.dropoff_location[2])]['duration']
        
        path_1_total_dis,path_1_total_dur = LGA_a_dist + a_b_dist,LGA_a_dur + a_b_dur 
        path_1_a_dur,path_1_b_dur = LGA_a_dur,path_1_total_dur
        
        path_2_total_dis,path_2_total_dur = LGA_b_dist+b_a_dist,LGA_b_dur+b_a_dur
        path_2_a_dur,path_2_b_dur         = path_2_total_dur ,LGA_b_dur
               
    else:
        #Combination a--> b --> LGA
        a_b_dist = df_distance.loc[(node_a.pickup_location[2],node_b.pickup_location[2])]['distance']
        b_LGA_dist = node_b.distance 
        a_b_dur = df_distance.loc[(node_a.pickup_location[2],node_b.pickup_location[2])]['duration']
        b_LGA_dur = node_b.duration
        
        #Combination b--> a --> LGA
        b_a_dist = df_distance.loc[(node_b.pickup_location[2],node_a.pickup_location[2])]['distance']
        a_LGA_dist = node_a.distance 
        b_a_dur = df_distance.loc[(node_b.pickup_location[2],node_a.pickup_location[2])]['duration']
        a_LGA_dur = node_a.duration
        
        path_1_total_dis,path_1_total_dur = a_b_dist + b_LGA_dist,a_b_dur + b_LGA_dur 
        path_1_a_dur,path_1_b_dur = path_1_total_dur,b_LGA_dur
        
        path_2_total_dis,path_2_total_dur, = b_a_dist+a_LGA_dist,b_a_dur+a_LGA_dur
        path_2_a_dur,path_2_b_dur         = a_LGA_dur,path_2_total_dur
        
    return ((path_1_total_dis,path_1_total_dur,path_1_a_dur,path_1_b_dur),( path_2_total_dis,path_2_total_dur,path_2_a_dur,path_2_b_dur))
    

In [229]:
def calculate_edge_weight(node_a,node_b):
    path1,path2 = get_all_pairs(node_a,node_b,2)
    minimum_distance = float('inf')
    for path in (path1,path2):
        distance_contraint = (path[0] <= node_a.distance + node_b.distance)
        delay_constraint = (path[2] <= node_a.duration + node_a.delay) & (path[3] <= node_b.duration + node_b.delay)
        #add social constraint too...
        
        
        if distance_contraint and delay_constraint and path[0]< minimum_distance:
            minimum_distance = path[0]
    distance_saved = node_a.distance + node_b.distance - minimum_distance
    return distance_saved

In [230]:
def get_rsg(G):
    for node_a,node_b in list(combinations(G,2)):
        distance_saved = calculate_edge_weight(node_a,node_b)
        if distance_saved!= float('inf'):
            G.add_edge(node_a,node_b, weight=distance_saved)
    return G

# Running the algorithm for sample dataframe

In [243]:
graphs = []
for _,trips in sample.groupby(['pool_window']):
    nodes = []
    trips = trips.reset_index()
    for idx, row in trips.iterrows():
        nodes.append(Node(idx,trips.iloc[idx]))
    G = nx.Graph()
    G.add_nodes_from(nodes)
    graphs.append(G)
    
#Start of the code
weight_matches = []
for g in graphs:
    ride_sharing_graph = get_rsg(g)
    #maximum weighted algorithm
    match = nx.max_weight_matching(ride_sharing_graph, maxcardinality=True)
    g_match = nx.Graph()
    for u,v in match:
        g_match.add_edge(u,v)
        
    weight_matches.append(g_match)

In [250]:
print("Selected edges by maximum weight algorithm")
for u,v in weight_matches[0].edges:
    print(u.id,v.id,ride_sharing_graph.get_edge_data(u,v))

Selected edges by maximum weight algorithm
3 2 {'weight': 10.290000000000001}
1 0 {'weight': 12.709999999999997}
