This program precomputes pairwise trip distances from the source data files. The distances for 2015 have already been precomputed and made available at <a href="https://drive.google.com/drive/folders/1XZgTY6zH6bzb1sE9kz2yHRQ08YbAJavE?usp=sharing" > Data Files </a>.
If the distance file for any other month needs to be computed, please follow these steps:
1. Download the data from <a href = "https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page"> NYC taxi Data </a>.
2. Use the data_preparation.ipynb notebook to filter the data into a form compatible with the algorithms.
3. Specify the path to the folder that contains the trip data for which distances need to be calculated.

In [83]:
import pandas as pd
import matplotlib.pyplot as plt
import requests
from h3 import h3
import json
from urllib.request import URLError, Request, urlopen
from itertools import combinations
from itertools import permutations
from dateutil import parser
from datetime import datetime, timedelta
import math
import networkx as nx
import warnings
warnings.filterwarnings("ignore")

import csv

def get(dataframe,trip_type):
    """Route every ordered pair of trips in ``dataframe`` and keep the short ones.

    For each ordered pair of rows a request is sent to the routing service at
    ``http://localhost:8989/route`` (presumably a local GraphHopper instance --
    confirm). Pairs whose driving distance is at most 2 miles are recorded.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Trips of one pool window. Index labels are used as positions with
        ``iloc``, so the index must be 0..n-1 (the caller resets it). Needs the
        ``<end>_h3``, ``<end>_latitude`` and ``<end>_longitude`` columns for the
        end selected by ``trip_type``.
    trip_type : int
        ``2`` compares pickup locations; any other value compares dropoff
        locations.

    Returns
    -------
    pandas.DataFrame
        Columns ``pickup_h3`` (origin hexagon), ``dropoff_h3`` (destination
        hexagon), ``distance`` (miles) and ``duration``. The column names are
        the same whichever end of the trip was compared.
    """
    # Loop-invariant: which end of the trip supplies hexagon id and coordinates.
    prefix = 'pickup' if trip_type == 2 else 'dropoff'
    h3_col = prefix + '_h3'
    lat_col = prefix + '_latitude'
    lon_col = prefix + '_longitude'

    meters_per_mile = 1609.344
    max_miles = 2

    a, b, c, d = [], [], [], []
    for node_a, node_b in permutations(dataframe.index, 2):
        row_a = dataframe.iloc[node_a]
        row_b = dataframe.iloc[node_b]

        request_str = ('http://localhost:8989/route?point=' + str(row_a[lat_col]) + '%2C' + str(row_a[lon_col])
                       + '&point=' + str(row_b[lat_col]) + '%2C' + str(row_b[lon_col]) + '&vehicle=car')
        res = requests.get(request_str)

        # Parse the body once; a response without a usable route is skipped.
        payload = json.loads(res.text)
        paths = payload.get('paths') if isinstance(payload, dict) else None
        if not paths:
            continue

        miles = paths[0]['distance'] / meters_per_mile  # convert meters to miles
        if miles > max_miles:
            continue

        minute, msec = divmod(paths[0]['time'], 60000)
        a.append(row_a[h3_col])
        b.append(row_b[h3_col])
        c.append(miles)
        # NOTE(review): this stores whole minutes plus (remaining ms / 100000),
        # i.e. a "minutes.seconds"-style number (2 min 30 s -> 2.3), not
        # fractional minutes. Kept as-is because existing distance files
        # presumably use this encoding -- confirm before changing.
        d.append(minute + (msec / 100000))

    return pd.DataFrame(
        {'pickup_h3': a, 'dropoff_h3': b, 'distance': c, 'duration': d},
        columns=['pickup_h3', 'dropoff_h3', 'distance', 'duration'],
    )

In [84]:
from tqdm import tqdm
def get_distance(df_sub,trip_type):
    """Compute pairwise distances for every pool window in ``df_sub``.

    Trips are grouped by their ``pool_window`` column; each group is re-indexed
    and handed to ``get``. The per-window results are concatenated.

    Parameters
    ----------
    df_sub : pandas.DataFrame
        Trips to process; must contain a ``pool_window`` column.
    trip_type : int
        Forwarded unchanged to ``get``.

    Returns
    -------
    pandas.DataFrame
        Columns ``pickup_h3``, ``dropoff_h3``, ``distance``, ``duration``.
        Empty (with those columns) when ``df_sub`` has no pool windows.
    """
    result_columns = ['pickup_h3','dropoff_h3','distance','duration']

    # Group once and reuse the object for both the progress total and the loop.
    windows = df_sub.groupby(['pool_window'])
    final_distance = []
    for _, trips in tqdm(windows, total=windows.ngroups):
        trips = trips.reset_index()
        final_distance.append(get(trips, trip_type))

    # pd.concat raises ValueError on an empty list (a day with no trips).
    if not final_distance:
        return pd.DataFrame(columns=result_columns)

    df_distance = pd.concat(final_distance)
    # NOTE(review): keep=False removes *every* row of a hexagon pair that occurs
    # more than once (e.g. in several pool windows), so such pairs end up with
    # no distance at all. Left unchanged -- confirm whether keep='first' was
    # intended.
    df_distance.drop_duplicates(subset=['pickup_h3','dropoff_h3'],keep=False,inplace=True)
    return df_distance

# Reload the respective month data

In [98]:
def ceil_dt(dt, delta):
    """Round ``dt`` up to the next multiple of ``delta`` measured from ``datetime.min``.

    A ``dt`` that already lies on a multiple of ``delta`` is returned unchanged.

    Parameters
    ----------
    dt : datetime.datetime
        Naive timestamp to round.
    delta : datetime.timedelta
        Bucket width (e.g. ``timedelta(minutes=10)``).

    Returns
    -------
    datetime.datetime
        The smallest ``datetime.min + k * delta`` that is not earlier than ``dt``.
    """
    # Integer divmod on timedeltas is exact. Dividing to a float first loses
    # sub-second precision for present-day dates and can round *down*.
    whole, remainder = divmod(dt - datetime.min, delta)
    if remainder:
        whole += 1
    return datetime.min + whole * delta
# Ask which month file to load and which direction of LGA trips it contains.
trip_file = input("Enter the full path to the month's trip data:")
trip_type = input("Enter the trip_type\n 1.From LGA\n 2.To LGA : ").strip()
# Reject anything but '1'/'2' up front: other answers would silently pick the
# drop-off settings here and then fail (or take a different branch) later.
if trip_type not in ('1', '2'):
    raise ValueError("trip_type must be '1' (From LGA) or '2' (To LGA), got {!r}".format(trip_type))

# resolution = (H3 resolution for pickup points, H3 resolution for dropoff points)
if trip_type =='1':
    resolution = (15,15)
    distance_file = 'LGA_as_Pickup_months'
else:
    resolution = (10,8)
    distance_file = 'LGA_drop_off_months'

columns = ['tpep_pickup_datetime', 'tpep_dropoff_datetime','passenger_count',\
       'trip_distance', 'pickup_longitude','pickup_latitude','dropoff_longitude', 'dropoff_latitude']
df = pd.read_csv(trip_file)
df = df[columns].rename(columns={'tpep_pickup_datetime':'pickup_time',
       'tpep_dropoff_datetime':'dropoff_time'})

# Discard trips whose pickup coordinate is recorded as 0; copy so that the
# column assignments below operate on an independent frame.
df = df[(df.pickup_latitude != 0) & (df.pickup_longitude != 0)].copy()

df['pickup_time'] = pd.to_datetime(df['pickup_time'])
df['dropoff_time'] = pd.to_datetime(df['dropoff_time'])

# Map both trip ends to their H3 hexagon ids.
df['pickup_h3'] = [h3.geo_to_h3(lat, lng, resolution[0])
                   for lat, lng in zip(df['pickup_latitude'], df['pickup_longitude'])]
df['dropoff_h3'] = [h3.geo_to_h3(lat, lng, resolution[1])
                    for lat, lng in zip(df['dropoff_latitude'], df['dropoff_longitude'])]

# Trip duration in seconds is dropoff minus pickup. (pickup - dropoff is
# negative, and `.dt.seconds` of a negative timedelta wraps to ~86400 - duration.)
df['duration'] = (df['dropoff_time'] - df['pickup_time']).dt.total_seconds()
# Delay is set to 20% of the trip duration.
df['delay'] = df['duration'] * 0.20
print("Number of rows in df :{}".format(df.shape[0]))

Enter the full path to the month's trip data:/home/shet/Documents/CS_RideSharing/Data/LGA as dropoff/LaGuardia_as_dropoff_2015-June.csv
Enter the trip_type
 1.From LGA
 2.To LGA : 2
Number of rows in df :72232


In [99]:
# Compute and save one distance file per day of the chosen month of 2015.
# Relies on `df`, `distance_file`, `trip_type`, `ceil_dt` and `get_distance`
# defined in the cells above.
import os
from calendar import monthrange  # imported here so this cell runs before the merge cell
from time import time


def trip(x):
    """Return the human-readable label for a trip_type answer ('1' or other)."""
    return "LGA as pickup" if x=="1" else "LGA as drop-off"


month = int(input("Enter month in number.for example :input 1 for jan:")) # the month you want to download
days = monthrange(2015, month)[1]  # [1] is the number of days in the month

# Make sure the target folder exists before minutes of routing work are spent.
os.makedirs('Distance/'+distance_file, exist_ok=True)

for day in range(1, days+1):
    start = time()
    # Zero-pad both month and day so months 10-12 produce a valid date.
    date_str = '2015-{:02d}-{:02d}'.format(month, day)
    start_date = date_str + ' 00:00:00'
    end_date = date_str + ' 23:59:59'

    # Trips picked up on/after the start of the day and dropped off by its end.
    # NOTE(review): a trip that crosses midnight matches no day -- confirm intended.
    df_sub = df[(df['pickup_time'] >= start_date) & (df['dropoff_time'] <= end_date)].copy()
    df_sub.reset_index(drop=True,inplace=True)

    # Assign every trip to the 10-minute window that ends at or after its pickup.
    df_sub['pool_window'] = df_sub['pickup_time'].apply(lambda x: ceil_dt(x.to_pydatetime(), timedelta(minutes=10)))
    print("Starting distance calculation...")
    df_distance = get_distance(df_sub,int(trip_type))
    df_distance.reset_index(drop=True,inplace=True)
    print('Time take for 1 day = {} min'.format((time()-start)/60))
    file_name = 'Distance/'+distance_file+'/'+start_date.split()[0]+'.csv'
    df_distance.to_csv(file_name)
    print('Done saving distances for {} trip for date :{}\n'.format(trip(trip_type),start_date.split()[0]))
    del  df_distance

Enter month in number . for eg 1 for jan:6


  9%|▊         | 12/138 [00:00<00:01, 112.93it/s]

Starting distance calculation...


100%|██████████| 138/138 [06:43<00:00,  2.92s/it]
  0%|          | 0/139 [00:00<?, ?it/s]

Time take for 1 day = 6.72128054300944 min
Done saving distances for LGA as drop-off trip for date :2015-06-01

Starting distance calculation...


100%|██████████| 139/139 [05:06<00:00,  2.21s/it]
  0%|          | 0/126 [00:00<?, ?it/s]

Time take for 1 day = 5.113448353608449 min
Done saving distances for LGA as drop-off trip for date :2015-06-02

Starting distance calculation...


100%|██████████| 126/126 [07:35<00:00,  3.61s/it]
  0%|          | 0/125 [00:00<?, ?it/s]

Time take for 1 day = 7.588398663202922 min
Done saving distances for LGA as drop-off trip for date :2015-06-03

Starting distance calculation...


100%|██████████| 125/125 [10:52<00:00,  5.22s/it]
  0%|          | 0/132 [00:00<?, ?it/s]

Time take for 1 day = 10.870968596140544 min
Done saving distances for LGA as drop-off trip for date :2015-06-04

Starting distance calculation...


100%|██████████| 132/132 [09:36<00:00,  4.37s/it]
  0%|          | 0/119 [00:00<?, ?it/s]

Time take for 1 day = 9.607054543495178 min
Done saving distances for LGA as drop-off trip for date :2015-06-05

Starting distance calculation...


100%|██████████| 119/119 [01:27<00:00,  1.36it/s]
  6%|▌         | 8/131 [00:00<00:01, 78.24it/s]

Time take for 1 day = 1.461876877148946 min
Done saving distances for LGA as drop-off trip for date :2015-06-06

Starting distance calculation...


100%|██████████| 131/131 [07:38<00:00,  3.50s/it]
  0%|          | 0/132 [00:00<?, ?it/s]

Time take for 1 day = 7.6494342168172205 min
Done saving distances for LGA as drop-off trip for date :2015-06-07

Starting distance calculation...


100%|██████████| 132/132 [06:28<00:00,  2.95s/it]
  0%|          | 0/128 [00:00<?, ?it/s]

Time take for 1 day = 6.4811557332674665 min
Done saving distances for LGA as drop-off trip for date :2015-06-08

Starting distance calculation...


100%|██████████| 128/128 [03:41<00:00,  1.73s/it]
  6%|▌         | 8/132 [00:00<00:01, 78.10it/s]

Time take for 1 day = 3.7005164782206217 min
Done saving distances for LGA as drop-off trip for date :2015-06-09

Starting distance calculation...


100%|██████████| 132/132 [05:36<00:00,  2.55s/it]
  0%|          | 0/124 [00:00<?, ?it/s]

Time take for 1 day = 5.606385087966919 min
Done saving distances for LGA as drop-off trip for date :2015-06-10

Starting distance calculation...


100%|██████████| 124/124 [06:51<00:00,  3.32s/it]
  0%|          | 0/133 [00:00<?, ?it/s]

Time take for 1 day = 6.856366113821665 min
Done saving distances for LGA as drop-off trip for date :2015-06-11

Starting distance calculation...


100%|██████████| 133/133 [07:01<00:00,  3.17s/it]
  0%|          | 0/127 [00:00<?, ?it/s]

Time take for 1 day = 7.020516471068064 min
Done saving distances for LGA as drop-off trip for date :2015-06-12

Starting distance calculation...


100%|██████████| 127/127 [01:21<00:00,  1.56it/s]
  0%|          | 0/134 [00:00<?, ?it/s]

Time take for 1 day = 1.356859838962555 min
Done saving distances for LGA as drop-off trip for date :2015-06-13

Starting distance calculation...


100%|██████████| 134/134 [04:38<00:00,  2.08s/it]
  0%|          | 0/134 [00:00<?, ?it/s]

Time take for 1 day = 4.644190510114034 min
Done saving distances for LGA as drop-off trip for date :2015-06-14

Starting distance calculation...


100%|██████████| 134/134 [04:57<00:00,  2.22s/it]
  0%|          | 0/133 [00:00<?, ?it/s]

Time take for 1 day = 4.967387656370799 min
Done saving distances for LGA as drop-off trip for date :2015-06-15

Starting distance calculation...


100%|██████████| 133/133 [04:22<00:00,  1.98s/it]
  0%|          | 0/133 [00:00<?, ?it/s]

Time take for 1 day = 4.383940776189168 min
Done saving distances for LGA as drop-off trip for date :2015-06-16

Starting distance calculation...


100%|██████████| 133/133 [05:15<00:00,  2.37s/it]
  0%|          | 0/135 [00:00<?, ?it/s]

Time take for 1 day = 5.260117455323537 min
Done saving distances for LGA as drop-off trip for date :2015-06-17

Starting distance calculation...


100%|██████████| 135/135 [06:47<00:00,  3.02s/it]
  0%|          | 0/132 [00:00<?, ?it/s]

Time take for 1 day = 6.785191269715627 min
Done saving distances for LGA as drop-off trip for date :2015-06-18

Starting distance calculation...


100%|██████████| 132/132 [06:37<00:00,  3.01s/it]
  0%|          | 0/128 [00:00<?, ?it/s]

Time take for 1 day = 6.621238390604655 min
Done saving distances for LGA as drop-off trip for date :2015-06-19

Starting distance calculation...


100%|██████████| 128/128 [01:12<00:00,  1.76it/s]
 11%|█         | 14/129 [00:00<00:00, 132.36it/s]

Time take for 1 day = 1.21262123187383 min
Done saving distances for LGA as drop-off trip for date :2015-06-20

Starting distance calculation...


100%|██████████| 129/129 [03:29<00:00,  1.63s/it]
  9%|▉         | 12/130 [00:00<00:01, 115.63it/s]

Time take for 1 day = 3.4980862657229106 min
Done saving distances for LGA as drop-off trip for date :2015-06-21

Starting distance calculation...


100%|██████████| 130/130 [04:09<00:00,  1.92s/it]
  7%|▋         | 8/123 [00:00<00:01, 76.45it/s]

Time take for 1 day = 4.163862073421479 min
Done saving distances for LGA as drop-off trip for date :2015-06-22

Starting distance calculation...


100%|██████████| 123/123 [03:11<00:00,  1.56s/it]
  0%|          | 0/128 [00:00<?, ?it/s]

Time take for 1 day = 3.1924225449562074 min
Done saving distances for LGA as drop-off trip for date :2015-06-23

Starting distance calculation...


100%|██████████| 128/128 [04:53<00:00,  2.30s/it]
  0%|          | 0/125 [00:00<?, ?it/s]

Time take for 1 day = 4.898269899686178 min
Done saving distances for LGA as drop-off trip for date :2015-06-24

Starting distance calculation...


100%|██████████| 125/125 [05:40<00:00,  2.72s/it]
  0%|          | 0/128 [00:00<?, ?it/s]

Time take for 1 day = 5.678181866804759 min
Done saving distances for LGA as drop-off trip for date :2015-06-25

Starting distance calculation...


100%|██████████| 128/128 [05:05<00:00,  2.38s/it]
  0%|          | 0/122 [00:00<?, ?it/s]

Time take for 1 day = 5.0863069494565325 min
Done saving distances for LGA as drop-off trip for date :2015-06-26

Starting distance calculation...


100%|██████████| 122/122 [00:54<00:00,  2.24it/s]
  0%|          | 0/130 [00:00<?, ?it/s]

Time take for 1 day = 0.9103903214136759 min
Done saving distances for LGA as drop-off trip for date :2015-06-27

Starting distance calculation...


100%|██████████| 130/130 [02:54<00:00,  1.34s/it]
  0%|          | 0/120 [00:00<?, ?it/s]

Time take for 1 day = 2.9035946289698282 min
Done saving distances for LGA as drop-off trip for date :2015-06-28

Starting distance calculation...


100%|██████████| 120/120 [03:49<00:00,  1.91s/it]
  0%|          | 0/123 [00:00<?, ?it/s]

Time take for 1 day = 3.8273470362027484 min
Done saving distances for LGA as drop-off trip for date :2015-06-29

Starting distance calculation...


100%|██████████| 123/123 [03:51<00:00,  1.88s/it]


Time take for 1 day = 3.8631370306015014 min
Done saving distances for LGA as drop-off trip for date :2015-06-30



# Concatenate all daily files into a single monthly file

In [101]:
from calendar import monthrange
import pandas as pd
# Merge the per-day distance files of `month` (set in an earlier cell) into one
# monthly file per trip direction.
import os

for folder in ('Distance/LGA_drop_off_months/2015-','Distance/LGA_as_Pickup_months/2015-'):
    if 'drop_off' in folder:
        destination = 'Distance/LGA_drop_off/2015-'
    else:
        destination = 'Distance/LGA_as_pickup/2015-'

    # monthrange returns (weekday of the 1st, number of days); only the day
    # count is wanted -- iterating over the tuple would run a bogus extra pass.
    days = monthrange(2015, month)[1]
    daily_files = ['{}{:02d}-{:02d}.csv'.format(folder, month, day) for day in range(1, days+1)]

    # Only one trip direction is normally computed per run; skip the other one
    # instead of failing on its first missing file.
    if not any(os.path.exists(path) for path in daily_files):
        print("Skipping {}: no daily files found for month {}".format(folder, month))
        continue

    daily_frames = []
    for file in daily_files:
        # A day missing from an otherwise present month still raises here, so
        # an incomplete month is never written silently.
        daily_frames.append(pd.read_csv(file))
        print("Done merging :{}".format(file))

    # Concatenate once and write once, rather than re-writing the monthly file
    # after every day.
    df_final = pd.concat(daily_frames, axis=0)
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    # The month in the output name is deliberately left unpadded (e.g.
    # '2015-6.csv') to match the naming this cell has always produced.
    df_final.to_csv(str(destination)+str(month)+'.csv')

Done merging :Distance/LGA_drop_off_months/2015-06-01.csv
Done merging :Distance/LGA_drop_off_months/2015-06-02.csv
Done merging :Distance/LGA_drop_off_months/2015-06-03.csv
Done merging :Distance/LGA_drop_off_months/2015-06-04.csv
Done merging :Distance/LGA_drop_off_months/2015-06-05.csv
Done merging :Distance/LGA_drop_off_months/2015-06-06.csv
Done merging :Distance/LGA_drop_off_months/2015-06-07.csv
Done merging :Distance/LGA_drop_off_months/2015-06-08.csv
Done merging :Distance/LGA_drop_off_months/2015-06-09.csv
Done merging :Distance/LGA_drop_off_months/2015-06-10.csv
Done merging :Distance/LGA_drop_off_months/2015-06-11.csv
Done merging :Distance/LGA_drop_off_months/2015-06-12.csv
Done merging :Distance/LGA_drop_off_months/2015-06-13.csv
Done merging :Distance/LGA_drop_off_months/2015-06-14.csv
Done merging :Distance/LGA_drop_off_months/2015-06-15.csv
Done merging :Distance/LGA_drop_off_months/2015-06-16.csv
Done merging :Distance/LGA_drop_off_months/2015-06-17.csv
Done merging :