In [28]:
import pandas as pd
import gc
import requests
import itertools
from geopy.distance import great_circle

In [2]:
df = pd.read_csv(r'../../data/tidy/large/vehicle-locations-mapped-powertrain-weight-consistent-lat-long-oct2021-sep2022.csv', delimiter=',', skiprows=0, low_memory=False)

In [16]:
df.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'ServiceDateTime', 'DateKey',
       'CalendarDate', 'Year', 'Month', 'OpKey', 'Operator', 'RtKey', 'RtKey2',
       'Route', 'RouteName', 'TripKey', 'Trip', 'StopKey', 'Stop', 'StopName',
       'Vehicle', 'VehicleType', 'SeatedCap', 'TotalCap', 'Lat', 'Lon',
       'Boards', 'Alights', 'Onboard', 'Bike', 'Daytype', 'Hour', 'Minute',
       'Second', 'Date', 'Powertrain', 'VehicleModel', 'VehiclWeight(lb)'],
      dtype='object')

In [17]:
stop = df[['StopKey', 'Stop', 'StopName', 'Lat', 'Lon']].copy()

In [18]:
stop

Unnamed: 0,StopKey,Stop,StopName,Lat,Lon
0,22540,251,Smith College,42.319620,-72.637550
1,22543,9009,Noho Garage,42.333550,-72.629412
2,22540,251,Smith College,42.319620,-72.637550
3,22543,9009,Noho Garage,42.333550,-72.629412
4,22543,9009,Noho Garage,42.333550,-72.629412
...,...,...,...,...,...
15621724,34774,64,ILC,42.390987,-72.525304
15621725,34693,71,Fine Arts Ctr,42.387828,-72.523914
15621726,38697,77,SW/Mass Ave (W),42.384961,-72.528281
15621727,34775,76,Boyden Gym (N),42.386549,-72.531202


In [19]:
len(stop['Stop'].unique())

1895

In [20]:
stop = stop.drop_duplicates(subset='Stop')

In [21]:
stop

Unnamed: 0,StopKey,Stop,StopName,Lat,Lon
0,22540,251,Smith College,42.319620,-72.637550
1,22543,9009,Noho Garage,42.333550,-72.629412
18,22581,267,Walter Salvo Ho,42.313356,-72.627657
19,22490,261,Acad. of Music,42.317699,-72.633052
24,1000,0,(X) Undefined,42.375823,-72.507237
...,...,...,...,...,...
14373918,38306,6140,Mountain Farms,42.355540,-72.552075
14373919,38307,6141,Chipotle,42.356442,-72.553862
14375003,38308,6255,Pine / Spring,42.330710,-72.678447
14415749,36117,496,Westover Job Co,42.196182,-72.561707


In [22]:
del df
gc.collect()

942

In [31]:
# Initialize a list to store the results
results = []

# Iterate over all pairs of stops
for (index1, row1), (index2, row2) in itertools.combinations(stop.iterrows(), 2):
    # Get the origin and destination coordinates
    origins = (row1["Lat"], row1["Lon"])
    destinations = (row2["Lat"], row2["Lon"])

    # Calculate the distance
    distance = great_circle(origins, destinations).meters  # in meters

    # Append the results
    # Note: Duration cannot be calculated using this method, so we leave it out
    results.append([row1['Stop'], row2['Stop'], distance])

# Convert the results to a DataFrame
result_df = pd.DataFrame(results, columns=['Stop1', 'Stop2', 'Distance'])

# Convert 'Distance' from meters to miles
result_df['Distance'] = result_df['Distance'] / 1609.34

# Save the results to a CSV file
result_df.to_csv(r'../../results/stops-pairwise-distances.csv', index=False)


In [32]:
result_df

Unnamed: 0,Stop1,Stop2,Distance
0,251,9009,1.048412
1,251,267,0.665421
2,251,261,0.265374
3,251,0,7.704626
4,251,244,0.553588
...,...,...,...
1794560,6141,496,11.080200
1794561,6141,8005,7.605058
1794562,6255,496,11.046725
1794563,6255,8005,10.666192


In [33]:
result_df['Distance'].max()

52.03865833424283

In [34]:
result_df['Distance'].min()

0.0