In [1]:
import gc
import itertools

import numpy as np
import pandas as pd
import requests
from geopy.distance import great_circle

In [2]:
# Load the full vehicle-location table. low_memory=False makes pandas parse the
# file in one pass instead of chunk-by-chunk when inferring column dtypes.
df = pd.read_csv(
    r'../../data/tidy/large/vehicle-locations-mapped-powertrain-weight-consistent-lat-long-oct2021-sep2022.csv',
    low_memory=False,
)

In [3]:
# List every column of the loaded table to locate the stop-related fields.
df.columns

Index(['Unnamed: 0.1', 'Unnamed: 0', 'ServiceDateTime', 'DateKey',
       'CalendarDate', 'Year', 'Month', 'OpKey', 'Operator', 'RtKey', 'RtKey2',
       'Route', 'RouteName', 'TripKey', 'Trip', 'StopKey', 'Stop', 'StopName',
       'Vehicle', 'VehicleType', 'SeatedCap', 'TotalCap', 'Lat', 'Lon',
       'Boards', 'Alights', 'Onboard', 'Bike', 'Daytype', 'Hour', 'Minute',
       'Second', 'Date', 'Powertrain', 'VehicleModel', 'VehiclWeight(lb)'],
      dtype='object')

In [4]:
# Keep only the stop identity and coordinate columns; copy so that `stop` is
# independent of `df` and the large frame can be released later.
stop_columns = ['StopKey', 'Stop', 'StopName', 'Lat', 'Lon']
stop = df.loc[:, stop_columns].copy()

In [5]:
# Inspect the stop columns before de-duplication (one row per source record).
stop

Unnamed: 0,StopKey,Stop,StopName,Lat,Lon
0,22540,251,Smith College,42.319620,-72.637550
1,22543,9009,Noho Garage,42.333550,-72.629412
2,22540,251,Smith College,42.319620,-72.637550
3,22543,9009,Noho Garage,42.333550,-72.629412
4,22543,9009,Noho Garage,42.333550,-72.629412
...,...,...,...,...,...
15621724,34774,64,ILC,42.390987,-72.525304
15621725,34693,71,Fine Arts Ctr,42.387828,-72.523914
15621726,38697,77,SW/Mass Ave (W),42.384961,-72.528281
15621727,34775,76,Boyden Gym (N),42.386549,-72.531202


In [6]:
# Number of distinct StopKey values (a missing key, if any, counts as one value).
stop['StopKey'].nunique(dropna=False)

12763

In [7]:
# Collapse to one row per StopKey, keeping the first occurrence (and therefore
# the first recorded name and coordinates) of each key.
stop = stop.drop_duplicates(subset=['StopKey'], keep='first')

In [8]:
# Inspect the table after de-duplication: one row per StopKey, original index kept.
stop

Unnamed: 0,StopKey,Stop,StopName,Lat,Lon
0,22540,251,Smith College,42.319620,-72.637550
1,22543,9009,Noho Garage,42.333550,-72.629412
18,22581,267,Walter Salvo Ho,42.313356,-72.627657
19,22490,261,Acad. of Music,42.317699,-72.633052
24,1000,0,(X) Undefined,42.375823,-72.507237
...,...,...,...,...,...
15075620,38680,8005,Garage Rd (I),42.465287,-72.576032
15076222,38692,156,Mayflower (O),42.350473,-72.470063
15076246,38691,154,Mayflower (I),42.350719,-72.470169
15323414,36288,1618,Memorial / Expo,42.092681,-72.620408


In [9]:
# The full table is no longer needed once the stop columns have been copied out;
# drop the reference and run a garbage-collection pass to release memory now.
del df
gc.collect()

0

In [10]:
# Pairwise great-circle distance (in miles) between every unordered pair of stops.
#
# A Python-level loop over every pair is impractically slow at this scale
# (~12.8k unique stops -> ~81 million pairs), so the distance is computed with
# NumPy one origin stop at a time: each iteration handles all pairs (i, j) with
# j > i in a single vectorized step. Pair order matches
# itertools.combinations over the rows of `stop`.
#
# Output: `result_df` with columns Stop1, Stop2, Distance (miles), also written
# to ../../results/stops-pairwise-distances.csv.

# Mean Earth radius in meters. NOTE(review): chosen to match geopy's default
# great_circle radius (6371.009 km) -- confirm against the installed version.
EARTH_RADIUS_M = 6371.009 * 1000
# Meters-to-miles factor kept identical to the value used previously so that
# results stay comparable with any earlier output.
METERS_PER_MILE = 1609.34

stop_keys = stop['StopKey'].to_numpy()
lat_rad = np.radians(stop['Lat'].to_numpy(dtype=float))
lon_rad = np.radians(stop['Lon'].to_numpy(dtype=float))

# Trigonometric terms of the latitudes are reused for every pair; compute once.
sin_lat = np.sin(lat_rad)
cos_lat = np.cos(lat_rad)

n_stops = len(stop_keys)
n_pairs = n_stops * (n_stops - 1) // 2

# Preallocate the output columns instead of growing a list of Python lists,
# which would need far more memory than the final arrays.
pair_stop1 = np.empty(n_pairs, dtype=stop_keys.dtype)
pair_stop2 = np.empty(n_pairs, dtype=stop_keys.dtype)
pair_miles = np.empty(n_pairs, dtype=float)

offset = 0
for i in range(n_stops - 1):
    later = slice(i + 1, None)          # all stops that come after stop i
    width = n_stops - 1 - i             # number of pairs contributed by stop i
    block = slice(offset, offset + width)

    delta_lon = lon_rad[later] - lon_rad[i]
    sin_dlon = np.sin(delta_lon)
    cos_dlon = np.cos(delta_lon)

    # Central angle via the atan2 form of the spherical distance formula,
    # which stays accurate for both very small and near-antipodal separations.
    # Non-finite coordinates are not validated here; they yield NaN distances.
    central_angle = np.arctan2(
        np.hypot(
            cos_lat[later] * sin_dlon,
            cos_lat[i] * sin_lat[later] - sin_lat[i] * cos_lat[later] * cos_dlon,
        ),
        sin_lat[i] * sin_lat[later] + cos_lat[i] * cos_lat[later] * cos_dlon,
    )

    pair_stop1[block] = stop_keys[i]
    pair_stop2[block] = stop_keys[later]
    pair_miles[block] = central_angle * EARTH_RADIUS_M / METERS_PER_MILE
    offset += width

# copy=False avoids duplicating the (large) column arrays when building the frame.
result_df = pd.DataFrame(
    {'Stop1': pair_stop1, 'Stop2': pair_stop2, 'Distance': pair_miles},
    copy=False,
)

# Save the results to a CSV file
result_df.to_csv(r'../../results/stops-pairwise-distances.csv', index=False)



KeyboardInterrupt



In [None]:
# Display the pairwise table (columns: Stop1, Stop2, Distance).
result_df

In [None]:
# Largest distance between any two stops, in miles.
result_df['Distance'].max()

In [None]:
# Smallest distance between any two stops, in miles.
result_df['Distance'].min()