# Stops to Routes

This notebook provides code to generate a CSV file that contains two columns:

1. stop_id -- the unique identifier of each of the agency's stops
2. route_short_names -- the list of commonly used (short) names of the routes that serve that stop

In [None]:
import pandas as pd, numpy as np, time, datetime, csv, os

In [None]:
# Collect the names of the per-agency feed folders that sit directly inside
# ./gtfs, sorted alphabetically. Only the top level is read (no recursive
# walk), and a missing ./gtfs folder raises FileNotFoundError naming the path
# instead of an opaque IndexError.
with os.scandir('gtfs') as _gtfs_entries:
    list_of_agencies = sorted(entry.name for entry in _gtfs_entries if entry.is_dir())

In [None]:
# Manually choose the GTFS feed folder to process. Keep exactly one assignment
# active; the others are ready-to-uncomment alternatives.
# agency = "gtfs/sfmta--60/"
# agency = "gtfs/la-metro--677/"  # rail
agency = "gtfs/la-metro--184/"  # bus

# The loading cell builds file names as `path + '<table>.txt'`, so the value
# must end with a slash.
path = agency

In [None]:
# Alternative to the manual choice: draw one agency folder uniformly at random.
# NOTE: running this cell overrides whatever `path` was set to before.
rand_agency = list_of_agencies[np.random.randint(0, len(list_of_agencies))]

# Same "gtfs/<folder>/" layout (with trailing slash) the loading cell expects.
path = f"gtfs/{rand_agency}/"
path

In [None]:
# Load the GTFS tables of the feed folder selected in `path`, printing a row
# count for each as a quick sanity check.

# The first agency's name labels the output file; "/" is replaced so the name
# cannot be mistaken for a directory separator when it is used in a file path.
agency_name = pd.read_csv(path + 'agency.txt')['agency_name'][0]
agency_name = agency_name.replace("/", "-")
print(agency_name)

trips = pd.read_csv(path + 'trips.txt')
print(len(trips), "trips")

routes = pd.read_csv(path + 'routes.txt')
print(len(routes), "routes")

stops = pd.read_csv(path + 'stops.txt')
print(len(stops), "stops")

stop_times = pd.read_csv(path + 'stop_times.txt')
print(len(stop_times), "stop times")

# GTFS lets a feed describe its service with calendar.txt, calendar_dates.txt,
# or both, so either file may legitimately be absent. Only the read itself is
# guarded, so unrelated errors are not swallowed.
try:
    calendar = pd.read_csv(path + "calendar.txt")
except FileNotFoundError:
    # `calendar` stays undefined; `calend` is the flag to check before use.
    calend = False
else:
    calend = True
    print(len(calendar), "schedules")

try:
    calendar_dates = pd.read_csv(path + "calendar_dates.txt")
except FileNotFoundError:
    # No exception dates published: fall back to an empty table with the
    # standard GTFS columns so code filtering on it simply finds no rows.
    calendar_dates = pd.DataFrame(columns=["service_id", "date", "exception_type"])
print(len(calendar_dates), "exception dates")

In [None]:
# Create the fully-formed stop_times table: every stop event annotated with
# the trip and route information of the trip it belongs to.

# direction_id is optional in GTFS trips.txt. reindex() returns the same
# columns as plain selection when they all exist, and adds a NaN-filled column
# (instead of raising KeyError) when the feed omits one.
trip_info = trips.reindex(columns=["route_id", "service_id", "trip_id", "direction_id"])
route_info = routes[['route_id', 'route_type', "route_short_name"]]

# Attach the route attributes to each trip, then put the columns in a fixed order.
trip_route_merged = trip_info.merge(route_info, on="route_id")[
    ["trip_id", "route_id", "service_id", "direction_id", "route_type", "route_short_name"]
]

if trips['trip_id'].dtype == object:
    # trips.txt holds textual trip ids, so join on a string copy of the
    # stop_times ids to keep both merge keys the same type. The helper column
    # is left on `stop_times` and also appears in the merged result.
    stop_times['trip_id_str'] = stop_times['trip_id'].astype("str")
    _left_key = "trip_id_str"
else:
    _left_key = "trip_id"

# Left join: every stop_times row is kept, even if its trip has no match.
stop_times_merged = stop_times[[_left_key, 'arrival_time', 'stop_id']].merge(
    trip_route_merged, left_on=_left_key, right_on="trip_id", how='left'
)

# Disabled step: calculate an interpolated (float) time for future analysis.
# Kept for reference only; it relies on `interpolator` and `minSinceMidnight`,
# which are not defined in this part of the notebook.
#
# if (sum(pd.isnull(stop_times_merged['arrival_time']))/len(stop_times_merged)) > 0.05:
#     agency_trips = list(stop_times.drop_duplicates('trip_id')['trip_id'])
#     allInterpolatedTimes = []
#     for trip in agency_trips:
#         allInterpolatedTimes += interpolator(trip)
#     stop_times_merged['interpolated'] = allInterpolatedTimes
#     print("INTERP-O-LATED")
# else:
#     stop_times_merged['interpolated'] = [minSinceMidnight(x) for x in stop_times_merged['arrival_time']]

stop_times_merged.head()

In [None]:
# Build the stop -> routes table and export it as CSV.
#
# One groupby pass replaces the per-stop DataFrame.query() loop. The query
# string was built without quoting, so it broke on non-numeric stop ids, and
# it rescanned the whole table (plus a concat) for every stop.
# sort=False keeps stops in order of first appearance, and unique() keeps each
# stop's route names in order of first appearance, matching the original output.
_routes_by_stop = stop_times_merged.groupby('stop_id', sort=False)['route_short_name'].unique()

routes_at_stops = pd.DataFrame({
    'stop_id': _routes_by_stop.index,
    'route_short_names': [list(names) for names in _routes_by_stop],
})

# Create the output folder on first use so the export cannot fail on a
# missing directory.
_output_dir = "output/v2/stops-to-routes/"
os.makedirs(_output_dir, exist_ok=True)

# The default index is written as an unnamed leading column, as before, so
# existing consumers of the file see the same layout.
routes_at_stops.to_csv(_output_dir + agency_name + " stops2routes.csv")

# Free the intermediates; nothing later in the notebook needs them.
del _routes_by_stop, _output_dir, routes_at_stops