In [1]:
import pandas as pd, numpy as np, os, datetime, time

In [2]:
list_of_agencies = sorted(list(os.walk('gtfs'))[0][1])

In [3]:
# define functions

def minSinceMidnight(t):
    if type(t) == str:
        tt = t.split(':')
        msm = (int(tt[0])*60) + int(tt[1]) + (int(tt[2])/60)
    else:
        msm = t
    return msm

## REWROTE on 9/20 TO DEAL WITH DUPLICATE TIMES

def interpolator(trip):
    
    lsst = list(stop_times[stop_times['trip_id'] == trip]['arrival_time'])
    lsst = [minSinceMidnight(x) for x in lsst]
    
    last_scheduled_time = lsst[0]
    lst_index = 0
    
    values = [last_scheduled_time]
    
    for x in range(1,len(lsst)):
        #print(x)
        if pd.isnull(lsst[x]) == False:
            #idx = lsst.index(x)
            #print(idx)
            yy = list(np.linspace(last_scheduled_time, lsst[x], num=(x-lst_index+1)))

            values += yy[1:]
            last_scheduled_time = lsst[x]
            lst_index = x
        else:
            pass
    return values # returns linear interpolation of times

In [4]:
# unchanging data

# days to evaluate
weekday = "20190315"
saturday = "20190316"
sunday = "20190317"

# write the evaluated days to file
f_txt = "The headways represented here correspond to the following revenue days:\n\n"
f_txt += "weekday: {}\n".format(weekday)
f_txt += "saturday: {}\n".format(saturday)
f_txt += "sunday: {}\n".format(sunday)

f = open("output/v3/date-info.txt", "w")
f.write(f_txt)
f.close()

days_of_week = {0:"monday", 1:"tuesday", 2:"wednesday", 3:"thursday", 4:"friday", 5:"saturday", 6:"sunday"}

# modes
bus_types = [3]
rail_types = [0,1,2]
ferry_types = [4]

# queries to filter by various time criteria
# strings for pd.DF.query

am_peak_query = "minutes >= 360 and minutes <= 600"
pm_peak_query = "minutes >= 900 and minutes <= 1140"
weekday_range_query = "minutes >= 360 and minutes <= 1320"
saturday_range_query = "minutes >= 480 and minutes <= 1320"
sunday_range_query = "minutes >= 480 and minutes <= 1320"

# defines the schema for the dict object holding headways

dataHolderSpec = {
                  "name":'',
                  "longitude":np.NaN,
                  "latitude":np.NaN,
                0: {
                    "served_by": [],
                    "AM Peak": np.NaN,
                    "PM Peak": np.NaN,
                    "Weekdays": np.NaN,
                    "Saturday": np.NaN,
                    "Sunday": np.NaN
                     },
                 1: {
                    "served_by": [],
                    "AM Peak": np.NaN,
                    "PM Peak": np.NaN,
                    "Weekdays": np.NaN,
                    "Saturday": np.NaN,
                    "Sunday": np.NaN
                     }
                 }


# starting index
start_index = 0

In [5]:
scenarios = pd.DataFrame([[15,20,30],[10,15,25], [5,10,20],[10,20,30]], columns=["peak", "weekday", "weekend"],\
                         index=["A", "B", "C", "AMENDED"])

scenarios.head()




Unnamed: 0,peak,weekday,weekend
A,15,20,30
B,10,15,25
C,5,10,20
AMENDED,10,20,30


In [90]:
%%time

errors = pd.DataFrame(columns=["agency","path", "error"])

for agency in list_of_agencies[:]:
    #agency = "la-metro--184"
    path = "gtfs/" + agency + "/"
    print(path)
    
    startTime = time.time()
    
    
    # LOAD GTFS DATA
    agency_name = pd.read_csv(path + 'agency.txt')['agency_name'][0]
    agency_name = agency_name.replace("/","-")
    print(agency_name, list_of_agencies.index(agency)+1, "of", len(list_of_agencies))

    trips = pd.read_csv(path + 'trips.txt', dtype={"trip_id":str, "route_id":str})
    if "direction_id" not in trips.columns:
        trips["direction_id"] = np.zeros(len(trips))
    trips["direction_id"] = trips["direction_id"].fillna(0)
    
    #print(len(trips), "trips")

    routes = pd.read_csv(path + 'routes.txt',dtype={"route_id":str})
    #print(len(routes), "routes")

    stops = pd.read_csv(path + 'stops.txt', dtype={"stop_id":str})
    #print(len(stops), "stops")

    stop_times = pd.read_csv(path + 'stop_times.txt', dtype={"stop_id":str})
    #print(len(stop_times), "stop times")

    if os.path.isfile(path + "calendar.txt"):
        calendar = pd.read_csv(path + "calendar.txt")
        #print(len(calendar), "schedules")
        calend = True
        
        if sum(calendar[["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]].sum()) == 0:
            # build a calendar
            calend = True
            
            calendar_dates= pd.read_csv(path + "calendar_dates.txt")
            calendar_dates["day_of_week"] = [datetime.datetime(int(str(x)[:4]), int(str(x)[4:6]), int(str(x)[6:])).weekday() for x in calendar_dates["date"]]

            calendar = pd.crosstab(calendar_dates["service_id"], calendar_dates["day_of_week"]).rename(days_of_week, axis=1).reset_index()

            service_id_df = []

            for service_id in calendar_dates["service_id"].unique():

                tempServiceID_df = calendar_dates[calendar_dates["service_id"] == service_id]
                minDate = tempServiceID_df["date"].min()
                maxDate = tempServiceID_df["date"].max()
                service_id_df.append(pd.DataFrame([[service_id, minDate,maxDate]], columns=["service_id", "start_date","end_date"]))
    
    
            calendar = calendar.merge(pd.concat(service_id_df, ignore_index=True))
            for date in ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]:
                calendar[date] = [1 if x > 0 else 0 for x in calendar[date]]
    else:
        calend = False

    calendar_dates = pd.read_csv(path + "calendar_dates.txt")
    
    # adds day-of-week information to calendar_dates
    calendar_dates["weekday"] = [days_of_week[datetime.datetime.strptime(str(x), "%Y%m%d").weekday()] for x in calendar_dates["date"]]
    #print(len(calendar_dates), "exception dates")

    # READ AND DETERMINE CALENDARS

    # create dictionary object with exceptions dates for different types of service

    exceptions = {}
    
    for x in range(len(calendar_dates)):
        exception_date = str(calendar_dates['date'][x])
        exception_service_id = calendar_dates['service_id'][x]
        exception_type = calendar_dates['exception_type'][x]

        if exception_date not in exceptions:
            exceptions[exception_date] = {1:[],2:[]}
            exceptions[exception_date][exception_type].append(exception_service_id)
        else:
            exceptions[exception_date][exception_type].append(exception_service_id)

    # GENERATES service_ids IN USE ON SPECIFIED DAY OF WEEK
    # ALSO ENSURES THAT SCHEDULES ARE ACTIVE DURING SPECIFIED TIME FRAME

    weekday_day = days_of_week[datetime.datetime.strptime(weekday, "%Y%m%d").weekday()]
    saturday_day = "saturday"
    sunday_day = "sunday"

    # this if-else statement checks to make sure that calendar.txt file is up to date
    # if none of the service_id are currently active, we take them anyway
    # otherwise, we filter out any inactive service_id
    
    # only use start dates for service filtering
    weekday_query = "(%s == 1) & (%s >= start_date)" % (weekday_day, weekday)
    saturday_query = "(%s == 1) & (%s >= start_date)" % (saturday_day, saturday)
    sunday_query = "(%s == 1) & (%s >= start_date)" % (sunday_day, sunday)
    
    
    '''try:
        if len([x for x in list(calendar['end_date']) if x > np.min([int(weekday), int(saturday), int(sunday)])]) == 0:
            print("just start date")
            weekday_query = "(%s == 1) & (%s >= start_date)" % (weekday_day, weekday)
            saturday_query = "(%s == 1) & (%s >= start_date)" % (saturday_day, saturday)
            sunday_query = "(%s == 1) & (%s >= start_date)" % (sunday_day, sunday)
        else:
            print("end date too")
            weekday_query = "(%s == 1) & (%s >= start_date) & (%s <= end_date)" % (weekday_day, weekday, weekday)
            saturday_query = "(%s == 1) & (%s >= start_date) & (%s <= end_date)" % (saturday_day, saturday, saturday)
            sunday_query = "(%s == 1) & (%s >= start_date) & (%s <= end_date)" % (sunday_day, sunday, sunday)
    except NameError:
        # the Long Beach Exception
        weekday_query = "(%s == 1) & (%s >= start_date) & (%s <= end_date)" % (weekday_day, weekday, weekday)
        saturday_query = "(%s == 1) & (%s >= start_date) & (%s <= end_date)" % (saturday_day, saturday, saturday)
        sunday_query = "(%s == 1) & (%s >= start_date) & (%s <= end_date)" % (sunday_day, sunday, sunday)'''

    # LISTS OF SERVICE_IDs FOR USE IN ANALYSIS
    if calend == True:
        weekday_service = list(calendar.query(weekday_query)['service_id'])
        saturday_service = list(calendar.query(saturday_query)['service_id'])
        sunday_service = list(calendar.query(sunday_query)['service_id'])
        
        # this option fires if none of the GTFS schedules are within the
        # study dates
        if len(weekday_service) == 0 & len(saturday_service) == 0 & len(sunday_service) == 0:

            print("study dates before current schedule")

            weekday_service = list(calendar.query("({} == 1)".format(weekday_day))['service_id'])
            saturday_service = list(calendar.query("saturday == 1")['service_id'])
            sunday_service = list(calendar.query("sunday == 1")['service_id'])
        else:
            pass
    elif calend == False:
        try:
            weekday_service = exceptions[weekday][1]
            saturday_service = exceptions[saturday][1]
            sunday_service = exceptions[sunday][1]
        # Long Beach Exception
        # agencies that use calendar_date to assign service
        # but the GTFS data does not include our study dates
        except KeyError:
            print("here lies the error")

            # take the earliest available schedule date if the GTFS data
            # does not include the study date

            min_weekday = calendar_dates[calendar_dates["weekday"] == weekday_day]["date"].min()
            min_saturday = calendar_dates[calendar_dates["weekday"] == "saturday"]["date"].min()
            min_sunday = calendar_dates[calendar_dates["weekday"] == "sunday"]["date"].min()

            weekday_service = exceptions[str(min_weekday)][1]
            saturday_service = exceptions[str(min_saturday)][1]
            sunday_service = exceptions[str(min_sunday)][1]
    else:
        pass
    # create dictionary object to hold information about stops
    # dictionary will be used for quick access
    
    if len(weekday_service) == 0 :
        if sum(calendar[weekday_day]) == 0:
            #print("no agency weekday service")
            errors = errors.append(pd.DataFrame([[agency_name, path,"no agency weekday service"]], columns=["agency", "path","error"]), ignore_index=True)
        else:
            #print("missing weekday service")
            errors = errors.append(pd.DataFrame([[agency_name, path,"missing weekday service"]], columns=["agency", "path","error"]), ignore_index=True)
    if len(saturday_service) == 0:
        if sum(calendar["saturday"]) == 0:
            #print("no agency saturday service")
            errors = errors.append(pd.DataFrame([[agency_name, path, "no agency saturday service"]], columns=["agency", "path","error"]), ignore_index=True)
        else:
            #print("missing weekday service") 
            errors = errors.append(pd.DataFrame([[agency_name, path,"missing saturday service"]], columns=["agency", "path","error"]), ignore_index=True)
    if len(sunday_service) == 0:
        if sum(calendar["sunday"]) == 0:
            #print("no agency sunday service")
            errors = errors.append(pd.DataFrame([[agency_name, path,"no agency sunday service"]], columns=["agency", "path","error"]), ignore_index=True)
        else:
            #print("missing weekday service")
            errors = errors.append(pd.DataFrame([[agency_name, path,"missing sunday service"]], columns=["agency", "path","error"]), ignore_index=True)
    
    # create the fully-formed stops file
    if "direction_id" in trips.columns:
        trip_info = trips[["route_id", "service_id", "trip_id", "direction_id"]]
    else:
        trip_info = trips[["route_id", "service_id", "trip_id"]]
    route_info = routes[['route_id', 'route_type']]

    trip_route_merged = trip_info.merge(route_info, on="route_id")

    # reorder columns
    if "direction_id" in trips.columns:
        trip_route_merged = trip_route_merged[["trip_id", "route_id", "service_id", "direction_id", "route_type"]]
    else:
        trip_route_merged = trip_route_merged[["trip_id", "route_id", "service_id", "route_type"]]

    if trips['trip_id'].dtype == object:

        # create a string version of trip_id
        stop_times['trip_id_str'] = stop_times['trip_id'].astype("str")

        # merge trip info onto stop_times
        stop_times_merged = stop_times[["trip_id_str", 'arrival_time', 'stop_id']].merge(trip_route_merged, right_on="trip_id", left_on="trip_id_str", how='left')

    else:

        stop_times_merged = stop_times[["trip_id", 'arrival_time', 'stop_id', "stop_sequence"]].merge(trip_route_merged, right_on="trip_id", left_on="trip_id", how='left')

    # calculate a interpolated (float) time for future analysis


    if (sum(pd.isnull(stop_times_merged['arrival_time']))/len(stop_times_merged)) > 0.05:
        agency_trips = list(stop_times.drop_duplicates('trip_id')['trip_id'])
        allInterpolatedTimes = []
        for trip in agency_trips:
            allInterpolatedTimes += interpolator(trip)
        stop_times_merged['minutes'] = allInterpolatedTimes
        #print("INTERP-O-LATED")
    else:
        stop_times_merged['minutes'] = [minSinceMidnight(x) for x in stop_times_merged['arrival_time']]

        
    stop_times_merged = stop_times_merged.query("route_type == 3")
    
    # WEEKDAY

    if weekday in exceptions:
        weekday_stops = [weekday_service.remove(x) for x in exceptions[weekday][2] if x in weekday_service] + exceptions[weekday][1]
    else:
        pass

    weekday_stops = stop_times_merged[stop_times_merged['service_id'].isin(weekday_service)]
    weekday_stops.drop_duplicates(subset=["arrival_time", "stop_id"], inplace=True)
    #print(weekday_stops.shape)

    # SATURDAY

    if saturday in exceptions:
        saturday_service = [saturday_service.remove(x) for x in exceptions[saturday][2]] + exceptions[saturday][1]
    else:
        pass

    saturday_stops = stop_times_merged[stop_times_merged['service_id'].isin(saturday_service)]
    saturday_stops.drop_duplicates(subset=["arrival_time", "stop_id"], inplace=True)
    #print(saturday_stops.shape)
    
    # SUNDAY

    if sunday in exceptions:
        sunday_service = [sunday_service.remove(x) for x in exceptions[sunday][2]] + exceptions[sunday][1]
    else:
        pass
    
    sunday_stops = stop_times_merged[stop_times_merged['service_id'].isin(sunday_service)]
    sunday_stops.drop_duplicates(subset=["arrival_time", "stop_id"], inplace=True)    
    
    queries = [am_peak_query, pm_peak_query, weekday_range_query, saturday_range_query, sunday_range_query]
    periods = ['am_pk', 'pm_pk', 'wkdy', 'sat', 'sun']
    minute_ranges = [(600-360), (1140-900), (1320-360), (1320-480), (1320-480)]
    criteria = [15, 15, 20, 30, 30]
    dfs = [weekday_stops, weekday_stops, weekday_stops, saturday_stops, sunday_stops]
    
    indices = [0,1,2,3,4]
    
        
    if len(weekday_stops) == 0:
        indices.remove(0)
        indices.remove(1)
        indices.remove(2)
    if len(saturday_stops) == 0:
        indices.remove(3)
    if len(sunday_stops) == 0:
        indices.remove(4)

    for i in indices:

        if i == 0:
            headways = (minute_ranges[i] / dfs[i].query(queries[i]).groupby(["stop_id", "direction_id", "route_id"])\
.agg("count")[["arrival_time"]]).reset_index()[(minute_ranges[i] / dfs[i].query(queries[i]).groupby(["stop_id", "direction_id", "route_id"])\
.agg("count")[["arrival_time"]]).reset_index()["arrival_time"] < (minute_ranges[i] / dfs[i].query(queries[i]).groupby(["stop_id", "direction_id", "route_id"])\
.agg("count")[["arrival_time"]]).reset_index()["arrival_time"].quantile(0.95)].groupby(["route_id", "direction_id"]).agg(np.median)\
            .rename({"arrival_time":periods[i]}, axis=1)
        else:
            try:
                headways = headways.join((minute_ranges[i] / dfs[i].query(queries[i]).groupby(["stop_id", "direction_id", "route_id"])\
.agg("count")[["arrival_time"]]).reset_index()[(minute_ranges[i] / dfs[i].query(queries[i]).groupby(["stop_id", "direction_id", "route_id"])\
.agg("count")[["arrival_time"]]).reset_index()["arrival_time"] < (minute_ranges[i] / dfs[i].query(queries[i]).groupby(["stop_id", "direction_id", "route_id"])\
.agg("count")[["arrival_time"]]).reset_index()["arrival_time"].quantile(0.95)].groupby(["route_id", "direction_id"]).agg(np.median)\
            .rename({"arrival_time":periods[i]}, axis=1), how="outer")
            except ValueError:
                print("passed on", periods[i])
                pass

    print(headways.shape)
    
    headways = headways.fillna(9999).astype(int)
    
    headways.reset_index().merge(routes[["route_id", "route_short_name", "route_long_name"]], on="route_id" ).to_csv("output/route-based/headways/{}.csv".format(agency_name))

    for scenario in scenarios.index:

        
        filters = ["(am_pk <= {})".format(scenarios.loc[scenario, "peak"]), "(pm_pk <= {})".format(scenarios.loc[scenario, "peak"]), "(wkdy <= {})".format(scenarios.loc[scenario, "weekday"]), "(sat <= {})".format(scenarios.loc[scenario, "weekend"]), "(sun <= {})".format(scenarios.loc[scenario, "weekend"])]

        hqt_filter = ""

        for j in indices:
            hqt_filter += filters[j] + " & "
    
        hqt_filter = hqt_filter[:-3]
        
        if hqt_filter != "":
            hqt = headways.query(hqt_filter).reset_index().set_index(["route_id", "direction_id"])
            print(scenario, hqt.shape)


            qualifying_stops_dfs = []

            for route_id in hqt.index.levels[0]:
                #print(route_id)
                for direction_id in hqt.index.levels[1]:

                    try:

                        stops_on_route = list(stop_times_merged[(stop_times_merged["route_id"] == route_id) & (stop_times_merged["direction_id"] == direction_id)]["stop_id"].unique())

                        qualifying_stops = stops[stops["stop_id"].isin(stops_on_route)][["stop_id", "stop_name","stop_lon", "stop_lat"]]
                        qualifying_stops["agency"] = agency_name
                        qualifying_stops["route_id"] = route_id

                        qualifying_stops_dfs.append(qualifying_stops)

                    except TypeError:
                        pass
                    except KeyError:
                        pass
            try:
                agency_results = pd.concat(qualifying_stops_dfs, ignore_index=True)
                try:
                    agency_results = agency_results.merge(routes[["route_id", "route_short_name","route_long_name"]], how="left")
                except KeyError:
                    print("A KeyError")
                agency_results.to_csv("output/route-based/{} - Scenario {}.csv".format(agency_name, scenario))
                print("SCENARIO {}, {} of {} stops eligible".format(scenario, agency_results["stop_id"].nunique(), len(stops)))
                print("{} of {} routes eligible".format(agency_results["route_id"].nunique(), len(routes)))
            except ValueError:
                pass
        else:
            print("No Values to Filter")

gtfs/ac-transit--121/
AC Transit 1 of 105
study dates before current schedule


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(230, 5)
A (18, 5)
SCENARIO A, 900 of 5179 stops eligible
11 of 156 routes eligible
B (6, 5)
SCENARIO B, 224 of 5179 stops eligible
3 of 156 routes eligible
C (0, 5)
AMENDED (6, 5)
SCENARIO AMENDED, 224 of 5179 stops eligible
3 of 156 routes eligible
gtfs/airport-valet-express--773/
Airport Valet Express 2 of 105
(2, 5)
A (0, 5)
B (0, 5)
C (0, 5)
AMENDED (0, 5)
gtfs/altamont-corridor-express--823/
Altamont Corridor Express 3 of 105
(2, 5)
No Values to Filter
No Values to Filter
No Values to Filter
No Values to Filter
gtfs/amador-transit--390/
Amador Transit 4 of 105
(6, 3)
A (0, 3)
B (0, 3)
C (0, 3)
AMENDED (0, 3)
gtfs/anaheim-resort-transportation--410/
Anaheim Resort Transportation 5 of 105




(3, 5)
A (0, 5)
B (0, 5)
C (0, 5)
AMENDED (0, 5)
gtfs/arcata-mad-river-transit-system--148/
Eureka Transit Service 6 of 105
(16, 5)
A (0, 5)
B (0, 5)
C (0, 5)
AMENDED (0, 5)
gtfs/avalon-transit--951/
Avalon Transit 7 of 105
study dates before current schedule
(1, 5)
A (0, 5)
B (0, 5)
C (0, 5)
AMENDED (0, 5)
gtfs/bart--58/
Bay Area Rapid Transit 8 of 105
(1, 5)
No Values to Filter
No Values to Filter
No Values to Filter
No Values to Filter
gtfs/blue-gold-fleet--824/
San Francisco Bay Ferry 9 of 105
(1, 5)
No Values to Filter
No Values to Filter
No Values to Filter
No Values to Filter
gtfs/calaveras-transit--399/
Calaveras Transit 10 of 105
(5, 4)
A (0, 4)
B (0, 4)
C (0, 4)
AMENDED (0, 4)
gtfs/caltrain--122/
Caltrain 11 of 105
passed on sat
(6, 5)
A (0, 5)
B (0, 5)
C (0, 5)
AMENDED (0, 5)
gtfs/capitol-corridor--825/
Capitol Corridor Joint Powers Authority 12 of 105
(2, 5)
A (0, 5)
B (0, 5)
C (0, 5)
AMENDED (0, 5)
gtfs/city-of-escalon--601/
eTrans 13 of 105
(2, 3)
A (0, 3)
B (0, 3)
C (0, 

  x2 = take(ap, indices_above, axis=axis) * weights_above


(7, 5)
A (0, 5)
B (0, 5)
C (0, 5)
AMENDED (0, 5)
gtfs/emery-go-round--769/
Emery Go-Round 27 of 105
(7, 5)
A (0, 5)
B (0, 5)
C (0, 5)
AMENDED (0, 5)
gtfs/fairfield-and-suisun-transit--337/
Fairfield and Suisun Transit 28 of 105
(23, 4)
A (0, 4)
B (0, 4)
C (0, 4)
AMENDED (0, 4)
gtfs/foothill-transit--506/
Foothill Transit 29 of 105
study dates before current schedule
(71, 5)
A (3, 5)
SCENARIO A, 122 of 1971 stops eligible
2 of 39 routes eligible
B (0, 5)
C (0, 5)
AMENDED (0, 5)
gtfs/fresno-area-express--315/
Fresno Public Transportation (FAX) 30 of 105
(32, 5)
A (2, 5)
SCENARIO A, 52 of 1497 stops eligible
1 of 16 routes eligible
B (1, 5)
SCENARIO B, 27 of 1497 stops eligible
1 of 16 routes eligible
C (0, 5)
AMENDED (1, 5)
SCENARIO AMENDED, 27 of 1497 stops eligible
1 of 16 routes eligible
gtfs/fresno-county-rural-transit-agency--338/
Fresno County Rural Transit Agency 31 of 105
(12, 4)
A (0, 4)
B (0, 4)
C (0, 4)
AMENDED (0, 4)
gtfs/glendale-beeline--917/
Glendale Beeline 32 of 105
(26,



(274, 5)
A (57, 5)
SCENARIO A, 4018 of 13989 stops eligible
30 of 143 routes eligible
B (20, 5)
SCENARIO B, 1370 of 13989 stops eligible
12 of 143 routes eligible
C (2, 5)
SCENARIO C, 33 of 13989 stops eligible
1 of 143 routes eligible
AMENDED (20, 5)
SCENARIO AMENDED, 1370 of 13989 stops eligible
12 of 143 routes eligible
gtfs/la-metro--677/
Metro - Los Angeles 41 of 105
study dates before current schedule
(274, 5)
No Values to Filter
No Values to Filter
No Values to Filter
No Values to Filter
gtfs/ladot-transit-services--303/
LADOT 42 of 105
(45, 5)
A (2, 5)
SCENARIO A, 136 of 2440 stops eligible
2 of 46 routes eligible
B (0, 5)
C (0, 5)
AMENDED (1, 5)
SCENARIO AMENDED, 42 of 2440 stops eligible
1 of 46 routes eligible
gtfs/laguna-beach-transit--402/
Laguna Beach Transit 43 of 105
(3, 4)
A (0, 4)
B (0, 4)
C (0, 4)
AMENDED (0, 4)
gtfs/lake-transit-authority--598/
Lake Transit 44 of 105
(18, 4)
A (0, 4)
B (0, 4)
C (0, 4)
AMENDED (0, 4)
gtfs/lassen-rural-bus--414/
Lassen Rural Bus 45 of



(121, 6)
A (0, 6)
B (0, 6)
C (0, 6)
AMENDED (0, 6)
gtfs/san-joaquin-rtd--347/
San Joaquin Regional Transit District (RTD) 79 of 105
(90, 5)
A (0, 5)
B (0, 5)
C (0, 5)
AMENDED (0, 5)
gtfs/san-luis-obispo-rta--710/
San Luis Obispo Regional Transit Authority 80 of 105
(17, 4)
A (0, 4)
B (0, 4)
C (0, 4)
AMENDED (0, 4)
gtfs/santa-cruz-metro--343/
Santa Cruz Metro 81 of 105
(27, 5)
A (0, 5)
B (0, 5)
C (0, 5)
AMENDED (0, 5)
gtfs/santa-maria-area-transit--760/
Guadalupe Flyer 82 of 105
(11, 5)
A (0, 5)
B (0, 5)
C (0, 5)
AMENDED (0, 5)
gtfs/sfmta--60/
San Francisco Municipal Transportation Agency 83 of 105
study dates before current schedule
(122, 5)
A (52, 5)
SCENARIO A, 1964 of 3519 stops eligible
27 of 82 routes eligible
B (33, 5)
SCENARIO B, 1422 of 3519 stops eligible
19 of 82 routes eligible
C (3, 5)
SCENARIO C, 141 of 3519 stops eligible
2 of 82 routes eligible
AMENDED (33, 5)
SCENARIO AMENDED, 1422 of 3519 stops eligible
19 of 82 routes eligible
gtfs/sfmta--942/
San Francisco Municipal 

## Evaluation

### Route-Based

In [131]:
dfs = []

for file in list(os.walk("output/route-based"))[0][2]:
    if "Scenario A.csv" in file:
        dfs.append(pd.read_csv("output/route-based/" + file))
        

qbs = pd.concat(dfs, ignore_index=True).drop_duplicates(subset=["agency", "stop_id"])

In [132]:
# Original SB 50
qbs.shape

(8343, 9)

In [114]:
# Scenario B
qbs.shape

(3156, 9)

In [116]:
# Scenario C
qbs.shape

(174, 9)

In [128]:
# New SB 50 (Scenario Amended)
qbs.shape

(3198, 9)

In [133]:
qbs["agency"].value_counts()

Metro - Los Angeles                              4018
San Francisco Municipal Transportation Agency    1964
AC Transit                                        900
VTA                                               349
Culver CityBus                                    282
Big Blue Bus                                      235
LADOT                                             136
Foothill Transit                                  122
Golden Empire Transit District                    105
Long Beach Transit                                101
SamTrans                                           63
Fresno Public Transportation (FAX)                 52
County Connection                                  16
Name: agency, dtype: int64

### Stop-Based

In [125]:
dfs = []

for file in list(os.walk("output/stop-based"))[0][2]:
    if "Scenario AMENDED.csv" in file:
        dfs.append(pd.read_csv("output/stop-based/" + file))
        

qbs = pd.concat(dfs, ignore_index=True).drop_duplicates()

In [120]:
# Scenario A
qbs.shape

(9611, 10)

In [122]:
# Scenario B
qbs.shape

(3877, 10)

In [124]:
# Scenario C
qbs.shape

(554, 10)

In [126]:
# Scenario Amended
qbs.shape

(3971, 10)

In [100]:
dfs

['Culver CityBus - Scenario AMENDED.csv',
 'LADOT - Scenario AMENDED.csv',
 'San Francisco Municipal Transportation Agency - Scenario AMENDED.csv',
 'Golden Empire Transit District - Scenario AMENDED.csv',
 'Metro - Los Angeles - Scenario AMENDED.csv',
 'AC Transit - Scenario AMENDED.csv',
 'Fresno Public Transportation (FAX) - Scenario AMENDED.csv']

## Holding tank

In [None]:
# OUTPUT AND ANALYSIS
    output_path = "output/v3/bus/" + agency_name + ".csv"

    if len(stop_times_merged) > 0:
        headways = pd.DataFrame(data=stops['stop_id'])
        #headways["stop_id"] = headways["stop_id"].astype(str)

        minutes = 60 * 4
        

        queries = [am_peak_query, pm_peak_query, weekday_range_query, saturday_range_query, sunday_range_query]
        periods = ['am_pk', 'pm_pk', 'wkdy', 'sat', 'sun']
        minute_ranges = [(600-360), (1140-900), (1320-360), (1320-480), (1320-480)]
        dfs = [weekday_stops, weekday_stops, weekday_stops, saturday_stops, sunday_stops]


        for i in range(5):

            # conditional parameters
            df = dfs[i]
            minutes = minute_ranges[i]
            query = queries[i]

            if "direction_id" in df.columns:
                for direction in [0,1]:
                    results = pd.DataFrame(minutes / df[df['direction_id']==direction].query(query)['stop_id'].value_counts())
                    results.reset_index(inplace=True)
                    results.rename(columns={"stop_id":periods[i]}, inplace=True)
                    results.rename(columns={"index":"stop_id"}, inplace=True)

                    results["stop_id"] = results["stop_id"].astype(str)
                    headways = headways.merge(results, on="stop_id", suffixes=["_dir0", "_dir1"], how='left')
                
            else:
                results = pd.DataFrame(minutes / df.query(query)['stop_id'].value_counts())
                results.reset_index(inplace=True)
                results.rename(columns={"stop_id":periods[i]}, inplace=True)
                results.rename(columns={"index":"stop_id"}, inplace=True)

                results["stop_id"] = results["stop_id"].astype(str)
                headways = headways.merge(results, on="stop_id", how='left')
        
        if "am_pk_dir0" in headways.columns:
            headways["am_pk"] = headways[["am_pk_dir0", "am_pk_dir1"]].min(axis=1)
            headways["pm_pk"] = headways[["pm_pk_dir0", "pm_pk_dir1"]].min(axis=1)

            headways["wkdy"] = headways[["wkdy_dir0", "wkdy_dir1"]].min(axis=1)
            headways["sat"] = headways[["sat_dir0", "sat_dir1"]].min(axis=1)
            headways["sun"] = headways[["sun_dir0", "sun_dir1"]].min(axis=1)
            headways = headways[['stop_id', "am_pk", "pm_pk", "wkdy", "sat", "sun"]] 
        else:
            pass
        #stops["stop_id"] = stops["stop_id"].astype(str)
        output = stops[["stop_id", "stop_name", "stop_lon", "stop_lat"]].merge(headways, on="stop_id", how="outer")
        output.fillna(9999, inplace=True)
        
        output.to_csv(output_path)
        print("exported file -->", output.shape[0], "bus stops")
        
        hqt_filter = "(am_pk <= 15) & (pm_pk <= 15)"
        hqt_filter += " & (wkdy <= 20) & "
        hqt_filter += "(sat <= 30) & (sun <= 30)"
    
    
        hqt = output.query(hqt_filter)

        if len(hqt) > 0:
            hqt.to_csv("output/v3/bus-hqt/" + agency_name + " (HQT).csv")
            print("exported hqt file -->", hqt.shape[0], "hqt bus stops of {}*".format(len(stops)), "({}%)".format(str(round(hqt.shape[0]/len(stops)*100, 0))))
            #print("\n*stops contains all types of transit stops, not just bus")
        else:
            pass
        del output
    else:
        print("No bus data.")
    print("\n")
# output.head()

errors_path = "output/v3/errors-{}-{}-{}.csv".format(weekday, saturday, sunday)
errors.to_csv(errors_path)

print("HEADWAY CALCULATION COMPLETE")