# This file contains functions used to generate new schedules when adding or removing routes

+ **get_trips_for_routes**
  - From trips.txt, get trip_id for certain routes
+ **filter_stoptimes_by_routes**
  - Keep only rows that are related to certain routes in stop_times.txt
+ **convert_time_to_sec**
  - For arrival_time/departure_time in stop_times.txt, HH:MM:SS -> HH*60*60+MM*60+SS
+ **time_to_sec**
  - For single string time in format "08:00:00", convert to seconds
+ **get_travel_time**
  - Based on the existing schedule, get the travel time and distance between each pair of consecutive stops, returned as a pandas DataFrame
+ **get_total_travel_time**
  - Get the total travel time for a new route based on the existing routes' information

...
    

In [9]:
import pandas as pd

In [10]:
def get_trips_for_routes(routes = [30,34,36]):
    """Map each requested route to the trip_ids listed for it in trips.txt.

    para: routes -- iterable of route_short_name values to look up
    return: dict {route: [trip_id, ...]}; a route with no matching row
            maps to an empty list
    """
    # load trips.txt and narrow it down to the two columns used below
    all_trips = pd.read_csv(r"mmt_gtfs/trips.txt", sep = ",", engine = "python")
    route_trip = all_trips[['route_short_name','trip_id']]

    # one entry per requested route, holding every trip_id found for it
    return {
        route: route_trip.loc[route_trip['route_short_name'] == route, 'trip_id'].tolist()
        for route in routes
    }

In [11]:
def filter_stoptimes_by_routes(routes = [30.,34,36]):
    """Read stop_times.txt and keep only the rows of trips on the given routes.

    para: routes -- list of routes, passed on to get_trips_for_routes
    return: filtered stop_times as a pd.DataFrame; reset_index() renumbers
            the rows and keeps the original row labels in an 'index' column
    """
    # flatten {route: [trip_id, ...]} into a single list of trip_ids
    trips_by_route = get_trips_for_routes(routes)
    wanted_trips = [trip for trips in trips_by_route.values() for trip in trips]

    # load the full schedule, then keep the rows whose trip is wanted
    all_stop_times = pd.read_csv(r"mmt_gtfs/stop_times.txt", sep = ",", engine = "python")
    on_route = all_stop_times['trip_id'].isin(wanted_trips)

    return all_stop_times.loc[on_route].reset_index()
    

In [23]:
def convert_time_to_sec(stop_times):
    """Convert string arrival_time/departure_time columns to integer seconds.

    Each 'HH:MM:SS' value becomes HH*60*60 + MM*60 + SS. Hours are not
    capped, so a value such as '25:01:02' converts to 90062.

    para: stop_times -- pd.DataFrame with string columns 'arrival_time'
          and 'departure_time'. It is NOT modified.
    return: a new pd.DataFrame with the same columns in the same order,
            where the two time columns hold integers
    raises: ValueError if a time column does not split into exactly three
            ':'-separated fields or a field is not an integer
    """
    def _hms_to_seconds(col):
        # Convert one 'HH:MM:SS' string column into a column of seconds.
        if len(col) == 0:
            # nothing to split; just give the empty column an integer dtype
            return col.astype(int)
        parts = col.str.split(':', expand=True)
        if parts.shape[1] != 3:
            raise ValueError("time values must be formatted as HH:MM:SS")
        parts = parts.astype(int)
        return parts[0]*60*60 + parts[1]*60 + parts[2]

    # assign() builds a new frame, so the caller's frame keeps its strings
    # and never receives temporary helper columns
    return stop_times.assign(
        arrival_time = _hms_to_seconds(stop_times['arrival_time']),
        departure_time = _hms_to_seconds(stop_times['departure_time']),
    )


def convert_time_to_str(stop_times_sec):
    """Convert integer-second arrival_time/departure_time back to strings.

    Intended as the inverse of convert_time_to_sec: a value of N seconds
    becomes 'HH:MM:SS' with HH = N // 3600, MM = N % 3600 // 60 and
    SS = N % 60. Every field is zero-padded to at least two digits; hours
    are not wrapped at 24, so 90062 becomes '25:01:02'.
    NOTE(review): the padding of the original stop_times.txt strings is
    not visible here -- confirm two-digit hours are what the consumer wants.

    para: stop_times_sec -- pd.DataFrame whose 'arrival_time' and
          'departure_time' columns hold whole seconds. It is NOT modified.
    return: a new pd.DataFrame with the same columns, the two time columns
            holding 'HH:MM:SS' strings
    """
    def _sec_to_hms(total):
        # Format one seconds value as 'HH:MM:SS'.
        hours, rest = divmod(int(total), 60*60)
        minutes, seconds = divmod(rest, 60)
        return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)

    return stop_times_sec.assign(
        arrival_time = stop_times_sec['arrival_time'].map(_sec_to_hms),
        departure_time = stop_times_sec['departure_time'].map(_sec_to_hms),
    )
    
def time_to_sec(time = '8:00:00'):
    """Convert a single 'HH:MM:SS' (or 'H:MM:SS') string into seconds.

    HH:MM:SS -> HH*60*60 + MM*60 + SS

    para: time -- string with three ':'-separated integer fields
    return: time in seconds (int)
    raises: ValueError if the string does not have exactly three fields
            or a field is not an integer
    """
    # Split on ':' instead of slicing by position, so both digits of the
    # seconds field are read and one- or two-digit hours both work.
    hours, minutes, seconds = (int(field) for field in time.split(':'))

    return hours*60*60 + minutes*60 + seconds

In [21]:
def get_travel_time(stop_list, stop_times):
    """Get travel time and distance between consecutive stops of a stop list.

    For every adjacent pair (upstop, downstop) in stop_list, the first place
    in stop_times where the two stops sit on neighbouring rows and
    stop_sequence goes up by exactly 1 supplies the travel time (difference
    of arrival_time) and distance (difference of shape_dist_traveled).

    para: stop_list -- list of stop_id values in travel order
          stop_times -- an existing schedule (pd.DataFrame) with columns
                        stop_id, stop_sequence, arrival_time and
                        shape_dist_traveled; arrival_time MUST ALREADY BE
                        CONVERTED TO SECONDS. Rows are read by position, so
                        the frame's index labels do not matter.
    return: stop_seq (pd.DataFrame) with columns upstop_id, downstop_id,
            travel_time, travel_dist. Each row is labelled with the position
            of its pair in stop_list; pairs with no match are left out.
    """
    # Create a new df to store outputs
    stop_seq = pd.DataFrame({"upstop_id" : [],
                             "downstop_id":[],
                             "travel_time" : [],
                             "travel_dist" : []})

    pairs = list(zip(stop_list, stop_list[1:]))
    wanted = set(pairs)

    # One pass over the schedule: remember the first row position at which
    # each wanted pair appears on neighbouring rows with consecutive
    # stop_sequence values. This replaces a full rescan per pair.
    ids = stop_times['stop_id'].tolist()
    seqs = stop_times['stop_sequence'].tolist()
    first_match = {}
    for j, pair in enumerate(zip(ids, ids[1:])):
        if pair in wanted and pair not in first_match and seqs[j+1] - seqs[j] == 1:
            first_match[pair] = j

    for i, (upstop, downstop) in enumerate(pairs):
        j = first_match.get((upstop, downstop))
        if j is None:
            # this pair never occurs back to back in the schedule
            continue

        # get travel time and distance from the two matching rows
        arrival = stop_times['arrival_time']
        shape_dist = stop_times['shape_dist_traveled']
        travel_time = arrival.iloc[j+1] - arrival.iloc[j]
        travel_dist = shape_dist.iloc[j+1] - shape_dist.iloc[j]

        stop_seq.loc[i] = {'upstop_id':upstop,'downstop_id':downstop,
                           'travel_time':travel_time,'travel_dist':travel_dist}

    return stop_seq

def get_total_travel_time(stop_seq):
    """Add up the travel time and distance over all rows of stop_seq.

    para: stop_seq -- pd.DataFrame with 'travel_time' and 'travel_dist'
          columns (the layout returned by get_travel_time)
    return: (total_travel_time, total_travel_dist)
    """
    total_travel_time = sum(stop_seq['travel_time'])
    total_travel_dist = sum(stop_seq['travel_dist'])

    return total_travel_time, total_travel_dist
    
    
    

In [15]:
# Load the stop_times rows for routes 30, 34 and 36 and convert their
# arrival/departure times to seconds; head() previews the first rows.
stop_times = convert_time_to_sec(filter_stoptimes_by_routes(routes = [30,34,36]))
stop_times.head()

Unnamed: 0,index,trip_id,stop_sequence,stop_id,pickup_type,drop_off_type,arrival_time,departure_time,timepoint,stop_headsign,shape_dist_traveled
0,36696,908292,1,7100,0,1,22500,22500,1,E TOWNE: VIA NAKOOSA,0.0092
1,36697,908292,2,7169,0,0,22544,22544,0,E TOWNE: VIA NAKOOSA,0.263
2,36698,908292,3,9629,0,0,22776,22776,0,E TOWNE: VIA NAKOOSA,1.5965
3,36699,908292,4,9753,0,0,22800,22800,1,E TOWNE: VIA NAKOOSA,1.7287
4,36700,908292,5,9388,0,0,22822,22822,0,E TOWNE: VIA NAKOOSA,1.8701


In [18]:
# Test input: a list of stop_ids; each adjacent pair is looked up in stop_times.
stop_list_test = [9237,9175,9373,9918,9364,9100,9642,9378,9564,9802,9288,1420,1751,1309,1105,7100,7169,7167,7209,7633,9167,9811,9277,9243,9439,9137,9745,9951,9645,9291,9432,9719,9677,9800,9783,9391,9351,9140,9310,9620,9863,9917,9928,9422,9800]
# Travel time and distance for each pair found in the schedule.
stop_seq = get_travel_time(stop_list_test,stop_times)
stop_seq.head()

Unnamed: 0,upstop_id,downstop_id,travel_time,travel_dist
0,9237.0,9175.0,33.0,0.1165
1,9175.0,9373.0,68.0,0.2397
2,9373.0,9918.0,43.0,0.1527
3,9918.0,9364.0,70.0,0.2461
4,9364.0,9100.0,86.0,0.2899


In [29]:
# Scratch check: int() drops the fractional part (10.9 -> 10).
int(10.9)

10

In [27]:
# Departure window and interval used to compute trip_num below.
first_departure_time = '8:00:00'
last_departure_time = '18:00:00'
interval_time = 60 * 60  # seconds (one hour)

# Number of whole intervals between the first and last departure;
# int() truncates any partial interval.
trip_num = int((time_to_sec(last_departure_time)-time_to_sec(first_departure_time))/interval_time)

for i in range(trip_num):
    for j in stop_list:
        


    