In [7]:
import tempfile
import time 
import shutil
import math
import sys

from datetime import datetime, timedelta

import pandas as pd
import numpy as np
import gtfstk as gt

import geopandas as gp
import shapely.geometry as geom

DIR = Path('..')
sys.path.append(str(DIR))

DATA_DIR = DIR/'data/'

import gtfsanalyst as hp

%load_ext autoreload
%autoreload 2

NameError: name 'Path' is not defined

In [None]:
def calc_feed_segments(feed):
    feed,
    max_cuf_off = 2 * 3600, 
    analysis_start = 10800,
    analysis_end = 97140, 
    convert_to_gpd = False): 
    """
    The objective is to create a dataframe of all links in a GTFS
    a link connects two stops and includes these fields:
        - origin stop (o_stop), in other words this is start node of a link
        - origin stop (o_stop) xy
        - origin time (o_time), in other words this is start time of link
        - destination stop (d_stop), in other words this is end of link
        - destination time (d_time), in other words this is end time of link
        - origin stop (d_stop) xy
        - trip id (trip_id), in other words this is the name of the trip that connects the nodes of this link.
        - route id (route_id), in other words this is the name of the route that connects the nodes of this link.
        - transit mode (type), in other words this could be for example bus, ferry, rail, etc.
        - the travel time (duration), in other words how long it take to go from one node to another.
        - the sequence of stops after the link, this will help to find averate wait time and remove duplice services
    Notes:
        - tne input must be a dictionary of GTFS tables in pandas dataframe format
        - the keys in this dictinonary must be GTFS table names
        - max cut off is the maximum travel time in seconds. The default is 2 hours.
        - analysis period is a list of two times the default is '03:00:00' to '26:59:00'
    """
    
    analysis_duration_sec = (analysis_end - analysis_start) 
    analysis_duration_h = analysis_duration_sec / 3600
    analysis_mid_sec = analysis_start + (analysis_duration_sec / 2)
    analysis_start_sec = analysis_start
    analysis_end_sec = analysis_mid_sec + max_cuf_off + (0.25 * 3600)
    

    
    def fix_dup_tirps(row):
        sid = row['o_stop']+";"
        seq = row['stop_seq']
        return seq.split(sid, 1)[-1]

    if not PT_links_df.empty:
        PT_links_df['stop_seq'] = PT_links_df.apply(fix_dup_tirps, axis = 1)
        
        PT_links_df = PT_links_df.merge(feed['stops'][['stop_id', 'stop_lat', 'stop_lon']], 
                                        left_on='o_stop', 
                                        right_on='stop_id', 
                                        how='left').drop('stop_id', axis = 1) 
        PT_links_df.rename(columns = {'stop_lat': 'o_stop_lat',
                                      'stop_lon': 'o_stop_lon'},
                          inplace = True)
        PT_links_df = PT_links_df.merge(feed['stops'][['stop_id', 'stop_lat', 'stop_lon']], 
                                        left_on='d_stop', 
                                        right_on='stop_id', 
                                        how='left').drop('stop_id', axis = 1)
        PT_links_df.rename(columns = {'stop_lat': 'd_stop_lat',
                                      'stop_lon': 'd_stop_lon'},
                          inplace = True)
    
    #calculates the average wait time (awt) depending on the analysis awt period.
 
    #removes the PT links outside the analysis awt period
    cond = (PT_links_df['o_time_sec'] >= analysis_start)&\
           (PT_links_df['d_time_sec'] <= analysis_end)

    #calculates the frequency of trips
    frq_df = PT_links_df[cond]['stop_seq'].value_counts().reset_index()
    frq_df.columns = ['stop_seq', 'freq']
    frq_df['freq'] = frq_df['freq'] / analysis_duration_h # frequency per hour

    # keep only three hours period for simplicity
    cond = (PT_links_df['o_time_sec'] >= analysis_mid_sec)&\
           (PT_links_df['d_time_sec'] <= analysis_end_sec)
    PT_links_df = PT_links_df[cond].copy().merge(frq_df, how = 'left')
    #calculates the awt
    PT_links_df['awt'] = 3600 / PT_links_df['freq'] / 2 #average waite time (sec) is half the headway
    
    PT_links_df.fillna(0, inplace=True)
    
    if convert_to_gpd == True:
        #converting the PT_links_df to a geodataframe
        l = lambda x: geom.LineString([geom.Point(x.o_stop_lon,x.o_stop_lat), geom.Point(x.d_stop_lon, x.d_stop_lat)])
        PT_links_df['geometry'] = PT_links_df.apply(l, axis=1)
        PT_links_gdf = gp.GeoDataFrame(PT_links_df)
        return PT_links_gdf

    return PT_links_df

In [1]:
class Feed(gt.Feed):
    def __init__(self, dist_units, agency=None, stops=None, routes=None,
        trips=None, stop_times=None, calendar=None, calendar_dates=None,
        fare_attributes=None, fare_rules=None, shapes=None,
        frequencies=None, transfers=None, feed_info=None, feed_segments=None):
        
        gt.Feed.__init__(self, dist_units, agency=None, stops=None, routes=None,
            trips=None, stop_times=None, calendar=None, calendar_dates=None,
            fare_attributes=None, fare_rules=None, shapes=None,
            frequencies=None, transfers=None, feed_info=None)
        
        self.feed_segments = feed_segs   
    

NameError: name 'gt' is not defined

In [None]:
def read_gtfs(path, dist_units=None):
    """
    Create a Feed instance from the given path and given distance units.
    The path should be a directory containing GTFS text files or a
    zip file that unzips as a collection of GTFS text files
    (and not as a directory containing GTFS text files).
    The distance units given must lie in :const:`constants.dist_units`
    Notes
    -----
    - Ignore non-GTFS files
    - Automatically strip whitespace from the column names in GTFS files
    - This is based on gtfstk library
    """
    gt_feed = gt.read_gtfs(path, dist_units)
    
    #calculate segments
    feed_dict = hp.feed_obj_to_dict(feed_obj) 
    
    PT_links_df =  feed_dict['stop_times'].copy()
    PT_links_df.rename(columns = {'arrival_time': 'o_time', 
                                  'stop_id': 'o_stop',
                                  'stop_sequence': 'o_sequence'}, inplace = True)  
    
    PT_links_df[['d_time', 'd_stop', 'd_sequence']] = PT_links_df[['o_time', 'o_stop', 'o_sequence']].shift(-1)
    
    PT_links_df = PT_links_df[PT_links_df['o_sequence'] < PT_links_df['d_sequence']].copy() #removes the last stops
    
    #Convert the time into seconds for easier time calculatins
    PT_links_df['o_time_sec'] = PT_links_df['o_time'].apply(hp.text2sec)
    PT_links_df['d_time_sec'] = PT_links_df['d_time'].apply(hp.text2sec)
    PT_links_df['duration'] = PT_links_df['d_time_sec'] - PT_links_df['o_time_sec']
    
    #Add route_id using the trips table
    PT_links_df = PT_links_df.merge(feed['trips'])
        
    #Add route type in text format to the link dataset
    PT_links_df = PT_links_df.merge(feed['routes'])

    route_type = {'0': 'Tram, Streetcar, Light rail',
                  '1': 'Subway, Metro',
                  '2': 'Rail',
                  '3': 'Bus',
                  '4': 'Ferry',
                  '5': 'Cable car',
                  '6': 'Gondola, Suspended cable car',
                  '7': 'Funicular'}

    PT_links_df['route_type'] = PT_links_df['route_type'].astype(str)
    PT_links_df['route_type'].replace(route_type, inplace = True)

    #add stop sequence to PT_links_df
    stop_seq_df = stop_seq_for_trips(feed['stop_times'])
    PT_links_df = PT_links_df.merge(stop_seq_df)
    
    feed_dict['feed_segments'] = PT_links_df
    
    return Feed(**feed_dict)
    