In [1]:
from __future__ import division
import datetime as dt
from collections import OrderedDict
import sys, os
import dateutil.relativedelta as rd
import json
from pathlib import Path

import utm
import pandas as pd
import numpy as np
import shapely.geometry as sg

# Put the parent directory on the import path -- presumably so that the local
# gtfstk checkout is the one picked up by the import below; TODO confirm.
DIR = Path('..')
sys.path.append(str(DIR))

import gtfstk as gt

# Reload edited modules automatically so library changes show up without a kernel restart
%load_ext autoreload
%autoreload 2

# Directory holding the GTFS feed archives read by this notebook
DATA_DIR = DIR/'data'

In [2]:
# Alternative feeds; exactly one ``path`` assignment should be active at a time.
#path = DATA_DIR/'sample_gtfs.zip'
#path = DATA_DIR/'other_feeds'/'gtfs.zip'
path = DATA_DIR/'cairns_gtfs.zip'

# Show the name and size of every file in the feed archive before loading it
print(gt.list_gtfs(path))

# Load the feed.
# NOTE(review): dist_units='km' presumably declares the feed's distance units -- confirm against the gtfstk docs.
feed = gt.read_gtfs(path, dist_units='km')

# Pick date: the first entry of the feed's first week
date = feed.get_first_week()[0]
print('date', date)


            file_name  file_size
0  calendar_dates.txt        387
1          routes.txt       1478
2           trips.txt     143081
3           stops.txt      26183
4          agency.txt        199
5      stop_times.txt    2561019
6          shapes.txt     864694
7        calendar.txt        337
date 20140526


In [3]:
import copy


def proto1(feed, dates, split_directions=False,
  headway_start_time='07:00:00', headway_end_time='19:00:00'):
    """
    Baseline prototype: compute stop stats independently for every date.

    For each date in ``dates``, call ``gt.compute_stop_stats_base()`` on ``feed.stop_times`` and the trips returned by ``feed.get_trips(date)``, forwarding the keyword arguments ``split_directions``, ``headway_start_time``, and ``headway_end_time``.
    Add a ``'date'`` column to each result and return the concatenation of the per-date results, in the order of ``dates``.
    The index of the result is not reset, so index labels repeat across dates.

    If ``dates`` is empty, return an empty DataFrame.

    NOTES:

    Nothing is cached here: the base computation runs once per date, even when several dates have the same active trips.
    """
    # Materialize so the emptiness check below works for any iterable of dates
    dates = list(dates)
    if not dates:
        # ``pd.concat()`` raises ValueError on an empty list; return an empty result instead
        return pd.DataFrame()

    frames = []
    for date in dates:
        f = gt.compute_stop_stats_base(feed.stop_times, feed.get_trips(date),
          split_directions=split_directions,
          headway_start_time=headway_start_time,
          headway_end_time=headway_end_time)
        # Tag the rows so results for different dates stay distinguishable after concatenation
        f['date'] = date
        frames.append(f)

    return pd.concat(frames)

def proto2(feed, dates, split_directions=False,
  headway_start_time='07:00:00', headway_end_time='19:00:00'):
    """
    Compute stop stats for each of the given dates, reusing results across dates that have the same active trips.

    For each distinct sequence of trip IDs active on a date, call ``compute_stop_stats_base()`` once with that subset of ``feed.trips`` and with the keyword arguments ``split_directions``, ``headway_start_time``, and ``headway_end_time``.
    Return the stats for all dates in a single DataFrame with an extra ``'date'`` column, sorted by ``'date'`` and then ``'stop_id'``.
    The index of the result is not reset.

    If ``dates`` is empty, return an empty DataFrame.

    See ``compute_stop_stats_base()`` for a description of the remaining output columns.

    Assume the following feed attributes are not ``None``:

    - ``feed.stop_times``
    - ``feed.trips``
    - Those used in ``feed.compute_trip_activity()``

    NOTES:

    This is a more user-friendly version of ``compute_stop_stats_base()``.
    The latter function works without a feed, though.
    """
    # Materialize so the emptiness check below works for any iterable of dates
    dates = list(dates)
    if not dates:
        # ``pd.concat()`` raises ValueError on an empty list; return an empty result instead
        return pd.DataFrame()

    # One column per date; a positive entry marks a trip as active on that date
    activity = feed.compute_trip_activity(dates)
    all_trips = feed.trips

    # Collect stats for each date, memoizing stats by trip ID sequence
    # to avoid unnecessary recomputations.
    # Store in dictionary of the form
    # trip ID sequence ->
    # [stats DataFrame, list of dates the stats apply to]
    stats_and_dates_by_ids = {}
    for date in dates:
        ids = tuple(activity.loc[activity[date] > 0, 'trip_id'])
        if ids in stats_and_dates_by_ids:
            # Same active trips as an earlier date: reuse its stats
            stats_and_dates_by_ids[ids][1].append(date)
        else:
            # Compute stats for this new set of active trips
            trips = all_trips[all_trips['trip_id'].isin(ids)].copy()
            stats = gt.compute_stop_stats_base(feed.stop_times, trips,
              split_directions=split_directions,
              headway_start_time=headway_start_time,
              headway_end_time=headway_end_time)

            # Remember stats
            stats_and_dates_by_ids[ids] = [stats, [date]]

    # Assemble stats into DataFrame: one date-tagged copy of the stats per date.
    # The loop names deliberately do not rebind the ``dates`` parameter.
    frames = [
        stats.assign(date=stats_date)
        for stats, stats_dates in stats_and_dates_by_ids.values()
        for stats_date in stats_dates
    ]

    return pd.concat(frames).sort_values(['date', 'stop_id'])

In [4]:
# Compute trip stats once; they are passed to compute_feed_stats() in later cells
ts = feed.compute_trip_stats()
# All dates reported by the feed; the cell displays how many there are
dates = feed.get_dates()
len(dates)

217

In [5]:
# Feed-level stats for the first seven dates, reusing the precomputed trip stats
feed.compute_feed_stats(ts, dates[:7])

Unnamed: 0,date,num_routes,num_stops,num_trips,peak_end_time,peak_num_trips,peak_start_time,service_distance,service_duration,service_speed
1,20140526,20,416,622,08:18:00,39,08:16:00,13774.027234,472.6,29.145212
2,20140527,20,416,622,08:18:00,39,08:16:00,13774.027234,472.6,29.145212
3,20140528,20,416,622,08:18:00,39,08:16:00,13774.027234,472.6,29.145212
4,20140529,20,416,622,08:18:00,39,08:16:00,13774.027234,472.6,29.145212
6,20140530,22,416,636,08:18:00,39,08:16:00,14290.423827,483.016667,29.585778
5,20140531,22,415,437,17:37:00,23,17:28:00,9911.526097,310.4,31.931463
0,20140601,14,411,266,14:37:00,17,14:31:00,6390.846315,197.683333,32.328706


In [None]:
# Time the per-date prototype against the memoizing one on the same seven dates,
# then display the memoizing prototype's result.
# NOTE(review): p1 and p2 are never compared; proto2 sorts its output and proto1 does not,
# so sort p1 by ['date', 'stop_id'] before asserting that the two agree.
%time p1 = proto1(feed, dates[:7], split_directions=True)
%time p2 = proto2(feed, dates[:7], split_directions=True)
p2

In [None]:
# NOTE(review): this passes a single date (dates[0]) whereas the earlier call passes a
# list (dates[:7]); confirm compute_feed_stats() accepts a bare date, otherwise use dates[:1].
feed.compute_feed_stats(ts, dates[0])

In [None]:
# Display the result of feed.describe().
# NOTE(review): this cell has no execution count in the saved notebook -- confirm it runs on a fresh kernel.
feed.describe()

In [None]:
# Display the result of feed.summarize().
# NOTE(review): this cell has no execution count in the saved notebook -- confirm it runs on a fresh kernel.
feed.summarize()