In [None]:
from __future__ import division
import datetime as dt
from collections import OrderedDict
import sys, os
import dateutil.relativedelta as rd
import json
from pathlib import Path

import utm
import pandas as pd
import numpy as np
import shapely.geometry as sg

# Parent of the notebook's working directory; appended to sys.path so that the
# `gtfstk` import below can be resolved from there
# (presumably a local checkout rather than an installed package -- TODO confirm).
DIR = Path('..')
sys.path.append(str(DIR))

import gtfstk as gt

# IPython autoreload extension, mode 2
%load_ext autoreload
%autoreload 2

# Directory that holds the GTFS feed archives read by the cells below
DATA_DIR = DIR/'data'

In [2]:
# Choose the feed archive to load; the commented-out lines are alternative feeds.
path = DATA_DIR/'cairns_gtfs.zip'
#path = DATA_DIR/'sample_gtfs.zip'
#path = DATA_DIR/'other_feeds'/'seq_20170310_gtfs.zip'
# NOTE(review): dist_units='km' presumably declares the units of the feed's
# distance fields -- confirm against the gtfstk documentation.
feed = gt.read_gtfs(path, dist_units='km')

# Pick date: the first entry of the feed's first week
date = feed.get_first_week()[0]
print('date', date)


date 20140526


In [3]:
# Print the feed's text representation, which lists its tables one after another
print(feed)

* agency --------------------
	                                         agency_name  \
0  Department of Transport and Main Roads - Trans...   

                 agency_url     agency_timezone agency_lang  agency_phone  
0  http://www.sunbus.com.au  Australia/Brisbane          en  (07)40576411  
* calendar --------------------
	                           service_id  monday  tuesday  wednesday  thursday  \
0          CNS2014-CNS_MUL-Weekday-00       1        1          1         1   
1  CNS2014-CNS_MUL-Weekday-00-0000100       0        0          0         0   
2         CNS2014-CNS_MUL-Saturday-00       0        0          0         0   
3           CNS2014-CNS_MUL-Sunday-00       0        0          0         0   

   friday  saturday  sunday start_date  end_date  
0       1         0       0   20140526  20141226  
1       1         0       0   20140530  20141226  
2       0         1       0   20140531  20141227  
3       0         0       1   20140601  20141228  
* calendar_dates ---

In [9]:
# Tabulate the files inside the feed archive together with their sizes
gt.list_gtfs(path)


Unnamed: 0,file_name,file_size
0,calendar_dates.txt,387
1,routes.txt,1478
2,trips.txt,143081
3,stops.txt,26183
4,agency.txt,199
5,stop_times.txt,2561019
6,shapes.txt,864694
7,calendar.txt,337


In [10]:
# Feed-level indicators (agencies, date range, counts of routes/trips/stops/shapes)
feed.describe()

Unnamed: 0,indicator,value
0,agencies,[Department of Transport and Main Roads - Tran...
1,timezone,Australia/Brisbane
2,start_date,20140526
3,end_date,20141228
4,num_routes,22
5,num_trips,1339
6,num_stops,416
7,num_shapes,54
8,sample_date,20140529
9,num_routes_active_on_sample_date,20


In [11]:
# Per-column summary of every table: value counts, non-null counts, unique counts, min and max
feed.summarize()

Unnamed: 0,table,column,#values,#nonnull_values,#unique_values,min_value,max_value
0,agency,agency_name,1,1,1,Department of Transport and Main Roads - Trans...,Department of Transport and Main Roads - Trans...
1,agency,agency_url,1,1,1,http://www.sunbus.com.au,http://www.sunbus.com.au
2,agency,agency_timezone,1,1,1,Australia/Brisbane,Australia/Brisbane
3,agency,agency_lang,1,1,1,en,en
4,agency,agency_phone,1,1,1,(07)40576411,(07)40576411
0,calendar,service_id,4,4,4,CNS2014-CNS_MUL-Saturday-00,CNS2014-CNS_MUL-Weekday-00-0000100
1,calendar,monday,4,4,2,0,1
2,calendar,tuesday,4,4,2,0,1
3,calendar,wednesday,4,4,2,0,1
4,calendar,thursday,4,4,2,0,1


In [None]:
# Inputs shared by the prototype functions below: trip stats for the whole feed
# and the dates returned by the feed.
ts = feed.compute_trip_stats()
dates = feed.get_dates()


In [32]:

def proto1(feed, trip_stats, dates):
    """
    Baseline prototype: compute feed stats one date at a time and stack the results.

    Parameters
    ----------
    feed
        Object providing ``compute_feed_stats(trip_stats, date)``, which must
        return a DataFrame.
    trip_stats
        Trip stats passed through unchanged to ``feed.compute_feed_stats``.
    dates
        Iterable of dates to compute stats for.

    Returns
    -------
    DataFrame
        The per-date frames concatenated in the order of ``dates``, each with an
        added ``'date'`` column. The index is not reset, so index labels repeat
        across dates. An empty DataFrame is returned when ``dates`` is empty.
    """
    frames = []
    for date in dates:
        # Pass the ``trip_stats`` argument rather than a notebook global, so the
        # result depends only on the inputs and not on kernel state.
        f = feed.compute_feed_stats(trip_stats, date)
        f['date'] = date
        frames.append(f)

    # pd.concat raises ValueError on an empty list
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames)

def proto2(feed, trip_stats, dates):
    """
    Given trip stats of the form output by :func:`compute_trip_stats` and a list of dates, return a DataFrame with one row per date, in the order given, and the following columns.

    - date: the date the row describes
    - num_trips: number of trips active on the date
    - num_routes: number of routes active on the date
    - num_stops: number of stops active on the date; NOT computed yet and always NaN (see the TODO in the body)
    - peak_num_trips: maximum number of simultaneous trips in service
    - peak_start_time: start time of first longest period during which
      the peak number of trips occurs
    - peak_end_time: end time of first longest period during which
      the peak number of trips occurs
    - service_distance: sum of the distances of the active trips
    - service_duration: sum of the durations of the active trips
    - service_speed: service_distance/service_duration

    If a date has no active trips with matching trip stats, every column of its row except ``date`` is NaN.
    The columns are always the ones listed above, in that order.

    ``trip_stats`` is not modified.

    Assume the following feed attributes are not ``None``:

    - ``feed.trips``
    - Those used in :func:`compute_trip_activity`

    """
    cols = [
      'num_trips',
      'num_routes',
      'num_stops',
      'peak_num_trips',
      'peak_start_time',
      'peak_end_time',
      'service_distance',
      'service_duration',
      'service_speed',
    ]
    hp = gt
    trips = feed.trips
    activity = feed.compute_trip_activity(dates)

    # Convert time strings to seconds on a copy. Converting the caller's frame
    # in place would make a second call operate on already-converted values,
    # so the function would not be safe to re-run in a notebook.
    stats = trip_stats.copy()
    for col in ['start_time', 'end_time']:
        stats[col] = stats[col].map(hp.timestr_to_seconds)

    rows = []
    for date in dates:
        # Start every row with all columns set to NaN so that the output has
        # the same columns whether or not any trips are active.
        d = OrderedDict([('date', date)] + [(col, np.nan) for col in cols])

        active_ids = activity.loc[activity[date] > 0, 'trip_id']
        # Merge on the columns shared by trips and trip stats
        f = trips[trips['trip_id'].isin(active_ids)].merge(stats)

        if not f.empty:
            d['num_trips'] = f.shape[0]
            d['num_routes'] = f['route_id'].nunique()
            # TODO: compute num_stops; feed.get_stops(date).shape[0] was tried
            # here and flagged as needing a fix.

            # Compute peak stats
            times = np.unique(f[['start_time', 'end_time']].values)
            counts = [hp.count_active_trips(f, t) for t in times]
            start, end = hp.get_peak_indices(times, counts)
            d['peak_num_trips'] = counts[start]
            d['peak_start_time'] =\
              hp.timestr_to_seconds(times[start], inverse=True)
            d['peak_end_time'] =\
              hp.timestr_to_seconds(times[end], inverse=True)

            # Compute remaining stats
            d['service_distance'] = f['distance'].sum()
            d['service_duration'] = f['duration'].sum()
            d['service_speed'] = d['service_distance']/d['service_duration']

        rows.append(d)

    return pd.DataFrame(rows, columns=['date'] + cols)

In [33]:
# Time proto2 over all of the feed's dates and display its result;
# the equivalent proto1 timing is left commented out.
#%time p1 = proto1(feed, ts, dates)
%time p2 = proto2(feed, ts, dates)
p2

IndexError: index 1244 is out of bounds for axis 0 with size 1244