In [None]:
from __future__ import division
import datetime as dt
from collections import OrderedDict
import sys, os
import dateutil.relativedelta as rd
import json
from pathlib import Path

import utm
import pandas as pd
import numpy as np
import shapely.geometry as sg

# Make the parent directory importable so that ``import gtfstk`` below can
# resolve from there (presumably the local checkout -- TODO confirm layout).
DIR = Path('..')
sys.path.append(str(DIR))

import gtfstk as gt

# Reload edited modules automatically before each cell is executed
%load_ext autoreload
%autoreload 2

# Directory holding the GTFS archives and the GeoJSON file read in later cells
DATA_DIR = DIR/'data'

In [11]:
# Choose the feed archive to load; the alternatives are kept commented out
#path = DATA_DIR/'sample_gtfs.zip'
#path = DATA_DIR/'other_feeds'/'gtfs.zip'
path = DATA_DIR/'cairns_gtfs.zip'

# List the files inside the archive together with their sizes
print(gt.list_gtfs(path))

# Read the archive into a feed object, passing dist_units='km' to the reader
feed = gt.read_gtfs(path, dist_units='km')


            file_name  file_size
0  calendar_dates.txt        387
1          routes.txt       1478
2           trips.txt     143081
3           stops.txt      26183
4          agency.txt        199
5      stop_times.txt    2561019
6          shapes.txt     864694
7        calendar.txt        337


In [12]:
# Force the first route's type to 2 so that the feed contains more than one
# route type (presumably to exercise split_route_types=True further down).
# The write goes through a single .iloc call on the routes table: a chained
# assignment via ``feed.routes.route_type.iat[0]`` targets an intermediate
# Series and does not update the table under pandas copy-on-write.
feed.routes.iloc[0, feed.routes.columns.get_loc("route_type")] = 2
feed.routes.route_type.unique()

array([2, 3])

In [14]:
# Compute trip stats once; later cells pass this table to the feed and route
# stats / time series functions
trip_stats = feed.compute_trip_stats()

In [63]:
#dates = ["20100101"]

# Work on a copy of the feed (presumably so that `feed` keeps its original
# calendar -- confirm that copy() is deep)
feed1 = feed.copy()
# Set the calendar table's "monday" column to 0 and assign the table back
c = feed1.calendar
c["monday"] = 0
feed1.calendar = c
# First two dates of the copy's first week plus the fixed date 2010-01-01
# NOTE(review): the extra date looks deliberately outside the feed's range
# (the feed's dates printed below are in 2014) -- confirm intent
dates = feed1.get_first_week()[:2] + ["20100101"]
print(dates)

# Feed stats for those dates with split_route_types=True; transposed for display
f = feed1.compute_feed_stats(trip_stats, dates=dates, split_route_types=True)
f.T

['20140526', '20140527', '20100101']


Unnamed: 0,0,1,2
num_stops,,66,386
num_routes,,1,19
num_trips,,59,563
num_trip_starts,,59,563
num_trip_ends,,58,559
peak_num_trips,,5,35
peak_start_time,,18:13:00,08:46:00
peak_end_time,,18:20:00,08:48:00
service_distance,,1894.22,11879.8
service_duration,,57.35,415.25


In [65]:
# Route time series over `dates`, computed on the unmodified `feed` with
# freq="12H"; displayed below
rts = feed.compute_route_time_series(trip_stats, dates, freq="12H")
rts

indicator,num_trip_ends,num_trip_ends,num_trip_ends,num_trip_ends,num_trip_ends,num_trip_ends,num_trip_ends,num_trip_ends,num_trip_ends,num_trip_ends,...,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed
route_id,110-423,111-423,112-423,113-423,120-423,120N-423,121-423,122-423,123-423,130-423,...,131-423,131N-423,133-423,140-423,141-423,142-423,143-423,143W-423,150-423,150E-423
2014-05-26 00:00:00,19,18,4,3,11,0,13,13,21,11,...,24.049902,,25.035081,25.634669,20.802088,26.021598,24.366138,,31.565566,
2014-05-26 12:00:00,39,39,11,3,21,2,21,20,38,22,...,24.0518,24.067374,26.439134,25.832253,20.828188,26.417789,24.307749,30.331218,31.547064,34.12138
2014-05-27 00:00:00,19,18,4,3,11,0,13,13,21,11,...,24.049902,,25.035081,25.634669,20.802088,26.021598,24.366138,,31.565566,
2014-05-27 12:00:00,39,39,11,3,21,2,21,20,38,22,...,24.0518,24.067374,26.439134,25.832253,20.828188,26.417789,24.307749,30.331218,31.547064,34.12138


In [146]:
def unstack_time_series(time_series):
    """
    Convert a wide time series into long format.

    Given a route, stop, or feed time series of the form output by the functions
    :func:`compute_route_time_series`, :func:`compute_stop_time_series`, or
    :func:`compute_feed_time_series`, respectively, return a DataFrame with
    the columns:

    - ``"datetime"``: the values of the time series index
    - ``"route_id"``, ``"stop_id"``, or no column in the respective cases
    - any further named column levels of the time series
      (e.g. ``"direction_id"``), if present
    - ``"indicator"``: e.g. "num_trips"
    - ``"value"``: value of the indicator for the datetime and id column(s)

    The rows are sorted by all columns except ``"value"``.
    The input is not modified, and its index may carry any name or none.
    """
    level_names = list(time_series.columns.names)
    if "route_id" in level_names:
        id_col = "route_id"
    elif "stop_id" in level_names:
        id_col = "stop_id"
    else:
        id_col = None

    # Name the time axis explicitly before reshaping. Relying on the default
    # reset_index names ("index", "level_2") breaks as soon as the index
    # already has a name or the columns have more than two levels.
    named = time_series.rename_axis("datetime", axis="index")

    if id_col:
        # Route or stop time series.
        # Keep any additional column levels so that rows stay distinguishable.
        extra_cols = [
            name
            for name in level_names
            if name is not None and name not in (id_col, "indicator")
        ]
        key_cols = ["datetime", id_col] + extra_cols + ["indicator"]
        result = (
            named
            # With a flat index this yields a Series indexed by
            # (column levels..., datetime)
            .unstack()
            .reset_index(name="value")
            # Reorder columns
            .loc[:, key_cols + ["value"]]
            .sort_values(key_cols)
        )
    else:
        # Feed time series: the columns are the indicators themselves
        result = (
            named
            .reset_index()
            .melt(id_vars=["datetime"], var_name="indicator")
            .sort_values(["datetime", "indicator"])
        )

    return result

In [143]:
# Stop, route, and feed time series over the same dates and frequency
# (presumably the three input shapes that unstack_time_series is meant to
# handle, per its docstring)
sts = feed.compute_stop_time_series(dates, freq="12H")
rts = feed.compute_route_time_series(trip_stats, dates, freq="12H")
fts = feed.compute_feed_time_series(trip_stats, dates, freq="12H")

In [148]:
# Display the feed time series computed in the previous cell
fts

Unnamed: 0,num_trip_ends,num_trip_starts,num_trips,service_distance,service_duration,service_speed
2014-05-26 00:00:00,214,243,242,5020.407336,175.833333,28.55208
2014-05-26 12:00:00,403,379,408,8753.619898,296.766667,29.496641
2014-05-27 00:00:00,214,243,242,5020.407336,175.833333,28.55208
2014-05-27 12:00:00,403,379,408,8753.619898,296.766667,29.496641


In [9]:
# Scratch check: a list of Series sharing an index becomes a DataFrame with
# one row per Series and one column per index label
s1 = pd.Series([12, 13], index=["hello", "goodbye"])
s2 = s1.copy()
pd.DataFrame([s1, s2])

Unnamed: 0,hello,goodbye
0,12,13
1,12,13


In [None]:
# Stop stats for two explicit dates, 2014-06-01 and 2014-06-03
feed.compute_stop_stats(['20140601', '20140603'])

In [None]:
# Recompute trip stats, then replace `feed` with the result of
# append_dist_to_stop_times.
# NOTE(review): `feed` is reassigned from itself, so re-running this cell
# passes the already-processed feed back in -- confirm that is harmless.
trip_stats = feed.compute_trip_stats()
feed = feed.append_dist_to_stop_times(trip_stats)

# Load screen line
with (DATA_DIR/'cairns_screen_line.geojson').open() as src:
    # `line` first holds the parsed GeoJSON dict, then the Shapely geometry
    # built from the first feature's geometry
    line = json.load(src)
    line = sg.shape(line['features'][0]['geometry'])



In [None]:
# Screen line counts for `line` over at most the first seven entries of `dates`
f = feed.compute_screen_line_counts(line, dates[:7])
# Keep the rows whose crossing_time compares less than '06:00:00'
f[f['crossing_time'] < '06:00:00']

In [None]:
# Feed time series for the last two entries of `dates`.
# The first argument is the trip stats table, i.e. `trip_stats`; no variable
# named `ts` is defined anywhere in this notebook.
feed.compute_feed_time_series(trip_stats, dates[-2:], freq='12H')

In [None]:
# Time a call to proto2 on the first date of `dates` plus the fixed date
# 2001-01-01, then display the result.
# NOTE(review): proto2 (and proto1 in the commented-out line) is not defined in
# any visible cell, so this cell fails on a fresh kernel -- define or remove.
sd = False
#%time p1 = proto1(feed, ts, dates[:14], split_directions=sd)
%time p2 = proto2(feed, [dates[0], '20010101'], split_directions=sd, freq='12H')
p2

In [None]:
# Feed stats for the first entry of `dates`, using the trip stats table
# `trip_stats`; no variable named `ts` is defined anywhere in this notebook.
# NOTE(review): an earlier cell passes a list via ``dates=...`` whereas this
# call passes a single date string -- confirm which form the API expects.
feed.compute_feed_stats(trip_stats, dates[0])

In [None]:
# Display the output of the feed's describe method
feed.describe()

In [None]:
# Display the output of the feed's summarize method
feed.summarize()