In [None]:
from __future__ import division
import datetime as dt
from collections import OrderedDict
import sys, os
import dateutil.relativedelta as rd
import json
from pathlib import Path

import utm
import pandas as pd
import numpy as np
import shapely.geometry as sg

# Parent directory of the folder this notebook runs from.
DIR = Path('..')
# Make that directory importable. This has to happen before the `gtfstk`
# import below (presumably so the local checkout is picked up -- TODO confirm).
sys.path.append(str(DIR))

import gtfstk as gt

# Reload imported modules before each cell execution, so edits to the
# library source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Folder holding the feeds and GeoJSON files read by later cells.
DATA_DIR = DIR/'data'

In [None]:
# Choose the GTFS archive to work with; the commented-out lines are
# alternative feeds kept for quick switching.
#path = DATA_DIR/'sample_gtfs.zip'
#path = DATA_DIR/'other_feeds'/'gtfs.zip'
path = DATA_DIR/'cairns_gtfs.zip'

# Show what gt.list_gtfs reports for the chosen archive.
print(gt.list_gtfs(path))

# Read the feed with distance units set to kilometres; `feed` is used by
# every later cell.
feed = gt.read_gtfs(path, dist_units='km')


In [None]:
# Set the route type of the first row of the routes table to 2 (presumably so
# the feed has more than one route type for the split_route_types calls
# further down -- TODO confirm).
# The assignment goes through a single positional indexer on the table itself.
# The chained form `feed.routes.route_type.iat[0] = 2` writes to an
# intermediate Series and is not guaranteed to reach the table (it never does
# under pandas Copy-on-Write).
feed.routes.iat[0, feed.routes.columns.get_loc("route_type")] = 2
feed.routes.route_type.unique()

In [None]:
# Compute the trip statistics once; later cells pass `trip_stats` to the
# feed- and route-level statistics functions.
trip_stats = feed.compute_trip_stats()

In [None]:
#dates = ["20100101"]

# First two dates returned by feed.get_first_week(), plus 2010-01-01
# (presumably a date outside the feed's service period, to exercise that
# case -- TODO confirm).
dates = feed.get_first_week()[:2] + ["20100101"]
# Feed statistics for those dates, split by route type.
f = feed.compute_feed_stats(trip_stats, dates=dates, split_route_types=True)
# Transposed for display.
f.T

In [None]:
# Stop, route and feed time series for `dates`, all with freq="12H".
# The route and feed versions also take the precomputed `trip_stats`.
sts = feed.compute_stop_time_series(dates, freq="12H")
rts = feed.compute_route_time_series(trip_stats, dates, freq="12H")
fts = feed.compute_feed_time_series(trip_stats, dates, freq="12H")

In [37]:
def bingo(
    feed, trip_stats, dates, freq="5Min",
    *, split_route_types=False
):
    """
    Prototype: collapse the feed's route time series into a feed-wide
    time series.

    Parameters
    ----------
    feed
        Object providing ``compute_route_time_series(trip_stats, dates,
        freq=...)`` and, when ``split_route_types`` is true, a ``routes``
        table with ``route_id`` and ``route_type`` columns.
    trip_stats
        Forwarded unchanged to ``feed.compute_route_time_series``.
    dates
        Forwarded unchanged to ``feed.compute_route_time_series``.
    freq : str
        Forwarded unchanged to ``feed.compute_route_time_series``.
    split_route_types : bool, keyword-only
        If true, sum the indicators per route type instead of over all
        routes.

    Returns
    -------
    pandas.DataFrame
        An empty frame if the route time series is empty. Otherwise the
        summed indicators ``num_trip_starts``, ``num_trip_ends``,
        ``num_trips``, ``service_distance`` and ``service_duration``, plus
        ``service_speed`` (``service_distance / service_duration``).

        - Without the split: indexed by ``datetime``, indicator columns in
          alphabetical order.
        - With the split: indexed by ``(datetime, route_type)``, indicator
          columns in the order listed above.
    """
    route_ts = feed.compute_route_time_series(trip_stats, dates, freq=freq)
    if route_ts.empty:
        return pd.DataFrame()

    indicators = [
        "num_trip_starts",
        "num_trip_ends",
        "num_trips",
        "service_distance",
        "service_duration",
    ]

    if not split_route_types:
        # Sum each indicator across all routes; min_count=1 keeps a time bin
        # as NaN when every route is NaN there instead of turning it into 0.
        totals = [
            route_ts[name].sum(axis=1, min_count=1) for name in indicators
        ]
        result = pd.concat(totals, axis=1, keys=indicators)
        result = result.sort_index(axis=1)
        result = result.rename_axis(index="datetime")
    else:
        # Long format, one row per (datetime, route, indicator), tagged with
        # the route type taken from the routes table.
        long_form = gt.unstack_time_series(route_ts)
        route_types = feed.routes.filter(["route_id", "route_type"])
        tagged = long_form.merge(route_types, how="left")

        # Sum the values per route type, then spread the indicators back
        # out into columns.
        summed = (
            tagged
            .groupby(["datetime", "indicator", "route_type"])
            .agg({"value": "sum"})
            .reset_index()
        )
        wide = summed.pivot_table(
            index=["datetime", "route_type"], columns="indicator"
        )
        result = (
            wide.value
            .rename_axis(columns={"indicator": None})
            .filter(indicators)
        )

    # Calculate service speed
    speed = result.service_distance / result.service_duration
    return result.assign(service_speed=speed)

# Try the prototype on the loaded feed with freq="12H", split by route type.
bingo(feed, trip_stats, dates=dates, freq="12H", split_route_types=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,num_trip_starts,num_trip_ends,num_trips,service_distance,service_duration,service_speed
datetime,route_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-05-26 00:00:00,2,23.0,19.0,23.0,677.652816,21.016667,32.243592
2014-05-26 00:00:00,3,220.0,195.0,219.0,4342.754519,154.816667,28.05095
2014-05-26 12:00:00,2,36.0,39.0,40.0,1216.570247,36.333333,33.483585
2014-05-26 12:00:00,3,343.0,364.0,368.0,7537.049652,260.433333,28.940418
2014-05-27 00:00:00,2,23.0,19.0,23.0,677.652816,21.016667,32.243592
2014-05-27 00:00:00,3,220.0,195.0,219.0,4342.754519,154.816667,28.05095
2014-05-27 12:00:00,2,36.0,39.0,40.0,1216.570247,36.333333,33.483585
2014-05-27 12:00:00,3,343.0,364.0,368.0,7537.049652,260.433333,28.940418


In [26]:
# NOTE(review): the saved output of this cell has a (datetime, route_type)
# index, so `f` presumably held a route-type-split time series from an earlier
# run of the session -- TODO confirm this still works after Restart & Run All.
# A bare expression that is not the last line of the cell displays nothing.
f.index
# Keep every first-level label and the second-level labels up to 2; in the
# saved output that leaves only the route_type 2 rows.
f.loc[(slice(None), slice(2)), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,num_trip_ends,num_trip_starts,num_trips,service_distance,service_duration,service_speed
datetime,route_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-05-26 00:00:00,2,19.0,23.0,23.0,677.652816,21.016667,32.243592
2014-05-26 12:00:00,2,39.0,36.0,40.0,1216.570247,36.333333,33.483585
2014-05-27 00:00:00,2,19.0,23.0,23.0,677.652816,21.016667,32.243592
2014-05-27 12:00:00,2,39.0,36.0,40.0,1216.570247,36.333333,33.483585


In [12]:
# The library's own feed time series at freq="12H", without the route-type
# split (presumably shown for comparison with bingo's output -- TODO confirm).
feed.compute_feed_time_series(trip_stats, dates, freq="12H")

Unnamed: 0,num_trip_ends,num_trip_starts,num_trips,service_distance,service_duration,service_speed
2014-05-26 00:00:00,214,243,242,5020.407336,175.833333,28.55208
2014-05-26 12:00:00,403,379,408,8753.619898,296.766667,29.496641
2014-05-27 00:00:00,214,243,242,5020.407336,175.833333,28.55208
2014-05-27 12:00:00,403,379,408,8753.619898,296.766667,29.496641


In [None]:
# Scratch check: a DataFrame built from a list of Series has one row per
# Series, with the Series index as its columns.
s1 = pd.Series([12, 13], index=["hello", "goodbye"])
s2 = s1.copy(deep=True)
pd.DataFrame([s1, s2])

In [None]:
# Stop statistics for two explicitly chosen dates, 2014-06-01 and 2014-06-03.
feed.compute_stop_stats(['20140601', '20140603'])

In [None]:
# Recompute the trip statistics, then rebind `feed` to the result of
# append_dist_to_stop_times.
trip_stats = feed.compute_trip_stats()
feed = feed.append_dist_to_stop_times(trip_stats)

# Load screen line: parse the GeoJSON file and turn the geometry of its first
# feature into a shapely object.
with (DATA_DIR/'cairns_screen_line.geojson').open() as src:
    line = sg.shape(json.load(src)['features'][0]['geometry'])



In [None]:
# Screen line counts for up to the first seven entries of `dates`.
f = feed.compute_screen_line_counts(line, dates[:7])
# Show only the rows whose crossing_time compares below '06:00:00'. This is a
# string comparison, so it assumes zero-padded HH:MM:SS values -- TODO confirm.
f[f['crossing_time'] < '06:00:00']

In [None]:
# Feed time series for the last two entries of `dates`, at freq='12H'.
# The first argument used to be `ts`, a name never assigned in the visible
# notebook (NameError on a fresh kernel). The other calls to
# compute_feed_time_series pass `trip_stats`, so that is used here too.
feed.compute_feed_time_series(trip_stats, dates[-2:], freq='12H')

In [None]:
# Flag forwarded as split_directions to the prototype call below.
sd = False
#%time p1 = proto1(feed, ts, dates[:14], split_directions=sd)
# NOTE(review): `proto2` is not defined anywhere in the visible notebook, so
# this cell raises NameError on a fresh kernel -- TODO restore its definition
# or remove the cell.
%time p2 = proto2(feed, [dates[0], '20010101'], split_directions=sd, freq='12H')
p2

In [None]:
# Feed statistics for the first entry of `dates` only.
# `ts` was never assigned in the visible notebook (NameError on a fresh
# kernel), so the precomputed `trip_stats` is passed instead. The date is
# given as a one-element list, matching the earlier compute_feed_stats call
# in this notebook, which passes `dates` as a list.
feed.compute_feed_stats(trip_stats, dates[:1])

In [None]:
# Display whatever feed.describe() reports for the loaded feed.
feed.describe()

In [None]:
# Display whatever feed.summarize() reports for the loaded feed.
feed.summarize()