In [16]:
from __future__ import division
import datetime as dt
from collections import OrderedDict
import sys, os
import dateutil.relativedelta as rd
import json
from pathlib import Path

import utm
import pandas as pd
import numpy as np
import shapely.geometry as sg

# Parent of the notebook's working directory.
DIR = Path('..')
# Put DIR on the import path before the gtfstk import below
# (presumably so a local gtfstk checkout in DIR is the one imported — confirm).
sys.path.append(str(DIR))

import gtfstk as gt

# IPython autoreload, mode 2: reload modules before executing each cell.
%load_ext autoreload
%autoreload 2

# Directory that holds the input feed and receives the GeoJSON exports below.
DATA_DIR = DIR/'data'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
# Read the Cairns GTFS archive, asking gtfstk to use kilometres as distance units.
path = DATA_DIR / 'cairns_gtfs.zip'
feed = gt.read_gtfs(path, dist_units='km')

# Study date: the first entry of what get_first_week() returns.
first_week = feed.get_first_week()
date = first_week[0]
print('date', date)


date 20140526


In [21]:
# Display what gtfstk's duplicate_stops returns for this feed (a table of stop records).
# NOTE(review): the exact criterion duplicate_stops applies is not visible here — confirm against gtfstk.
gt.duplicate_stops(feed)

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station
0,750000,,Cedar Rd (Palm Cove) - Hail and Ride Location,,-16.743590,145.668217,,,0,
1,750001,,Williams Esplanade N201,,-16.744015,145.671110,,,0,
2,750002,,Talpa Close - Hail and Ride Location,,-16.749258,145.667893,,,0,
3,750003,,Veivers Road N203,,-16.748213,145.663675,,,0,
4,750004,,Captain Cook Hwy N3 (Cairns Tropical Zoo),,-16.757931,145.663283,,,0,
5,750005,,Elford Street - Hail and Ride Location,,-16.764017,145.667882,,,0,
6,750006,,Endeavour Road N206,,-16.762802,145.669406,,,0,
7,750007,,Endeavour Rd N208,,-16.761411,145.672815,,,0,
8,750008,,Arlington Esplanade - Hail and Ride Location,,-16.764349,145.675419,,,0,
9,750009,,Arlington Esplanade N5 (Clifton Beach),,-16.767375,145.677058,,,0,


In [14]:
# Summarise the feed for the study date as an indicator/value table
# (date range, number of routes, trips, stops and shapes).
feed.describe(date)

Unnamed: 0,indicator,value
0,start_date,20140526
1,end_date,20141228
2,study_date,20140526
3,num_routes,20
4,num_trips,622
5,num_stops,416
6,num_shapes,43


In [13]:
# Feed quality indicators as an indicator/value table: duplicated route short
# names, missing distances, missing direction IDs and trips missing shapes.
feed.assess()

Unnamed: 0,indicator,value
0,num_duplicated_route_short_names,0
1,frac_duplicated_route_short_names,0
2,has_shape_dist_traveled,False
3,num_missing_dists,37790
4,frac_missing_dists,1
5,has_direction_id,True
6,num_missing_directions,0
7,frac_missing_directions,0
8,num_trips_missing_shapes,0
9,frac_trips_missing_shapes,0


In [14]:
# Per-trip statistics for the feed; `ts` is consumed by the route time series cell.
ts = feed.compute_trip_stats()
# Optional CSV export, left disabled:
# ts.to_csv(str(DATA_DIR/'cairns_trip_stats.csv'), index=False)
ts.head(5)

Unnamed: 0,trip_id,route_id,route_short_name,route_type,direction_id,shape_id,num_stops,start_time,end_time,start_stop_id,end_stop_id,is_loop,duration,distance,speed
703,CNS2014-CNS_MUL-Weekday-00-4165878,110-423,110,3,0,1100023,35,05:50:00,06:50:00,750337,750449,0,1.0,32.507121,32.507121
0,CNS2014-CNS_MUL-Saturday-00-4165937,110-423,110,3,0,1100023,35,06:16:00,07:10:00,750337,750449,0,0.9,32.507121,36.119024
704,CNS2014-CNS_MUL-Weekday-00-4165879,110-423,110,3,0,1100023,35,06:20:00,07:20:00,750337,750449,0,1.0,32.507121,32.507121
705,CNS2014-CNS_MUL-Weekday-00-4165880,110-423,110,3,0,1100023,35,06:50:00,07:50:00,750337,750449,0,1.0,32.507121,32.507121
706,CNS2014-CNS_MUL-Weekday-00-4165881,110-423,110,3,0,1100023,35,07:15:00,08:20:00,750337,750449,0,1.083333,32.507121,30.006574


In [15]:
# Build the route time series for the study date from the trip stats in `ts`,
# then display it downsampled to 12-hour bins.
rts = feed.compute_route_time_series(ts, date)
gt.downsample(rts, freq='12H')

indicator,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,num_trip_starts,...,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed,service_speed
route_id,110-423,111-423,112-423,113-423,120-423,120N-423,121-423,122-423,123-423,130-423,...,131-423,131N-423,133-423,140-423,141-423,142-423,143-423,143W-423,150-423,150E-423
2014-05-26 00:00:00,23,22,5,3,12,0,14,14,23,12,...,24.049902,,25.035081,25.634669,20.802088,26.021598,24.366138,,31.565566,
2014-05-26 12:00:00,36,36,10,3,20,2,20,19,37,21,...,24.0518,24.067374,26.439134,25.832253,20.828188,26.417789,24.307749,30.331218,31.547064,34.12138


In [15]:
# Export the first trip listed in feed.trips, together with its stops, as GeoJSON.
trip_id = feed.trips['trip_id'].iat[0]
geo = feed.trip_to_geojson(trip_id, include_stops=True)
# Name the file after the exported trip. `route_id` must not be used here: it is
# only assigned in the next cell, so on a fresh kernel it would raise NameError.
path = DATA_DIR/'trip_{!s}.geojson'.format(trip_id)
with path.open('w') as tgt:
    json.dump(geo, tgt)

In [16]:
# Export the first route listed in feed.routes, together with its stops, as GeoJSON.
route_id = feed.routes['route_id'].iloc[0]
path = DATA_DIR / 'route_{!s}.geojson'.format(route_id)
geo = feed.route_to_geojson(route_id, include_stops=True)
with path.open('w') as out_file:
    json.dump(geo, out_file)

In [16]:
# Add distances to feed
# Recompute trip stats with compute_dist_from_shapes=True, then rebind `feed` to
# the feed returned by append_dist_to_stop_times, so every later cell works with
# stop times that carry a shape_dist_traveled column.
# NOTE(review): rebinding `feed` makes this cell order-sensitive; rerun from the top after edits.
trip_stats = feed.compute_trip_stats(compute_dist_from_shapes=True)
feed = feed.append_dist_to_stop_times(trip_stats)
# Transposed view: one column per stop-time record, one row per field.
feed.stop_times.T

Unnamed: 0,17709,17710,17711,17712,17713,17714,17715,17716,17717,17718,...,17699,17700,17701,17702,17703,17704,17705,17706,17707,17708
trip_id,CNS2014-CNS_MUL-Saturday-00-4165937,CNS2014-CNS_MUL-Saturday-00-4165937,CNS2014-CNS_MUL-Saturday-00-4165937,CNS2014-CNS_MUL-Saturday-00-4165937,CNS2014-CNS_MUL-Saturday-00-4165937,CNS2014-CNS_MUL-Saturday-00-4165937,CNS2014-CNS_MUL-Saturday-00-4165937,CNS2014-CNS_MUL-Saturday-00-4165937,CNS2014-CNS_MUL-Saturday-00-4165937,CNS2014-CNS_MUL-Saturday-00-4165937,...,CNS2014-CNS_MUL-Weekday-00-4180831,CNS2014-CNS_MUL-Weekday-00-4180831,CNS2014-CNS_MUL-Weekday-00-4180831,CNS2014-CNS_MUL-Weekday-00-4180831,CNS2014-CNS_MUL-Weekday-00-4180831,CNS2014-CNS_MUL-Weekday-00-4180831,CNS2014-CNS_MUL-Weekday-00-4180831,CNS2014-CNS_MUL-Weekday-00-4180831,CNS2014-CNS_MUL-Weekday-00-4180831,CNS2014-CNS_MUL-Weekday-00-4180831
arrival_time,06:16:00,06:16:00,06:18:00,06:20:00,06:21:00,06:23:00,06:25:00,06:26:00,06:27:00,06:28:00,...,18:16:00,18:17:00,18:18:00,18:19:00,18:20:00,18:20:00,18:20:00,18:21:00,18:23:00,18:25:00
departure_time,06:16:00,06:16:00,06:18:00,06:20:00,06:21:00,06:23:00,06:25:00,06:26:00,06:27:00,06:28:00,...,18:16:00,18:17:00,18:18:00,18:19:00,18:20:00,18:20:00,18:20:00,18:21:00,18:23:00,18:25:00
stop_id,750337,750000,750001,750002,750003,750004,750005,750006,750007,750008,...,750418,750312,750313,750314,750315,750416,750415,750414,750413,750412
stop_sequence,1,2,3,4,5,6,7,8,9,10,...,20,21,22,23,24,25,26,27,28,29
pickup_type,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
drop_off_type,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
shape_dist_traveled,0,0.46864,1.19038,2.15478,2.619,3.85331,4.76089,4.97181,5.38468,5.91894,...,24.9827,26.0544,26.555,27.2556,28.1887,28.3785,28.9218,29.7083,30.8707,32.3213


In [17]:
# Recompute trip stats on the current `feed` (rebinding `trip_stats`), then
# aggregate them into per-route statistics for the study date.
trip_stats = feed.compute_trip_stats()
route_stats = feed.compute_route_stats(trip_stats, date)
route_stats


Unnamed: 0,route_id,route_short_name,route_type,num_trips,is_loop,is_bidirectional,start_time,end_time,max_headway,min_headway,mean_headway,peak_num_trips,peak_start_time,peak_end_time,service_distance,service_duration,service_speed,mean_trip_distance,mean_trip_duration
0,110-423,110,3,59,0,1,05:50:00,24:02:00,35.0,23.0,29.955556,5,18:13:00,18:20:00,1894.223063,57.35,33.029173,32.105476,0.972034
1,111-423,111,3,58,0,1,06:02:00,24:36:00,67.0,25.0,30.954545,5,07:57:00,08:05:00,2002.677211,59.35,33.743508,34.528917,1.023276
2,112-423,112,3,15,1,0,07:55:00,22:31:00,60.0,60.0,60.0,1,07:55:00,08:31:00,317.42188,9.0,35.269098,21.161459,0.6
3,113-423,113,3,6,0,1,06:05:00,18:42:00,60.0,60.0,60.0,2,06:35:00,06:45:00,147.601565,3.933333,37.525822,24.600261,0.655556
4,120-423,120,3,32,0,1,05:34:00,22:23:00,60.0,60.0,60.0,2,07:00:00,07:23:00,899.055146,26.633333,33.756764,28.095473,0.832292
5,120N-423,120N,3,2,0,0,22:00:00,23:51:00,,,,1,22:00:00,22:51:00,81.004241,1.7,47.649553,40.50212,0.85
6,121-423,121,3,34,0,1,06:28:00,22:00:00,60.0,30.0,52.8,3,07:46:00,07:48:00,586.058611,18.133333,32.319409,17.237018,0.533333
7,122-423,122,3,33,0,1,06:16:00,21:30:00,60.0,30.0,54.0,2,07:16:00,07:30:00,541.875382,15.4,35.186713,16.420466,0.466667
8,123-423,123,3,60,0,1,06:14:00,24:15:00,50.0,10.0,29.565217,3,07:10:00,07:31:00,1127.869795,40.466667,27.871576,18.79783,0.674444
9,130-423,130,3,33,0,1,06:04:00,23:01:00,60.0,60.0,60.0,2,06:30:00,06:35:00,361.052975,17.05,21.176128,10.940999,0.516667


In [18]:
# Scratch check: once the NaNs are dropped from an all-NaN series, its max is NaN.
a = pd.Series([np.nan] * 2)
a.dropna().max()
# Disabled experiment:
# gt.get_trips(feed, date, "07:30:00")


nan

In [21]:

# Screen line: the geometry of the first feature in the GeoJSON file, as a Shapely object.
path = DATA_DIR / 'cairns_screen_line.geojson'
with path.open() as src:
    line = sg.shape(json.load(src)['features'][0]['geometry'])

# Crossings of the screen line on the study date (one row per crossing).
g = feed.compute_screen_line_counts(line, date)
g

Unnamed: 0,trip_id,route_id,route_short_name,crossing_time,orientation
0,CNS2014-CNS_MUL-Weekday-00-4166383,120-423,120,05:42:28,-1
1,CNS2014-CNS_MUL-Weekday-00-4166383,120-423,120,05:50:28,1
2,CNS2014-CNS_MUL-Weekday-00-4166384,120-423,120,06:42:28,-1
3,CNS2014-CNS_MUL-Weekday-00-4166384,120-423,120,06:50:28,1
34,CNS2014-CNS_MUL-Weekday-00-4166400,120-423,120,07:31:32,-1
35,CNS2014-CNS_MUL-Weekday-00-4166400,120-423,120,07:39:28,1
4,CNS2014-CNS_MUL-Weekday-00-4166385,120-423,120,07:42:28,-1
5,CNS2014-CNS_MUL-Weekday-00-4166385,120-423,120,07:50:28,1
36,CNS2014-CNS_MUL-Weekday-00-4166401,120-423,120,08:31:32,-1
37,CNS2014-CNS_MUL-Weekday-00-4166401,120-423,120,08:39:28,1


In [22]:
# Tally screen-line crossings per (route_id, orientation) pair; count() reports
# the number of non-null values in each remaining column.
g.groupby(['route_id', 'orientation']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,trip_id,route_short_name,crossing_time
route_id,orientation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
120-423,-1,32,32,32
120-423,1,32,32,32
120N-423,-1,2,2,2
120N-423,1,2,2,2
