In [1]:
# coding: utf-8
from google.transit import gtfs_realtime_pb2
from google.protobuf.json_format import MessageToDict
from pandas.io.json import json_normalize
from apscheduler.schedulers.background import BackgroundScheduler

# from sqlalchemy import inspect
# from sqlalchemy import MetaData
# from sqlalchemy import Table
# import sqlalchemy as db

import json
import requests
import numpy as np
import pandas as pd
import datetime
import os, time
import pytz


def get_feed():
    # initialise the feed message parser from Google
    feed = gtfs_realtime_pb2.FeedMessage()
    
    # get the response from the api
    response = requests.get('http://files.transport.act.gov.au/feeds/lightrail.pb', allow_redirects=True)

    # pass the response to the Parser
    feed.ParseFromString(response.content)

    # convert to dict from our original protobuf feed
    dict_obj = MessageToDict(feed)

    return dict_obj


def get_updates(feed_obj):
    # check if empty
    if len(feed_obj) > 0:
        # get the trip updates listed on the feed
        updates = [update for update in feed_obj['entity'] if 'tripUpdate' in update]
        return updates
    else:
        return None


def updates_to_dataframe_old(updates):
    # transform feed to a data frame
    df = json_normalize(updates)
    df.columns = ['ID', 'Trip Update', 'Request Timestamp', 'Trip ID']
    df['Request Timestamp'] = pd.to_datetime(df['Request Timestamp'],unit='s')

    # parse Trip Update column
    df['Trip Update'] = df['Trip Update'].apply(lambda x: x[0])
    df_2 = json_normalize(df['Trip Update'])
    
    # change arrival time and departure time to datetime
    df_2['arrival.time'] = pd.to_datetime(df_2['arrival.time'], unit='s')
    df_2['departure.time'] = pd.to_datetime(df_2['departure.time'], unit='s')

    # combine data frames
    updates_df = pd.concat([df, df_2],axis=1)
    updates_df.rename(inplace=True, columns=
        {
            'arrival.time':'Arrival Time',
            'arrival.delay':'Arrival Delay',
            'departure.time':'Departure Time',
            'departure.delay':'Departure Delay'
        })

    # drop unnecessary colimns
    updates_df.drop(['ID', 'Trip Update', 'arrival.uncertainty', 'departure.uncertainty'], axis=1, inplace=True)
    
    # set time zone
    updates_df['Request Timestamp'] = updates_df['Request Timestamp'].dt.tz_localize('UTC').dt.tz_convert('Australia/Canberra')
    updates_df['Arrival Time'] = updates_df['Arrival Time'].dt.tz_localize('UTC').dt.tz_convert('Australia/Canberra')
    updates_df['Departure Time'] = updates_df['Departure Time'].dt.tz_localize('UTC').dt.tz_convert('Australia/Canberra')
    
    return updates_df


def updates_to_dataframe(updates):
    # transform feed to a dataframe 
    df = json_normalize(updates)
    df['tripUpdate.stopTimeUpdate'] = df['tripUpdate.stopTimeUpdate'].apply(lambda x: x[0])
    print("length of updates: {}".format(len(updates))) # debug: print number of updates in the feed
    
    # format feed
    x = json_normalize(df['tripUpdate.stopTimeUpdate'])
    x['tripUpdate.trip.tripId'] = df['tripUpdate.trip.tripId']
    x['tripUpdate.timestamp'] = df['tripUpdate.timestamp']
    x['tripUpdate.delay'] = df['tripUpdate.delay']

    # format date time
    x['arrival.time'] = x['arrival.time'].apply(lambda xx: datetime.datetime.fromtimestamp(int(xx)))
    x['departure.time'] = x['departure.time'].apply(lambda xx: datetime.datetime.fromtimestamp(int(xx)))
    x['tripUpdate.timestamp'] = x['tripUpdate.timestamp'].apply(lambda xx: datetime.datetime.fromtimestamp(int(xx)))

    # transform to datetime
    x['arrival.time'] = pd.to_datetime(x['arrival.time'])
    x['departure.time'] = pd.to_datetime(x['departure.time'])
    x['tripUpdate.timestamp'] = pd.to_datetime(x['tripUpdate.timestamp'])

    return x


def validate(updates):
    if updates is None:
        print('Feed is empty')
        return False

    else:
        return True

---
# Get Trip Updates

In [25]:
feed = get_feed() # get raw feed
updates = get_updates(feed) # get trip updates
df = updates_to_dataframe(updates) # transform updates to dataframe

length of updates: 129


# Write Feed to CSV

In [26]:
# df.to_csv('updates/update.csv', index=False)

In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 129 entries, 0 to 128
Data columns (total 12 columns):
arrival.delay             129 non-null int64
arrival.time              129 non-null datetime64[ns]
arrival.uncertainty       129 non-null int64
departure.delay           129 non-null int64
departure.time            129 non-null datetime64[ns]
departure.uncertainty     129 non-null int64
scheduleRelationship      129 non-null object
stopId                    129 non-null object
stopSequence              129 non-null int64
tripUpdate.trip.tripId    129 non-null object
tripUpdate.timestamp      129 non-null datetime64[ns]
tripUpdate.delay          129 non-null int64
dtypes: datetime64[ns](3), int64(6), object(3)
memory usage: 12.2+ KB


In [28]:
df.head()

Unnamed: 0,arrival.delay,arrival.time,arrival.uncertainty,departure.delay,departure.time,departure.uncertainty,scheduleRelationship,stopId,stopSequence,tripUpdate.trip.tripId,tripUpdate.timestamp,tripUpdate.delay
0,-6,2019-06-02 08:23:54,0,-6,2019-06-02 08:23:54,0,SCHEDULED,8129,13,780,2019-06-02 20:26:00,0
1,196,2019-06-02 08:57:16,0,196,2019-06-02 08:57:16,0,SCHEDULED,8100,13,718,2019-06-02 20:26:00,0
2,1,2019-06-02 09:24:01,0,1,2019-06-02 09:24:01,0,SCHEDULED,8129,13,784,2019-06-02 20:26:00,0
3,-9,2019-06-02 09:53:51,0,-9,2019-06-02 09:53:51,0,SCHEDULED,8100,13,722,2019-06-02 20:26:00,0
4,-9,2019-06-02 10:23:51,0,-9,2019-06-02 10:23:51,0,SCHEDULED,8129,13,788,2019-06-02 20:26:00,0


In [30]:
x = datetime.datetime.now()
x

datetime.datetime(2019, 6, 2, 20, 26, 53, 682235)

In [31]:
x.hour

20

In [32]:
x.minute

26

In [33]:
x + pd.Timedelta(minutes=30)

datetime.datetime(2019, 6, 2, 20, 56, 53, 682235)

In [37]:
df[(df['arrival.time'] >= datetime.datetime.now() - pd.Timedelta(minutes=15)) & (df['arrival.time'] <= datetime.datetime.now() + pd.Timedelta(minutes=15))]

Unnamed: 0,arrival.delay,arrival.time,arrival.uncertainty,departure.delay,departure.time,departure.uncertainty,scheduleRelationship,stopId,stopSequence,tripUpdate.trip.tripId,tripUpdate.timestamp,tripUpdate.delay
24,26,2019-06-02 20:24:26,0,26,2019-06-02 20:24:26,0,SCHEDULED,8129,13,828,2019-06-02 20:26:00,0
25,0,2019-06-02 20:30:00,1,0,2019-06-02 20:30:00,1,SCHEDULED,8129,1,766,2019-06-02 20:26:00,0
58,1,2019-06-02 20:26:10,0,1,2019-06-02 20:26:30,0,SCHEDULED,8114,6,829,2019-06-02 20:26:00,0
59,0,2019-06-02 20:45:00,1,0,2019-06-02 20:45:00,1,SCHEDULED,8129,1,767,2019-06-02 20:26:00,0
89,-8,2019-06-02 20:23:52,0,-8,2019-06-02 20:23:52,0,SCHEDULED,8100,13,764,2019-06-02 20:26:00,0
90,0,2019-06-02 20:30:00,1,0,2019-06-02 20:30:00,1,SCHEDULED,8100,1,830,2019-06-02 20:26:00,0
121,-6,2019-06-02 20:25:39,0,-6,2019-06-02 20:25:59,0,SCHEDULED,8117,7,765,2019-06-02 20:26:00,0
122,0,2019-06-02 20:45:00,1,0,2019-06-02 20:45:00,1,SCHEDULED,8100,1,831,2019-06-02 20:26:00,0


In [45]:
trips = pd.read_csv('../GTFS/google_transit_lr/trips.txt')
trips = trips[trips['route_id'] == 'ACTO001']

In [40]:
stop_times = pd.read_csv('../GTFS/google_transit_lr/stop_times.txt')

In [46]:
trips.head()

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,direction_id,block_id,shape_id,wheelchair_accessible,bikes_allowed
1,ACTO001,SU,780,Alinga St,1,3,1003,1,1
2,ACTO001,SU,718,Gungahlin Pl,0,3,1004,1,1
3,ACTO001,SU,784,Alinga St,1,3,1003,1,1
4,ACTO001,SU,722,Gungahlin Pl,0,3,1004,1,1
5,ACTO001,SU,788,Alinga St,1,3,1003,1,1


In [47]:
stop_times.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,timepoint
0,712,07:36:16,07:36:36,8111,1,Well Station Drive,0,0,1
1,712,07:38:20,07:38:40,8109,2,Nullarbor Avenue,0,0,1
2,712,07:39:51,07:40:11,8107,3,Mapleton Avenue,0,0,1
3,712,07:41:45,07:42:05,8105,4,Manning Clark Crescent,0,0,1
4,712,07:43:43,07:43:43,8100,5,Gungahlin Place,0,0,1


In [53]:
ref = pd.merge(left=trips, left_on='trip_id', right=stop_times, right_on='trip_id', how='inner')

In [57]:
ref = ref[['service_id', 'trip_id', 'trip_headsign', 'direction_id', 'arrival_time', 'departure_time', 'stop_sequence', 'stop_headsign']]

In [58]:
ref.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9243 entries, 0 to 9242
Data columns (total 8 columns):
service_id        9243 non-null object
trip_id           9243 non-null int64
trip_headsign     9243 non-null object
direction_id      9243 non-null int64
arrival_time      9243 non-null object
departure_time    9243 non-null object
stop_sequence     9243 non-null int64
stop_headsign     9243 non-null object
dtypes: int64(3), object(5)
memory usage: 649.9+ KB


In [59]:
ref.head()

Unnamed: 0,service_id,trip_id,trip_headsign,direction_id,arrival_time,departure_time,stop_sequence,stop_headsign
0,SU,780,Alinga St,1,08:00:00,08:00:00,1,Gungahlin Place
1,SU,780,Alinga St,1,08:02:04,08:02:24,2,Manning Clark Crescent
2,SU,780,Alinga St,1,08:04:04,08:04:24,3,Mapleton Avenue
3,SU,780,Alinga St,1,08:05:40,08:06:00,4,Nullarbor Avenue
4,SU,780,Alinga St,1,08:07:31,08:07:51,5,Well Station Drive


In [60]:
ref['service_id'].unique()

array(['SU', 'WD', 'FR', 'SA'], dtype=object)

In [75]:
arrival_time = ref['arrival_time']

In [76]:
for idx, value in enumerate(arrival_time):
    string = value.split(':')
    if string[0] == '24':
        string[0] = '00'
    elif string[0] == '25':
        string[0] = '01'
    else:
        continue
    arrival_time[idx] = ':'.join(string)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [77]:
ref['arrival_time'] = arrival_time

In [79]:
ref['arrival_time'] = ref['arrival_time'].apply(lambda x: datetime.datetime.strptime(x, '%H:%M:%S').time()) 

In [80]:
departure_time = ref['departure_time']

In [81]:
for idx, value in enumerate(departure_time):
    string = value.split(':')
    if string[0] == '24':
        string[0] = '00'
    elif string[0] == '25':
        string[0] = '01'
    else:
        continue
    departure_time[idx] = ':'.join(string)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [82]:
ref['departure_time'] = departure_timere_time
ref['departure_time'] = ref['departure_time'].apply(lambda x: datetime.datetime.strptime(x, '%H:%M:%S').time()) 

In [83]:
ref.head()

Unnamed: 0,service_id,trip_id,trip_headsign,direction_id,arrival_time,departure_time,stop_sequence,stop_headsign
0,SU,780,Alinga St,1,08:00:00,08:00:00,1,Gungahlin Place
1,SU,780,Alinga St,1,08:02:04,08:02:24,2,Manning Clark Crescent
2,SU,780,Alinga St,1,08:04:04,08:04:24,3,Mapleton Avenue
3,SU,780,Alinga St,1,08:05:40,08:06:00,4,Nullarbor Avenue
4,SU,780,Alinga St,1,08:07:31,08:07:51,5,Well Station Drive


In [93]:
ref[ref['arrival_time'] <= datetime.time(hour=9)]

Unnamed: 0,service_id,trip_id,trip_headsign,direction_id,arrival_time,departure_time,stop_sequence,stop_headsign
0,SU,780,Alinga St,1,08:00:00,08:00:00,1,Gungahlin Place
1,SU,780,Alinga St,1,08:02:04,08:02:24,2,Manning Clark Crescent
2,SU,780,Alinga St,1,08:04:04,08:04:24,3,Mapleton Avenue
3,SU,780,Alinga St,1,08:05:40,08:06:00,4,Nullarbor Avenue
4,SU,780,Alinga St,1,08:07:31,08:07:51,5,Well Station Drive
5,SU,780,Alinga St,1,08:11:09,08:11:29,6,EPIC and Racecourse
6,SU,780,Alinga St,1,08:13:25,08:13:45,7,Phillip Avenue
7,SU,780,Alinga St,1,08:15:34,08:15:54,8,Swinden Street
8,SU,780,Alinga St,1,08:17:17,08:17:37,9,Dickson Interchange
9,SU,780,Alinga St,1,08:19:17,08:19:37,10,Macarthur Avenue
