In [92]:
# Install packages
import pandas as pd
import requests
import zipfile
import io
from datetime import date
from datetime import datetime

In [93]:
# Create functions to access GTFS and store files in memory
def load_gtfs_data(
        url,
        files=(
            'stops.txt', 'routes.txt', 'trips.txt', 'stop_times.txt',
            'calendar.txt', 'calendar_dates.txt', 'shapes.txt'
        ),
        timeout=30):
    """
    Download a GTFS zip archive and load selected members into pandas DataFrames, all in memory.

    Parameters
    ----------
    url : str
        Location of the GTFS zip archive.
    files : iterable of str
        Archive member names to load. Members missing from the archive are
        reported with a printed message and skipped.
    timeout : float
        Seconds to wait for the server before giving up; passed to ``requests.get``.

    Returns
    -------
    dict
        Maps each loaded member name, with a trailing ``.txt`` removed
        (``'stops.txt'`` -> ``'stops'``), to its DataFrame.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    zipfile.BadZipFile
        If the downloaded payload is not a zip archive.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on 4xx/5xx instead of handing an error page to ZipFile.
    response.raise_for_status()

    dataframes = {}
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
        for file in files:
            # Only the member lookup is guarded, so a KeyError raised while
            # parsing is not misreported as a missing file.
            try:
                member = zip_file.open(file)
            except KeyError:
                print(f"{file} not found in the GTFS feed.")
                continue

            with member:
                table_name = file[:-4] if file.endswith('.txt') else file
                dataframes[table_name] = pd.read_csv(member)

    return dataframes

In [94]:
# Goal of the following cells: build a table of
# [[route_id], [trip_id], [start_time], [end_time]] for trips that are under way,
# i.e. whose stop with sequence 1 is at or before the current time.

# Download the static GTFS feed. load_gtfs_data returns a dict of DataFrames
# keyed by file name without the '.txt' extension (e.g. feed['trips']).
gtfs_path = 'https://www.itsmarta.com/google_transit_feed/google_transit.zip'
feed = load_gtfs_data(gtfs_path)

In [95]:
# Find service_id for current date.
# The ids below are hard-coded: 3 is used on Saturdays, 4 on Sundays and 5 on
# every other day.
# NOTE(review): presumably these match this feed's calendar.txt; confirm whenever
# the feed is republished, since nothing here checks them against the feed.
SATURDAY_SERVICE_ID = 3
SUNDAY_SERVICE_ID = 4
WEEKDAY_SERVICE_ID = 5

today = date.today()
day_of_week = today.strftime("%A")  # human-readable name, kept for display only

# Branch on date.weekday() (Monday == 0 ... Saturday == 5, Sunday == 6) rather
# than on the strftime("%A") name, which changes with the process locale.
service_id = {
    5: SATURDAY_SERVICE_ID,
    6: SUNDAY_SERVICE_ID,
}.get(today.weekday(), WEEKDAY_SERVICE_ID)

In [96]:
# Scheduled trips running under today's service_id, reduced to the
# identifying columns needed by the later merges.
trips = feed['trips']
day_trips = trips.loc[
    trips['service_id'] == service_id,
    ['route_id', 'service_id', 'trip_id', 'shape_id'],
]

day_trips.head()

Unnamed: 0,route_id,service_id,trip_id,shape_id
599,20774,3,8776016,113855
600,20774,3,8776017,113855
601,20774,3,8776018,113855
602,20774,3,8776019,113855
603,20774,3,8776020,113855


In [97]:
# Find the first stop (stop_sequence == 1) of every trip that has already
# started, i.e. whose first arrival is at or before the current time.

stop_times = feed['stop_times']
stop_times = stop_times[['trip_id', 'arrival_time', 'departure_time', 'stop_id', 'stop_sequence', 'shape_dist_traveled']]

# pull current time (kept as an "HH:MM:SS" string for the cells that follow)
current_time = datetime.now().strftime("%H:%M:%S")

# Compare times as durations since midnight, not as strings: GTFS times may
# have a one-digit or space-padded hour ("9:33:00") and may exceed "24:00:00",
# neither of which orders correctly against a zero-padded string.
# Values that cannot be parsed become NaT and never satisfy the filter.
arrival_elapsed = pd.to_timedelta(
    stop_times['arrival_time'].astype(str).str.strip(), errors='coerce'
)

# find stops with sequence == 1 that are at or before current_time
# NOTE(review): assumes every trip numbers its first stop 1 - confirm for this feed.
first_stops = stop_times[(stop_times['stop_sequence'] == 1) &
                         (arrival_elapsed <= pd.Timedelta(current_time))]

In [98]:
# Find the last stop (highest stop_sequence) of every trip in stop_times.
# No restriction to trips in first_stops happens here; that is done by the
# inner merges on trip_id later on.
last_stops_sequence = stop_times.groupby('trip_id')['stop_sequence'].max().reset_index()
last_stops = pd.merge(stop_times, last_stops_sequence,
                      on=['trip_id', 'stop_sequence'])

# Keep only trips whose final arrival is still in the future. Times are
# compared as durations since midnight because GTFS time strings
# ("9:33:00", "25:10:00") do not order correctly as plain text.
# Values that cannot be parsed become NaT and are dropped by the filter.
last_arrival_elapsed = pd.to_timedelta(
    last_stops['arrival_time'].astype(str).str.strip(), errors='coerce'
)
last_stops = last_stops[last_arrival_elapsed > pd.Timedelta(current_time)]

In [78]:
# Join today's trips with their first and last stops on trip_id. Both joins
# are inner joins, so only trips present in all three tables remain.
# Columns shared by the two stop tables are suffixed _first / _last.
ongoing_trips = (
    day_trips
    .merge(first_stops, on='trip_id', how='inner')
    .merge(last_stops, on='trip_id', how='inner', suffixes=('_first', '_last'))
)

In [91]:
# Row count of the ongoing-trip table, followed by a preview of its first rows
print(ongoing_trips.shape[0])
ongoing_trips.head()

212


Unnamed: 0,route_id,service_id,trip_id,shape_id,arrival_time_first,departure_time_first,stop_id_first,stop_sequence_first,shape_dist_traveled_first,arrival_time_last,departure_time_last,stop_id_last,stop_sequence_last,shape_dist_traveled_last
0,20774,3,8776022,113855,9:33:00,9:33:00,57008,1,,10:05:00,10:05:00,114900,38,12.3343
1,20774,3,8776049,113856,9:40:00,9:40:00,114900,1,,10:12:00,10:12:00,57008,38,11.7128
2,20775,3,8776423,113857,9:50:00,9:50:00,81900,1,,10:12:00,10:12:00,84902,28,8.0162
3,20776,3,8776829,113860,9:58:00,9:58:00,999755,1,,10:30:00,10:30:00,98900,47,11.1138
4,20777,3,8777147,113864,9:55:00,9:55:00,151059,1,,10:20:00,10:20:00,95901,44,11.1614


# Create object for trips

In [80]:
#trip object:
class Trip:
    """Plain record describing one trip.

    The constructor stores its arguments unchanged on the instance:
    ``trip_id``, ``route_id``, ``active``, ``geo`` and ``last_stop``
    (``last_stop`` defaults to an empty string).
    """

    def __init__(
            self,
            trip_id: int,
            route_id: int,
            active: bool,
            geo: list,
            last_stop = ''
            ):
        # Identifiers
        self.trip_id, self.route_id = trip_id, route_id
        # Current state
        # NOTE(review): geo is presumably [latitude, longitude] - confirm with callers
        self.active, self.geo = active, geo
        self.last_stop = last_stop

## Pull GTFS Realtime

In [81]:
from google.transit import gtfs_realtime_pb2
import requests

In [82]:
# Pull actual trips for the current time from GTFS-RT (vehiclepositions.pb):
# collect the trip_id reported by every vehicle entity into list1.
# NOTE: `trips` and `feed` are rebound here; earlier cells used the same names
# for the static trips DataFrame and the static feed dict, so those cells must
# be re-run from the top before being used again.
trips = {}
list1 = []
feed = gtfs_realtime_pb2.FeedMessage()
url = 'https://gtfs-rt.itsmarta.com/TMGTFSRealTimeWebService/vehicle/vehiclepositions.pb'
response = requests.get(url, timeout=30)
# Stop on an HTTP error instead of trying to parse an error page as protobuf.
response.raise_for_status()
feed.ParseFromString(response.content)
for entity in feed.entity:
  if entity.HasField('vehicle'):
    list1.append(entity.vehicle.trip.trip_id)
  # TODO: also pull vehicle geo and route and build Trip objects:
  #  trips[entity.vehicle.trip.trip_id] = Trip(entity.vehicle.trip.trip_id, entity.vehicle.trip.route_id,
  #                                            True, [entity.vehicle.position.latitude, entity.vehicle.position.longitude])

In [83]:

# Pull the trips that currently have a trip update (tripupdates.pb):
# collect the trip_id of every trip_update entity into list2.
list2 = []
feed = gtfs_realtime_pb2.FeedMessage()
url = 'https://gtfs-rt.itsmarta.com/TMGTFSRealTimeWebService/tripupdate/tripupdates.pb'
response = requests.get(url, timeout=30)
# Stop on an HTTP error instead of trying to parse an error page as protobuf.
response.raise_for_status()
feed.ParseFromString(response.content)
for entity in feed.entity:
  # Select trip-update entities and read the trip id from the trip descriptor.
  # entity.id is only the feed's own entity identifier, so it cannot be matched
  # against the trip ids collected from the vehicle positions feed.
  if entity.HasField('trip_update'):
    list2.append(entity.trip_update.trip.trip_id)
  # TODO: record the last stop per trip, e.g.
  #  trips[entity.trip_update.trip.trip_id].last_stop = entity.trip_update.stop_time_update[0].stop_id


In [87]:
# Put both id lists into ascending order, updating the existing list objects
list1[:] = sorted(list1)
list2[:] = sorted(list2)

In [90]:
# Number of ids from the vehicle positions feed, then from the trip updates feed
print(f"{len(list1)} {len(list2)}")

62 77


In [None]:


# Need lat/long, route_id, trip_id, stop


#Compare planned with actual, build DF with missed trips [[trip, route]]

#return table of missed trips