In [391]:
# Install packages
import pandas as pd
import requests
import zipfile
import io
from datetime import date
from datetime import datetime

In [392]:
# Create functions to access GTFS and store files in memory
'''def load_gtfs_data(
        url,
        files = [
            'stops.txt', 'routes.txt', 'trips.txt', 'stop_times.txt',
            'calendar.txt', 'calendar_dates.txt', 'shapes.txt'
        ]):
    """
    Load GTFS data from a URL and convert each required .txt file into a pandas DataFrame, all in memory.
    """
    response = requests.get(url)
    zip_file = zipfile.ZipFile(io.BytesIO(response.content))

    # Define the required GTFS files
    required_files = files

    # Load each file into a DataFrame
    dataframes = {}
    for file in required_files:
        try:
            with zip_file.open(file) as f:
                df = pd.read_csv(f)
                dataframes[file[:len(file)-4]] = df
        except KeyError:
            print(f"{file} not found in the GTFS feed.")

    return dataframes'''

'def load_gtfs_data(\n        url,\n        files = [\n            \'stops.txt\', \'routes.txt\', \'trips.txt\', \'stop_times.txt\',\n            \'calendar.txt\', \'calendar_dates.txt\', \'shapes.txt\'\n        ]):\n    """\n    Load GTFS data from a URL and convert each required .txt file into a pandas DataFrame, all in memory.\n    """\n    response = requests.get(url)\n    zip_file = zipfile.ZipFile(io.BytesIO(response.content))\n\n    # Define the required GTFS files\n    required_files = files\n\n    # Load each file into a DataFrame\n    dataframes = {}\n    for file in required_files:\n        try:\n            with zip_file.open(file) as f:\n                df = pd.read_csv(f)\n                dataframes[file[:len(file)-4]] = df\n        except KeyError:\n            print(f"{file} not found in the GTFS feed.")\n\n    return dataframes'

In [393]:
def load_gtfs_data(file_path):
    """
    Load GTFS data from a local .zip file and convert each required .txt file into a pandas DataFrame, all in memory.
    """
    # Open the zip file
    with zipfile.ZipFile(file_path, 'r') as zip_file:

        # Define the required GTFS files
        required_files = [
            'stops.txt', 'routes.txt', 'trips.txt', 'stop_times.txt',
            'calendar.txt', 'calendar_dates.txt', 'shapes.txt'
        ]

        # Load each file into a DataFrame
        dataframes = {}
        for file in required_files:
            try:
                with zip_file.open(file) as f:
                    df = pd.read_csv(f)
                    dataframes[file[:len(file)-4]] = df
            except KeyError:
                print(f"{file} not found in the GTFS feed.")

    return dataframes


In [394]:
# Build a table of [[route_id],[trip_id],[start_time],[end_time]]
# look for stops with sequence 1 before current time

#pull GTFS
#gtfs_path = 'https://www.itsmarta.com/google_transit_feed/google_transit.zip'
#feed = load_gtfs_data(gtfs_path)
file_path = 'google_transit.zip'
feed = load_gtfs_data(file_path)

In [395]:
# Find service_id for current date
today = date.today()
day_of_week = today.strftime("%A")
service_id = 0

if day_of_week == "Saturday":
    service_id = 3
elif day_of_week == "Sunday":
    service_id = 4
else:
    service_id = 5

In [396]:
# filter trips for service_id
active_trips = feed['trips']
day_trips = active_trips[active_trips['service_id'] == service_id]
day_trips = day_trips[['route_id', 'service_id', 'trip_id', 'shape_id']]

day_trips.head()

Unnamed: 0,route_id,service_id,trip_id,shape_id
599,20774,3,8776016,113855
600,20774,3,8776017,113855
601,20774,3,8776018,113855
602,20774,3,8776019,113855
603,20774,3,8776020,113855


In [397]:
# look for last stop for each trip in sequence 1 list, only keep if after current time
stop_times = feed['stop_times']
stop_times = stop_times[['trip_id', 'arrival_time', 'departure_time', 'stop_id', 'stop_sequence', 'shape_dist_traveled']]

# pull curent time
current_time = datetime.now().strftime("%H:%M:%S")

# find stops with sequence == 1 that are prior to current_time
first_stops = stop_times[(stop_times['stop_sequence'] == 1) &
                         (stop_times['arrival_time'] < current_time)]

In [398]:
# find last stops with max sequence that are on trips in first_stops
last_stops_sequence = stop_times.groupby('trip_id')['stop_sequence'].max().reset_index()
last_stops = pd.merge(stop_times, last_stops_sequence,
                      on=['trip_id', 'stop_sequence'])

# Filter last stops to have arrival time after current time
last_stops = last_stops[last_stops['arrival_time'] > current_time]

In [399]:
# Merge first_stops and last_stops with filtered_trips on trip_id
ongoing_trips = pd.merge(day_trips, first_stops, on='trip_id', how='inner')
ongoing_trips = pd.merge(ongoing_trips, last_stops, on='trip_id', how='inner', suffixes=('_first', '_last'))

In [400]:
print(len(ongoing_trips))
ongoing_trips.head()

167


Unnamed: 0,route_id,service_id,trip_id,shape_id,arrival_time_first,departure_time_first,stop_id_first,stop_sequence_first,shape_dist_traveled_first,arrival_time_last,departure_time_last,stop_id_last,stop_sequence_last,shape_dist_traveled_last
0,20774,3,8776043,113855,23:34:00,23:34:00,57008,1,,24:05:00,24:05:00,114900,38,12.3343
1,20774,3,8776070,113856,23:40:00,23:40:00,114900,1,,24:12:00,24:12:00,57008,38,11.7128
2,20775,3,8776441,113858,23:30:00,23:30:00,84902,1,,23:50:00,23:50:00,81900,28,8.0096
3,20776,3,8776847,113860,23:28:00,23:28:00,999755,1,,24:00:00,24:00:00,98900,47,11.1138
4,20777,3,8777138,113861,23:35:00,23:35:00,95901,1,,23:57:00,23:57:00,151144,43,10.0398


# Create object for trips

In [401]:
#trip object:
class Trip:
    def __init__(
            self,
            trip_id: int,
            route_id: int,
            active: bool,
            geo: list,
            last_stop = ''
            ):
        self.trip_id = trip_id
        self.route_id = route_id
        self.active = active
        self.geo = geo
        self.last_stop = last_stop

## Pull GTFS Realtime

In [402]:
from google.transit import gtfs_realtime_pb2
import requests

In [403]:
# Pull actual trips  for current time from GTFS-RT (vehiclepositions.pb)
# pull vehicle geo, route, and trip from vehiclepositions.pb
trips = {}
feed_RT = gtfs_realtime_pb2.FeedMessage()
url = 'https://gtfs-rt.itsmarta.com/TMGTFSRealTimeWebService/vehicle/vehiclepositions.pb'
response = requests.get(url)
feed_RT.ParseFromString(response.content)
for entity in feed_RT.entity:
#  if entity.HasField('vehicle'):
  trips[entity.vehicle.trip.trip_id] = Trip(entity.vehicle.trip.trip_id, entity.vehicle.trip.route_id,
                                            True, [entity.vehicle.position.latitude, entity.vehicle.position.longitude])
active_trips = trips.keys()

In [404]:
# pull last stop from trip_updates.pb
feed_RT = gtfs_realtime_pb2.FeedMessage()
url = 'https://gtfs-rt.itsmarta.com/TMGTFSRealTimeWebService/tripupdate/tripupdates.pb'
response = requests.get(url)
feed_RT.ParseFromString(response.content)
for entity in feed_RT.entity:
  if (entity.id) in trips:
    trips[entity.id].last_stop = entity.trip_update.stop_time_update[0].stop_id

In [405]:
# compare ongoing_trips with trips from GTFS-RT to find missed trips
active_trips_series = pd.Series(active_trips)
mask = ~ongoing_trips['trip_id'].isin(active_trips_series)

missed_trips = ongoing_trips[mask]
missed_trips.head()

Unnamed: 0,route_id,service_id,trip_id,shape_id,arrival_time_first,departure_time_first,stop_id_first,stop_sequence_first,shape_dist_traveled_first,arrival_time_last,departure_time_last,stop_id_last,stop_sequence_last,shape_dist_traveled_last
0,20774,3,8776043,113855,23:34:00,23:34:00,57008,1,,24:05:00,24:05:00,114900,38,12.3343
1,20774,3,8776070,113856,23:40:00,23:40:00,114900,1,,24:12:00,24:12:00,57008,38,11.7128
2,20775,3,8776441,113858,23:30:00,23:30:00,84902,1,,23:50:00,23:50:00,81900,28,8.0096
3,20776,3,8776847,113860,23:28:00,23:28:00,999755,1,,24:00:00,24:00:00,98900,47,11.1138
4,20777,3,8777138,113861,23:35:00,23:35:00,95901,1,,23:57:00,23:57:00,151144,43,10.0398


In [406]:
type(feed)

dict

In [407]:
# Create missed_trips with last_stop geo attached
stops = feed['stops']
stops_for_join = stops[['stop_id', 'stop_lat','stop_lon']]
missed_trips_geo = missed_trips.join(
    stops_for_join, lsuffix='stop_id', rsuffix='stop_id'
)
missed_trips_geo.head()


Unnamed: 0,route_id,service_id,trip_id,shape_id,arrival_time_first,departure_time_first,stop_id_first,stop_sequence_first,shape_dist_traveled_first,arrival_time_last,departure_time_last,stop_id_last,stop_sequence_last,shape_dist_traveled_last,stop_id,stop_lat,stop_lon
0,20774,3,8776043,113855,23:34:00,23:34:00,57008,1,,24:05:00,24:05:00,114900,38,12.3343,27,33.754553,-84.469302
1,20774,3,8776070,113856,23:40:00,23:40:00,114900,1,,24:12:00,24:12:00,57008,38,11.7128,28,33.753328,-84.445329
2,20775,3,8776441,113858,23:30:00,23:30:00,84902,1,,23:50:00,23:50:00,81900,28,8.0096,39,33.753247,-84.445568
3,20776,3,8776847,113860,23:28:00,23:28:00,999755,1,,24:00:00,24:00:00,98900,47,11.1138,40,33.754517,-84.469824
4,20777,3,8777138,113861,23:35:00,23:35:00,95901,1,,23:57:00,23:57:00,151144,43,10.0398,53,33.920862,-84.344213


In [408]:
# create objects of missed trips
def create_missed_trip(row):
    trips[row['trip_id']] = Trip(row['trip_id'],row['route_id'],False,[row['stop_lat'],row['stop_lon']],row['stop_id_last'])
    return

missed_trips_geo.apply(create_missed_trip,axis=1)


0      None
1      None
2      None
3      None
4      None
       ... 
162    None
163    None
164    None
165    None
166    None
Length: 167, dtype: object

In [409]:


# find most recently scheduled served stop


# From stop_times, find list for each bus stop from missed.last_stop to just prior the bus stop for the
# previous bus on same route

## Create map of buses

In [410]:
# Create map element

# Tracked buses as green, missed as red

# Create lines between stops
