In [1]:
# Import packages
import pandas as pd
import requests
import zipfile
import io
from datetime import date
from datetime import datetime

In [2]:
# Create functions to access GTFS and store files in memory
def load_gtfs_data(
        url,
        files=(
            'stops.txt', 'routes.txt', 'trips.txt', 'stop_times.txt',
            'calendar.txt', 'calendar_dates.txt', 'shapes.txt'
        ),
        timeout=60):
    """
    Download a GTFS zip archive and parse selected members into pandas DataFrames, all in memory.

    Parameters
    ----------
    url : str
        Address of the GTFS ``.zip`` archive; fetched with ``requests.get``.
    files : iterable of str, optional
        Names of the archive members to load. Members missing from the
        archive are reported with ``print`` and skipped.
    timeout : float, optional
        Seconds handed to ``requests.get`` so a stalled server cannot hang
        the notebook indefinitely.

    Returns
    -------
    dict
        Maps each loaded member name, minus its extension
        (``'stops.txt'`` -> ``'stops'``), to the DataFrame read from it.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    zipfile.BadZipFile
        If the downloaded payload is not a zip archive.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here on 4xx/5xx instead of later with a misleading BadZipFile
    # caused by trying to unzip an HTML error page.
    response.raise_for_status()

    dataframes = {}
    # The context manager releases the archive handle once parsing is done.
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
        for file in files:
            # Only the member lookup is guarded: a KeyError here means the
            # member is absent; parsing errors should not be mislabelled.
            try:
                member = zip_file.open(file)
            except KeyError:
                print(f"{file} not found in the GTFS feed.")
                continue
            with member:
                # Key by the name without its extension.
                dataframes[file.rsplit('.', 1)[0]] = pd.read_csv(member)

    return dataframes

In [3]:
# Build a table of [[route_id],[trip_id],[start_time],[end_time]]
# look for stops with sequence 1 before current time

# Download the static GTFS feed and parse it into a dict of DataFrames
gtfs_path = 'https://www.itsmarta.com/google_transit_feed/google_transit.zip'
feed = load_gtfs_data(url=gtfs_path)

In [4]:
# Pick the service_id for today's date: Saturday -> 3, Sunday -> 4, any other day -> 5.
# NOTE(review): these ids are hard-coded; presumably they mirror this feed's
# calendar.txt — confirm whenever the feed is refreshed.
today = date.today()
day_of_week = today.strftime("%A")
service_id = {"Saturday": 3, "Sunday": 4}.get(day_of_week, 5)

In [5]:
# Keep only the trips running under today's service_id, with the columns used later
trips = feed['trips']
day_trips = trips.loc[
    trips['service_id'] == service_id,
    ['route_id', 'service_id', 'trip_id', 'shape_id'],
]

day_trips.head()

Unnamed: 0,route_id,service_id,trip_id,shape_id
15344,20774,5,8776184,113855
15345,20774,5,8776185,113855
15346,20774,5,8776186,113855
15347,20774,5,8776187,113855
15348,20774,5,8776188,113855


In [6]:
# Find trips that have already started: the first stop of the trip is at or before the current time

stop_times = feed['stop_times']
stop_times = stop_times[['trip_id', 'arrival_time', 'departure_time', 'stop_id', 'stop_sequence', 'shape_dist_traveled']]

# pull current time; the "HH:MM:SS" string is kept under this name because the
# last-stop filter further down compares against it
# NOTE(review): datetime.now() is the local clock of the machine running the
# notebook — presumably the same timezone as the agency; confirm.
current_time = datetime.now().strftime("%H:%M:%S")
current_offset = pd.to_timedelta(current_time)

# The first stop of a trip is the row with the smallest stop_sequence; GTFS only
# requires the values to increase along a trip, not to start at 1.
first_sequence = stop_times.groupby('trip_id')['stop_sequence'].transform('min')
trip_starts = stop_times[stop_times['stop_sequence'] == first_sequence]

# Compare as durations, not strings: GTFS times may be unpadded ("6:05:00"),
# whitespace-padded, or run past "24:00:00", and all of those sort wrongly as
# text. Unparseable or missing times become NaT and are dropped by the comparison.
start_offsets = pd.to_timedelta(
    trip_starts['arrival_time'].astype(str).str.strip(), errors='coerce'
)
first_stops = trip_starts[start_offsets <= current_offset]


In [7]:

# Find the last stop (highest stop_sequence) of every trip in stop_times.
# No restriction to started trips happens here; the inner merge on trip_id in
# the next step takes care of that.
last_stops_sequence = stop_times.groupby('trip_id')['stop_sequence'].max().reset_index()
last_stops = pd.merge(stop_times, last_stops_sequence,
                      on=['trip_id', 'stop_sequence'])

# Keep last stops whose arrival is still ahead of the current time.
# Compare as durations, not strings: GTFS times may be unpadded ("6:05:00"),
# whitespace-padded, or run past "24:00:00", and all of those sort wrongly as
# text. Unparseable or missing times become NaT and are dropped by the comparison.
last_arrival_offsets = pd.to_timedelta(
    last_stops['arrival_time'].astype(str).str.strip(), errors='coerce'
)
last_stops = last_stops[last_arrival_offsets > pd.to_timedelta(current_time)]

In [8]:
# Inner-join day_trips with first_stops and then last_stops on trip_id, so only
# trips present in all three remain; the overlapping stop columns get the
# suffixes _first and _last.
ongoing_trips = (
    day_trips
    .merge(first_stops, on='trip_id', how='inner')
    .merge(last_stops, on='trip_id', how='inner', suffixes=('_first', '_last'))
)

In [9]:
# Preview the first five rows of the joined table
ongoing_trips.head(n=5)

Unnamed: 0,route_id,service_id,trip_id,shape_id,arrival_time_first,departure_time_first,stop_id_first,stop_sequence_first,shape_dist_traveled_first,arrival_time_last,departure_time_last,stop_id_last,stop_sequence_last,shape_dist_traveled_last
0,20774,5,8776194,113855,12:13:00,12:13:00,57008,1,,12:45:00,12:45:00,114900,38,12.3343
1,20775,5,8776697,113858,12:00:00,12:00:00,84902,1,,12:24:00,12:24:00,81900,28,8.0096
2,20776,5,8776954,113859,12:15:00,12:15:00,98900,1,,12:48:00,12:48:00,999755,48,11.0464
3,20776,5,8776979,113860,12:13:00,12:13:00,999755,1,,12:45:00,12:45:00,98900,47,11.1138
4,20777,5,8777295,113862,12:15:00,12:15:00,95901,1,,12:36:00,12:36:00,151059,42,9.9514


# Create object for trips

In [None]:
#trip object:
class Trip:
    """Plain record describing one trip.

    The constructor stores its five arguments unchanged on the instance.

    Attributes
    ----------
    trip_id : int
        Identifier of the trip.
    route_id : int
        Identifier of the route.
    active : bool
        Flag stored as given.
        NOTE(review): presumably "vehicle currently running" — confirm.
    geo : list
        Stored by reference, not copied.
        NOTE(review): presumably lat/long coordinates — confirm.
    last_stop : int
        NOTE(review): presumably the stop_id of the final stop — confirm.
    """

    def __init__(self, trip_id: int, route_id: int, active: bool, geo: list, last_stop: int):
        # Identifiers
        self.trip_id = trip_id
        self.route_id = route_id
        # State
        self.active = active
        self.geo = geo
        self.last_stop = last_stop

In [None]:
# Pull actual trips for the current time from GTFS-RT (vehiclepositions.pb)
# compile protocol buffer


# Need lat/long, route_id, trip_id, stop


#Compare planned with actual, build DF with missed trips [[trip, route]]

#return table of missed trips