# Endpoint cleaning needed for 01_1️⃣_StopTimesPerRoute.py 

In [1]:
import pandas as pd
import requests
import os
# Key sent in the "x-api-key" request header by the fetch functions below.
# Read from the API_KEY environment variable; os.environ.get returns None
# when that variable is not set.
API_KEY = os.environ.get("API_KEY")

In [2]:
# Endpoint requested by fetch_vehicles_data; the query string asks for JSON.
VEHICLES_API_URL = "https://api.nationaltransport.ie/gtfsr/v2/Vehicles?format=json"
# Endpoint requested by fetch_gtfsr_data; the query string asks for JSON.
GTFSR_API_URL = "https://api.nationaltransport.ie/gtfsr/v2/gtfsr?format=json"

# Fetch data from the Vehicles endpoint
def fetch_vehicles_data():
    """Fetch the Vehicles feed and return it as parsed JSON.

    Sends a GET request to ``VEHICLES_API_URL`` with ``API_KEY`` in the
    ``x-api-key`` header.

    Returns:
        The decoded JSON body, or ``None`` when the request fails, times out,
        comes back with an error status, or the body is not valid JSON.
        Failures are printed, not raised.
    """
    headers = {"x-api-key": API_KEY}
    try:
        # NOTE(review): verify=False turns off TLS certificate verification,
        # so the API key is sent without authenticating the server. Kept from
        # the original; confirm whether it is still needed.
        # requests applies no timeout by default, so without one a stalled
        # connection would block this call indefinitely.
        response = requests.get(
            VEHICLES_API_URL, headers=headers, verify=False, timeout=30
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection, timeout and HTTP-status errors;
        # ValueError covers a body that cannot be decoded as JSON.
        print(f"Error fetching vehicles data: {e}")
        return None

# Process Vehicles data to extract active trips
def process_vehicles_data(vehicles_data):
    """Extract the active trips from a parsed Vehicles feed.

    Args:
        vehicles_data: Parsed feed; entities are read from its ``"entity"``
            key, each one from ``entity["vehicle"]["trip"]``. May be ``None``
            or empty.

    Returns:
        A DataFrame with columns ``route_id``, ``trip_id`` and
        ``direction_id``, one row per entity that carries all three values.
        The columns are present even when there are no rows, so callers can
        always select ``df["trip_id"]``.
    """
    columns = ["route_id", "trip_id", "direction_id"]
    active_trips = []
    if vehicles_data and "entity" in vehicles_data:
        for entity in vehicles_data["entity"]:
            # `or {}` also covers a key that is present but null in the JSON.
            vehicle_info = entity.get("vehicle") or {}
            trip_info = vehicle_info.get("trip") or {}
            route_id = trip_info.get("route_id")
            trip_id = trip_info.get("trip_id")
            direction_id = trip_info.get("direction_id")
            # direction_id 0 is a valid value, so test it against None
            # rather than for truthiness.
            if not route_id or not trip_id or direction_id is None:
                continue
            active_trips.append({
                "route_id": route_id,
                "trip_id": trip_id,
                "direction_id": direction_id,
            })
    # Passing columns explicitly keeps the schema stable for an empty result;
    # pd.DataFrame([]) alone would have no columns at all.
    return pd.DataFrame(active_trips, columns=columns)

# Fetch data from the GTFSR endpoint
def fetch_gtfsr_data():
    """Fetch the GTFSR feed and return it as parsed JSON.

    Sends a GET request to ``GTFSR_API_URL`` with ``API_KEY`` in the
    ``x-api-key`` header.

    Returns:
        The decoded JSON body, or ``None`` when the request fails, times out,
        comes back with an error status, or the body is not valid JSON.
        Failures are printed, not raised.
    """
    headers = {"x-api-key": API_KEY}
    try:
        # NOTE(review): verify=False turns off TLS certificate verification,
        # so the API key is sent without authenticating the server. Kept from
        # the original; confirm whether it is still needed.
        # requests applies no timeout by default, so without one a stalled
        # connection would block this call indefinitely.
        response = requests.get(
            GTFSR_API_URL, headers=headers, verify=False, timeout=30
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection, timeout and HTTP-status errors;
        # ValueError covers a body that cannot be decoded as JSON.
        print(f"Error fetching GTFSR data: {e}")
        return None

# Process GTFSR data
def process_gtfsr_data(gtfsr_data, active_trip_ids):
    """Flatten a parsed GTFSR feed into per-stop arrival delays.

    Only trips whose ``trip_id`` is in ``active_trip_ids`` are kept.

    Args:
        gtfsr_data: Parsed feed; entities are read from its ``"entity"`` key,
            each one from ``entity["trip_update"]``. May be ``None`` or empty.
        active_trip_ids: Container of trip IDs to keep (membership is tested
            with ``in``; a set gives constant-time lookups).

    Returns:
        A DataFrame with columns ``route_id``, ``trip_id``, ``direction_id``,
        ``stop_id`` and ``arrival_delay``, one row per stop-time update that
        has a stop ID and an arrival delay. The columns are present even when
        there are no rows.
    """
    columns = ["route_id", "trip_id", "direction_id", "stop_id", "arrival_delay"]
    trips_data = []
    if gtfsr_data and "entity" in gtfsr_data:
        for entity in gtfsr_data["entity"]:
            # `or {}` / `or []` also cover keys that are present but null.
            trip_update = entity.get("trip_update") or {}
            trip_info = trip_update.get("trip") or {}
            route_id = trip_info.get("route_id")
            trip_id = trip_info.get("trip_id")
            direction_id = trip_info.get("direction_id")

            if trip_id not in active_trip_ids:
                continue  # Skip trips not in active trips

            # These fields are the same for every stop of the trip, so check
            # them once. direction_id 0 is valid, hence the None comparison.
            if not route_id or not trip_id or direction_id is None:
                continue

            for stop_update in trip_update.get("stop_time_update") or []:
                stop_id = stop_update.get("stop_id")
                arrival = stop_update.get("arrival") or {}
                arrival_delay = arrival.get("delay")
                # A delay of 0 (on time) is valid data, so test against None.
                if stop_id and arrival_delay is not None:
                    trips_data.append({
                        "route_id": route_id,
                        "trip_id": trip_id,
                        "direction_id": direction_id,
                        "stop_id": stop_id,
                        "arrival_delay": arrival_delay,
                    })
    # Passing columns explicitly keeps the schema stable for an empty result;
    # pd.DataFrame([]) alone would have no columns at all.
    return pd.DataFrame(trips_data, columns=columns)

# Main logic
# Fetch the Vehicles feed first: its trip IDs decide which GTFSR trip updates
# are kept. active_trips_df and gtfsr_df stay defined at top level for later cells.
vehicles_data = fetch_vehicles_data()
if vehicles_data:
    active_trips_df = process_vehicles_data(vehicles_data)
    # A DataFrame built from zero rows may have no "trip_id" column, so guard
    # the lookup instead of letting it raise KeyError.
    if "trip_id" in active_trips_df.columns:
        active_trip_ids = set(active_trips_df["trip_id"])  # Store active trip IDs for filtering
    else:
        active_trip_ids = set()

    gtfsr_data = fetch_gtfsr_data()
    if gtfsr_data:
        gtfsr_df = process_gtfsr_data(gtfsr_data, active_trip_ids)

        # Debugging outputs
        print("Active Trips DataFrame:")
        display(active_trips_df)

        print("\nFiltered GTFSR DataFrame:")
        display(gtfsr_df)
    else:
        print("No GTFSR data available.")
else:
    print("No vehicle data available.")




Active Trips DataFrame:


Unnamed: 0,route_id,trip_id,direction_id
0,4434_85716,4434_967,0
1,4434_85716,4434_1341,1
2,4318_78162,4318_2758,1
3,4318_78163,4318_3284,0
4,4318_78163,4318_3826,1
...,...,...,...
767,4450_86014,4451_7870,0
768,4450_86015,4451_8139,0
769,4440_85765,4440_3580,1
770,4398_84639,4398_77254,0



Filtered GTFSR DataFrame:


Unnamed: 0,route_id,trip_id,direction_id,stop_id,arrival_delay
0,4398_84431,4398_136939,0,8360B357901,733
1,4398_84431,4398_136939,0,8470B635461,591
2,4398_84431,4398_136939,0,8470B6312201,575
3,4398_84431,4398_136939,0,8470B577651,442
4,4398_84431,4398_136939,0,8470B6311301,288
...,...,...,...,...,...
5109,4450_86091,4451_16226,0,8230DB004796,-1
5110,4450_86091,4451_16226,0,8230DB004690,0
5111,4398_84423,4398_111218,1,8220B1354201,-600
5112,4398_84606,4398_1610,1,8300B138931,-3506


In [4]:
def _save_frame(frame, csv_path, notice):
    """Write *frame* to *csv_path* without the index column, then print *notice*."""
    frame.to_csv(csv_path, index=False)
    print(notice)


# Save Active Trips DataFrame
_save_frame(
    active_trips_df,
    "active_trips.csv",
    "Active trips have been saved to active_trips.csv",
)

# Save Filtered GTFSR DataFrame
_save_frame(
    gtfsr_df,
    "filtered_gtfsr.csv",
    "Filtered GTFSR data has been saved to filtered_gtfsr.csv",
)


Active trips have been saved to active_trips.csv
Filtered GTFSR data has been saved to filtered_gtfsr.csv
