In [1]:
from google.transit import gtfs_realtime_pb2
from google.protobuf.json_format import MessageToDict
import pandas as pd
import folium
from requests import get
import time

In [13]:
# Sample GTFS-R URL from Malaysia's Open API
URL = 'https://api.data.gov.my/gtfs-realtime/vehicle-position/prasarana?category=rapid-bus-kl'

# Download the feed. The timeout stops the cell from hanging forever on a
# stalled connection, and raise_for_status() turns an HTTP error (for example
# a rate-limit response) into a clear exception instead of handing the error
# body to the protobuf parser.
response = get(URL, timeout=30)
response.raise_for_status()

# Parse the GTFS Realtime feed
feed = gtfs_realtime_pb2.FeedMessage()
feed.ParseFromString(response.content)

# Extract vehicle position information, ignoring any entity that carries no
# vehicle payload (it would otherwise become an empty dict / all-NaN row)
vehicle_positions = [
    MessageToDict(entity.vehicle)
    for entity in feed.entity
    if entity.HasField('vehicle')
]
print("Rapid Bus KL")
print(f'Total vehicles: {len(vehicle_positions)}')
df = pd.json_normalize(vehicle_positions)

# sample() raises ValueError when asked for more rows than the frame holds,
# so cap the sample size at the number of vehicles actually returned
print(df.sample(n=min(10, len(df))))

Rapid Bus KL
Total vehicles: 217
      timestamp               trip.tripId trip.startTime trip.startDate  \
78   1727757541   weekday_U3000_U300002_4       12:30:58       20241001   
62   1727757550   weekday_U4500_U450002_6       12:32:23       20241001   
116  1727757559   weekday_U2200_U220002_9       12:17:49       20241001   
164  1727757529   weekday_P0010_P001001_7       12:23:19       20241001   
178  1727757564   weekday_S4000_S400002_3       12:31:33       20241001   
125  1727757531   weekday_S0040_S004002_2       12:30:21       20241001   
169  1727757525   weekday_S1140_S114002_1       12:02:08       20241001   
36   1727757558   weekday_T7870_T787002_1       12:02:30       20241001   
108  1727757560   weekday_U1730_U173001_4       11:48:41       20241001   
11   1727757554  weekday_U6000_U600002_16       12:20:00       20241001   

    trip.routeId  position.latitude  position.longitude  position.bearing  \
78         U3000           3.124237          101.762840         

In [14]:
# Base map, initially centred on Kuala Lumpur
m = folium.Map(location=[3.1390, 101.6869], zoom_start=12)

# Walk the vehicle rows and pin every one that reports a position
for i, row in df.iterrows():
    lat, lon = row.get('position.latitude'), row.get('position.longitude')

    # Nothing to plot when either coordinate is missing
    if pd.isna(lat) or pd.isna(lon):
        continue

    popup_html = f"Vehicle ID: {row.get('vehicle.id')}<br>Route ID: {row.get('trip.routeId')}"
    bus_icon = folium.Icon(color="blue", icon="bus", prefix="fa")
    folium.Marker(location=[lat, lon], popup=popup_html, icon=bus_icon).add_to(m)

# Write a standalone HTML copy of the map next to the notebook
m.save('rapid_bus_kl_map.html')

# Leaving the map as the cell's last expression renders it inline in Jupyter
m

# Ingestion from all sources

In [3]:
from google.transit import gtfs_realtime_pb2
from google.protobuf.json_format import MessageToDict
import pandas as pd
import pytz
from datetime import datetime
from requests import get

# Function to extract data from GTFS Realtime feed
def extract_data(url, timeout=30):
    """Download a GTFS Realtime feed and flatten its vehicle positions.

    Args:
        url: Address of a GTFS Realtime vehicle-position feed.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        A list with one dict per vehicle entity, holding the keys
        ``trip_id``, ``route_id``, ``latitude``, ``longitude``, ``bearing``,
        ``speed`` and ``vehicle_id``. A value is ``None`` when the feed did
        not supply that field.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
        requests.exceptions.Timeout: The server did not answer in time.
    """
    response = get(url, timeout=timeout)  # Fetch the data from the URL
    # Fail loudly on an HTTP error instead of feeding an error page to the
    # protobuf parser.
    response.raise_for_status()

    feed = gtfs_realtime_pb2.FeedMessage()  # GTFS feed message object
    feed.ParseFromString(response.content)  # Parse the response content

    # Extract only the required fields: tripID, routeID, latitude, longitude, bearing, speed, vehicleID
    vehicle_positions = []
    for entity in feed.entity:
        # Entities without a vehicle payload would only yield an all-None row.
        if not entity.HasField('vehicle'):
            continue

        vehicle_dict = MessageToDict(entity.vehicle)

        # Look each sub-message up once; a missing one becomes an empty dict
        # so the field lookups below fall back to None.
        trip = vehicle_dict.get('trip', {})
        position = vehicle_dict.get('position', {})
        vehicle = vehicle_dict.get('vehicle', {})

        vehicle_positions.append({
            'trip_id': trip.get('tripId'),
            'route_id': trip.get('routeId'),
            'latitude': position.get('latitude'),
            'longitude': position.get('longitude'),
            'bearing': position.get('bearing'),
            'speed': position.get('speed'),
            'vehicle_id': vehicle.get('id')
        })

    return vehicle_positions

# Function to process the data into a DataFrame and append it
def append_data(vehicle_positions, agency_name, df=None):
    """Tag one agency's vehicle records and add them to the running table.

    Args:
        vehicle_positions: List of per-vehicle dicts for a single agency.
        agency_name: Label written to the ``agency`` column of every row.
        df: Table accumulated so far, or ``None`` when nothing has been
            collected yet.

    Returns:
        ``df`` unchanged when the new batch has no rows; the new batch alone
        when ``df`` is ``None`` or empty; otherwise ``df`` followed by the
        new batch, with the index renumbered.
    """
    # Wall-clock time in Asia/Kuala_Lumpur, truncated to whole seconds
    stamp = datetime.now(pytz.timezone('Asia/Kuala_Lumpur')).strftime('%Y-%m-%d %H:%M:%S')

    # Build the batch and put the two bookkeeping columns in front
    batch = pd.DataFrame(vehicle_positions)
    batch.insert(0, 'current_timestamp', stamp)
    batch.insert(1, 'agency', agency_name)

    # A batch without rows leaves the accumulated table untouched
    if batch.empty:
        print(f"No data found for {agency_name}. Skipping concatenation.")
        return df

    # The first non-empty batch simply becomes the table
    if df is None or df.empty:
        return batch

    return pd.concat([df, batch], ignore_index=True)

# Each entry pairs a GTFS-R vehicle-position endpoint with its agency label
url_agency_map = [
    (
        'https://api.data.gov.my/gtfs-realtime/vehicle-position/prasarana?category=rapid-bus-kl',
        'Rapid Bus KL',
    ),
    (
        'https://api.data.gov.my/gtfs-realtime/vehicle-position/prasarana?category=rapid-bus-mrtfeeder',
        'Rapid Bus MRT Feeder',
    ),
    (
        'https://api.data.gov.my/gtfs-realtime/vehicle-position/prasarana?category=rapid-bus-kuantan',
        'Rapid Bus Kuantan',
    ),
    (
        'https://api.data.gov.my/gtfs-realtime/vehicle-position/prasarana?category=rapid-bus-penang',
        'Rapid Bus Penang',
    ),
    (
        'https://api.data.gov.my/gtfs-realtime/vehicle-position/mybas-johor',
        'Mybas Johor',
    ),
]

# Nothing collected yet; append_data() treats None as "start a new table"
df = None

# Fetch every feed in turn and fold its vehicles into the combined table
for url, agency_name in url_agency_map:
    vehicle_positions = extract_data(url)
    df = append_data(vehicle_positions, agency_name, df=df)

  df = pd.concat([df, new_df], ignore_index=True)  # Append if DataFrame exists


In [4]:
# sample() raises ValueError when asked for more rows than the frame holds,
# so cap the sample size at the number of rows actually ingested
print(df.sample(n=min(10, len(df))))

       current_timestamp                agency                   trip_id  \
305  2024-10-01 12:32:34  Rapid Bus MRT Feeder            240909010029S8   
659  2024-10-01 12:32:37           Mybas Johor                      None   
258  2024-10-01 12:32:34  Rapid Bus MRT Feeder            240909010140S7   
158  2024-10-01 12:32:34          Rapid Bus KL   weekday_U3000_U300002_4   
41   2024-10-01 12:32:34          Rapid Bus KL   weekday_U6510_U651002_0   
77   2024-10-01 12:32:34          Rapid Bus KL   weekday_U4200_U420002_3   
582  2024-10-01 12:32:37           Mybas Johor                      None   
771  2024-10-01 12:32:37           Mybas Johor                      None   
588  2024-10-01 12:32:37           Mybas Johor                      None   
215  2024-10-01 12:32:34          Rapid Bus KL  weekday_U6000_U600002_16   

    route_id  latitude   longitude  bearing  speed vehicle_id  
305     T110  3.215854  101.613960    150.0  37.00    VAJ2854  
659      CW2  1.491636  103.785080 

In [15]:
import folium
import pandas as pd
import pytz
from datetime import datetime
from requests import get
from google.transit import gtfs_realtime_pb2
from google.protobuf.json_format import MessageToDict

# Function to extract data from GTFS Realtime feed
def extract_data(url, timeout=30):
    """Download a GTFS Realtime feed and flatten its vehicle positions.

    Args:
        url: Address of a GTFS Realtime vehicle-position feed.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        A list with one dict per vehicle entity, holding the keys
        ``trip_id``, ``route_id``, ``latitude``, ``longitude``, ``bearing``,
        ``speed`` and ``vehicle_id``. A value is ``None`` when the feed did
        not supply that field.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
        requests.exceptions.Timeout: The server did not answer in time.
    """
    response = get(url, timeout=timeout)  # Fetch the data from the URL
    # Fail loudly on an HTTP error instead of feeding an error page to the
    # protobuf parser.
    response.raise_for_status()

    feed = gtfs_realtime_pb2.FeedMessage()  # GTFS feed message object
    feed.ParseFromString(response.content)  # Parse the response content

    # Extract only the required fields: tripID, routeID, latitude, longitude, bearing, speed, vehicleID
    vehicle_positions = []
    for entity in feed.entity:
        # Entities without a vehicle payload would only yield an all-None row.
        if not entity.HasField('vehicle'):
            continue

        vehicle_dict = MessageToDict(entity.vehicle)

        # Look each sub-message up once; a missing one becomes an empty dict
        # so the field lookups below fall back to None.
        trip = vehicle_dict.get('trip', {})
        position = vehicle_dict.get('position', {})
        vehicle = vehicle_dict.get('vehicle', {})

        vehicle_positions.append({
            'trip_id': trip.get('tripId'),
            'route_id': trip.get('routeId'),
            'latitude': position.get('latitude'),
            'longitude': position.get('longitude'),
            'bearing': position.get('bearing'),
            'speed': position.get('speed'),
            'vehicle_id': vehicle.get('id')
        })

    return vehicle_positions

# Function to process the data into a DataFrame and append it
def append_data(vehicle_positions, agency_name, df=None):
    """Tag one agency's vehicle records and add them to the running table.

    Args:
        vehicle_positions: List of per-vehicle dicts for a single agency.
        agency_name: Label written to the ``agency`` column of every row.
        df: Table accumulated so far, or ``None`` when nothing has been
            collected yet.

    Returns:
        ``df`` unchanged when the new batch has no rows; the new batch alone
        when ``df`` is ``None`` or empty; otherwise ``df`` followed by the
        new batch, with the index renumbered.
    """
    # Wall-clock time in Asia/Kuala_Lumpur, truncated to whole seconds
    kl_zone = pytz.timezone('Asia/Kuala_Lumpur')
    stamp = datetime.now(kl_zone).strftime('%Y-%m-%d %H:%M:%S')

    # Build the batch and put the two bookkeeping columns in front
    batch = pd.DataFrame(vehicle_positions)
    batch.insert(0, 'current_timestamp', stamp)
    batch.insert(1, 'agency', agency_name)

    # A batch without rows leaves the accumulated table untouched
    if batch.empty:
        print(f"No data found for {agency_name}. Skipping concatenation.")
        return df

    # The first non-empty batch simply becomes the table
    have_previous = df is not None and not df.empty
    return pd.concat([df, batch], ignore_index=True) if have_previous else batch

# Each entry pairs a GTFS-R vehicle-position endpoint with its agency label
url_agency_map = [
    (
        'https://api.data.gov.my/gtfs-realtime/vehicle-position/prasarana?category=rapid-bus-kl',
        'Rapid Bus KL',
    ),
    (
        'https://api.data.gov.my/gtfs-realtime/vehicle-position/prasarana?category=rapid-bus-mrtfeeder',
        'Rapid Bus MRT Feeder',
    ),
    (
        'https://api.data.gov.my/gtfs-realtime/vehicle-position/prasarana?category=rapid-bus-kuantan',
        'Rapid Bus Kuantan',
    ),
    (
        'https://api.data.gov.my/gtfs-realtime/vehicle-position/prasarana?category=rapid-bus-penang',
        'Rapid Bus Penang',
    ),
    (
        'https://api.data.gov.my/gtfs-realtime/vehicle-position/mybas-johor',
        'Mybas Johor',
    ),
]

# Nothing collected yet; append_data() treats None as "start a new table"
df = None

# Fetch every feed in turn and fold its vehicles into the combined table
for url, agency_name in url_agency_map:
    vehicle_positions = extract_data(url)
    df = append_data(vehicle_positions, agency_name, df=df)

# Create the base map. It starts centred on Kuala Lumpur; the view is
# re-fitted further down so that vehicles in other cities are visible too.
m = folium.Map(location=[3.1390, 101.6869], zoom_start=12)

# Keep only the vehicles that reported both coordinates. `df` is still None
# when every feed came back empty, in which case there is nothing to plot.
if df is None:
    located = pd.DataFrame(columns=['latitude', 'longitude'])
else:
    located = df.dropna(subset=['latitude', 'longitude'])

# Add vehicle positions to the Folium map
for i, row in located.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"Agency: {row['agency']}<br>Vehicle ID: {row['vehicle_id']}<br>Trip ID: {row['trip_id']}",
        icon=folium.Icon(color="blue", icon="bus", prefix="fa")
    ).add_to(m)

# The feeds cover several cities, so a fixed Kuala Lumpur view would leave
# most markers off-screen. Fit the view to the south-west and north-east
# corners of the plotted points instead.
if not located.empty:
    m.fit_bounds([
        [float(located['latitude'].min()), float(located['longitude'].min())],
        [float(located['latitude'].max()), float(located['longitude'].max())],
    ])

# Save map to an HTML file
m.save('vehicle_positions_map.html')

# Display the map (for use in Jupyter Notebook or similar environments)
m


  df = pd.concat([df, new_df], ignore_index=True)
