In [1]:
# Notebook setup: move the working directory up one level, pull in the project
# helper modules, and define the root that all data paths are built from.
import os
from pathlib import Path
# Relative chdir: the cwd becomes the parent of wherever the kernel started.
# NOTE(review): this is not idempotent — re-running this cell without restarting
# the kernel moves up one more directory each time, which breaks the relative
# paths used below. Presumably the notebook lives one level below the repo root
# so that `pipelines` becomes importable after this call — confirm.
os.chdir("../")
# NOTE(review): later cells use ET, datetime, pd, gtfs_realtime_pb2,
# entities_to_list and entity_list_to_df without importing them, so they
# presumably arrive through these wildcard imports — confirm which module
# provides each name before replacing `*` with explicit imports.
from pipelines.utils import *
from pipelines.gtfs_realtime_utils import *
from pipelines.sirivm_utils import *

# Relative root: every data path below is resolved against the current working
# directory set by the chdir above.
ROOT = Path("./")
# Last expression of the cell: displays the absolute form of ROOT as a sanity
# check. ROOT itself stays relative (resolve() returns a new path).
ROOT.resolve()

PosixPath('/Users/lukestrange/Code/bus-tracking')

In [2]:
# Binary GTFS-RT snapshots to load, listed newest first. Only the time-of-day
# suffix differs between files, so the paths are built from those suffixes.
gtfs_files = [
    ROOT / 'investigations/test/GTFSRT/binary' / f'GTFS-RT-2024-12-10T{hms}.bin'
    for hms in (
        '04_50_20', '04_50_09',
        '04_49_59', '04_49_48', '04_49_38', '04_49_27', '04_49_17', '04_49_07',
        '04_48_56', '04_48_46',
    )
]

In [3]:
# SIRI-VM XML snapshots to load, listed newest first, as paths relative to ROOT.
# Only the time-of-day suffix differs between files.
siri_files = [
    f'investigations/test/sirivm/xml/sirivm-2024-12-10T{hms}.xml'
    for hms in (
        '04_50_19', '04_50_09',
        '04_49_58', '04_49_48', '04_49_37', '04_49_27', '04_49_16', '04_49_06',
        '04_48_55', '04_48_45',
    )
]

In [4]:
# Parse every SIRI-VM XML snapshot into one DataFrame (`sirivm`) with one row
# per VehicleActivity element. Missing or malformed fields become None rather
# than aborting the load; unreadable files are reported and skipped.
ns = {'base': 'http://www.siri.org.uk/siri'}  # namespace prefix used by every XPath below


def _siri_text(activity, path):
    """Return the whitespace-stripped text of the first element at `path`.

    Args:
        activity: a VehicleActivity element.
        path: namespaced XPath (using the `base:` prefix) relative to `activity`.

    Returns:
        The stripped text, or None when the element is absent or has no text.
    """
    node = activity.find(path, ns)
    if node is None or node.text is None:
        return None
    return node.text.strip()


def _siri_unix_time(activity):
    """Return the activity's RecordedAtTime as integer Unix seconds.

    Returns:
        The timestamp, or None when the element is absent, empty, or not in a
        format `datetime.fromisoformat` accepts.
    """
    node = activity.find(".//base:RecordedAtTime", ns)
    if node is None or node.text is None:
        return None
    try:
        return int(datetime.fromisoformat(node.text).timestamp())
    except (ValueError, OverflowError, OSError):
        # Unparseable or out-of-range timestamp: keep the row, drop the value.
        return None


# Parallel column lists: index k in each list describes the same VehicleActivity.
timestamp = []
longitude = []
latitude = []
vehicle_ref = []
bearing = []

# siri_files is listed newest first; walk it backwards so rows are appended
# oldest snapshot first.
for file in reversed(siri_files):
    try:
        tree = ET.parse(ROOT / file)
    except (ET.ParseError, OSError) as exc:
        # Only malformed XML and unreadable/missing files are skipped; anything
        # else (e.g. a NameError from a missing import) must surface.
        print(file, 'could not be parsed. Skipping', f'({exc})')
        continue
    root = tree.getroot()
    for e in root.findall(".//base:VehicleActivity", ns):
        timestamp.append(_siri_unix_time(e))
        longitude.append(_siri_text(e, "./base:MonitoredVehicleJourney/base:VehicleLocation/base:Longitude"))
        latitude.append(_siri_text(e, "./base:MonitoredVehicleJourney/base:VehicleLocation/base:Latitude"))
        vehicle_ref.append(_siri_text(e, "./base:MonitoredVehicleJourney/base:VehicleRef"))
        bearing.append(_siri_text(e, "./base:MonitoredVehicleJourney/base:Bearing"))

sirivm = pd.DataFrame({'timestamp': timestamp, 'latitude': latitude, 'longitude': longitude, 'vehicle_id': vehicle_ref, 'bearing': bearing})

# Coordinates were read as text; convert to the nullable float dtype so rows
# with a missing coordinate hold <NA> instead of forcing an object column.
sirivm['latitude'] = sirivm['latitude'].astype('Float64')
sirivm['longitude'] = sirivm['longitude'].astype('Float64')

# Rounding and de-duplication are deliberately left disabled below, so the raw
# (possibly repeated) records remain visible in the frame.
# sirivm = round_coordinates(sirivm, 'latitude', 'longitude', 5)

# sirivm = remove_duplicate_locations(sirivm, subset=['timestamp', 'longitude', 'latitude', 'vehicle_id'],sortby=['vehicle_id', 'timestamp'])

In [9]:
# All SIRI-VM records for vehicle '.3-YJ55BKG', ordered by timestamp.
sirivm.loc[sirivm['vehicle_id'] == '.3-YJ55BKG'].sort_values(by='timestamp')

Unnamed: 0,timestamp,latitude,longitude,vehicle_id,bearing
9402,1733849292,52.75561,0.446326,.3-YJ55BKG,
36657,1733849292,52.75561,0.446326,.3-YJ55BKG,
63912,1733849292,52.75561,0.446326,.3-YJ55BKG,
91168,1733849325,52.75561,0.446326,.3-YJ55BKG,
118423,1733849325,52.75561,0.446326,.3-YJ55BKG,
145678,1733849325,52.75561,0.446326,.3-YJ55BKG,
172933,1733849358,52.755615,0.44634,.3-YJ55BKG,
200188,1733849358,52.755615,0.44634,.3-YJ55BKG,
227443,1733849358,52.755615,0.44634,.3-YJ55BKG,
254698,1733849391,52.755615,0.44634,.3-YJ55BKG,


In [6]:
# sirivm.sort_values(['timestamp', 'vehicle_id']).tail(50)

In [7]:
# Load the GTFS-RT snapshots listed in gtfs_files into a DataFrame named `data`.

# Initialise the feed object
feed = gtfs_realtime_pb2.FeedMessage()

# Add all the entities (bus location objects) to a list to iterate through.
# NOTE(review): nth_file=None presumably means "use every file, no subsampling"
# — confirm against entities_to_list.
entities = entities_to_list(feed, gtfs_files, nth_file=None)

# Add entities to a dataframe and cleanse the data
# NOTE(review): the "There are N entities." line shown under this cell is
# presumably printed by one of these two helpers — confirm which. The later
# cell selects timestamp/latitude/longitude/vehicle_id/bearing from `data`, so
# the helper is expected to produce at least those columns.
data = entity_list_to_df(entities)

There are 332470 entities.


In [10]:
# Narrow the GTFS-RT frame to the same five columns as the SIRI-VM frame, then
# show every record for vehicle '.3-YJ55BKG' ordered by timestamp.
data = data.loc[
    :,
    ['timestamp', 'latitude', 'longitude', 'vehicle_id', 'bearing'],
]
data.loc[data['vehicle_id'] == '.3-YJ55BKG'].sort_values(by='timestamp')

Unnamed: 0,timestamp,latitude,longitude,vehicle_id,bearing
300572,1733849235,52.755573,0.446278,.3-YJ55BKG,96.0
200883,1733849260,52.755611,0.446326,.3-YJ55BKG,0.0
234022,1733849260,52.755611,0.446326,.3-YJ55BKG,0.0
267269,1733849260,52.755611,0.446326,.3-YJ55BKG,0.0
101220,1733849292,52.755611,0.446326,.3-YJ55BKG,0.0
134467,1733849292,52.755611,0.446326,.3-YJ55BKG,0.0
167714,1733849292,52.755611,0.446326,.3-YJ55BKG,0.0
1369,1733849325,52.755611,0.446326,.3-YJ55BKG,0.0
34537,1733849325,52.755611,0.446326,.3-YJ55BKG,0.0
67784,1733849325,52.755611,0.446326,.3-YJ55BKG,0.0
