# SIRI Realtime without position using GTFS 
Ruter and some other providers do not report vehicle positions as latitude/longitude; they only report which stops a vehicle has passed, which stops remain, and the time at each stop.
Could we estimate an approximate position using the GTFS shapes/stops data?

In [50]:
from pathlib import Path
import pandas as pd
import geopandas
import requests
from lxml import etree
import duckdb

from spartid_pubtransport import siri


In [None]:
# Comma-separated ids of every provider in siri.PROVIDERS whose "rt" entry is "VM".
exclude_vm_providers = ",".join(
    provider_id
    for provider_id, provider in siri.PROVIDERS.items()
    if provider["rt"] == "VM"
)
exclude_vm_providers

In [52]:
# XML namespace prefix -> namespace URI. The "siri" entry is the namespace used
# to build qualified tag names when parsing the response.
nsmap = dict(
    siri="http://www.siri.org.uk/siri",
    ns2="http://www.ifopt.org.uk/acsb",
    ns3="http://www.ifopt.org.uk/ifopt",
    ns4="http://datex2.eu/schema/2_0RC1/2_0",
)

In [53]:
# Fetch the realtime "et" feed from Entur for every dataset except the ones
# listed in `exclude_vm_providers`.
# To fetch a single dataset instead, query ".../rest/et?datasetId=RUT" (or BNR).
# NOTE(review): Entur's documentation asks clients to identify themselves with
# an `ET-Client-Name` header -- consider adding one; confirm against current docs.
resp = requests.get(
    f"https://api.entur.io/realtime/v1/rest/et?excludedDatasetIds={exclude_vm_providers}",
    # requests applies no timeout by default; without one a stalled connection
    # would hang the kernel indefinitely.
    timeout=60,
)
# Raises requests.HTTPError (with status code and URL) on 4xx/5xx responses.
# Unlike `assert resp.ok`, this is not stripped when Python runs with -O.
resp.raise_for_status()

In [None]:
# Peek at the first 1000 bytes of the raw response body.
resp.content[:1000]

In [None]:
# Flatten the XML response into one record (dict) per call: every EstimatedCall
# and RecordedCall found under each EstimatedVehicleJourney, enriched with the
# journey-level fields. All values are kept as the raw XML text.
prstree = etree.fromstring(resp.content)
root = prstree.tag  # tag of the document's root element (not used further in this cell)


def siri_tag(localname):
    """Return the qualified name of `localname` in the SIRI namespace."""
    return etree.QName(nsmap["siri"], localname)


all_items = []

for journey in prstree.iter(siri_tag("EstimatedVehicleJourney")):
    # Direct children of the journey that carry text.
    journey_level1_dict = {
        etree.QName(child).localname: child.text
        for child in journey
        if child.text is not None
    }

    # The journey identifier can come from one of three elements; they are
    # tried in this order of preference.
    dated_vehicle_ref = journey.find(siri_tag("DatedVehicleJourneyRef"))
    estimated_vehicle_journey_code = journey.find(
        siri_tag("EstimatedVehicleJourneyCode")
    )
    framed_vehicle_ref = journey.find(siri_tag("FramedVehicleJourneyRef"))

    if dated_vehicle_ref is not None:
        # NOTE(review): the FIRST ":"-separated part becomes
        # DatedVehicleJourneyRef and the LAST part becomes DataFrameRef --
        # confirm this is the intended mapping for the ref format.
        data_str = dated_vehicle_ref.text.split(":")
        framed_vehicle_dict = {
            "DataFrameRef": data_str[-1],
            "DatedVehicleJourneyRef": data_str[0],
        }
    elif estimated_vehicle_journey_code is not None:
        data_str = estimated_vehicle_journey_code.text
        framed_vehicle_dict = {
            "DataFrameRef": data_str,
            "DatedVehicleJourneyRef": data_str,
        }
    elif framed_vehicle_ref is not None:
        framed_vehicle_dict = {
            etree.QName(child).localname: child.text
            for child in framed_vehicle_ref
            if child.text is not None
        }
    else:
        # None of the three identifiers is present: keep the journey's calls
        # (without journey reference fields) instead of crashing on None.
        framed_vehicle_dict = {}

    # Identifier fields override same-named journey-level fields.
    journey_dict = journey_level1_dict | framed_vehicle_dict

    # All EstimatedCalls of the journey are emitted first, then all RecordedCalls.
    for call_type in ("EstimatedCall", "RecordedCall"):
        for call in journey.iter(siri_tag(call_type)):
            # Journey-level fields override same-named call-level fields.
            call_dict = {
                etree.QName(child).localname: child.text for child in call
            } | journey_dict
            call_dict["XType"] = call_type
            all_items.append(call_dict)

df_raw = pd.DataFrame(all_items).convert_dtypes(dtype_backend="pyarrow")
print(len(df_raw))
# sample(10) raises when fewer than 10 rows were parsed, so cap the sample size.
df_raw.sample(min(10, len(df_raw))).T

## Convert datatypes
All values parsed from the XML are strings; convert them to DataFrame-friendly types.

In [59]:
# Cast `Order` to a 64-bit integer and parse the ISO 8601 timestamp columns
# into datetimes; all other columns are carried over unchanged.
timestamp_columns = [
    "AimedDepartureTime",
    "ActualDepartureTime",
    "ExpectedDepartureTime",
    "ExpectedArrivalTime",
    "RecordedAtTime",
]
df = df_raw.assign(
    Order=df_raw["Order"].astype("int64[pyarrow]"),
    **{
        column: pd.to_datetime(df_raw[column], format="ISO8601")
        for column in timestamp_columns
    },
)

## Call type
1. EstimatedCall
1. RecordedCall

In [None]:
# Pick a single trip -- the first group when grouping by DatedVehicleJourneyRef --
# and show its calls sorted by Order.
trips = df.groupby("DatedVehicleJourneyRef")
trip_id, df_trip = next(iter(trips))
df_one_trip = df_trip.sort_values("Order")
df_one_trip

### Next stop of trip
The first `EstimatedCall` of each trip (lowest `Order`) is the next stop the trip is approaching.

In [None]:
# Next stop per trip: the EstimatedCall row with the lowest Order.
# The whole row is selected instead of using GroupBy.first(), because first()
# returns the first *non-null* value of each column independently and can
# therefore combine fields from different stops into one row.
(
    df.query("XType == 'EstimatedCall'")
    .dropna(subset=["DatedVehicleJourneyRef"])  # groupby likewise ignored rows without a key
    .sort_values("Order")
    .drop_duplicates(subset="DatedVehicleJourneyRef", keep="first")
    .set_index("DatedVehicleJourneyRef")
    .sort_index()  # same row ordering as the groupby result
    .T
)

### Last stop visited
The last `RecordedCall` of each trip (highest `Order`) is the most recently visited stop.

In [None]:
# Last visited stop per trip: the RecordedCall row with the highest Order,
# considering only calls with Order > 1.
# The whole row is selected instead of using GroupBy.last(), because last()
# returns the last *non-null* value of each column independently; a field
# missing on the latest call (e.g. a departure time) would silently be filled
# with the value from an earlier stop.
df_last_stop = (
    df.query("Order > 1")
    .query("XType == 'RecordedCall'")
    .dropna(subset=["DatedVehicleJourneyRef"])  # groupby likewise ignored rows without a key
    .sort_values("Order")
    .drop_duplicates(subset="DatedVehicleJourneyRef", keep="last")
    .set_index("DatedVehicleJourneyRef")
    .sort_index()  # same row ordering as the groupby result
)
df_last_stop.T

## Merge Siri with Stop information
### Read stops from GTFS

In [None]:
# Read the GTFS stops table from parquet into a pandas DataFrame. The GTFS
# directory is resolved relative to the parent of the current working directory.
gtfs_root = Path().resolve().parent / "data/gtfs/rb_norway-aggregated-gtfs"
assert gtfs_root.exists()
stops_path = gtfs_root / "stops.parquet"
stops = duckdb.read_parquet(str(stops_path))
df_stops = stops.to_df()
df_stops

In [None]:
# Attach the GTFS stop attributes (e.g. stop_lat / stop_lon) to each trip's
# last visited stop.
df_last_stop_with_info = (
    df_last_stop
    # The trip id (DatedVehicleJourneyRef) lives in the index, and a merge on
    # columns discards the index -- make it a regular column so it survives.
    .reset_index()
    # Inner join: calls whose StopPointRef has no matching GTFS stop_id are dropped.
    .merge(df_stops, left_on="StopPointRef", right_on="stop_id")
    # Keep only the columns that have a value on every row.
    .dropna(axis="columns")
)
df_last_stop_with_info

## Create Geographic data for display

In [65]:
# Build point geometries from the stop coordinates (x = longitude, y = latitude)
# and wrap the table in a GeoDataFrame with CRS EPSG:4326.
stop_points = geopandas.points_from_xy(
    df_last_stop_with_info["stop_lon"],
    df_last_stop_with_info["stop_lat"],
)
df_last_stop_geo = geopandas.GeoDataFrame(
    df_last_stop_with_info,
    geometry=stop_points,
    crs="EPSG:4326",
)

In [None]:
# Show the last visited stops on an interactive map.
df_last_stop_geo.explore()