In [None]:
import datetime
import sqlite3 as db

import folium
import geopandas
import pandas as pd

# from rich.traceback import install
from shapely.geometry import LineString

# install(show_locals=True)

In [None]:
# Open the SQLite database that holds the vehicle-monitoring data.
# The path is relative, so it resolves against the kernel's working directory.
con = db.connect("../data/entur.db")

In [None]:
# Load the complete VEHICLE_MONITORING table into memory; this unfiltered frame
# is the starting point for the cleaning steps further down.
df_raw = pd.read_sql_query(sql="SELECT * FROM VEHICLE_MONITORING;", con=con)

# Number of rows that were loaded.
df_raw.shape[0]

In [None]:
# Peek at a handful of random rows, transposed so each column reads as a line.
# The draw is seeded so a Restart & Run All shows the same rows every time, and
# the sample size is capped so a table with fewer than 5 rows does not raise.
df_raw.sample(n=min(5, len(df_raw)), random_state=0).T

In [None]:
# Show the dtype of every column in the raw frame.
df_raw.dtypes

In [None]:
# Clean the raw rows before any analysis. The steps are applied in sequence, so
# a row is kept only when it passes every one of them.
df = (
    # A usable position needs BOTH coordinates: drop the row if either is NaN.
    df_raw.dropna(subset=["Latitude", "Longitude"])
    # Rough bounding box around Norway. Latitude AND longitude must both be in
    # range; with `or`, a point with a plausible latitude but an absurd
    # longitude (or vice versa) would slip through.
    .query("50 < Latitude < 71 and 2 < Longitude < 20")
    .query(
        "- 10*60*60 < Delay < 10*60*60"
    )  # Some delays are huge and indicate false measurements or a large jump in time
    # (the bound reads as +/- 10 hours, assuming Delay is in seconds - TODO confirm)
    .query(
        "~(DatedVehicleJourneyRef == 'VYB:ServiceJourney:')"
    )  # Some journeys seem to share this same placeholder-looking ID
)

df

In [None]:
# For each (DataFrameRef, DatedVehicleJourneyRef) pair, compute the extent of the
# recorded positions and the number of rows, then keep only the pairs whose
# latitude or longitude spread exceeds 0.1 degrees. Sorting is ascending, so the
# largest spreads end up at the bottom of the table.
(
    df.groupby(["DataFrameRef", "DatedVehicleJourneyRef"])
    .agg(
        **{
            "latitude_min": ("Latitude", "min"),
            "latitude_max": ("Latitude", "max"),
            "longitude_min": ("Longitude", "min"),
            "longitude_max": ("Longitude", "max"),
            "dataframeref_count": ("DataFrameRef", "count"),
        }
    )
    .assign(
        latitude_diff=lambda extent: extent["latitude_max"] - extent["latitude_min"],
        longitude_diff=lambda extent: extent["longitude_max"]
        - extent["longitude_min"],
    )
    .reset_index()
    .loc[
        lambda extent: (extent["latitude_diff"] > 0.1)
        | (extent["longitude_diff"] > 0.1)
    ]
    .sort_values(by=["latitude_diff", "longitude_diff"])
)

In [None]:
# Show the last 60 cleaned rows belonging to one specific journey.
df.loc[
    df["DatedVehicleJourneyRef"] == "ATB:ServiceJourney:78_230306097864734_105"
].tail(60)

In [None]:
# Journey to look at in detail; `ref` is reused when building the single-journey map.
ref = "ATB:ServiceJourney:78_230306097864734_105"

# Take the rows from the RAW frame (not the cleaned one) so nothing is hidden by
# the filters, and show them transposed.
df_investigate = df_raw.loc[df_raw["DatedVehicleJourneyRef"] == ref]
display(df_investigate.T)

# Quick look at the recorded track: longitude on x, latitude on y.
df_investigate.plot(x="Longitude", y="Latitude")

In [None]:
# Cleaned rows of the journey selected by `ref`, turned into WGS84 point geometries.
df_one = df.loc[df["DatedVehicleJourneyRef"] == ref]
geo_df_one = geopandas.GeoDataFrame(
    df_one,
    geometry=geopandas.points_from_xy(x=df_one["Longitude"], y=df_one["Latitude"]),
    crs="EPSG:4326",
)

# Join the points of each (DataFrameRef, journey) group into one line, in row
# order, and show the result on an interactive map.
(
    geo_df_one.groupby(["DataFrameRef", "DatedVehicleJourneyRef"])["geometry"]
    .apply(lambda points: LineString(points.tolist()))
    .set_crs("EPSG:4326")
    .explore()
)

In [None]:
# Point geometry (WGS84) for every cleaned row.
geo_df = geopandas.GeoDataFrame(
    df,
    geometry=geopandas.points_from_xy(x=df["Longitude"], y=df["Latitude"]),
    crs="EPSG:4326",
)

# Cut-off four days before now.
# NOTE(review): neither `last_hours` nor the `timestamp` column assigned below is
# used to filter anything in this cell - confirm whether a time filter was intended.
last_hours = pd.to_datetime(datetime.datetime.today() - datetime.timedelta(days=4))
print(last_hours)

# One line per (DataFrameRef, journey) group, built from its points in row order
# and shown on an interactive map.
(
    # Some points are empty (reason unknown); skip them.
    geo_df.loc[lambda points: ~points["geometry"].is_empty]
    .assign(timestamp=lambda points: pd.to_datetime(points["RecordedAtTime"]))
    .groupby(["DataFrameRef", "DatedVehicleJourneyRef"])
    # Only keep groups with at least 10 recorded points.
    .filter(lambda journey: len(journey) >= 10)
    .groupby(["DataFrameRef", "DatedVehicleJourneyRef"])["geometry"]
    .apply(lambda journey_points: LineString(journey_points.tolist()))
    .set_crs("EPSG:4326")
    .explore()
)

In [None]:
# Base map positioned at [60, 10].
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# other cells may refer to it.
map = folium.Map(location=[60, 10], zoom_start=8)

# Attributes listed in the popup that is attached to each feature.
popup = folium.GeoJsonPopup(
    fields=[
        "DataFrameRef",
        "DatedVehicleJourneyRef",
        "RecordedAtTime",
        "LineRef",
        "VehicleMode",
        "Delay",
    ],
)

# Draw only the last 100 rows of the point frame, styled in black.
folium.GeoJson(
    geo_df.tail(100),
    popup=popup,
    style_function=lambda _feature: {"color": "black"},
).add_to(map)

map