In [1]:
# Reload edited project modules automatically, so changes to the local helper
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from geopy import Nominatim
from pprint import pprint

# Nominatim geocoding client; `user_agent` is the application name passed to the service.
geolocator = Nominatim(user_agent="RBC locator")

In [3]:
from log import load_data, parse_events
from column_names import *

In [4]:
import re

In [5]:
# Pattern matching one run of word characters.
# NOTE(review): `regex` is not referenced by any other cell visible in this notebook.
regex = re.compile(r"\w+")

In [6]:
import pandas as pd

In [7]:
# Column-name constants; RESPONSES and SEARCHED are used as columns of the
# `stations` frame further down.
# NOTE(review): NAMES is not used by any cell visible in this notebook.
NAMES = "names"
RESPONSES = "responses"
SEARCHED = "searched"

In [9]:
import os
# Show the kernel's working directory; the relative "./data" and "./geo" paths
# used below are resolved against it.
os.getcwd()

'/home/jovyan/work'

In [10]:
# Load the log export from the Excel workbook via the project-local `log.load_data`.
data = load_data("./data/20230127_Olomouc_Unicov.xlsx")

In [32]:
# NOTE(review): the interactive matplotlib backend is selected here, but this cell does not plot anything.
%matplotlib widget
# List the daily bins (resampled on the TIME column) formatted as "YYYY-MM-DD" strings.
pd.Series(data.resample("D", on=TIME).groups.keys()).dt.strftime("%Y-%m-%d")

0     2023-01-01
1     2023-01-02
2     2023-01-03
3     2023-01-04
4     2023-01-05
5     2023-01-06
6     2023-01-07
7     2023-01-08
8     2023-01-09
9     2023-01-10
10    2023-01-11
11    2023-01-12
12    2023-01-13
13    2023-01-14
14    2023-01-15
15    2023-01-16
16    2023-01-17
17    2023-01-18
18    2023-01-19
19    2023-01-20
20    2023-01-21
21    2023-01-22
22    2023-01-23
23    2023-01-24
24    2023-01-25
25    2023-01-26
26    2023-01-27
dtype: object

In [10]:
# Parse the raw event texts and keep only the RBC-name column, wrapped in its own frame.
parsed_events = parse_events(data[EVENT])
rbc = pd.DataFrame(parsed_events[RBC_NAME])

In [11]:
# Inspect the extracted RBC names.
rbc

Unnamed: 0,Název RBC
0,Uničov - Bohuňovice
1,Uničov - Bohuňovice
2,Uničov - Bohuňovice
3,Uničov - Bohuňovice
4,Uničov - Bohuňovice
...,...
9998,Uničov - Bohuňovice
9999,Uničov - Bohuňovice
10000,Uničov - Bohuňovice
10001,Uničov - Bohuňovice


In [12]:
# Split every RBC name into its word tokens and keep each distinct token once;
# these tokens are the station names to look up.
# NOTE(review): a station name made of several words would be split into separate entries.
station_tokens = rbc[RBC_NAME].str.extractall(r"(?P<station>\w+)")["station"]
stations = pd.DataFrame({"searched": pd.unique(station_tokens)})

In [13]:
# Inspect the distinct station names that will be searched for.
stations

Unnamed: 0,searched
0,Uničov
1,Bohuňovice


In [14]:
from functools import partial, reduce
from operator import attrgetter

In [15]:
# Geocode every station name with Nominatim, keeping all candidate matches
# (exactly_one=False) rather than only the best one.
# The public Nominatim service allows at most one request per second, so the
# calls are throttled with geopy's RateLimiter.  swallow_exceptions=False keeps
# the original behaviour of raising when a request ultimately fails instead of
# silently storing None.
from geopy.extra.rate_limiter import RateLimiter

throttled_geocode = RateLimiter(
    geolocator.geocode, min_delay_seconds=1, swallow_exceptions=False
)
stations[RESPONSES] = stations[SEARCHED].apply(partial(throttled_geocode, exactly_one=False))

In [16]:
# Inspect the geocoder responses collected per station name.
stations

Unnamed: 0,searched,responses
0,Uničov,"[(Uničov, okres Olomouc, Olomoucký kraj, Střed..."
1,Bohuňovice,"[(Bohuňovice, okres Svitavy, Pardubický kraj, ..."


In [17]:
import geopandas as geo

In [18]:
from map_tools import get_bounding_box

In [19]:
# Station geometries read from a local parquet file; the cells below rely on its
# "name" and "geometry" columns.
gdf = geo.read_parquet("./geo/stations.parquet")

In [20]:
# Left-join the searched names with the station geometries, matching on the exact name;
# names without a match keep their row with missing geometry.
station_geometries = gdf[["name", "geometry"]]
x = stations.merge(
    station_geometries,
    how="left",
    left_on="searched",
    right_on="name",
)

In [21]:
import ipyleaflet
from ipywidgets import HTML

In [24]:
# Fixed (non-draggable) marker at the geometry of merged row 1.
# The location is given as [y, x] of the point, i.e. latitude first.
station_point = x.loc[1, "geometry"]
marker = ipyleaflet.Marker(
    location=[station_point.y, station_point.x],
    draggable=False,
)

In [25]:
# Bounding box of a single location (the marker position), computed by the
# project-local `map_tools.get_bounding_box`.
get_bounding_box([marker.location])

((49.66306, 17.2810574), (49.66306, 17.2810574))

In [26]:
from IPython.display import display

In [27]:
# Base layer: OpenRailwayMap "standard" tiles.  They are requested over HTTPS so
# the browser does not block them as mixed content when Jupyter itself is served
# over HTTPS, and the attribution expected by the tile provider is displayed.
background = ipyleaflet.TileLayer(
    url='https://tiles.openrailwaymap.org/standard/{z}/{x}/{y}.png',
    attribution=(
        'Map data &copy; OpenStreetMap contributors, '
        'style: CC-BY-SA 2.0 OpenRailwayMap'
    ),
)
m = ipyleaflet.Map(zoom=12)
m.add_layer(background)
m.add_layer(marker)

display(m)

# Fit the view to the marker once the map has been displayed.
# NOTE(review): the order (display first, then fit_bounds) is kept from the
# original cell — presumably the widget must be rendered first; confirm.
m.fit_bounds(get_bounding_box([marker.location]))


Map(center=[0.0, 0.0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_t…

In [28]:
# One row per individual geocoder response instead of one list of responses per station.
stations = stations.explode("responses")

In [29]:
# Renumber the rows consecutively; explode() repeats the original index label for each response.
stations = stations.reset_index(drop=True)

In [30]:
# Inspect the exploded frame (one geocoder response per row).
stations

Unnamed: 0,searched,responses
0,Uničov,"(Uničov, okres Olomouc, Olomoucký kraj, Středn..."
1,Uničov,"(Uničov, okres Olomouc, Olomoucký kraj, Středn..."
2,Uničov,"(Uničov, Nádražní, Uničov, Dolní Sukolom, Unič..."
3,Uničov,"(Uničov, U Oskavy, Uničov, okres Olomouc, Olom..."
4,Uničov,"(Uničov, Masarykovo nám., Uničov, okres Olomou..."
5,Uničov,"(Uničov, Pod Šibeníkem, Uničov, okres Olomouc,..."
6,Uničov,"(Uničov, hřbitov, Litovelská, Uničov, okres Ol..."
7,Uničov,"(Uničov, Renoty, náves, 44621, Renoty, Uničov,..."
8,Uničov,"(Uničov, Benkov, restaurace, 4494, Benkov u St..."
9,Uničov,"(Uničov, Bezručovo náměstí, Stromořadí, Uničov..."


In [31]:
# Expand each geocoder response into columns taken from its raw payload (`.raw`).
# geocode() returns None for a name without any match; such a missing response
# has no `.raw`, so it contributes an empty record (all-NaN columns) instead of
# raising AttributeError.  Building one DataFrame from the list of records also
# avoids constructing a pd.Series per row.
raw_records = [getattr(response, "raw", {}) for response in stations[RESPONSES]]
stations = stations.join(pd.DataFrame(raw_records, index=stations.index))

In [32]:
# The response objects were expanded into columns in the previous cell, so the
# original column is dropped.
stations = stations.drop(RESPONSES, axis="columns")

In [33]:
# Raise the importance of results whose class is "railway" by one.
# NOTE(review): the update is applied in place, so re-running this cell adds the bonus again.
is_railway = stations["class"] == "railway"
stations.loc[is_railway, "importance"] = stations.loc[is_railway, "importance"] + 1

In [34]:
# Remove every column whose label contains "name:", keeping the plain "name" column.
name_variant_columns = gdf.columns[gdf.columns.str.contains("name:")]
gdf = gdf.drop(columns=name_variant_columns)

In [35]:
# Drop identifier/licence columns that came from the raw geocoder payload.
# NOTE(review): the next code cell rebuilds `stations` from scratch, which discards this cleanup.
stations = stations.drop(["place_id", "licence", "osm_id"], axis="columns")

In [37]:
# Start over from the distinct word tokens of the RBC names; this replaces the
# frame holding the geocoder results built in the cells above.
station_tokens = rbc[RBC_NAME].str.extractall(r"(?P<station>\w+)")["station"]
stations = pd.DataFrame({"searched": pd.unique(station_tokens)})

In [38]:
# Inspect the rebuilt frame of searched station names.
stations

Unnamed: 0,searched
0,Uničov
1,Bohuňovice


In [39]:
# Trial run for one name: merge all geometries whose name contains "Olomouc",
# take the centroid in EPSG:5514 and convert it back to WGS84.
olomouc_rows = gdf.loc[gdf["name"].str.contains("Olomouc")]
olomouc_merged = olomouc_rows.dissolve()
olomouc_merged.to_crs(5514).centroid.to_crs("WGS84")

0    POINT (17.24900 49.59888)
dtype: geometry

In [40]:
def get_position(row: pd.Series):
    """Locate the station named in ``row["searched"]`` within ``gdf``.

    Rows of the module-level ``gdf`` whose ``name`` equals the searched text are
    preferred; if there are none, rows whose ``name`` contains the text are used
    instead.  The selected geometries are dissolved into one, its centroid is
    computed in EPSG:5514 and the result is converted to WGS84.

    Args:
        row: A row of the stations frame; only its ``"searched"`` entry is read.

    Returns:
        The ``GeoSeries`` produced by ``.centroid.to_crs("WGS84")`` on the
        dissolved frame.
    """
    searched = row["searched"]
    relevant_results: geo.GeoDataFrame = gdf.loc[gdf["name"] == searched]
    if relevant_results.empty:
        # regex=False: the station name is literal text, so characters such as
        # "." or "(" must not be interpreted as a pattern.
        # na=False: rows without a name count as "no match" instead of putting
        # NaN into the boolean mask, which .loc would reject.
        contains_name = gdf["name"].str.contains(searched, regex=False, na=False)
        relevant_results = gdf.loc[contains_name]
    # NOTE(review): when nothing matches at all, an empty frame is dissolved
    # here — confirm that the outcome is acceptable for callers.
    return relevant_results.dissolve().to_crs(5514).centroid.to_crs("WGS84")

In [41]:
def get_position_scalar(name: str):
    """Locate the station called ``name`` within ``gdf``.

    Rows of the module-level ``gdf`` whose ``name`` equals ``name`` are
    preferred; if there are none, rows whose ``name`` contains it are used
    instead.  The selected geometries are dissolved into one, its centroid is
    computed in EPSG:5514 and the result is converted to WGS84.

    Args:
        name: Station name to look up.

    Returns:
        The ``GeoSeries`` produced by ``.centroid.to_crs("WGS84")`` on the
        dissolved frame.
    """
    relevant_results: geo.GeoDataFrame = gdf.loc[gdf["name"] == name]
    if relevant_results.empty:
        # regex=False: the station name is literal text, so characters such as
        # "." or "(" must not be interpreted as a pattern.
        # na=False: rows without a name count as "no match" instead of putting
        # NaN into the boolean mask, which .loc would reject.
        contains_name = gdf["name"].str.contains(name, regex=False, na=False)
        relevant_results = gdf.loc[contains_name]
    # NOTE(review): when nothing matches at all, an empty frame is dissolved
    # here — confirm that the outcome is acceptable for callers.
    return relevant_results.dissolve().to_crs(5514).centroid.to_crs("WGS84")

In [42]:
def _centroid_of_name_matches(series):
    """Centroid (WGS84) of all ``gdf`` geometries whose name contains the searched text."""
    matches = gdf.loc[gdf["name"].str.contains(series["searched"])]
    return matches.dissolve().to_crs(5514).centroid.to_crs("WGS84")


# Substring-only lookup for every station.
# NOTE(review): the following cell assigns the same column again via get_position.
stations["location"] = stations.apply(_centroid_of_name_matches, axis="columns")

In [43]:
# Compute each station's location with get_position (exact name match first,
# substring match as fallback); this overwrites the column assigned in the previous cell.
stations["location"] = stations.apply(get_position, axis="columns")

In [44]:
# Inspect the computed station locations.
stations

Unnamed: 0,searched,location
0,Uničov,POINT (17.11592 49.77922)
1,Bohuňovice,POINT (17.28106 49.66306)


In [49]:
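# NOTE: left disabled — get_position_scalar() takes a station name, whereas stations["location"] holds point geometries.
# stations["position"] = stations["location"].apply(get_position_scalar)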

In [50]:
# Inspect the frame again; it is unchanged because the preceding cell is commented out.
stations

Unnamed: 0,searched,location
0,Uničov,POINT (17.11592 49.77922)
1,Bohuňovice,POINT (17.28106 49.66306)


In [51]:
# Turn the stations into a GeoDataFrame whose geometry is the "location" column,
# declare its coordinates as WGS84 and open the interactive map view.
stations_geo = geo.GeoDataFrame(stations).set_geometry("location")
stations_geo.set_crs("WGS84").explore()

In [52]:
from map_tools import get_marker_cluster, get_map

In [57]:
# Parse the raw event column with the project-local `log.parse_events`.
# NOTE(review): the same call is repeated in the cell that joins the result onto `data`.
event_details = parse_events(data[EVENT])

In [56]:
# Inspect the loaded log records before the event column is replaced by its parsed fields.
data

Unnamed: 0,Čas,Název DLS,Událost,OBU ETCS ID,Číslo vlaku,Délka [m],Max. rychlost [km/h],Nápravový tlak [t],Povel stůj,Druh trakce,Druh vlaku
0,2023-01-01 05:59:34,ETCS Olomouc - Uničov,RBC 101 Uničov - Bohuňovice - 94361: vznik vla...,94361,3625,53,160,18,,,osobní
1,2023-01-01 05:59:50,ETCS Olomouc - Uničov,RBC 101 Uničov - Bohuňovice - 94361: vlak 3625...,94361,3625,53,160,18,,,osobní
2,2023-01-01 06:17:49,ETCS Olomouc - Uničov,RBC 101 Uničov - Bohuňovice - 94359: vznik vlaku,94359,,,,,,,
3,2023-01-01 06:17:53,ETCS Olomouc - Uničov,RBC 101 Uničov - Bohuňovice - 94359: vlak 3624...,94359,3624,53,160,18,,,osobní
4,2023-01-01 06:18:58,ETCS Olomouc - Uničov,RBC 101 Uničov - Bohuňovice - 94359: vlak 3624...,94359,3624,53,160,18,,,osobní
...,...,...,...,...,...,...,...,...,...,...,...
9998,2023-01-27 10:07:35,ETCS Olomouc - Uničov,RBC 101 Uničov - Bohuňovice - 94361: zánik vlaku,94361,,,,,,,
9999,2023-01-27 10:17:01,ETCS Olomouc - Uničov,RBC 101 Uničov - Bohuňovice - 94360: vlak 3631...,94360,3631,53,160,18,,,osobní
10000,2023-01-27 10:17:01,ETCS Olomouc - Uničov,RBC 101 Uničov - Bohuňovice - 94360: zánik vla...,94360,3631,,,,,,
10001,2023-01-27 10:41:05,ETCS Olomouc - Uničov,RBC 101 Uničov - Bohuňovice - 94361: vznik vla...,94361,13707,53,160,18,,,osobní


In [58]:
# Replace the raw event text and the OBU column with the parsed event fields.
# The guard makes the cell safe to re-run: once EVENT has been dropped from
# `data`, parsing it again would raise KeyError, so the step is skipped instead.
if EVENT in data.columns:
    event_details = parse_events(data[EVENT])
    data = data.drop([EVENT, OBU], axis="columns").join(event_details)

In [19]:
from ipywidgets import DatePicker

In [20]:
# Render an ipywidgets date picker; it is not bound to a name, so its value cannot be read later.
DatePicker()

DatePicker(value=None, step=1)

In [22]:
import ipydatetime

In [25]:
# Create an ipydatetime date-and-time picker and display it as the cell output.
datetime_picker = ipydatetime.DatetimePicker()
datetime_picker

DatetimePicker(value=None)