In [1]:
from datetime import datetime
import pandas as pd

from bus_speeding import parse_data, get_speeding_buses, generate_map
from punctuality import calculate_delays

In [2]:
# Recorded bus-location snapshots analysed in this notebook.
filepath_morning = "../data/bus-locations-2024-02-19 09:35:12.222315.json"
filepath_evening = "../data/bus-locations-2024-02-18 20:29:31.691732.json"

# Timestamps corresponding to the file names above, truncated to whole seconds.
timestamp_morning, timestamp_evening = (
    datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
    for stamp in ("2024-02-19 09:35:12", "2024-02-18 20:29:31")
)

In [3]:
# Evening snapshot: parse the recorded bus locations, select the buses
# returned by get_speeding_buses, and hand them to generate_map.
bus_to_data = parse_data(filepath_evening)

buses_speeding = get_speeding_buses(bus_to_data)

generate_map(buses_speeding)

skipped 481 elements out of 83585
found 258 buses that exceeded           speed limit out of 1769 buses


In [4]:
# Morning snapshot: same pipeline as for the evening file.
# NOTE: this rebinds bus_to_data and buses_speeding, so the evening results
# are no longer available after this cell runs.
bus_to_data = parse_data(filepath_morning)

buses_speeding = get_speeding_buses(bus_to_data)

generate_map(buses_speeding)

skipped 446 elements out of 85341
found 336 buses that exceeded           speed limit out of 1770 buses


In [5]:
# Compute delays for the morning snapshot; the arguments are the data
# directory, the snapshot file, and the matching timestamp.
# NOTE(review): the recorded run shows a progress bar of roughly half a
# minute here, so this is presumably the slowest cell — consider caching.
delayed_buses = calculate_delays("../data", filepath_morning, timestamp_morning)

skipped 444 elements out of 85341


100%|██████████| 20851/20851 [00:33<00:00, 620.07it/s] 

bus lines not found in live data: 77,           buses that did not arrive: 2536





In [6]:
# Tabulate the entries of delayed_buses under readable column labels.
columns = [
    "Bus line",
    "Bus stop",
    "Time",
    "Delay",
]

df = pd.DataFrame(data=delayed_buses, columns=columns)

df

Unnamed: 0,Bus line,Bus stop,Time,Delay
0,147,1001,2024-02-19 10:27:00,0 days 00:04:07
1,166,1001,2024-02-19 09:54:00,0 days 00:04:26
2,509,1001,2024-02-19 10:03:00,0 days 00:02:39
3,138,1001,2024-02-19 09:44:00,0 days 00:05:12
4,166,1001,2024-02-19 10:04:00,0 days 00:04:48
...,...,...,...,...
7901,185,7104,2024-02-19 10:21:00,0 days 00:11:56
7902,185,7106,2024-02-19 10:03:00,0 days 00:02:36
7903,108,7107,2024-02-19 10:24:00,0 days 00:07:07
7904,162,7107,2024-02-19 09:38:00,0 days 00:04:08


In [7]:
# Show the rows ordered from the smallest to the largest delay. sort_values
# is ascending by default and returns a new frame, so df is left unchanged.
df.sort_values(by="Delay")

Unnamed: 0,Bus line,Bus stop,Time,Delay
2211,509,2140,2024-02-19 10:21:00,0 days 00:02:00
1501,143,2015,2024-02-19 10:31:00,0 days 00:02:00
3934,179,3190,2024-02-19 10:16:00,0 days 00:02:00
7745,162,7072,2024-02-19 10:31:00,0 days 00:02:00
5157,191,4094,2024-02-19 09:38:00,0 days 00:02:00
...,...,...,...,...
88,326,1014,2024-02-19 09:39:00,0 days 00:52:05
2573,722,2291,2024-02-19 09:40:00,0 days 00:53:01
2671,219,2371,2024-02-19 09:37:00,0 days 00:54:04
736,332,1173,2024-02-19 09:36:00,0 days 00:55:06


In [8]:
# Average of the Delay column over all rows of df.
df['Delay'].mean()

Timedelta('0 days 00:07:29.045914495')

In [9]:
# Mean delay for each bus line, ordered from the smallest mean to the largest.
(
    df.groupby('Bus line', as_index=False)['Delay']
    .mean()
    .sort_values(by='Delay')
)

Unnamed: 0,Bus line,Delay
172,720,0 days 00:02:01
208,L31,0 days 00:02:12
194,L-2,0 days 00:02:23
164,711,0 days 00:02:25
132,349,0 days 00:02:38.777777777
...,...,...
216,L46,0 days 00:30:00
179,729,0 days 00:30:42.333333333
193,L-1,0 days 00:31:59
174,722,0 days 00:50:33.500000


In [10]:
from bus_stop_criticality import calculate_bus_stop_criticality, generate_criticality_map

# Per-bus-stop counts of scheduled stops, computed for the ../data directory.
count_scheduled_stops = calculate_bus_stop_criticality("../data")

# Split the mapping into two parallel lists: its keys and, in the same
# order, the count stored under each key.
bus_stops = list(count_scheduled_stops)
criticality = list(count_scheduled_stops.values())

In [11]:
from punctuality import get_bus_stops_locations

bus_stops_locations = get_bus_stops_locations("../data")

# Coordinate lists aligned with bus_stops: element 0 of each location entry
# goes to the latitude list, element 1 to the longitude list.
bus_stops_lat = [
    bus_stops_locations[stop][0]
    for stop in bus_stops
]
bus_stops_lon = [
    bus_stops_locations[stop][1]
    for stop in bus_stops
]

In [12]:
# Assemble the per-stop lists into one table. The four lists are parallel:
# position i in each of them refers to the same bus stop.
data = {
    "Bus stop": bus_stops,
    "Number of scheduled stops": criticality,
    "Latitude": bus_stops_lat,
    "Longitude": bus_stops_lon,
}

# NOTE: this rebinds df, which the earlier cells use for the delays table;
# re-running those cells after this one would operate on the wrong frame.
df = pd.DataFrame(data)

df

Unnamed: 0,Bus stop,Number of scheduled stops,Latitude,Longitude
0,"(1001, 01)",641,52.248455,21.044827
1,"(1001, 02)",250,52.249078,21.044443
2,"(1001, 03)",538,52.248928,21.044169
3,"(1001, 04)",622,52.249969,21.041588
4,"(1001, 06)",376,52.250078,21.043848
...,...,...,...,...
6966,"(R-11, 99)",0,52.196605,20.922746
6967,"(R-13, 00)",67,52.263590,21.047868
6968,"(R-13, 99)",0,52.263631,21.047922
6969,"(R-19, 00)",0,52.271236,20.968586


In [15]:
# Keep the 50 rows with the highest scheduled-stop counts, then order that
# selection from the lowest count to the highest.
top_rows = (
    df
    .nlargest(50, "Number of scheduled stops")
    .sort_values("Number of scheduled stops")
)

top_rows

Unnamed: 0,Bus stop,Number of scheduled stops,Latitude,Longitude
15,"(1003, 03)",661,52.254118,21.033248
6937,"(7099, 09)",664,52.244708,21.002661
6938,"(7099, 10)",672,52.244544,21.001806
82,"(1016, 01)",676,52.302715,20.989802
4483,"(4001, 03)",681,52.225006,20.990644
6772,"(7049, 01)",683,52.236112,21.018153
6769,"(7047, 02)",683,52.246767,21.014831
6756,"(7042, 01)",683,52.232358,21.020015
4643,"(4044, 02)",690,52.218056,20.965851
6773,"(7049, 02)",692,52.235839,21.01833


In [13]:
# Pass the selected top_rows to generate_criticality_map (presumably renders
# them on a map — confirm in bus_stop_criticality).
# NOTE(review): the recorded execution counts show this cell (13) was last run
# before the cell that builds top_rows (15); re-run the notebook top to bottom
# to make sure the map reflects the current top_rows.
generate_criticality_map(top_rows)