In [1]:
import pandas as pd
import plotly.express as px
import os

import warnings
warnings.filterwarnings('ignore')

In [2]:
%%time

# Read every CSV snapshot in vehicle_data/ and tag its rows with the file's
# base name, which is parsed as the snapshot timestamp in a later cell.
collection = []

# os.listdir returns entries in arbitrary order and can include non-CSV
# entries, so sort for a reproducible row order and skip anything else.
for table in sorted(os.listdir('vehicle_data/')):

    date_id, extension = os.path.splitext(table)
    if extension.lower() != '.csv':
        continue

    table_read_in = pd.read_csv(os.path.join('vehicle_data/', table))
    table_read_in['datetime'] = date_id
    collection.append(table_read_in)

Wall time: 286 ms


In [3]:
%%time

# Stack all per-snapshot frames row-wise, keeping the union of their columns.
data = pd.concat(collection, axis=0, join='outer')

Wall time: 84.9 ms


In [4]:
# The timestamp strings use dashes inside the time part, hence the explicit format.
timestamp_format = '%Y-%m-%d %H-%M-%S'
data['datetime'] = pd.to_datetime(data['datetime'], format=timestamp_format)

In [5]:
print(data.shape)
# Rows without a shortName belong to vehicles heading to the garage: drop them.
# .copy() makes the filtered frame independent, so the assignment below is
# guaranteed to land on `data` itself rather than on a temporary.
data = data[data['shortName'].notnull()].copy()
# A missing tripId means the vehicle is still in service: use a placeholder.
# Assigning the result back (instead of inplace=True on a selected column)
# keeps working under pandas Copy-on-Write, where the inplace form on a
# selected column no longer updates the parent frame.
data['tripId'] = data['tripId'].fillna('not_given')
print(data.shape)

(28020, 10)
(14769, 10)


In [6]:
# Relabel rows as NIGHT_BUS when the line name starts with '9' and is either
# exactly three characters long without an E/A/M/B character, or longer than
# three characters.
short_name = data['shortName']
name_length = short_name.str.len()

starts_with_nine = short_name.str.startswith('9')
three_chars_without_marker = (name_length == 3) & ~short_name.str.contains('E|A|M|B')
longer_than_three = name_length > 3

is_night_bus = starts_with_nine & (three_chars_without_marker | longer_than_three)
data.loc[is_night_bus, 'vehicleRouteType'] = 'NIGHT_BUS'

### Overall count of vehicles plotted by time series

In [7]:
# Number of distinct licence plates observed in each snapshot.
count_by_time = (
    data.groupby('datetime')['licensePlate']
        .nunique()
        .reset_index(name='total_vehicle_number')
)

In [8]:
# Line chart: distinct vehicles per snapshot over time.
fig = px.line(
    count_by_time,
    x='datetime',
    y='total_vehicle_number',
    title='Aktív járművek száma az idő függvényében',
    labels={'datetime': 'Dátum - idő'},
    width=950,
    height=450,
)
# Keep the y axis and its tick labels visible, but without an axis title.
fig.update_yaxes(title=None, visible=True, showticklabels=True)
#fig.update_traces(mode='lines+markers')
fig.show()

In [9]:
# Number of distinct licence plates per snapshot and route type.
count_by_type_by_time = (
    data.groupby(['datetime', 'vehicleRouteType'])['licensePlate']
        .nunique()
        .reset_index(name='total_vehicle_number')
        .rename(columns={'vehicleRouteType': 'type'})
)

In [10]:
# One line-chart facet per route type, two facets per row.
route_type_colors = {
    'BUS': 'blue',
    'TRAM': 'yellow',
    'TROLLEYBUS': 'red',
    'SUBURBAN_RAILWAY': 'green',
    'NIGHT_BUS': 'black',
}

fig = px.line(
    count_by_type_by_time,
    x='datetime',
    y='total_vehicle_number',
    color='type',
    color_discrete_map=route_type_colors,
    facet_col='type',
    facet_col_wrap=2,
    facet_row_spacing=0.1,
    facet_col_spacing=0.1,
    title='Aktív járművek száma az idő függvényében, típusonként',
    labels={'datetime': 'Dátum - idő'},
    width=950,
    height=750,
)
# matches=None gives every facet its own y-axis range.
fig.update_yaxes(matches=None, title=None, visible=True, showticklabels=True)
# Keep only the part of each facet title after the last '='.
fig.for_each_annotation(lambda annotation: annotation.update(text=annotation.text.split('=')[-1]))
fig.update_layout(showlegend=False)
fig.show()

### Total vehicles used in a day by line

In [11]:
# Distinct vehicles per line (type, line name and both termini), ordered by
# type and then by descending vehicle count within each type.
count_by_line = (
    data.groupby(['vehicleRouteType', 'shortName', 'From', 'To'])['licensePlate']
        .nunique()
        .reset_index(name='total_vehicle_number')
        .rename(columns={'vehicleRouteType': 'type', 'shortName': 'line_id'})
        .sort_values(['type', 'total_vehicle_number'], ascending=[True, False])
)

In [12]:
# count_by_line is already sorted by descending count within each type, so
# head(10) keeps the ten lines with the most vehicles per type.
first_ten_per_type = count_by_line.groupby('type').head(10)
# Re-sort for plotting: types descending, counts ascending within a type.
top_10_line = first_ten_per_type.sort_values(
    ['type', 'total_vehicle_number'],
    ascending=[False, True],
)

In [13]:
# Horizontal bars: top lines by distinct vehicle count, one facet per route type.
route_type_colors = {
    'BUS': 'blue',
    'TRAM': 'yellow',
    'TROLLEYBUS': 'red',
    'SUBURBAN_RAILWAY': 'green',
    'NIGHT_BUS': 'black',
}

fig = px.bar(
    top_10_line,
    x='total_vehicle_number',
    y='line_id',
    color='type',
    color_discrete_map=route_type_colors,
    hover_data=['From', 'To'],
    facet_col='type',
    facet_col_wrap=2,
    facet_row_spacing=0.15,
    facet_col_spacing=0.1,
    title='Legtöbb járművel rendelkező vonalak - teljes nap',
    labels={'line_id': 'Vonal jelzés'},
    width=950,
    height=850,
)
# matches=None lets every facet use its own axis range on both axes.
fig.update_yaxes(matches=None, title=None, visible=True, showticklabels=True)
fig.update_xaxes(matches=None, title=None, visible=True, showticklabels=True)
# Keep only the part of each facet title after the last '='.
fig.for_each_annotation(lambda annotation: annotation.update(text=annotation.text.split('=')[-1]))
fig.update_layout(showlegend=False)
fig.show()

### Total trips taken by vehicles by line

In [22]:
# Distinct trip ids per line (type, line name and both termini), ordered by
# type and then by descending trip count within each type.
trip_by_line = (
    data.groupby(['vehicleRouteType', 'shortName', 'From', 'To'])['tripId']
        .nunique()
        .reset_index(name='total_trips_taken')
        .rename(columns={'vehicleRouteType': 'type', 'shortName': 'line_id'})
        .sort_values(['type', 'total_trips_taken'], ascending=[True, False])
)

In [23]:
# trip_by_line is already sorted by descending count within each type, so
# head(10) keeps the ten lines with the most trips per type.
first_ten_per_type = trip_by_line.groupby('type').head(10)
# Re-sort for plotting: types descending, counts ascending within a type.
top_10_trips_line = first_ten_per_type.sort_values(
    ['type', 'total_trips_taken'],
    ascending=[False, True],
)

In [24]:
# Horizontal bars: top lines by distinct trip count, one facet per route type.
route_type_colors = {
    'BUS': 'blue',
    'TRAM': 'yellow',
    'TROLLEYBUS': 'red',
    'SUBURBAN_RAILWAY': 'green',
    'NIGHT_BUS': 'black',
}

fig = px.bar(
    top_10_trips_line,
    x='total_trips_taken',
    y='line_id',
    color='type',
    color_discrete_map=route_type_colors,
    hover_data=['From', 'To'],
    facet_col='type',
    facet_col_wrap=2,
    facet_row_spacing=0.15,
    facet_col_spacing=0.1,
    # Title typo fixed: 'jűrművek' -> 'járművek'.
    title='Legtöbb útvonalat megtett járművek - teljes nap',
    labels={'line_id': 'Vonal jelzés'},
    width=950,
    height=850,
)
# matches=None lets every facet use its own axis range on both axes.
fig.update_yaxes(matches=None, title=None, visible=True, showticklabels=True)
fig.update_xaxes(matches=None, title=None, visible=True, showticklabels=True)
# Keep only the part of each facet title after the last '='.
fig.for_each_annotation(lambda annotation: annotation.update(text=annotation.text.split('=')[-1]))
fig.update_layout(showlegend=False)
fig.show()

### ToDo: how many kms travelled
- by line
- by type
- overall

#### Latest available datetime --> lines with most vehicles

In [43]:
# mr_date = sorted(data['datetime'].unique())[-1]
# most_recent_data = data[data['datetime'] == mr_date].copy()

In [44]:
# count_by_line_mr = most_recent_data.groupby(['vehicleRouteType', 'shortName', 'From', 'To']).agg({'licensePlate' : 'nunique'}).reset_index().rename(columns = {'licensePlate' : 'total_vehicle_number', 'vehicleRouteType' : 'type', 'shortName' : 'line_id'})
# count_by_line_mr.sort_values(['type', 'total_vehicle_number'], ascending = [True, False], inplace = True)

In [45]:
# top_10_line_mr = count_by_line_mr.groupby('type').head(10).sort_values(['type', 'total_vehicle_number'], ascending = [False, True])

In [46]:
# fig = px.bar(top_10_line_mr, x = 'total_vehicle_number', y = 'line_id', facet_col = 'type',
#              hover_data = ['From', 'To'],
#               facet_col_wrap = 2, facet_row_spacing = 0.15, facet_col_spacing = 0.1, color = 'type',
#               color_discrete_map = {'BUS' : 'blue', 'TRAM' : 'yellow', 'TROLLEYBUS' : 'red', 'SUBURBAN_RAILWAY' : 'green', 'NIGHT_BUS' : 'black'},
#               title = 'Legtöbb járművel rendelkező vonalak - legútóbbi elérhető dátum: ' + pd.to_datetime(mr_date).strftime('%Y-%m-%d %H:%M:%S'), 
#               labels = {'line_id' : 'Vonal jelzés'}, width = 950, height = 650)
# fig.update_yaxes(visible = True, showticklabels = True, title = None, matches=None)
# fig.update_xaxes(visible = True, showticklabels = True, title = None, matches=None)
# fig.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1]))
# fig.update_layout(showlegend = False)
# fig.show()

### Map animation

In [18]:
# Work on an independent copy so the map preparation does not alter `data`.
map_data = data.copy(deep=True)

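Plotting every 5-minute snapshot is not feasible: too many data points make the animation too slow, so only the snapshots taken at minute 0 and minute 30 of each hour are kept below.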

In [19]:
print(map_data.shape)
# Keep only the snapshots whose timestamp falls on minute 0 or minute 30.
on_half_hour = map_data['datetime'].dt.minute.isin([0, 30])
map_data = map_data[on_half_hour]
print(map_data.shape)

(304778, 9)
(50949, 9)


In [20]:
# Convert the timestamps to plain strings; the column is used as the
# animation frame key in the map plot further down.
datetime_as_text = map_data['datetime'].astype(str)
map_data['datetime'] = datetime_as_text

In [21]:
# Drop the columns the map does not use. `columns=` replaces the positional
# axis argument (`1`), which pandas deprecated in 1.3 and rejects since 2.0.
map_data = map_data.drop(columns=['model', 'From', 'To', 'licensePlate'])

In [22]:
def plot_vehicles_map():
    """Show an animated scatter map of vehicle positions.

    Reads the module-level ``map_data`` frame (columns used: ``lat``, ``lon``,
    ``vehicleRouteType`` and ``datetime``) and renders one animation frame per
    distinct ``datetime`` value. ``map_data`` itself is not modified.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # The constant marker size is added on a local copy, so the global frame
    # is never left with a stray 'size' column if plotting fails part-way.
    plot_frame = map_data.assign(size=1)

    fig = px.scatter_mapbox(
        plot_frame,
        lat='lat',
        lon='lon',
        title='Forgalomban lévő járművek alakulása',
        size='size',
        size_max=4.5,
        color='vehicleRouteType',
        animation_frame='datetime',
        animation_group='vehicleRouteType',
        color_discrete_map={
            'BUS': 'blue',
            'TRAM': 'yellow',
            'TROLLEYBUS': 'red',
            'SUBURBAN_RAILWAY': 'green',
            'NIGHT_BUS': 'black',
        },
        zoom=10,
        mapbox_style='carto-positron',
        height=650,
        width=950,
    )

    # Set frame and transition duration to 500 on the first animation button
    # (presumably the "play" button, with durations in milliseconds - confirm).
    fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 500
    fig.layout.updatemenus[0].buttons[0].args[1]["transition"]["duration"] = 500
    fig.update_layout(showlegend=False, transition={'easing': 'linear'})
    # Disable hover tooltips on every trace.
    fig.update_traces(hoverinfo='skip', hovertemplate=None)
    fig.show()

In [24]:
# Render the animated vehicle map; the function displays the figure itself.
plot_vehicles_map()

In [30]:
# map_data[(map_data['datetime'].str[11:13] == '03') & 
#          (map_data['datetime'].str[14:16] == '00')& 
#          (map_data['vehicleRouteType'] == 'NIGHT_BUS')]