In [1]:
import pandas as pd
import plotly.express as px
import os

import warnings
# NOTE(review): this silences every warning category for the whole kernel session,
# so deprecation and chained-assignment warnings from pandas will not be shown.
warnings.filterwarnings('ignore')

In [2]:
%%time

# Read every per-snapshot CSV in `vehicle_data/` into its own DataFrame.
# The file name without its extension is stored as that snapshot's timestamp string.
data_dir = 'vehicle_data/'
collection = []

# os.listdir returns every directory entry in arbitrary order, so sort for a
# deterministic result and skip anything that is not a CSV file.
for table in sorted(os.listdir(data_dir)):

    date_id, extension = os.path.splitext(table)
    if extension.lower() != '.csv':
        continue

    table_read_in = pd.read_csv(os.path.join(data_dir, table))
    table_read_in['datetime'] = date_id
    collection.append(table_read_in)

Wall time: 5.83 s


In [3]:
%%time

# Stack all snapshots into one frame. Every CSV was read with its own default
# 0..n-1 index, so ignore_index=True is used to give the combined frame unique row labels.
data = pd.concat(collection, axis = 0, join = 'outer', ignore_index = True)

Wall time: 388 ms


In [4]:
# The snapshot timestamp strings use dashes in the time part as well; parse them into datetimes.
snapshot_time_format = '%Y-%m-%d %H-%M-%S'
data['datetime'] = pd.to_datetime(data['datetime'], format = snapshot_time_format)

In [5]:
print(data.shape)
# Rows without a line name belong to vehicles heading to the garage; drop them.
# .copy() makes the filtered frame independent of the unfiltered one, so the
# column assignment below is not a write into a slice.
data = data[data['shortName'].notnull()].copy()
# A missing tripId means the vehicle is still in service; label it explicitly.
# Assigning the result back (rather than calling fillna(inplace=True) on the selected
# column) updates `data` reliably, including under pandas copy-on-write.
data['tripId'] = data['tripId'].fillna('not_given')
print(data.shape)

(377579, 10)
(301364, 10)


In [6]:
# Re-label night buses: line names that start with '9' and are either exactly three
# characters long without any of the letters E, A, M, B, or longer than three characters.
line_names = data['shortName']
starts_with_nine = line_names.str.startswith('9')
three_chars_without_letter = (line_names.str.len() == 3) & ~line_names.str.contains('E|A|M|B')
longer_than_three = line_names.str.len() > 3
is_night_bus = starts_with_nine & (three_chars_without_letter | longer_than_three)
data.loc[is_night_bus, 'vehicleRouteType'] = 'NIGHT_BUS'

### Overall count of vehicles plotted by time series

In [7]:
# Number of distinct license plates observed in each snapshot.
count_by_time = (
    data.groupby('datetime')
        .agg(total_vehicle_number = ('licensePlate', 'nunique'))
        .reset_index()
)

In [8]:
# Line chart of the distinct-vehicle count per snapshot.
fig = px.line(
    count_by_time,
    x = 'datetime',
    y = 'total_vehicle_number',
    title = 'Aktív járművek száma az idő függvényében',
    labels = {'datetime' : 'Dátum - idő'},
    width = 950,
    height = 450,
)
# Keep the y axis and its tick labels visible, but without an axis title.
fig.update_yaxes(title = None, showticklabels = True, visible = True)
#fig.update_traces(mode='lines+markers')
fig.show()

In [9]:
# Number of distinct license plates per snapshot and route type.
count_by_type_by_time = (
    data.groupby(['datetime', 'vehicleRouteType'])
        .agg(total_vehicle_number = ('licensePlate', 'nunique'))
        .reset_index()
        .rename(columns = {'vehicleRouteType' : 'type'})
)

In [10]:
# One facet per route type, each with its own colour and independent axes.
route_type_colors = {'BUS' : 'blue', 'TRAM' : 'yellow', 'TROLLEYBUS' : 'red', 'SUBURBAN_RAILWAY' : 'green', 'NIGHT_BUS' : 'black'}

fig = px.line(
    count_by_type_by_time,
    x = 'datetime',
    y = 'total_vehicle_number',
    color = 'type',
    color_discrete_map = route_type_colors,
    facet_col = 'type',
    facet_col_wrap = 2,
    facet_row_spacing = 0.1,
    facet_col_spacing = 0.1,
    title = 'Aktív járművek száma az idő függvényében, típusonként',
    labels = {'datetime' : 'Dátum - idő'},
    width = 950,
    height = 750,
)
# Unlink the axes between facets and show tick labels on every facet.
fig.update_yaxes(matches = None, title = None, showticklabels = True, visible = True)
fig.update_xaxes(matches = None, showticklabels = True)
# Facet annotations read "type=<value>"; keep only the part after the '='.
fig.for_each_annotation(lambda a: a.update(text = a.text.split('=')[-1]))
fig.update_layout(showlegend = False)
fig.show()

### Total vehicles used in a day by line

In [14]:
# Distinct license plates seen on each line (with its endpoints),
# ordered by type and then by descending vehicle count.
count_by_line = (
    data.groupby(['vehicleRouteType', 'shortName', 'From', 'To'])
        .agg(total_vehicle_number = ('licensePlate', 'nunique'))
        .reset_index()
        .rename(columns = {'vehicleRouteType' : 'type', 'shortName' : 'line_id'})
        .sort_values(['type', 'total_vehicle_number'], ascending = [True, False])
)

In [25]:
# First ten rows of each type (count_by_line is already sorted by descending count),
# then re-sorted for plotting.
top_10_line = (
    count_by_line.groupby('type')
                 .head(10)
                 .sort_values(['type', 'total_vehicle_number'], ascending = [False, True])
)

In [31]:
# Horizontal bars: top lines per route type by number of distinct vehicles.
route_type_colors = {'BUS' : 'blue', 'TRAM' : 'yellow', 'TROLLEYBUS' : 'red', 'SUBURBAN_RAILWAY' : 'green', 'NIGHT_BUS' : 'black'}

fig = px.bar(
    top_10_line,
    x = 'total_vehicle_number',
    y = 'line_id',
    color = 'type',
    color_discrete_map = route_type_colors,
    hover_data = ['From', 'To'],
    facet_col = 'type',
    facet_col_wrap = 2,
    facet_row_spacing = 0.15,
    facet_col_spacing = 0.1,
    title = 'Legtöbb járművel rendelkező vonalak - teljes nap',
    labels = {'line_id' : 'Vonal jelzés'},
    width = 950,
    height = 850,
)
# Line ids are treated as categories (not numbers) and ordered by bar length within each facet.
fig.update_yaxes(type = 'category', categoryorder = 'total ascending',
                 matches = None, title = None, showticklabels = True, visible = True)
fig.update_xaxes(matches = None, title = None, showticklabels = True, visible = True)
# Facet annotations read "type=<value>"; keep only the part after the '='.
fig.for_each_annotation(lambda a: a.update(text = a.text.split('=')[-1]))
fig.update_layout(showlegend = False)
fig.show()

### Total trips taken by vehicles by line

In [32]:
# Distinct trip ids recorded on each line (with its endpoints),
# ordered by type and then by descending trip count.
trip_by_line = (
    data.groupby(['vehicleRouteType', 'shortName', 'From', 'To'])
        .agg(total_trips_taken = ('tripId', 'nunique'))
        .reset_index()
        .rename(columns = {'vehicleRouteType' : 'type', 'shortName' : 'line_id'})
        .sort_values(['type', 'total_trips_taken'], ascending = [True, False])
)

In [33]:
# First ten rows of each type (trip_by_line is already sorted by descending count),
# then re-sorted for plotting.
top_10_trips_line = (
    trip_by_line.groupby('type')
                .head(10)
                .sort_values(['type', 'total_trips_taken'], ascending = [False, True])
)

In [35]:
# Horizontal bars: top lines per route type by number of distinct trips.
# The chart title previously contained a typo ("jűrművek"); it now reads "járművek".
fig = px.bar(top_10_trips_line, x = 'total_trips_taken', y = 'line_id', facet_col = 'type',
             hover_data = ['From', 'To'],
              facet_col_wrap = 2, facet_row_spacing = 0.15, facet_col_spacing = 0.1, color = 'type',
              color_discrete_map = {'BUS' : 'blue', 'TRAM' : 'yellow', 'TROLLEYBUS' : 'red', 'SUBURBAN_RAILWAY' : 'green', 'NIGHT_BUS' : 'black'},
              title = 'Legtöbb útvonalat megtett járművek - teljes nap', 
              labels = {'line_id' : 'Vonal jelzés'}, width = 950, height = 850)
# Line ids are treated as categories (not numbers) and ordered by bar length within each facet.
fig.update_yaxes(visible = True, showticklabels = True, title = None, matches=None, type='category', categoryorder = 'total ascending')
fig.update_xaxes(visible = True, showticklabels = True, title = None, matches=None)
# Facet annotations read "type=<value>"; keep only the part after the '='.
fig.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1]))
fig.update_layout(showlegend = False)
fig.show()

### How many kilometres do the different vehicles travel?

In [36]:
# Line names that were re-labelled as night buses, in order of first appearance.
is_night_bus_row = data['vehicleRouteType'] == 'NIGHT_BUS'
night_bus_map = data.loc[is_night_bus_row, 'shortName'].unique().tolist()

In [37]:
# Load the per-route master table and align it with the vehicle data:
# same column name for the route type, no `id` column, night buses re-labelled.
route_data = pd.read_csv('route_collector/master.csv')
# drop(columns=...) replaces the positional axis argument (drop('id', 1)),
# which pandas no longer accepts from version 2.0 on.
route_data = (
    route_data.rename(columns = {'type' : 'vehicleRouteType'})
              .drop(columns = 'id')
)
route_data.loc[route_data['shortName'].isin(night_bus_map), 'vehicleRouteType'] = 'NIGHT_BUS'

In [38]:
# One row per distinct (vehicle, route type, trip, line) combination,
# left-joined with the route table on line name and route type.
trip_columns = ['licensePlate', 'vehicleRouteType', 'tripId', 'shortName']
unique_trips = data[trip_columns].drop_duplicates()
km_count = unique_trips.merge(route_data, how = 'left', on = ['shortName', 'vehicleRouteType'])

In [39]:
# km_count[km_count['total_distance'].isnull()]

In [40]:
# Keep only the rows for which the left join found a route distance.
km_count = km_count.dropna(subset = ['total_distance'])

In [41]:
# Scale total_distance down by a factor of 1000
# (presumably metres -> kilometres; confirm against the route collector output).
km_count = km_count.assign(total_distance = km_count['total_distance'] / 1000)

In [42]:
# Preview the first three rows of the joined table.
km_count.iloc[:3]

Unnamed: 0,licensePlate,vehicleRouteType,tripId,shortName,num_of_stops,total_distance,From,To
0,V1440,TRAM,BKK_C21846820,47,20.0,8.893,Városház tér,Deák Ferenc tér M
1,V2112,TRAM,BKK_C348931180J,1,31.0,18.178,Kelenföld vasútállomás M,Bécsi út / Vörösvári út
2,LOV873,BUS,BKK_C36950518,123A,20.0,8.834,Szentlőrinci úti ltp.,Határ út M


In [43]:
def _total_km_by(keys):
    """Sum total_distance in km_count per group of `keys`, largest total first."""
    totals = km_count.groupby(keys).agg({'total_distance' : 'sum'}).reset_index()
    return totals.sort_values('total_distance', ascending = False)


# Distance totals at three levels of detail: per vehicle on a line, per line, per route type.
km_by_license = _total_km_by(['shortName', 'vehicleRouteType', 'licensePlate'])
km_by_line = _total_km_by(['shortName', 'vehicleRouteType'])
km_by_type = _total_km_by(['vehicleRouteType'])

Overall

In [44]:
# Grand total of the per-trip distances.
total_km = km_count['total_distance'].sum()
print('1 nap alatt teljes megtett km:', total_km)

1 nap alatt teljes megtett km: 446473.33699999994


By type

In [45]:
# Horizontal bars: total distance per route type.
route_type_colors = {'BUS' : 'blue', 'TRAM' : 'yellow', 'TROLLEYBUS' : 'red', 'SUBURBAN_RAILWAY' : 'green', 'NIGHT_BUS' : 'black'}

fig = px.bar(
    km_by_type,
    x = 'total_distance',
    y = 'vehicleRouteType',
    color = 'vehicleRouteType',
    color_discrete_map = route_type_colors,
    title = '1 nap alatt megtett km járműtípusonként',
    width = 750,
    height = 350,
)
# Both axes stay visible with tick labels but without axis titles.
fig.update_yaxes(title = None, showticklabels = True, visible = True)
fig.update_xaxes(title = None, showticklabels = True, visible = True)
fig.update_layout(showlegend = False)
fig.show()

By line

In [47]:
# The 15 lines with the largest distance total (km_by_line is sorted descending).
km_by_line_top = km_by_line.head(15)

route_type_colors = {'BUS' : 'blue', 'TRAM' : 'yellow', 'TROLLEYBUS' : 'red', 'SUBURBAN_RAILWAY' : 'green', 'NIGHT_BUS' : 'black'}

fig = px.bar(
    km_by_line_top,
    x = 'total_distance',
    y = 'shortName',
    color = 'vehicleRouteType',
    color_discrete_map = route_type_colors,
    title = '1 nap alatt legtöbb km-t megtett vonalak',
    width = 750,
    height = 450,
)
# Line names are treated as categories (not numbers) and ordered by bar length.
fig.update_yaxes(type = 'category', categoryorder = 'total ascending',
                 title = None, showticklabels = True, visible = True)
fig.update_xaxes(title = None, showticklabels = True, visible = True)
fig.update_layout(showlegend = False)
fig.show()

By licensePlate

In [48]:
# The 15 (line, vehicle) rows with the largest distance total (km_by_license is sorted descending).
km_by_license_top = km_by_license.head(15)

route_type_colors = {'BUS' : 'blue', 'TRAM' : 'yellow', 'TROLLEYBUS' : 'red', 'SUBURBAN_RAILWAY' : 'green', 'NIGHT_BUS' : 'black'}

fig = px.bar(
    km_by_license_top,
    x = 'total_distance',
    y = 'licensePlate',
    color = 'vehicleRouteType',
    color_discrete_map = route_type_colors,
    hover_data = ['shortName'],
    title = '1 nap alatt legtöbb km-t megtett jármű (rendszám)',
    width = 750,
    height = 450,
)
# Order the license plates by bar length.
fig.update_yaxes(categoryorder = 'total ascending', title = None, showticklabels = True, visible = True)
fig.update_xaxes(title = None, showticklabels = True, visible = True)
fig.update_layout(showlegend = False)
fig.show()

#### Latest available datetime --> lines with most vehicles

In [29]:
# mr_date = sorted(data['datetime'].unique())[-1]
# most_recent_data = data[data['datetime'] == mr_date].copy()

In [30]:
# count_by_line_mr = most_recent_data.groupby(['vehicleRouteType', 'shortName', 'From', 'To']).agg({'licensePlate' : 'nunique'}).reset_index().rename(columns = {'licensePlate' : 'total_vehicle_number', 'vehicleRouteType' : 'type', 'shortName' : 'line_id'})
# count_by_line_mr.sort_values(['type', 'total_vehicle_number'], ascending = [True, False], inplace = True)

In [31]:
# top_10_line_mr = count_by_line_mr.groupby('type').head(10).sort_values(['type', 'total_vehicle_number'], ascending = [False, True])

In [32]:
# fig = px.bar(top_10_line_mr, x = 'total_vehicle_number', y = 'line_id', facet_col = 'type',
#              hover_data = ['From', 'To'],
#               facet_col_wrap = 2, facet_row_spacing = 0.15, facet_col_spacing = 0.1, color = 'type',
#               color_discrete_map = {'BUS' : 'blue', 'TRAM' : 'yellow', 'TROLLEYBUS' : 'red', 'SUBURBAN_RAILWAY' : 'green', 'NIGHT_BUS' : 'black'},
#               title = 'Legtöbb járművel rendelkező vonalak - legútóbbi elérhető dátum: ' + pd.to_datetime(mr_date).strftime('%Y-%m-%d %H:%M:%S'), 
#               labels = {'line_id' : 'Vonal jelzés'}, width = 950, height = 650)
# fig.update_yaxes(visible = True, showticklabels = True, title = None, matches=None)
# fig.update_xaxes(visible = True, showticklabels = True, title = None, matches=None)
# fig.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1]))
# fig.update_layout(showlegend = False)
# fig.show()

### Map animation

In [49]:
# Work on an independent copy so the map preparation does not alter `data`.
map_data = data.copy(deep = True)

Plotting every 5-minute snapshot is not feasible: it produces too many data points and makes the animation too slow, so only a subset of the snapshots is kept below.

In [50]:
print(map_data.shape)
# Keep only the snapshots taken at these minutes of each hour to thin out the animation frames.
kept_minutes = [25, 45, 5]
taken_on_kept_minute = map_data['datetime'].dt.minute.isin(kept_minutes)
map_data = map_data[taken_on_kept_minute]
# Optional additional restriction to selected hours (disabled):
# map_data = map_data[map_data['datetime'].dt.hour.isin([21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])]
print(map_data.shape)

(301364, 10)
(76943, 10)


In [51]:
# Convert the timestamps to strings; they are used as animation frame labels in the map below.
map_data = map_data.assign(datetime = map_data['datetime'].astype(str))

In [52]:
# Remove the columns the map does not use.
# drop(columns=...) replaces the positional axis argument (drop([...], 1)),
# which pandas no longer accepts from version 2.0 on.
map_data = map_data.drop(columns = ['model', 'From', 'To', 'licensePlate'])

In [55]:
def plot_vehicles_map():
    """Display an animated map of vehicle positions, one animation frame per snapshot.

    Reads the notebook-level ``map_data`` frame (columns used: ``lat``, ``lon``,
    ``vehicleRouteType``, ``datetime``) and shows the figure. Returns nothing and
    leaves ``map_data`` unmodified.
    """
    # The marker size is taken from a column, so add a constant one. It is added to a
    # temporary copy instead of being added to and later dropped from map_data, which
    # keeps the shared frame clean even if plotting raises. This also removes the
    # positional-axis drop('size', 1) call that pandas >= 2.0 rejects.
    plot_frame = map_data.assign(size = 1)

    fig = px.scatter_mapbox(plot_frame, lat = 'lat', lon = 'lon',
                            title = 'Forgalomban lévő járművek alakulása', size = 'size', size_max = 4.5,
                            color = 'vehicleRouteType', animation_frame = 'datetime', animation_group='vehicleRouteType',
                            color_discrete_map = {'BUS' : 'blue', 'TRAM' : 'yellow', 'TROLLEYBUS' : 'red', 'SUBURBAN_RAILWAY' : 'green', 'NIGHT_BUS' : 'black'},
                            zoom = 10, mapbox_style = 'carto-positron', height = 650, width = 950)

    # Set the frame and transition durations on the first button of the first update menu
    # (presumably the play button that plotly express adds for animations).
    fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 250
    fig.layout.updatemenus[0].buttons[0].args[1]["transition"]["duration"] = 250
    fig.update_layout(showlegend = False)
    fig.update_layout(transition = {'easing' : 'linear'})
    #fig.update_traces(hoverinfo='skip', hovertemplate=None)
    fig.show()

In [56]:
# Render the animated vehicle map defined in the previous cell.
plot_vehicles_map()

In [30]:
# map_data[(map_data['datetime'].str[11:13] == '03') & 
#          (map_data['datetime'].str[14:16] == '00')& 
#          (map_data['vehicleRouteType'] == 'NIGHT_BUS')]