In [None]:
import pandas as pd
import numpy as np
import time

VEHICLE_DATAFILE = 'vehicle_data.csv'

# get df and clean up
vehicle_df = pd.read_csv(
    VEHICLE_DATAFILE,
    parse_dates=['event_datetime'],
    infer_datetime_format=True
).dropna()

# group by vin
vehicle_df = vehicle_df.groupby(['vin'])

supply_df = pd.DataFrame()

for _, group in vehicle_df:
    # get pointers for prev and next events
    # sort, just in case it comes in unsorted
    group = group.sort_values(by='event_datetime')
    
    # get event_datetime when is_available goes from true to false (becomes unavailable)
    left = group[(group['is_available'] == False) & (group['is_available'].shift() == True)]
        
    # get event_datetime when is_available goes from false to true (becomes available)
    right = group[(group['is_available'] == True) & (group['is_available'].shift() == False)]
    right['available_at'] = right['event_datetime']  # keep this so we know when it was made available

    # can't assume symmetry for events
    # can't tell which event comes first
    group = pd.merge_asof(left, right, on='event_datetime')
    
    supply_df = supply_df.append(group)

supply_df = supply_df.dropna()
supply_df['unavailable_at'] = supply_df['event_datetime']
supply_df.drop(['event_datetime'], axis=1)
supply_df.reset_index(inplace=True)

# construct multi-index columns?
supply_df


In [6]:
# available_hours_per_day = supply_df.apply(
#     lambda x: pd.date_range(x['available_at'], x['unavailable_at'], freq='H').day_name().value_counts(), axis=1)

df = supply_df.apply(
    lambda x: pd.date_range(x['available_at'], x['unavailable_at'], freq='H'), axis=1)
df

0        DatetimeIndex(['2019-05-01 01:48:26.832000', '...
1        DatetimeIndex(['2019-05-01 04:21:16.126000', '...
2        DatetimeIndex(['2019-05-01 21:11:05.932000'], ...
3        DatetimeIndex(['2019-05-01 21:15:30.293000', '...
4        DatetimeIndex(['2019-05-01 23:53:47.072000', '...
5        DatetimeIndex(['2019-05-02 15:34:18.517000', '...
6        DatetimeIndex(['2019-05-03 00:53:16.834000'], ...
7        DatetimeIndex(['2019-05-03 00:59:38.482000'], ...
8        DatetimeIndex(['2019-05-03 01:45:45.558000', '...
9        DatetimeIndex(['2019-05-03 04:25:54.612000', '...
10       DatetimeIndex(['2019-05-03 14:56:36.956000', '...
11       DatetimeIndex(['2019-05-03 19:09:31.368000'], ...
12       DatetimeIndex(['2019-05-03 19:31:49.396000'], ...
13       DatetimeIndex(['2019-05-03 19:53:13.619000'], ...
14       DatetimeIndex(['2019-05-03 20:04:51.028000'], ...
15       DatetimeIndex(['2019-05-03 21:02:55.259000', '...
16       DatetimeIndex(['2019-05-05 00:20:24.974000'], .

In [11]:
# construct large dow/hour df
import pandas as pd
import calendar
from copy import deepcopy

# create multi-index and multi-index dataframe
mi = pd.MultiIndex.from_product([list(calendar.day_name), list(range(0, 24))], names=['dow', 'hour'])
base_series = pd.Series(index=mi).fillna(value=0)
mi_df = pd.DataFrame(columns=mi)
# cc_df = pd.concat([df, mi_df], axis=1, levels=['dow', 'hour'])
# h = cc_df[0][0].hour
# dow = cc_df[0][0].day_name()

def extractor(x):
    global mi_df
    temp = deepcopy(base_series)
    for i, j, in zip(x.day_name(), x.hour):
        temp[i, j] += 1
    mi_df = mi_df.append(temp, ignore_index=True)

df.apply(extractor)
mi_df

dow,Monday,Monday,Monday,Monday,Monday,Monday,Monday,Monday,Monday,Monday,...,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday
hour,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
supply_df = supply_df.merge(mi_df, left_index=True, right_index=True)
supply_df



Unnamed: 0,index,available_at,battery_level_x,battery_level_y,charging_state_x,charging_state_y,door_status_x,door_status_y,event_datetime,fleet_id_x,...,"(Sunday, 14)","(Sunday, 15)","(Sunday, 16)","(Sunday, 17)","(Sunday, 18)","(Sunday, 19)","(Sunday, 20)","(Sunday, 21)","(Sunday, 22)","(Sunday, 23)"
0,0,2019-05-01 01:48:26.832,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-01 04:11:12.595,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2019-05-01 04:21:16.126,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-01 20:41:19.948,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,2019-05-01 21:11:05.932,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,UNLOCKED,2019-05-01 21:11:31.322,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,2019-05-01 21:15:30.293,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,UNLOCKED,2019-05-01 23:41:50.193,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,2019-05-01 23:53:47.072,100.000000,96.666667,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-02 15:32:31.492,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,5,2019-05-02 15:34:18.517,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-02 23:43:28.259,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,6,2019-05-03 00:53:16.834,100.000000,99.166667,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-03 00:58:09.221,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,7,2019-05-03 00:59:38.482,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-03 01:18:45.768,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,8,2019-05-03 01:45:45.558,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-03 04:15:16.443,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,9,2019-05-03 04:25:54.612,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-03 14:40:52.814,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
from pyproj import Proj

def convert_to_mercator(lngs, lats):
    projection = Proj(init='epsg:3857')
    xs = []
    ys = []
    for lng, lat in zip(lngs, lats):
        x, y = projection(lng, lat)
        xs.append(x)
        ys.append(y)
    return xs, ys

# convert all points to mercator projection
supply_df['merc_lng'], supply_df['merc_lat'] = convert_to_mercator(
    supply_df['lng_x'], supply_df['lat_x'])

# singular hour
hour = 10
dow = 'Monday'
# filter for 1 hour, 1 dow
display_df = supply_df[supply_df[dow, hour] != 0]
display_df

Unnamed: 0,index,available_at,battery_level_x,battery_level_y,charging_state_x,charging_state_y,door_status_x,door_status_y,event_datetime,fleet_id_x,...,"(Sunday, 16)","(Sunday, 17)","(Sunday, 18)","(Sunday, 19)","(Sunday, 20)","(Sunday, 21)","(Sunday, 22)","(Sunday, 23)",merc_lng,merc_lat
19,19,2019-05-06 06:55:18.771,100.0,100.000000,CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-06 14:33:46.892,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.351360e+07,4.653491e+06
45,23,2019-05-06 07:59:38.664,100.0,95.833333,CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-06 14:36:02.448,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.351358e+07,4.653460e+06
61,14,2019-05-05 08:12:00.752,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-07 15:51:30.091,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.352253e+07,4.661198e+06
68,5,2019-05-04 15:38:29.730,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-07 07:15:22.099,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.352639e+07,4.658806e+06
81,13,2019-05-05 05:13:56.880,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-06 12:00:00.624,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.352339e+07,4.662387e+06
88,5,2019-05-06 01:36:18.906,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-07 04:25:52.286,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.352033e+07,4.659426e+06
100,11,2019-05-04 19:00:43.449,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-06 23:01:34.516,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.351900e+07,4.659728e+06
108,3,2019-05-04 20:11:55.121,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-07 01:32:31.397,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.352390e+07,4.661701e+06
118,8,2019-05-04 18:36:43.520,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-06 16:19:04.963,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.352600e+07,4.661402e+06
137,19,2019-05-04 20:09:13.322,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-06 10:41:45.366,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.352368e+07,4.662125e+06


In [202]:
# get rental stuff
# get rentals started in particular hexes

# map df to axial (hexes), then take sum of hours 
from bokeh.util.hex import cartesian_to_axial

display_df['q'], display_df['r'] = cartesian_to_axial(
    display_df['merc_lng'],
    display_df['merc_lat'],
    size=500,
    orientation='pointytop'
)



In [17]:
from bokeh.util.hex import hexbin

# provide datapoints to hexbin
_hexbin_source = hexbin(
    x=display_df['merc_lng'],
    y=display_df['merc_lat'],
    size=500)
#     hover_color="pink",
#     fill_alpha=0.3)

# _hexbin_source.insert(len(_hexbin_source.columns), 'vehicle_hours', display_df[hour])
_hexbin_source.fillna(value=0, inplace=True)
_hexbin_source

Unnamed: 0,q,r,counts
0,-12712,-6065,1
1,-12712,-6063,1
2,-12711,-6064,2
3,-12707,-6058,1
4,-12705,-6062,8
5,-12702,-6065,4
6,-12702,-6064,4
7,-12701,-6068,3
8,-12701,-6065,1
9,-12701,-6064,1


In [28]:
from bokeh.plotting import figure, show, output_file
from bokeh.transform import linear_cmap
from bokeh.tile_providers import CARTODBPOSITRON

map_figure = figure(
    x_range=(-13618976.4221, -13605638.1607),  # bounding box for starting view
    y_range=(4549035.0828, 4564284.2700),
    x_axis_type='mercator',
    y_axis_type='mercator',
    plot_width=1000,
    plot_height=1000,
    title=f'Available Supply for {dow} {hour}',
    tooltips=[('(q, r)', '(@q, @r)'),
              ('Vehicle-Hours', '@counts')]
#               ('Vehicle Hours', '@vehicle_hours')]
#               ('Fill Color Hex Code', '@fc')]
)
map_figure.add_tile(CARTODBPOSITRON)
map_figure.hex_tile(q='q', r='r', size=500, source=_hexbin_source,
                    hover_color='pink', hover_alpha=0.8, fill_alpha=0.3,
                   fill_color=linear_cmap('counts', 'Viridis256', 0, max(_hexbin_source.counts)))
#                    fill_color=linear_cmap('vehicle_hours', 'Viridis256', 0, max(_hexbin_source.vehicle_hours)))

# print(max(_hexbin_source.vehicle_hours))
#                    fill_color='fc')

In [29]:
from bokeh.io import show
show(map_figure)