In [1]:
import pandas as pd
import numpy as np
import time

VEHICLE_DATAFILE = 'vehicle_data.csv'

# get df and clean up
vehicle_df = pd.read_csv(
    VEHICLE_DATAFILE,
    parse_dates=['event_datetime'],
    infer_datetime_format=True
).dropna()

# group by vin
vehicle_df = vehicle_df.groupby(['vin'])
supply_df = pd.DataFrame()

# NOTE: very expensive. should save intermediates so don't have to regenerate
for _, group in vehicle_df:
    # get pointers for prev and next events
    # sort, just in case it comes in unsorted
    group = group.sort_values(by='event_datetime')
    
    # get event_datetime when is_available goes from true to false (becomes unavailable)
    left = group[(group['is_available'] == False) & (group['is_available'].shift() == True)]
        
    # get event_datetime when is_available goes from false to true (becomes available)
    right = group[(group['is_available'] == True) & (group['is_available'].shift() == False)]
    right['available_at'] = right['event_datetime']  # keep this so we know when it was made available

    # can't assume symmetry for events
    # can't tell which event comes first
    group = pd.merge_asof(left, right, on='event_datetime')
    
    supply_df = supply_df.append(group)

supply_df = supply_df.dropna()
supply_df['unavailable_at'] = supply_df['event_datetime']
supply_df.drop(['event_datetime'], axis=1)
supply_df.reset_index(inplace=True)
supply_df['idle_duration'] = supply_df['unavailable_at'] - supply_df['available_at']  # duration for analysis
supply_df['idle_duration_minutes'] = supply_df['idle_duration'].dt.total_seconds()/60.0
# construct multi-index columns?
supply_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,index,available_at,battery_level_x,battery_level_y,charging_state_x,charging_state_y,door_status_x,door_status_y,event_datetime,fleet_id_x,...,vehicle_groups_x,vehicle_groups_y,vehicle_id_x,vehicle_id_y,vehicle_make_x,vehicle_make_y,vin_x,vin_y,unavailable_at,idle_duration
0,1,2019-04-03 20:57:20.267,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-03 21:55:38.815,US-SFO,...,[Sacramento Fleet Maintenance],[Sacramento Fleet Maintenance],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-03 21:55:38.815,0 days 00:58:18.548000
1,2,2019-04-03 22:12:08.695,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,UNLOCKED,2019-04-04 16:08:48.304,US-SFO,...,[Sacramento Fleet Maintenance],[Sacramento Fleet Maintenance],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-04 16:08:48.304,0 days 17:56:39.609000
2,3,2019-04-04 16:25:00.531,100.000000,99.166667,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-04 16:25:16.662,US-SFO,...,[Sacramento Fleet Maintenance],[Sacramento Fleet Maintenance],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-04 16:25:16.662,0 days 00:00:16.131000
3,4,2019-04-04 16:25:57.159,100.000000,97.500000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-04 21:24:35.849,US-SFO,...,[Sacramento Fleet Maintenance],[Sacramento Fleet Maintenance],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-04 21:24:35.849,0 days 04:58:38.690000
4,5,2019-04-04 21:26:34.942,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-05 09:45:12.212,US-SFO,...,[Sacramento Fleet Maintenance],[Sacramento Fleet Maintenance],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-05 09:45:12.212,0 days 12:18:37.270000
5,6,2019-04-05 10:08:17.190,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-05 19:00:01.052,US-SFO,...,[GIG Sacramento Car Share],[Sacramento Fleet Maintenance],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-05 19:00:01.052,0 days 08:51:43.862000
6,7,2019-04-05 19:08:52.616,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-06 18:11:27.216,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-06 18:11:27.216,0 days 23:02:34.600000
7,8,2019-04-07 14:25:19.753,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-08 18:17:51.120,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-08 18:17:51.120,1 days 03:52:31.367000
8,9,2019-04-08 18:32:38.648,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-09 19:30:46.083,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-09 19:30:46.083,1 days 00:58:07.435000
9,10,2019-04-09 19:43:56.987,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-10 00:00:00.779,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-10 00:00:00.779,0 days 04:16:03.792000


In [4]:
# create datetimeindex of periods with the end datetime appended
df = supply_df.apply(
    lambda x: (pd.date_range(x['available_at'], x['unavailable_at'], freq='H', closed='left')).append(pd.to_datetime([x['unavailable_at']])), axis=1)

In [6]:
# construct large dow/hour df
# NOTE: very expensive. should save intermediates so don't have to regenerate
import pandas as pd
import calendar
from copy import deepcopy

# create multi-index and multi-index dataframe
mi = pd.MultiIndex.from_product([list(calendar.day_name), list(range(0, 24))], names=['dow', 'hour'])
base_series = pd.Series(index=mi).fillna(value=0)
mi_df = pd.DataFrame(columns=mi)

def extractor(x):
    global mi_df
    temp = deepcopy(base_series)
    # duration less than 1 hour, does span across slice (hour) ex: [1:30, 2:15]
    if x.size == 2 and x[0].hour != x[1].hour:  
        temp[x[0].day_name(), x[0].hour] += 60 - x[0].minute
        temp[x[1].day_name(), x[1].hour] += x[1].minute

    # duration less than 1 hour, doesn't span across slice (hour) ex: [1:30, 1:45]
    elif x.size == 2 and x[0].hour == x[1].hour:
        temp[x[0].day_name(), x[0].hour] += x[1].minute - x[0].minute
  
    # duration greater than 1 hour, does span across slice (hour) ex: [1:30, 2:30, 2:45]
    elif x.size == 3 and x[1].hour == x[2].hour:
        temp[x[0].day_name(), x[0].hour] += 60 - x[0].minute
        temp[x[2].day_name(), x[2].hour] += x[2].minute
  
    else:
        # duration greater than 2 hours, ex: [1:30, 2:30, 3:30, 3:45]
        # or spans across multiple hours
        n = 0
        min_marker = x[0].minute
        for i, j, k in zip(x.day_name(), x.hour, x.minute):
            # each datetimeindex
            if n == 0: # first element => 60 - 30 = 30
                temp[i, j] += (60 - k)
            elif n == (x.size - 1):  # last element, can't assume full hour
                if k >= min_marker:
                    temp[i, j] += (k - min_marker) # ex: 3:45 - 3:30 = 15m
                else:
                    temp[i, j] += k  # ex: 3:30 - 3:00 = 30m
            elif n == (x.size - 2):  # second to last element, can't assume full hour
                temp[i, j] += k  # ex: 3:30 - 3:00 = 30m
            else:  # middle of array
                temp[i, j] += 60 # ex: 3:30 - 2:30 = 1h
            n += 1
    mi_df = mi_df.append(temp, ignore_index=True)

df.apply(extractor)
mi_df

dow,Monday,Monday,Monday,Monday,Monday,Monday,Monday,Monday,Monday,Monday,...,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday
hour,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,...,35.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# merge the big dow/hour mask back with vehicle_update data

supply_df = supply_df.merge(mi_df, left_index=True, right_index=True)
supply_df



Unnamed: 0,index,available_at,battery_level_x,battery_level_y,charging_state_x,charging_state_y,door_status_x,door_status_y,event_datetime,fleet_id_x,...,"(Sunday, 14)","(Sunday, 15)","(Sunday, 16)","(Sunday, 17)","(Sunday, 18)","(Sunday, 19)","(Sunday, 20)","(Sunday, 21)","(Sunday, 22)","(Sunday, 23)"
0,1,2019-04-03 20:57:20.267,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-03 21:55:38.815,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,2019-04-03 22:12:08.695,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,UNLOCKED,2019-04-04 16:08:48.304,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,2019-04-04 16:25:00.531,100.000000,99.166667,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-04 16:25:16.662,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,2019-04-04 16:25:57.159,100.000000,97.500000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-04 21:24:35.849,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,2019-04-04 21:26:34.942,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-05 09:45:12.212,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,6,2019-04-05 10:08:17.190,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-05 19:00:01.052,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,7,2019-04-05 19:08:52.616,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-06 18:11:27.216,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,8,2019-04-07 14:25:19.753,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-08 18:17:51.120,US-SFO,...,35.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0
8,9,2019-04-08 18:32:38.648,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-09 19:30:46.083,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,10,2019-04-09 19:43:56.987,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-10 00:00:00.779,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
from pyproj import Proj

def convert_to_mercator(lngs, lats):
    projection = Proj(init='epsg:3857')
    xs = []
    ys = []
    for lng, lat in zip(lngs, lats):
        x, y = projection(lng, lat)
        xs.append(x)
        ys.append(y)
    return xs, ys

# convert all points to mercator projection
supply_df['merc_lng'], supply_df['merc_lat'] = convert_to_mercator(
    supply_df['lng_x'], supply_df['lat_x'])

# Start filtering for data to display
# singular hour
hour = 0
dow = 'Tuesday'
# filter for 1 hour, 1 dow, this will be the columndatasource later
figure_df = supply_df[supply_df[dow, hour] != 0]
figure_df  # to be used by figure

Unnamed: 0,index,available_at,battery_level_x,battery_level_y,charging_state_x,charging_state_y,door_status_x,door_status_y,event_datetime,fleet_id_x,...,"(Sunday, 16)","(Sunday, 17)","(Sunday, 18)","(Sunday, 19)","(Sunday, 20)","(Sunday, 21)","(Sunday, 22)","(Sunday, 23)",merc_lng,merc_lat
8,9,2019-04-08 18:32:38.648,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-09 19:30:46.083,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.352367e+07,4.661971e+06
19,20,2019-04-16 00:01:50.658,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-16 14:00:04.959,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.351360e+07,4.653523e+06
27,28,2019-04-22 19:36:13.020,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-23 13:50:35.511,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.351897e+07,4.658915e+06
31,32,2019-04-25 15:11:46.935,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,ALL_UNLOCKED,LOCKED,2019-04-30 00:32:08.530,US-SFO,...,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,-1.352397e+07,4.661997e+06
32,33,2019-04-30 00:50:01.892,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-30 03:58:08.229,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.352186e+07,4.659454e+06
37,1,2019-04-01 20:00:42.278,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-02 13:00:00.855,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.351365e+07,4.652974e+06
46,10,2019-04-08 23:00:48.284,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-09 05:00:00.969,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.351365e+07,4.652991e+06
56,20,2019-04-15 13:05:13.791,98.333333,98.333333,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-16 08:00:01.006,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.351365e+07,4.652989e+06
74,38,2019-04-22 22:25:26.878,100.000000,96.666667,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-23 14:20:41.456,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.351361e+07,4.653499e+06
89,53,2019-04-29 23:00:48.340,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-30 16:54:11.889,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.352410e+07,4.657529e+06


In [9]:
# map df to axial (hexes), then take sum of hours 
from bokeh.util.hex import cartesian_to_axial

# binning time
# map the points to hex grid
figure_df['q'], figure_df['r'] = cartesian_to_axial(
    figure_df['merc_lng'],
    figure_df['merc_lat'],
    size=500,
    orientation='pointytop'
)

display_df = figure_df.loc[:, ['q', 'r']]
display_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


Unnamed: 0,q,r
8,-12508,-6216
19,-12502,-6205
27,-12504,-6212
31,-12508,-6216
32,-12507,-6213
37,-12502,-6204
46,-12502,-6204
56,-12502,-6204
74,-12502,-6205
89,-12511,-6210


In [11]:
# binned_df[('Monday', 10),]  # FYI groupby changes the syntax
binned_df = figure_df.groupby(['q', 'r'])

# could add idle duration data later
# _hexbin_source = binned_df['idle_duration_minutes'].agg(['count', 'mean', 'median', 'sum', 'min', 'max']).reset_index()

# binned_df[('Monday', 10),]  # FYI groupby changes the syntax. access like this
_hexbin_source = binned_df[('Tuesday', 0),].agg(['count', 'mean', 'median', 'sum', 'min', 'max']).reset_index()
_hexbin_source.columns = ['q', 'r', 'count', 'mean', 'median', 'sum', 'min', 'max']
_hexbin_source

Unnamed: 0,q,r,count,mean,median,sum,min,max
0,-12714,-6065,2,60.000000,60.0,120.0,60.0,60.0
1,-12714,-6064,2,60.000000,60.0,120.0,60.0,60.0
2,-12714,-6063,2,49.500000,49.5,99.0,39.0,60.0
3,-12713,-6065,1,60.000000,60.0,60.0,60.0,60.0
4,-12713,-6064,1,60.000000,60.0,60.0,60.0,60.0
5,-12713,-6063,2,60.000000,60.0,120.0,60.0,60.0
6,-12712,-6064,4,59.000000,60.0,236.0,56.0,60.0
7,-12711,-6065,4,60.000000,60.0,240.0,60.0,60.0
8,-12711,-6064,10,60.000000,60.0,600.0,60.0,60.0
9,-12711,-6063,1,60.000000,60.0,60.0,60.0,60.0


In [12]:
from bokeh.plotting import figure, show, output_file
from bokeh.transform import linear_cmap
from bokeh.tile_providers import CARTODBPOSITRON

# draw the map
map_figure = figure(
    x_range=(-13618976.4221, -13605638.1607),  # bounding box for starting view
    y_range=(4549035.0828, 4564284.2700),
    x_axis_type='mercator',
    y_axis_type='mercator',
    plot_width=750,
    plot_height=750,
    title=f'Available Supply for {dow} {hour}',
    tooltips=[('(q, r)', '(@q, @r)'),
              ('vehicles', '@count'),
              ('mean idle minutes/hour', '@mean'),
              ('median idle minutes/hour', '@median'),
              ('sum idle minutes/hour', '@sum'),
              ('min idle minutes/hour', '@min'),
              ('max idle minutes/hour', '@max')]
)

# add background of streets for context
map_figure.add_tile(CARTODBPOSITRON)

# add hexes, with modified fill color based on mean of idle time per hour
map_figure.hex_tile(q='q', r='r', size=500, source=_hexbin_source,
                    hover_color='pink', hover_alpha=0.8, fill_alpha=0.3,
                    fill_color=linear_cmap('mean', 'Viridis256', 0, max(_hexbin_source['mean'])))

In [13]:
# generate map in browser
from bokeh.io import show
show(map_figure)