In [58]:
import pandas as pd
import numpy as np
import time

VEHICLE_DATAFILE = 'vehicle_data.csv'

# get df and clean up
vehicle_df = pd.read_csv(
    VEHICLE_DATAFILE,
    parse_dates=['event_datetime'],
    infer_datetime_format=True
).dropna()

# group by vin
vehicle_df = vehicle_df.groupby(['vin'])

supply_df = pd.DataFrame()

for _, group in vehicle_df:
    # get pointers for prev and next events
    # sort, just in case it comes in unsorted
    group = group.sort_values(by='event_datetime')
    
    # get event_datetime when is_available goes from true to false (becomes unavailable)
    left = group[(group['is_available'] == False) & (group['is_available'].shift() == True)]
        
    # get event_datetime when is_available goes from false to true (becomes available)
    right = group[(group['is_available'] == True) & (group['is_available'].shift() == False)]
    right['available_at'] = right['event_datetime']  # keep this so we know when it was made available

    # can't assume symmetry for events
    # can't tell which event comes first
    group = pd.merge_asof(left, right, on='event_datetime')
    
    supply_df = supply_df.append(group)

supply_df = supply_df.dropna()
supply_df['unavailable_at'] = supply_df['event_datetime']
supply_df.drop(['event_datetime'], axis=1)
supply_df.reset_index(inplace=True)
supply_df['idle_duration'] = supply_df['unavailable_at'] - supply_df['available_at']  # duration for analysis

# construct multi-index columns?
supply_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,index,available_at,battery_level_x,battery_level_y,charging_state_x,charging_state_y,door_status_x,door_status_y,event_datetime,fleet_id_x,...,vehicle_groups_x,vehicle_groups_y,vehicle_id_x,vehicle_id_y,vehicle_make_x,vehicle_make_y,vin_x,vin_y,unavailable_at,idle_duration
0,0,2019-05-01 01:48:26.832,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-01 04:11:12.595,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-05-01 04:11:12.595,0 days 02:22:45.763000
1,1,2019-05-01 04:21:16.126,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-01 20:41:19.948,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-05-01 20:41:19.948,0 days 16:20:03.822000
2,2,2019-05-01 21:11:05.932,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,UNLOCKED,2019-05-01 21:11:31.322,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-05-01 21:11:31.322,0 days 00:00:25.390000
3,3,2019-05-01 21:15:30.293,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,UNLOCKED,2019-05-01 23:41:50.193,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-05-01 23:41:50.193,0 days 02:26:19.900000
4,4,2019-05-01 23:53:47.072,100.000000,96.666667,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-02 15:32:31.492,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-05-02 15:32:31.492,0 days 15:38:44.420000
5,5,2019-05-02 15:34:18.517,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-02 23:43:28.259,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-05-02 23:43:28.259,0 days 08:09:09.742000
6,6,2019-05-03 00:53:16.834,100.000000,99.166667,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-03 00:58:09.221,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-05-03 00:58:09.221,0 days 00:04:52.387000
7,7,2019-05-03 00:59:38.482,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-03 01:18:45.768,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-05-03 01:18:45.768,0 days 00:19:07.286000
8,8,2019-05-03 01:45:45.558,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-03 04:15:16.443,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-05-03 04:15:16.443,0 days 02:29:30.885000
9,9,2019-05-03 04:25:54.612,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-03 14:40:52.814,US-SFO,...,[GIG Sacramento Car Share],[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-05-03 14:40:52.814,0 days 10:14:58.202000


In [59]:
# convert to minutes
supply_df['idle_duration_minutes'] = supply_df['idle_duration'].dt.total_seconds()/60.0
supply_df['idle_duration_minutes']

0         142.762717
1         980.063700
2           0.423167
3         146.331667
4         938.740333
5         489.162367
6           4.873117
7          19.121433
8         149.514750
9         614.970033
10        199.189200
11          4.416200
12         18.920900
13          1.025000
14         27.116167
15       1582.067000
16          3.797183
17        213.330667
18       1537.600683
19        458.468683
20        208.997450
21        175.652050
22         22.553550
23         24.625517
24        778.412417
25         20.038100
26        115.170217
27         12.258317
28        137.289717
29        508.313500
            ...     
12689       0.637967
12690       0.832183
12691       0.477000
12692    1045.630300
12693     275.951733
12694      91.349117
12695      29.703367
12696      43.976733
12697      34.075567
12698     528.617200
12699      90.230550
12700      68.143617
12701    2672.600250
12702      39.051283
12703       0.185000
12704       0.786250
12705      17

In [64]:
# create datetimeindex of periods with the end datetime appended
df = supply_df.apply(
    lambda x: (pd.date_range(x['available_at'], x['unavailable_at'], freq='H', closed='left')).append(pd.to_datetime([x['unavailable_at']])), axis=1)

df

0        DatetimeIndex(['2019-05-01 01:48:26.832000', '...
1        DatetimeIndex(['2019-05-01 04:21:16.126000', '...
2        DatetimeIndex(['2019-05-01 21:11:05.932000', '...
3        DatetimeIndex(['2019-05-01 21:15:30.293000', '...
4        DatetimeIndex(['2019-05-01 23:53:47.072000', '...
5        DatetimeIndex(['2019-05-02 15:34:18.517000', '...
6        DatetimeIndex(['2019-05-03 00:53:16.834000', '...
7        DatetimeIndex(['2019-05-03 00:59:38.482000', '...
8        DatetimeIndex(['2019-05-03 01:45:45.558000', '...
9        DatetimeIndex(['2019-05-03 04:25:54.612000', '...
10       DatetimeIndex(['2019-05-03 14:56:36.956000', '...
11       DatetimeIndex(['2019-05-03 19:09:31.368000', '...
12       DatetimeIndex(['2019-05-03 19:31:49.396000', '...
13       DatetimeIndex(['2019-05-03 19:53:13.619000', '...
14       DatetimeIndex(['2019-05-03 20:04:51.028000', '...
15       DatetimeIndex(['2019-05-03 21:02:55.259000', '...
16       DatetimeIndex(['2019-05-05 00:20:24.974000', '.

In [61]:
# construct large dow/hour df
import pandas as pd
import calendar
from copy import deepcopy

# create multi-index and multi-index dataframe
mi = pd.MultiIndex.from_product([list(calendar.day_name), list(range(0, 24))], names=['dow', 'hour'])
base_series = pd.Series(index=mi).fillna(value=0)
mi_df = pd.DataFrame(columns=mi)

def extractor(x):
    global mi_df
    temp = deepcopy(base_series)    
    for i, j in zip(x.day_name(), x.hour):
        temp[i, j] += 1
    mi_df = mi_df.append(temp, ignore_index=True)

df.apply(extractor)
mi_df

dow,Monday,Monday,Monday,Monday,Monday,Monday,Monday,Monday,Monday,Monday,...,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday
hour,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
supply_df = supply_df.merge(mi_df, left_index=True, right_index=True)
supply_df

Unnamed: 0,index,available_at,battery_level_x,battery_level_y,charging_state_x,charging_state_y,door_status_x,door_status_y,event_datetime,fleet_id_x,...,"(Sunday, 14)","(Sunday, 15)","(Sunday, 16)","(Sunday, 17)","(Sunday, 18)","(Sunday, 19)","(Sunday, 20)","(Sunday, 21)","(Sunday, 22)","(Sunday, 23)"
0,0,2019-05-01 01:48:26.832,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-01 04:11:12.595,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2019-05-01 04:21:16.126,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-01 20:41:19.948,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,2019-05-01 21:11:05.932,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,UNLOCKED,2019-05-01 21:11:31.322,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,2019-05-01 21:15:30.293,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,UNLOCKED,2019-05-01 23:41:50.193,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,2019-05-01 23:53:47.072,100.000000,96.666667,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-02 15:32:31.492,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,5,2019-05-02 15:34:18.517,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-02 23:43:28.259,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,6,2019-05-03 00:53:16.834,100.000000,99.166667,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-03 00:58:09.221,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,7,2019-05-03 00:59:38.482,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-03 01:18:45.768,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,8,2019-05-03 01:45:45.558,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-03 04:15:16.443,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,9,2019-05-03 04:25:54.612,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-03 14:40:52.814,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
from pyproj import Proj

def convert_to_mercator(lngs, lats):
    projection = Proj(init='epsg:3857')
    xs = []
    ys = []
    for lng, lat in zip(lngs, lats):
        x, y = projection(lng, lat)
        xs.append(x)
        ys.append(y)
    return xs, ys

# convert all points to mercator projection
supply_df['merc_lng'], supply_df['merc_lat'] = convert_to_mercator(
    supply_df['lng_x'], supply_df['lat_x'])

# singular hour
hour = 10
dow = 'Monday'
# filter for 1 hour, 1 dow, this will be the columndatasource later
figure_df = supply_df[supply_df[dow, hour] != 0]
figure_df

Unnamed: 0,index,available_at,battery_level_x,battery_level_y,charging_state_x,charging_state_y,door_status_x,door_status_y,event_datetime,fleet_id_x,...,"(Sunday, 14)","(Sunday, 15)","(Sunday, 16)","(Sunday, 17)","(Sunday, 18)","(Sunday, 19)","(Sunday, 20)","(Sunday, 21)","(Sunday, 22)","(Sunday, 23)"
19,19,2019-05-06 06:55:18.771,100.0,100.000000,CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-06 14:33:46.892,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
45,23,2019-05-06 07:59:38.664,100.0,95.833333,CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-06 14:36:02.448,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
61,14,2019-05-05 08:12:00.752,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-07 15:51:30.091,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
68,5,2019-05-04 15:38:29.730,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-07 07:15:22.099,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
81,13,2019-05-05 05:13:56.880,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-06 12:00:00.624,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
88,5,2019-05-06 01:36:18.906,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-07 04:25:52.286,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100,11,2019-05-04 19:00:43.449,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-06 23:01:34.516,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
108,3,2019-05-04 20:11:55.121,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-07 01:32:31.397,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
118,8,2019-05-04 18:36:43.520,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-06 16:19:04.963,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
137,19,2019-05-04 20:09:13.322,100.0,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-05-06 10:41:45.366,US-SFO,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [25]:
# get rental stuff
# get rentals started in particular hexes

# map df to axial (hexes), then take sum of hours 
from bokeh.util.hex import cartesian_to_axial

# map the points to hex grid
figure_df['q'], figure_df['r'] = cartesian_to_axial(
    figure_df['merc_lng'],
    figure_df['merc_lat'],
    size=500,
    orientation='pointytop'
)

display_df = figure_df.loc[:, ['q', 'r']]
display_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


Unnamed: 0,q,r
19,-12502,-6205
45,-12502,-6205
61,-12507,-6215
68,-12513,-6212
81,-12507,-6217
88,-12506,-6212
100,-12504,-6213
108,-12508,-6216
118,-12511,-6215
137,-12508,-6216


In [34]:
binned_df = figure_df.groupby(['q', 'r'])
_hexbin_source = binned_df['idle_duration_minutes'].agg(['count', 'mean', 'median', 'sum', 'min', 'max']).reset_index()
_hexbin_source

Unnamed: 0,q,r,count,mean,median,sum,min,max
0,-12712,-6065,1,2629.987067,2629.987067,2629.987067,2629.987067,2629.987067
1,-12712,-6063,1,1795.968100,1795.968100,1795.968100,1795.968100,1795.968100
2,-12711,-6064,2,2891.920175,2891.920175,5783.840350,2735.051333,3048.789017
3,-12707,-6058,1,2154.354900,2154.354900,2154.354900,2154.354900,2154.354900
4,-12705,-6062,8,1154.435510,1057.557667,9235.484083,711.193933,2342.818600
5,-12702,-6065,4,2572.078325,2680.985350,10288.313300,716.442317,4209.900283
6,-12702,-6064,4,1739.885354,1830.192825,6959.541417,781.958717,2517.197050
7,-12701,-6068,3,1422.808878,1219.772633,4268.426633,1160.565167,1888.088833
8,-12701,-6065,1,1271.739333,1271.739333,1271.739333,1271.739333,1271.739333
9,-12701,-6064,1,1104.768633,1104.768633,1104.768633,1104.768633,1104.768633


In [43]:
from bokeh.plotting import figure, show, output_file
from bokeh.transform import linear_cmap
from bokeh.tile_providers import CARTODBPOSITRON

map_figure = figure(
    x_range=(-13618976.4221, -13605638.1607),  # bounding box for starting view
    y_range=(4549035.0828, 4564284.2700),
    x_axis_type='mercator',
    y_axis_type='mercator',
    plot_width=750,
    plot_height=750,
    title=f'Available Supply for {dow} {hour}',
    tooltips=[('(q, r)', '(@q, @r)'),
              ('vehicles', '@count'),
              ('mean idle minutes', '@mean'),
              ('median idle minutes', '@median'),
              ('sum idle minutes', '@sum'),
              ('min idle minutes', '@min'),
              ('max idle minutes', '@max')]
#               ('Vehicle Hours', '@vehicle_hours')]
#               ('Fill Color Hex Code', '@fc')]
)
map_figure.add_tile(CARTODBPOSITRON)
map_figure.hex_tile(q='q', r='r', size=500, source=_hexbin_source,
                    hover_color='pink', hover_alpha=0.8, fill_alpha=0.3,
                   fill_color=linear_cmap('mean', 'Viridis256', 0, max(_hexbin_source['mean'])))
#                    fill_color=linear_cmap('vehicle_hours', 'Viridis256', 0, max(_hexbin_source.vehicle_hours)))

# print(max(_hexbin_source.vehicle_hours))
#                    fill_color='fc')

In [44]:
from bokeh.io import show
show(map_figure)