In [1]:
import pandas as pd
import numpy as np
import time

VEHICLE_DATAFILE = 'vehicle_data.csv'

# get df and clean up
vehicle_df = pd.read_csv(
    VEHICLE_DATAFILE,
    parse_dates=['event_datetime'],
    infer_datetime_format=True
).dropna()

# group by vin
vehicle_df = vehicle_df.groupby(['vin'])
supply_df = pd.DataFrame()


def collapse_is_available_events(group):
    global supply_df
    group = group.sort_values(by='event_datetime')
    
    # get event_datetime when is_available goes from true to false (becomes unavailable)
    left = group[(group['is_available'] == False) & (group['is_available'].shift() == True)]
        
    # get event_datetime when is_available goes from false to true (becomes available)
    right = group[(group['is_available'] == True) & (group['is_available'].shift() == False)]
    right['available_at'] = right['event_datetime']  # keep this so we know when it was made available

    # can't assume symmetry for events
    # can't tell which event comes first
    group = pd.merge_asof(left, right, on='event_datetime')
    
    supply_df = supply_df.append(group)

vehicle_df.apply(collapse_is_available_events)
supply_df = supply_df.dropna()
supply_df['unavailable_at'] = supply_df['event_datetime']
supply_df.drop(['event_datetime'], axis=1)
supply_df.reset_index(inplace=True)
supply_df['idle_duration'] = supply_df['unavailable_at'] - supply_df['available_at']  # duration for analysis
supply_df['idle_duration_minutes'] = supply_df['idle_duration'].dt.total_seconds()/60.0
# construct multi-index columns?
supply_df


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Unnamed: 0,index,available_at,battery_level_x,battery_level_y,charging_state_x,charging_state_y,door_status_x,door_status_y,event_datetime,fleet_id_x,...,vehicle_groups_y,vehicle_id_x,vehicle_id_y,vehicle_make_x,vehicle_make_y,vin_x,vin_y,unavailable_at,idle_duration,idle_duration_minutes
0,1,2019-04-03 20:57:20.267,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-03 21:55:38.815,US-SFO,...,[Sacramento Fleet Maintenance],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-03 21:55:38.815,0 days 00:58:18.548000,58.309133
1,2,2019-04-03 22:12:08.695,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,UNLOCKED,2019-04-04 16:08:48.304,US-SFO,...,[Sacramento Fleet Maintenance],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-04 16:08:48.304,0 days 17:56:39.609000,1076.660150
2,3,2019-04-04 16:25:00.531,100.000000,99.166667,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-04 16:25:16.662,US-SFO,...,[Sacramento Fleet Maintenance],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-04 16:25:16.662,0 days 00:00:16.131000,0.268850
3,4,2019-04-04 16:25:57.159,100.000000,97.500000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-04 21:24:35.849,US-SFO,...,[Sacramento Fleet Maintenance],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-04 21:24:35.849,0 days 04:58:38.690000,298.644833
4,5,2019-04-04 21:26:34.942,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-05 09:45:12.212,US-SFO,...,[Sacramento Fleet Maintenance],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-05 09:45:12.212,0 days 12:18:37.270000,738.621167
5,6,2019-04-05 10:08:17.190,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-05 19:00:01.052,US-SFO,...,[Sacramento Fleet Maintenance],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-05 19:00:01.052,0 days 08:51:43.862000,531.731033
6,7,2019-04-05 19:08:52.616,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-06 18:11:27.216,US-SFO,...,[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-06 18:11:27.216,0 days 23:02:34.600000,1382.576667
7,8,2019-04-07 14:25:19.753,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-08 18:17:51.120,US-SFO,...,[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-08 18:17:51.120,1 days 03:52:31.367000,1672.522783
8,9,2019-04-08 18:32:38.648,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-09 19:30:46.083,US-SFO,...,[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-09 19:30:46.083,1 days 00:58:07.435000,1498.123917
9,10,2019-04-09 19:43:56.987,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-10 00:00:00.779,US-SFO,...,[GIG Sacramento Car Share],781,781.0,Electric Gig,Electric Gig,1G1FX6S08J4138281,1G1FX6S08J4138281,2019-04-10 00:00:00.779,0 days 04:16:03.792000,256.063200


In [2]:
# create datetimeindex of periods with the end datetime appended
df = supply_df.apply(
    lambda x: (pd.date_range(x['available_at'], x['unavailable_at'], freq='H', closed='left')).append(pd.to_datetime([x['unavailable_at']])), axis=1)

In [3]:
# construct large dow/hour df
# NOTE: very expensive. should save intermediates so don't have to regenerate
import pandas as pd
import calendar
from copy import deepcopy

# create multi-index and multi-index dataframe
mi = pd.MultiIndex.from_product([list(calendar.day_name), list(range(0, 24))], names=['dow', 'hour'])
base_series = pd.Series(index=mi).fillna(value=0)
mi_df = pd.DataFrame(columns=mi)

def extractor(x):
    global mi_df
    temp = deepcopy(base_series)
    # duration less than 1 hour, does span across slice (hour) ex: [1:30, 2:15]
    if x.size == 2 and x[0].hour != x[1].hour:  
        temp[x[0].day_name(), x[0].hour] += 60 - x[0].minute
        temp[x[1].day_name(), x[1].hour] += x[1].minute

    # duration less than 1 hour, doesn't span across slice (hour) ex: [1:30, 1:45]
    elif x.size == 2 and x[0].hour == x[1].hour:
        temp[x[0].day_name(), x[0].hour] += x[1].minute - x[0].minute
  
    # duration greater than 1 hour, does span across slice (hour) ex: [1:30, 2:30, 2:45]
    elif x.size == 3 and x[1].hour == x[2].hour:
        temp[x[0].day_name(), x[0].hour] += 60 - x[0].minute
        temp[x[2].day_name(), x[2].hour] += x[2].minute
  
    else:
        # duration greater than 2 hours, ex: [1:30, 2:30, 3:30, 3:45]
        # or spans across multiple hours
        n = 0
        min_marker = x[0].minute
        for i, j, k in zip(x.day_name(), x.hour, x.minute):
            # each datetimeindex
            if n == 0: # first element => 60 - 30 = 30
                temp[i, j] += (60 - k)
            elif n == (x.size - 1):  # last element, can't assume full hour
                if k >= min_marker:
                    temp[i, j] += (k - min_marker) # ex: 3:45 - 3:30 = 15m
                else:
                    temp[i, j] += k  # ex: 3:30 - 3:00 = 30m
            elif n == (x.size - 2):  # second to last element, can't assume full hour
                temp[i, j] += k  # ex: 3:30 - 3:00 = 30m
            else:  # middle of array
                temp[i, j] += 60 # ex: 3:30 - 2:30 = 1h
            n += 1
    mi_df = mi_df.append(temp, ignore_index=True)

df.apply(extractor)
mi_df

dow,Monday,Monday,Monday,Monday,Monday,Monday,Monday,Monday,Monday,Monday,...,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday,Sunday
hour,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,...,35.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# merge the big dow/hour mask back with vehicle_update data

supply_df = supply_df.merge(mi_df, left_index=True, right_index=True)
supply_df



Unnamed: 0,index,available_at,battery_level_x,battery_level_y,charging_state_x,charging_state_y,door_status_x,door_status_y,event_datetime,fleet_id_x,...,"(Sunday, 14)","(Sunday, 15)","(Sunday, 16)","(Sunday, 17)","(Sunday, 18)","(Sunday, 19)","(Sunday, 20)","(Sunday, 21)","(Sunday, 22)","(Sunday, 23)"
0,1,2019-04-03 20:57:20.267,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-03 21:55:38.815,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,2019-04-03 22:12:08.695,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,UNLOCKED,2019-04-04 16:08:48.304,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,2019-04-04 16:25:00.531,100.000000,99.166667,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-04 16:25:16.662,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,2019-04-04 16:25:57.159,100.000000,97.500000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-04 21:24:35.849,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,2019-04-04 21:26:34.942,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-05 09:45:12.212,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,6,2019-04-05 10:08:17.190,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-05 19:00:01.052,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,7,2019-04-05 19:08:52.616,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-06 18:11:27.216,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,8,2019-04-07 14:25:19.753,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-08 18:17:51.120,US-SFO,...,35.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0,60.0
8,9,2019-04-08 18:32:38.648,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-09 19:30:46.083,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,10,2019-04-09 19:43:56.987,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-10 00:00:00.779,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
from pyproj import Proj

def convert_to_mercator(lngs, lats):
    projection = Proj(init='epsg:3857')
    xs = []
    ys = []
    for lng, lat in zip(lngs, lats):
        x, y = projection(lng, lat)
        xs.append(x)
        ys.append(y)
    return xs, ys

# convert all points to mercator projection
supply_df['merc_lng'], supply_df['merc_lat'] = convert_to_mercator(
    supply_df['lng_x'], supply_df['lat_x'])

# Start filtering for data to display
# 3 hour chunks
start_hour = 0
end_hour = 3
dow = 'Tuesday'
# filter for 1 hour, 1 dow, this will be the columndatasource later

# filter for the hour range
# figure_df = supply_df[supply_df[dow, hour] != 0]
supply_df['analysis_hours'] = supply_df[dow, 0] + supply_df[dow, 1] + supply_df[dow, 2]
figure_df = supply_df[supply_df['analysis_hours'] != 0]
figure_df  # to be used by figure

Unnamed: 0,index,available_at,battery_level_x,battery_level_y,charging_state_x,charging_state_y,door_status_x,door_status_y,event_datetime,fleet_id_x,...,"(Sunday, 17)","(Sunday, 18)","(Sunday, 19)","(Sunday, 20)","(Sunday, 21)","(Sunday, 22)","(Sunday, 23)",merc_lng,merc_lat,analysis_hours
8,9,2019-04-08 18:32:38.648,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-09 19:30:46.083,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.352367e+07,4.661971e+06,180.0
19,20,2019-04-16 00:01:50.658,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-16 14:00:04.959,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.351360e+07,4.653523e+06,179.0
27,28,2019-04-22 19:36:13.020,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-23 13:50:35.511,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.351897e+07,4.658915e+06,180.0
31,32,2019-04-25 15:11:46.935,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,ALL_UNLOCKED,LOCKED,2019-04-30 00:32:08.530,US-SFO,...,60.0,60.0,60.0,60.0,60.0,60.0,60.0,-1.352397e+07,4.661997e+06,32.0
32,33,2019-04-30 00:50:01.892,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-30 03:58:08.229,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.352186e+07,4.659454e+06,130.0
44,9,2019-04-08 18:32:38.648,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-09 19:30:46.083,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.352367e+07,4.661971e+06,180.0
55,20,2019-04-16 00:01:50.658,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-16 14:00:04.959,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.351360e+07,4.653523e+06,179.0
63,28,2019-04-22 19:36:13.020,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-23 13:50:35.511,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.351897e+07,4.658915e+06,180.0
67,32,2019-04-25 15:11:46.935,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,ALL_UNLOCKED,LOCKED,2019-04-30 00:32:08.530,US-SFO,...,60.0,60.0,60.0,60.0,60.0,60.0,60.0,-1.352397e+07,4.661997e+06,32.0
68,33,2019-04-30 00:50:01.892,100.000000,100.000000,NOT_CHARGING,NOT_CHARGING,LOCKED,LOCKED,2019-04-30 03:58:08.229,US-SFO,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.352186e+07,4.659454e+06,130.0


In [6]:
# create df from selected start and end hours
# bin the rental start positions
import pandas as pd
from pyproj import Proj

# get the rentals file
RENTAL_DATAFILE = 'rental_data_april_2019.csv'

# get df and clean up
rental_df = pd.read_csv(
    RENTAL_DATAFILE,
    parse_dates=['reserved_at', 'booked_at', 'ended_at'],
    infer_datetime_format=True
).dropna()

# extract the rental start dow/hour
rental_df['reserved_at_hour'] = rental_df['reserved_at'].dt.hour
rental_df['reserved_at_dow'] = rental_df['reserved_at'].dt.day_name()

def convert_to_mercator(lngs, lats):
    projection = Proj(init='epsg:3857')
    xs = []
    ys = []
    for lng, lat in zip(lngs, lats):
        x, y = projection(lng, lat)
        xs.append(x)
        ys.append(y)
    return xs, ys

anaylsis_dow = 'Tuesday'
analysis_hour_range = list(range(0, 3))

# convert all points to mercator projection
rental_df['start_merc_lng'], rental_df['start_merc_lat'] = convert_to_mercator(
    rental_df['start_location_lng'], rental_df['start_location_lat'])

rental_df = rental_df.groupby(['reserved_at_dow', 'reserved_at_hour'])
rental_analysis_df = rental_df.get_group((anaylsis_dow, analysis_hour_range[0])).append(
    [rental_df.get_group((anaylsis_dow, analysis_hour_range[1])), 
     rental_df.get_group((anaylsis_dow, analysis_hour_range[2]))])
rental_analysis_df

Unnamed: 0,rental_id,customer_id,reserved_at,booked_at,ended_at,start_location_lat,start_location_lng,end_location_lat,end_location_lng,reserved_at_hour,reserved_at_dow,start_merc_lng,start_merc_lat
13,617286,62074,2019-04-30 00:24:52.655560,2019-04-30 00:25:31.475729,2019-04-30 00:50:25.932545,38.571457,-121.470505,38.569824,-121.446960,0,Tuesday,-1.352203e+07,4.660471e+06
328,571923,46439,2019-04-02 00:06:20.558885,2019-04-02 00:06:49.143873,2019-04-02 00:28:03.195584,37.852750,-122.270320,37.865860,-122.301260,0,Tuesday,-1.361107e+07,4.558645e+06
401,582358,64463,2019-04-09 00:09:02.182974,2019-04-09 00:11:36.180662,2019-04-09 05:41:48.035763,37.790916,-122.227340,37.788734,-122.225180,0,Tuesday,-1.360629e+07,4.549931e+06
429,604831,34969,2019-04-23 00:23:09.324836,2019-04-23 00:33:43.354068,2019-04-23 01:47:57.120200,37.853485,-122.267840,37.858353,-122.273250,0,Tuesday,-1.361079e+07,4.558749e+06
628,593769,6893,2019-04-16 00:31:34.897723,2019-04-16 00:53:37.353959,2019-04-16 01:34:41.544311,37.856827,-122.291810,37.856630,-122.254260,0,Tuesday,-1.361346e+07,4.559220e+06
638,617354,26253,2019-04-30 00:58:39.509430,2019-04-30 00:59:30.703064,2019-04-30 01:23:19.042026,37.858665,-122.287830,37.828957,-122.262780,0,Tuesday,-1.361302e+07,4.559479e+06
716,572006,12487,2019-04-02 00:58:43.655977,2019-04-02 01:05:59.503194,2019-04-02 01:23:25.345332,37.858430,-122.252880,37.876884,-122.292830,0,Tuesday,-1.360913e+07,4.559446e+06
834,617319,8232,2019-04-30 00:36:30.073461,2019-04-30 00:53:39.697776,2019-04-30 00:56:33.301992,37.874542,-122.268650,37.878315,-122.266870,0,Tuesday,-1.361088e+07,4.561718e+06
871,582369,5818,2019-04-09 00:17:44.344047,2019-04-09 00:23:57.325829,2019-04-09 00:34:55.395707,37.869490,-122.253470,37.879130,-122.270350,0,Tuesday,-1.360919e+07,4.561005e+06
872,582343,46388,2019-04-09 00:04:49.324744,2019-04-09 00:13:08.803651,2019-04-09 00:29:23.580413,37.796707,-122.275406,37.832573,-122.260440,0,Tuesday,-1.361164e+07,4.550747e+06


In [7]:
# map df to axial (hexes), then take sum of hours 
from bokeh.util.hex import cartesian_to_axial

# binning time
# map the points to hex grid
rental_analysis_df['q'], rental_analysis_df['r'] = cartesian_to_axial(
    rental_analysis_df['start_merc_lng'],
    rental_analysis_df['start_merc_lat'],
    size=500,
    orientation='pointytop'
)

rental_analysis_df

Unnamed: 0,rental_id,customer_id,reserved_at,booked_at,ended_at,start_location_lat,start_location_lng,end_location_lat,end_location_lng,reserved_at_hour,reserved_at_dow,start_merc_lng,start_merc_lat,q,r
13,617286,62074,2019-04-30 00:24:52.655560,2019-04-30 00:25:31.475729,2019-04-30 00:50:25.932545,38.571457,-121.470505,38.569824,-121.446960,0,Tuesday,-1.352203e+07,4.660471e+06,-12507,-6214
328,571923,46439,2019-04-02 00:06:20.558885,2019-04-02 00:06:49.143873,2019-04-02 00:28:03.195584,37.852750,-122.270320,37.865860,-122.301260,0,Tuesday,-1.361107e+07,4.558645e+06,-12678,-6078
401,582358,64463,2019-04-09 00:09:02.182974,2019-04-09 00:11:36.180662,2019-04-09 05:41:48.035763,37.790916,-122.227340,37.788734,-122.225180,0,Tuesday,-1.360629e+07,4.549931e+06,-12678,-6067
429,604831,34969,2019-04-23 00:23:09.324836,2019-04-23 00:33:43.354068,2019-04-23 01:47:57.120200,37.853485,-122.267840,37.858353,-122.273250,0,Tuesday,-1.361079e+07,4.558749e+06,-12677,-6078
628,593769,6893,2019-04-16 00:31:34.897723,2019-04-16 00:53:37.353959,2019-04-16 01:34:41.544311,37.856827,-122.291810,37.856630,-122.254260,0,Tuesday,-1.361346e+07,4.559220e+06,-12680,-6079
638,617354,26253,2019-04-30 00:58:39.509430,2019-04-30 00:59:30.703064,2019-04-30 01:23:19.042026,37.858665,-122.287830,37.828957,-122.262780,0,Tuesday,-1.361302e+07,4.559479e+06,-12679,-6079
716,572006,12487,2019-04-02 00:58:43.655977,2019-04-02 01:05:59.503194,2019-04-02 01:23:25.345332,37.858430,-122.252880,37.876884,-122.292830,0,Tuesday,-1.360913e+07,4.559446e+06,-12675,-6079
834,617319,8232,2019-04-30 00:36:30.073461,2019-04-30 00:53:39.697776,2019-04-30 00:56:33.301992,37.874542,-122.268650,37.878315,-122.266870,0,Tuesday,-1.361088e+07,4.561718e+06,-12675,-6082
871,582369,5818,2019-04-09 00:17:44.344047,2019-04-09 00:23:57.325829,2019-04-09 00:34:55.395707,37.869490,-122.253470,37.879130,-122.270350,0,Tuesday,-1.360919e+07,4.561005e+06,-12674,-6081
872,582343,46388,2019-04-09 00:04:49.324744,2019-04-09 00:13:08.803651,2019-04-09 00:29:23.580413,37.796707,-122.275406,37.832573,-122.260440,0,Tuesday,-1.361164e+07,4.550747e+06,-12683,-6068


In [8]:
rental_analysis_df[(rental_analysis_df['q'] == -12681) & (rental_analysis_df['r'] == -6070)]

Unnamed: 0,rental_id,customer_id,reserved_at,booked_at,ended_at,start_location_lat,start_location_lng,end_location_lat,end_location_lng,reserved_at_hour,reserved_at_dow,start_merc_lng,start_merc_lat,q,r
7017,593781,3596,2019-04-16 00:35:47.562259,2019-04-16 00:42:49.958184,2019-04-16 00:54:25.415498,37.80681,-122.26499,37.805637,-122.24762,0,Tuesday,-13610480.0,4552170.0,-12681,-6070
9612,617306,36299,2019-04-30 00:31:52.239608,2019-04-30 00:43:19.090438,2019-04-30 01:58:36.531404,37.808746,-122.26682,37.760727,-122.25883,0,Tuesday,-13610680.0,4552443.0,-12681,-6070
11329,593691,8852,2019-04-16 00:00:00.488788,2019-04-16 00:10:19.510909,2019-04-16 01:10:04.912240,37.81041,-122.26673,37.829758,-122.26344,0,Tuesday,-13610670.0,4552678.0,-12681,-6070
28455,593717,14883,2019-04-16 00:08:58.278304,2019-04-16 00:14:32.773954,2019-04-16 00:35:41.777708,37.807175,-122.26509,37.86743,-122.28136,0,Tuesday,-13610490.0,4552222.0,-12681,-6070
28776,582362,37946,2019-04-09 00:14:58.212733,2019-04-09 00:42:02.267983,2019-04-09 01:11:10.533836,37.81073,-122.26735,37.860546,-122.27239,0,Tuesday,-13610740.0,4552723.0,-12681,-6070
33235,593771,4501,2019-04-16 00:31:58.200453,2019-04-16 00:38:16.752980,2019-04-16 00:52:52.786485,37.811157,-122.26755,37.846066,-122.27834,0,Tuesday,-13610760.0,4552783.0,-12681,-6070
11608,572078,2866,2019-04-02 01:46:11.378155,2019-04-02 01:57:17.355245,2019-04-02 02:07:16.326881,37.811237,-122.26226,37.795834,-122.27051,1,Tuesday,-13610170.0,4552794.0,-12681,-6070


In [9]:
# map df to axial (hexes), then take sum of hours 
from bokeh.util.hex import cartesian_to_axial

# binning time
# map the points to hex grid
figure_df['q'], figure_df['r'] = cartesian_to_axial(
    figure_df['merc_lng'],
    figure_df['merc_lat'],
    size=500,
    orientation='pointytop'
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


In [10]:
# binned_df[('Monday', 10),]  # FYI groupby changes the syntax
binned_df = figure_df.groupby(['q', 'r'])

# could add idle duration data later
idle_minutes_df = binned_df['idle_duration_minutes'].agg(['count', 'mean', 'median', 'sum', 'min', 'max']).reset_index()

idle_minutes_df.drop(idle_minutes_df.columns[[0, 1, 2]], axis=1, inplace=True)
idle_minutes_df.columns = ['total_idle_mins_mean', 'total_idle_mins_median',
                           'total_idle_mins_sum', 'total_idle_mins_min', 'total_idle_mins_max']

# binned_df[('Monday', 10),]  # FYI groupby changes the syntax. access like this
_hexbin_source = binned_df['analysis_hours'].agg(['count', 'mean', 'median', 'sum', 'min', 'max']).reset_index()
_hexbin_source.columns = ['q', 'r', 'idle_events_count', 'idle_minutes_per_hour_mean',
                          'idle_minutes_per_hour_median', 'idle_minutes_per_hour_sum',
                          'idle_minutes_per_hour_min', 'idle_minutes_per_hour_max']
_hexbin_source = _hexbin_source.merge(idle_minutes_df, left_index=True, right_index=True)
_hexbin_source

Unnamed: 0,q,r,idle_events_count,idle_minutes_per_hour_mean,idle_minutes_per_hour_median,idle_minutes_per_hour_sum,idle_minutes_per_hour_min,idle_minutes_per_hour_max,total_idle_mins_mean,total_idle_mins_median,total_idle_mins_sum,total_idle_mins_min,total_idle_mins_max
0,-12715,-6063,1,60.000000,60.0,60.0,60.0,60.0,114.058800,114.058800,114.058800,114.058800,114.058800
1,-12714,-6065,2,180.000000,180.0,360.0,180.0,180.0,1794.231533,1794.231533,3588.463067,719.807867,2868.655200
2,-12714,-6064,2,180.000000,180.0,360.0,180.0,180.0,2549.312117,2549.312117,5098.624233,868.781183,4229.843050
3,-12714,-6063,2,124.500000,124.5,249.0,69.0,180.0,1404.896875,1404.896875,2809.793750,291.032483,2518.761267
4,-12713,-6065,1,180.000000,180.0,180.0,180.0,180.0,1034.123267,1034.123267,1034.123267,1034.123267,1034.123267
5,-12713,-6064,1,180.000000,180.0,180.0,180.0,180.0,2033.155017,2033.155017,2033.155017,2033.155017,2033.155017
6,-12713,-6063,2,180.000000,180.0,360.0,180.0,180.0,2913.806383,2913.806383,5827.612767,2517.918083,3309.694683
7,-12712,-6064,6,118.333333,129.0,710.0,36.0,180.0,1688.183422,1836.457975,10129.100533,78.375733,3435.760717
8,-12711,-6065,4,180.000000,180.0,720.0,180.0,180.0,1881.913804,1902.799925,7527.655217,1162.834400,2559.220967
9,-12711,-6064,10,180.000000,180.0,1800.0,180.0,180.0,2057.164675,1572.323250,20571.646750,904.293550,3856.132867


In [11]:
# rental_analysis_df_binned = rental_analysis_df['reserved_at'].agg(['count']).reset_index()
# rental_analysis_df_binned
rental_analysis_df_binned = rental_analysis_df.groupby(['q', 'r'])['reserved_at'].agg(['count']).reset_index()
rental_analysis_df_binned

Unnamed: 0,q,r,count
0,-12714,-6063,1
1,-12707,-6058,1
2,-12705,-6062,7
3,-12703,-6064,1
4,-12702,-6065,10
5,-12702,-6064,8
6,-12701,-6065,5
7,-12701,-6064,1
8,-12700,-6070,1
9,-12700,-6068,13


In [12]:
# join rental df to _hexbin_source df on q and r
# create new column of rentals/vehicles

_hexbin_source_final = _hexbin_source.merge(rental_analysis_df_binned, on=['q', 'r'])
_hexbin_source_final['rental_vehicle_ratio'] = _hexbin_source_final['count']/_hexbin_source_final['idle_events_count']
_hexbin_source_final

Unnamed: 0,q,r,idle_events_count,idle_minutes_per_hour_mean,idle_minutes_per_hour_median,idle_minutes_per_hour_sum,idle_minutes_per_hour_min,idle_minutes_per_hour_max,total_idle_mins_mean,total_idle_mins_median,total_idle_mins_sum,total_idle_mins_min,total_idle_mins_max,count,rental_vehicle_ratio
0,-12714,-6063,2,124.500000,124.5,249.0,69.0,180.0,1404.896875,1404.896875,2809.793750,291.032483,2518.761267,1,0.500000
1,-12707,-6058,23,163.739130,180.0,3766.0,56.0,180.0,1215.503256,1257.306250,27956.574883,158.091750,2820.043267,1,0.043478
2,-12705,-6062,25,119.320000,148.0,2983.0,3.0,180.0,1013.215690,695.963000,25330.392250,85.907150,2230.761000,7,0.280000
3,-12702,-6065,32,113.937500,129.0,3646.0,1.0,180.0,1021.476116,809.184867,32687.235717,0.921500,2637.892967,10,0.312500
4,-12702,-6064,27,133.925926,180.0,3616.0,16.0,180.0,1676.283340,1334.433050,45259.650167,188.719267,3913.526150,8,0.296296
5,-12701,-6065,9,119.777778,154.0,1078.0,1.0,180.0,522.070906,293.381583,4698.638150,126.505750,1430.930983,5,0.555556
6,-12701,-6064,25,171.640000,180.0,4291.0,65.0,180.0,1531.562603,981.123617,38289.065083,219.771683,4271.445917,1,0.040000
7,-12700,-6070,10,164.200000,174.5,1642.0,130.0,180.0,2074.428360,2250.717183,20744.283600,619.213767,4041.423850,1,0.100000
8,-12700,-6068,53,125.415094,166.0,6647.0,2.0,180.0,809.312687,621.451417,42893.572400,25.249583,3594.743750,13,0.245283
9,-12694,-5996,1,1.000000,1.0,1.0,1.0,1.0,1.219033,1.219033,1.219033,1.219033,1.219033,1,1.000000


In [15]:
from bokeh.plotting import figure, show, output_file
from bokeh.transform import linear_cmap
from bokeh.tile_providers import CARTODBPOSITRON

# draw the map
map_figure = figure(
    x_range=(-13618976.4221, -13605638.1607),  # bounding box for starting view
    y_range=(4549035.0828, 4564284.2700),
    x_axis_type='mercator',
    y_axis_type='mercator',
    plot_width=750,
    plot_height=750,
    title=f'Available Supply for {dow} {start_hour} to {end_hour}',
    tooltips=[('(q, r)', '(@q, @r)'),
              ('vehicles', '@idle_events_count'),
              ('rentals', '@count'),
              ('rental_vehicle_ratio', '@rental_vehicle_ratio'),
              ('mean idle minutes/block', '@idle_minutes_per_hour_mean'),
              ('median idle minutes/block', '@idle_minutes_per_hour_median'),
              ('sum idle minutes/block', '@idle_minutes_per_hour_sum'),
              ('min idle minutes/block', '@idle_minutes_per_hour_min'),
              ('max idle minutes/block', '@idle_minutes_per_hour_max'),
              ('mean total idle minutes', '@total_idle_mins_mean'),
              ('median total idle minutes', '@total_idle_mins_median'),
              ('sum total idle minutes', '@total_idle_mins_sum'),
              ('min total idle minutes', '@total_idle_mins_min'),
              ('max total idle minutes', '@total_idle_mins_max')]
)

# add background of streets for context
map_figure.add_tile(CARTODBPOSITRON)

# add hexes, with modified fill color based on mean of idle time per hour
map_figure.hex_tile(q='q', r='r', size=500, source=_hexbin_source_final,
                    hover_color='pink', hover_alpha=0.8, fill_alpha=0.3,
                    fill_color=linear_cmap('rental_vehicle_ratio', 'Magma256',
                                           0, max(_hexbin_source_final['rental_vehicle_ratio'])))

In [16]:
# generate map in browser
from bokeh.io import show
show(map_figure)