In [1]:
# settings - initialize this
region = {
     'oakland': dict(
         x_min=-13618976.4221,
         x_max=-13605638.1607,
         y_min=4549035.0828,
         y_max=4564284.2700,
         timezone='America/Los_Angeles'),
     'madrid': dict(
         x_min=-416448.0394,
         x_max=-406912.5201,
         y_min=4921025.4356,
         y_max=4931545.0816,
         timezone='Europe/Madrid')
}

In [4]:
import pandas as pd
import calendar

# ===== CHANGE THE INPUT FILES =====
VEHICLE_DATAFILE = 'vehicle_availability_data_darwin_2019_05_01_2019_05_15_with_dow_hour_mask.csv'
RENTAL_DATAFILE = 'rental_data_darwin_2019_05_01_2019_05_15_with_dow_hour_mask.csv'
APP_EVENTS_DATAFILE = 'nearest_vehicle_app_data_darwin_2019_05_01_2019_05_15_with_threshold_mask.csv'

VEHICLE_DT_COLS = ['available_at', 'unavailable_at']
RENTAL_DT_COLS = ['reserved_at', 'booked_at', 'ended_at']
APP_EVENTS_DT_COLS = ['event_datetime']

APP_EVENTS_MASKS = ['event_dow', 'event_hour']

selected_region = region['oakland']

# Start filtering for data to display
# 3 hour chunks
analysis_dow = 'Tuesday'
analysis_hour_range = list(range(15, 18))

assert len(analysis_hour_range) == 3  # I SAID 3 HOUR CHUNKS!
assert analysis_dow in list(calendar.day_name)  # check dow name is correct

def convert_to_mercator(lngs, lats):
    # converts incoming iterable degrees to mercator
    from pyproj import Proj  # put here for clarity
    
    projection = Proj(init='epsg:3857')
    xs = []
    ys = []
    for lng, lat in zip(lngs, lats):
        x, y = projection(lng, lat)
        xs.append(x)
        ys.append(y)
    return xs, ys

# ===== LOAD SOME DATA ======
# get ready vehicle event data
supply_df = pd.read_csv(
        VEHICLE_DATAFILE,
        parse_dates=VEHICLE_DT_COLS,
        infer_datetime_format=True
    ).dropna()

# get ready rental data
rental_df = pd.read_csv(
    RENTAL_DATAFILE,
    parse_dates=RENTAL_DT_COLS,
    infer_datetime_format=True
).dropna()

app_events_df = pd.read_csv(
    APP_EVENTS_DATAFILE,
    parse_dates=APP_EVENTS_DT_COLS,
    infer_datetime_format=True
).dropna()

# TODO: couple the mercator projection and binning?
# ===== CONVERT LAT/LNG FOR MERCATOR PROJECTION =====
# convert all of supply_df points to mercator projection
supply_df['merc_lng'], supply_df['merc_lat'] = convert_to_mercator(
    supply_df['lng'], supply_df['lat'])

# convert rental start position to mercator projection
rental_df['start_merc_lng'], rental_df['start_merc_lat'] = convert_to_mercator(
    rental_df['start_location_lng'], rental_df['start_location_lat'])

# convert app events position to mercator projection
app_events_df['merc_lng'], app_events_df['merc_lat'] = convert_to_mercator(
    app_events_df['data-user_location-lng'], app_events_df['data-user_location-lat'])

# ===== FILTER FOR DATA YOU WANT =====
# filter for the hour range
# figure_df = supply_df[supply_df[dow, hour] != 0]
# supply_df is multi-indexed columns. has to be accessed differently
supply_df['analysis_hours'] = (supply_df[f"('{analysis_dow}', {analysis_hour_range[0]})"] + 
                               supply_df[f"('{analysis_dow}', {analysis_hour_range[1]})"] + 
                               supply_df[f"('{analysis_dow}', {analysis_hour_range[2]})"])
supply_figure_df = supply_df[supply_df['analysis_hours'] != 0]  # to be used for the figure


rental_df = rental_df.groupby(['reserved_at_dow', 'reserved_at_hour'])  # group the data for filtering
rental_figure_df = rental_df.get_group((analysis_dow, analysis_hour_range[0])).append(  # to be used for the figure
    [rental_df.get_group((analysis_dow, analysis_hour_range[1])), 
     rental_df.get_group((analysis_dow, analysis_hour_range[2]))])

app_events_df = app_events_df.groupby(APP_EVENTS_MASKS)
app_events_df = app_events_df.get_group((analysis_dow, analysis_hour_range[0])).append(  # to be used for the figure
    [app_events_df.get_group((analysis_dow, analysis_hour_range[1])), 
     app_events_df.get_group((analysis_dow, analysis_hour_range[2]))])


# ===== START BINNING =====
from bokeh.util.hex import cartesian_to_axial

# ===== BIN THE SUPPLY =====
supply_figure_df['q'], supply_figure_df['r'] = cartesian_to_axial(
    supply_figure_df['merc_lng'],
    supply_figure_df['merc_lat'],
    size=500,
    orientation='pointytop'
)

# ===== BIN THE RENTALS =====
rental_figure_df['q'], rental_figure_df['r'] = cartesian_to_axial(
    rental_figure_df['start_merc_lng'],
    rental_figure_df['start_merc_lat'],
    size=500,
    orientation='pointytop'
)

# ===== BIN THE APP EVENTS =====
app_events_df['q'], app_events_df['r'] = cartesian_to_axial(
    app_events_df['merc_lng'],
    app_events_df['merc_lat'],
    size=500,
    orientation='pointytop'
)

# ======================
# SUPPLY DATA PROCESSING
# ======================
# ===== GROUP THE SUPPLY DATA FOR AGGREGATION =====
supply_figure_df = supply_figure_df.groupby(['q', 'r'])

# ===== AGGREGATE THE TOTAL IDLE MINUTES =====
total_idle_minutes_df = supply_figure_df['idle_duration_minutes'].agg(['count', 'mean', 'median', 'sum', 'min', 'max']).reset_index()
total_idle_minutes_df.drop(total_idle_minutes_df.columns[[0, 1, 2]], axis=1, inplace=True)
total_idle_minutes_df.columns = ['total_idle_mins_mean', 'total_idle_mins_median',
                           'total_idle_mins_sum', 'total_idle_mins_min', 'total_idle_mins_max']

# ===== AGGREGATE THE BLOCK IDLE MINUTES =====
# binned_df[('Monday', 10),]  # FYI groupby changes the syntax. access like this
block_idle_minutes_df = supply_figure_df['analysis_hours'].agg(['count', 'mean', 'median', 'sum', 'min', 'max']).reset_index()
block_idle_minutes_df.columns = ['q', 'r', 'idle_events_count', 'idle_minutes_per_block_mean',
                          'idle_minutes_per_block_median', 'idle_minutes_per_block_sum',
                          'idle_minutes_per_block_min', 'idle_minutes_per_block_max']

idle_minutes_df = block_idle_minutes_df.merge(total_idle_minutes_df, left_index=True, right_index=True)

# ======================
# RENTAL DATA PROCESSING
# ======================
# ===== GROUP THE RENTALS AND COUNT THE BINNED RENTALS =====
rental_analysis_df_binned = rental_figure_df.groupby(['q', 'r'])['reserved_at'].agg(['count']).reset_index()
rental_analysis_df_binned.rename(columns={'count': 'rental_count'}, inplace=True)

# ==========================
# APP EVENTS DATA PROCESSING
# ==========================
# ===== SEPARATE THE APP EVENTS BASED ON DISTANCE TO NEAREST VEHICLE =====
in_range_app_events = app_events_df[app_events_df['vehicle_nearby'] == True] 
out_of_range_app_events = app_events_df[app_events_df['vehicle_nearby'] == False] 

# # ===== GROUP THE APP EVENTS AND COUNT THEM =====
in_range_app_events = in_range_app_events.groupby(['q', 'r'])['event_datetime'].agg(['count']).reset_index()
in_range_app_events.rename(columns={'count': 'in_range_app_events_count'}, inplace=True)
out_of_range_app_events = out_of_range_app_events.groupby(['q', 'r'])['event_datetime'].agg(['count']).reset_index()
out_of_range_app_events.rename(columns={'count': 'out_of_range_app_events_count'}, inplace=True)

# ===== USE THE BINNED DATA AND CALCULATE SOME FIGURES =====
_hexbin_source = idle_minutes_df.merge(
    rental_analysis_df_binned, on=['q', 'r'], how='left').merge(
    in_range_app_events, on=['q', 'r'], how='left').merge(
    out_of_range_app_events, on=['q', 'r'], how='left'
)
_hexbin_source['rental_vehicle_ratio'] = _hexbin_source['rental_count']/_hexbin_source['idle_events_count']
_hexbin_source['rentals_per_available_minutes'] = _hexbin_source['rental_count']/_hexbin_source['idle_minutes_per_block_sum']
_hexbin_source['propensity_to_rent'] = _hexbin_source['rental_count']/_hexbin_source['in_range_app_events_count']
_hexbin_source['projected_demand_increase'] = _hexbin_source['out_of_range_app_events_count'] * _hexbin_source['propensity_to_rent']
_hexbin_source.fillna(-1, inplace=True)

# ===== PLOT THE DATA TO THE MAP =====
from bokeh.plotting import figure, show, output_file
from bokeh.transform import linear_cmap
from bokeh.tile_providers import CARTODBPOSITRON
import colorcet as cc

# draw the map
map_figure = figure(
    x_range=(selected_region['x_min'], selected_region['x_max']),  # bounding box for starting view
    y_range=(selected_region['y_min'], selected_region['y_max']),
    x_axis_type='mercator',
    y_axis_type='mercator',
    plot_width=750,
    plot_height=750,
    title=f'Available Supply for {analysis_dow} {analysis_hour_range[0]} to {analysis_hour_range[-1]}',
    tooltips=[('(q, r)', '(@q, @r)'),
              ('projected_demand_increase', '@projected_demand_increase'),
              ('propensity_to_rent', '@propensity_to_rent'),
              ('vehicles', '@idle_events_count'),
              ('rentals', '@rental_count'),
              ('out_of_range_app_events', '@out_of_range_app_events_count'),
              ('in_range_app_events', '@in_range_app_events_count'),
              ('rental_vehicle_ratio', '@rental_vehicle_ratio'),
              ('mean idle minutes/block', '@idle_minutes_per_block_mean'),
              ('median idle minutes/block', '@idle_minutes_per_block_median'),
              ('sum idle minutes/block', '@idle_minutes_per_block_sum'),
              ('min idle minutes/block', '@idle_minutes_per_block_min'),
              ('max idle minutes/block', '@idle_minutes_per_block_max'),
              ('mean total idle minutes', '@total_idle_mins_mean'),
              ('median total idle minutes', '@total_idle_mins_median'),
              ('sum total idle minutes', '@total_idle_mins_sum'),
              ('min total idle minutes', '@total_idle_mins_min'),
              ('max total idle minutes', '@total_idle_mins_max')]
)

# add background of streets for context
map_figure.add_tile(CARTODBPOSITRON)

# add hexes, with modified fill color based on mean of idle time per hour
map_figure.hex_tile(q='q', r='r', size=500, source=_hexbin_source,
                    hover_color='pink', hover_alpha=0.8, fill_alpha=0.3,
                    fill_color=linear_cmap('projected_demand_increase', cc.fire,
                                           0, max(_hexbin_source['projected_demand_increase'])))

# ===== DISPLAY THE MAP =====
# generate map in browser
from bokeh.io import show
show(map_figure)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


ModuleNotFoundError: No module named 'colorcet'

In [7]:
app_events_df['q'], app_events_df['r'] = cartesian_to_axial(
    app_events_df['data-user_location-lng'],
    app_events_df['data-user_location-lat'],
    size=500,
    orientation='pointytop'
)

In [13]:
app_events_df

Unnamed: 0.1,Unnamed: 0,event_datetime,data-distance,data-user_location-lat,data-user_location-lng,data-vehicle_latitude,data-vehicle_longitude,vehicle_nearby,event_hour,event_dow
15342,15371,2019-05-14 22:36:10.000,8.568238e+02,37.867615,-122.255148,37.859910,-122.255380,False,15,Tuesday
15343,15372,2019-05-14 22:36:24.000,8.621279e+02,37.867676,-122.255570,37.859910,-122.255380,False,15,Tuesday
15344,15373,2019-05-14 22:36:26.790,2.750000e+02,37.864418,-122.284969,37.865524,-122.287770,True,15,Tuesday
15345,15374,2019-05-14 22:36:28.270,1.470000e+02,37.880934,-122.269767,37.881516,-122.268270,True,15,Tuesday
15346,15375,2019-05-14 22:37:08.000,2.937837e+02,37.865784,-122.268945,37.864067,-122.266430,True,15,Tuesday
15347,15376,2019-05-14 22:37:23.000,2.369049e+02,37.844940,-122.261405,37.846653,-122.259780,True,15,Tuesday
15348,15377,2019-05-14 22:37:24.000,8.568837e+02,37.867615,-122.255144,37.859910,-122.255380,False,15,Tuesday
15349,15379,2019-05-14 22:37:39.000,8.759097e+02,38.539764,-121.427430,38.546806,-121.432010,False,15,Tuesday
15350,15380,2019-05-14 22:37:49.000,6.803418e+01,37.805237,-122.295444,37.805084,-122.296190,True,15,Tuesday
15351,15381,2019-05-14 22:37:51.280,4.058000e+03,37.873122,-122.305014,37.876137,-122.259050,False,15,Tuesday
