In [1]:
%%javascript
// Keep the notebook from putting a scrollbar on large cell outputs:
// the override always reports that the output should not scroll.
IPython.OutputArea.prototype._should_scroll = (lines) => false;

<IPython.core.display.Javascript object>

In [2]:
from datetime import timedelta

import pandas as pd

from bokeh.models import WheelZoomTool
from bokeh.plotting import figure, show, output_file
from bokeh.tile_providers import get_provider, Vendors
from bokeh.io import output_notebook

from stevelib import snap_to_roads, convert_data_pipeline_to_df, plot2, transform_wgs84_to_web_mercator

# Configure Bokeh to render plots inline in the notebook, so each show() call
# further down displays its figure in the output of the calling cell.
output_notebook()

In [3]:
# Timezone name handed to convert_csv_to_df, which uses it to localize the
# 'Time departed' timestamps of the reference CSV.
TIMEZONE = 'America/Los_Angeles'

In [4]:
# Load the raw event export through the stevelib helper. Per the preview below,
# the result is indexed by a tz-aware (+00:00) datetime and has id/lat/long columns.
df = convert_data_pipeline_to_df('test_data/rawevents_daytona-prod_2019-09-05-21')
df.head()

Unnamed: 0_level_0,id,lat,long
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-09-05 20:38:04+00:00,248,37.781891,-122.396586
2019-09-05 21:00:01+00:00,248,37.781891,-122.396586
2019-09-05 21:00:01+00:00,248,37.780243,-122.395836
2019-09-05 21:00:01+00:00,248,37.781891,-122.396586
2019-09-05 21:03:43+00:00,248,37.781891,-122.398175


In [5]:
def convert_csv_to_df(filename, timezone):
    """Load the reference-location CSV into a DataFrame.

    The CSV must provide the columns 'Time departed', 'Time arrived',
    'Take pictures', 'lat', 'long' and 'ids'.

    Args:
        filename: Path of the CSV file to read.
        timezone: Timezone name used to localize the 'Time departed'
            timestamps (e.g. 'America/Los_Angeles').

    Returns:
        A DataFrame with the default integer index in which
        - 'datetime' holds the tz-aware 'Time departed' timestamps,
        - 'lat' and 'long' are floats,
        - 'ids' holds a list of id strings per row, each stripped of
          surrounding whitespace (missing cells are left as they are),
        - 'Time departed', 'Time arrived' and 'Take pictures' are removed.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    def _split_ids(raw):
        # Strip each id: "a, 248" must yield '248', not ' 248', because the ids
        # are later compared for exact equality with the raw-event ids.
        if isinstance(raw, str):
            return [part.strip() for part in raw.split(',')]
        return raw  # missing value: leave untouched

    # Read 'ids' as text so a column holding only single numeric ids is not
    # inferred as int (which would make the split below impossible).
    df = pd.read_csv(filename, dtype={'ids': str})
    df['datetime'] = pd.to_datetime(df['Time departed']).dt.tz_localize(timezone)
    df['lat'] = df['lat'].astype(float)
    df['long'] = df['long'].astype(float)
    df['ids'] = df['ids'].map(_split_ids)  # split the multiple ids into a list
    df = df.drop(columns=['Time departed', 'Time arrived', 'Take pictures'])
    return df
# Load the reference sheet; its 'Time departed' values are localized to TIMEZONE
# and each row's 'ids' cell becomes a list of ids.
ref = convert_csv_to_df('test_data/Sep 4 experimental iOS app data gathering - Sheet1.csv', TIMEZONE)
ref.head()

Unnamed: 0,Location,(Description),ids,lat,long,datetime
0,1,clay st,"[1G6YV34AX55602068, 248, JTDKDTB39G1589379]",37.794838,-122.401063,2019-09-05 14:23:20-07:00
1,2,?,"[1G6YV34AX55602068, 248, JTDKDTB39G1589379]",37.791824,-122.39977,2019-09-05 14:27:32-07:00
2,3,campton alley,"[1G6YV34AX55602068, 248, JTDKDTB39G1589379]",37.789057,-122.406366,2019-09-05 14:34:33-07:00
3,4,tow away zone,"[1G6YV34AX55602068, 248, JTDKDTB39G1589379]",37.79621,-122.397885,2019-09-05 14:43:35-07:00


In [22]:
# For every reference row and every id listed on it: take the most recent raw
# points recorded up to the reference timestamp, snap them to the road network,
# plot raw vs. snapped track on a map, and record the last snapped point.

# How many of the most recent raw points (up to the reference timestamp) are
# plotted and sent to snap_to_roads for each id.
SNAP_WINDOW = 80

snap_rows = []  # one [datetime, id, lat, long] record per snapped track

for index, row in ref.iterrows():
    print(index)
    print(row.datetime)

    # `id_` rather than `id`, so the builtin id() is not shadowed for later cells.
    for id_ in row['ids']:
        df_subset = df.loc[df['id'] == id_]
        if len(df_subset) <= SNAP_WINDOW:
            continue  # not enough raw events recorded for this id

        # Label slice on the datetime index: rows up to and including the
        # reference timestamp, then only the last SNAP_WINDOW of those.
        # (Fewer than SNAP_WINDOW rows are used when fewer precede the timestamp.)
        recent = df_subset.loc[:row['datetime']].iloc[-SNAP_WINDOW:]
        if recent.empty:
            # Every event for this id is later than the reference timestamp:
            # there is nothing to snap and no last point to record.
            continue

        # Build the map only once we know there is something to draw on it.
        p = figure(
            x_axis_type="mercator",
            y_axis_type="mercator",
            match_aspect=True,  # make sure the map isn't skewed?
            tools="pan,wheel_zoom,reset")
        p.add_tile(get_provider(Vendors.CARTODBPOSITRON))
        p.toolbar.active_scroll = p.select_one(WheelZoomTool)

        # original data
        plot2(p, recent['lat'].values, recent['long'].values, color='#d62728', size=5)

        # call the snap to roads API and get a df with lat/longs snapped to the road
        output = snap_to_roads(recent)

        # plot new route and the last point
        x2, y2 = transform_wgs84_to_web_mercator(output['long'].values, output['lat'].values)
        # NOTE(review): newer Bokeh releases spell this keyword `legend_label=`;
        # kept as `legend=` to match the Bokeh version this notebook was run with.
        p.line(x2, y2, line_width=2, legend='Snap to road')
        p.x(x2[-1], y2[-1], size=25)
        show(p)

        last_snapped = output.iloc[-1]
        snap_rows.append([row['datetime'], id_ + '_snap', last_snapped['lat'], last_snapped['long']])

res = pd.DataFrame(snap_rows, columns=['datetime', 'id', 'lat', 'long'])

0
2019-09-05 14:23:20-07:00


1
2019-09-05 14:27:32-07:00


2
2019-09-05 14:34:33-07:00


3
2019-09-05 14:43:35-07:00


In [23]:
# Persist the snapped end points. With to_csv's defaults the integer index is
# written as an unnamed first column.
res.to_csv('test_data/snap.csv')

In [24]:
# Display the recorded last snapped point for each (reference row, id) pair that was processed.
res

Unnamed: 0,datetime,id,lat,long
0,2019-09-05 14:23:20-07:00,248_snap,37.794943,-122.400878
1,2019-09-05 14:27:32-07:00,248_snap,37.791919,-122.39973
2,2019-09-05 14:34:33-07:00,248_snap,37.789015,-122.40648
3,2019-09-05 14:43:35-07:00,248_snap,37.796219,-122.397995
