In [1]:
from datetime import datetime
import pandas as pd
import common_geo
import numpy as np
import geocoder
import googlemaps
import os
import yaml

In [279]:
with open('credentials.yaml') as f: 
    credentials = yaml.load(f)
g_key = credentials['g_key']
g_key_tz = credentials['g_key_tz']
here_app_id = credentials['here_app_id']
here_app_code = credentials['here_app_code']
mapbox_token = credentials['mapbox_token']

AIzaSyCMS_AKA-PwkHFor1rfCLNzxVijWFCB4Lg
AIzaSyC92G3JW4JqVgaIWqO1DBFHhjKT2Z52114
o91vdIA1eMd4xgoJr9Ah
MC-T1SwjT0-ngD5-kiFUiw
sk.eyJ1Ijoic2xzdGFybmVzIiwiYSI6ImNpdmg2MWRxdDAweTUyeWxiZ3V3eGp0Mm4ifQ.jmO7REOrlmeW8mkKfrSQHQ


In [None]:
gmaps = googlemaps.Client(key=g_key)
gmaps_tz = googlemaps.Client(key=g_key_tz)

In [3]:
def string_getter(line, element_name):
    start = line.find('<{}>'.format(element_name))
    if start > 0:
        stop = line.find('</{}>'.format(element_name))
        if stop > 0:
            start = start + len(element_name) + 2
            return line[start:stop]
        else:
            return None
    else:
        return None

In [4]:
def coord_conv(s):
    return [float(i) for i in s.split(' ')]

In [5]:
def when_conv(s):
    try:
        return datetime.strptime(s, '%Y-%m-%dT%H:%M:%SZ')
    except:
        return s

In [30]:
def city_state_grabber(row):
    lat = row['Latitude']
    lng = row['Longitude']
    if row['Row_Num'] % 1000 == 0: print (row['Row_Num'], end=' ')
    if row['Speed'] is None or row['Speed'] < 5 or lat is None or lng is None: 
        return (None, None)
    try: 
        g = geocoder.here([lat, lng], method='reverse', 
                          app_id = here_app_id, 
                          app_code = here_app_code)
        return (g.city, g.state)
    except:
        return (None, None)

In [None]:
def kml_file_reader(kml_file):
    f = open(kml_file, 'r')
    state_flag = 0
    timed_position = []
    time = None
    loc = None
    for line in f.readlines():
        when = string_getter(line, 'when')
        if state_flag == 0 and when:
            time = when_conv(when)
            state_flag = 1
            continue
        coord = string_getter(line, 'gx:coord')
        if state_flag == 1 and coord:
            loc = coord_conv(coord)
            timed_position.append([time] + loc)
            state_flag = 0
            continue
    f.close()
    return timed_position

In [47]:
if os.path.exists('full_data.csv'):
    trip_data = pd.read_csv('full_data.csv')
    trip_data['Date'] = pd.to_datetime(trip_data['Date'])
    trip_data = trip_data.set_index(['Date'])
    trip_data.index = trip_data.index.tz_localize('UTC')
else:
    timed_position = kml_file_reader('jhs.kml')
    trip_data = pd.DataFrame(timed_position, columns=['Date', 'Longitude', 'Latitude', 'Altitude'])
    trip_data = trip_data.sort_values('Date')
    trip_data = trip_data.set_index(['Date'])
    trip_data.index = trip_data.index.tz_localize('UTC')
    trip_data.to_csv('full_data.csv')

In [48]:
# NOTE: trip start time = 2016-10-27 01:45:13+00:00
# NOTE: trip end time = 2016-11-09 18:35:00+00:00
trip_data = trip_data["2016-Oct-27 01:45":"2016-Nov-09 18:35"]
trip_data.to_csv('trip_data.csv')

In [49]:
x1,y1,z1 = common_geo.LLAtoXYZ(trip_data['Latitude'], trip_data['Longitude'], 0)
x2 = np.roll(x1,1)
y2 = np.roll(y1,1)
z2 = np.roll(z1,1)
x2[0] = 0
y2[0] = 0
z2[0] = 0
trip_data['Distance'] = np.sqrt((x2 - x1)**2 + (y2 - y1)**2 + (z2 - z1)**2)
trip_data['Distance'][0] = 0
trip_data['Distance'] *= 0.000621371 # converting from meters to miles
trip_data['Cumulative Distance'] = trip_data['Distance'].cumsum()

In [50]:
def speed(row):
    if (row['Distance'] == 0 or row['Time_Delta'] == 0 
        or row['Distance'] is None or row['Time_Delta'] is None):
        return None
    return row['Distance']/(row['Time_Delta'] / 60 / 60)
time = trip_data.index
td = [0]
for i, t in enumerate(time):
    if (i >= 1):
        ts = time[i] - time[i-1]
        td.append(ts.seconds)
trip_data['Time_Delta'] = td
trip_data['Speed'] = trip_data.apply(speed, axis=1)
trip_data.to_csv('trip_data.csv')

In [51]:
# this did not catch all of the correct breaks...
day_of_trip_list = []
day_of_trip = 1
for row in trip_data.itertuples():
    day = row.Index.day
    if row.Time_Delta >= (4 * 60 * 60):
        day_of_trip += 1
    day_of_trip_list.append(day_of_trip)
trip_data['Day'] = day_of_trip_list
trip_data.to_csv('trip_data.csv')

In [186]:
day_breaks = ['10/26/16 00:05', '10/27/16 11:03', '10/28/16 11:07', '10/29/16 11:12', 
              '10/30/16 11:10', '10/31/16 12:07', '11/1/16 11:13',
              '11/2/16 13:00', '11/3/16 18:18', '11/4/16 13:51',
              '11/5/16 13:45', '11/6/16 11:24', '11/7/16 11:56',
              '11/8/16 11:28', '11/9/16 13:30', '11/9/16 23:59']
for i in range(len(day_breaks)-1):
    trip_data.loc[day_breaks[i]:day_breaks[i+1],'Day'] = i + 1

In [84]:
trip_data_minutes = pd.DataFrame()
trip_data_minutes['Longitude'] = trip_data['Longitude'].resample('min').mean()
trip_data_minutes['Latitude'] = trip_data['Latitude'].resample('min').mean()
trip_data_minutes['Distance'] = trip_data['Distance'].resample('min').sum()
trip_data_minutes['Day'] = trip_data['Day'].resample('min').max()
trip_data_minutes['Day'].fillna(method='ffill', inplace=True)
# trip_data_minutes['Day'] = trip_data_minutes['Day'].astype('int') # doesnt work
trip_data_minutes['Time_Delta'] = trip_data['Time_Delta'].resample('min').sum()
trip_data_minutes['Speed'] = trip_data_minutes.apply(speed, axis=1)
trip_data_minutes['Speed'].fillna(0, inplace=True)
trip_data_minutes['Speed_i'] = trip_data_minutes['Speed'].astype('int')
trip_data_minutes['Cumulative Distance'] = trip_data_minutes['Distance'].cumsum()
trip_data_minutes.to_csv('trip_data_minutes.csv')

In [96]:
trip_data_stash = trip_data.copy()
trip_data_stash.to_csv('trip_data_stash.csv')
trip_data = trip_data_minutes

In [94]:
## Use to read from trip_data-- minutes and state.csv
# get state and city
if not ('City' in trip_data and 'State' in trip_data):
    cs_data = pd.read_csv('trip_data-- minutes and state.csv')
    cs_data['Date'] = pd.to_datetime(cs_data['Date'])
    cs_data = cs_data.set_index(['Date'])
    cs_data.index = cs_data.index.tz_localize('UTC')
    cs_data = cs_data.loc[:,['City', 'State']]
    trip_data = trip_data.join(cs_data)
    trip_data.to_csv('trip_data.csv')

In [None]:
## Use if you are grabbing anew
if not ('City' in trip_data and 'State' in trip_data):
    trip_data['Row_Num'] = list(range(len(trip_data)))
    c_s = trip_data.apply(city_state_grabber, axis=1)
    city, state = zip(*c_s)
    trip_data['City'] = list(city)
    trip_data['State'] = list(state)
    print ('')
    del trip_data['Row_Num']
    trip_data.to_csv('trip_data.csv')

In [113]:
if not ('Raw Offset' in trip_data and 'DST Offset' in trip_data):
    max_dist = trip_data['Cumulative Distance'].max()
    split_pt = max_dist / 2300 # 2500 is daily API limit

    trip_data['Cumulative_Distance'] = trip_data['Cumulative Distance']
    tz_name = []
    dst_offset = []
    tz_id = []
    raw_offset = []
    covered_dist = 0
    for row in trip_data.itertuples():
        if row.Cumulative_Distance - covered_dist >= split_pt:
            result = gmaps.timezone((row.Latitude, row.Longitude), 
                                    int(row.Index.timestamp()))
            covered_dist = row.Cumulative_Distance
            if result['status'] == 'OK':
                tz_name.append(result['timeZoneName'])
                dst_offset.append(result['dstOffset'])
                tz_id.append(result['timeZoneId'])
                raw_offset.append(result['rawOffset'])
            else:
                tz_name.append(None)
                dst_offset.append(None)
                tz_id.append(None)
                raw_offset.append(None)
        else:
            tz_name.append(None)
            dst_offset.append(None)
            tz_id.append(None)
            raw_offset.append(None)

    trip_data.loc[:, 'Timezone Name'] = tz_name
    trip_data.loc[:, 'DST Offset'] = dst_offset
    trip_data.loc[:, 'Timezone ID'] = tz_id
    trip_data.loc[:, 'Raw Offset'] = raw_offset

    del trip_data['Cumulative_Distance']

    trip_data.to_csv('trip_data.csv')

In [169]:
trip_data['DST Offset'] = trip_data['DST Offset'].fillna(method='bfill')
trip_data['Raw Offset'] = trip_data['Raw Offset'].fillna(method='bfill')
trip_data['Local'] = pd.to_datetime(trip_data.index)
trip_data['Local'] = trip_data['Local'] + pd.to_timedelta((trip_data['DST Offset'] + 
                                                           trip_data['Raw Offset']), unit='s')

In [None]:
trip_data_trim = trip_data[trip_data['Time Delta'] < 200]
trip_data_trim = trip_data_trim[trip_data_trim['Speed'] > 5]

In [193]:
for_csv = pd.DataFrame()
for_csv['Lat'] = trip_data['Latitude'].fillna(method='ffill')
for_csv['Long'] = trip_data['Longitude'].fillna(method='ffill')
for_csv.to_csv('lat-long.csv')

In [None]:
hourly_summary = pd.DataFrame()
hourly_summary['Longitude'] = trip_data['Longitude'].resample('H').mean()
hourly_summary['Latitude'] = trip_data['Latitude'].resample('H').mean()
hourly_summary['Distance'] = trip_data['Distance'].resample('H').sum()
hourly_summary['Day'] = trip_data['Day'].resample('H').max()
hourly_summary['Day'].fillna(method='ffill', inplace=True)
hourly_summary['Time_Delta'] = trip_data['Time_Delta'].resample('H').sum()
hourly_summary['Speed'] = hourly_summary.apply(speed, axis=1)
hourly_summary['Speed'].fillna(0, inplace=True)
hourly_summary['Speed_i'] = hourly_summary['Speed'].astype('int')
hourly_summary['Cumulative Distance'] = hourly_summary['Distance'].cumsum()
hourly_summary.to_csv('hourly_summary.csv')

In [280]:
dist_by_day = pd.pivot_table(trip_data, values='Distance', index=['Day'], aggfunc=np.sum)
dist_by_day

Day
1.0     167.360155
2.0     476.162637
3.0     333.056371
4.0     297.331604
5.0     526.227169
6.0     283.581199
7.0     410.548355
8.0     330.364534
9.0     279.858909
10.0    176.505005
11.0    694.138359
12.0    831.010276
13.0    619.221463
14.0     16.119660
15.0    252.663218
Name: Distance, dtype: float64

In [217]:
driving_data = trip_data[trip_data['Speed'] > 20]
driving_data[driving_data['Speed'] > 100] = np.nan # invalid data
avg_speed_by_day = pd.pivot_table(driving_data, values='Speed', index=['Day'], aggfunc=np.average)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


Day
1.0     69.992644
2.0     62.926742
3.0     45.519825
4.0     46.461279
5.0     52.426822
6.0     44.390648
7.0     51.901998
8.0     45.666785
9.0     40.211968
10.0    34.587577
11.0    75.608869
12.0    76.719398
13.0    68.775824
14.0    33.763115
15.0    64.398095
Name: Speed, dtype: float64

In [272]:
color_lut = ['#7BBD32', '#F94117', '#00CCFF', '#235927', '#69A02B',
             '#7BBD32', '#F94117', '#00CCFF', '#235927', '#69A02B',
             '#7BBD32', '#F94117', '#00CCFF', '#235927', '#69A02B',
             '#7BBD32', '#F94117', '#00CCFF', '#235927', '#69A02B']

def feature_maker_from_pos(row): #lat, long, i
    #print (row)
    i = str(int(row.Row_Num) + 1)
    if (row.Latitude is None or row.Longitude is None or 
        row.Latitude == np.nan or row.Longitude == np.nan): return ""
    lat = float("{0:.6f}".format(row.Latitude))
    long = float("{0:.6f}".format(row.Longitude))
    color = color_lut[int(row.Day)]
    
    return {"type": "Feature", 'id': i, "properties": {"marker-size": "small",
                                                       "marker-symbol": "circle",
                                                       "marker-color": color},
            "geometry": {"type": "Point","coordinates": [long, lat]}}
trip_data['Row_Num'] = list(range(len(trip_data)))
trip_data_reduced = trip_data.dropna(subset=['Latitude','Longitude'],axis=0,how='all')
features =[]
for row in trip_data_reduced.itertuples():
    features.append(feature_maker_from_pos(row))
del trip_data_reduced
del trip_data['Row_Num']
# features = tr_d_df.apply(feature_maker_from_pos, axis=1)
features = [str(x) for x in features]
features_st = '{"features":['+','.join(features)+'],"type": "FeatureCollection"}'
features_st = features_st.replace("\'", '"')
with open('trip_data.geojson', 'w') as f:
    f.write(features_st)

In [None]:
from mapbox import Static
service = Static(access_token=mapbox_token)

In [None]:
pt1 = {
      "type": "Feature",
      "properties": {"marker-size": "small",
                     "marker-symbol": "circle",
                     "marker-color": "#000000"},
      "geometry": {
        "type": "Point",
        "coordinates": [-86.7507626, 33.5480477]
      }
    }
pt2 = {
      "type": "Feature",
      "properties": {"marker-size": "small",
                     "marker-symbol": "circle",
                     "marker-color": "#000000"},
      "geometry": {
        "type": "Point",
        "coordinates": [-87.2038367, 33.7517015]
      }
    }
pt3 = {
      "type": "Feature",
      "properties": {"marker-size": "small",
                     "marker-symbol": "circle",
                     "marker-color": "#000000"},
      "geometry": {
        "type": "Point",
        "coordinates": [-88.3600413, 34.2431618]
      }
    }


response = service.image('mapbox.streets', features=[pt1,pt2,pt3],width=1028, height=1028, image_format='png')
print (response.url)
with open('map.png', 'wb') as output:
    _ = output.write(response.content)
    
#Next, try this: 
# Path

# path-{strokecolor}-{strokeopacity}+{fillcolor}-{fillopacity}({polyline})
# Encoded polylines with a precision of 5 decimal places can be used with the Static API via the path paramet

In [None]:


#_p~iF~ps|U_ulLnnqC_mqNvxq`@

#path does not work with the sdk
#but, this works....
#https://api.mapbox.com/v4/mapbox.streets/path-5+f44-0.5+f44-0.2(kumwFrjvbMaf%40kuD%7BnCkS)/
#-73.99,40.70,12/500x300.png?access_token={token}

path-{strokecolor}-{strokeopacity}+{fillcolor}-{fillopacity}({polyline})



response = service.image('mapbox.streets', 
                         features=["_p~iF~ps|U_ulLnnqC_mqNvxq`@"],
                         width=1028, height=1028, image_format='png')
print (response.url)
with open('map2.png', 'wb') as output:
    _ = output.write(response.content)
    


In [None]:
import polyline
pl = polyline.encode([(34.2431618, -88.3600413), (34.6226575, -89.204821), (36.499422, -91.537299)], 5)
base = "https://api.mapbox.com/v4/mapbox.streets"
path = "path-5+f44-0.5+f44-0.2({})".format(pl)
center = "-120.9,38.5"
zoom = "12"
url = "{}/{}/auto/500x300.png?access_token={}".format(base, path, mapbox_token)
print (url)
response = requests.get(url, stream=True)
with open('img.png', 'wb') as out_file:
    shutil.copyfileobj(response.raw, out_file)
del response

In [None]:
from mapbox import Datasets
datasets = Datasets(access_token=mapbox_token)
my_dataset = datasets.create(name='jhs_route66', description='Points along route taken on JHS cross-country jaunt')
dataset_id = my_dataset.json()['id']
print (my_dataset.json())

In [None]:
# hourly_summary['cntr'] = range(len(hourly_summary))
# features = hourly_summary.apply(feature_maker_from_pos, counter = 0, axis=1)

trip_data_trim['cntr'] = range(len(trip_data_trim))
features = trip_data_trim.apply(feature_maker_from_pos, axis=1)

import time
for f in list(features):
    status = 0
    while status == 200:
        resp = datasets.batch_update_features(dataset_id, put=[f])
        status = resp.status_code
        if status != 200:
            print (status)
            time.sleep(2)

print (my_dataset.json())

In [None]:
path-{strokecolor}-{strokeopacity}+{fillcolor}-{fillopacity}({polyline})



response = service.image('mapbox.streets', features=["_p~iF~ps|U_ulLnnqC_mqNvxq`@"],width=1028, height=1028, image_format='png')
print (response.url)
with open('map2.png', 'wb') as output:
    _ = output.write(response.content)
    

In [None]:
collection = datasets.list_features(dataset_id).json()
print(len(collection['features']))
print ([f['id'] for f in collection['features']])
# first = collection['features'][0]
# print(first['id'])
# print(first['properties']['name'])
# print (resp.status_code)

In [None]:
print (len([ds for ds in datasets.list().json()]))
for ds in datasets.list().json():
    id_ = ds['id']
    print (ds['name'])
    print (id_)
    r = datasets.delete_dataset(id_)
    print (r.url)
    print (r.status_code)

In [None]:
# The Google Maps Roads API takes up to 100 GPS points collected along a route, 
# and returns a similar set of data, with the points snapped to the most likely 
# roads the vehicle was traveling along. Optionally, you can request that the 
# points be interpolated, resulting in a path that smoothly follows the geometry of the road.

#https://developers.google.com/maps/documentation/roads/nearest
#https://developers.google.com/maps/documentation/roads/snap   
#https://developer.here.com/platform-extensions/documentation/route-match/topics/quick-start.html

import requests

# if speed > 10
# iterate over file and find sections of speed > 10
#      gather contiguous points (at least 2, up to 100)
# send points to API

https://roads.googleapis.com/v1/snapToRoads?parameters&key=YOUR_API_KEY
path=60.170880,24.942795|60.170879,24.942796|60.170877,24.942796

https://roads.googleapis.com/v1/snapToRoads?path=-35.27801,149.12958|-35.28032,149.12907|-35.28099,149.12929
    |-35.28144,149.12984|-35.28194,149.13003|-35.28282,149.12956|-35.28302,149.12881
    |-35.28473,149.12836&interpolate=true&key=YOUR_API_KEY

https://roads.googleapis.com/v1/snapToRoads?path=34.3325575,-88.8210022|34.3360208,-88.8246691&interpolate=true&
    key=AIzaSyC92G3JW4JqVgaIWqO1DBFHhjKT2Z52114