# Experiment on GeoData

In [31]:
#general libs
import json
import numpy as np
import pandas as pd
from time import sleep
from tqdm import tqdm

#geodata libs
import folium
import openrouteservice

Global variables (API keys and other settings)

In [32]:
# Load the API credentials from the secrets file one directory above the notebook.
# Forward slashes keep the path portable across operating systems and avoid
# backslash escape sequences; the context manager closes the file handle.
with open('../secrets.json') as secrets_file:
    secrets = json.load(secrets_file)

# API key used to authenticate against OpenRouteService.
KEY_OPENROUTESERVICE = secrets['OPENROUTESERVICE']['API_KEY']


In [33]:
# Read the semicolon-delimited intermediate dataset.
df = pd.read_csv(
    filepath_or_buffer='../data/intermediate/data_intermediate.csv',
    delimiter=';',
)

# Convert the 'date' column to a datetime dtype.
df = df.assign(date=pd.to_datetime(df['date']))

# Preview the first rows.
df.head()

Unnamed: 0,date,year,latitude,longitude
0,2016-06-21 21:02:54,2016,-23.582274,-46.685666
1,2016-06-21 21:05:08,2016,-23.585315,-46.688474
2,2016-06-21 21:07:17,2016,-23.585046,-46.691041
3,2016-06-21 21:07:44,2016,-23.58458,-46.691775
4,2016-06-21 21:08:59,2016,-23.586483,-46.695554


### Hotwheels protocol

Limiting the volume of data during tests

In [34]:
# Keep only the first 10 rows while experimenting.
# .copy() detaches the sample from the full frame, so later in-place operations
# on `df` do not act on a slice (which can raise SettingWithCopyWarning).
df = df.head(10).copy()

Defining default coordinates when plotting map

In [35]:
# Use the mean position of the sampled points as the default map centre.
lat_default = df.loc[:, 'latitude'].mean()
long_default = df.loc[:, 'longitude'].mean()

# Show both centre coordinates, one per line.
print(lat_default, long_default, sep='\n')

# Quick sanity check of the centre (disabled):
#folium.Map(location=[lat_default, long_default], zoom_start=2)

-23.57891611
-46.697245179999996


### Step 1: create the map with coordinates

In [48]:
# Generate a new map centred on the default coordinates.
# It is named `map_points` rather than `map` so the Python builtin `map` is not
# shadowed for the rest of the kernel session.
map_points = folium.Map(location=[lat_default, long_default], zoom_start=16, tiles='cartodbpositron')

# For each row in the data, add a circle marker at the recorded position.
for index, row in tqdm(df.iterrows(), total=df.shape[0]):

    folium.CircleMarker(location=[row['latitude'], row['longitude']],
                        color='red',
                        radius=5,
                        weight=1,
                        fill=True).add_to(map_points)

# Last expression of the cell: render the map.
map_points

100%|██████████| 9/9 [00:00<00:00, 8989.93it/s]


### Step 2: connecting the dots

#### First try: with Open Route Service

In [46]:
# Base map for the OpenRouteService attempt.
map_1try = folium.Map(
    location=[lat_default, long_default],
    zoom_start=15,
    tiles='cartodbpositron',
)

# Place one red circle marker on the map per recorded position.
for point_lat, point_lon in tqdm(zip(df['latitude'], df['longitude']), total=len(df)):
    marker = folium.CircleMarker(
        location=[point_lat, point_lon],
        color='red',
        radius=5,
        weight=1,
        fill=True,
    )
    marker.add_to(map_1try)

100%|██████████| 9/9 [00:00<00:00, 9004.95it/s]


Match coordinates

In [38]:
# Order the points chronologically so that "next row" means "next position in time".
df = df.sort_values('date', ascending=True)

# Pair each point with the coordinates of the following point.
# shift(-1) moves every row up by one, so the last row gets NaN (it has no successor).
# set_axis is used without `inplace`, an argument that pandas 2.0 removed.
df_shifted_coordinates = (
    df[['latitude', 'longitude']]
    .shift(periods=-1, axis=0)
    .set_axis(['latitude_next', 'longitude_next'], axis=1)
)

# Drop any *_next columns left over from a previous run of this cell, so that
# re-running it does not append duplicate columns.
df = pd.concat(
    [df.drop(columns=['latitude_next', 'longitude_next'], errors='ignore'),
     df_shifted_coordinates],
    axis=1,
)
df.head()

Unnamed: 0,date,year,latitude,longitude,latitude_next,longitude_next
0,2016-06-21 21:02:54,2016,-23.582274,-46.685666,-23.585315,-46.688474
1,2016-06-21 21:05:08,2016,-23.585315,-46.688474,-23.585046,-46.691041
2,2016-06-21 21:07:17,2016,-23.585046,-46.691041,-23.58458,-46.691775
3,2016-06-21 21:07:44,2016,-23.58458,-46.691775,-23.586483,-46.695554
4,2016-06-21 21:08:59,2016,-23.586483,-46.695554,-23.58412,-46.698981


Using OpenRouteService API

In [39]:
# Ask OpenRouteService for a route between every point and its successor.
# Each successful route is stored in `path_list` as a list of (lat, long) tuples.
path_list = []

# One client serves every request; there is no need to rebuild it per row.
client = openrouteservice.Client(key=KEY_OPENROUTESERVICE)

for index, row in tqdm(df.iterrows(), total=df.shape[0]):

    # The request takes (longitude, latitude) pairs: origin first, destination second.
    coords = ((row['longitude'], row['latitude']),
              (row['longitude_next'], row['latitude_next']))

    # A point without a successor (NaN after the shift) has no route to request.
    if any(pd.isna(value) for pair in coords for value in pair):
        continue

    # Best effort: a failing request (e.g. an extremely long path, or a coordinate
    # that matches no address) skips that segment, but the failure is reported
    # instead of being silently discarded. Catching Exception rather than using a
    # bare except still lets KeyboardInterrupt stop the loop.
    try:
        geometry = client.directions(coords)['routes'][0]['geometry']
        decoded = openrouteservice.convert.decode_polyline(geometry)
    except Exception as error:
        tqdm.write(f'Skipping route for row {index}: {error!r}')
    else:
        # The decoded result is (long, lat); swap to (lat, long) for plotting.
        reverse = [(y, x) for x, y in decoded['coordinates']]
        path_list.append(reverse)
    finally:
        # Pause between requests (presumably to respect the API rate limit).
        sleep(1)

100%|██████████| 10/10 [00:18<00:00,  1.85s/it]


In [40]:
# Overlay every routed path from `path_list` on the first-try map.
for route_points in path_list:
    line = folium.PolyLine(route_points, weight=1, color='#0A8A9F')
    line.add_to(map_1try)

# Last expression of the cell: render the map.
map_1try

#### Second try

In [45]:
# Base map for the second attempt (points connected directly).
map_2try = folium.Map(
    location=[lat_default, long_default],
    zoom_start=16,
    tiles='cartodbpositron',
)

# Place one red circle marker on the map per recorded position.
for point_lat, point_lon in tqdm(zip(df['latitude'], df['longitude']), total=len(df)):
    marker = folium.CircleMarker(
        location=[point_lat, point_lon],
        color='red',
        radius=5,
        weight=1,
        fill=True,
    )
    marker.add_to(map_2try)

100%|██████████| 9/9 [00:00<00:00, 8998.51it/s]


In [44]:
# Only rows with complete current and next coordinates can form a segment.
# The filtered rows go into a separate frame instead of being dropped from `df`
# in place, so `df` keeps every recorded point (including the last one, which
# has no successor) if the marker cells are run again.
df_segments = df.dropna(
    subset=['latitude', 'longitude', 'latitude_next', 'longitude_next'],
    how='any',
)

# Draw a straight line between each point and the following one.
for index, row in tqdm(df_segments.iterrows(), total=df_segments.shape[0]):
    list_coordinates = [(row['latitude'], row['longitude']),
                        (row['latitude_next'], row['longitude_next'])]
    print(list_coordinates)

    line = folium.PolyLine(
        list_coordinates,
        weight=1,
        color='#0A8A9F'
    ).add_to(map_2try)

# Last expression of the cell: render the map.
map_2try

100%|██████████| 9/9 [00:00<00:00, 9004.95it/s]

[(-23.582274, -46.6856661), (-23.5853147, -46.6884743)]
[(-23.5853147, -46.6884743), (-23.5850464, -46.6910407)]
[(-23.5850464, -46.6910407), (-23.5845803, -46.691775)]
[(-23.5845803, -46.691775), (-23.586483, -46.6955541)]
[(-23.586483, -46.6955541), (-23.5841196, -46.6989807)]
[(-23.5841196, -46.6989807), (-23.5775489, -46.7020922)]
[(-23.5775489, -46.7020922), (-23.5737421, -46.7033941)]
[(-23.5737421, -46.7033941), (-23.5713257, -46.7052409)]
[(-23.5713257, -46.7052409), (-23.5587264, -46.7102337)]





### Conclusion
GoogleMaps coordinates are not precise enough for OpenRouteService to infer the right route between two points. The coordinates are slightly off the mark, often resulting in scenarios where two consecutive geo points are pinned in opposite lanes of the same road, or even on different roads. When analysing the points individually this is not an issue, but it becomes a problem when using the OpenRouteService API to estimate a route between them.

Pragmatically, since GoogleMaps data points are available at a relatively high frequency (every 2 to 3 minutes), it is reasonable to connect them directly. Comparing a sample of 9 data points using both methods shows that directly connecting the GoogleMaps data points provides an acceptable margin of error, resulting in a better visualization overall.

> In conclusion, the use of OpenRouteService would not be advisable in this context.