# GeoLife dataset tests

1. Download dataset
    https://www.microsoft.com/en-us/research/publication/geolife-gps-trajectory-dataset-user-guide/
2. Put the dataset in the project root folder under the name `geolife-gps-trajectory-dataset`
3. Put your Google API key in the file `.env.json`, like this:

    ```json
    {
        "GOOGLE_API_KEY": "<your key>"
    }
    ```
4. Run `jupyter nbextension enable --py --sys-prefix widgetsnbextension` to enable extensions in Jupyter notebooks.
5. Run `jupyter nbextension enable --py gmaps` to allow gmaps to draw maps in Jupyter notebooks.

In [1]:
import numpy as np
import pandas as pd

from dateutil import parser
from os import path, walk

# Extract locations from trajectory files

In [2]:


files = []
user_dir = './geolife-gps-trajectory-dataset/001/Trajectory/'

# Collect the .plt trajectory files that sit directly inside user_dir.
# Only the top level is scanned: the names are joined with user_dir when the
# files are read, so a file found in a sub-directory would resolve to a path
# that does not exist.
# Sorting makes the load order (and therefore the row order of the combined
# frame) independent of the order in which the OS lists the directory.
for (_, _, filenames) in walk(user_dir):
    files.extend(sorted(f for f in filenames if f.endswith('.plt')))
    break  # the first item yielded by walk() is user_dir itself; do not descend

In [3]:
# Combined table of all trajectory records; assigned by the loading code below.
df = None

def decode_str(s):
    """Return a text field of a .plt record as ``str``.

    Used as a ``np.genfromtxt`` converter for the text columns. Depending on
    the NumPy version (and the ``encoding`` argument), converters are handed
    either raw ``bytes`` or an already-decoded ``str``; both are accepted so
    the notebook runs on either.

    Args:
        s: the raw field, as ``bytes`` (UTF-8 encoded) or ``str``.

    Returns:
        The field as ``str``. A ``str`` input is returned unchanged.
    """
    if isinstance(s, str):
        # NumPy >= 2.0 passes str by default; there is nothing left to decode.
        return s
    return s.decode('utf-8')

# Column labels for the seven comma-separated fields of each .plt record.
PLT_COLUMNS = ['latitude', 'longitude', 'Unknown1', 'Unknown2', 'Unknown3', 'date', 'time']

# Parse every file into its own frame and concatenate once at the end.
# Calling pd.concat inside the loop would re-copy all previously loaded rows
# on every iteration.
frames = []
for file in files:
    data = np.genfromtxt(
        path.join(user_dir, file),
        delimiter=',',
        skip_header=6,  # the first six lines of each file are not data records
        converters={
            0: float,
            1: float,
            2: int,
            3: int,
            4: float,
            5: decode_str,
            6: decode_str,
        }
    )

    # genfromtxt squeezes its result, so a file holding a single record comes
    # back as a 0-d array that cannot be iterated; atleast_1d restores the
    # "sequence of records" shape for that case and is a no-op otherwise.
    records = [list(record) for record in np.atleast_1d(data)]
    frames.append(pd.DataFrame(records, columns=PLT_COLUMNS))

# Each frame keeps its own per-file row index, exactly as before.
# With no input files, fall back to an empty frame with the expected columns
# instead of leaving df as None.
df = pd.concat(frames) if frames else pd.DataFrame(columns=PLT_COLUMNS)

In [4]:
# Spot-check five randomly chosen records of the combined frame.
df.sample(5)

Unnamed: 0,latitude,longitude,Unknown1,Unknown2,Unknown3,date,time
522,39.972514,116.334829,0,47,39788.029769,2008-12-06,00:42:52
6093,39.9912,116.21526,0,204,39746.44213,2008-10-25,10:36:40
3950,40.01388,116.306553,0,162,39795.83537,2008-12-13,20:02:56
439,40.003023,116.308982,0,121,39794.613924,2008-12-12,14:44:03
278,39.998529,116.326254,0,127,39788.017164,2008-12-06,00:24:43


In [12]:
# Total number of records loaded across all trajectory files.
location_count = len(df)
print('Number of locations:', location_count)

Number of locations: 108607


In [13]:
# `date` holds strings, so min()/max() compare them lexicographically; with the
# 'YYYY-MM-DD' layout shown in the sample output that matches chronological order.
print(f'Trajectories from {df.date.min()} until {df.date.max()}')

Trajectories from 2008-10-23 until 2008-12-15


# Render locations to Google map

In [6]:
import gmaps
import json
import ssl

from datetime import datetime

# Read the local settings file (.env.json, see the setup steps at the top).
with open('.env.json') as env_file:
    settings = json.load(env_file)

# Hand the Google API key to gmaps; .get() yields None when the key is missing.
google_api_key = settings.get('GOOGLE_API_KEY')
gmaps.configure(api_key=google_api_key)

# WARNING (security): this replaces the standard library's default HTTPS
# context factory with one that does NOT verify server certificates. It
# affects every HTTPS connection that relies on that default for the rest of
# this kernel session. Prefer fixing the local CA certificate store and
# removing this line.
ssl._create_default_https_context = ssl._create_unverified_context

In [7]:
def gmaps_heatmap(locations):
    """Build a gmaps map showing one small red symbol per location.

    Despite the name, no heatmap is drawn: the points are rendered with
    ``gmaps.symbol_layer``. ``gmaps.heatmap_layer(locations)`` is the heatmap
    variant that was previously tried here and left disabled.

    Args:
        locations: the points to plot, passed unchanged to
            ``gmaps.symbol_layer``.

    Returns:
        The ``gmaps.Map`` instance with the symbol layer added.
    """
    marker_style = {"fill_color": "red", "stroke_color": "red", "scale": 2}
    gmap = gmaps.Map()
    gmap.add_layer(gmaps.symbol_layer(locations, **marker_style))
    return gmap

In [10]:
# Thin the data to every 100th record (by position) and pair the coordinates
# up as (latitude, longitude) tuples for plotting.
every_100th = df.iloc[::100]
locations = list(zip(every_100th.latitude, every_100th.longitude))
m = gmaps_heatmap(locations)

In [11]:
# Last expression of the cell: renders the map widget as the cell output.
m

Map(configuration={'api_key': '<REDACTED>'}, data_bounds=[(39.945642980984964, 11…