In [10]:
# JSON file that is loaded below and parsed as a semantic location history
# (it is expected to contain a top-level 'timelineObjects' list).
# NOTE(review): absolute, machine-specific path -- adjust before re-running
# this notebook on another machine.
LOCATION_HISTORY_FILE='/home/pere/Semantic_Location_History/2020/2020_MAY.json'
# Time zone name that to_datetime_tz converts every parsed timestamp into.
TIME_ZONE='Europe/Berlin'

In [11]:
import json
import datetime

import shapely.geometry as sg
import pandas as pd
import geopandas as gp
from matplotlib import cm

In [12]:
# Load the whole history document into memory. The context manager closes
# the file handle deterministically, and the explicit encoding avoids
# depending on the platform default (JSON text is UTF-8).
with open(LOCATION_HISTORY_FILE, encoding='utf-8') as history_file:
    history = json.load(history_file)

In [13]:
# Sanity check: number of timeline entries in the loaded file.
len(history['timelineObjects'])

54

In [14]:
def get_point(obj):
    """
    Build a shapely.geometry.Point from a mapping that carries the keys
    "longitudeE7" and "latitudeE7".

    Both values are divided by 10,000,000 and the point is created with
    x = longitude and y = latitude.
    """
    e7_divisor = 10000000
    longitude = obj["longitudeE7"] / e7_divisor
    latitude = obj["latitudeE7"] / e7_divisor
    return sg.Point(longitude, latitude)

In [15]:
def to_datetime_tz(ms, tz=None):
    """
    Convert an epoch timestamp in milliseconds into a timezone-aware
    pandas Timestamp.

    Parameters
    ----------
    ms : int, float or str
        Milliseconds since the Unix epoch. Digit strings such as
        "1585739488388" are accepted and converted to int first.
    tz : str or tzinfo, optional
        Target time zone. When omitted, the notebook-level TIME_ZONE
        constant is used.

    Returns
    -------
    pandas.Timestamp
        The same instant expressed in the target time zone.

    Raises
    ------
    ValueError
        If ``ms`` is a string that is not an integer literal.
    """
    if isinstance(ms, str):
        # Recent pandas versions deprecate combining `unit=` with string
        # input (strings will be parsed as date strings instead), so make
        # the numeric interpretation explicit.
        ms = int(ms)
    utc_timestamp = pd.to_datetime(ms, unit='ms', utc=True)
    return utc_timestamp.tz_convert(TIME_ZONE if tz is None else tz)

In [16]:
def get_raw_path_point(obj):
    """
    Convert one sample of a time-stamped path (for instance an entry of
    'simplifiedRawPath' inside an 'activitySegment') into a pair
    (shapely.geometry.Point, timestamp).

    The point is built from the "lngE7" / "latE7" keys (x = longitude,
    y = latitude) and the timestamp from "timestampMs" via to_datetime_tz.
    """
    e7_divisor = 10000000
    location = sg.Point(obj["lngE7"] / e7_divisor,
                        obj["latE7"] / e7_divisor)
    sampled_at = to_datetime_tz(obj['timestampMs'])
    return (location, sampled_at)

In [17]:
def calc_duration_minutes(duration_obj):
    """Whole minutes between the two timestamps of a duration object.

    The expected input looks like:

      "duration" : {
         "startTimestampMs" : "1585739488388",
         "endTimestampMs" : "1585759177100"
      }

    Both values are parsed with int(); the difference is converted to
    minutes and the fractional part is discarded (truncation toward zero).
    """
    end_ms = int(duration_obj['endTimestampMs'])
    start_ms = int(duration_obj['startTimestampMs'])
    elapsed_seconds = (end_ms - start_ms) / 1000
    return int(elapsed_seconds / 60)

In [18]:
def elapsed_minutes(start_date, end_date):
    """Whole minutes from start_date to end_date, rounded down (floor)."""
    seconds = (end_date - start_date).total_seconds()
    whole_minutes = seconds // 60
    return int(whole_minutes)

In [19]:
def semantic_history_to_df(history):
    """
    Flatten a semantic location history document into a pandas DataFrame.

    Every entry of history['timelineObjects'] becomes one row:

    * an 'activitySegment' entry yields a row of type 'moving' whose place
      is 'MOVING', with its activity type, start location and sampled route;
    * a 'placeVisit' entry yields a row of type 'visit' with activity type
      'STILL', an empty route and a place that is 'HOME' when the location's
      semanticType is 'TYPE_HOME', otherwise the location's placeId.

    All rows also carry start_date, end_date, duration_minutes, latitude
    and longitude. An entry of any other kind raises an Exception.
    """
    rows = []

    for timeline_obj in history['timelineObjects']:
        if 'activitySegment' in timeline_obj:
            segment = timeline_obj['activitySegment']
            record = {
                'type': 'moving',
                'activity_type': segment['activityType'],
                'place': 'MOVING',
                'from_point': get_point(segment['startLocation']),
                # the sampled path is optional; default to an empty route
                'route': [
                    get_raw_path_point(sample)
                    for sample in segment.get('simplifiedRawPath', {})
                                         .get('points', [])
                ],
            }
            duration_obj = segment['duration']
        elif 'placeVisit' in timeline_obj:
            visit = timeline_obj['placeVisit']
            location = visit['location']
            if location.get('semanticType', '') == 'TYPE_HOME':
                place = 'HOME'
            else:
                place = location['placeId']
            record = {
                'type': 'visit',
                'place': place,
                # a visit is treated as standing still
                'activity_type': 'STILL',
                'from_point': get_point(location),
                'route': [],
            }
            duration_obj = visit['duration']
        else:
            # fail loudly on timeline object kinds this parser does not know
            raise Exception('Unknown timeline object: {}'.format(timeline_obj))

        # fields shared by both kinds of rows
        origin = record['from_point']
        record['start_date'] = to_datetime_tz(duration_obj['startTimestampMs'])
        record['end_date'] = to_datetime_tz(duration_obj['endTimestampMs'])
        record['duration_minutes'] = calc_duration_minutes(duration_obj)
        record['latitude'] = origin.y
        record['longitude'] = origin.x

        rows.append(record)

    return pd.DataFrame(rows)

In [20]:
# Flatten the loaded history into one row per timeline object.
df = semantic_history_to_df(history)

In [21]:
# Preview the first rows of the flattened history.
df.head()

Unnamed: 0,activity_type,duration_minutes,end_date,from_point,latitude,longitude,place,route,start_date,type
0,WALKING,49,2020-05-01 19:40:05.967000+02:00,POINT (13.2707059 52.4300337),52.430034,13.270706,MOVING,"[(POINT (13.2704077 52.4217796), 2020-05-01 19...",2020-05-01 18:51:01.163000+02:00,moving
1,STILL,12,2020-05-01 19:52:41.082000+02:00,POINT (13.2636161 52.4222327),52.422233,13.263616,ChIJsSFvwMlbqEcRp1hFk77mQro,[],2020-05-01 19:40:05.967000+02:00,visit
2,WALKING,29,2020-05-01 20:22:39.109000+02:00,POINT (13.2660179 52.4234404),52.42344,13.266018,MOVING,"[(POINT (13.2679396 52.4251823), 2020-05-01 20...",2020-05-01 19:52:41.082000+02:00,moving
3,STILL,881,2020-05-02 11:03:52.286000+02:00,POINT (13.2698764 52.4306325),52.430633,13.269876,HOME,[],2020-05-01 20:22:39.109000+02:00,visit
4,IN_BUS,25,2020-05-02 11:29:40.248000+02:00,POINT (13.2686761 52.4307043),52.430704,13.268676,MOVING,"[(POINT (13.2686761 52.4307043), 2020-05-02 11...",2020-05-02 11:03:52.286000+02:00,moving


In [22]:
# Bar chart of how many rows fall into each activity type
# (place visits are counted under 'STILL').
df['activity_type'].value_counts().plot(kind='bar')

<matplotlib.axes._subplots.AxesSubplot at 0x7fc42b99c9b0>

In [23]:
def extract_full_activities(df, home_stay_minutes=30):
    """
    Group the rows of a parsed semantic history into activities that end
    with a long stay at home, one output row per activity.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns produced by semantic_history_to_df
        ('type', 'place', 'activity_type', 'from_point', 'route',
        'start_date', 'duration_minutes'), in chronological order.
    home_stay_minutes : int or float, optional
        A 'HOME' row lasting longer than this many minutes closes the
        activity in progress. Defaults to 30.

    Returns
    -------
    pandas.DataFrame
        One row per finished activity with:
        start_date / start_time / start_weekday (taken from the first
        'moving' row, or from the first row when nothing moved),
        end_date / end_time / end_weekday (arrival at home),
        total_minutes, full_route as (point, minutes since start) tuples,
        visited_places as (point, minutes stayed) tuples and moving_by as
        (activity type, minutes) tuples.

    Notes
    -----
    * A long home stay only delimits activities; it is never recorded as
      a visited place of the following activity.
    * An activity still open when the frame ends is not returned.
    """
    all_acts = []
    act = {}
    first_row_start = None

    for _, row in df.iterrows():
        if row['place'] == 'HOME' and row['duration_minutes'] > home_stay_minutes:
            # Arriving home for a long stay finishes the open activity.
            if act:
                all_acts.append(
                    _close_activity(act, first_row_start, row['start_date'])
                )
                act = {}
            # Skip the home stay itself so it does not leak into the next
            # activity (e.g. when the history starts with a night at home).
            continue

        if not act:
            # Remember when this activity's first row started; used as the
            # start if the activity never contains a 'moving' row.
            first_row_start = row['start_date']

        if row['type'] == 'moving':
            if 'start_date' not in act:
                act.update(_activity_start_fields(row['start_date']))
            act.setdefault('full_route', []).extend(row['route'])
        elif row['type'] == 'visit':
            act.setdefault('visited_places', []).append(
                (row['from_point'], row['duration_minutes'])
            )
            act.setdefault('full_route', []).append(
                (row['from_point'], row['start_date'])
            )
        act.setdefault('moving_by', []).append(
            (row['activity_type'], row['duration_minutes'])
        )

    return pd.DataFrame(all_acts)


def _activity_start_fields(start_ts):
    """Return the three start_* fields derived from a start timestamp."""
    return {
        'start_date': start_ts,
        'start_time': start_ts.time(),
        'start_weekday': start_ts.weekday(),
    }


def _close_activity(act, fallback_start, end_ts):
    """Stamp end/total fields on `act` and turn route times into minute offsets."""
    if 'start_date' not in act:
        # No 'moving' row was seen: fall back to the first recorded row
        # instead of failing with a KeyError.
        act.update(_activity_start_fields(fallback_start))
    act['end_date'] = end_ts
    act['end_time'] = end_ts.time()
    act['end_weekday'] = end_ts.weekday()
    act['total_minutes'] = elapsed_minutes(act['start_date'], end_ts)
    # Convert route datetimes into minute offsets relative to the start.
    act['full_route'] = [
        (point, elapsed_minutes(act['start_date'], seen_at))
        for point, seen_at in act.get('full_route', [])
    ]
    return act

In [24]:
# Group the flattened history into activities and preview the first ones.
f_df = extract_full_activities(df)
f_df.head()

Unnamed: 0,end_date,end_time,end_weekday,full_route,moving_by,start_date,start_time,start_weekday,total_minutes,visited_places
0,2020-05-01 20:22:39.109000+02:00,20:22:39.109000,4,"[(POINT (13.2704077 52.4217796), 12), (POINT (...","[(WALKING, 49), (STILL, 12), (WALKING, 29)]",2020-05-01 18:51:01.163000+02:00,18:51:01.163000,4,91,"[(POINT (13.2636161 52.4222327), 12)]"
1,2020-05-02 13:17:40.993000+02:00,13:17:40.993000,5,"[(POINT (13.2686761 52.4307043), 0), (POINT (1...","[(IN_BUS, 25), (STILL, 54), (WALKING, 25), (ST...",2020-05-02 11:03:52.286000+02:00,11:03:52.286000,5,133,"[(POINT (13.21755 52.42948), 54), (POINT (13.2..."
2,2020-05-02 19:08:01.989000+02:00,19:08:01.989000,5,"[(POINT (13.2633886 52.4331055), 12), (POINT (...","[(WALKING, 22), (STILL, 31), (WALKING, 25)]",2020-05-02 17:48:50.275000+02:00,17:48:50.275000,5,79,"[(POINT (13.2605135 52.4334403), 31)]"
3,2020-05-03 11:17:35.156000+02:00,11:17:35.156000,6,"[(POINT (13.263567 52.4322739), 8), (POINT (13...","[(WALKING, 9), (STILL, 6), (IN_PASSENGER_VEHIC...",2020-05-03 09:43:25.133000+02:00,09:43:25.133000,6,94,"[(POINT (13.2586675 52.4310979), 6), (POINT (1..."
4,2020-05-03 12:15:03.363000+02:00,12:15:03.363000,6,"[(POINT (13.2639236 52.435387), 3)]","[(IN_PASSENGER_VEHICLE, 3), (STILL, 10), (IN_P...",2020-05-03 11:56:03.226000+02:00,11:56:03.226000,6,19,"[(POINT (13.2639236 52.435387), 10)]"


In [25]:
def interpolate_route(full_route, 
                      start_point, 
                      sample_interval_minutes=5):
    """
    Resample a route at regular time steps using linear interpolation.

    Parameters
    ----------
    full_route : sequence of (point, minute_offset)
        Way-points with the minutes elapsed since the route started. Each
        point must expose ``.x`` and ``.y``. Offsets are expected to be in
        ascending order.
    start_point : point
        Location used for minute 0, i.e. the left anchor for samples that
        fall before the first way-point.
    sample_interval_minutes : int or float, optional
        Distance between two samples in minutes; must be positive.
        Defaults to 5.

    Returns
    -------
    list of (shapely.geometry.Point, minute)
        One sample at every multiple of ``sample_interval_minutes`` that is
        not later than the last way-point. Empty when the route is empty or
        ends before the first sampling instant.

    Raises
    ------
    ValueError
        If ``sample_interval_minutes`` is not a positive number (the
        sampling loop would otherwise never terminate).
    """
    if not sample_interval_minutes > 0:
        raise ValueError(
            'sample_interval_minutes must be positive, got {!r}'.format(
                sample_interval_minutes)
        )

    interpolated_route = []
    route_index = 0
    minute_index = sample_interval_minutes
    
    while route_index < len(full_route):
        if full_route[route_index][1] < minute_index:
            # advance to the first way-point that is not earlier than the
            # instant currently being sampled
            route_index += 1
            continue
        
        # The sample lies between the previous way-point (or the start
        # point at minute 0) and the current one.
        if route_index == 0:
            prev_loc = start_point
            prev_mins_off = 0
        else:
            prev_loc = full_route[route_index - 1][0]
            prev_mins_off = full_route[route_index - 1][1]
        curr_loc = full_route[route_index][0]
        curr_mins_off = full_route[route_index][1]
        
        # prev_mins_off < minute_index <= curr_mins_off holds here, so the
        # divisor below is always positive.
        diff_mins = curr_mins_off - prev_mins_off
        diff_mins_int = minute_index - prev_mins_off
        
        int_loc_x = prev_loc.x + ((curr_loc.x - prev_loc.x) / diff_mins) * diff_mins_int
        int_loc_y = prev_loc.y + ((curr_loc.y - prev_loc.y) / diff_mins) * diff_mins_int
        
        interpolated_route.append((sg.Point(int_loc_x,
                                            int_loc_y),
                                   minute_index))
        minute_index += sample_interval_minutes
                                  
    return interpolated_route

## Debug code here

In [26]:
# Positional index (into f_df / gmaps_routes) of the activity inspected in
# the debug cells below.
DEBUG_ROUTE=2
# Location assumed at minute 0 of every route when interpolating.
# NOTE(review): same coordinates as the HOME visit shown in df.head();
# presumably the home location -- confirm.
START_POINT=sg.Point(13.2698764, 52.4306325)

In [27]:
# Resample the debug activity's route (default: one sample every 5 minutes).
interpolate_route(f_df.iloc[DEBUG_ROUTE, ]['full_route'], start_point=START_POINT)

[(<shapely.geometry.point.Point at 0x7fc4296866a0>, 5),
 (<shapely.geometry.point.Point at 0x7fc429686080>, 10),
 (<shapely.geometry.point.Point at 0x7fc429686668>, 15),
 (<shapely.geometry.point.Point at 0x7fc460c0cf28>, 20),
 (<shapely.geometry.point.Point at 0x7fc4296860b8>, 25),
 (<shapely.geometry.point.Point at 0x7fc429686128>, 30),
 (<shapely.geometry.point.Point at 0x7fc4296867b8>, 35),
 (<shapely.geometry.point.Point at 0x7fc429686160>, 40),
 (<shapely.geometry.point.Point at 0x7fc4296868d0>, 45),
 (<shapely.geometry.point.Point at 0x7fc429686cc0>, 50),
 (<shapely.geometry.point.Point at 0x7fc429686240>, 55),
 (<shapely.geometry.point.Point at 0x7fc429686208>, 60),
 (<shapely.geometry.point.Point at 0x7fc429686c50>, 65)]

In [28]:
# For every extracted activity, resample its route at fixed time steps and
# keep it as a list of [latitude, longitude] pairs (one list per activity,
# in the same order as the rows of f_df).
gmaps_routes = []

for i, row in f_df.iterrows():
    # rp is a (Point, minute) tuple; Point.y holds the latitude and Point.x
    # the longitude.
    lat_lng_matrix = [[rp[0].y, rp[0].x] for rp in interpolate_route(row['full_route'],
                                                                     start_point=START_POINT)]
    gmaps_routes.append(lat_lng_matrix)

In [29]:
# Original (non-interpolated) way-points of the debug activity as
# (point, minutes since start) tuples, for comparison with the samples.
f_df.iloc[DEBUG_ROUTE, ]['full_route']

[(<shapely.geometry.point.Point at 0x7fc42db7e6a0>, 12),
 (<shapely.geometry.point.Point at 0x7fc42db7e2e8>, 22),
 (<shapely.geometry.point.Point at 0x7fc42db7e320>, 63),
 (<shapely.geometry.point.Point at 0x7fc42db7e400>, 68)]

In [30]:
# All extracted fields of the debug activity.
f_df.iloc[DEBUG_ROUTE, ]

end_date                           2020-05-02 19:08:01.989000+02:00
end_time                                            19:08:01.989000
end_weekday                                                       5
full_route        [(POINT (13.2633886 52.4331055), 12), (POINT (...
moving_by               [(WALKING, 22), (STILL, 31), (WALKING, 25)]
start_date                         2020-05-02 17:48:50.275000+02:00
start_time                                          17:48:50.275000
start_weekday                                                     5
total_minutes                                                    79
visited_places                [(POINT (13.2605135 52.4334403), 31)]
Name: 2, dtype: object

In [31]:
# Interpolated [latitude, longitude] samples of the debug activity.
gmaps_routes[DEBUG_ROUTE]

[[52.43166291666667, 13.26717315],
 [52.43269333333333, 13.2644699],
 [52.43320594, 13.26252607],
 [52.43337334, 13.26108852],
 [52.43350484390244, 13.260619970731707],
 [52.43361241707317, 13.260797421951219],
 [52.4337199902439, 13.260974873170731],
 [52.43382756341463, 13.261152324390244],
 [52.43393513658537, 13.261329775609756],
 [52.4340427097561, 13.261507226829268],
 [52.43415028292683, 13.26168467804878],
 [52.43425785609756, 13.261862129268293],
 [52.43359148, 13.262813959999999]]

In [32]:
import gmaps
import gmaps.datasets
import os

# The API key is read from the GOOGLE_API_KEY environment variable;
# os.environ.get returns None when the variable is not set.
gmaps.configure(api_key=os.environ.get("GOOGLE_API_KEY")) # Your Google API key

# Alternative layer kept for reference: plot the start point of every row
# of df instead of a single interpolated route.
#layer = gmaps.symbol_layer(
#    df[['latitude', 'longitude']], fill_color="green", stroke_color="green"
#)

# Draw the interpolated samples of the debug activity as map symbols.
layer = gmaps.symbol_layer(gmaps_routes[DEBUG_ROUTE])
fig = gmaps.figure()
fig.add_layer(layer)
# Last expression of the cell: renders the figure widget.
fig

Figure(layout=FigureLayout(height='420px'))