## Import Libraries

In [1]:
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from datetime import datetime as dt
%matplotlib inline

## Read JSON

In [2]:
# Load the first 'Takeout' location-history export into a DataFrame
df_gps = pd.read_json(
    path_or_buf='/Users/minseok/Downloads/Takeout/Location History/Location History.json'
)

In [3]:
# Load the second 'Takeout 2' location-history export into a DataFrame
df_gps2 = pd.read_json(
    path_or_buf='/Users/minseok/Downloads/Takeout 2/Location History/Location History.json'
)

## Process first

In [4]:
# Parse lat, lon, and timestamp from the dict inside the locations column
df_gps['lat'] = df_gps['locations'].map(lambda x: x['latitudeE7'])
df_gps['lon'] = df_gps['locations'].map(lambda x: x['longitudeE7'])
df_gps['timestamp_ms'] = df_gps['locations'].map(lambda x: x['timestampMs'])

# Activity type: the first listed type of the record's last activity entry,
# NaN for records that carry no 'activity' key. A single map replaces the
# row-by-row iterrows()/.at loop and guarantees the 'type' column exists even
# when no record has activity data.
df_gps['type'] = df_gps['locations'].map(
    lambda x: x['activity'][-1]['activity'][0]['type'] if 'activity' in x else np.nan
)

# Convert lat/lon to decimalized degrees and the timestamp to date-time
df_gps['lat'] = df_gps['lat'] / 10.**7
df_gps['lon'] = df_gps['lon'] / 10.**7
# NOTE: after this division the 'timestamp_ms' column holds seconds, not ms
df_gps['timestamp_ms'] = df_gps['timestamp_ms'].astype(float) / 1000
# NOTE: dt.fromtimestamp() converts to the local timezone of the machine
# running the notebook, so the rendered strings are machine-dependent
df_gps['datetime'] = df_gps['timestamp_ms'].map(lambda x: dt.fromtimestamp(x).strftime('%Y-%m-%d %H:%M:%S'))
# 'YYYY-YYYY' label built from the first and last year present in the data
date_range = '{}-{}'.format(df_gps['datetime'].min()[:4], df_gps['datetime'].max()[:4])

In [5]:
# Discard the raw record dicts and the numeric timestamp; only the parsed
# columns are needed from here on
df_gps = df_gps.drop(columns=['locations', 'timestamp_ms'])

## Process second

In [6]:
# Pull latitude/longitude (stored scaled by 1e7) and the epoch timestamp out
# of each record dict in the 'locations' column, converting to decimal
# degrees and seconds in the same step
records_2 = df_gps2['locations']
df_gps2['lat'] = records_2.map(lambda rec: rec['latitudeE7']) / 1e7
df_gps2['lon'] = records_2.map(lambda rec: rec['longitudeE7']) / 1e7
# NOTE: despite its name, this column holds seconds after the division
df_gps2['timestamp_ms'] = records_2.map(lambda rec: rec['timestampMs']).astype(float) / 1000

# Render each timestamp as a 'YYYY-MM-DD HH:MM:SS' string
df_gps2['datetime'] = df_gps2['timestamp_ms'].map(
    lambda seconds: dt.fromtimestamp(seconds).strftime('%Y-%m-%d %H:%M:%S')
)

# 'YYYY-YYYY' label from the first and last year present in this series
date_range = '{}-{}'.format(
    df_gps2['datetime'].min()[:4],
    df_gps2['datetime'].max()[:4],
)

In [7]:
# Discard the raw record dicts and the numeric timestamp; only the parsed
# columns are needed from here on
df_gps2 = df_gps2.drop(columns=['locations', 'timestamp_ms'])

## Flatten to a per-minute date range

In [8]:
# Truncate every timestamp to the start of its minute and keep only the first
# fix recorded in each minute. Flooring real datetimes (rather than slicing the
# formatted string) gives the same result but lets the cell be re-run safely,
# and converting before de-duplicating avoids assigning into a filtered copy.
df_gps['datetime'] = pd.to_datetime(df_gps['datetime']).dt.floor('min')
df_gps = df_gps.drop_duplicates(subset='datetime', keep='first')

df_gps2['datetime'] = pd.to_datetime(df_gps2['datetime']).dt.floor('min')
df_gps2 = df_gps2.drop_duplicates(subset='datetime', keep='first')

In [9]:
# Overlap window of the two GPS series: the later of the two start times and
# the earlier of the two end times
min_time = max(frame.datetime.min() for frame in (df_gps, df_gps2))
max_time = min(frame.datetime.max() for frame in (df_gps, df_gps2))

In [10]:
# Generate a continuous range of timestamps (one per minute) covering the
# overlap window
ts = pd.date_range(min_time, max_time, freq='1min')

# Align both series to that grid and backfill: every missing value takes the
# next available observation. `.bfill()` replaces the deprecated
# `fillna(method='bfill')` spelling with identical behaviour. Note that NaNs
# already present in a column (e.g. a missing 'type') are backfilled as well.
df_gps = (
    df_gps
    .set_index('datetime')
    .reindex(ts)
    .bfill()
    .rename_axis('datetime')
    .reset_index()
)
df_gps2 = (
    df_gps2
    .set_index('datetime')
    .reindex(ts)
    .bfill()
    .rename_axis('datetime')
    .reset_index()
)

In [11]:
# Tag every column with its source (_1 / _2) so the two frames stay
# distinguishable after the merge
df_gps.columns = [label + '_1' for label in df_gps.columns]
df_gps2.columns = [label + '_2' for label in df_gps2.columns]

In [12]:
# Join the two frames on their per-minute timestamps (default inner join)
df_merged = df_gps.merge(df_gps2, left_on='datetime_1', right_on='datetime_2')

In [13]:
# Preview the first five merged rows
df_merged.head()

Unnamed: 0,datetime_1,lat_1,lon_1,type_1,datetime_2,lat_2,lon_2
0,2019-08-23 23:18:00,22.310177,114.258531,STILL,2019-08-23 23:18:00,22.31012,114.259363
1,2019-08-23 23:19:00,22.31014,114.25911,STILL,2019-08-23 23:19:00,22.310409,114.259367
2,2019-08-23 23:20:00,22.310167,114.259313,STILL,2019-08-23 23:20:00,22.310142,114.259567
3,2019-08-23 23:21:00,22.310114,114.259367,STILL,2019-08-23 23:21:00,22.30998,114.259327
4,2019-08-23 23:22:00,22.310114,114.259367,STILL,2019-08-23 23:22:00,22.310258,114.259264


In [14]:
import math

class Haversine:
    '''
    Great-circle distance between two lon/lat coordinate pairs, computed with
    the haversine formula on a sphere of radius 6,371,000 m.

    Parameters
    ----------
    coord1, coord2 : sequence of two numbers
        ``(lon, lat)`` in decimal degrees -- longitude first.

    Attributes
    ----------
    meters, km, miles, feet : float
        The same distance expressed in each unit. NaN coordinates yield NaN
        distances.

    Example usage: Haversine([lon1,lat1],[lon2,lat2]).feet
    '''
    def __init__(self,coord1,coord2):
        lon1,lat1=coord1
        lon2,lat2=coord2

        R=6371000                               # radius of Earth in meters
        phi_1=math.radians(lat1)
        phi_2=math.radians(lat2)

        delta_phi=math.radians(lat2-lat1)
        delta_lambda=math.radians(lon2-lon1)

        a=math.sin(delta_phi/2.0)**2+\
           math.cos(phi_1)*math.cos(phi_2)*\
           math.sin(delta_lambda/2.0)**2

        # Floating-point rounding can push `a` marginally outside [0, 1] for
        # (near-)antipodal points, which would make sqrt(1-a) raise
        # ValueError. Clamp with comparisons so that NaN passes through
        # unchanged (min()/max() would silently replace it).
        if a>1.0:
            a=1.0
        elif a<0.0:
            a=0.0

        c=2*math.atan2(math.sqrt(a),math.sqrt(1-a))

        self.meters=R*c                         # output distance in meters
        self.km=self.meters/1000.0              # output distance in kilometers
        self.miles=self.meters*0.000621371      # output distance in miles
        self.feet=self.miles*5280               # output distance in feet


In [15]:
# Haversine distance in kilometres between the two series' positions at each
# timestamp (coordinates are passed longitude-first, as Haversine expects)
df_merged['distance'] = [
    Haversine((lon_a, lat_a), (lon_b, lat_b)).km
    for lon_a, lat_a, lon_b, lat_b in zip(
        df_merged['lon_1'],
        df_merged['lat_1'],
        df_merged['lon_2'],
        df_merged['lat_2'],
    )
]

In [16]:
# Remove every row that still has a missing value in any column
df_merged = df_merged.dropna()

In [17]:
# Number of one-minute rows with a distance under 3 (km), divided by 60 to
# express the total in hours
df_merged['distance'].lt(3).sum() / 60

127.68333333333334

In [18]:
# Earliest timestamp remaining in the merged data
df_merged.loc[:, 'datetime_1'].min()

Timestamp('2019-08-23 23:18:00')

In [19]:
# Latest timestamp remaining in the merged data
df_merged.loc[:, 'datetime_1'].max()

Timestamp('2019-09-02 03:39:00')

In [20]:
# Rows where the two series are less than 3 (km) apart
df_together = df_merged.loc[df_merged['distance'].lt(3)]

In [22]:
# Of those, the rows where the first series' activity type is 'STILL'
df_together_still = df_together.loc[df_together['type_1'].eq('STILL')]

In [24]:
# Display the close-together rows whose type_1 is 'STILL'
df_together_still

Unnamed: 0,datetime_1,lat_1,lon_1,type_1,datetime_2,lat_2,lon_2,distance
0,2019-08-23 23:18:00,22.310177,114.258531,STILL,2019-08-23 23:18:00,22.310120,114.259363,0.085785
1,2019-08-23 23:19:00,22.310140,114.259110,STILL,2019-08-23 23:19:00,22.310409,114.259367,0.039933
2,2019-08-23 23:20:00,22.310167,114.259313,STILL,2019-08-23 23:20:00,22.310142,114.259567,0.026352
3,2019-08-23 23:21:00,22.310114,114.259367,STILL,2019-08-23 23:21:00,22.309980,114.259327,0.015399
4,2019-08-23 23:22:00,22.310114,114.259367,STILL,2019-08-23 23:22:00,22.310258,114.259264,0.019255
5,2019-08-23 23:23:00,22.310114,114.259367,STILL,2019-08-23 23:23:00,22.310287,114.259321,0.019858
6,2019-08-23 23:24:00,22.310114,114.259367,STILL,2019-08-23 23:24:00,22.309918,114.259623,0.034176
7,2019-08-23 23:25:00,22.310114,114.259367,STILL,2019-08-23 23:25:00,22.309849,114.259728,0.047355
8,2019-08-23 23:26:00,22.310114,114.259367,STILL,2019-08-23 23:26:00,22.309769,114.259797,0.058501
9,2019-08-23 23:27:00,22.310114,114.259367,STILL,2019-08-23 23:27:00,22.309752,114.259779,0.058383


In [26]:
import plotly.express as px

# Scatter every "together" row on a map, positioned by the first series'
# coordinates, using the open-street-map style with all margins removed
fig = px.scatter_mapbox(
    df_together,
    lat="lat_1",
    lon="lon_1",
    color_discrete_sequence=["fuchsia"],
    zoom=12,
    height=600,
)
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r":0,"t":0,"l":0,"b":0},
)
fig.show()