In [1]:
import pandas as pd
from pandas import DataFrame, Timestamp
from datetime import datetime as dt

In [2]:
def extract_maps_data(person: str) -> DataFrame:
    """Load the raw Location History export belonging to ``person``.

    The file is looked up at
    ``takeout-<person>/Takeout/Location History (Timeline)/Records.json``
    (relative to the current working directory) and parsed with
    ``pd.read_json``.

    Args:
        person: Identifier used as the suffix of the ``takeout-`` folder.

    Returns:
        Whatever frame ``pd.read_json`` builds from the export, unmodified.
    """
    records_path = f'takeout-{person}/Takeout/Location History (Timeline)/Records.json'
    raw_records = pd.read_json(records_path)
    return raw_records

In [3]:
def transform_maps_data(df: DataFrame, person: str, max_accuracy: float = 20) -> DataFrame:
    """Flatten raw location records into a tidy frame of accurate fixes.

    The input frame is left untouched: all derived columns are built on a new
    frame, so the raw data can be transformed again (or inspected) later.

    Args:
        df: Frame with a ``locations`` column whose cells are dict-like
            records carrying ``latitudeE7``, ``longitudeE7`` and ``timestamp``
            and, optionally, ``accuracy`` and ``source``.
        person: Label written to the ``person`` column of every row.
        max_accuracy: Exclusive upper bound on the accuracy radius (meters);
            only rows with ``accuracy < max_accuracy`` are kept.

    Returns:
        A new frame without the ``locations`` column and with the columns
        ``person``, ``lat``, ``lon`` (decimal degrees), ``timestamp``
        (parsed datetimes), ``accuracy`` and ``source``. The original index
        labels of the surviving rows are preserved.

    Raises:
        KeyError: If ``df`` has no ``locations`` column or a record lacks one
            of the required coordinate/timestamp keys.
    """
    locations = df['locations']

    def required(key: str):
        # Coordinates and timestamp are essential: fail loudly when absent.
        return locations.map(lambda record: record[key])

    def optional(key: str):
        # A record without this key yields a missing value instead of
        # aborting the whole transform.
        return locations.map(lambda record: record.get(key))

    tidy = df.drop(columns=['locations']).assign(
        person=person,
        # E7 integers -> decimal degrees.
        lat=required('latitudeE7') / 10.**7,
        lon=required('longitudeE7') / 10.**7,
        timestamp=pd.to_datetime(required('timestamp'), format='ISO8601'),
        # to_numeric turns a missing accuracy into NaN so the comparison
        # below is well defined (NaN < x is False, dropping the row).
        accuracy=pd.to_numeric(optional('accuracy')),
        source=optional('source'),
    )
    # Keep only the fixes whose accuracy radius is below the threshold.
    return tidy[tidy['accuracy'] < max_accuracy]
    

In [4]:
def filter_relevant_days(df: DataFrame, start: Timestamp, end: Timestamp) -> DataFrame:
    """Keep the rows whose ``timestamp`` lies in ``[start, end]``.

    Both bounds are inclusive. The input frame is not modified.

    Args:
        df: Frame with a ``timestamp`` column comparable to ``start``/``end``.
        start: Lower bound of the window.
        end: Upper bound of the window.

    Returns:
        The subset of ``df`` inside the window, with its index labels intact.
    """
    stamps = df['timestamp']
    inside_window = stamps.ge(start) & stamps.le(end)
    return df.loc[inside_window]

In [5]:
# Identifiers of the two people whose location exports are compared.
person_1, person_2 = 'JGPV', 'JSRV'
# Load each person's raw export (person 1 first, then person 2).
maps_person_1, maps_person_2 = map(extract_maps_data, (person_1, person_2))


In [6]:
# Flatten person 1's raw records and preview the first five rows.
data_person_1 = transform_maps_data(df=maps_person_1, person=person_1)
data_person_1.head(n=5)

Unnamed: 0,person,lat,lon,timestamp,accuracy,source
8889,JGPV,4.708151,-74.071575,2013-07-08 16:59:03.101000+00:00,6,GPS
8890,JGPV,4.705921,-74.071111,2013-07-08 17:00:03.024000+00:00,8,GPS
8892,JGPV,4.695915,-74.068882,2013-07-08 17:03:03.009000+00:00,8,GPS
8893,JGPV,4.692902,-74.067094,2013-07-08 17:04:03.030000+00:00,6,GPS
8894,JGPV,4.689582,-74.065618,2013-07-08 17:06:03.027000+00:00,6,GPS


In [7]:
# Flatten person 2's raw records and preview the first five rows.
data_person_2 = transform_maps_data(df=maps_person_2, person=person_2)
data_person_2.head(n=5)

Unnamed: 0,person,lat,lon,timestamp,accuracy,source
1086,JSRV,4.685569,-74.076835,2014-10-12 16:17:33.897000+00:00,4,CELL
1087,JSRV,4.685569,-74.076835,2014-10-12 16:18:34.948000+00:00,4,CELL
1088,JSRV,4.685569,-74.076835,2014-10-12 16:19:35.028000+00:00,4,CELL
1089,JSRV,4.685569,-74.076835,2014-10-12 16:21:23.220000+00:00,4,CELL
1090,JSRV,4.685569,-74.076835,2014-10-12 16:22:43.968000+00:00,4,CELL


In [8]:
# Earliest timestamp available for each person.
oldest_day_person_1 = data_person_1['timestamp'].min()
oldest_day_person_2 = data_person_2['timestamp'].min()
# The window starts at the later of the two earliest timestamps.
# Argument order matters: person 1's value is chosen only when it is
# strictly greater, otherwise person 2's value is used.
start_day = max(oldest_day_person_2, oldest_day_person_1)
# The window ends at midnight UTC of the date parsed from '10/01/19' (month/day/year).
end_day = pd.Timestamp(
    dt.strptime('10/01/19', '%m/%d/%y')
).tz_localize('UTC')

In [9]:
# Restrict both people's data to the shared [start_day, end_day] window.
data_person_1, data_person_2 = (
    filter_relevant_days(person_data, start_day, end_day)
    for person_data in (data_person_1, data_person_2)
)