In [None]:
#| default_exp json_explorer

# json_explorer

> Playing around with the Google Takeout JSON.

In [None]:
#| hide
from nbdev.showdoc import *

Imports

In [None]:
#| export
from datetime import datetime
from collections import namedtuple
import json

In [None]:
#| export

# Immutable record describing a single location sample:
# when it was taken, where it was, and the reported accuracy.
Location = namedtuple("Location", "timestamp latitude longitude accuracy")

It's kind of neat that I can write markdown in here.
BTW, `#| export` is the nbdev directive that exports a cell into the module source file when we run `nbdev_export`.

In [None]:
#| export

def get_timestamp(timestamp):
    """
    Convert a Google Takeout timestamp string to whole seconds since the Unix epoch.

    Google stores the timestamp in different formats (with and without
    fractional seconds), making this annoying, so each known format is
    tried in turn.

    Args:
        timestamp: string such as "2022-06-24T19:38:55.633Z" or
            "2022-06-24T19:38:55Z".

    Returns:
        int: seconds since the epoch; any fractional part is truncated.

    Raises:
        ValueError: if the string matches none of the known formats.
    """
    # "%z" accepts the trailing "Z" (Python 3.7+) and yields a timezone-aware
    # UTC datetime. Matching "Z" as a literal character instead produces a
    # naive datetime, which .timestamp() interprets in the machine's local
    # timezone and therefore shifts the result by the local UTC offset.
    known_formats = ("%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%dT%H:%M:%S%z")
    for fmt in known_formats:
        try:
            parsed = datetime.strptime(timestamp, fmt)
        except ValueError:
            # Not this format; fall through to the next candidate.
            continue
        return int(parsed.timestamp())
    raise ValueError(f"unrecognised timestamp format: {timestamp!r}")

In [None]:
show_doc(get_timestamp)

---

#### get_timestamp

>      get_timestamp (timestamp)

Google stores the timestamp in different formats, making this annoying

In [None]:
# test data: timestamp format with fractional seconds
test_string = "2022-06-24T19:38:55.633Z"
get_timestamp(test_string)

1656110335

In [None]:
# test data: timestamp format without fractional seconds
test_string2 = "2022-06-24T19:38:55Z"
get_timestamp(test_string2)

1656110335

In [None]:
show_doc(get_timestamp)

---

#### get_timestamp

>      get_timestamp (timestamp)

Google stores the timestamp in different formats, making this annoying

In [None]:
# Path to the sample Records.json, relative to the notebook's working directory.
file_to_open = "sample-data/Records.json"

In [None]:
#| export

def get_locations(file_to_open):
    """
    Load the location records from a JSON file.

    Args:
        file_to_open: path to a JSON file whose top-level object has a
            "locations" key.

    Returns:
        list: a new list holding the entries stored under "locations".

    Raises:
        KeyError: if the file has no top-level "locations" key.
    """
    # Read as UTF-8 explicitly rather than relying on the platform's default
    # encoding, which is not UTF-8 everywhere.
    with open(file_to_open, 'r', encoding='utf-8') as f:
        json_file = json.load(f)
    return list(json_file["locations"])

In [None]:
locations = get_locations(file_to_open)

In [None]:
len(locations)

672293

In [None]:
locations[1]

{'latitudeE7': 446549213,
 'longitudeE7': -635836178,
 'accuracy': 45,
 'source': 'WIFI',
 'deviceTag': 586642716,
 'timestamp': '2016-06-29T13:13:01.899Z'}

In [None]:
locations[-1]

{'latitudeE7': 446579163,
 'longitudeE7': -636416645,
 'accuracy': 17,
 'altitude': 52,
 'verticalAccuracy': 4,
 'activity': [{'activity': [{'type': 'UNKNOWN', 'confidence': 41},
    {'type': 'IN_VEHICLE', 'confidence': 10},
    {'type': 'ON_BICYCLE', 'confidence': 10},
    {'type': 'ON_FOOT', 'confidence': 10},
    {'type': 'STILL', 'confidence': 10},
    {'type': 'WALKING', 'confidence': 10},
    {'type': 'RUNNING', 'confidence': 10},
    {'type': 'IN_ROAD_VEHICLE', 'confidence': 10},
    {'type': 'IN_RAIL_VEHICLE', 'confidence': 10}],
   'timestamp': '2022-07-29T21:02:09.958Z'},
  {'activity': [{'type': 'ON_FOOT', 'confidence': 92},
    {'type': 'WALKING', 'confidence': 92},
    {'type': 'IN_VEHICLE', 'confidence': 2},
    {'type': 'STILL', 'confidence': 2},
    {'type': 'RUNNING', 'confidence': 2},
    {'type': 'IN_ROAD_VEHICLE', 'confidence': 2},
    {'type': 'ON_BICYCLE', 'confidence': 1},
    {'type': 'IN_RAIL_VEHICLE', 'confidence': 1},
    {'type': 'UNKNOWN', 'confidence': 0}]

In [None]:
#| export

# Build up the location history.
# Due to the malformed Google data, not every entry has latitudeE7/longitudeE7,
# and the timestamps come in differing formats; parsing is best effort.

def build_location_history(locations):
    """
    Convert raw location records into ``Location`` tuples sorted by timestamp.

    Records that lack either "latitudeE7" or "longitudeE7" are skipped.

    Args:
        locations: iterable of dicts carrying "timestamp", "latitudeE7",
            "longitudeE7" and "accuracy" keys.

    Returns:
        list: ``Location`` tuples in ascending timestamp order.

    Raises:
        KeyError: if a record with coordinates lacks "timestamp" or "accuracy".
        ValueError: if a timestamp cannot be parsed by ``get_timestamp``.
    """
    location_history = [
        Location(
            get_timestamp(entry["timestamp"]),
            entry["latitudeE7"],
            entry["longitudeE7"],
            entry["accuracy"],
        )
        for entry in locations
        # Test for presence explicitly instead of truthiness: a coordinate
        # value of 0 is legitimate and must not be filtered out, and a record
        # with only one of the two coordinates cannot be used.
        if entry.get("latitudeE7") is not None and entry.get("longitudeE7") is not None
    ]
    # sort by timestamp
    location_history.sort(key=lambda location: location.timestamp)
    return location_history

More validation of the output.

In [None]:
location_history = build_location_history(locations)
# 672293 records were loaded above; the expected count here is two fewer
# (presumably the entries without latitudeE7 that the builder skips — confirm).
assert len(location_history) == 672291

In [None]:
location_history[0]

Location(timestamp=1467216494, latitude=446549411, longitude=-635836042, accuracy=41)

In [None]:
location_history[-1]

Location(timestamp=1659139720, latitude=446579163, longitude=-636416645, accuracy=17)

In [None]:
location_history[-1]

Location(timestamp=1659139720, latitude=446579163, longitude=-636416645, accuracy=17)

In [None]:
# Run last: writes the `#| export` cells out to the module source file.
from nbdev.doclinks import nbdev_export
nbdev_export()