In [1]:
import json
from os import listdir
from os.path import isdir, join
from glob import glob
import pandas as pd
from datetime import datetime

from modules.utilities import read_json, fmt_address
from modules.places import Visit

# Root directory of the location data; the input and output paths used in
# this notebook are all built by joining onto it.
location_data_path = '../../data/locations/'

# (Re)load the autoreload extension; mode 2 reloads imported modules before
# each cell is executed, so edits to the local `modules` package are picked up.
%reload_ext autoreload
%autoreload 2

In [2]:
# Semantic location history folders, one per owner. The owner tag is the last
# '_'-separated token of the 'location_history_<owner>' directory name.
semantic_subdir = 'Location History/Semantic Location History'
smb_semantic_path = join(location_data_path, 'location_history_smb/' + semantic_subdir)
cmb_semantic_path = join(location_data_path, 'location_history_cmb/' + semantic_subdir)

# SMB is listed (and therefore processed) before CMB.
semantic_paths = (smb_semantic_path, cmb_semantic_path)

# Record all semantic locations and visits to them

In [8]:
# parse all semantic locations
# Plain records are accumulated in lists and each DataFrame is built once at
# the end (separate names avoid reusing `places`/`visits` for two kinds of object).
place_records, visit_records = [], []
for semantic_path in semantic_paths:
    # third path component from the end is 'location_history_<owner>';
    # its last '_'-separated token, upper-cased, is the owner tag (e.g. 'SMB')
    owner = semantic_path.split('/')[-3].split('_')[-1].upper()

    # glob returns matches in arbitrary order. Sorting makes the parse order
    # reproducible, which matters because drop_duplicates below keeps the
    # FIRST record seen for each place id.
    for dirpath in sorted(glob(join(semantic_path, '*'))):
        for filepath in sorted(glob(join(dirpath, '*.json'))):
            semantic_history = read_json(filepath)
            for timeline_object in semantic_history['timelineObjects']:
                # only 'placeVisit' entries are recorded; other timeline objects are skipped
                if 'placeVisit' in timeline_object:
                    visit = Visit(timeline_object['placeVisit'], owner=owner)
                    place_records.extend(visit.all_places)
                    visit_records.append(visit.to_record())

# one row per distinct place id, one row per visit
places = pd.DataFrame(place_records).drop_duplicates('id')
visits = pd.DataFrame(visit_records)

In [9]:
# append formatted addresses
# fmt_address is applied to every raw address; the resulting records become
# extra columns, aligned with `places` through its index.
formatted_records = places['address'].apply(fmt_address).tolist()
address_data = pd.DataFrame.from_records(formatted_records, index=places.index)
semantic_locations = places.join(address_data)

# remove blank entries: keep rows whose address is neither missing nor empty
has_address = semantic_locations['address'].notna() & (semantic_locations['address'] != '')
semantic_locations = semantic_locations[has_address]

In [10]:
from modules.places import get_city_name, get_country_code, get_state_code, get_location_str

# semantic_locations comes out of boolean filtering; take an explicit copy so
# the column writes below modify an independent frame instead of a slice
# (avoids pandas' SettingWithCopyWarning).
semantic_locations = semantic_locations.copy()

# clean up some common problems: patch a field whenever the lower-cased
# address contains a known keyword (plain substring match, not a regex)
address_lower = semantic_locations['address'].str.lower()
semantic_locations.loc[address_lower.str.contains('czech', regex=False), 'country'] = 'česko'
semantic_locations.loc[address_lower.str.contains('ouarzazate', regex=False), 'city'] = 'ouarzazate'
semantic_locations.loc[address_lower.str.contains('marrakesh', regex=False), 'city'] = 'marrakesh'

# parse city/country
# NOTE(review): get_city_name receives the whole row, so it presumably takes
# the 'city' values patched above into account -- confirm in modules.places.
semantic_locations['city'] = semantic_locations.apply(get_city_name, axis=1)

# rows for which no city could be determined are dropped; copy again so the
# remaining column assignments do not write into a filtered slice
semantic_locations = semantic_locations[semantic_locations['city'].notna()].copy()
semantic_locations['country_code'] = semantic_locations['country'].apply(get_country_code)
semantic_locations['state_code'] = semantic_locations.apply(get_state_code, axis=1)
semantic_locations['location_str'] = semantic_locations.apply(get_location_str, axis=1)

In [11]:
# save semantic locations
# `key` is passed by keyword: newer pandas versions deprecate passing any
# to_hdf argument other than the path positionally.
semantic_locations_path = join(location_data_path, 'semantic_locations.hdf')
semantic_locations.to_hdf(semantic_locations_path, key='data')

# save visits
semantic_visits_path = join(location_data_path, 'semantic_visits.hdf')
visits.to_hdf(semantic_visits_path, key='data')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block1_values] [items->Index(['id', 'name', 'address', 'house_number', 'road', 'city', 'state',
       'postcode', 'country', 'unit', 'house', 'city_district', 'suburb',
       'state_district', 'po_box', 'level', 'island', 'country_code',
       'state_code', 'location_str'],
      dtype='object')]

  encoding=encoding,


# Mobile ping data

In [12]:
from modules.utilities import posix_to_ts, read_json

In [13]:
def load_pings(filepath, owner):
    """Load the location pings stored in a JSON file into a DataFrame.

    Parameters
    ----------
    filepath : path handed to ``read_json``; the parsed content must provide
        a ``'locations'`` entry holding the ping records.
    owner : value written to the ``owner`` column of every returned row.

    Returns
    -------
    pandas.DataFrame indexed by ``timestamp`` with the columns ``latitude``,
    ``longitude``, ``accuracy``, ``altitude`` and ``owner``.
    """
    records = read_json(filepath)['locations']
    frame = pd.DataFrame.from_records(records)

    # the *E7 fields are divided by 1e7 to obtain latitude / longitude
    frame['latitude'] = frame['latitudeE7'] / 1e7
    frame['longitude'] = frame['longitudeE7'] / 1e7

    # timestampMs is converted element-wise and becomes the index
    frame['timestamp'] = frame['timestampMs'].apply(posix_to_ts)

    # keep only the columns of interest, then tag every ping with its owner
    kept_columns = ['latitude', 'longitude', 'accuracy', 'altitude']
    frame = frame.set_index('timestamp').loc[:, kept_columns]
    frame['owner'] = owner

    return frame

In [26]:
# raw ping exports, one file per owner
smb_pings_path = join(location_data_path, 'location_history_smb/Location History/Location History.json')
cmb_pings_path = join(location_data_path, 'location_history_cmb/Location History/Location History.json')
smb_pings = load_pings(smb_pings_path, 'SMB')
cmb_pings = load_pings(cmb_pings_path, 'CMB')

# Only CMB pings inside this date window are merged with the SMB pings
# (label slicing includes both end points).
# NOTE(review): the reason for this particular window is not recorded here.
cmb_window_start, cmb_window_end = '2019-06-01', '2019-10-21'

# Label-based slicing needs a monotonic index, so sort before slicing rather
# than relying on the export already being in chronological order.
# Assumes load_pings yields a datetime-like index -- TODO confirm posix_to_ts.
cmb_pings_window = cmb_pings.sort_index().loc[cmb_window_start:cmb_window_end]

pings = pd.concat([smb_pings, cmb_pings_window]).sort_index()

In [28]:
# save pings (disabled: uncomment the two lines below to write pings.hdf)
#pings_data_path = join(location_data_path, 'pings.hdf')
#pings.to_hdf(pings_data_path, 'data')