In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime as dt
import numpy as np
import json
%matplotlib inline

In [2]:
location_as_json = pd.DataFrame(json.load(open('Standortverlauf.json', 'r')))

In [3]:
# parse lat, lon, and timestamp from the dict inside the locations column
location_as_json['latitude'] = location_as_json['locations'].map(lambda x: x['latitudeE7'])
location_as_json['longitude'] = location_as_json['locations'].map(lambda x: x['longitudeE7'])
location_as_json['timestamp_ms'] = location_as_json['locations'].map(lambda x: x['timestampMs'])

# convert lat/lon to decimalized degrees and the timestamp to date-time
location_as_json['latitude'] = location_as_json['latitude'] / 10.**7
location_as_json['longitude'] = location_as_json['longitude'] / 10.**7
location_as_json['timestamp_ms'] = location_as_json['timestamp_ms'].astype(float) / 1000
location_as_json['datetime'] = location_as_json['timestamp_ms'].map(lambda x: dt.fromtimestamp(x).strftime('%Y-%m-%d %H:%M:%S'))

In [4]:
location_as_json.set_index(['datetime'], inplace=True)

In [5]:
location_as_json.drop(['locations', 'timestamp_ms'], axis=1, inplace=True, errors='ignore')
location_as_json.to_csv('locations')

In [6]:
#Need to do some magic because Google changed the way timestamps are stored at 2015-01-01 (my first time after that is 2015-01-01 00:09:53)
#Before that, all times were computed relative to US West (I think), after that relative to phone time.
#Options:
#    1. I could just start at 2015. Would be sad because I have lots of data with girlfriend before
#    2. Shift all times before 2015 to German local time (throw away the London stay because there wasn't much convo)
#    2a. Problem is that then I would have to take care of summer/winter time by hand.


In [7]:
#Now read the phone pickle
phone_df = pd.read_pickle('phone')

In [8]:
#Joining the phone data
mapChat = location_as_json.copy()
mapChat = mapChat.join(phone_df, how='outer')

# Let's cut first, maybe update some other time TODO!
i = mapChat.index.get_loc('2015-01-01 00:09:53')
mapChat = mapChat[i:]

In [9]:
#Fill the NaNs in latitude and longitude by interpolation:
mapChat['latitude'].interpolate(method='time', limit_direction='forward', inplace=True)
mapChat['longitude'].interpolate(method='time', limit_direction='forward', inplace=True)

In [10]:
girlfriend_sent_lat = mapChat.loc[mapChat['girlfriend'] == 'Sent']['latitude'].values
girlfriend_sent_lon = mapChat.loc[mapChat['girlfriend'] == 'Sent']['longitude'].values
girlfriend_sent_pos = np.column_stack((girlfriend_sent_lat, girlfriend_sent_lon)).tolist()

girlfriend_rec_lat = mapChat.loc[mapChat['girlfriend'] == 'Received']['latitude'].values
girlfriend_rec_lon = mapChat.loc[mapChat['girlfriend'] == 'Received']['longitude'].values
girlfriend_rec_pos = np.column_stack((girlfriend_rec_lat, girlfriend_rec_lon)).tolist()

girlfriend_combined_pos = girlfriend_sent_pos + girlfriend_rec_pos

print(len(girlfriend_combined_pos))
outliers_indices = [ind for ind, loc in enumerate(girlfriend_combined_pos) if (loc[0]<40 and loc[1]<20)]
print(outliers_indices)
run = 0
for ind in outliers_indices:
    print(girlfriend_combined_pos[ind])
    girlfriend_combined_pos.pop(ind-run)
    run += 1
print(len(girlfriend_combined_pos))

15755
[1364, 1365, 1366, 1367]
[38.993863696767676, 9.369493806060607]
[27.324877054343435, 6.565655260606061]
[48.159306427272725, 11.549813945454545]
[48.11222820992908, 11.548993081796691]
15751


In [11]:
import folium
from folium import plugins
heatChat = folium.Map(location=[48,12])
heatChat.add_child(folium.plugins.HeatMap(girlfriend_combined_pos, name='All convos with girlfriend', blur=20, min_opacity=0.4, radius=20, max_val=2.0))

heatChat.save('maps/heatChat.html')

