In [123]:
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
import datetime
from bs4 import BeautifulSoup
import json
from geopy import distance
from itertools import cycle
import pandas as pd
import seaborn as sns
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [155]:
def get_coord(location):
    """Return ``(latitude, longitude)`` in decimal degrees for a location record.

    The record stores each coordinate under ``latitudeE7`` / ``longitudeE7``
    as degrees multiplied by 1e7. The conversion is done numerically: splicing
    a decimal point into the digit string mis-scales values with fewer than
    eight digits (e.g. ``12345`` would become ``0.12345``) and fails outright
    on small negative values (``".-12345"``).

    Args:
        location: mapping with ``latitudeE7`` and ``longitudeE7`` entries,
            given as ints or as strings of digits.

    Returns:
        Tuple ``(latitude, longitude)`` of floats.
    """
    latitude = int(location['latitudeE7']) / 1e7
    longitude = int(location['longitudeE7']) / 1e7
    return latitude, longitude


def get_date(location):
    """Convert a record's ``timestampMs`` entry into a naive ``datetime``.

    The entry (a string or an int) is read as milliseconds since the Unix
    epoch; ``datetime.fromtimestamp`` renders it in the machine's local time
    zone.
    """
    epoch_ms = int(location['timestampMs'])
    epoch_seconds = epoch_ms / 1000
    return datetime.datetime.fromtimestamp(epoch_seconds)


class Locator:
    """Accumulate, per calendar day, the hours spent near a set of named places.

    Args:
        places: mapping of place name -> ``(latitude, longitude)`` tuple, in
            the same order that ``get_coord`` returns.
        threshold: distance in miles; a record closer than this to a place
            counts as being at that place.

    Attributes:
        been_there: place name -> whether a stay at that place is currently open.
        start_time: place name -> timestamp at which the open stay began
            (``None`` when no stay is open).
    """

    def __init__(self, places, threshold=0.1):
        self.places = places
        self.threshold = threshold
        self.been_there = {}
        self.start_time = {}
        self.initiate()

    def initiate(self):
        """Reset the tracking state: no place has an open stay."""
        self.been_there = dict.fromkeys(self.places, False)
        self.start_time = dict.fromkeys(self.places, None)

    def exit_from_place(self, place, t, d):
        """Close the open stay at ``place`` and add its length to the current day.

        Does nothing when no stay is open at ``place``.

        Args:
            place: place name (a key of ``self.places``).
            t: ``datetime`` at which the stay ends.
            d: column dict being built by ``calculate``; the stay's length in
                hours is added to the last entry of ``d[place]``.
        """
        if not self.been_there[place]:
            return

        # total_seconds() keeps the days component that `.seconds` silently
        # drops, and abs() makes the duration independent of whether records
        # are iterated newest-first or oldest-first.
        elapsed_seconds = abs((self.start_time[place] - t).total_seconds())
        d[place][-1] += elapsed_seconds / 3600

        self.been_there[place] = False
        self.start_time[place] = None

    def enter_to_place(self, place, t):
        """Open a stay at ``place`` beginning at ``datetime`` ``t``."""
        self.been_there[place] = True
        self.start_time[place] = t

    def calculate(self, locations, limit=100000, output_path='data-info.tsv'):
        """Build a per-day table of hours spent at each place.

        Records are processed in the order given. Whenever the calendar date
        (from ``get_date``) changes, every open stay is closed at the first
        timestamp of the new day and credited to the day being finished; a
        stay that continues is re-opened by that same record. Stays still open
        after the last processed record are closed at that record's timestamp
        so their hours are not lost.

        Args:
            locations: iterable of location records accepted by ``get_date``
                and ``get_coord``.
            limit: 1-based index of the record at which iteration stops, so at
                most ``limit - 1`` records are processed. ``None`` processes
                every record.
            output_path: where the table is written as a tab-separated file;
                ``None`` skips writing.

        Returns:
            ``pandas.DataFrame`` with one row per day: ``date``
            (``YYYY-MM-DD``), ``month`` (``YYYY-MM``), ``weekday``
            (1 = Monday ... 7 = Sunday) and one hours column per place.
        """
        # Start clean so a stay left open by an earlier (or aborted) run
        # cannot contribute a bogus duration to this one.
        self.initiate()

        d = {"date": [], 'weekday': [], 'month': []}
        d.update((place, []) for place in self.places)

        date_to_process = None
        t = None  # timestamp of the most recently processed record
        for i, location in enumerate(locations, 1):
            if limit is not None and i == limit:
                break

            t = get_date(location)
            curr_date = t.strftime('%Y-%m-%d')

            # A new calendar day starts: finish the previous one, open a new row.
            if curr_date != date_to_process:
                if date_to_process is not None:
                    for place in self.places:
                        self.exit_from_place(place, t, d)

                date_to_process = curr_date
                d['date'].append(date_to_process)
                d['month'].append(t.strftime('%Y-%m'))
                d['weekday'].append(t.weekday() + 1)
                for place in self.places:
                    d[place].append(0)

            curr_loc = get_coord(location)
            for place, loc in self.places.items():
                dist = distance.distance(loc, curr_loc).miles

                if dist < self.threshold and not self.been_there[place]:
                    # Entering
                    self.enter_to_place(place, t)
                elif dist > self.threshold and self.been_there[place]:
                    # Exiting
                    self.exit_from_place(place, t, d)

        # Credit stays that are still open when the data runs out.
        if t is not None:
            for place in self.places:
                self.exit_from_place(place, t, d)

        df = pd.DataFrame(d)
        if output_path is not None:
            df.to_csv(output_path, sep='\t', index=False)  # save
        return df
                               
                               
# Places of interest as (latitude, longitude) pairs in decimal degrees.
house = (37.449264, -122.165769)
sarah_mtv_office = (37.421965, -122.090632)
places = dict([
    ('home', house),
    ('sarah-mtv-office', sarah_mtv_office),
])

# `locations` is the record list read from data.json in a separate cell of
# this notebook; that cell has to be run before this one.
locator = Locator(places)
df = locator.calculate(locations)
df

Unnamed: 0,date,home,month,sarah-mtv-office,weekday
0,2018-05-03,8.858333,2018-05,0.000000,4
1,2018-05-02,16.552222,2018-05,0.000000,3
2,2018-05-01,12.709167,2018-05,0.298889,2
3,2018-04-30,15.170833,2018-04,0.000000,1
4,2018-04-29,17.719167,2018-04,0.000000,7
5,2018-04-28,14.123333,2018-04,0.000000,6
6,2018-04-27,14.112778,2018-04,0.000000,5
7,2018-04-26,15.968889,2018-04,0.000000,4
8,2018-04-25,17.472500,2018-04,0.034444,3
9,2018-04-24,21.462778,2018-04,0.000000,2


In [156]:
# Average daily hours at home during April 2018.
# Dates are 'YYYY-MM-DD' strings, so a literal prefix match selects the month
# without any regex or glob semantics; .loc avoids chained indexing.
mask = df['date'].str.startswith('2018-04')
df.loc[mask, 'home'].mean()

16.376740740740743

In [157]:
# Monthly averages of the numeric columns; numeric_only=True keeps the string
# `date` column out of the aggregation instead of relying on it being dropped.
df.groupby('month').mean(numeric_only=True)

Unnamed: 0_level_0,home,sarah-mtv-office,weekday
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-12,6.281237,0.076856,4.136364
2018-01,14.092545,0.077473,3.806452
2018-02,14.683333,0.137401,4.0
2018-03,5.695708,0.112249,4.096774
2018-04,16.376741,0.116685,4.0
2018-05,12.706574,0.09963,3.0


In [12]:
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
data = '/Users/obaskaya/Desktop/Takeout/Location History/data.json'
# The context manager closes the file handle once the JSON has been parsed.
with open(data, encoding='utf-8') as f:
    locations = json.load(f)['locations']