In [1]:
from glob import glob
from os.path import join
import pandas as pd

from maps.pings import Pings
from maps.maps import FoliumMap

# Relative directories that hold the input data.
location_data_dirpath = '../../data/locations/'
photo_metadata_dirpath = '../../data/photo_metadata'

# HDF files read by the loading cell: location pings and photo-post metadata.
pings_data_path = join(location_data_dirpath, 'pings.hdf')
posts_path = join(photo_metadata_dirpath, 'posts.hdf')

# Enable IPython's autoreload extension so edited modules are re-imported
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
# load posts and all pings
ping_data = pd.read_hdf(pings_data_path, 'data')

# keep only pings whose accuracy is below 3000 and whose altitude is below 4000
ping_data = ping_data[(ping_data.accuracy < 3000) & (ping_data.altitude < 4000)]

posts = pd.read_hdf(posts_path, 'data')

In [3]:
# geocode trip pings
# Only pings whose index label is '2019-07-24' or later are handed to Pings.
# NOTE(review): the meaning of the second positional argument (2) is defined
# by maps.pings.Pings and is not visible here -- confirm.
pings = Pings(ping_data.loc['2019-07-24':], 2)
# The three calls below run in this fixed order; later cells read
# pings.pings, pings._pings, pings.centroids and pings.geocodes.
# NOTE(review): presumably each step depends on the previous one -- confirm
# against the Pings implementation before reordering.
pings.cluster(use_weights=False, n_clusters=500)
pings.build_geocodes()
pings.geocode()

Loading formatted geocoded file...


In [4]:
# m = FoliumMap()
# m.build_map(width='70%', height='70%')
# m.add_bubbles(pings.centroids, pings.geocodes.index.values)
# m.map

In [5]:
from folium import FeatureGroup, Icon, Marker, PolyLine, TileLayer
from folium.features import CustomIcon
from folium.plugins import MarkerCluster, HeatMap, AntPath
from maps.popup import ImagePopup

class TripSegment:
    """A contiguous stretch of pings travelled with one mode of transport.

    Parameters
    ----------
    data : pandas.DataFrame
        Pings belonging to the segment. The properties below read the index
        (used as start/stop markers) and the columns ``latitude``,
        ``longitude``, ``trip_id``, ``country`` and, for US locations,
        ``state``.
        NOTE(review): rows are assumed to be in chronological order -- the
        first row is treated as the origin and the last as the destination.
    owner : optional
        Stored on the instance as-is; not used by this class.
    """

    # Columns holding the coordinates passed to the folium layers.
    GPS_INDEX = ['latitude', 'longitude']

    def __init__(self, data, owner=None):
        self.data = data
        self.owner = owner

    @property
    def start(self):
        """Index value of the first ping in the segment."""
        return self.data.index.values[0]

    @property
    def stop(self):
        """Index value of the last ping in the segment.

        This must be the *last* row, not the second one: segments built from
        more than two pings (e.g. train rides) would otherwise report a stop
        that lies right after their start.
        """
        return self.data.index.values[-1]

    @property
    def dt(self):
        """Difference between ``stop`` and ``start``."""
        return self.stop - self.start

    @property
    def layer_id(self):
        """First string-valued ``trip_id`` found in the segment.

        Returns ``None`` when no ping carries a string trip id (including the
        case of an empty segment).
        """
        for candidate in self.data.trip_id.unique():
            if isinstance(candidate, str):
                return candidate
        return None

    @property
    def origin(self):
        """First ping (row) of the segment."""
        return self.data.iloc[0]

    @property
    def destination(self):
        """Last ping (row) of the segment."""
        return self.data.iloc[-1]

    @property
    def international(self):
        """True when origin and destination are in different countries."""
        return self.origin.country != self.destination.country

    @staticmethod
    def fmt_location(location):
        """Format a ping as ``'<state>, US'`` for US locations, else as its country."""
        if location.country == 'US':
            return '{:s}, {:s}'.format(location.state, location.country)
        return '{:s}'.format(location.country)

    @property
    def caption(self):
        """Tooltip text of the form ``'<origin> --> <destination>'``."""
        origin_str = self.fmt_location(self.origin)
        destination_str = self.fmt_location(self.destination)
        return '{:s} --> {:s}'.format(origin_str, destination_str)

    def _coordinates(self):
        """Return the segment's (latitude, longitude) pairs as an array."""
        return self.data[self.GPS_INDEX].values

    def get_line(self, **kwargs):
        """Return a folium ``PolyLine`` through the pings; kwargs are forwarded."""
        return PolyLine(self._coordinates(), tooltip=self.caption, **kwargs)

    def get_antpath(self, **kwargs):
        """Return a folium ``AntPath`` through the pings; kwargs are forwarded."""
        return AntPath(self._coordinates(), tooltip=self.caption, **kwargs)

    def get_heatmap(self, radius=7, blur=7, **kwargs):
        """Return a folium ``HeatMap`` of the pings; extra kwargs are forwarded."""
        return HeatMap(self._coordinates(),
                       radius=radius,
                       blur=blur,
                       **kwargs)
        
            
class FlightSegment(TripSegment):
    """Segment travelled by plane; adds no behavior to TripSegment."""


class DriveSegment(TripSegment):
    """Segment travelled by road; adds no behavior to TripSegment."""


class TrainSegment(TripSegment):
    """Segment travelled by train; adds no behavior to TripSegment."""

In [6]:
from modules.utilities import haversine
import numpy as np

def find_flights(pings, owner=None, min_distance=250, min_speed=25):
    """Detect flights as large, fast jumps between consecutive pings.

    Parameters
    ----------
    pings : pandas.DataFrame
        Pings indexed by timestamp, with ``latitude_geocode`` and
        ``longitude_geocode`` columns plus whatever FlightSegment reads
        (``country`` is used by the final filter).
    owner : optional
        Forwarded to every FlightSegment.
    min_distance : float, default 250
        A jump must be longer than this to count as a flight, in the unit
        returned by ``haversine``.
        NOTE(review): presumably kilometres -- confirm.
    min_speed : float, default 25
        A jump must be faster than this, in ``haversine`` units per hour.

    Returns
    -------
    list of FlightSegment
        One two-ping segment per detected jump, keeping only those that are
        international or that depart from the US.
    """
    SEMANTIC_GPS_INDEX = ['latitude_geocode', 'longitude_geocode']

    pings = pings.sort_index()
    gps = pings[SEMANTIC_GPS_INDEX].values.astype(float)

    # Distance covered between each ping and the next one.
    dx = np.array([haversine(*pair) for pair in zip(gps[:-1], gps[1:])])

    # Elapsed hours between consecutive pings. Dividing by a timedelta64 unit
    # is independent of the index resolution, unlike converting to raw
    # integers and assuming nanoseconds.
    dt = np.diff(pings.index.values) / np.timedelta64(1, 'h')

    # Duplicate timestamps give dt == 0; an infinite speed still passes the
    # speed test and NaN (0/0) fails it, so only the warnings are suppressed.
    with np.errstate(divide='ignore', invalid='ignore'):
        is_flight = np.logical_and(dx > min_distance, dx / dt > min_speed)

    flights = [FlightSegment(pings.iloc[[idx, idx + 1]], owner=owner)
               for idx in is_flight.nonzero()[0]]
    return [flight for flight in flights
            if flight.international or flight.origin.country == 'US']

def find_drives(pings, transits):
    """Split all pings into the drives that lie between transit segments.

    Parameters
    ----------
    pings : Pings
        Object whose ``pings`` attribute is a frame with a two-level index;
        the first level is dropped and the rest is sorted before slicing.
    transits : list of TripSegment
        Non-driving segments (flights, trains). They are ordered by their
        ``start`` before use, so any input order is accepted.

    Returns
    -------
    list of DriveSegment
        The pings before the first transit, between each pair of consecutive
        transits, and after the last transit. With no transits at all, a
        single segment covering every ping is returned.
    """
    all_pings = pings.pings.droplevel(0).sort_index()
    transits = sorted(transits, key=lambda x: x.start)

    # Without any transit there is nothing to split on.
    if not transits:
        return [DriveSegment(all_pings)]

    drives = [DriveSegment(all_pings[: transits[0].start])]
    for previous, following in zip(transits, transits[1:]):
        drives.append(DriveSegment(all_pings[previous.stop: following.start]))
    drives.append(DriveSegment(all_pings[transits[-1].stop:]))
    return drives

In [7]:
# from sklearn.cluster import MeanShift

# # cluster photos
# photos = posts[~posts.latitude.isna()]
# photos.loc[:, 'gallery'] = None
# for idx, album in photos.groupby('album'):
#     model = MeanShift(bandwidth=1.).fit(album[Pings.GPS_INDEX].values)
#     gallery_base = '-'.join([x.lower() for x in idx.split()])
#     labels = [gallery_base+'{:d}'.format(_id) for _id in model.labels_]
#     photos.loc[album.index, 'gallery'] = labels

In [8]:
# add trip ids
# trip name -> (first date, last date) of the trip
trip_ids = {
    'July': ('2019-07-24', '2019-07-28'),
    'August-November': ('2019-08-11', '2019-11-09'),
    'December': ('2019-12-01', '2019-12-24'),
    'February': ('2020-02-25', '2020-03-17')
}

# Label each ping with the trip whose date window contains its second index
# level; pings outside every window keep trip_id None.
# NOTE(review): the upper bound is compared against a bare date string, so
# pings later on the last day may fall outside the window -- confirm intent.
pings._pings['trip_id'] = None
for trip_id, (first_date, last_date) in trip_ids.items():
    timestamps = pings._pings.index.get_level_values(1)
    in_window = (timestamps >= first_date) & (timestamps <= last_date)
    pings._pings.loc[in_window, 'trip_id'] = trip_id

In [11]:
# find trains/planes/drives
# Train rides are listed by hand: (first index level key, first timestamp, last timestamp).
train_windows = [
    ('SMB', '2019-08-15 22:31:30', '2019-08-16 07:23:57'),
    ('CMB', '2019-09-02 04:43:04', '2019-09-02 06:03:54'),
]
trains = [TrainSegment(pings.pings.loc[key].loc[begin:end])
          for key, begin, end in train_windows]

# Flights are detected from the 'SMB' pings only; drives are whatever remains
# between the train and flight segments.
flights = find_flights(pings.pings.loc['SMB'])
drives = find_drives(pings, trains+flights)

In [25]:
# LayerControl is used at the end of this cell but is not part of the folium
# imports made earlier in the notebook, so bring it into scope here.
from folium import LayerControl

m = FoliumMap()
m.build_map(width='100%', height='100%')
#TileLayer('openstreetmap', show=False).add_to(m.map)
#TileLayer('Stamen Terrain', show=False).add_to(m.map)

# One hidden-by-default layer per trip, plus always-on heatmap and photo layers.
layers = {name: FeatureGroup(name=name, show=False).add_to(m.map) for name in trip_ids.keys()}
fg_heatmap = FeatureGroup(name='Heatmap', show=True).add_to(m.map)
fg_photos = FeatureGroup(name='Photos', show=True).add_to(m.map)

# Flights and trains are drawn as faint straight lines on their trip's layer.
for flight in flights:
    obj = flight.get_line(color='black', weight=3, opacity=0.2)
    obj.add_to(layers[flight.layer_id])

for train in trains:
    obj = train.get_line(color='blue', weight=3, opacity=0.2)
    obj.add_to(layers[train.layer_id])

# Drives feed the shared heatmap and get an animated path on their trip's layer.
for drive in drives:
    hm = drive.get_heatmap()
    hm.add_to(fg_heatmap)
    obj = drive.get_antpath(color='red', weight=3)
    obj.add_to(layers[drive.layer_id])


# photo clusters
# Select the geotagged posts here so the cell does not depend on a variable
# that is only created further down the notebook.
photos = posts[~posts.latitude.isna()]
mc = MarkerCluster().add_to(fg_photos)
for idx, photo in photos.iterrows():
    xy = photo[Pings.GPS_INDEX].values.astype(float)
    popup = ImagePopup(photo.imgur_id, photo.caption).popup
    tooltip = photo.caption
    Marker(xy, popup=popup,
           tooltip=tooltip,
           icon=Icon('darkred', icon_color='white', icon='image', prefix='fa')
          ).add_to(mc)

# add the layer control
LayerControl().add_to(m.map)

m.map

In [22]:
# Save the map built in the previous cell to an HTML file.
m.map.save('../travel/map.html')

In [18]:
# Posts that have a latitude, i.e. the ones that can be placed on the map as photo markers.
photos = posts[posts.latitude.notna()]