## Imports

In [4]:
# Default
import sys
import datetime

sys.path.append('../../da-irl')

# Local
from src.impl.activity_env import ActivityEnv
from src.impl.activity_mdp import ATPTransition, ActivityMDP
# from src.impl.activity_params import ATPParameters
from src.impl.activity_rewards import ActivityRewardFunction
from src.irl.meirl import MaxEntIRLAgent
from src.misc.math_utils import create_dir_if_not_exists
from src.file_io.trace_loader import TraceLoader

# Vendor
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
from pandas.tseries.offsets import Day
import shapely
from shapely.geometry import box
import numpy as np
import folium

shapely.speedups.enable()



In [5]:
# Global magics statements

%load_ext autoreload
%autoreload 2
%matplotlib inline

## Constants and Utility Functions

In [877]:
# BBOX: latitude / longitude limits (presumably decimal degrees — TODO confirm).
# Not referenced by any of the visible cells.
LAT_MIN=32.831
LAT_MAX=38.67
LNG_MIN=-128.9
LNG_MAX=-117.41

# DOW: day-of-week codes. They are compared against `Timestamp.dayofweek`
# values inside User, so Monday is 0 and Sunday is 6.
MON = 0
TUES = 1
WEDS = 2
THUR = 3
FRI = 4
SAT = 5
SUN = 6

# Path of the trace CSV handed to User(...) further down.
PATH = '../../da-irl/data/traces/traces_persona_2.csv'

def great_circle_distance(pt1,pt2):
    """
    Return the great-circle (haversine) distance in kilometers between points.

    Parameters
    ----------
    pt1, pt2 : array-like
        Either a single ``(lat, lon)`` pair or an ``(N, 2)`` array of such
        pairs, in decimal degrees. The two arguments are broadcast against
        each other, so one fixed point can be compared with many.

    Returns
    -------
    float or numpy.ndarray
        A scalar for two single points, otherwise one distance per row.

    Examples
    --------
    >>> brussels = (50.8503, 4.3517)
    >>> paris = (48.8566, 2.3522)
    >>> round(float(great_circle_distance(brussels, paris)), 4)
    263.9754
    """
    r = 6371.  # Earth radius in km used by the formula

    # Index the last axis so both a plain (lat, lon) tuple and an (N, 2)
    # array work; the previous `[:, 0]` indexing rejected the tuple form
    # shown in the example above.
    pt1 = np.asarray(pt1, dtype=float)
    pt2 = np.asarray(pt2, dtype=float)
    lat1, lon1 = pt1[..., 0], pt1[..., 1]
    lat2, lon2 = pt2[..., 0], pt2[..., 1]

    delta_latitude = np.radians(lat1 - lat2)
    delta_longitude = np.radians(lon1 - lon2)
    latitude1 = np.radians(lat1)
    latitude2 = np.radians(lat2)

    a = np.sin(delta_latitude / 2) ** 2 + np.cos(latitude1) * np.cos(latitude2) * np.sin(delta_longitude / 2) ** 2
    return r * 2. * np.arcsin(np.sqrt(a))


## Filtering

In [1118]:

class User:
    """
    Trace records of one user, expanded into stay and trip rows and tagged
    as home / work / other.

    The records are loaded from `records_path` on construction and the
    processed DataFrame is stored in `_records`.
    """

    def __init__(self,records_path):
        """Store the settings and immediately load and process the records."""
        self._records_path = records_path
        self._records = None

        self.name = None
        self.home = None
        self.work = None
        self.weekend_days = [SAT,SUN]  # hard code for now
        self.work_start_hr = 7 # hard code for now
        self.work_end_hr = 18 # hard code for now
        self.early_morning_hr = 5
        self.late_night_hr = 20

        self.attributes = {}
        self._load_records()

    def _load_records(self):
        """Load the CSV and run the processing steps in their required order."""
        df = TraceLoader.load_traces_from_csv(self._records_path)
        df = self._fix_df(df)
        df=self._process_home_work_other(df)
        df=self._process_speed(df)
        self._records=df

    def _filter_weekend(self,df):
        """Return the rows of `df` whose enter_time is not on a weekend day."""
        # `not in` cannot be evaluated against a whole Series (it raises
        # ValueError); use an element-wise membership test instead.
        return df[~df.enter_time.dt.dayofweek.isin(self.weekend_days)]

    def _fix_df(self,df):
        """
        Add flag columns, interleave trip rows between the original rows and
        split rows that run past midnight.

        `df` must provide `lat`, `lng`, `enter_time` and `exit_time` columns.
        The flag and exit-coordinate columns are added to the passed frame
        itself; the returned frame is a new one sorted by `enter_time`.
        """
        df['at_home']=0
        df['at_work']=0
        df['at_other']=0
        df['is_trip']=0
        df['exit_lat'] = df.lat
        df['exit_lng'] = df.lng
        df=df.rename(columns = {'lat':'enter_lat','lng':'enter_lng'})

        # Build one trip row per original row: it begins when that row ends
        # and finishes at the time and place where the following row begins.
        df_enter = df.iloc[1:].drop('exit_time',axis=1)
        df_exit = df.drop('enter_time',axis=1)
        df_exit=df_exit.rename(columns = {'exit_time':'enter_time'})
        # The last row has no successor, so its trip is padded with the final
        # enter_time and (0, 0) exit coordinates.
        df_exit['exit_time']=np.hstack([df_enter['enter_time'].dt.to_pydatetime(),df_enter['enter_time'].iloc[-1]])
        df_exit['exit_lat']=np.hstack([df_enter['enter_lat'],[0]])
        df_exit['exit_lng']=np.hstack([df_enter['enter_lng'],[0]])
        df_exit.loc[:,'is_trip']=1
        df_exit=df_exit[df.columns.tolist()]
        df=pd.concat([df,df_exit]).set_index('enter_time').sort_index().reset_index()

        # Rows whose exit falls on a later day-of-year than their entry are
        # split in two: the row kept in `df` restarts at the following
        # midnight, while its duplicate keeps the original start and ends at
        # 23:59:59 of the entry day.
        crosses_midnight = df.enter_time.dt.dayofyear<df.exit_time.dt.dayofyear
        dups=df.loc[crosses_midnight].copy()
        # `DateOffset` is not imported in this file; reach it through `pd`.
        df.loc[crosses_midnight,'enter_time']=pd.DatetimeIndex(dups.enter_time.dt.normalize())+pd.DateOffset(1)
        dups.loc[:,'exit_time'] = pd.DatetimeIndex(dups.enter_time.dt.normalize())+pd.DateOffset(hour=23,minute=59,seconds=59)
        # The first duplicate is deliberately left out, as before.
        # `DataFrame.append` no longer exists in pandas 2.x; concat is equivalent.
        df=pd.concat([df,dups[1:]],ignore_index=True)
        df=df.sort_values(by='enter_time',ascending=True)
        return df

    def _process_home_work_other(self,df):
        """
        Set `at_home`, `at_work` and `at_other` on every row from its entry
        and exit day-of-week and hour, then drop rows containing NaN.

        Trip rows (`is_trip == 1`) get all three flags set to 0.
        """
        for idx,row in df.iterrows():
            tup = (row.enter_time.dayofweek, row.enter_time.hour, row.exit_time.dayofweek, row.exit_time.hour)
            is_trip = row.is_trip==1
            is_at_home = self._filter_home(tup) and not is_trip
            is_at_work = self._filter_work(tup) and not is_trip
            is_at_other = not (is_at_home or is_at_work or is_trip)
            df.loc[idx,'at_home'] = int(is_at_home)
            df.loc[idx,'at_work'] = int(is_at_work)
            df.loc[idx,'at_other'] = int(is_at_other)
        df=df.dropna()
        return df

    def _filter_home(self, tup):
        """
        Decide from `(enter_day, enter_hr, exit_day, exit_hr)` whether the
        interval looks like time spent at home.

        True when the interval stays within one day or runs into the next
        day, and it either starts at or after `late_night_hr` or ends at or
        before `early_morning_hr`.
        """
        enter_day,enter_hr,exit_day,exit_hr = tup
        is_same_day = (enter_day==exit_day)
        is_morning_hour = (exit_hr <= self.early_morning_hr)
        is_consecutive_day = (((enter_day==SUN) & (exit_day==MON))|(enter_day + 1 == exit_day))
        # Note that "late night" also covers an early-morning exit.
        is_late_night = ((enter_hr >= self.late_night_hr)|is_morning_hour)
        cond_1 = (is_consecutive_day & is_late_night)
        cond_2 = (is_same_day & is_morning_hour)|(is_same_day&is_late_night)
        return cond_1|cond_2

    def _filter_work(self, tup):
        """
        Decide from `(enter_day, enter_hr, exit_day, exit_hr)` whether the
        interval looks like time spent at work.

        True when entry and exit are on the same non-weekend day, the entry
        hour is strictly after `work_start_hr` and the exit hour is at or
        before `work_end_hr`.
        """
        enter_day,enter_hr,exit_day,exit_hr = tup
        is_work_hour = ((enter_hr>self.work_start_hr)&(exit_hr<=self.work_end_hr))
        is_work_day = (enter_day not in self.weekend_days)&(exit_day not in self.weekend_days)
        is_same_day = (enter_day==exit_day)
        return is_work_hour & (is_work_day & is_same_day)

    def _process_speed(self,df):
        """
        Add `dist` (km), `time_diff` and `speed` (km per second) columns and
        demote short hops to stays.

        Rows lasting under 8 minutes that still cover a non-zero distance
        have their exit coordinates reset to the entry coordinates and
        `is_trip` cleared.
        """
        # Take the coordinate columns directly as (N, 2) arrays; the previous
        # `np.array(zip(...))` only produced an array under Python 2.
        pts1 = df[['enter_lat','enter_lng']].values
        pts2 = df[['exit_lat','exit_lng']].values
        dist=np.round(great_circle_distance(pts1,pts2),5)
        df['dist'] =dist
        time_diff = df.exit_time - df.enter_time
        # `.dt.seconds` is the seconds component of the duration (days excluded).
        time_diff_s = time_diff.dt.seconds.values
        df['time_diff']=time_diff
        df['speed'] = np.round(dist/time_diff_s,5)
        # The assignments below touch neither `time_diff` nor `dist`, so the
        # mask can be computed once.
        short_hop = ((df.time_diff.dt.seconds)<8.*60)&(df.dist!=0.0)
        df.loc[short_hop,'exit_lat'] = df.loc[short_hop,'enter_lat']
        df.loc[short_hop,'exit_lng'] = df.loc[short_hop,'enter_lng']
        df.loc[short_hop,'is_trip']=0
        return df
    
        

In [1119]:
#         self._records = gpd.GeoDataFrame(df.drop(['lat', 'lng'], axis=1),
#                                 crs={'init': 'epsg:4326'},
#                                 geometry=[shapely.geometry.Point(xy) for xy in zip(df.lat, df.lng)])

In [1120]:
from sklearn.cluster import DBSCAN

In [1121]:
# Load and pre-label the trace records of the user stored at PATH.
df = User(PATH)._records

# Cluster the records on their raw entry coordinates.
# NOTE(review): eps is applied to the lat/lng values as-is — presumably
# degrees; confirm that 0.001 is the intended neighbourhood size.
db = DBSCAN(eps=0.001, min_samples=12)
df['cluster'] = db.fit_predict(df[['enter_lat','enter_lng']].values)
labels = db.labels_
# This count (clusters excluding the -1 label) is overwritten on the very
# next line and is therefore never used.
max_label = len(set(labels)) - (1 if -1 in labels else 0)
max_label = max(df.cluster.unique())
# Home / work cluster: the cluster id in 0..max_label whose rows carry the
# largest sum of the time-based at_home / at_work flags.
home_label = max(range(max_label+1), key=lambda x: df.loc[df.cluster.values==x,'at_home'].sum())
work_label = max(range(max_label+1), key=lambda x: df.loc[df.cluster.values==x,'at_work'].sum())
# Flag every row that belongs to the chosen clusters.
df.loc[(df.cluster == home_label), 'at_home'] = 1
df.loc[(df.cluster == work_label), 'at_work'] = 1
# The statements below are order-dependent.
# Rows flagged as both home and work are reset to neither ...
df.loc[(df.at_home == 1) & (df.at_work == 1), ['at_home','at_work']] = 0
# ... rows that are neither home nor work become "other" ...
df.loc[(df.at_home == 0) & (df.at_work == 0), 'at_other'] = 1
# ... and "other" is cleared wherever home or work is set.
df.loc[(df.at_home == 1) & (df.at_other == 1), 'at_other'] = 0
df.loc[(df.at_work == 1) & (df.at_other == 1), 'at_other'] = 0
# Trip rows carry no location flag at all.
df.loc[(df.is_trip == 1) & (df.at_work == 1), 'at_work'] = 0
df.loc[(df.is_trip == 1) & (df.at_home == 1), 'at_home'] = 0
df.loc[(df.is_trip == 1) & (df.at_other == 1), 'at_other'] = 0
# Drop the last row.
df=df[:-1]

In [1122]:
# Index labels of the rows whose time_diff seconds component is under
# 8 minutes while their distance is non-zero.
indices=df.loc[((df.time_diff.dt.seconds)<8.*60)&(df.dist!=0.0),'is_trip'].index

In [1123]:
# Step back by one so that `indices` refers to the row before each match.
# NOTE(review): assumes consecutive integer labels; a match at label 0
# would become -1 — confirm this cannot happen.
indices-=1

In [1124]:
home_label,work_label,max_label

(0, 2, 2)

In [1125]:
# Snapshot the columns from 'at_home' through 'is_trip' of the rows selected
# by `indices`.
# NOTE(review): `indices` was built from index labels but is used
# positionally by iloc here — confirm labels and positions coincide.
vals=df.iloc[indices].loc[:,'at_home':'is_trip'].values.copy()

In [1126]:
# Overwrite the 'at_home'..'is_trip' columns of the short, non-zero-distance
# rows with the values held in `vals`.
df.loc[((df.time_diff.dt.seconds)<8.*60)&(df.dist!=0.0),'at_home':'is_trip']=vals

In [1070]:
from folium import MarkerCluster,Marker,Icon,CircleMarker

In [1071]:
# Non-trip records only, restricted to the columns needed for plotting.
# Take an explicit copy so the column added below is written to an
# independent frame instead of a filtered slice of `df`.
geo=df.loc[(df.is_trip!=1.0),['enter_time','enter_lat','enter_lng','at_home','at_work','at_other','is_trip']].copy()
# NOTE: despite its name, 'dow' holds the hour of day of enter_time; the
# marker loop further down reads it as the hour.
geo['dow'] = geo.enter_time.dt.hour

# Base map the markers are added to.
mapa = folium.Map([37.781872,-122.429162],
                  zoom_start=11,
                  tiles='cartodbpositron',detect_retina=True)
def get_marker_fill(tup):
    """
    Return the fill colour for a record from its flag tuple.

    `tup` is `(at_home, at_work, at_other, is_trip)`. The first truthy flag,
    in that order, selects 'blue', 'red', 'green' or 'yellow'. When no flag
    is set the result is 'black' — the default the original code declared
    but never returned (it fell through and returned None).
    """
    h,w,o,t=tup
    if h:
        return 'blue'
    if w:
        return 'red'
    if o:
        return 'green'
    if t:
        return 'yellow'
    return 'black'
# mc=MarkerCluster(name='clust')

def get_marker_border(hr):
    """
    Map an hour of day to the outline colour of its marker.

    The day is cut into bands by ascending upper bound; the first band whose
    bound exceeds `hr` wins. Hours of 24 and above match no band and yield
    None.

    NOTE(review): 'lightred' and 'darkpurple' look like folium Icon colour
    names rather than CSS colour names — confirm they render as intended
    when used as a circle outline.
    """
    bands = (
        (5, 'lightred'),
        (7, 'red'),
        (11, 'orange'),
        (13, 'yellow'),
        (17, 'green'),
        (20, 'blue'),
        (24, 'darkpurple'),
    )
    for upper_bound, colour in bands:
        if hr < upper_bound:
            return colour
    return None
    
# Add one circle to the map per row of `geo`.
# The unpacking relies on the columns from 'enter_lat' onward being exactly
# (enter_lat, enter_lng, at_home, at_work, at_other, is_trip, dow), where
# 'dow' was filled with the hour of day when `geo` was built.
for lat,lng,h,w,o,t,hr in geo.loc[:,'enter_lat':].values:
    location=[lat,lng]
    # Outline colour encodes the hour, fill colour encodes the flags.
    CircleMarker(location=location,fill_opacity=0.1,radius=200,color=get_marker_border(hr), fill_color=get_marker_fill((h,w,o,t))).add_to(mapa)


In [1072]:
df

Unnamed: 0,enter_time,uid,exit_time,enter_lat,enter_lng,at_home,at_work,at_other,is_trip,exit_lat,exit_lng,dist,time_diff,speed,cluster
0,2012-11-01 00:00:00-07:00,0,2012-11-01 06:30:53-07:00,37.781872,-122.429162,1,0,0,0,37.781872,-122.429162,0.00000,06:30:53,0.00000,0
1,2012-11-01 06:30:53-07:00,0,2012-11-01 07:02:10-07:00,37.781872,-122.429162,1,0,0,0,37.764486,-122.419605,2.10773,00:31:17,0.00112,0
2,2012-11-01 07:02:10-07:00,0,2012-11-01 07:09:45-07:00,37.764486,-122.419605,0,0,0,1,37.764486,-122.419605,0.00000,00:07:35,0.00000,-1
3,2012-11-01 07:09:45-07:00,0,2012-11-01 07:56:47-07:00,37.764486,-122.419605,0,0,1,0,37.746070,-122.395109,2.97182,00:47:02,0.00105,-1
4,2012-11-01 07:56:47-07:00,0,2012-11-01 08:16:48-07:00,37.746070,-122.395109,0,0,1,0,37.746070,-122.395109,0.00000,00:20:01,0.00000,-1
5,2012-11-01 08:16:48-07:00,0,2012-11-01 09:12:06-07:00,37.746070,-122.395109,0,0,1,0,37.781968,-122.428365,4.94770,00:55:18,0.00149,-1
6,2012-11-01 09:12:06-07:00,0,2012-11-01 09:26:12-07:00,37.781968,-122.428365,0,0,0,1,37.781968,-122.428365,0.00000,00:14:06,0.00000,0
7,2012-11-01 09:26:12-07:00,0,2012-11-01 10:07:27-07:00,37.781968,-122.428365,0,0,1,0,37.673650,-122.455479,12.27814,00:41:15,0.00496,0
8,2012-11-01 10:07:27-07:00,0,2012-11-01 10:16:38-07:00,37.673650,-122.455479,0,0,0,1,37.673650,-122.455479,0.00000,00:09:11,0.00000,-1
9,2012-11-01 10:16:38-07:00,0,2012-11-01 11:12:07-07:00,37.673650,-122.455479,0,0,0,1,37.673577,-122.455490,0.00813,00:55:29,0.00000,-1


In [1073]:
mapa

In [1074]:
def get_current_label(row):
    """
    Return the activity code of a record.

    The flags are checked in priority order — `at_home` ('h'), `at_work`
    ('w'), `is_trip` ('car') — and the first one equal to 1 decides. A
    record with none of them set is labelled 's'.
    """
    priority = (('at_home', 'h'), ('at_work', 'w'), ('is_trip', 'car'))
    for flag_name, code in priority:
        if getattr(row, flag_name) == 1:
            return code
    return 's'

def make_seg_series(start, end, label, freq='15min'):
    """
    Build a Series that repeats `label` at every `freq` step from `start`
    to `end`.

    The index is `pd.date_range(start, end, freq=freq)`; the values are
    stored as Python objects.
    """
    slots = pd.date_range(start, end, freq=freq)
    values = np.full(len(slots), label, dtype=object)
    return pd.Series(values, index=slots)
    
def segment_day(df):
    """
    Turn one day's records into a single label Series.

    Every record contributes one segment spanning its enter_time to its
    exit_time. The first record is always labelled 'h'; the others take the
    label returned by get_current_label. After the last record, one more
    segment with the same start and end is appended, labelled 'h'.

    NOTE(review): that trailing segment duplicates the last record's time
    span — confirm this is the intended way to close the day at home.
    """
    pieces = []
    final_pos = len(df) - 1

    for pos, (_, record) in enumerate(df.iterrows()):
        tag = 'h' if pos == 0 else get_current_label(record)
        pieces.append(make_seg_series(record.enter_time, record.exit_time, tag))

        if pos == final_pos:
            # Close the day with an 'h' segment over the same interval.
            pieces.append(make_seg_series(record.enter_time, record.exit_time, 'h'))

    return pd.concat(pieces)

In [1131]:
# Per-day label sequences (segs) and the matching record groups (grps).
segs = []
grps = []
# Group the records by the calendar day of enter_time; the last key element
# is the day of week.
for d,grp in df.groupby([df.enter_time.dt.year,df.enter_time.dt.month,df.enter_time.dt.day,df.enter_time.dt.dayofweek]):
    # Skip days 5 and 6 (the same codes as the SAT / SUN constants).
    if d[-1] not in [5,6]:
#         print grp.enter_time,grp.exit_time
        grps.append(grp)
        # Label the day and keep at most the first 96 entries; with the
        # default 15-minute step of make_seg_series that is 24 hours of slots.
        seg=np.array(segment_day(grp).values,dtype='S16')[:96]
        segs.append(seg)

In [1132]:
# Stack every per-day sequence except the first and the last into one
# fixed-width ('S16') string array.
traj=np.array(segs[1:-1],dtype='S16')

In [1133]:
segs

[array(['h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h',
        'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h',
        'h', 'car', 'car', 'car', 's', 'car', 'car', 'car', 'car', 's', 's',
        'car', 'car', 'car', 'car', 's', 'car', 'car', 'car', 'w', 'car',
        'car', 'car', 'car', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w', 'w',
        'w', 'w', 'car', 'car', 'car', 'car', 's', 's', 's', 'car', 'w',
        'w', 'car', 'car', 'car', 'w', 'w', 'w', 'w', 'w', 'car', 'car',
        'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h',
        'h', 'h'],
       dtype='|S16'),
 array(['h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h',
        'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h', 'h',
        'h', 'car', 'car', 'car', 'car', 'car', 's', 's', 's', 's', 's',
        's', 'car', 'car', 'w', 'w', 'w', 'w', 'w', 'car', 'car', 's', 's',
        'car', 'car', 'w', 'car', 'car', 'car', 'car', 'car', 'car', 's',


In [1078]:
# Write the trajectory array to disk.
# NOTE(review): PATH points at 'traces_persona_2.csv' while the output is
# named 'p1' — confirm the target file name.
np.save('../data/traces/trajectories/p1',traj)

In [948]:
# Draft segmentation of a single group `grp`, written against rows that
# expose start_day / start_time / end_time / end_day fields.
segs = []
# starting day w/ travel
for idx,(day,row) in enumerate(grp.iterrows()):
    if idx==0:
        # First record: fill from start_day up to the activity start with 'h'.
        start_time = row.start_day
        label='h'
    else:
        # Later records: the gap since the previous activity's end is 'car'.
        start_time = end_time
        label='car'
    end_time = row.start_time

    segs.append(make_seg_series(start_time,end_time,label))
    # The activity itself, labelled by get_current_label.
    start_time = end_time
    label = get_current_label(row)
    
    end_time = row.end_time
    segs.append(make_seg_series(start_time, end_time, label))

    if (idx == len(grp)-1) and (row.end_time < row.end_day):
        # Last record finishes before end_day: append an 'h' segment that
        # runs to end_day.
        # NOTE(review): start_time still holds the activity's start here, so
        # this segment overlaps the activity — confirm whether it should
        # begin at row.end_time instead.
        label = 'h'
        end_time = row.end_day
        segs.append(make_seg_series(start_time, end_time, label))


In [873]:
[seg.shape for seg in segs]

[(135,),
 (121,),
 (132,),
 (125,),
 (126,),
 (124,),
 (133,),
 (132,),
 (130,),
 (131,),
 (125,),
 (128,),
 (124,),
 (127,),
 (137,),
 (135,),
 (128,),
 (134,),
 (117,),
 (136,),
 (126,),
 (69,)]