## Imports

In [4]:
# Default
import sys
import datetime

sys.path.append('../../da-irl')

# Local
from src.impl.activity_env import ActivityEnv
from src.impl.activity_mdp import ATPTransition, ActivityMDP
from src.impl.activity_params import MATSimParameters
from src.impl.activity_rewards import ActivityRewardFunction
from src.irl.meirl import MaxEntIRLAgent
from src.misc.utils import create_dir_if_not_exists
from src.file_io.trace_loader import TraceLoader

# Vendor
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
import shapely
from shapely.geometry import box
import numpy as np
import folium

shapely.speedups.enable()



In [5]:
# Global magics statements

%load_ext autoreload
%autoreload 2
%matplotlib inline

## Constants and Utility Functions

In [19]:
# BBOX:
LAT_MIN=32.831
LAT_MAX=38.67
LNG_MIN=-128.9
LNG_MAX=-117.41

# DOW
MON = 0
TUES = 1
WEDS = 2
THUR = 3
FRI = 4
SAT = 5
SUN = 6

# Path
PATH = '../../da-irl/data/traces/traces_persona_0.csv'

def great_circle_distance(pt1,pt2):
    """
    Return the great-circle distance in kilometers between two points,
    defined by a tuple (lat, lon).
    Examples
    --------
    >>> brussels = (50.8503, 4.3517)
    >>> paris = (48.8566, 2.3522)
    >>> great_circle_distance(brussels, paris)
    263.9754164080347
    """
    r = 6371.
    
    delta_latitude = np.radians(pt1[:,0] - pt2[:,0])
    delta_longitude = np.radians(pt1[:,1] - pt2[:,1])
    latitude1 = np.radians(pt1[:,0])
    latitude2 = np.radians(pt2[:,0])

    a = np.sin(delta_latitude / 2) ** 2 + np.cos(latitude1) * np.cos(latitude2) * np.sin(delta_longitude / 2) ** 2
    return r * 2. * np.arcsin(np.sqrt(a))


## Filtering

In [7]:

class User:
    def __init__(self,records_path):
        self._records_path = records_path
        self._records = None
        
        self.name = None
        self.home = None
        self.work = None
        self.weekend_days = [SAT,SUN]  # hard code for now
        self.work_start_hr = 13 # hard code for now
        self.work_end_hr = 16 # hard code for now
        self.early_morning_hr = 5
        self.late_night_hr = 18
        self.
        
        self.attributes = {}
        self._load_records()
    
    def _load_records(self):
        df = TraceLoader.load_traces_from_csv(self._records_path)
        
        df=self._process_home_work_other(df)
        df=self._process_speed(df)
        df=self._process_duration(df)
        self._records=df
        
    def _process_home_work_other(self,df):
        df['at_home']=0
        df['at_work']=0
        df['at_other']=0

        for idx,row in df.iterrows():
            tup = (row.enter_time.dayofweek, row.enter_time.hour, row.exit_time.dayofweek, row.exit_time.hour)
            is_at_home = self._filter_home(tup)
            is_at_work = self._filter_work(tup)
            is_at_other = ((not is_at_home) & (not is_at_work))
            df.loc[idx,'at_home'] = 1 if is_at_home else 0
            df.loc[idx,'at_work'] = 1 if is_at_work else 0
            df.loc[idx,'at_other'] = 1 if is_at_other else 0
        df=df.dropna()
        return df
        
    def _filter_home(self, tup):
        enter_day,enter_hr,exit_day,exit_hr = tup
        is_same_day = (enter_day==exit_day)
        is_morning_hour = (exit_day <= self.early_morning_hr)
        is_consecutive_day = (((enter_day==SUN) & (exit_day==MON))|(enter_day + 1 == exit_day))
        is_late_night = ((enter_hr >= self.late_night_hr)&(exit_hr <= self.early_morning_hr))
        cond_1 = (is_consecutive_day & is_late_night)
        cond_2 = (is_same_day & is_morning_hour)
        return cond_1|cond_2
    
    def _filter_work(self, tup):
        enter_day,enter_hr,exit_day,exit_hr = tup
        is_work_hour = ((enter_hr>self.work_start_hr)&(exit_hr<self.work_end_hr))
        is_work_day = (enter_day not in self.weekend_days)&(exit_day not in self.weekend_days)
        is_same_day = (enter_day==exit_day)
        return is_work_hour & (is_work_day & is_same_day)
    
    def _process_speed(self,df):
        cons_pts = np.array(zip(zip(df.lat.values[0:],df.lng.values[0:]),zip(df.lat.values[1:],df.lng.values[1:])))
        pts1=cons_pts[:,0]
        pts2=cons_pts[:,1]
        dist=np.round(great_circle_distance(pts1,pts2),5)
        df['dist'] = np.hstack([[0],dist])
        
        enters=df.enter_time.dt.to_pydatetime()
        exits=df.exit_time.dt.to_pydatetime()
        time_diff = np.diff(zip(exits[0:],enters[1:]))
        time_diff_s = np.apply_along_axis(lambda x: x[0].seconds,1, time_diff)
        df['time_diff']=np.vstack([[0],time_diff])
        df['speed'] = np.hstack([[0],np.round(dist/time_diff_s,5)])
        return df
    
    def _process_duration(self,df):
        df['duration']=df.exit_time-df.enter_time
        return df
        

In [8]:
#         self._records = gpd.GeoDataFrame(df.drop(['lat', 'lng'], axis=1),
#                                 crs={'init': 'epsg:4326'},
#                                 geometry=[shapely.geometry.Point(xy) for xy in zip(df.lat, df.lng)])

In [20]:
df=User(PATH)._records

In [22]:
for day,grp in df.groupby([df.enter_time.dt.year,df.enter_time.dt.day]):
    print grp

   uid                enter_time                 exit_time        lat  \
3    1 2012-11-01 08:24:04-07:00 2012-11-01 17:08:31-07:00  34.035771   
4    1 2012-11-01 17:30:49-07:00 2012-11-01 18:44:22-07:00  33.991084   
5    1 2012-11-01 19:01:28-07:00 2012-11-01 20:08:35-07:00  33.910885   
6    1 2012-11-01 20:32:24-07:00 2012-11-01 20:41:38-07:00  33.997860   
7    1 2012-11-01 20:49:47-07:00 2012-11-02 05:58:20-07:00  33.991100   

          lng  at_home  at_work  at_other      dist time_diff    speed  \
3 -118.390015        1        0         0   6.46235   0:19:30  0.00552   
4 -118.345346        1        0         0   6.45307   0:22:18  0.00482   
5 -118.310060        1        0         0   9.49301   0:17:06  0.00925   
6 -118.348028        1        0         0  10.28565   0:23:49  0.00720   
7 -118.345391        1        0         0   0.79007   0:08:09  0.00162   

  duration  
3 08:44:27  
4 01:13:33  
5 01:07:07  
6 00:09:14  
7 09:08:33  
    uid                enter_time     

102 12:51:07  
     uid                enter_time                 exit_time        lat  \
103    1 2012-11-23 05:57:47-08:00 2012-11-23 06:33:18-08:00  33.995638   
104    1 2012-11-23 06:39:26-08:00 2012-11-23 07:09:17-08:00  33.991188   
105    1 2012-11-23 07:30:46-08:00 2012-11-23 16:06:39-08:00  34.034734   
106    1 2012-11-23 16:43:54-08:00 2012-11-23 17:25:26-08:00  33.987244   
107    1 2012-11-23 17:53:11-08:00 2012-11-23 17:58:21-08:00  33.991801   
108    1 2012-11-23 18:35:12-08:00 2012-11-24 07:59:17-08:00  33.991135   

            lng  at_home  at_work  at_other      dist time_diff    speed  \
103 -118.359889        1        0         0   1.39815   0:09:33  0.00244   
104 -118.346494        1        0         0   1.33042   0:06:08  0.00362   
105 -118.390585        1        0         0   6.32150   0:21:29  0.00490   
106 -118.514866        1        0         0  12.61390   0:37:15  0.00564   
107 -118.345205        1        0         0  15.65026   0:27:45  0.00940   
108