## Imports

In [488]:
# Default
import sys
import datetime

# Local
from src.impl.activity_env import ActivityEnv
from src.impl.activity_mdp import ATPTransition, ActivityMDP
from src.impl.activity_params import MATSimParameters
from src.impl.activity_rewards import ActivityRewardFunction
from src.irl.meirl import MaxEntIRLAgent
from src.misc.utils import create_dir_if_not_exists
from src.file_io.trace_loader import TraceLoader

# Vendor
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
import shapely
from shapely.geometry import box
import numpy as np
import folium

shapely.speedups.enable()

sys.path.append('/Users/sfeygin/PycharmProjects/da-irl')

In [120]:
# Global magics statements

%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Constants and Utility Functions

In [569]:
# BBOX:
LAT_MIN=32.831
LAT_MAX=38.67
LNG_MIN=-128.9
LNG_MAX=-117.41

# DOW
MON = 0
TUES = 1
WEDS = 2
THUR = 3
FRI = 4
SAT = 5
SUN = 6

# Path
PATH = '/Users/sfeygin/PycharmProjects/da-irl/data/traces'

def great_circle_distance(pt1,pt2):
    """
    Return the great-circle distance in kilometers between two points,
    defined by a tuple (lat, lon).
    Examples
    --------
    >>> brussels = (50.8503, 4.3517)
    >>> paris = (48.8566, 2.3522)
    >>> great_circle_distance(brussels, paris)
    263.9754164080347
    """
    r = 6371.
    
    delta_latitude = np.radians(pt1[:,0] - pt2[:,0])
    delta_longitude = np.radians(pt1[:,1] - pt2[:,1])
    latitude1 = np.radians(pt1[:,0])
    latitude2 = np.radians(pt2[:,0])

    a = np.sin(delta_latitude / 2) ** 2 + np.cos(latitude1) * np.cos(latitude2) * np.sin(delta_longitude / 2) ** 2
    return r * 2. * np.arcsin(np.sqrt(a))


## Filtering

In [702]:

class User:
    def __init__(self,records_path):
        self._records_path = records_path
        self._records = None
        
        self.name = None
        self.home = None
        self.work = None
        self.weekend_days = [SAT,SUN]  # hard code for now
        self.work_start_hr = 13 # hard code for now
        self.work_end_hr = 16 # hard code for now
        self.early_morning_hr = 5
        self.late_night_hr = 18
        
        self.attributes = {}
        self._load_records()
    
    def _load_records(self):
        df = TraceLoader.load_traces_from_csv(self._records_path)
        
        df=self._process_home_work_other(df)
        df=self._process_speed(df)
        df=self._process_duration(df)
        self._records=df
        
    def _process_home_work_other(self,df):
        df['at_home']=0
        df['at_work']=0
        df['at_other']=0

        for idx,row in df.iterrows():
            tup = (row.enter_time.dayofweek, row.enter_time.hour, row.exit_time.dayofweek, row.exit_time.hour)
            is_at_home = self._filter_home(tup)
            is_at_work = self._filter_work(tup)
            is_at_other = ((not is_at_home) & (not is_at_work))
            df.loc[idx,'at_home'] = 1 if is_at_home else 0
            df.loc[idx,'at_work'] = 1 if is_at_work else 0
            df.loc[idx,'at_other'] = 1 if is_at_other else 0
        df=df.dropna()
        return df
        
    def _filter_home(self, tup):
        enter_day,enter_hr,exit_day,exit_hr = tup
        is_same_day = (enter_day==exit_day)
        is_morning_hour = (exit_day <= self.early_morning_hr)
        is_consecutive_day = (((enter_day==SUN) & (exit_day==MON))|(enter_day + 1 == exit_day))
        is_late_night = ((enter_hr >= self.late_night_hr)&(exit_hr <= self.early_morning_hr))
        cond_1 = (is_consecutive_day & is_late_night)
        cond_2 = (is_same_day & is_morning_hour)
        return cond_1|cond_2
    
    def _filter_work(self, tup):
        enter_day,enter_hr,exit_day,exit_hr = tup
        is_work_hour = ((enter_hr>self.work_start_hr)&(exit_hr<self.work_end_hr))
        is_work_day = (enter_day not in self.weekend_days)&(exit_day not in self.weekend_days)
        is_same_day = (enter_day==exit_day)
        return is_work_hour & (is_work_day & is_same_day)
    
    def _process_speed(self,df):
        cons_pts = np.array(zip(zip(df.lat.values[0:],df.lng.values[0:]),zip(df.lat.values[1:],df.lng.values[1:])))
        pts1=cons_pts[:,0]
        pts2=cons_pts[:,1]
        dist=np.round(great_circle_distance(pts1,pts2),5)
        df['dist'] = np.hstack([[0],dist])
        
        enters=df.enter_time.dt.to_pydatetime()
        exits=df.exit_time.dt.to_pydatetime()
        time_diff = np.diff(zip(exits[0:],enters[1:]))
        time_diff_s = np.apply_along_axis(lambda x: x[0].seconds,1, time_diff)
        df['time_diff']=np.vstack([[0],time_diff])
        df['speed'] = np.hstack([[0],np.round(dist/time_diff_s,5)])
        return df
    
    def _process_duration(self,df):
        df['duration']=df.exit_time-df.enter_time
        return df
        

In [703]:
#         self._records = gpd.GeoDataFrame(df.drop(['lat', 'lng'], axis=1),
#                                 crs={'init': 'epsg:4326'},
#                                 geometry=[shapely.geometry.Point(xy) for xy in zip(df.lat, df.lng)])

In [704]:
User(PATH)._records

Unnamed: 0,uid,enter_time,exit_time,lat,lng,at_home,at_work,at_other,dist,time_diff,speed,duration
0,1,2012-10-31 18:31:00-07:00,2012-10-31 19:28:05-07:00,33.890701,-118.396215,0,0,1,0.00000,0,0.00000,00:57:05
1,1,2012-10-31 19:42:43-07:00,2012-10-31 21:26:12-07:00,33.885500,-118.395452,1,0,0,0.58253,0:14:38,0.00066,01:43:29
2,1,2012-10-31 21:49:10-07:00,2012-11-01 08:04:34-07:00,33.990948,-118.345386,1,0,0,12.60212,0:22:58,0.00915,10:15:24
3,1,2012-11-01 08:24:04-07:00,2012-11-01 17:08:31-07:00,34.035771,-118.390015,1,0,0,6.46235,0:19:30,0.00552,08:44:27
4,1,2012-11-01 17:30:49-07:00,2012-11-01 18:44:22-07:00,33.991084,-118.345346,1,0,0,6.45307,0:22:18,0.00482,01:13:33
5,1,2012-11-01 19:01:28-07:00,2012-11-01 20:08:35-07:00,33.910885,-118.310060,1,0,0,9.49301,0:17:06,0.00925,01:07:07
6,1,2012-11-01 20:32:24-07:00,2012-11-01 20:41:38-07:00,33.997860,-118.348028,1,0,0,10.28565,0:23:49,0.00720,00:09:14
7,1,2012-11-01 20:49:47-07:00,2012-11-02 05:58:20-07:00,33.991100,-118.345391,1,1,0,0.79007,0:08:09,0.00162,09:08:33
8,1,2012-11-02 06:13:02-07:00,2012-11-02 07:41:14-07:00,34.040380,-118.421540,1,1,0,8.90428,0:14:42,0.01010,01:28:12
9,1,2012-11-02 09:54:20-07:00,2012-11-02 10:10:41-07:00,38.218775,-128.806459,1,0,0,1041.11462,2:13:06,0.13037,00:16:21
