# Automatically Detecting Stops in Trajectories using Scikit-Mobility

1) Reading a CSV file into a dataframe of GPS readings from trajectories made by different users, with the following structure: latitude, longitude, user_id, datetime

In [1]:
import skmob
import pandas as pd
import numpy as np
from skmob.preprocessing import detection
# Location of the GeoLife sample referenced by scikit-mobility's constants module;
# the file is a gzip-compressed, comma-separated table.
url = skmob.utils.constants.GEOLIFE_SAMPLE
df = pd.read_csv(url, compression='gzip', sep=',')

# Wrap the raw frame in a TrajDataFrame, naming the source columns that hold the
# coordinates, the user identifier and the timestamp.
# NOTE(review): the displayed result already has columns lat / lng / uid, which do
# not match the source names given here -- confirm them against the CSV header.
tdf = skmob.TrajDataFrame(
    df,
    latitude='location_lat',
    longitude='location_lon',
    user_id='user_x',
    datetime='datetime',
)
tdf

Unnamed: 0,lat,lng,datetime,uid
0,39.984094,116.319236,2008-10-23 05:53:05,1
1,39.984198,116.319322,2008-10-23 05:53:06,1
2,39.984224,116.319402,2008-10-23 05:53:11,1
3,39.984211,116.319389,2008-10-23 05:53:16,1
4,39.984217,116.319422,2008-10-23 05:53:21,1
...,...,...,...,...
217648,39.999896,116.327290,2009-03-19 05:46:02,5
217649,39.999899,116.327352,2009-03-19 05:46:07,5
217650,39.999945,116.327394,2009-03-19 05:46:12,5
217651,40.000015,116.327433,2009-03-19 05:46:17,5


2) Detecting stops in these trajectories by specifying a minimum stop duration (in minutes) and a spatial radius (in km)

https://scikit-mobility.github.io/scikit-mobility/reference/preprocessing.html#skmob.preprocessing.detection.stay_locations

In [2]:
# Detect the stops of every user. leaving_time=True asks for an extra
# 'leaving_datetime' column (visible in the displayed result) next to the
# arrival 'datetime' of each stop.
stdf = detection.stay_locations(
    tdf,
    stop_radius_factor=0.5,
    minutes_for_a_stop=20.0,
    spatial_radius_km=0.2,
    leaving_time=True,
)
stdf

Unnamed: 0,lat,lng,datetime,uid,leaving_datetime
0,39.978253,116.327275,2008-10-23 06:01:05,1,2008-10-23 10:32:53
1,40.013819,116.306532,2008-10-23 11:10:09,1,2008-10-23 23:46:02
2,39.978950,116.326439,2008-10-24 00:12:30,1,2008-10-24 01:48:57
3,39.981316,116.310181,2008-10-24 01:56:47,1,2008-10-24 02:28:19
4,39.981451,116.309505,2008-10-24 02:28:19,1,2008-10-24 03:18:23
...,...,...,...,...,...
408,40.000265,116.327024,2009-03-13 05:05:54,5,2009-03-13 13:29:06
409,40.010834,116.322620,2009-03-13 13:31:16,5,2009-03-14 05:31:07
410,39.990965,116.327294,2009-03-14 05:46:42,5,2009-03-14 06:36:12
411,39.990040,116.333476,2009-03-14 06:44:07,5,2009-03-19 04:35:37


In [3]:
# Contrast the number of raw GPS points with the number of detected stops.
print('Points of the original trajectory:\t%s' % (len(tdf),))
print('Points of stops:\t\t\t%s' % (len(stdf),))

Points of the original trajectory:	217653
Points of stops:			413


3) Identify each trajectory by assigning the corresponding trajectory id in a separate list

In [5]:
def assign_trajectory_ids(tdf, stdf):
    """Label every GPS point of ``tdf`` with the trajectory it belongs to.

    Labels are written by *row position*, so entry ``i`` of the result always
    describes row ``i`` of ``tdf``, whatever the order of the rows and however
    many points lie outside the scanned time windows.

    Label convention:
      * ``-1``  -- the point lies inside a stop, i.e. its timestamp is between
        the stop's ``datetime`` and ``leaving_datetime`` (both inclusive).
      * ``k >= 1`` -- the point is a moving point of trajectory ``k``. The
        counter starts at 1 and is incremented at every stop; it keeps
        increasing from one user to the next, so ids are unique overall.
      * ``0``   -- the point was not assigned: it comes after the last stop of
        its user, or its user has no stop at all.

    Parameters
    ----------
    tdf : dataframe with (at least) the columns ``uid`` and ``datetime``.
    stdf : dataframe of stops with (at least) the columns ``uid``,
        ``datetime`` and ``leaving_datetime``.

    Returns
    -------
    numpy.ndarray of floats with ``len(tdf)`` entries.
    """
    labels = np.zeros(len(tdf))      # 0 = not assigned
    uid_values = tdf['uid'].to_numpy()
    timestamps = tdf['datetime']
    trajectory_id = 1                # first valid trajectory id

    # Users are handled independently so that the time window of one user never
    # filters (or shifts) the points of another one.
    for uid, user_stops in stdf.groupby('uid', sort=False):
        # Positions (not index labels) of this user's rows inside tdf.
        user_positions = np.flatnonzero(uid_values == uid)
        user_times = timestamps.iloc[user_positions]

        user_stops = user_stops.sort_values('datetime')
        previous_leaving = None      # leaving time of this user's previous stop

        for arrival, leaving in zip(user_stops['datetime'],
                                    user_stops['leaving_datetime']):
            # Moving points: strictly after the previous stop was left and
            # strictly before this stop begins.
            moving = (user_times < arrival).to_numpy()
            if previous_leaving is not None:
                moving &= (user_times > previous_leaving).to_numpy()
            labels[user_positions[moving]] = trajectory_id

            # Reaching a stop closes the current trajectory.
            trajectory_id += 1

            # Stop points, leaving point included.
            stopped = ((user_times >= arrival) & (user_times <= leaving)).to_numpy()
            labels[user_positions[stopped]] = -1

            previous_leaving = leaving

    return labels


# One label per row of tdf (see assign_trajectory_ids for the legend).
trajectory_ids_list = assign_trajectory_ids(tdf, stdf)


In [6]:
# Three sizes, one per line: the number of stops, then the length of the label
# list and the number of rows of the trajectory dataframe. The last two are
# expected to be equal, because there is one label per GPS point.
print(len(stdf), len(trajectory_ids_list), len(tdf['datetime']), sep='\n')

# Label legend: positive values are trajectory ids in increasing order, -1 marks
# a point recorded while the user is on a stop, and 0 marks a GPS point that was
# never assigned a label (e.g. the last ones).
print(trajectory_ids_list)

413
217653
217653
[1. 1. 1. ... 0. 0. 0.]


4) Integrating the trajectory ids list into the trajectory dataframe

In [7]:
# Store the per-point labels as a new 'trajectory_id' column of the trajectory
# dataframe. trajectory_ids_list is a NumPy array, so the values are matched to
# the rows by position and the array needs one entry per row of tdf.
tdf['trajectory_id']=trajectory_ids_list
# Display the dataframe with the added column.
tdf

Unnamed: 0,lat,lng,datetime,uid,trajectory_id
0,39.984094,116.319236,2008-10-23 05:53:05,1,1.0
1,39.984198,116.319322,2008-10-23 05:53:06,1,1.0
2,39.984224,116.319402,2008-10-23 05:53:11,1,1.0
3,39.984211,116.319389,2008-10-23 05:53:16,1,1.0
4,39.984217,116.319422,2008-10-23 05:53:21,1,1.0
...,...,...,...,...,...
217648,39.999896,116.327290,2009-03-19 05:46:02,5,0.0
217649,39.999899,116.327352,2009-03-19 05:46:07,5,0.0
217650,39.999945,116.327394,2009-03-19 05:46:12,5,0.0
217651,40.000015,116.327433,2009-03-19 05:46:17,5,0.0
