#Automatically Detecting Stops in Trajectories using Scikit-Mobility

1) Reading a csv into a dataframe with GPS reading of trajectories made by different users with the following structure: latitude, longiture, user_id, datetime

In [10]:
#pip install scikit-mobility

In [11]:
import skmob
import pandas as pd
import numpy as np
from skmob.preprocessing import clustering, detection
# read the trajectory data (GeoLife)
url = 'sensor_logger.csv'
df = pd.read_csv(url, sep=',')
tdf = skmob.TrajDataFrame(df, latitude='latitude', longitude='longitude', user_id='user_x', datetime='time_t')
tdf

Unnamed: 0,datetime,lng,lat,uid
0,2024-05-05 09:59:54,-8.407971,40.200369,257
1,2024-05-05 09:59:59,-8.407971,40.200369,257
2,2024-05-05 10:01:04,-8.407971,40.200369,257
3,2024-05-05 10:01:09,-8.407971,40.200369,257
4,2024-05-05 10:01:14,-8.407971,40.200369,257
...,...,...,...,...
203284,2024-05-05 10:00:39,-8.407971,40.200369,257
203285,2024-05-05 10:00:44,-8.407971,40.200369,257
203286,2024-05-05 10:00:49,-8.407971,40.200369,257
203287,2024-05-05 10:00:54,-8.407971,40.200369,257


In [12]:
import skmob

2) Detecting stops in these trajectories specifying minimum stop time and a radius in km

https://scikit-mobility.github.io/scikit-mobility/reference/preprocessing.html#skmob.preprocessing.detection.stay_locations

In [13]:
stdf = detection.stay_locations(tdf, stop_radius_factor=0.5,minutes_for_a_stop=20.0, spatial_radius_km=0.2, leaving_time=True)
stdf


Unnamed: 0,datetime,lng,lat,uid,leaving_datetime
0,2024-05-07 11:26:41,-8.409377,40.198673,146,2024-05-07 16:49:52
1,2024-05-07 16:49:52,-8.416462,40.186296,146,2024-05-07 18:28:10
2,2024-05-07 18:29:49,-8.429089,40.197293,146,2024-05-07 19:13:21
3,2024-05-07 19:13:21,-8.425924,40.202361,146,2024-05-07 19:37:31
4,2024-05-07 19:37:31,-8.436075,40.206345,146,2024-05-07 21:04:08
5,2024-05-07 21:08:02,-8.432103,40.204806,146,2024-05-07 21:34:39
6,2024-05-07 21:34:39,-8.43078,40.200921,146,2024-05-07 22:00:13
7,2024-05-07 22:00:13,-8.409364,40.198674,146,2024-05-08 14:00:22
8,2024-05-08 14:00:22,-8.414995,40.186314,146,2024-05-08 19:45:45
9,2024-05-08 19:45:45,-8.411616,40.193155,146,2024-05-08 20:14:00


In [14]:
print('Points of the original trajectory:\t%s'%len(tdf))
print('Points of stops:\t\t\t%s'%len(stdf))

Points of the original trajectory:	203289
Points of stops:			23


3) Identify each trajectory by assigning the corresponding trajectory id in a separate list

In [15]:
#first valid trajectory id
trajectory_id = 1

#initially all trajectory ids are 0 - invalid trajectory ids
trajectory_ids_list = np.zeros(((len(tdf['datetime']))))

#first timestamp representing when the first stop was left
leaving = stdf['leaving_datetime'][0]

#timestamp representing the last moment of the previous stop
previous_trajectory=pd.Timestamp('2000-01-01')

#Am I in a stop period?
inside_stop=False
index2=0

#For each stop
for index1,stop in stdf.iterrows():
    #For each GPS point from the use who is stopped and between (stop.datetime, stop.leaving_datetime)
    for gps in tdf[(tdf.uid == stop.uid) & (tdf.datetime <= stop.leaving_datetime) & (tdf.datetime >= previous_trajectory)]['datetime']:
        if stop.datetime == gps:                  #stop detected, starting point
            leaving=stop.leaving_datetime
            inside_stop=True
            trajectory_id= trajectory_id + 1
            trajectory_ids_list[index2]=-1
        elif inside_stop == True:     #inside a stop period
            if leaving == gps:                  #this stop leaving point
                inside_stop=False
                previous_trajectory=gps
            trajectory_ids_list[index2]=-1
        else: #inside a moving point
            trajectory_ids_list[index2]=trajectory_id
        index2=index2 + 1


In [16]:
#The stops
print(len(stdf))

#The list of trajectory ids created is of the same size as the number of trajectory dataframe rows
print(len(trajectory_ids_list))
print(len(tdf['datetime']))

#The trajectory ids are in a increasing order, when -1 the user is on a stop, 0s are the GPs points not considered (e.g. the last ones)
print(trajectory_ids_list)

23
203289
203289
[1. 1. 1. ... 0. 0. 0.]


4) Integrating the trajectory ids list into the trajectory dataframe

In [17]:
tdf['trajectory_id']=trajectory_ids_list
tdf

Unnamed: 0,datetime,lng,lat,uid,trajectory_id
0,2024-05-05 09:59:54,-8.407971,40.200369,257,1.0
1,2024-05-05 09:59:59,-8.407971,40.200369,257,1.0
2,2024-05-05 10:01:04,-8.407971,40.200369,257,1.0
3,2024-05-05 10:01:09,-8.407971,40.200369,257,1.0
4,2024-05-05 10:01:14,-8.407971,40.200369,257,1.0
...,...,...,...,...,...
203284,2024-05-05 10:00:39,-8.407971,40.200369,257,0.0
203285,2024-05-05 10:00:44,-8.407971,40.200369,257,0.0
203286,2024-05-05 10:00:49,-8.407971,40.200369,257,0.0
203287,2024-05-05 10:00:54,-8.407971,40.200369,257,0.0


In [18]:
tdf.to_csv('sensorlogger_TripsOpenData.csv')
stdf.to_csv('sensorlogger_StopsOpenData.csv')