# Automatically Detecting Stops in Trajectories using Scikit-Mobility

1) Reading a CSV file into a dataframe containing the GPS readings of trajectories made by different users, with the following structure: latitude, longitude, user_id, datetime

In [1]:
# import pandas as pd
# from google.colab import files

# uploaded = files.upload()

In [2]:
#pip install scikit-mobility

In [3]:
import skmob
import pandas as pd
import numpy as np
from skmob.preprocessing import detection
# Load the raw readings from the CSV file named below into a plain DataFrame.
url = 'context_dataset_v1.csv'
df = pd.read_csv(url, sep=',')

# Wrap the DataFrame as a TrajDataFrame, naming the source columns that hold
# the latitude, longitude, user id and timestamp of each reading.
tdf = skmob.TrajDataFrame(
    df,
    latitude='location_lat',
    longitude='location_lon',
    user_id='user_x',
    datetime='time_t',
)

# Display the resulting trajectory dataframe.
tdf

Unnamed: 0,time,datetime,lat,lng,wifi_connected,sensor_light_mean,sensor_accelerometer_x_mean,sensor_accelerometer_y_mean,sensor_accelerometer_z_mean,sensor_gravity_x_mean,...,sensor_linear_acc_x_mean,sensor_linear_acc_y_mean,sensor_linear_acc_z_mean,sensor_rotation_vec_x_mean,sensor_rotation_vec_y_mean,sensor_rotation_vec_z_mean,sensor_proximity_mean,label,activity_label,uid
0,1525666992353,2018-05-07 05:23:12,43.702109,10.400969,0,0.491083,-0.841768,-9.798033,0.893719,3.690201,...,-3.178609,-4.835157,0.029616,-0.531964,-0.058802,-0.286727,2.500000,Home,activity_rec_on_foot,2
1,1520875794472,2018-03-12 17:29:54,43.719917,10.422036,0,2313.365000,0.090955,-0.035095,9.941500,0.090053,...,0.000720,0.000559,0.135715,-0.004916,-0.000646,-0.880064,0.500000,Working,activity_rec_still,3
2,1521047383369,2018-03-14 17:09:43,43.719917,10.422036,0,4693.660000,-0.081390,-0.004915,9.928235,-0.085803,...,0.004413,0.021426,0.122015,-0.004425,-0.001167,0.999185,0.333333,Working,activity_rec_still,3
3,1520872866143,2018-03-12 16:41:06,43.718449,10.423058,0,399.206400,-0.017048,-0.058225,9.914915,-0.020499,...,0.000341,0.005610,0.000917,-0.002664,-0.000225,0.470244,5.000305,Break,activity_rec_still,1
4,1520872926143,2018-03-12 16:42:06,43.718449,10.423058,0,358.872934,-0.012993,-0.063340,9.914752,-0.012417,...,0.001090,-0.000354,0.001047,-0.002985,-0.000889,0.474117,5.000305,Break,activity_rec_still,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45676,1522411614683,2018-03-30 13:06:54,43.718588,10.422543,1,5540.054054,-0.236280,-9.867495,-0.841630,0.032914,...,-0.269194,-0.531366,0.948790,-0.232583,0.727089,-0.602808,0.000000,Launch Break,activity_rec_on_foot,3
45677,1522411674683,2018-03-30 13:07:54,43.718588,10.422543,1,5540.054054,0.046685,-9.941655,-1.194635,0.002404,...,0.044226,-0.586462,0.568400,-0.244852,0.561846,-0.455302,0.000000,Launch Break,activity_rec_on_foot,3
45678,1522411734683,2018-03-30 13:08:54,43.718588,10.422543,1,5540.054054,-0.906800,-9.309720,-2.164250,-0.925490,...,0.020100,0.067008,0.171119,-0.487713,-0.612670,0.513169,0.000000,Launch Break,activity_rec_on_foot,3
45679,1522411794683,2018-03-30 13:09:54,43.718588,10.422543,0,5540.054054,-0.180495,-10.570415,-1.179730,0.055294,...,-0.227864,-1.232717,0.417479,-0.306835,-0.294565,0.233445,0.000000,Launch Break,activity_rec_on_foot,3


2) Detecting stops in these trajectories specifying minimum stop time and a radius in km

https://scikit-mobility.github.io/scikit-mobility/reference/preprocessing.html#skmob.preprocessing.detection.stay_locations

In [4]:
# Detect the stops (stay locations) in the trajectory dataframe.
stdf = detection.stay_locations(
    tdf,
    # NOTE(review): per the scikit-mobility docs this factor is only used when
    # spatial_radius_km is None — confirm against the installed version.
    stop_radius_factor=0.5,
    # Minimum time, in minutes, for a stay to count as a stop.
    minutes_for_a_stop=20.0,
    # Radius, in km, used to decide whether readings belong to the same stop.
    spatial_radius_km=0.2,
    # Adds the leaving_datetime column seen in the result.
    leaving_time=True,
)

# Display the detected stops.
stdf

Unnamed: 0,time,datetime,lat,lng,wifi_connected,sensor_light_mean,sensor_accelerometer_x_mean,sensor_accelerometer_y_mean,sensor_accelerometer_z_mean,sensor_gravity_x_mean,...,sensor_linear_acc_y_mean,sensor_linear_acc_z_mean,sensor_rotation_vec_x_mean,sensor_rotation_vec_y_mean,sensor_rotation_vec_z_mean,sensor_proximity_mean,label,activity_label,uid,leaving_datetime
0,1520874394710,2018-03-12 16:41:06,43.718449,10.422657,1,257.975575,-4.157792,8.615661,-1.916967,-4.227128,...,0.023709,0.020389,0.088427,0.768910,0.533013,5.000305,Working,activity_rec_still,1,2018-03-12 19:06:01
1,1520882821079,2018-03-12 19:06:01,43.701253,10.402322,0,1.689863,0.014816,-0.143928,9.869295,0.014143,...,0.000115,-0.000439,-0.006166,0.003968,-0.621208,5.000305,Home,activity_rec_still,1,2018-03-12 19:28:01
2,1520926892615,2018-03-12 19:31:01,43.701233,10.402322,0,1.689863,-2.862386,4.545425,8.284350,-2.863062,...,0.071294,0.000089,0.201812,-0.196298,-0.934624,5.000305,Home,activity_rec_still,1,2018-03-13 07:42:32
3,1520929412615,2018-03-13 07:45:32,43.701246,10.402321,0,1.689863,-2.862386,4.545425,8.284350,-2.863062,...,0.071294,0.000089,0.201812,-0.196298,-0.934624,5.000305,Home,activity_rec_still,1,2018-03-13 09:31:12
4,1520936172149,2018-03-13 09:31:12,43.719485,10.421907,0,125.962913,-2.195862,-9.447113,-0.558394,-2.139991,...,-0.002318,0.008244,-0.504787,0.511778,-0.364800,1.666768,Working,activity_rec_still,1,2018-03-13 12:24:04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445,1522393387315,2018-03-30 00:59:26,43.714313,10.416257,1,283.125000,-8.965945,-3.245095,-2.343125,-8.954188,...,-0.003404,-0.003444,-0.528223,0.583318,0.233728,0.333333,Home,activity_rec_still,3,2018-03-30 08:04:07
446,1522395094347,2018-03-30 08:09:07,43.714313,10.416230,0,74.857143,0.775850,-4.881135,-8.056230,0.719719,...,-0.789980,0.410698,-0.053649,-0.838681,0.201849,0.500000,Free time,activity_rec_on_bicycle,3,2018-03-30 08:32:34
447,1522404932054,2018-03-30 08:42:34,43.719918,10.422036,1,9243.200000,-0.008825,0.009305,9.967700,-0.008776,...,0.000146,0.161155,-0.000415,0.000487,0.997533,0.500000,Working,activity_rec_still,3,2018-03-30 11:16:32
448,1522405652054,2018-03-30 11:16:32,43.718513,10.427891,1,10169.900000,-0.964245,-0.608140,7.223885,-0.652514,...,-0.127210,0.120621,-0.081929,-0.030226,0.776495,0.500000,Working,activity_rec_still,3,2018-03-30 11:41:54


In [5]:
# Compare the number of rows in the trajectory dataframe with the number of detected stops.
print('Points of the original trajectory:\t%s' % (len(tdf),))
print('Points of stops:\t\t\t%s' % (len(stdf),))

Points of the original trajectory:	45681
Points of stops:			450


3) Identify each trajectory by assigning the corresponding trajectory id in a separate list

In [6]:
# Label every GPS reading of tdf, by row position, with one of:
#   -1  -> the reading was recorded during a detected stop
#   >0  -> id of the trajectory (movement leading up to a stop) the reading belongs to
#    0  -> the reading was never labelled (e.g. readings after a user's last stop)
#
# The labels are written through boolean masks computed over tdf itself, so each
# label lands on the row it was computed for regardless of how tdf is ordered.
# (A running counter over the visited readings does not correspond to row
# positions: tdf is not sorted by user/time, and the leaving reading of one stop
# is visited again as part of the next stop's window.)

#first valid trajectory id
trajectory_id = 1

#initially all trajectory ids are 0 - invalid trajectory ids
trajectory_ids_list = np.zeros(len(tdf))

#lower bound used before a user's first stop
#(assumes no reading is older than this date — TODO confirm for other datasets)
no_previous_stop = pd.Timestamp('2000-01-01')

#timestamp representing the last moment of the previous stop of the current user
previous_trajectory = no_previous_stop

#user whose stops are currently being processed
current_uid = None

#For each stop
#(assumes stdf lists the stops of each user contiguously and in chronological order — TODO confirm)
for _, stop in stdf.iterrows():
    #a new user starts: the previous user's leaving time must not bound this user's readings
    if current_uid is None or stop.uid != current_uid:
        current_uid = stop.uid
        previous_trajectory = no_previous_stop

    same_user = tdf.uid == stop.uid

    #readings of this user from the end of the previous stop up to (excluding) the start of this stop;
    #this includes the leaving reading of the previous stop, which starts the new trajectory
    moving = same_user & (tdf.datetime >= previous_trajectory) & (tdf.datetime < stop.datetime)

    #readings of this user between stop.datetime and stop.leaving_datetime (both included)
    stopped = same_user & (tdf.datetime >= stop.datetime) & (tdf.datetime <= stop.leaving_datetime)

    #to_numpy() turns the masks into positional boolean arrays, independent of tdf's index labels
    trajectory_ids_list[moving.to_numpy()] = trajectory_id

    #every stop closes the current trajectory, so the next movement gets a new id
    trajectory_id = trajectory_id + 1
    trajectory_ids_list[stopped.to_numpy()] = -1

    previous_trajectory = stop.leaving_datetime


In [7]:
#The stops
print(len(stdf))

#The list of trajectory ids created is of the same size as the number of trajectory dataframe rows
print(len(trajectory_ids_list))
print(len(tdf['datetime']))

#The trajectory ids are in a increasing order, when -1 the user is on a stop, 0s are the GPs points not considered (e.g. the last ones)
print(trajectory_ids_list)

450
45681
45681
[-1. -1. -1. ...  0.  0.  0.]


4) Integrating the trajectory ids list into the trajectory dataframe

In [8]:
# Attach the labels to the trajectory dataframe as a new 'trajectory_id' column.
# trajectory_ids_list is a NumPy array, so it is assigned by row position: it must
# hold one entry per row of tdf, in the same row order.
# Note: this mutates tdf in place, so the earlier display of tdf no longer shows its full state.
tdf['trajectory_id']=trajectory_ids_list
# Display the dataframe with the new column.
tdf

Unnamed: 0,time,datetime,lat,lng,wifi_connected,sensor_light_mean,sensor_accelerometer_x_mean,sensor_accelerometer_y_mean,sensor_accelerometer_z_mean,sensor_gravity_x_mean,...,sensor_linear_acc_y_mean,sensor_linear_acc_z_mean,sensor_rotation_vec_x_mean,sensor_rotation_vec_y_mean,sensor_rotation_vec_z_mean,sensor_proximity_mean,label,activity_label,uid,trajectory_id
0,1525666992353,2018-05-07 05:23:12,43.702109,10.400969,0,0.491083,-0.841768,-9.798033,0.893719,3.690201,...,-4.835157,0.029616,-0.531964,-0.058802,-0.286727,2.500000,Home,activity_rec_on_foot,2,-1.0
1,1520875794472,2018-03-12 17:29:54,43.719917,10.422036,0,2313.365000,0.090955,-0.035095,9.941500,0.090053,...,0.000559,0.135715,-0.004916,-0.000646,-0.880064,0.500000,Working,activity_rec_still,3,-1.0
2,1521047383369,2018-03-14 17:09:43,43.719917,10.422036,0,4693.660000,-0.081390,-0.004915,9.928235,-0.085803,...,0.021426,0.122015,-0.004425,-0.001167,0.999185,0.333333,Working,activity_rec_still,3,-1.0
3,1520872866143,2018-03-12 16:41:06,43.718449,10.423058,0,399.206400,-0.017048,-0.058225,9.914915,-0.020499,...,0.005610,0.000917,-0.002664,-0.000225,0.470244,5.000305,Break,activity_rec_still,1,-1.0
4,1520872926143,2018-03-12 16:42:06,43.718449,10.423058,0,358.872934,-0.012993,-0.063340,9.914752,-0.012417,...,-0.000354,0.001047,-0.002985,-0.000889,0.474117,5.000305,Break,activity_rec_still,1,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45676,1522411614683,2018-03-30 13:06:54,43.718588,10.422543,1,5540.054054,-0.236280,-9.867495,-0.841630,0.032914,...,-0.531366,0.948790,-0.232583,0.727089,-0.602808,0.000000,Launch Break,activity_rec_on_foot,3,0.0
45677,1522411674683,2018-03-30 13:07:54,43.718588,10.422543,1,5540.054054,0.046685,-9.941655,-1.194635,0.002404,...,-0.586462,0.568400,-0.244852,0.561846,-0.455302,0.000000,Launch Break,activity_rec_on_foot,3,0.0
45678,1522411734683,2018-03-30 13:08:54,43.718588,10.422543,1,5540.054054,-0.906800,-9.309720,-2.164250,-0.925490,...,0.067008,0.171119,-0.487713,-0.612670,0.513169,0.000000,Launch Break,activity_rec_on_foot,3,0.0
45679,1522411794683,2018-03-30 13:09:54,43.718588,10.422543,0,5540.054054,-0.180495,-10.570415,-1.179730,0.055294,...,-1.232717,0.417479,-0.306835,-0.294565,0.233445,0.000000,Launch Break,activity_rec_on_foot,3,0.0


In [9]:
# Persist the results as CSV files in the current working directory:
# the labelled GPS readings (including the trajectory_id column) and the detected stops.
# NOTE(review): to_csv is called with its defaults, so the row index is written as an
# extra leading column — pass index=False if that column is not wanted; confirm with consumers.
tdf.to_csv('context_TripsOpenData.csv')
stdf.to_csv('context_StopsOpenData.csv')