In [2]:
import pandas as pd
import skmob 
from skmob.preprocessing import detection

def traj(data):    
    data['datetime_wib'] = pd.to_datetime(data['datetime_wib'])

    tdf = skmob.TrajDataFrame(data, latitude='latitude', longitude='longitude', datetime='datetime_wib', user_id='maid')

    stays = detection.stay_locations(
        tdf, 
        spatial_radius_km=0.2,
        minutes_for_a_stop=5,
        leaving_time=True)
    stays = pd.DataFrame(stays)
    stays.rename(columns={'uid': 'maid', 'lat': 'latitude', 'lng':'longitude', 'datetime':'datetime_wib'}, inplace=True)

    return stays

  set_use_pygeos()


Kesimpulan sejauh ini: dia itu mendeteksi stay locations hanya pada area yg berada dalam jangkauan 5 menit DAN pada area max 200m. Apabila area tersebut dalam kurun waktu <5menit namun jaraknya jauh, dia tdk dideteksi.

In [20]:
def remove_stops(gps, stops):
    stops_df = stops.copy()
    gps_df = gps.copy()

    # Convert datetime columns to datetime objects
    stops_df['datetime_wib'] = pd.to_datetime(stops_df['datetime_wib'])
    stops_df['leaving_datetime'] = pd.to_datetime(stops_df['leaving_datetime'])
    gps_df['datetime_wib'] = pd.to_datetime(gps_df['datetime_wib'])

    # Iterate through each row in Stops DataFrame
    for index, row in stops_df.iterrows():
        uid = row['maid']
        start_time = row['datetime_wib']
        end_time = row['leaving_datetime']

        # Filter GPS DataFrame based on UID and time range
        filtered_gps_df = gps_df[(gps_df['maid'] == uid) & (gps_df['datetime_wib'] >= start_time) & (gps_df['datetime_wib'] < end_time)]
        # Remove corresponding rows from GPS DataFrame
        gps_df = gps_df.drop(filtered_gps_df.index)
    
    # Reset index of GPS DataFrame
    gps_df = gps_df.reset_index(drop=True)

    # Resulting GPS DataFrame without eliminated data
    print("Number of points dropped from original data: %d" %(len(gps) - len(gps_df)))
    
    return gps_df

In [21]:
data = pd.DataFrame({'maid': [1] * (13),
                     'latitude': [1] * 5  + [2] * 3 + [3, 3] + [1] * 3,  
                     'longitude': [1] * 5  + [2] * 3 + [3, 3] + [1] * 3,
                     'datetime_wib': ['2018-01-01 ' + f'00:{x}' for x in [0, 1, 2, 3, 6, 20, 21, 22, 23, 24, 25, 26, 27]]})
stays = traj(data)
data1 = remove_stops(data, stays)

stays1 = traj(data1)
data2 = remove_stops(data1, stays1)

stays2 = traj(data2)
data3 = remove_stops(data2, stays2)
print(data)
print(data1)
print(data2)

Number of points dropped from original data: 5
Number of points dropped from original data: 0
Number of points dropped from original data: 0
    maid  latitude  longitude        datetime_wib
0      1         1          1 2018-01-01 00:00:00
1      1         1          1 2018-01-01 00:01:00
2      1         1          1 2018-01-01 00:02:00
3      1         1          1 2018-01-01 00:03:00
4      1         1          1 2018-01-01 00:06:00
5      1         2          2 2018-01-01 00:20:00
6      1         2          2 2018-01-01 00:21:00
7      1         2          2 2018-01-01 00:22:00
8      1         3          3 2018-01-01 00:23:00
9      1         3          3 2018-01-01 00:24:00
10     1         1          1 2018-01-01 00:25:00
11     1         1          1 2018-01-01 00:26:00
12     1         1          1 2018-01-01 00:27:00
   maid  latitude  longitude        datetime_wib
0     1         2          2 2018-01-01 00:20:00
1     1         2          2 2018-01-01 00:21:00
2     1     

In [22]:
stays

Unnamed: 0,maid,latitude,longitude,datetime_wib,leaving_datetime
0,1,1.0,1.0,2018-01-01,2018-01-01 00:20:00


In [3]:
dataA = pd.DataFrame({'maid': [1] * (8),
                     'latitude': [2] * 3 + [3, 3] + [1] * 3,  
                     'longitude': [2] * 3 + [3, 3] + [1] * 3,
                     'datetime_wib': ['2018-01-01 ' + f'00:{x}' for x in [20, 21, 22, 23, 24, 25, 26, 27]]})


b = traj(dataA)
b

Unnamed: 0,maid,latitude,longitude,datetime_wib,leaving_datetime


In [4]:
dataA

Unnamed: 0,maid,latitude,longitude,datetime_wib
0,1,2,2,2018-01-01 00:20:00
1,1,2,2,2018-01-01 00:21:00
2,1,2,2,2018-01-01 00:22:00
3,1,3,3,2018-01-01 00:23:00
4,1,3,3,2018-01-01 00:24:00
5,1,1,1,2018-01-01 00:25:00
6,1,1,1,2018-01-01 00:26:00
7,1,1,1,2018-01-01 00:27:00


In [17]:
dataB = pd.DataFrame({'maid': [1] * (8),
                     'latitude': [2] * 3 + [3, 3] + [1] * 3,  
                     'longitude': [2] * 3 + [3, 3] + [1] * 3,
                     'datetime_wib': ['2018-01-01 ' + f'00:{x}' for x in [10, 11, 12, 23, 24, 25, 26, 27]]})


c = traj(dataB)
d = traj()

Unnamed: 0,maid,latitude,longitude,datetime_wib,leaving_datetime
0,1,2.0,2.0,2018-01-01 00:10:00,2018-01-01 00:23:00


In [18]:
dataB

Unnamed: 0,maid,latitude,longitude,datetime_wib
0,1,2,2,2018-01-01 00:10:00
1,1,2,2,2018-01-01 00:11:00
2,1,2,2,2018-01-01 00:12:00
3,1,3,3,2018-01-01 00:23:00
4,1,3,3,2018-01-01 00:24:00
5,1,1,1,2018-01-01 00:25:00
6,1,1,1,2018-01-01 00:26:00
7,1,1,1,2018-01-01 00:27:00


In [5]:
data = pd.DataFrame({'maid': [1] * (13),
                     'latitude': [1] * 5  + [2] * 3 + [3, 3] + [1] * 3,  
                     'longitude': [1] * 5  + [2] * 3 + [3, 3] + [1] * 3,
                     'datetime_wib': ['2018-01-01 ' + f'00:{x}' for x in [0, 1, 2, 3, 6, 20, 21, 22, 23, 24, 25, 26, 27]]})
stays = traj(data)
stays

Unnamed: 0,maid,latitude,longitude,datetime_wib,leaving_datetime
0,1,1.0,1.0,2018-01-01,2018-01-01 00:20:00


In [6]:
data

Unnamed: 0,maid,latitude,longitude,datetime_wib
0,1,1,1,2018-01-01 00:00:00
1,1,1,1,2018-01-01 00:01:00
2,1,1,1,2018-01-01 00:02:00
3,1,1,1,2018-01-01 00:03:00
4,1,1,1,2018-01-01 00:06:00
5,1,2,2,2018-01-01 00:20:00
6,1,2,2,2018-01-01 00:21:00
7,1,2,2,2018-01-01 00:22:00
8,1,3,3,2018-01-01 00:23:00
9,1,3,3,2018-01-01 00:24:00
