In [1]:
import pandas as pd 
import datetime
# Load the user table and collect every distinct user_id in order of first
# appearance; print the ids together with how many there are.
data = pd.read_csv('newData.csv')
user_id_list = pd.unique(data['user_id'])
print(user_id_list, len(user_id_list))

[    0     2     3 ... 34490 34492 34497] 13969


In [2]:
# One row per user. car_ownership starts at 0 for everybody and is switched
# to 1 by the flagging cells further down.
new_data = dict(user_id=user_id_list, car_ownership=0)
df = pd.DataFrame(data=new_data)
df

Unnamed: 0,user_id,car_ownership
0,0,0
1,2,0
2,3,0
3,8,0
4,10,0
...,...,...
13964,34483,0
13965,34484,0
13966,34490,0
13967,34492,0


In [3]:
# Load the tripleg and staypoint tables as plain pandas DataFrames.
tripleg, stop_points = (
    pd.read_csv(csv_name) for csv_name in ('triplegs.csv', 'staypoints.csv')
)

In [4]:
# Parse the two timestamp columns that the home-arrival matching compares:
# when a staypoint started and when a tripleg finished.
for frame, column in ((stop_points, 'started_at'), (tripleg, 'finished_at')):
    frame[column] = pd.to_datetime(frame[column])

In [5]:
# Distinct users that appear in the staypoint table, and how many there are.
ID_list = pd.unique(stop_points['user_id'])
ID_list.size

13969

In [6]:
import os
# Must be set BEFORE geopandas is imported on the next lines.
# NOTE(review): presumably this makes geopandas use shapely rather than PyGEOS
# for geometry operations — confirm against the installed geopandas version.
os.environ['USE_PYGEOS'] = '0'
import geopandas
import trackintel as ti
import numpy as np
# Read trip_5.csv as trackintel triplegs: the trip_started_at / trip_finished_at /
# trip columns are mapped onto started_at / finished_at / geom, the coordinates
# are declared as EPSG:4326, and the first CSV column becomes the index.
tpls = ti.io.file.read_triplegs_csv('trip_5.csv', columns={'trip_started_at':'started_at', 'trip_finished_at':'finished_at', 'trip':'geom'}, crs='EPSG:4326', index_col=0)

In [7]:
# Predict the transport mode of triplegs.
#
# The prediction labels each tripleg 'slow_mobility', 'motorized_mobility' or
# 'fast_mobility' according to its speed. The thresholds used here are custom,
# NOT the 15 km/h and 100 km/h values of the library's default classification.
# Each key is the upper speed bound of its category in m/s (km/h / 3.6):
#   up to 5 km/h     -> slow_mobility       (walking pace)
#   up to 180 km/h   -> motorized_mobility  (treated as car travel)
#   anything faster  -> fast_mobility       (high-speed rail or airplane)
# NOTE(review): with a 5 km/h lower bound, cycling, jogging and buses
# presumably also land in 'motorized_mobility' — confirm this is intended.
categories = {
    5 / 3.6: 'slow_mobility',
    180 / 3.6: 'motorized_mobility',
    np.inf: 'fast_mobility',
}
mode_tpls = tpls.as_triplegs.predict_transport_mode(categories=categories)
mode_tpls



Unnamed: 0,user_id,tripleg_ID,started_at,finished_at,geom,mode
0,2,6,2022-09-06 22:19:08-04:00,2022-09-06 22:19:58-04:00,"LINESTRING (-81.68406 29.65175, -81.68407 29.6...",slow_mobility
1,2,7,2022-09-06 22:31:37-04:00,2022-09-06 22:34:49-04:00,"LINESTRING (-81.68406 29.65175, -81.68407 29.6...",motorized_mobility
2,2,10,2022-09-09 02:51:09-04:00,2022-09-09 02:53:51-04:00,"LINESTRING (-81.67754 29.65503, -81.65788 29.6...",motorized_mobility
3,2,12,2022-09-13 02:34:20-04:00,2022-09-13 02:38:41-04:00,"LINESTRING (-81.65788 29.65178, -81.65827 29.6...",slow_mobility
4,2,13,2022-09-13 02:49:03-04:00,2022-09-13 02:55:06-04:00,"LINESTRING (-81.65827 29.65349, -81.65788 29.6...",slow_mobility
...,...,...,...,...,...,...
83833,34471,298718,2022-10-30 00:49:31-04:00,2022-10-30 00:53:46-04:00,"LINESTRING (-82.50224 29.76146, -82.50225 29.7...",motorized_mobility
83834,34471,298719,2022-10-30 01:31:03-04:00,2022-10-30 01:35:01-04:00,"LINESTRING (-82.59743 30.00280, -82.59742 30.0...",motorized_mobility
83835,34471,298720,2022-10-30 01:52:42-04:00,2022-10-30 01:54:53-04:00,"LINESTRING (-82.78531 30.29825, -82.81831 30.3...",motorized_mobility
83836,34471,298721,2022-10-30 02:06:15-04:00,2022-10-30 02:12:24-04:00,"LINESTRING (-82.95013 30.45929, -82.95011 30.4...",motorized_mobility


In [8]:
# Per-user number of triplegs in each predicted transport mode
# (rows: user_id, columns: mode).
mode_split = ti.analysis.modal_split.calculate_modal_split(
    tpls=mode_tpls,
    per_user=True,
    metric='count',
)
mode_split

mode,fast_mobility,motorized_mobility,slow_mobility
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,0,2,4
8,0,6,2
10,0,3,15
14,0,1,0
16,0,1,0
...,...,...,...
34405,0,4,0
34406,0,3,0
34441,0,1,0
34471,0,10,0


In [9]:
# Rule 1: flag a user as a car owner when MORE THAN HALF of their triplegs
# were classified as motorized_mobility.
# Computed for all users at once instead of looping over user ids; the loop
# rescanned `df` for every user and shadowed the builtin `id`.
mode_columns = ['fast_mobility', 'motorized_mobility', 'slow_mobility']
total_trips = mode_split[mode_columns].sum(axis=1, skipna=False)
motorized_share = mode_split['motorized_mobility'] / total_trips
majority_motorized_users = motorized_share[motorized_share > 0.5].index
# `df` is updated in place because the later flagging cells build on it.
df.loc[df['user_id'].isin(majority_motorized_users), 'car_ownership'] = 1
df

Unnamed: 0,user_id,car_ownership
0,0,0
1,2,0
2,3,0
3,8,1
4,10,0
...,...,...
13964,34483,0
13965,34484,0
13966,34490,0
13967,34492,0


In [10]:
# Number of users flagged as car owners so far.
print(int(df['car_ownership'].eq(1).sum()))

3639


In [11]:
# Load the per-user home locations; the first CSV column becomes the row index.
# As displayed below, the frame has an ID column plus two coordinate pairs:
# LAT / LON and LAT-4326 / LON-4326.
home_location = pd.read_csv('home_location.csv', index_col=0)
home_location

Unnamed: 0,ID,LAT,LON,LAT-4326,LON-4326
0,0,3550540.0,-9092080.0,30.364891,-81.675544
1,2,3458860.0,-9093020.0,29.651722,-81.683988
2,3,3456360.0,-9163720.0,29.632203,-82.319097
3,8,3510020.0,-9105740.0,30.050323,-81.798254
4,10,3565080.0,-9087560.0,30.477524,-81.634940
...,...,...,...,...,...
12907,34337,3384040.0,-9147900.0,29.065929,-82.176984
12908,34373,3541500.0,-9103240.0,30.294798,-81.775796
12909,34399,3396500.0,-9149440.0,29.163716,-82.190818
12910,34418,3578260.0,-9088880.0,30.579509,-81.646798


In [12]:
from geopy.distance import geodesic
from shapely.wkt import loads
from tqdm import tqdm

# Keep only the staypoints that lie closer than HOME_RADIUS_M (geodesic
# distance, in metres) to the home location of the same user.
HOME_RADIUS_M = 100

# (lat, lon) of the FIRST home row per user, looked up once instead of
# filtering `home_location` twice for every user.
first_homes = home_location.drop_duplicates(subset='ID')
home_coords = dict(
    zip(first_homes['ID'], zip(first_homes['LAT-4326'], first_homes['LON-4326']))
)

# Split the staypoints by user a single time; the previous per-user boolean
# mask rescanned the whole staypoint table for each of the ~13k users.
stops_by_user = dict(tuple(stop_points.groupby('user_id', sort=False)))

data_to_append_list = []
home_id_list = home_location['ID'].unique()
for user_id in tqdm(home_id_list, total=len(home_id_list)):
    user_stops = stops_by_user.get(user_id)
    if user_stops is None:
        # This user has a home location but no staypoints.
        continue
    p1 = home_coords[user_id]
    for sp in user_stops.itertuples():
        # sp[0] is the row index, so sp[5] is the fifth column of stop_points:
        # the WKT point geometry (see the POINT strings in the output below).
        stop_geom = loads(sp[5])
        # WKT points are (x=lon, y=lat); geodesic expects (lat, lon).
        p2 = (stop_geom.y, stop_geom.x)
        if geodesic(p1, p2).meters < HOME_RADIUS_M:
            data_to_append_list.append(sp)

# from_records on the raw tuples yields positional column labels 0..5; the
# next cell relies on that layout when it drops and renames columns.
new_staypoints = pd.DataFrame.from_records(data_to_append_list)
new_staypoints


100%|██████████| 12912/12912 [02:27<00:00, 87.47it/s] 


Unnamed: 0,0,1,2,3,4,5
0,3,3,2,2022-09-06 03:00:04-04:00,2022-09-06 09:15:08-04:00,POINT (-81.6840799999999945 29.6517529999999994)
1,6,6,2,2022-09-06 21:34:07-04:00,2022-09-06 22:09:29-04:00,POINT (-81.6840588550000177 29.6517638699999999)
2,9,9,2,2022-09-06 22:37:57-04:00,2022-09-07 00:05:48-04:00,POINT (-81.6840577100000189 29.6517348299999988)
3,10,10,2,2022-09-07 00:07:03-04:00,2022-09-07 00:38:53-04:00,POINT (-81.6840690000000080 29.6517566666666674)
4,12,12,2,2022-09-07 04:46:05-04:00,2022-09-07 10:25:44-04:00,POINT (-81.6840540000000033 29.6517789999999977)
...,...,...,...,...,...,...
143367,638883,638883,34266,2022-10-29 17:53:56-04:00,2022-10-29 18:21:27-04:00,POINT (-82.1440560000000062 29.7638719999999992)
143368,638937,638937,34300,2022-10-29 14:26:05-04:00,2022-10-29 17:29:26-04:00,POINT (-81.3289268660000033 29.9039547340000027)
143369,638964,638964,34337,2022-10-23 08:21:03-04:00,2022-10-23 09:34:50-04:00,POINT (-82.1769299999999987 29.0659899999999993)
143370,639125,639125,34418,2022-10-23 20:00:30-04:00,2022-10-23 21:03:18-04:00,POINT (-81.6467745000000065 30.5794744999999999)


In [13]:
# Drop the two leading bookkeeping columns (positions 0 and 1) carried over
# from the itertuples() records, then give the remaining positional columns
# descriptive names.
drop_cols = [0, 1]
new_staypoint = (
    new_staypoints
    .drop(columns=new_staypoints.columns[drop_cols])
    .rename(columns={2: 'user_id', 3: 'started_at', 4: 'finished_at', 5: 'geom'})
)
new_staypoint


Unnamed: 0,user_id,started_at,finished_at,geom
0,2,2022-09-06 03:00:04-04:00,2022-09-06 09:15:08-04:00,POINT (-81.6840799999999945 29.6517529999999994)
1,2,2022-09-06 21:34:07-04:00,2022-09-06 22:09:29-04:00,POINT (-81.6840588550000177 29.6517638699999999)
2,2,2022-09-06 22:37:57-04:00,2022-09-07 00:05:48-04:00,POINT (-81.6840577100000189 29.6517348299999988)
3,2,2022-09-07 00:07:03-04:00,2022-09-07 00:38:53-04:00,POINT (-81.6840690000000080 29.6517566666666674)
4,2,2022-09-07 04:46:05-04:00,2022-09-07 10:25:44-04:00,POINT (-81.6840540000000033 29.6517789999999977)
...,...,...,...,...
143367,34266,2022-10-29 17:53:56-04:00,2022-10-29 18:21:27-04:00,POINT (-82.1440560000000062 29.7638719999999992)
143368,34300,2022-10-29 14:26:05-04:00,2022-10-29 17:29:26-04:00,POINT (-81.3289268660000033 29.9039547340000027)
143369,34337,2022-10-23 08:21:03-04:00,2022-10-23 09:34:50-04:00,POINT (-82.1769299999999987 29.0659899999999993)
143370,34418,2022-10-23 20:00:30-04:00,2022-10-23 21:03:18-04:00,POINT (-81.6467745000000065 30.5794744999999999)


In [14]:
# For every home staypoint, find the tripleg of the same user that finished
# within `time_window` before the staypoint started (inclusive of the start
# itself), i.e. the trip that brought the user home.
time_window = datetime.timedelta(minutes=5)

miss = 0       # users in ID_list that have no triplegs at all
check_len = 0  # home arrivals that matched more than one tripleg
data_to_append_list = []

# Split both tables by user once; filtering them with a boolean mask inside
# the loop rescanned every row for every user.
stops_by_user = dict(tuple(new_staypoint.groupby('user_id', sort=False)))
legs_by_user = dict(tuple(tripleg.groupby('user_id', sort=False)))

for user_id in ID_list:
    related_tripled_by_user = legs_by_user.get(user_id)
    if related_tripled_by_user is None:
        miss += 1
        continue

    related_stop_by_user = stops_by_user.get(user_id)
    if related_stop_by_user is None:
        # The user has triplegs but no staypoint near home.
        continue

    leg_end_times = related_tripled_by_user['finished_at']
    for stop_related_row in related_stop_by_user.itertuples():
        arrival = stop_related_row.started_at
        tripled_found = related_tripled_by_user[
            (leg_end_times > arrival - time_window) & (leg_end_times <= arrival)
        ]
        if len(tripled_found) == 0:
            # No tripleg ended shortly before this home stay.
            continue
        if len(tripled_found) > 1:
            # Several candidates: keep the one that finished last.
            check_len += 1
            tripled_found = tripled_found.nlargest(1, 'finished_at')

        matched_leg = tripled_found.iloc[0]
        data_to_append_list.append({
            'user_id': user_id,
            'trip_started_at': matched_leg['started_at'],
            'trip_finished_at': matched_leg['finished_at'],
            'trip': matched_leg['geom'],
        })

new_trip_df = pd.DataFrame(
    data_to_append_list,
    columns=['user_id', 'trip_started_at', 'trip_finished_at', 'trip'],
)
new_trip_df

Unnamed: 0,user_id,trip_started_at,trip_finished_at,trip
0,2,2022-09-06 22:31:37-04:00,2022-09-06 22:34:49-04:00,LINESTRING (-81.6840600000000023 29.6517499999...
1,8,2022-09-10 00:59:14-04:00,2022-09-10 01:03:16-04:00,LINESTRING (-81.8096299999999985 30.0851299999...
2,8,2022-09-27 10:38:11-04:00,2022-09-27 10:38:22-04:00,LINESTRING (-81.8045299999999997 30.1029400000...
3,22,2022-09-05 22:30:59-04:00,2022-09-05 22:32:23-04:00,LINESTRING (-81.4898787999999996 30.3373826000...
4,22,2022-09-06 13:42:11-04:00,2022-09-06 13:48:23-04:00,LINESTRING (-81.4465212799999989 30.3217291799...
...,...,...,...,...
12525,34105,2022-10-27 02:21:01-04:00,2022-10-27 02:21:25-04:00,LINESTRING (-83.3323100000000068 30.3900000000...
12526,34110,2022-10-26 18:25:06-04:00,2022-10-26 18:25:15-04:00,LINESTRING (-82.0379379999999969 29.1221589999...
12527,34187,2022-10-25 09:49:27-04:00,2022-10-25 09:53:25-04:00,LINESTRING (-81.5762849599999953 30.3208326100...
12528,34341,2022-10-26 21:51:56-04:00,2022-10-26 21:55:56-04:00,LINESTRING (-82.0117489999999947 29.7893889999...


In [15]:
# Persist the matched home-arrival trips; the file is read back as trackintel
# triplegs in the next cell (which treats the first CSV column as the index).
new_trip_df.to_csv('home_related_trip.csv')

In [16]:
# Re-read the saved home-arrival trips as trackintel triplegs and classify
# them with the same speed bounds (`categories`) used for the earlier prediction.
home_trip_columns = {
    'trip_started_at': 'started_at',
    'trip_finished_at': 'finished_at',
    'trip': 'geom',
}
new_tpls = ti.io.file.read_triplegs_csv(
    'home_related_trip.csv',
    columns=home_trip_columns,
    crs='EPSG:4326',
    index_col=0,
)
new_mode_tpls = new_tpls.as_triplegs.predict_transport_mode(categories=categories)
new_mode_tpls

Unnamed: 0,user_id,started_at,finished_at,geom,mode
0,2,2022-09-06 22:31:37-04:00,2022-09-06 22:34:49-04:00,"LINESTRING (-81.68406 29.65175, -81.68407 29.6...",motorized_mobility
1,8,2022-09-10 00:59:14-04:00,2022-09-10 01:03:16-04:00,"LINESTRING (-81.80963 30.08513, -81.79732 30.1...",motorized_mobility
2,8,2022-09-27 10:38:11-04:00,2022-09-27 10:38:22-04:00,"LINESTRING (-81.80453 30.10294, -81.80454 30.1...",slow_mobility
3,22,2022-09-05 22:30:59-04:00,2022-09-05 22:32:23-04:00,"LINESTRING (-81.48988 30.33738, -81.49342 30.3...",motorized_mobility
4,22,2022-09-06 13:42:11-04:00,2022-09-06 13:48:23-04:00,"LINESTRING (-81.44652 30.32173, -81.44652 30.3...",motorized_mobility
...,...,...,...,...,...
12525,34105,2022-10-27 02:21:01-04:00,2022-10-27 02:21:25-04:00,"LINESTRING (-83.33231 30.39000, -83.33296 30.3...",motorized_mobility
12526,34110,2022-10-26 18:25:06-04:00,2022-10-26 18:25:15-04:00,"LINESTRING (-82.03794 29.12216, -82.03725 29.1...",motorized_mobility
12527,34187,2022-10-25 09:49:27-04:00,2022-10-25 09:53:25-04:00,"LINESTRING (-81.57628 30.32083, -81.57623 30.3...",motorized_mobility
12528,34341,2022-10-26 21:51:56-04:00,2022-10-26 21:55:56-04:00,"LINESTRING (-82.01175 29.78939, -82.01604 29.7...",motorized_mobility


In [17]:
# Per-user number of home-arrival trips in each predicted transport mode.
new_mode_split = ti.analysis.modal_split.calculate_modal_split(
    tpls=new_mode_tpls,
    per_user=True,
    metric='count',
)
new_mode_split

mode,fast_mobility,motorized_mobility,slow_mobility
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,0,1,0
8,0,1,1
22,1,26,21
30,0,0,1
50,0,1,0
...,...,...,...
33979,0,1,1
34105,0,1,0
34110,0,1,0
34187,0,1,0


In [18]:
# Rule 2: additionally flag every user with AT LEAST ONE motorized trip that
# ended at home. Done with a single membership test instead of a Python loop
# that rescanned `df` per user and shadowed the builtin `id`.
home_motorized_users = new_mode_split.loc[
    new_mode_split['motorized_mobility'] > 0
].index
# Only sets flags to 1; users flagged earlier keep their flag.
df.loc[df['user_id'].isin(home_motorized_users), 'car_ownership'] = 1
df

Unnamed: 0,user_id,car_ownership
0,0,0
1,2,1
2,3,0
3,8,1
4,10,0
...,...,...
13964,34483,0
13965,34484,0
13966,34490,0
13967,34492,0


In [19]:
# Number of users flagged as car owners after the home-arrival rule.
print(int(df['car_ownership'].eq(1).sum()))

4046


In [20]:
# Load the complete tripleg table and keep only the users listed in newData.csv.
# NOTE(review): this rebinds `tpls`, which previously held trip_5.csv; re-running
# the earlier mode-prediction cell after this one would silently use this table.
tpls = ti.io.file.read_triplegs_csv('triplegs.csv', index_col=0)
known_user_mask = tpls['user_id'].isin(user_id_list)
all_tpls = tpls[known_user_mask]

In [21]:
# How many of the known users have at least one tripleg.
all_tpls['user_id'].nunique(dropna=False)

12734

In [22]:
# Classify every tripleg of the known users with the same custom speed
# bounds (`categories`) that were defined for the first prediction.
all_mode_tpls  = all_tpls.as_triplegs.predict_transport_mode(categories=categories)
all_mode_tpls



Unnamed: 0_level_0,user_id,started_at,finished_at,geom,mode
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
6,2,2022-09-06 22:19:08-04:00,2022-09-06 22:19:58-04:00,"LINESTRING (-81.68406 29.65175, -81.68407 29.6...",slow_mobility
7,2,2022-09-06 22:31:37-04:00,2022-09-06 22:34:49-04:00,"LINESTRING (-81.68406 29.65175, -81.68407 29.6...",motorized_mobility
9,2,2022-09-08 08:52:07-04:00,2022-09-08 08:55:53-04:00,"LINESTRING (-81.68407 29.65176, -81.68408 29.6...",slow_mobility
10,2,2022-09-09 02:51:09-04:00,2022-09-09 02:53:51-04:00,"LINESTRING (-81.67754 29.65503, -81.65788 29.6...",motorized_mobility
11,2,2022-09-11 10:27:01-04:00,2022-09-11 10:29:08-04:00,"LINESTRING (-81.68407 29.65176, -81.68406 29.6...",slow_mobility
...,...,...,...,...,...
298729,34483,2022-10-31 12:33:43-04:00,2022-10-31 12:43:14-04:00,"LINESTRING (-81.37185 29.99983, -81.37185 29.9...",slow_mobility
298730,34484,2022-10-28 00:55:00-04:00,2022-10-28 00:55:55-04:00,"LINESTRING (-81.64084 30.49224, -81.64084 30.4...",slow_mobility
298744,34497,2022-10-26 08:25:34-04:00,2022-10-26 08:25:48-04:00,"LINESTRING (-82.09808 29.99144, -82.09809 29.9...",slow_mobility
298745,34497,2022-10-29 20:39:33-04:00,2022-10-29 20:39:40-04:00,"LINESTRING (-82.23321 29.38124, -82.23322 29.3...",slow_mobility


In [23]:
# Per-user number of triplegs in each predicted mode, over all triplegs.
all_mode_split = ti.analysis.modal_split.calculate_modal_split(
    tpls=all_mode_tpls,
    per_user=True,
    metric='count',
)
all_mode_split

mode,fast_mobility,motorized_mobility,slow_mobility
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,0,4,9
3,0,6,73
8,0,6,6
10,0,4,32
11,0,0,21
...,...,...,...
34471,0,12,1
34472,1,0,0
34483,0,0,1
34484,0,0,1


In [24]:
# Rule 3: flag every user with AT LEAST ONE motorized tripleg anywhere in the
# full tripleg table. Done with a single membership test instead of a Python
# loop that rescanned `df` per user and shadowed the builtin `id`.
# NOTE(review): if trip_5.csv and the home-arrival trips are subsets of
# triplegs.csv, this rule subsumes the two earlier rules — confirm that the
# weaker "any motorized trip" criterion is really the intended final one.
any_motorized_users = all_mode_split.loc[
    all_mode_split['motorized_mobility'] > 0
].index
# Only sets flags to 1; users flagged earlier keep their flag.
df.loc[df['user_id'].isin(any_motorized_users), 'car_ownership'] = 1
df

Unnamed: 0,user_id,car_ownership
0,0,0
1,2,1
2,3,1
3,8,1
4,10,1
...,...,...
13964,34483,0
13965,34484,0
13966,34490,0
13967,34492,0


In [25]:
# Final number of users flagged as car owners.
print(int(df['car_ownership'].eq(1).sum()))

6580


In [26]:
# Keep only users with more than 20 triplegs in total across the three modes.
mode_columns = ['fast_mobility', 'motorized_mobility', 'slow_mobility']
trips_per_user = all_mode_split[mode_columns].sum(axis=1, skipna=False)
filter_mode = all_mode_split[trips_per_user > 20]
filter_mode

mode,fast_mobility,motorized_mobility,slow_mobility
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,0,6,73
10,0,4,32
11,0,0,21
16,0,3,27
22,3,143,60
...,...,...,...
33253,0,0,40
33300,0,35,6
33413,0,19,2
33578,2,12,10


In [27]:
# Number of users with more than 20 triplegs.
len(filter_mode)

4167

In [28]:
# Users with more than 20 triplegs of which at least one is motorized.
has_motorized = filter_mode['motorized_mobility'].gt(0)
test = filter_mode[has_motorized]

In [29]:
# Display the frequent users (more than 20 triplegs) that have at least one
# motorized tripleg.
test

mode,fast_mobility,motorized_mobility,slow_mobility
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,0,6,73
10,0,4,32
16,0,3,27
22,3,143,60
30,2,24,36
...,...,...,...
32822,0,47,4
33300,0,35,6
33413,0,19,2
33578,2,12,10


In [30]:
# Share of the users with more than 20 triplegs that have at least one
# motorized tripleg, printed as a whole-number percentage.
motorized_user_count = int(filter_mode['motorized_mobility'].gt(0).sum())
percentage = motorized_user_count / len(filter_mode)
percentage_string = f"{percentage:.0%}"
print(percentage_string)

64%


In [31]:
# Attach each user's LAT-4326 / LON-4326 home coordinates to the ownership
# table. The left join keeps every row of `df`, including users that have no
# entry in the home-location table.
home_locations = home_location.rename(columns={'ID': 'user_id'})
home_coordinate_columns = ['user_id', 'LAT-4326', 'LON-4326']
merged_data = df.merge(
    home_locations[home_coordinate_columns],
    how='left',
    on='user_id',
)
merged_data

Unnamed: 0,user_id,car_ownership,LAT-4326,LON-4326
0,0,0,30.364891,-81.675544
1,2,1,29.651722,-81.683988
2,3,1,29.632203,-82.319097
3,8,1,30.050323,-81.798254
4,10,1,30.477524,-81.634940
...,...,...,...,...
13964,34483,0,30.000546,-81.370836
13965,34484,0,30.368612,-81.668897
13966,34490,0,30.297280,-81.562896
13967,34492,0,29.988720,-81.480610


In [32]:
# Write the final per-user table (user_id, car_ownership flag and home
# coordinates) to User.csv.
merged_data.to_csv('User.csv')