In [1]:
import import_ipynb
from readdrivingdata import read_driving_data

## Load the driving data for the STL participants. At this point in the project the focus is on the
## STL (main) data *only*, as it has more features and is generally more comprehensive.
pdata = read_driving_data()

importing Jupyter notebook from readdrivingdata.ipynb


In [2]:
import numpy as np

## Round the latitude/longitude columns to 4 decimal places. That precision corresponds to about 11m,
## which should be enough to tell unique locations apart for number_of_locations while still tolerating
## some variation in parking location (e.g. if the participant uses street parking to visit a location).
## The same precision should also be sufficient for the other functions.

for coord_column in ('TELat', 'TELong'):
    pdata[coord_column] = np.round(pdata[coord_column], decimals=4)

In [3]:
import skmob
from skmob.measures.individual import home_location, max_distance_from_home, maximum_distance, number_of_locations, radius_of_gyration

tdf = skmob.TrajDataFrame(
    pdata,
    latitude='TELat',
    longitude='TELong',
    datetime='TEtime',
    user_id='uid',
)

## Compute 4 extra features for each participant in this dataset:
##   - maximum distance from home
##   - maximum distance (in a single trip)
##   - number of unique locations
##   - radius of gyration (essentially a measure of how far an individual travels from his or her
##     'mean' location)
## The measures are evaluated one after another, in the order listed.

mobility_measures = (max_distance_from_home, maximum_distance, number_of_locations, radius_of_gyration)
maxdistances_home, maxdistances, no_locs, rgs = [measure(tdf) for measure in mobility_measures]

100%|██████████| 246/246 [00:05<00:00, 48.63it/s]
100%|██████████| 246/246 [00:03<00:00, 70.81it/s]
100%|██████████| 246/246 [00:02<00:00, 102.14it/s]
100%|██████████| 246/246 [00:02<00:00, 84.98it/s] 


In [4]:
import pandas as pd

## Gather the four skmob result columns side by side in one table. Each entry pairs the readable
## column label used from here on with the result column it is taken from; passing the labels as
## `keys` names the columns directly, so no separate renaming step is needed.

labelled_features = [
    ('max. distance from home', maxdistances_home["max_distance_from_home"]),
    ('max. distance/trip', maxdistances["maximum_distance"]),
    ('no. unique locations', no_locs["number_of_locations"]),
    ('radius of gyration', rgs["radius_of_gyration"]),
]

skmobdata = pd.concat(
    [feature for _, feature in labelled_features],
    axis=1,
    keys=[label for label, _ in labelled_features],
)

In [5]:
STLdata = pd.read_csv(r'C:\Users\maria\OneDrive\Documents\RESEARCH\Data files\ruralurban_driving summary_main.csv')

## adjusting these features for no. days in study, since max distance, no. unique locations, and radius of gyration
## are all somewhat related to number of trips or time spent in the study... e.g. if an individual is recorded for a longer
## period of time, there is a higher likelihood to record a 'maximum distance' or new location (of course, when study
## time increases to multiple years per participant, the effect of changing sample time will be less significant)

## Work only from the unadjusted columns. This cell overwrites 'skmobdata' with the combined table at the
## end, so on a second run 'skmobdata' already contains the '(adj)' columns; selecting the base columns
## first makes a re-run produce the same table instead of failing on a column-count mismatch.
ADJ_SUFFIX = ' (adj)'
base_columns = [col for col in skmobdata.columns if not str(col).endswith(ADJ_SUFFIX)]
skmob_unadjusted = skmobdata[base_columns]

## .div(..., axis=0) pairs rows by index label. If the two tables do not share the same index, the result
## silently gains NaN / extra rows, so fail loudly here instead.
## NOTE(review): rows are paired by position, not by participant id -- this assumes the rows of STLdata are
## in the same participant order as the skmob results; confirm against the summary file.
assert skmob_unadjusted.index.equals(STLdata.index), (
    "skmobdata and STLdata do not share the same row index "
    f"({len(skmob_unadjusted)} vs {len(STLdata)} rows); cannot adjust by days in study"
)

skmobdata2 = skmob_unadjusted.div(STLdata["total days in study"], axis=0)
skmobdata2.columns = [f'{col}{ADJ_SUFFIX}' for col in base_columns]

## now, our 'skmobdata' file will include the 4 features, both complete values as well as values that are adjusted
## for time in study (by dividing each participant value by the number of days they participated for)

skmobdata = pd.concat([skmob_unadjusted, skmobdata2], axis = 1)

In [7]:
## Write the skmob feature table to CSV; index = False leaves the row index out of the file.
skmob_summary_path = r'C:\Users\maria\OneDrive\Documents\RESEARCH\Data files\ruralurban_skmob summary_main.csv'
skmobdata.to_csv(skmob_summary_path, index=False)