In [25]:
import os
os.environ['USE_PYGEOS'] = '0'
import geopandas
import trackintel as ti
import pandas as pd
import pyproj
import numpy as np
import warnings
from shapely.wkt import loads
import datetime
from shapely import wkt, Point


In [2]:
df = pd.read_csv('part_0.csv.gz.csv')
columns_to_keep = ['grid', 'latitude', 'longitude', 'time']
newdf = df[columns_to_keep].copy()
newdf = newdf.rename(columns={'grid': 'user_id'}) 
newdf['user_id'] = pd.factorize(newdf['user_id'])[0]
newdf.to_csv('temp/data.csv')

## Stop Inference

In [3]:
pfs = ti.io.file.read_positionfixes_csv('temp/data.csv', 
                                        columns={'user_id':'user_id', 'latitude':'latitude', 'longitude':'longitude', 'time':'tracked_at'},
                                        tz= 'America/New_York',
                                        crs ='EPSG:4326',
                                        index_col = 0)

In [4]:
# generate staypoints
pfs, sp = pfs.as_positionfixes.generate_staypoints(method='sliding', distance_metric='haversine', dist_threshold=100, time_threshold=5.0, gap_threshold=720.0)



## Trip Inference

In [5]:
pfs, tpls = pfs.as_positionfixes.generate_triplegs(sp, method='between_staypoints', gap_threshold = 60)



In [6]:
# Write positionfixes to csv file.
ti.io.file.write_positionfixes_csv(pfs, 'temp/pfs.csv')
# Write staypoints to csv file.
ti.io.file.write_staypoints_csv(sp, 'temp/staypoints.csv')
# Write triplegs to csv file.
ti.io.file.write_triplegs_csv(tpls, 'temp/triplegs.csv')

## Home Inference

In [7]:
ID_list=sp['user_id'].unique()
ndf = newdf[newdf['user_id'].isin(ID_list)]

<h3>Add columns Lat-3856 and Lon-3857 with EPSG:3857</h3>

In [8]:
# Create a transformer object to convert from EPSG 4326 (WGS84) to EPSG 3857 (Web Mercator)
transformer = pyproj.Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)

# Iterate over rows and apply the transformer to convert the coordinates
for idx, row in ndf.iterrows():
    lat, lon = row['latitude'], row['longitude']
    x, y = transformer.transform(lon, lat)
    ndf.at[idx, 'Lat-3857'] = y
    ndf.at[idx, 'Lon-3857'] = x

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ndf.at[idx, 'Lat-3857'] = y
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ndf.at[idx, 'Lon-3857'] = x


In [9]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
def home_location(input_df,s_hour=22,t_hour=6):
    '''
    ndf: gps data
    s_hour: define start time of night, defult = 22
    t_hour: define end time of night, defult = 6
    '''
    input_df['time']=pd.to_datetime(input_df['time'], infer_datetime_format=True)
    input_df['date']=input_df['time'].dt.date
    input_df['hour']=pd.Series(input_df['time']).dt.hour
    
    input_df['LAT_Grid']=np.round(input_df['Lat-3857']/20)*20  # Grid size = 20m
    input_df['LON_Grid']=np.round(input_df['Lon-3857']/20)*20
    id=input_df.iloc[0,0]
    input_df=input_df[(input_df['hour']>=s_hour)|(input_df['hour']<t_hour)]

    if len(input_df)==0:  # if no signal during the night, return NaN
        return np.nan,np.nan
    home = input_df.groupby(['LAT_Grid','LON_Grid'])['date'].nunique().idxmax()
    return home[0],home[1]

In [10]:
home=pd.DataFrame(columns=['user_id','LAT','LON']) # initial a DataFrame to save home location

for i in ID_list[0:]: 
    dffh=ndf[ndf['user_id']==i].sort_values('time',axis=0,ascending=True) # extract records
    h_lat,h_lon=home_location(dffh) # calculate home location
    home=home.append(pd.DataFrame([[i,h_lat,h_lon]],columns=['user_id','LAT','LON'])) # append the new row 
    
nan_df = home[home.isna().any(axis=1)]
home.dropna(subset=['LAT'], inplace=True)
home

Unnamed: 0,user_id,LAT,LON
0,0,3399820.0,-9133880.0
0,3,3564660.0,-9087920.0
0,5,3456120.0,-9175080.0
0,6,3464520.0,-9173200.0
0,9,3546060.0,-9072020.0
...,...,...,...
0,1922,3375500.0,-9049420.0
0,1924,3565060.0,-9081220.0
0,1929,3501100.0,-9060680.0
0,1930,3493100.0,-9223940.0


In [11]:
ID_list=nan_df['user_id'].unique()
#check if a date is in saturday or sunday.
def is_weekend(date):
    return date.weekday() in [5, 6]

def home_location(input_df):
    '''
    ndf: gps data
    '''
    input_df['time']=pd.to_datetime(input_df['time'], infer_datetime_format=True)
    input_df['date']=input_df['time'].dt.date
    input_df['hour']=pd.Series(input_df['time']).dt.hour
    
    input_df['LAT_Grid']=np.round(input_df['Lat-3857']/20)*20  # Grid size = 20m
    input_df['LON_Grid']=np.round(input_df['Lon-3857']/20)*20
    input_df= input_df[input_df['date'].apply(is_weekend)]
    
    if len(input_df)==0: 
        return np.nan,np.nan
    home2 = input_df.groupby(['LAT_Grid','LON_Grid']).count()['user_id'].idxmax()
    return home2[0],home2[1]

In [12]:
home2=pd.DataFrame(columns=['user_id','LAT','LON']) # initial a DataFrame to save home location

for i in ID_list[0:]: 
    dffh=ndf[ndf['user_id']==i].sort_values('time',axis=0,ascending=True) # extract records
    h_lat,h_lon=home_location(dffh) # calculate home location
    home2=home2.append(pd.DataFrame([[i,h_lat,h_lon]],columns=['user_id','LAT','LON'])) # append the new row 

In [13]:
home2.dropna(subset=['LAT'], inplace=True)
new_home = pd.concat([home, home2], ignore_index=True)

# Create a transformer object to convert from EPSG 3857 (Web Mercator) to EPSG 4326 (WGS84)
inputGrid = pyproj.Proj(projparams='epsg:3857')
wgs84 = pyproj.Proj(projparams='epsg:4326')

new_home = new_home.reset_index(drop=True)

# Iterate over rows and apply the transformer to convert the coordinates
for idx, row in new_home.iterrows():
    lat, lon = pyproj.transform(inputGrid, wgs84, row['LON'], row['LAT'])
    new_home.at[idx, 'LAT-4326'] = lat
    new_home.at[idx, 'LON-4326'] = lon

# Print the updated DataFrame
new_home.to_csv('temp/home_location.csv')

## Food Inference

In [14]:
stop_point_df = pd.read_csv('temp/staypoints.csv')
retail_pd = pd.read_csv('retail.csv')

In [15]:
def is_close(dist, latA1, lonA2, latB1, lonB2):
    # Create shapely Point objects from the locations
    point1 = Point(lonA2, latA1)
    point2 = Point(lonB2, latB1)
    
    # Calculate distance between points in meters
    distance = point1.distance(point2) * 111319.9
    
    # Return True if distance is less than or equal to dist meters
    return distance <= dist

In [16]:
count = 0
warnings.simplefilter(action='ignore', category=Warning)
data_to_append_list = []
for sp in stop_point_df.itertuples():
    p = loads(sp[5])
    lat = p.y
    lon = p.x
    
    for row in retail_pd.itertuples():
        if is_close(200, float(row[12]), float(row[11]), lat, lon):
            count += 1
            
            data_to_append = {'id': sp[1], 'user_id': sp[2], 'started_at': sp[3], 'finished_at': sp[4], 'lat': lat, 'lon': lon, 'retail_id': row[1], 
                                'retail_lat': float(row[12]), 'retail_lon': float(row[11])}
            data_to_append_list.append(data_to_append)
            
            break

new_df = pd.DataFrame.from_records(data_to_append_list)
new_df.to_csv('temp/food_inference_200.csv')

## Food trip inference

In [20]:
tripleg = pd.read_csv('temp/triplegs.csv')
stop_df = pd.read_csv('temp/staypoints.csv')
food_sp_df = pd.read_csv('temp/food_inference_200.csv')


In [21]:
stop_df['started_at'] = pd.to_datetime(stop_df['started_at'])
stop_df['finished_at'] = pd.to_datetime(stop_df['finished_at'])
tripleg['finished_at'] = pd.to_datetime(tripleg['finished_at'])
tripleg['started_at'] = pd.to_datetime(tripleg['started_at'])
stop_df['continuous_check'] = ((stop_df['user_id'] == stop_df['user_id'].shift()) & (stop_df['started_at'] == stop_df['finished_at'].shift())).astype(int)

In [22]:
ID_list=food_sp_df['id'].unique()
def find_trip_before(stay_point_id, time_window = 15):
    time_window = datetime.timedelta(minutes=time_window)
    sp = stop_df[stop_df['id'] == stay_point_id]
    stop_of_user = stop_df[(stop_df['user_id'] == sp.iloc[0]['user_id']) & (stop_df['id'] <= stay_point_id)]
    related_tripled_by_user = tripleg[tripleg['user_id'] == sp.iloc[0]['user_id']]
    
    trip_traveled = []
    stop_point_traveled = []
    # temp_stop_list = []

    i = len(stop_of_user) - 1
    while i >= 0 and stop_of_user.iloc[i]['continuous_check'] == 1:
        stop_point_traveled.insert(0, stop_of_user.iloc[i]['id'])
        # temp_stop_list.insert(0, stop_of_user.iloc[i]['id'])
        i -= 1
    # if len(temp_stop_list) == 0: temp_stop_list.append(stay_point_id)

    while i >= 0:

        tripled_found = related_tripled_by_user[
            (related_tripled_by_user['finished_at'] > stop_of_user.iloc[i]['started_at'] - time_window) &
            (related_tripled_by_user['finished_at'] <= stop_of_user.iloc[i]['started_at'])]
        
        #try to find trip
        if len(tripled_found) > 0:
            if len(tripled_found) > 1: tripled_found = tripled_found.nlargest(1, 'finished_at')
            trip_traveled.insert(0, tripled_found.iloc[0]['id'])
        #in this case, no trip or stop point were found, end program.
        elif i - 1 >= 0 and stop_of_user.iloc[i]['started_at'] - stop_of_user.iloc[i - 1]['finished_at'] > time_window: 
            stop_point_traveled.insert(0, stop_of_user.iloc[i]['id'])
            break
        
        # if len(temp_stop_list) == 0: temp_stop_list.insert(0, stop_of_user.iloc[i]['id'])
        # temp_stop_list = []
        stop_point_traveled.insert(0, stop_of_user.iloc[i]['id'])

        i -= 1
        if len(tripled_found) > 0 and tripled_found.iloc[0]['started_at'] != stop_of_user.iloc[i]['finished_at']: break

        while i >= 0 and stop_of_user.iloc[i]['continuous_check'] == 1:
            # temp_stop_list.insert(0, stop_of_user.iloc[i]['id'])
            stop_point_traveled.insert(0, stop_of_user.iloc[i]['id'])
            i -= 1
            
    if len(trip_traveled) == 0: return None
    fist_trip = related_tripled_by_user[related_tripled_by_user['id'] == trip_traveled[0]]
    data_to_append = {'deviceID': sp.iloc[0]['user_id'], 'tripleg_ID': trip_traveled, 
                          'trip_start_location': Point(list(wkt.loads(fist_trip.iloc[0]['geom']).coords)[0]),
                          'trip_end_location': sp.iloc[0]['geom'],
                          'trip_start_timestamp': fist_trip.iloc[0]['started_at'],
                          'trip_end_timestamp': sp.iloc[0]['finished_at'],
                          'stop_point_between_trips': stop_point_traveled}

    return data_to_append

In [26]:
data_to_generate = []
for sp_id in ID_list:
    res = find_trip_before(sp_id)
    if res != None:
        data_to_generate.append(res)
output_df = pd.DataFrame.from_records(data_to_generate)
output_df.to_csv('temp/food_related_tour.csv')