In [1]:
import trackintel as ti
import pandas as pd

# Data loading

In [2]:
# Raw table for the dataset labelled 'A' in the later preprocessing and tripleg steps.
dfa = pd.read_csv('task1_dataset_kotae.csv')

In [3]:
# Raw table for the dataset labelled 'B' in the later preprocessing and tripleg steps.
dfb = pd.read_csv('hiroshima_challengedata.csv')

In [4]:
# Raw table for the dataset labelled 'C' in the later preprocessing and tripleg steps.
dfc = pd.read_csv('sapporo_challengedata.csv')

In [5]:
# Raw table for the dataset labelled 'D' in the later preprocessing and tripleg steps.
dfd = pd.read_csv('kumamoto_challengedata.csv')

In [6]:
# Trim every dataset to its leading days of column 'd' (both bounds inclusive).
# A keeps days 1-10 only, because 30 days is too long for A; B, C and D keep days 1-30.

dfa, dfb, dfc, dfd = (
    frame[frame['d'].between(1, last_day)]
    for frame, last_day in ((dfa, 10), (dfb, 30), (dfc, 30), (dfd, 30))
)

# Define functions

In [7]:
def preprocess(dftest):
    """Turn raw records into a table with UTC timestamps and positionfix-style column names.

    Args:
        dftest: DataFrame with the columns 'uid', 'd', 't', 'x' and 'y'.
            The frame passed in is left untouched.

    Returns:
        A new DataFrame that omits every row whose 'x' or 'y' equals -999,
        carries the additional columns 'date', 'time' and 'tracked_at', and
        has 'uid', 'x' and 'y' renamed to 'user_id', 'longitude' and 'latitude'.
    """
    # Drop rows carrying the -999 sentinel in either coordinate. The explicit
    # .copy() makes the result an independent frame, so the column assignments
    # below neither raise SettingWithCopyWarning nor depend on view/copy semantics.
    valid = dftest.loc[(dftest['x'] != -999) & (dftest['y'] != -999)].copy()

    # 'd' is parsed as a day-of-year number; values that do not parse become NaT
    # because of errors='coerce'.
    valid['date'] = pd.to_datetime(valid['d'], format='%j', errors='coerce')
    # Since the max t is 47, each t is assumed to be one of 48 thirty-minute
    # intervals of a day.
    valid['time'] = pd.to_timedelta(valid['t'] * 30, unit='m')
    valid['tracked_at'] = (valid['date'] + valid['time']).dt.tz_localize('UTC')

    return valid.rename(columns={
        'uid': 'user_id',
        'x': 'longitude',
        'y': 'latitude'
    })

In [8]:
def custom_write_triplegs_csv(triplegs, filename, **kwargs):
    """Write triplegs to a CSV file after converting them with ``to_wkt``.

    Args:
        triplegs: Object exposing ``to_wkt``; it is called with
            ``rounding_precision=-1`` and ``trim=False``.
        filename: Path of the CSV file to write.
        **kwargs: Forwarded unchanged to ``to_csv`` (for example ``index=False``).
    """
    # Convert first, then hand the converted table straight to to_csv.
    triplegs.to_wkt(rounding_precision=-1, trim=False).to_csv(filename, **kwargs)

In [9]:
def create_triplegs(city):
    """
    Build triplegs for one city from its preprocessed positionfixes and store them as CSV.

    Reads 'data_{city}_preprocessed.csv', generates staypoints with the
    'sliding' method, generates the triplegs between those staypoints and
    writes them to 'triplegs_{city}.csv' without the index. Progress messages
    are printed along the way; nothing is returned.

    Args:
        city (str): Label inserted into the input and output file names.
    """
    source_csv = f'data_{city}_preprocessed.csv'
    target_csv = f'triplegs_{city}.csv'

    # Read the positionfixes written by the preprocessing step.
    print(f"Loading preprocessed data: city {city}")
    position_fixes = ti.read_positionfixes_csv(source_csv)

    # NOTE(review): haversine is applied to the 'longitude'/'latitude' columns,
    # which come from the raw x/y values -- confirm these are geographic
    # degrees; if they are grid indices, dist_threshold=1 effectively means
    # "same cell". Units below follow the trackintel documentation -- TODO confirm.
    staypoint_options = dict(
        method='sliding',
        dist_threshold=1,  # distance threshold (meters for haversine)
        time_threshold=90,  # minimum stay duration, in minutes
        gap_threshold=300,  # largest tolerated gap between fixes, in minutes
        distance_metric='haversine',
        include_last=True,  # keep the trailing staypoint of each user
        print_progress=True,  # show a progress bar
        exclude_duplicate_pfs=True,  # ignore duplicated positionfixes
        n_jobs=-1,  # use every available core
    )
    print("Generating staypoints")
    position_fixes, staypoints = position_fixes.as_positionfixes.generate_staypoints(
        **staypoint_options
    )

    # Triplegs are the movement segments between consecutive staypoints.
    print("Generating triplegs")
    position_fixes, triplegs = ti.preprocessing.generate_triplegs(
        position_fixes,
        staypoints,
        method='between_staypoints',
        gap_threshold=90,  # same value as the staypoint time_threshold
    )

    # Persist the result.
    print("Saving Triplegs to csv")
    custom_write_triplegs_csv(triplegs, target_csv, index=False)
    print("Triplegs saved to csv")



# Preprocess data

In [10]:
# Run the same preprocess-and-save step for every dataset in one loop rather
# than one copy-pasted stanza per city. Each pair is (input frame, label used
# in the output file name); A carries the '10d' suffix in its file name.
for dftest, label in ((dfa, 'A10d'), (dfb, 'B'), (dfc, 'C'), (dfd, 'D')):
    data = preprocess(dftest)
    data.to_csv(f'data_{label}_preprocessed.csv', index=False)

In [None]:
# The preprocessing cell saves city A as data_A10d_preprocessed.csv, so the
# label must be 'A10d' for create_triplegs to find that file on a fresh run.
# The result is written to triplegs_A10d.csv.
create_triplegs('A10d')

Loading preprocessed data: city A7d




Generating staypoints


100%|██████████| 96869/96869 [05:13<00:00, 308.60it/s]


Generating triplegs


  pfs["tripleg_id"] = pfs["tripleg_id"].ffill()


Saving Triplegs to csv
Triplegs saved to csv


In [None]:
# City B: reads data_B_preprocessed.csv and writes triplegs_B.csv.
create_triplegs('B')

Loading preprocessed data: city B




Generating staypoints


100%|██████████| 943/943 [00:19<00:00, 47.46it/s] 


Generating triplegs


  pfs["tripleg_id"] = pfs["tripleg_id"].ffill()


Saving Triplegs to csv
Triplegs saved to csv


In [4]:
# City C: reads data_C_preprocessed.csv and writes triplegs_C.csv.
create_triplegs('C')

Loading preprocessed data: city C




Generating staypoints


100%|██████████| 224/224 [00:43<00:00,  5.10it/s]


Generating triplegs


  pfs["tripleg_id"] = pfs["tripleg_id"].ffill()


Saving Triplegs to csv
Triplegs saved to csv


In [None]:
# City D: reads data_D_preprocessed.csv and writes triplegs_D.csv.
create_triplegs('D')

Loading preprocessed data: city D




Generating staypoints


100%|██████████| 702/702 [00:08<00:00, 84.92it/s]


Generating triplegs


  pfs["tripleg_id"] = pfs["tripleg_id"].ffill()


Saving Triplegs to csv
Triplegs saved to csv
