In [1]:
# Imports
import pandas as pd
import gpxpy
import os
from pathlib import Path
from tqdm import tqdm
from route import Route

In [2]:
# Display options
# Uncomment the next line to lift pandas' row limit when displaying DataFrames:
# pd.set_option('display.max_rows', None)
# Raise the per-column display width to 100 so longer file and route names are not cut off
pd.set_option('display.max_colwidth', 100)

In [3]:
# Paths used throughout the notebook (relative, so they resolve against the working directory)
DATA_PATH = 'data/'
# Directory scanned for *.gpx files; default argument of process_gpx_files
GPX_FILE_PATH = f'{DATA_PATH}gpx/'
# CSV file the resulting routes index is written to
ROUTES_INDEX_PATH = f'{DATA_PATH}routes-index.csv'

In [4]:
# Function definitions
def check_uniques(df, col_name):
    """
    Reports which values appear more than once in a DataFrame column.

    Occurrences are tallied with ``value_counts``; every value whose tally exceeds one
    is treated as a duplicate. A one-line summary is printed in either case.

    Args:
        df (DataFrame): The DataFrame to inspect.
        col_name (str): The name of the column to inspect.

    Returns:
        list: The duplicated values, or an empty list when there are none.
    """
    occurrences = df[col_name].value_counts()
    repeated = occurrences[occurrences > 1].index.tolist()

    # Guard clause: nothing repeated, so report success and stop here
    if not repeated:
        print(f"All values in column {col_name} are unique")
        return []

    print(f"Duplicate values in column {col_name}: {repeated}")
    return repeated

def process_gpx_files(gpx_file_path=GPX_FILE_PATH):
    """
    Builds a route index from every GPX file in a directory.

    Each ``*.gpx`` file (processed in sorted path order) is loaded as a ``Route`` and
    one row of its metrics is collected. A message is printed for any file whose route
    name is ``None``, and the ``route_name`` column is checked for duplicates, which
    are printed by ``check_uniques`` rather than raised.

    Args:
        gpx_file_path (str or Path): The path to the directory containing the GPX files.

    Returns:
        DataFrame: One row per GPX file with the columns ``file_name``, ``route_name``,
            ``route_distance_m``, ``avg_elevation_gain_per_km`` and
            ``center_coordinates``. The columns are present even when the directory
            contains no GPX files.
    """
    columns = [
        'file_name',
        'route_name',
        'route_distance_m',
        'avg_elevation_gain_per_km',
        'center_coordinates',
    ]

    # Sorted so the row order is stable between runs
    gpx_files = sorted(Path(gpx_file_path).glob('*.gpx'))

    pbar = tqdm(gpx_files, desc='Processing GPX files', unit='file')

    # One dict per file; the DataFrame is built once after the loop
    records = []
    for gpx_file in pbar:
        pbar.set_description(f'Processing {gpx_file.name}')
        # Load from the directory that was actually scanned. Joining onto the global
        # GPX_FILE_PATH here would read the wrong files whenever a caller passes a
        # different directory.
        route = Route(os.path.join(gpx_file_path, gpx_file.name))

        records.append({
            'file_name': route.file_name,
            'route_name': route.route_name,
            'route_distance_m': route.total_distance,
            'avg_elevation_gain_per_km': route.avg_elevation_gain_per_km,
            'center_coordinates': route.center_coordinates,
        })
        if route.route_name is None:
            print(f"No route name found for {gpx_file}")

    # Explicit columns keep the schema intact when no files were found
    df = pd.DataFrame(records, columns=columns)

    # Called for its printed report only; the returned list is not needed here
    check_uniques(df, 'route_name')

    return df

In [5]:
# Build the routes index from the default directory (GPX_FILE_PATH)
df = process_gpx_files()

# Bare last expression so the notebook renders the resulting DataFrame
df

Processing xterra-ilhagrande-5k-2025.gpx: 100%|██████████| 113/113 [01:37<00:00,  1.15file/s]                                              

All values in column route_name are unique





Unnamed: 0,file_name,route_name,route_distance_m,avg_elevation_gain_per_km,center_coordinates
0,Bocaina Park Trail 18 Km - 2024.gpx,Bocaina Park Trail 2024 - 18 Km,18732.860320,45.371448,"[-19.721198424465147, -46.80936591567634]"
1,Bocaina Park Trail 28Km - 2024.gpx,Bocaina Park Trail 2024 - 27Km,27821.882089,45.147018,"[-19.724198239540605, -46.796225933897894]"
2,Bocaina Park Trail 45Km NOVO - 2024.gpx,Bocaina Park Trail 2024 - 45Km,45572.546133,44.241000,"[-19.72772115231206, -46.77763812349694]"
3,Bocaina Park Trail 8Km - 2024.gpx,Bocaina Park Trail 2024 - 8Km,8580.817075,47.450726,"[-19.718138415952694, -46.80736000015879]"
4,cambotas-cocais-11-2024.gpx,Cambotas Trail Fest 2024 - 11km,11188.007659,45.799039,"[-19.87696768595041, -43.473363636363636]"
...,...,...,...,...,...
108,xterra-ilhabela-21k-2025.gpx,XTERRA 2025 Ilhabela - Trail Run 21K,20859.438117,33.948326,"[-23.819229540795487, -45.35795993933552]"
109,xterra-ilhabela-5k-2025.gpx,XTERRA 2025 Ilhabela - Trail Run 5K,4878.878120,40.813071,"[-23.805893913063255, -45.35835311594363]"
110,xterra-ilhagrande-10k-2025.gpx,XTERRA 2025 Ilha Grande - Trail Run 10K,10197.757358,37.666517,"[-23.144722172536525, -44.171023626187235]"
111,xterra-ilhagrande-21k-2025.gpx,XTERRA 2025 Ilha Grande - Trail Run 21K,21070.695429,51.468259,"[-23.15900974075692, -44.163574229622036]"


In [6]:

# Save the routes index to ROUTES_INDEX_PATH; index=False leaves the DataFrame's row index out of the file
df.to_csv(ROUTES_INDEX_PATH, index=False)