In [None]:
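# Description
# Intersect the running tracks found in a folder of GPX files with a hexagon
# grid, count the runs per hexagon together with the first and last run date,
# and write the result to runninghexagons.geojson.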

# Requirements
import os
import geopandas as gpd
import pandas as pd
import pathlib
from pathlib import Path

# Parameters
# GPX input folder, given relative to the current working directory
gpx_foldername = '../data/gpxfiles'
gpx_directory = Path.cwd() / gpx_foldername

# Hexagon grid file, located under the parent of the working directory
hexagonfile = Path.cwd().parent.joinpath("data", "hexagons", "hexagonNetherlands.geojson")


In [None]:
# Load the hexagon grid from disk
hexagons = gpd.read_file(hexagonfile)

# Replace every geometry by its simplified version.
# The tolerance is expressed in units of the CRS; a larger value
# simplifies the geometry more.
tolerance = 0.001  # Adjust as needed
simplified_geometry = hexagons['geometry'].simplify(tolerance)
hexagons['geometry'] = simplified_geometry

In [None]:
## NEW
def _first_track_date(track_points):
    """Return the date of the first track point formatted as 'YYYY-MM-DD'.

    Parameters
    ----------
    track_points : DataFrame-like
        Table with a 'time' column, as read from the 'track_points' layer.

    Returns
    -------
    str or None
        Date of the first row, or None when the table is empty or its first
        timestamp is missing.
    """
    if track_points.empty:
        return None

    # Only the first timestamp is needed. Letting pandas infer the format also
    # accepts variants (e.g. fractional seconds) that a fixed
    # '%Y-%m-%dT%H:%M:%SZ' pattern would reject.
    first_time = pd.to_datetime(track_points['time'].iloc[0])
    if pd.isna(first_time):
        return None
    return first_time.strftime('%Y-%m-%d')


# One spatial-join result (hexagons x running tracks) per gpx-file
temp_gdf_hexagons_per_run = []

# Process per gpx-file; sorted so the row order of the result is reproducible
for filename in sorted(os.listdir(gpx_directory)):
    # Accept the extension in any letter case (.gpx, .GPX, ...)
    if not filename.lower().endswith('.gpx'):
        continue

    gpxfile = os.path.join(gpx_directory, filename)

    gpx = gpd.read_file(gpxfile, layer='tracks')

    # Missing values become empty strings so the string matching below is safe
    gpx['name'] = gpx['name'].fillna('').astype(str)
    gpx['type'] = gpx['type'].fillna('').astype(str)

    # Track name contains "running" (case-insensitive) ...
    mask_name = gpx['name'].str.contains('Running', case=False, na=False)

    # ... or the track type is exactly "running" (case-insensitive)
    mask_type = gpx['type'].str.lower() == 'running'

    # Copy the selection: a column is added below, and writing into a slice of
    # `gpx` would trigger pandas' SettingWithCopyWarning.
    gdf_gpxline = gpx[mask_name | mask_type].copy()

    # Read trackpoints; they carry the timestamps of the run
    gpx_tp = gpd.read_file(gpxfile, layer='track_points')

    # Always create the column (None when unknown) so every per-run result
    # has the same schema.
    date = _first_track_date(gpx_tp)
    gdf_gpxline['date'] = date

    # Keep the hexagons that are intersected by a running track
    gdf_join = gpd.sjoin(left_df=hexagons, right_df=gdf_gpxline, how="inner", predicate="intersects")

    temp_gdf_hexagons_per_run.append(gdf_join)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the actual problem instead.
if not temp_gdf_hexagons_per_run:
    raise ValueError(f"No .gpx files found in {gpx_directory}")

# Create gdf with information from all gpx-files
gdf_new = gpd.GeoDataFrame(pd.concat(temp_gdf_hexagons_per_run, ignore_index=True))

In [None]:
# Concatenate results
# Stack the per-run join results under one fresh 0..n-1 index, then wrap the
# stacked table in a GeoDataFrame.
stacked_runs = pd.concat(temp_gdf_hexagons_per_run, ignore_index=True)
gdf_rh = gpd.GeoDataFrame(stacked_runs)

In [None]:
# Calculate information per hexagon (number of runs, first date and last date)
# in a single grouped pass instead of three groupbys followed by two merges.
# NOTE(review): min/max on 'date' assumes the values sort chronologically
# (true for 'YYYY-MM-DD' strings) - confirm if the date format changes.
df_all_attr = gdf_rh.groupby('uuid', as_index=False).agg(
    count=('uuid', 'count'),
    first_date=('date', 'min'),
    last_date=('date', 'max'),
)

# Merge geometry (gdf) with attribute information (df)
gdf_temp = gdf_rh.merge(df_all_attr, on=["uuid"])

# Keep one row per hexagon and only the output columns. The explicit copy
# makes the geometry assignment below write to an independent frame instead
# of a derived slice (avoids SettingWithCopyWarning).
col_list = ['uuid', 'count', 'first_date', 'last_date', 'geometry']
gdf_complete = gdf_temp.drop_duplicates(subset=['uuid'])[col_list].copy()

# Simplify geometries with a tolerance (in units of the CRS)
# The higher the tolerance, the more simplified the geometry
tolerance = 0.001  # Adjust as needed
gdf_complete['geometry'] = gdf_complete['geometry'].simplify(tolerance)

# Write geojsonfile
output_file = "runninghexagons.geojson"

# Remove a previous export first - presumably so the writer never has to
# overwrite an existing file (TODO confirm this is still required).
if os.path.isfile(output_file):
    os.remove(output_file)
    print("File Deleted successfully")
else:
    print("File does not exist")

gdf_complete.to_file(output_file, driver='GeoJSON')