In [67]:
import polars as pl
from fitparse import FitFile

import pathlib
import os
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

import plotly.express as px
import plotly.graph_objects as go
import numpy as np

In [68]:

# Directory the notebook is run from; all inputs are read from ./data beneath it.
# (Path('.').parent is Path('.') itself, so resolving '.' directly is equivalent.)
this_dir = pathlib.Path('.').resolve()
data_dir = this_dir / 'data'

# os.listdir returns entries in arbitrary order, so sort to make the list (and
# any index into it) the same on every run and every machine.
workouts = sorted(data_dir / file for file in os.listdir(data_dir) if file.endswith('.fit'))

# Third workout kept as a handy sample for ad-hoc inspection; None when fewer
# than three .fit files are present instead of failing the whole cell.
test = workouts[2] if len(workouts) > 2 else None

In [69]:
# Load the combined parquet file from the data directory. Later cells read the
# columns workout_name, position_lat, position_long, altitude and heart_rate.
combined_path = data_dir / 'combined.parquet'
data = pl.read_parquet(combined_path)

In [70]:
# One representative point per workout: the median of its recorded latitude and
# longitude, cast to Float64, with rows ordered by workout name so the row
# order of everything derived below is stable.
map_centroids = (
    data
    .group_by('workout_name')
    .agg([
        pl.col('position_lat').median().cast(pl.Float64).alias('centroid_lat'),
        pl.col('position_long').median().cast(pl.Float64).alias('centroid_long'),
    ])
    .sort('workout_name')
)

# Workout names in the same row order as the feature matrix.
workout_ids = map_centroids.get_column('workout_name').to_numpy()

# Feature matrix with columns (long, lat); each column is standardised
# independently to zero mean and unit variance before clustering.
centroid_xy = map_centroids.select(['centroid_long', 'centroid_lat']).to_numpy()
locs = StandardScaler().fit_transform(centroid_xy)

In [84]:
# Mean distance from every workout centroid to its nearest neighbours, in the
# standardised coordinate space of `locs`. Presumably inspected to choose the
# DBSCAN `eps` used in the next cell.
N_CENTROID_NEIGHBORS = 30
# kneighbors() on the training set itself requires n_neighbors < n_samples,
# so cap the neighbour count for small collections of workouts.
n_centroid_neighbors = min(N_CENTROID_NEIGHBORS, len(locs) - 1)

centroid_knn = NearestNeighbors(n_neighbors=n_centroid_neighbors).fit(locs)
# Calling kneighbors() without a query returns neighbours of the fitted points
# and does not count a point as its own neighbour. Passing `locs` back in would
# put a zero self-distance into every row and drag the mean down.
centroid_neighbor_distances, _ = centroid_knn.kneighbors()
distances = centroid_neighbor_distances.mean(axis=1)

fig = px.histogram(
    distances,
    nbins=500,
    marginal="box",
    title=f'Distances to {n_centroid_neighbors} nearest neighbors',
)
fig.show()

In [90]:
# Cluster the standardised workout centroids into regions. With min_samples=1
# every point is a core point, so no workout is labelled as noise (-1).
algo = DBSCAN(eps=0.3, min_samples=1)
clusters = algo.fit_predict(locs)
# Store the integer labels as strings on the per-workout table.
map_centroids = map_centroids.with_columns(pl.Series('Clustered_Region', clusters).cast(pl.Utf8))

# Make the cell safe to re-run: drop any label column left by a previous
# execution, otherwise the join keeps the stale labels and adds the fresh ones
# under a suffixed duplicate name.
data = data.select([name for name in data.columns if name != 'Clustered_Region'])
# Broadcast each workout's region label onto all of its records.
data = data.join(map_centroids.select(['workout_name', 'Clustered_Region']), on='workout_name', how='left')


In [None]:
# One 3D scatter (long, lat, altitude; coloured by heart rate) per clustered
# region, each opened in the browser renderer.
# Grouping by a list and unpacking the 1-tuple key keeps the title as
# "Region 3" rather than "Region ('3',)" on polars versions that yield tuple keys.
for (region,), group in data.group_by(['Clustered_Region']):
    fig = px.scatter_3d(
        group,
        x='position_long',
        y='position_lat',
        z='altitude',
        color='heart_rate',
        title=f'Region {region}',
    )
    # Flatten the vertical axis so altitude does not dominate the horizontal extent.
    fig.update_layout(
        scene=dict(aspectmode='manual', aspectratio=dict(x=1, y=1, z=0.1)),
    )
    fig.show(renderer="browser")

In [53]:
# Mean distance from every recorded position to its nearest neighbours, computed
# on the raw (unscaled) position_long / position_lat columns.
N_POINT_NEIGHBORS = 25

# Materialise the coordinate matrix once and reuse it for fitting.
point_coords = data.select(['position_long', 'position_lat']).to_numpy()
# kneighbors() on the training set itself requires n_neighbors < n_samples.
n_point_neighbors = min(N_POINT_NEIGHBORS, len(point_coords) - 1)

point_knn = NearestNeighbors(n_neighbors=n_point_neighbors).fit(point_coords)
# No query argument: neighbours of the fitted points, excluding each point
# itself, so the mean is not diluted by a zero self-distance.
point_neighbor_distances, _ = point_knn.kneighbors()
distances = point_neighbor_distances.mean(axis=1)

In [55]:
# Histogram of the values currently held in `distances`, rendered in a browser tab.
fig = px.histogram(
    distances,
    nbins=1000,
)
fig.show(renderer="browser")