### Install necessary packages

In [1]:
#%pip install gpxpy geopy folium matplotlib pandas numpy plotly

### Import libraries and define helper functions

In [2]:
import os
import gpxpy
import pandas as pd
import numpy as np
import folium
from geopy.distance import geodesic
from datetime import datetime, timedelta
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import plotly.express as px
import io
import base64

# Function to read GPX files and extract the tracks
def read_gpx_files(folder_path):
    """Read every GPX file in a folder and return its track segments.

    Files are visited in sorted file-name order so that the result (and any
    run numbering derived from it) is reproducible; ``os.listdir`` on its own
    returns entries in arbitrary order.

    Args:
        folder_path: Directory to scan (non-recursively) for GPX files. The
            ``.gpx`` extension is matched case-insensitively.

    Returns:
        A list with one entry per non-empty track segment. Each entry is a
        list of ``(latitude, longitude, elevation, time)`` tuples, one per
        point of the segment.
    """
    tracks = []
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.lower().endswith('.gpx'):
            continue
        file_path = os.path.join(folder_path, file_name)
        # Read as UTF-8 explicitly instead of relying on the platform's locale encoding.
        with open(file_path, 'r', encoding='utf-8') as gpx_file:
            gpx = gpxpy.parse(gpx_file)
            for track in gpx.tracks:
                for segment in track.segments:
                    points = [
                        (point.latitude, point.longitude, point.elevation, point.time)
                        for point in segment.points
                    ]
                    # A segment without points carries no run data and would
                    # break callers that index track[0] / track[-1].
                    if points:
                        tracks.append(points)
    return tracks

# Function to count the frequency of points in tracks
def count_point_frequencies(tracks):
    """Tally how often each (latitude, longitude) pair occurs across all tracks.

    Args:
        tracks: Iterable of tracks; each track is an iterable of points whose
            first two items are latitude and longitude.

    Returns:
        A dict mapping ``(latitude, longitude)`` to the number of points with
        exactly those coordinates.
    """
    point_counter = {}
    coordinates = ((sample[0], sample[1]) for track in tracks for sample in track)
    for lat_lon in coordinates:
        point_counter[lat_lon] = point_counter.get(lat_lon, 0) + 1
    return point_counter

# Function to calculate the distance between two points
def calculate_distance(point1, point2):
    """Return the geodesic distance in meters between two track points.

    Only the first two items of each point (latitude, longitude) are used;
    any further items are ignored.
    """
    start = (point1[0], point1[1])
    end = (point2[0], point2[1])
    return geodesic(start, end).meters


### Define functions to create maps

In [3]:
# Function to create the map with two zoom levels
def _add_runs_to_map(run_map, tracks, point_counter, min_visits):
    """Draw every track as a red line and mark frequently visited points in gold."""
    for track in tracks:
        points = [(point[0], point[1]) for point in track]
        if not points:
            # A track without points has nothing to draw.
            continue
        folium.PolyLine(points, color='red', weight=2.5, opacity=1).add_to(run_map)
    for point, count in point_counter.items():
        if count > min_visits:
            folium.CircleMarker(location=point, radius=3, color='gold', fill=True, fill_opacity=0.7).add_to(run_map)


def create_maps(tracks, point_counter, output_file_global='map_global.html', output_file_local='map_local.html', min_visits=10):
    """Render all tracks onto two maps with different zoom levels and save them as HTML.

    Both maps show the same content: each track as a red polyline and a gold
    circle marker on every coordinate that occurs more than ``min_visits``
    times. They differ only in the initial view: a wide view centered on
    (54.5260, 15.2551) at zoom 3, and a local view of Copenhagen centered on
    (55.6761, 12.5683) at zoom 12.

    Args:
        tracks: List of tracks; each track is a sequence of points whose
            first two items are latitude and longitude.
        point_counter: Mapping of ``(latitude, longitude)`` to occurrence count.
        output_file_global: Path of the HTML file for the wide view.
        output_file_local: Path of the HTML file for the Copenhagen view.
        min_visits: A coordinate gets a marker only if its count is strictly
            greater than this value.
    """
    map_views = (
        (output_file_global, [54.5260, 15.2551], 3),
        (output_file_local, [55.6761, 12.5683], 12),
    )
    for output_file, center, zoom in map_views:
        run_map = folium.Map(location=center, zoom_start=zoom, tiles='cartodb positron')
        _add_runs_to_map(run_map, tracks, point_counter, min_visits)
        run_map.save(output_file)


### Define functions to save run list and statistics HTML

In [4]:
# Optimized function to save runs list HTML with sortable columns
def save_runs_list_html(tracks, output_file='runs_list.html'):
    """Write an HTML page listing every run in a table with click-to-sort columns.

    One row is produced per track that has at least two points and a
    timestamp on its first and last point; other tracks are skipped. Rows are
    ordered from the most recent start time to the earliest. "Run Number" is
    the 1-based position of the track in ``tracks``.

    Args:
        tracks: List of tracks; each track is a sequence of
            ``(latitude, longitude, elevation, time)`` tuples where ``time``
            is a ``datetime`` (or ``None`` when missing).
        output_file: Path of the HTML file to write (UTF-8).
    """
    runs_info = []
    for idx, track in enumerate(tracks):
        if len(track) < 2:
            continue
        start_time = track[0][3]
        end_time = track[-1][3]
        if start_time is None or end_time is None:
            # Without timestamps neither the date nor the pace can be reported.
            continue

        total_distance_meters = sum(
            calculate_distance(track[i], track[i + 1]) for i in range(len(track) - 1)
        )
        total_distance_km = total_distance_meters / 1000
        # The point-to-point time deltas add up to exactly end - start.
        total_time_seconds = (end_time - start_time).total_seconds()
        avg_pace_seconds_per_km = total_time_seconds / total_distance_km if total_distance_km > 0 else 0
        avg_pace_minutes = int(avg_pace_seconds_per_km // 60)
        avg_pace_seconds = int(avg_pace_seconds_per_km % 60)
        run_date = start_time.strftime("%Y-%m-%d")
        run_time = f"{start_time.strftime('%H:%M:%S')} - {end_time.strftime('%H:%M:%S')} (Time: {str(timedelta(seconds=int(total_time_seconds)))})"

        runs_info.append({
            'Run Number': idx + 1,
            'Date': run_date,
            'Time': run_time,
            'Distance (km)': f"{total_distance_km:.3f}",
            'Average Pace': f"{avg_pace_minutes}:{avg_pace_seconds:02d} min/km",
            'Start': start_time  # for ordering the rows
        })

    # Order by actual start time rather than trusting the input order.
    runs_info.sort(key=lambda run: run['Start'], reverse=True)

    # The sort direction is stored per header cell (th.asc). The click handler
    # is an arrow function, so `this` would not be the clicked header; keeping
    # the flag on `th` also lets the arrows of other columns be cleared.
    html_head = """
    <html>
    <head>
        <meta charset="utf-8">
        <style>
            body { font-family: Arial, sans-serif; color: #333; }
            table { width: 100%; border-collapse: collapse; margin: 20px 0; }
            th, td { padding: 12px; border: 1px solid #ddd; text-align: left; }
            th { background-color: #f4f4f4; cursor: pointer; }
            th.sort-asc::after { content: " \\2191"; }
            th.sort-desc::after { content: " \\2193"; }
            tr:nth-child(even) { background-color: #f9f9f9; }
        </style>
        <script>
            document.addEventListener('DOMContentLoaded', () => {
                const getCellValue = (tr, idx) => tr.children[idx].innerText || tr.children[idx].textContent;
                const comparer = (idx, asc, type) => (a, b) => {
                    let v1 = getCellValue(asc ? a : b, idx);
                    let v2 = getCellValue(asc ? b : a, idx);
                    if (type === 'date') {
                        v1 = new Date(v1);
                        v2 = new Date(v2);
                    } else if (type === 'pace') {
                        const [min1, sec1] = v1.split(':');
                        const [min2, sec2] = v2.split(':');
                        v1 = parseInt(min1) * 60 + parseInt(sec1);
                        v2 = parseInt(min2) * 60 + parseInt(sec2);
                    } else if (!isNaN(v1) && !isNaN(v2)) {
                        v1 = parseFloat(v1);
                        v2 = parseFloat(v2);
                    }
                    return v1 > v2 ? 1 : v1 < v2 ? -1 : 0;
                };

                document.querySelectorAll('th').forEach(th => th.addEventListener('click', () => {
                    const table = th.closest('table');
                    const type = th.getAttribute('data-type');
                    th.asc = !th.asc;
                    Array.from(table.querySelectorAll('tr:nth-child(n+2)'))
                        .sort(comparer(Array.from(th.parentNode.children).indexOf(th), th.asc, type))
                        .forEach(tr => table.appendChild(tr));
                    th.parentNode.querySelectorAll('th').forEach(other => {
                        if (other !== th) {
                            other.classList.remove('sort-asc', 'sort-desc');
                            other.asc = undefined;
                        }
                    });
                    th.classList.toggle('sort-asc', th.asc);
                    th.classList.toggle('sort-desc', !th.asc);
                }));
            });
        </script>
    </head>
    <body>
        <h1>List of Runs</h1>
        <table>
            <tr>
                <th data-type="number">Run Number</th>
                <th data-type="date">Date</th>
                <th data-type="text">Time</th>
                <th data-type="number">Distance (km)</th>
                <th data-type="pace">Average Pace (min/km)</th>
            </tr>
    """
    html_rows = [
        f"""
            <tr>
                <td>{run_info['Run Number']}</td>
                <td>{run_info['Date']}</td>
                <td>{run_info['Time']}</td>
                <td>{run_info['Distance (km)']}</td>
                <td>{run_info['Average Pace']}</td>
            </tr>
        """
        for run_info in runs_info
    ]
    html_tail = """
        </table>
    </body>
    </html>
    """
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_head + "".join(html_rows) + html_tail)



# Optimized function to save statistics HTML with additional yearly statistics
def save_statistics_html(tracks, output_file='statistics.html'):
    """Write an HTML page with all-time and current-year run statistics.

    The page reports the number of runs, the total distance and the average
    distance per run, once for all tracks and once for tracks whose first
    point is dated in the current calendar year, followed by the date of the
    latest recorded end time.

    Args:
        tracks: List of tracks; each track is a sequence of
            ``(latitude, longitude, elevation, time)`` tuples. May be empty;
            the date of the last run is then shown as ``N/A``.
        output_file: Path of the HTML file to write (UTF-8).
    """
    current_year = datetime.now().year

    # Measure every track once; both the all-time and the yearly totals reuse it.
    track_distances_m = [
        sum(calculate_distance(track[i], track[i + 1]) for i in range(len(track) - 1))
        for track in tracks
    ]

    total_runs = len(tracks)
    total_distance_km = sum(track_distances_m) / 1000
    avg_distance_km = total_distance_km / total_runs if total_runs else 0

    # Tracks without points or without a start timestamp cannot be assigned to a year.
    distances_this_year_m = [
        meters
        for track, meters in zip(tracks, track_distances_m)
        if track and track[0][3] is not None and track[0][3].year == current_year
    ]
    total_runs_this_year = len(distances_this_year_m)
    total_distance_km_this_year = sum(distances_this_year_m) / 1000
    avg_distance_km_this_year = total_distance_km_this_year / total_runs_this_year if total_runs_this_year else 0

    # max() raises on an empty sequence, so fall back to a placeholder.
    end_times = [track[-1][3] for track in tracks if track and track[-1][3] is not None]
    last_run_date = max(end_times).strftime("%d.%m.%Y") if end_times else "N/A"

    html_content = f"""
    <html>
    <head>
        <style>
            body {{ font-family: Arial, sans-serif; color: #333; }}
        </style>
    </head>
    <body>
        <p><strong>Total Runs:</strong> {total_runs}</p>
        <p><strong>Total Distance (km):</strong> {total_distance_km:.3f}</p>
        <p><strong>Average Distance per Run (km):</strong> {avg_distance_km:.3f}</p>
        <br>
        <p><strong>Total Runs, This Year:</strong> {total_runs_this_year}</p>
        <p><strong>Total Distance, This Year (km):</strong> {total_distance_km_this_year:.3f}</p>
        <p><strong>Average Distance per Run, This Year (km):</strong> {avg_distance_km_this_year:.3f}</p>
        <br>
        <p><strong>Date of Last Run:</strong> {last_run_date}</p>
    </body>
    </html>
    """
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

### Define functions to save runs over years HTML

In [5]:
import pandas as pd
import plotly.express as px

# Function to save runs over years HTML with a modern and transparent design
def save_runs_over_years_html(tracks, output_file='runs_over_years.html'):
    """Write an HTML page with a bar chart of the number of runs per year.

    A run is attributed to the year of the timestamp on its first point.

    Args:
        tracks: List of tracks; each track is a sequence of
            ``(latitude, longitude, elevation, time)`` tuples.
        output_file: Path of the HTML file to write (UTF-8).
    """
    # Count runs per starting year, ordered from the earliest year on.
    start_years = pd.Series([track[0][3].year for track in tracks])
    yearly_counts = start_years.value_counts().sort_index()

    chart = px.bar(
        yearly_counts,
        x=yearly_counts.index,
        y=yearly_counts.values,
        labels={'index': 'Year', 'y': 'Number of Runs'},
        title='Number of Runs Over the Years',
        template='plotly_white'
    )

    # Transparent backgrounds so the chart blends into the embedding page.
    layout_options = {
        'plot_bgcolor': 'rgba(0,0,0,0)',
        'paper_bgcolor': 'rgba(0,0,0,0)',
        'font': dict(color='#333', size=18, family='Arial'),
        'title': dict(font=dict(size=24)),
        'xaxis': dict(tickfont=dict(size=16)),
        'yaxis': dict(tickfont=dict(size=16)),
        'hovermode': 'x unified',
        'margin': dict(l=40, r=40, t=60, b=40),
    }
    chart.update_layout(**layout_options)

    bar_style = {
        'marker_color': 'rgb(26, 118, 255)',
        'marker_line_color': 'rgb(8, 48, 107)',
        'marker_line_width': 1.5,
    }
    chart.update_traces(**bar_style)

    # Only the chart fragment is needed; the surrounding page is built below.
    chart_html = chart.to_html(full_html=False)

    html_content = f"""
    <html>
    <head>
        <style>
            body {{
                font-family: 'Arial', sans-serif;
                background-color: transparent;
                color: #333;
                margin: 0;
                padding: 0;
            }}
            h1 {{
                text-align: center;
                font-size: 2.5em;
                margin-bottom: 20px;
                color: #1a76ff;
            }}
            .chart-container {{
                width: 100%;
                height: auto;
            }}
        </style>
    </head>
    <body>
        <h1>Number of Runs Over the Years</h1>
        <div class="chart-container">
            {chart_html}
        </div>
    </body>
    </html>
    """

    with open(output_file, 'w', encoding='utf-8') as page:
        page.write(html_content)


In [6]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from datetime import timedelta

# Helper function to format pace in min:sec/km
def format_pace(seconds_per_km):
    """Format a pace given in seconds per kilometer as ``M:SS min/km``.

    A pace of exactly 0 is rendered as ``"N/A"``. Fractional seconds are
    truncated, not rounded.
    """
    if seconds_per_km == 0:
        return "N/A"
    whole_minutes, leftover_seconds = divmod(seconds_per_km, 60)
    return f"{int(whole_minutes)}:{int(leftover_seconds):02d} min/km"

# Function to save scatter plot HTML for distance vs. pace with correct pace data and clickable year legend
def save_distance_vs_pace_html(tracks, output_file='distance_vs_pace.html'):
    """Write an HTML page with a scatter plot of run distance against average pace.

    Each run becomes one marker; runs are grouped into one trace per start
    year (ascending), so a year can be toggled through the legend. Tracks
    with fewer than two points or without a timestamp on their first or last
    point are skipped. When no track qualifies, an empty chart is written.

    Args:
        tracks: List of tracks; each track is a sequence of
            ``(latitude, longitude, elevation, time)`` tuples.
        output_file: Path of the HTML file to write (UTF-8).
    """
    runs_by_year = {}
    for idx, track in enumerate(tracks):
        if len(track) < 2:
            continue
        start_time = track[0][3]
        end_time = track[-1][3]
        if start_time is None or end_time is None:
            # Pace and year cannot be derived without timestamps.
            continue

        total_distance_km = sum(
            calculate_distance(track[i], track[i + 1]) for i in range(len(track) - 1)
        ) / 1000
        # The point-to-point time deltas add up to exactly end - start.
        total_time_seconds = (end_time - start_time).total_seconds()
        avg_pace_seconds_per_km = total_time_seconds / total_distance_km if total_distance_km > 0 else 0

        runs_by_year.setdefault(start_time.year, []).append({
            'Run Number': idx + 1,
            'Date': start_time.strftime("%d.%m.%Y"),
            'Distance (km)': total_distance_km,
            'Total Time': str(timedelta(seconds=int(total_time_seconds))),
            'Average Pace (seconds/km)': avg_pace_seconds_per_km,
            'Average Pace (min/km)': format_pace(avg_pace_seconds_per_km),
        })

    fig = go.Figure()

    for year in sorted(runs_by_year):
        year_runs = runs_by_year[year]
        fig.add_trace(go.Scatter(
            x=[run['Distance (km)'] for run in year_runs],
            y=[run['Average Pace (seconds/km)'] for run in year_runs],
            mode='markers',
            name=str(year),
            text=[
                f"Run Number: {run['Run Number']}<br>Date: {run['Date']}<br>Distance: {run['Distance (km)']:.3f} km<br>Total Time: {run['Total Time']}<br>Pace: {run['Average Pace (min/km)']}"
                for run in year_runs
            ],
            hoverinfo='text'
        ))

    fig.update_layout(
        title='Distance vs. Pace',
        xaxis_title='Distance (km)',
        yaxis_title='Average Pace (min/km)',
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        font_color='#333',
        yaxis=dict(
            autorange='reversed',  # Reversed axis: lower values (faster pace) at the top
            tickvals=[i * 60 for i in range(15)],  # One tick per minute, 0 to 14 min/km
            ticktext=[format_pace(i * 60) for i in range(15)]
        ),
        legend_title_text='Year'
    )

    fig.update_traces(marker=dict(size=12))  # Adjust marker size if needed

    fig_html = fig.to_html(full_html=False)

    html_content = f"""
    <html>
    <head>
        <style>
            body {{ font-family: Arial, sans-serif; color: #333; }}
        </style>
    </head>
    <body>
        <h1>Distance vs. Pace</h1>
        {fig_html}
    </body>
    </html>
    """
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

# Placeholder: start with no tracks until real data is loaded.
# A track is a list of (latitude, longitude, altitude, timestamp) tuples, e.g.
# [(lat1, lon1, alt1, timestamp1), (lat2, lon2, alt2, timestamp2), ...]
tracks = []

In [7]:
# Main execution: load the GPX data, then generate every report.
folder_path = '../Adidas Running Data/Sport-sessions/GPS-data'

tracks = read_gpx_files(folder_path)
point_counter = count_point_frequencies(tracks)

# Global and local maps are written to their default HTML files.
create_maps(tracks, point_counter)

# Reports that are saved and then rendered inline, in this order.
for save_report, report_file in (
    (save_runs_list_html, 'runs_list.html'),
    (save_statistics_html, 'statistics.html'),
    (save_runs_over_years_html, 'runs_over_years.html'),
):
    save_report(tracks)
    display(HTML(report_file))

# The distance vs. pace scatter plot is only saved, not displayed.
save_distance_vs_pace_html(tracks, output_file='distance_vs_pace.html')

Run Number,Date,Time,Distance (km),Average Pace (min/km)
72,2024-05-24,09:00:27 - 09:12:03 (Time: 0:11:36),2.003,5:47 min/km
71,2024-05-24,08:07:53 - 08:37:17 (Time: 0:29:24),4.989,5:53 min/km
70,2024-05-19,19:01:01 - 19:55:41 (Time: 0:54:40),9.113,5:59 min/km
69,2024-05-17,06:34:49 - 07:33:22 (Time: 0:58:33),10.31,5:40 min/km
68,2024-05-15,18:03:47 - 18:29:38 (Time: 0:25:51),5.505,4:41 min/km
67,2024-05-05,07:54:31 - 12:37:57 (Time: 4:43:26),42.948,6:35 min/km
66,2024-04-28,12:00:33 - 13:32:31 (Time: 1:31:58),12.899,7:07 min/km
65,2024-04-26,04:54:13 - 05:30:34 (Time: 0:36:21),5.391,6:44 min/km
64,2024-04-21,09:50:58 - 11:36:32 (Time: 1:45:34),18.213,5:47 min/km
63,2024-04-13,15:08:29 - 15:56:43 (Time: 0:48:14),9.107,5:17 min/km
