In [None]:
# USER PARAMETERS
# filesource selects how the import cell parses the activity file:
#   'manual' - CSV log with a 'date' column plus 'average_moving_pace' and
#              'total_duration' columns that pd.to_timedelta can parse
#   'strava' - Strava activities CSV; only rows whose 'Activity Type' is 'Hike' are kept
filesource = 'manual' # 'strava' or 'manual'
# filename is read as ./<filename>, i.e. relative to the current working directory
filename = 'hiking_log.csv' # in same directory as this script

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import colors
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import numpy as np
import mpld3

In [None]:
# Import spreadsheet of activity data and calculate additional stats
#
# Result: a `hikes` DataFrame with (at least) the columns date, total_duration,
# average_moving_pace, distance_mi, elevation_gain_ft and max_elevation_ft, plus the
# derived columns steepness_fpm, pace_minpmi, fitness_fph, duration_h and speed_mph.

if filesource == 'manual':
    # Hand-maintained log: 'date' is parsed on read; pace and duration are converted
    # to timedeltas so the .dt accessors used for the derived stats work.
    hikes = pd.read_csv(f'./{filename}', parse_dates=['date'])
    hikes['average_moving_pace'] = pd.to_timedelta(hikes['average_moving_pace'])
    hikes['total_duration'] = pd.to_timedelta(hikes['total_duration'])

elif filesource == 'strava':
    # Read Strava data and filter to hikes and the necessary attributes.
    # Columns are selected with a list (a set has no defined order and is rejected as
    # an indexer by pandas >= 2.0). .copy() detaches the result from `activities` so
    # the column assignments below do not raise SettingWithCopyWarning.
    activities = pd.read_csv(f'./{filename}')
    strava_columns = ['Activity Name', 'Activity Date', 'Moving Time', 'Elapsed Time',
                      'Distance', 'Elevation Gain', 'Elevation High']
    hikes = activities.loc[activities['Activity Type'] == 'Hike', strava_columns].copy()

    # Convert units
    # NOTE(review): conversions assume the export stores times in seconds, distance in
    # km and elevations in m -- confirm against the Strava export being used.
    hikes['date'] = pd.to_datetime(hikes['Activity Date'])
    hikes['total_duration'] = pd.to_timedelta(hikes['Elapsed Time'], unit='second')
    hikes['moving_time'] = pd.to_timedelta(hikes['Moving Time'], unit='second')
    hikes['distance_mi'] = hikes['Distance'] / 1.609 # km to mi
    hikes['average_moving_pace'] = hikes['moving_time'] / hikes['distance_mi']
    hikes['elevation_gain_ft'] = hikes['Elevation Gain'] * 3.281 # m to ft
    hikes['max_elevation_ft'] = hikes['Elevation High'] * 3.281 # m to ft

    # Keep only the converted columns, in a fixed order
    hikes = hikes[['date', 'total_duration', 'moving_time', 'average_moving_pace',
                   'distance_mi', 'elevation_gain_ft', 'max_elevation_ft']].copy()

else:
    # Fail fast with a clear message instead of a NameError on `hikes` further down
    raise ValueError(f"filesource must be 'strava' or 'manual', got {filesource!r}")

# Calculate other stats (steepness, pace, fitness, duration, speed)
# Steepness divides the gain by HALF the distance; presumably this treats every hike as
# an out-and-back where all climbing happens on the way up -- TODO confirm intent.
hikes['steepness_fpm'] = hikes['elevation_gain_ft'] / (hikes['distance_mi'] / 2.0)
# Moving pace in minutes per mile
hikes['pace_minpmi'] = hikes['average_moving_pace'].dt.total_seconds() / 60.0
# Fitness = steepness (ft/mi) divided by pace expressed in hours per mile -> ft/hr
hikes['fitness_fph'] = hikes['steepness_fpm'] / (hikes['pace_minpmi'] / 60.0)
# Total (elapsed) duration in hours, so speed_mph includes breaks
hikes['duration_h'] = hikes['total_duration'].dt.total_seconds() / 3600.0
hikes['speed_mph'] = hikes['distance_mi'] / hikes['duration_h']

In [None]:
# Total annual miles and elevation gain
# Draws two bar charts (one bar per calendar year) and saves them to
# ./total_annual_distance_elevation.png
fig, ax = plt.subplots(nrows=1, ncols=2, sharex=True, figsize=(12,5))
ax = ax.flatten()

# Sum only the two plotted columns, once. Summing the whole frame would also try to
# sum the datetime 'date' column, which raises TypeError on pandas >= 2.0.
annual_totals = hikes.groupby(hikes['date'].dt.year)[['distance_mi', 'elevation_gain_ft']].sum()

# Total miles hiked per year
annual_totals['distance_mi'].plot.bar(ax=ax[0], xlabel='Year', ylabel='Miles', title='Total miles hiked', rot=0)
ax[0].bar_label(ax[0].containers[0])

# Total elevation gain per year
annual_totals['elevation_gain_ft'].plot.bar(ax=ax[1], xlabel='Year', ylabel='Feet', title='Total elevation gain', rot=0)
ax[1].bar_label(ax[1].containers[0])

plt.savefig('./total_annual_distance_elevation.png', bbox_inches='tight', dpi=300, facecolor='white', transparent=False)
plt.show()

In [None]:
# Cumulative annual distance and elevation gain
# Draws one line per year of the running total against day of year and saves the
# figure to ./cumulative_annual_distance_elevation.png
fig, ax = plt.subplots(nrows=1, ncols=2, sharex=True, figsize=(12,5))
ax = ax.flatten()

# Construct cumulative stats dataframe
# -- Sort chronologically first: cumsum accumulates in row order, so an unsorted log
# -- would otherwise produce wrong running totals
hikes_by_date = hikes.sort_values('date')
hike_year = hikes_by_date['date'].dt.year
# -- Cumulative sum of distance and elevation gain per year
cs = hikes_by_date.groupby(hike_year)[['distance_mi', 'elevation_gain_ft']].cumsum()
# -- Add time data back in (positional: cs has the same row order as hikes_by_date)
cs['doy'] = hikes_by_date['date'].dt.dayofyear.values
cs['year'] = hike_year.values
# -- Convert data frame to pivot table for plotting (inspiration: https://atedstone.github.io/pandas-plot-seasons-time-series/)
# -- aggfunc='max': when several hikes share a day, keep the end-of-day running total
# -- (the default 'mean' would average the running totals and understate the curve)
cumulative_pivot = pd.pivot_table(cs, index=['doy'], columns=['year'], values=['distance_mi', 'elevation_gain_ft'], aggfunc='max')
# -- Force index to have all days-of-year (1..366, so Dec 31 of a leap year is kept), and fill NaN
# -- forward with cumulative-to-date and then with zeros (for beginning of year)
# -- fill methods are so that plot projects cumulative forward in time rather than linearly interpolating between points
cumulative_pivot = cumulative_pivot.reindex(range(1, 367)).ffill().fillna(0)

# Intuitive monthly x-axis (inspiration: https://matplotlib.org/3.4.3/gallery/ticks_and_spines/centered_ticklabels.html)
# The date locators interpret the day-of-year x values as matplotlib date numbers, which
# places ticks approximately on month boundaries.
# Limits are set first so the minor ticks already exist when they are styled below.
ax[0].set_xlim([-0.5, 366.5])
ax[0].xaxis.set_major_locator(mdates.MonthLocator())
ax[0].xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=16))
ax[0].xaxis.set_major_formatter(mticker.NullFormatter())
ax[0].xaxis.set_minor_formatter(mdates.DateFormatter('%b'))
# Hide the minor tick marks and centre the month names between the major ticks
for tick in ax[0].xaxis.get_minor_ticks():
    tick.tick1line.set_markersize(0)
    tick.tick2line.set_markersize(0)
    tick.label1.set_horizontalalignment('center')

# Cumulative miles hiked per year
cumulative_pivot['distance_mi'].plot(ax=ax[0], xlabel='Day of year', ylabel='Miles', title='Cumulative miles hiked', rot=0, cmap='viridis')

# Cumulative elevation gain hiked per year
cumulative_pivot['elevation_gain_ft'].plot(ax=ax[1], xlabel='Day of year', ylabel='Feet', title='Cumulative elevation gain', rot=0, cmap='viridis')

plt.savefig('./cumulative_annual_distance_elevation.png', bbox_inches='tight', dpi=300, facecolor='white', transparent=False)
plt.show()

In [None]:
# Hiking trends over time (average and median)
# Draws a 2x3 grid with one panel per statistic, each showing the per-year mean and
# median, and saves the figure to ./annual_stats.png
fig, ax = plt.subplots(nrows=2, ncols=3, sharex=True, figsize=(17,9))
ax = ax.flatten()

# (column, panel title, y-axis label) for each panel, in plotting order
stat_panels = [
    ('distance_mi', 'Distance', 'Miles'),
    ('elevation_gain_ft', 'Elevation gain', 'Feet'),
    ('max_elevation_ft', 'Max elevation', 'Feet'),
    ('steepness_fpm', 'Steepness', 'Feet per mile'),
    ('fitness_fph', 'Fitness (steepness over pace)', 'Feet per hour'),
    ('speed_mph', 'Speed', 'Miles per hour'),
]
stat_columns = [column for column, _, _ in stat_panels]

# Calculate mean and median statistics
# Only the plotted columns are aggregated: averaging the whole frame fails on pandas >= 2.0
# as soon as it contains a non-numeric column (e.g. a hike name).
hikes_by_year = hikes.groupby(hikes['date'].dt.year)[stat_columns]
mean_stats = hikes_by_year.mean()
med_stats = hikes_by_year.median()

for panel, (column, title, ylabel) in zip(ax, stat_panels):
    panel.plot(mean_stats.index, mean_stats[column].values, 'o-', color='tab:blue', label='average')
    panel.plot(med_stats.index, med_stats[column].values, 'o-', color='tab:orange', label='median')
    panel.set_title(title)
    panel.set_ylabel(ylabel)

# One tick per year; the x-axis is shared, so setting it on the first panel is enough
ax[0].set_xticks(mean_stats.index)
# The colours mean the same in every panel, so a single legend suffices
ax[0].legend()

# Only the bottom row carries the x-axis label
for panel in ax[3:]:
    panel.set_xlabel('Year')

fig.suptitle('Annual Hiking Statistics', fontweight='bold', fontsize=16)
plt.savefig('./annual_stats.png', bbox_inches='tight', dpi=300, facecolor='white', transparent=False)
plt.show()

In [None]:
# Individual hiking statistics
# Draws three scatter plots with one point per hike, coloured by year, each with two
# gray reference lines, and saves the figure to ./individual_hike_stats.png
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(24,6))
ax = ax.flatten()

# Year range for the discrete colour scale. min()/max() are used rather than the first
# and last unique values so the range is correct even if the log is not in date order.
hike_years = hikes['date'].dt.year
first_year = int(hike_years.min())
last_year = int(hike_years.max())

cmap = plt.cm.viridis
# One colour bin per year, with boundaries halfway between consecutive years
norm = colors.BoundaryNorm(np.arange(first_year - 0.5, last_year + 1.5), cmap.N)

# Annual hike steepness (elevation gain over distance)
# Reference slopes are halved because steepness_fpm is defined as gain over half the distance
ax[0].scatter(hikes['distance_mi'], hikes['elevation_gain_ft'], c=hike_years, cmap=cmap, norm=norm, alpha=0.7)
ax[0].axline(xy1=(0,0), slope=1000/2, linestyle='--', color='gray', label='1000 ft/mi')
ax[0].axline(xy1=(0,0), slope=500/2, linestyle='-.', color='gray', label='500 ft/mi')
ax[0].set_xlabel('Distance (mi)')
ax[0].set_ylabel('Elevation gain (ft)')
ax[0].set_title('Hike steepness')
ax[0].legend()

# Annual hike fitness (pace over steepness)
# Each reference line passes through the origin: e.g. 30 min/mi at 1000 ft/mi is 2000 ft/hr
ax[1].scatter(hikes['pace_minpmi'], hikes['steepness_fpm'], c=hike_years, cmap=cmap, norm=norm, alpha=0.7)
ax[1].axline(xy1=(30,1000), slope=2000/60, linestyle='--', color='gray', label='2000 ft/hr')
ax[1].axline(xy1=(30,500), slope=1000/60, linestyle='-.', color='gray', label='1000 ft/hr')
ax[1].set_xlabel('Average moving pace (min/mi)')
ax[1].set_ylabel('Average steepness (ft/mi)')
ax[1].set_title('Hiking fitness')
ax[1].legend()

# Miles per hour (including breaks)
h = ax[2].scatter(hikes['duration_h'], hikes['distance_mi'], c=hike_years, cmap=cmap, norm=norm, alpha=0.7)
ax[2].axline(xy1=(2,4), slope=2, linestyle='--', color='gray', label='2 mph')
ax[2].axline(xy1=(2,2), slope=1, linestyle='-.', color='gray', label='1 mph')
ax[2].set_xlabel('Duration (hr)')
ax[2].set_ylabel('Distance (mi)')
ax[2].set_title('Hiking speed (including breaks)')
ax[2].legend()

# All three panels share cmap and norm, so one colorbar (built from the last scatter) serves them all
fig.colorbar(h, ax=ax, ticks=range(first_year, last_year + 1), label='Year')

fig.suptitle('Individual Hike Statistics', fontweight='bold', fontsize=16)
plt.savefig('./individual_hike_stats.png', bbox_inches='tight', dpi=300, facecolor='white', transparent=False)
plt.show()

In [None]:
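# # TESTING - Interactive (mpld3) version of the individual hiking statistics figure; kept commented out because it does not work yet
# # Known colorbar problems: `norm` cannot be passed to the last scatter plot, and the tick labels cannot be set to display as e.g. 2020 rather than 2,020.0
# # Note: the tooltip labels read hikes.name, a column the 'strava' import path does not keep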

# fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(24,6))
# ax = ax.flatten()
# cmap = plt.cm.viridis
# norm = colors.BoundaryNorm(np.arange(hikes.date.dt.year.unique()[0]-0.5, hikes.date.dt.year.unique()[-1]+1.5), cmap.N)
# labels = ['{0}'.format(hikes.name.iloc[n]) for n in range(len(hikes))]

# # Annual hike steepness (elevation gain over distance)
# scatter0 = ax[0].scatter(hikes.distance_mi, hikes.elevation_gain_ft, c=hikes.date.dt.year, alpha=0.9, cmap=cmap, norm=norm)
# ax[0].axline(xy1=(0,0), slope=1000/2, linestyle='--', color='gray', label='1000 ft/mi')
# ax[0].axline(xy1=(0,0), slope=500/2, linestyle='-.', color='gray', label='500 ft/mi')
# ax[0].set_xlabel('Distance (mi)')
# ax[0].set_ylabel('Elevation gain (ft)')
# ax[0].set_title('Hike steepness')
# ax[0].legend()
# tooltip0 = mpld3.plugins.PointLabelTooltip(scatter0, labels=labels)
# mpld3.plugins.connect(fig, tooltip0)

# # Annual hike fitness (pace over steepness)
# pace_minutes = [m.total_seconds()/60.0 for m in hikes.average_moving_pace]
# scatter1 = ax[1].scatter(pace_minutes, (hikes.elevation_gain_ft / (hikes.distance_mi/2.)), c=hikes.date.dt.year, alpha=0.9, cmap=cmap, norm=norm)
# ax[1].axline(xy1=(30,1000), slope=2000/60, linestyle='--', color='gray', label='2000 ft/hr')
# ax[1].axline(xy1=(30,500), slope=1000/60, linestyle='-.', color='gray', label='1000 ft/hr')
# ax[1].set_xlabel('Average moving pace (min/mi)')
# ax[1].set_ylabel('Average steepness (ft/mi)')
# ax[1].set_title('Hiking fitness')
# ax[1].legend()
# tooltip1 = mpld3.plugins.PointLabelTooltip(scatter1, labels=labels)
# mpld3.plugins.connect(fig, tooltip1)

# # Miles per hour (including breaks)
# total_hours = [t.total_seconds()/3600.0 for t in hikes.total_duration]
# scatter2 = ax[2].scatter(total_hours, hikes.distance_mi, c=hikes.date.dt.year, alpha=0.9, cmap=cmap)#, norm=norm)
# ax[2].axline(xy1=(2,4), slope=2, linestyle='--', color='gray', label='2 mph')
# ax[2].axline(xy1=(2,2), slope=1, linestyle='-.', color='gray', label='1 mph')
# ax[2].set_xlabel('Duration (hr)')
# ax[2].set_ylabel('Distance (mi)')
# ax[2].set_title('Hiking speed (including breaks)')
# ax[2].legend()
# cbar = plt.colorbar(scatter2, ax=ax, ticks=range(hikes.date.dt.year.unique()[0], hikes.date.dt.year.unique()[-1]+1), label='Year')
# tooltip2 = mpld3.plugins.PointLabelTooltip(scatter2, labels=labels)
# mpld3.plugins.connect(fig, tooltip2)

# plt.suptitle('Individual Hike Statistics', fontweight='bold', fontsize=16)

# mpld3.show()