<div align="right" style="text-align: right"><i>Peter Norvig, Oct 2017<br>pandas Aug 2020<br>Data updated monthly</i></div>

# Bike Code

Code to support the analysis in the notebook [Bike Speed versus Grade.ipynb](Bike%20Speed%20versus%20Grade.ipynb).

In [1]:
from IPython.core.display import HTML
from typing import Iterator, Tuple, List, Dict
import matplotlib
import matplotlib.pyplot as plt
import numpy  as np
import pandas as pd
import re

# Reading Data

I downloaded a bunch of my recorded [Strava](https://www.strava.com/athletes/575579) rides, most of them longer than 25 miles (with a few exceptions), as [`bikerides.tsv`](bikerides.tsv).  The columns are: the date; a title; the elapsed time of the ride; the length of the ride in miles; and the total climbing in feet, e.g.: 

    Mon, 10/5/2020	Half way around the bay on bay trail	6:26:35	80.05	541
    
I parse the file into the pandas dataframe `rides`:

In [2]:
def parse_hours(time: str) -> float:
    """Parse a clock-style duration into hours, e.g. '4:30:00' => 4.5.

    A string with fewer than two colons (such as '28:49') is left-padded with
    '0:' fields so that it is always read as hours:minutes:seconds.
    Surrounding whitespace is ignored. The result is rounded to 4 decimals.
    """
    time = time.strip()
    # Pad missing leading fields: '28:49' => '0:28:49'.
    time = '0:' * (2 - time.count(':')) + time
    # total_seconds(), not .seconds: .seconds omits whole days, so a duration
    # of 24 hours or more would silently wrap around.
    return round(pd.Timedelta(time).total_seconds() / 60 / 60, 4)

def clean_num(field):
    """Convert a numeric field such as '1,255 ft' or '2.98 mi' to a float.

    Thousands-separator commas and the ' ft' / ' mi' unit suffixes are removed first.
    """
    without_units = re.sub(',| ft| mi', '', field)
    return float(without_units)

In [3]:
# Read the tab-separated ride log; lines starting with '#' are comments.
# The context manager closes the file handle once pandas has read it.
with open('bikerides.tsv') as rides_file:
    rides = pd.read_table(rides_file, comment='#',
                converters=dict(miles=clean_num, hours=parse_hours, feet=clean_num))

# Convenient column shortcuts used by the analysis.
miles, hours, feet = [rides[attr] for attr in ('miles', 'hours', 'feet')]

# Segments

I picked some representative climbing segments ([`bikesegments.csv`](bikesegments.csv)) with the segment length in miles and climb in feet, along with several of my times on the segment. A line like

    Old La Honda,                   2.98, 1255, 28:49, 34:03, 36:44
    
means that this segment of Old La Honda Rd is 2.98 miles long with 1255 feet of climbing, and that I've selected three of my times on that segment: the fastest, middle, and slowest of the times that Strava shows.

In [4]:
Segment = Tuple[str, float, float, float] # (Name, miles, feet, hours)

def parse_segments(lines) -> Iterator[Segment]:
    """Parse segment lines into (title, miles, feet, hours) tuples.

    Each line contains 'segment_title, miles, feet_climb, time(s), ...'.
    One tuple is yielded per time, so a line with three times yields three
    tuples that share the same title, miles, and feet.
    Blank lines and lines starting with '#' are skipped.
    """
    for segment in lines:
        stripped = segment.strip()
        if not stripped or stripped.startswith('#'):
            continue  # a blank line would otherwise fail the unpacking below
        title, mi, ft, *times = segment.split(',')
        for time in times:
            if time.strip():  # tolerate a trailing comma
                yield title, float(mi), clean_num(ft), parse_hours(time)

In [5]:
# Build the segments table: one row per (segment, recorded time).
# The context manager closes the CSV once the DataFrame has consumed the parsed rows.
with open('bikesegments.csv') as segments_file:
    segments = pd.DataFrame(parse_segments(segments_file),
                            columns=('title', 'miles', 'feet', 'hours'))

miles2, hours2, feet2 = [segments[attr] for attr in ('miles', 'hours', 'feet')]

# Grade in percent: feet climbed per foot travelled (5280 feet per mile), times 100.
pct2 = feet2 / miles2 * 100 / 5280

# Eddington Number

In [6]:
def Ed_number(distances) -> int:
    """Eddington number: The maximum integer e such that you have bicycled 
    a distance of at least e on at least e days.

    Returns 0 when there are no distances, or none of them reaches 1.
    """
    ranked = sorted(distances, reverse=True)
    # With distances in descending order, the e-th longest ride being >= e
    # means at least e rides are >= e. default=0 covers the empty case.
    return max((e for e, d in enumerate(ranked, 1) if d >= e), default=0)

def Ed_gap(distances, target) -> int:
    """The number of rides needed to reach an Eddington number target.

    This is `target` minus the number of rides whose distance is at least
    `target`, the same "at least" rule that Ed_number uses. The result is
    negative if more than `target` rides already qualify.
    `distances` may be any sequence or array of numbers.
    """
    qualifying = np.count_nonzero(np.asarray(distances) >= target)
    return target - int(qualifying)

def Ed_progress(years, rides=rides) -> pd.DataFrame:
    """Tabulate the Eddington number, in miles and in kilometers, for each year.

    The row for year y counts every ride dated in year y or earlier.
    """
    table = []
    for y in years:
        so_far = np.array([ride.miles for ride in rides.itertuples() if year(ride) <= y])
        table.append((y, Ed_number(so_far), Ed_number(kms(so_far))))
    return pd.DataFrame(table, columns=['year', 'E_mi', 'E_km'])

def kms(miles: np.ndarray) -> np.ndarray:
    """Convert miles to kilometers (1 mile = 1.609344 km).

    Works elementwise on a numpy array; a plain number also works.
    """
    return 1.609344 * miles

def year(ride) -> int:
    """The year of a ride, taken from the last four characters of its `date` string."""
    final_four = ride.date[-4:]
    return int(final_four)

# Plotting and Curve-Fitting

In [7]:
# Default size, (width, height), for every figure created below.
plt.rcParams.update({"figure.figsize": (10, 6)})

def show(X, Y, xlabel='Segment Grade (percent)', ylabel='Speed (mph)', degrees=(2, 3)): 
    """Scatter-plot Y against X and overlay one best-fit polynomial per degree in `degrees`.

    The grid is turned on, both axes are labeled, and each fitted curve is
    drawn at 100 evenly spaced points between the smallest and largest X.
    """
    grid()
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.scatter(X, Y, c='grey', marker='+')
    lo, hi = min(X), max(X)
    xs = np.linspace(lo, hi, 100)
    for degree in degrees:
        curve = poly_fit(X, Y, degree)
        plt.plot(xs, list(map(curve, xs)), '-')

def grid(axis='both'): 
    """Turn on minor ticks and draw grid lines along `axis`.

    Major grid lines are solid at half opacity; minor ones are dotted at quarter opacity.
    """
    plt.minorticks_on()
    for which, line_style, opacity in (('major', '-', 1/2), ('minor', ':', 1/4)):
        plt.grid(which=which, axis=axis, ls=line_style, alpha=opacity)
    
def poly_fit(X, Y, degree: int) -> callable:
    """Fit a polynomial of the given degree to the X,Y vectors (least squares, via
    np.polyfit) and return it as a function of x."""
    # np.polyfit lists the highest power first; flip so that index == power.
    ascending = np.flip(np.polyfit(X, Y, degree))

    def polynomial(x):
        total = 0
        for power, coeff in enumerate(ascending):
            total = total + coeff * x ** power
        return total

    return polynomial

def estimate(dist, climb, mph=poly_fit(feet/miles, miles/hours, 2)) -> float:
    """Estimate the time, in minutes, for a ride of `dist` miles with `climb` feet of climbing.

    `mph` maps feet climbed per mile to a speed in miles per hour; the default
    is a degree-2 fit to the recorded rides, computed once when this function
    is defined.
    """
    feet_per_mile = climb / dist
    speed = mph(feet_per_mile)
    return 60 * dist / speed

# Strava Data

Here is my raw data from Strava, for rides and segments:

In [8]:
rides

Unnamed: 0,date,title,hours,miles,feet
0,"Sun, 2/7/2021",Saratoga / Campbell,5.8925,78.38,2270.0
1,"Fri, 1/8/2021",Coyote Hills Geocaching,4.9689,69.08,797.0
2,"Sun, 10/11/2020",Los Altos Hills Paths,5.8247,65.03,1870.0
3,"Mon, 10/5/2020",Half way around the bay on bay trail,6.4431,80.05,541.0
4,"Tue, 9/29/2020",Saratoga Geocaching,4.9722,64.30,961.0
...,...,...,...,...,...
474,"Sat, 3/19/2016",Morning Ride,1.4667,24.00,1125.0
475,"Sat, 7/13/2013",Doug's Event,1.8653,21.35,1677.0
476,"Sun, 8/4/2013",Kris's first trike ride,1.8558,20.96,988.0
477,"Sun, 11/24/2013",Alpine Rd,1.7100,21.02,1289.0


In [9]:
rides.describe()

Unnamed: 0,hours,miles,feet
count,479.0,479.0,479.0
mean,3.09099,39.9319,1632.755741
std,1.294152,15.772976,1345.854656
min,1.4667,20.96,68.0
25%,2.12985,27.98,657.5
50%,2.6669,34.28,1286.0
75%,3.61635,45.18,2088.5
max,8.1375,101.0,7644.0


In [10]:
segments

Unnamed: 0,title,miles,feet,hours
0,Alma Mountain Charlie,3.12,875.0,0.5303
1,Alpine Westridge,0.76,99.0,0.0572
2,Alpine Westridge,0.76,99.0,0.0581
3,Alpine Westridge,0.76,99.0,0.0619
4,Alpine last kicker,0.39,114.0,0.0531
...,...,...,...,...
129,Westridge Hill 2,0.51,166.0,0.0861
130,Westridge Hill 2,0.51,166.0,0.0889
131,Woodside Climb,1.71,295.0,0.1347
132,Woodside Climb,1.71,295.0,0.1500


In [11]:
segments.describe()

Unnamed: 0,miles,feet,hours
count,134.0,134.0,134.0
mean,0.927761,264.037313,0.1418
std,0.631672,223.422436,0.113192
min,0.17,35.0,0.0267
25%,0.47,122.0,0.06645
50%,0.76,193.0,0.109
75%,1.215,322.0,0.1692
max,3.12,1255.0,0.6122


# Wandering

Monthly, I will take my [summary data from wandrer.earth](https://wandrer.earth/athletes/3534/santa-clara-county-california) and enter it in the file [bikeplaces.txt](bikeplaces.txt), in a format where

      Cupertino: 172: 22.1 23.9 26.2*3 26.3 | 26.4
      
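means that Cupertino has 172 miles of roads, and that by the first month I started keeping track, I had ridden 22.1% of them; in the last month 26.4%; and the `26.2*3` means that for 3 months in a row I had 26.2%. The `|` indicates the end of a year. A percentage written as `=` repeats the previous month's value. A line that starts with `:` is the title of a category, and the entries that follow it belong to that category. A line that starts with `#` is a comment.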

In [12]:
class Month(int):
    """An integer in the form: 12 * year + month."""
    def __str__(self):
        # Shift to a 0-based month so that December stays in its own year.
        year, month_index = divmod(self - 1, 12)
        return f'{year}-{month_index + 1:02d}'

start = Month(12 * 2020 + 7) # Starting month: July 2020
dates = [Month(m) for m in range(start, start + 8)] # 8 consecutive months, beginning at start
bonuses = (25, 90, 99) # Percent levels drawn as bonus lines and quoted in legend labels

Entry = Tuple[str, float, List[float]] # (Place_Name, miles_of_roads, [pct_by_month,...])

def wandrer(category, entries, dates=dates):
    """Plot Wandrer.earth data: percent of roads ridden, by month, for each entry.

    category: the chart title.
    entries:  (place, miles, pcts) tuples, where pcts lines up with `dates`.
    dates:    the months on the x axis.

    Months whose percentage is 0 are left out of a place's line. A dotted
    horizontal line is drawn at each level in `bonuses` that lies strictly
    between the smallest and largest nonzero percentage plotted.
    """
    from itertools import cycle
    plt.figure()
    ax = plt.subplot(111)
    # cycle() reuses markers, so entries past the 18th are still plotted
    # instead of being silently dropped by zip().
    for (place, miles, pcts), marker in zip(entries, cycle('^v><osdhxDHPX*1234')):
        points = [(d, p) for d, p in zip(dates, pcts) if p]
        ax.plot([d for d, _ in points], [p for _, p in points], ':',
                marker=marker, label=label(pcts, place, miles))
    all_pcts = [p for _, _, pcts in entries for p in pcts if p]
    if all_pcts:  # min()/max() would raise on an empty chart
        lowest, highest = min(all_pcts), max(all_pcts)
        for p in bonuses:
            if lowest < p < highest:
                ax.plot(dates, [p] * len(dates), 'k:', lw=1, alpha=3/4) # Plot bonus line
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), shadow=True,
              prop=matplotlib.font_manager.FontProperties(family='monospace'))
    plt.xticks(dates, [str(d) for d in dates], rotation=90)
    plt.ylabel('Percent of Area Ridden')
    plt.title(category); plt.tight_layout(); grid(axis='y'); plt.show()
    
def label(pcts, place, miles) -> str:
    """Build the legend text for one place from its most recent percentage.

    The text shows the latest percent, the miles ridden out of the total
    miles, the miles still needed to reach the first level in `bonuses` that
    is at or above the latest percent (if there is one), and the place name.
    """
    latest = pcts[-1]
    if latest > 1:
        shown = f'{rounded(latest):>3}'
    else:
        shown = f'{latest}'
    ridden = miles * latest
    goal = ''
    for level in bonuses:
        if level >= latest:
            goal = f' {rounded((level - latest) / 100 * miles):>3} to {level}%'
            break
    return f'{shown}% ({rounded(ridden / 100):>3}/{rounded(miles):>3} mi){goal} {place}'
    
def parse_places(lines) -> Dict[str, List[Entry]]:
    """Parse bikeplaces.txt into a dict of {'Title': [entry,...]}.

    Blank lines and lines starting with '#' are ignored. A line starting with
    ':' begins a new title; every other line is an entry that belongs to the
    most recent title. Each title's entries are sorted by their latest
    percentage, largest first.

    Raises ValueError if an entry appears before any title line.
    """
    places = {}
    title = None
    for line in lines:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        if line.startswith(':'):
            title = line.strip(':')
            places[title] = []
        elif title is None:
            raise ValueError(f'Entry appears before any title line: {line!r}')
        else:
            places[title].append(parse_entry(line))
    # Sort once per title at the end; the sort is stable, so ties keep file order.
    for entries in places.values():
        entries.sort(key=lambda entry: -entry[-1][-1])
    return places
    
def parse_entry(line: str, dates=dates) -> Entry:
    """Parse line => ('Place Name', miles, [percents]); '=' can be used.

    The line has the form 'Place: miles: pct pct ...'. A '|' is treated as a
    space, 'pct*n' stands for pct repeated n times, and '=' repeats the
    previous percentage. There must be exactly one percentage per date.

    Raises ValueError if the line does not contain exactly two colons, if the
    number of percentages differs from len(dates), or if '=' comes first.
    """
    if line.count(':') != 2:
        raise ValueError(f'bad {line!r}: expected "Place: miles: percents"')
    place, miles, pcts = line.replace('|', ' ').split(':')
    # Expand ' 26.2*3' into ' 26.2 26.2 26.2' before splitting on whitespace.
    tokens = re.sub(r'( [0-9.]+)[*]([0-9]+)', lambda m: m.group(1) * int(m.group(2)),
                    pcts).split()
    if len(tokens) != len(dates):
        raise ValueError(f"Bad percents: {place}: {tokens}")
    values = []
    for token in tokens:
        if token == '=':
            if not values:
                raise ValueError(f"Bad percents: {place}: '=' has no previous value")
            values.append(values[-1])
        elif token == '100':
            values.append(100)  # kept as the int 100 rather than 100.0
        else:
            values.append(float(token))
    return place, float(miles), values
                   
def rounded(x: float) -> str:
    """Format x for display: above 10, a whole number with thousands separators;
    otherwise one decimal place."""
    if x > 10:
        return format(round(x), ',d')
    return format(x, '.1f')

def wandering(places: dict):
    """Draw one chart of unique roads ridden for each category in `places`."""
    for category, entries in places.items():
        wandrer(category, entries)

In [13]:
# The context manager closes the file once every line has been parsed.
with open('bikeplaces.txt') as places_file:
    places = parse_places(places_file)