<div align="right" style="text-align: right"><i>Peter Norvig, Oct 2017<br>pandas Aug 2020<br>Data updated monthly</i></div>

# Bike Code

Code to support the analysis in the notebook [Bike Speed versus Grade.ipynb](Bike%20Speed%20versus%20Grade.ipynb).

In [1]:
from IPython.core.display import HTML
from typing import Iterator, Tuple, List, Dict
import matplotlib
import matplotlib.pyplot as plt
import numpy  as np
import pandas as pd
import re

# Reading Data: `rides`

I saved a bunch of my recorded [Strava](https://www.strava.com/athletes/575579) rides, most of them longer than 25 miles, as [`bikerides.tsv`](bikerides.tsv).  The columns are: the date; the year; a title; the elapsed time of the ride; the length of the ride in miles; and the total climbing in feet, e.g.: 

    Mon, 10/5	2020	Half way around the bay on bay trail	6:26:35	80.05	541
    
I parse the file into the pandas dataframe `rides`, adding derived columns for miles per hour, vertical feet climbed per hour (VAM), grade in feet per mile, grade in percent, and kilometers ridden:

In [2]:
def parse_rides(lines):
    """Read a tab-separated rides file (such as bikerides.tsv) into a DataFrame.

    Lines starting with '#' are ignored. The `hours` column is converted with
    parse_hours and the `feet` column with parse_int; derived columns are then
    appended by add_columns.
    """
    converters = {'hours': parse_hours, 'feet': parse_int}
    table = pd.read_table(lines, comment='#', converters=converters)
    return add_columns(table)

def parse_hours(time: str) -> float:
    """Parse an 'H:MM:SS' duration into hours, e.g. '4:30:00' => 4.5.

    Shorter forms are accepted: missing leading fields are filled with zeros,
    so '30:00' => 0.5. Surrounding whitespace is ignored. The result is
    rounded to 4 decimal places.
    """
    time = time.strip()
    # Pad on the left until all three fields (hours:minutes:seconds) are present.
    while time.count(':') < 2:
        time = '0:' + time
    # total_seconds() covers the whole duration. The `.seconds` attribute only
    # holds what is left after whole days, so '25:00:00' would come out as 1.0.
    return round(pd.Timedelta(time).total_seconds() / 60 / 60, 4)

def parse_int(field: str) -> int:
    """Convert a string of digits, possibly with commas, to an int: '1,255' => 1255."""
    digits = field.replace(',', '')
    return int(digits)

def add_columns(rides) -> pd.DataFrame:
    """Return a copy of `rides` with derived columns appended.

    Reads the `miles`, `hours` and `feet` columns and adds:
      mph: miles per hour (2 decimals)
      vam: feet per hour divided by 3.28084, i.e. vertical meters per hour
      fpm: feet climbed per mile
      pct: grade in percent (feet per mile over 5280 feet, 2 decimals)
      kms: miles times 1.609 (2 decimals)
    """
    miles, hours, feet = (rides[name] for name in ('miles', 'hours', 'feet'))
    feet_per_mile = feet / miles
    derived = {
        'mph': (miles / hours).round(2),
        'vam': (feet / hours / 3.28084).round(),
        'fpm': feet_per_mile.round(),
        'pct': (feet_per_mile * 100 / 5280).round(2),
        'kms': (miles * 1.609).round(2),
    }
    return rides.assign(**derived)

In [3]:
# Read the rides file; the `with` block closes the file once it has been parsed.
with open('bikerides.tsv') as rides_file:
    rides = parse_rides(rides_file)

# Reading Data: `segments`

I picked some representative climbing segments ([`bikesegments.csv`](bikesegments.csv)) with the segment length in miles and climb in feet, along with several of my times on the segment. A line like

    Old La Honda, 2.98, 1255, 28:49, 34:03, 36:44
    
means that this segment of Old La Honda Rd is 2.98 miles long, with 1255 feet of climbing, and that I've selected three of my times on that segment: the fastest, middle, and slowest of the times that Strava shows. (However, I ended up dropping the slowest time in the charts to make them less busy.)

In [4]:
def parse_segments(lines) -> pd.DataFrame:
    """Parse segment lines into a DataFrame with one row per (segment, time).

    Each line has the form 'title, miles, feet, time, time, ...'. Only the
    first five comma-separated fields are used, so at most two times per
    segment are kept. Blank lines and lines starting with '#' are skipped.
    The result has the columns title, hours, miles, feet, plus the derived
    columns added by add_columns.
    """
    records = []
    for segment in lines:
        stripped = segment.strip()
        if not stripped or stripped.startswith('#'):
            continue  # a blank or comment line would otherwise fail to unpack
        title, mi, ft, *times = segment.split(',')[:5]
        for time in times:
            records.append((title, parse_hours(time), float(mi), parse_int(ft)))
    return add_columns(pd.DataFrame(records, columns=('title', 'hours', 'miles', 'feet')))

In [5]:
# Read the segments file; the `with` block closes the file once it has been parsed.
with open('bikesegments.csv') as segments_file:
    segments = parse_segments(segments_file)

# Reading Data: `places`

Monthly, I will take my [summary data from wandrer.earth](https://wandrer.earth/athletes/3534/santa-clara-county-california) and enter it in the file [bikeplaces.txt](bikeplaces.txt), in a format where

      Cupertino: 172: 22.1 23.9 26.2*3 26.3 | 26.4
      
means that Cupertino has 172 miles of roads, and that in the first month I kept track I had ridden 22.1% of them; in the most recent month, 26.4%; and the `26.2*3` means that for 3 months in a row I had 26.2%. The `|` indicates the end of a year. A line that starts with `#` is a comment, and a line that starts with `:` (such as `:Over 90%:`) gives the title of a category of places.

In [6]:
class Month(int):
    """A calendar month encoded as the integer 12 * year + month, with month in 1..12."""
    def __str__(self):
        # Count months from zero so that December (a multiple of 12) stays in
        # its own year rather than rolling over into the next one.
        year, month_index = divmod(int(self) - 1, 12)
        return f'{year}-{month_index + 1:02d}'

start   = Month(2020 * 12 + 7) # Starting month: July 2020
bonuses = (0.1, 25, 90, 99)         # Percents that earn important bonuses

Entry = Tuple[str, float, List[float]] # (Place_Name, miles_of_roads, [pct_by_month,...])

def wandrer(category, entries, start=start):
    """Plot Wandrer.earth data: one dotted line per place, percent ridden by month.

    category: used as the chart title.
    entries:  a list of (place, miles, pcts) tuples; pcts[i] is the percent
              ridden in month `start + i`.
    start:    the Month corresponding to pcts[0].

    Months whose percent is falsy (0) are left off that place's line.
    """
    fig, ax = plt.figure(), plt.subplot(111); 
    plt.plot()
    # zip stops at the shorter input: there are 18 marker characters, so any
    # entries beyond the 18th are not plotted.
    for (place, miles, pcts), marker in zip(entries, '^v><osdhxDHPX*1234'):
        N = len(pcts)
        dates = [Month(start + i) for i in range(N)]
        X = [dates[i] for i in range(N) if pcts[i]]
        Y = [pcts[i]  for i in range(N) if pcts[i]]
        ax.plot(X, Y, ':', marker=marker, label=label(pcts, place, miles))
    # The legend goes to the right of the axes, in a monospace font so that
    # the padded fields produced by label() line up.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), shadow=True,
              prop=matplotlib.font_manager.FontProperties(family='monospace'))
    # NOTE(review): `dates` is whatever the last loop iteration left behind, so
    # the ticks follow the last entry's length, and an empty `entries` raises
    # NameError here.
    plt.xticks(dates, [str(d) for d in dates], rotation=90)
    plt.ylabel('Percent of Area Ridden')
    plt.title(category); plt.tight_layout(); grid(axis='y'); plt.show()
    
def label(pcts, place, miles) -> str:
    """Build the legend text for one place from its latest percentage.

    The text shows the latest percent, miles ridden out of total miles, the
    miles still needed to reach the next bonus percent (if any), and the
    place name.
    """
    latest = pcts[-1]
    if latest > 1.4:
        pct = f'{rounded(latest):>3}'
    else:
        pct = f'{latest}'  # small percentages are shown exactly as recorded
    done = miles * latest
    bonus = ''
    for p in bonuses:
        if p >= latest:  # the first bonus level not yet passed
            bonus = f' {rounded((p - latest) / 100 * miles):>3} to {p}%'
            break
    return f'{pct}% ({rounded(done / 100):>3}/{rounded(miles):<3} mi){bonus} {place}'
    
def parse_places(lines) -> Dict[str, List[Entry]]:
    """Parse bikeplaces.txt lines into a dict of {'Title': [entry,...]}.

    Blank lines and lines starting with '#' are ignored. A line starting with
    ':' (e.g. ':Over 90%:') opens a new category whose title is the line with
    the surrounding colons removed. Every other line is parsed with
    parse_entry and added to the current category. Within each category the
    entries are ordered by latest percentage, highest first.

    Raises:
        ValueError: if an entry line appears before any ':Title:' line.
    """
    places = {}
    title = None
    for line in lines:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        if line.startswith(':'):
            title = line.strip(':')
            places[title] = []
        elif title is None:
            raise ValueError(f'entry found before any ":Title:" line: {line!r}')
        else:
            places[title].append(parse_entry(line))
    # Sort each category once at the end. list.sort is stable, so this gives
    # the same order as re-sorting after every append.
    for entries in places.values():
        entries.sort(key=lambda entry: -entry[-1][-1])
    return places
    
def parse_entry(line: str) -> Entry:
    """Parse 'Place Name: miles: pct pct ...' => ('Place Name', miles, [percents]).

    In the percents field, '|' (the end-of-year marker) is treated as
    whitespace, 'P*N' stands for the percent P repeated N times, and '='
    repeats the previous percent. A percent written exactly as '100' is kept
    as the int 100; all others become floats.

    Raises:
        ValueError: if the line does not contain exactly two ':' separators,
            if the percents start with '=', or if a field is not a number.
    """
    if line.count(':') != 2:
        raise ValueError(f'bad line (expected "place: miles: percents"): {line!r}')
    place, miles, pcts = line.replace('|', ' ').split(':')
    # Expand run-length items: ' 26.2*3' => ' 26.2 26.2 26.2'.
    pcts = re.sub(r'( [0-9.]+)[*]([0-9]+)', lambda m: m.group(1) * int(m.group(2)),
                  pcts).split()
    for i, p in enumerate(pcts):
        if p == '=':
            if i == 0:
                # pcts[-1] would silently pick up the last, still unparsed, item.
                raise ValueError(f"'=' has no previous percent to repeat: {line!r}")
            pcts[i] = pcts[i - 1]
        elif p == '100':
            pcts[i] = 100
        else:
            pcts[i] = float(p)
    return place, float(miles), pcts
                   
def rounded(x: float) -> str:
    """Format x compactly: a comma-grouped whole number above 10, else one decimal place."""
    if x > 10:
        return f'{round(x):,d}'
    return f'{x:.1f}'

def wandering(places: dict):
    "Draw one wandrer chart for each category in places, titled with the category."
    for category, entries in places.items():
        wandrer(category, entries)

In [7]:
# Read the places file; the `with` block closes the file once it has been parsed.
with open('bikeplaces.txt') as places_file:
    places = parse_places(places_file)

## Updating `bikeplaces.txt` to a new month

In [8]:
def update_places(filename='bikeplaces.txt'):
    """Return the text of `filename` with every line passed through update_line.

    Nothing is printed or written: the caller prints the returned string and
    pastes it back into the file.
    """
    with open(filename) as file:  # closed as soon as the text has been built
        return ''.join(map(update_line, file))

def update_line(line):
    """Update a line by adding a new month with the same % as the previous.

    If the last item is already a run such as '26.2*3', its count is bumped to
    '26.2*4'; otherwise '*2' is appended to the last item. Blank lines,
    ':Title:' lines and '#' comment lines get no new month. In every case
    runs of whitespace are collapsed to single spaces and the result ends
    with a newline.
    """
    words = line.split()
    if not words or words[0].startswith((':', '#')):
        pass  # not an entry: nothing to extend
    elif '*' in words[-1]:
        m, d = words[-1].split('*')
        words[-1] = m + '*' + str(int(d) + 1)
    else:
        words[-1] += '*2'
    return ' '.join(words) + '\n'

In [9]:
#print(update_places()) #### Do this once a month and copy/paste the output

:Complete!:
Atherton: 56.0: 0*4 91.2 94.0 | 95.2*2 94.2 94.9 96.2*2 98.7 99.5 99.19 99.3*2
Kensington Square: 0.6: 86.9 100*5 | 100*4 99.9*4 99.99*3
Menlo Oaks: 3.5: 0*4 98.4 99.7 | 99.7*4 100*4 99.99*3
West Menlo Park: 11.2: 0*4 97.5 98.1 | 98.1*3 99.3 99.7*2 100*2 99.99*3
North Fair Oaks: 26.7: 78.1 90.4 93.1 93.8 94.8 96.1 | 96.9*2 99.1*6 99.17*3
Sequoia Tract: 11.5: 0*4 72.8 82.3 | 92.5*4 96.4*2 97.5 98.2 99.4*3
East Palo Alto: 47.6: 74.4 91.2 91.9*2 92.2 93.3 | 93.4*2 92.4*2 92.5*4 95.0 99.2*2

:Over 90%:
Los Altos: 139.6: 40.2 43.7 72.4 77.2 84.5 90.8 | 91.0*2 91.1*2 91.2 90.4*3 90.63 90.9*2
Emerald Lake Hills: 24.6: 0.0 94.3*3 44.7 51.0 | 80.0 85.8 91.3*6 92.2*3
Menlo Park: 141.1: 67.7 76.8 87.7 90.7 91.2 90.1 | 92.5*2 91.4 91.6*3 91.8*2 93.5 94.7*2
Mountain View: 211.8: 53.0 59.9 63.0 63.6 72.9 77.1 | 91.1*4 92.3*4 91.12 93.5*2
Palo Alto: 298.8: 63.0 73.6 85.4 85.7 87.1 87.6 | 88.1*2 90.3 90.4 90.5 90.6*2 90.7 89.8 91.3*2
Loyola: 18.3: 0*4 60.8 62.1 | 62.1*4 91.5 90.6 91.3*2
Pa

# Eddington Number

In [11]:
def Ed_number(distances) -> int:
    """Eddington number: The maximum integer e such that you have bicycled 
    a distance of at least e on at least e days.

    Returns 0 when there are no distances, or when none of them reaches 1.
    """
    distances = sorted(distances, reverse=True)
    # With the longest ride first, the e-th distance is the shortest of the
    # top e rides; e qualifies when that distance is at least e.
    # default=0 covers the case where no e qualifies (max of nothing would raise).
    return max((e for e, d in enumerate(distances, 1) if d >= e), default=0)

def Ed_gap(distances, target) -> int:
    """The number of rides needed to reach an Eddington number target.

    This is `target` minus the number of rides whose distance is at least
    `target`. A result of zero or less means the target has already been met.
    `distances` may be any sequence of numbers (list, array, or Series).
    """
    # Use >= rather than >: a ride of exactly `target` counts toward the
    # Eddington number ("a distance of at least e").
    qualifying = int((np.asarray(distances) >= target).sum())
    return target - qualifying

def Ed_gaps(rides, N=10) -> pd.DataFrame:
    """A table of gaps to the next Eddington numbers, and a plot.

    For each of the N targets starting at the current Eddington number
    (computed separately in kilometers and in miles), the table gives the
    target and the number of additional rides needed to reach it (Ed_gap).
    The two gap curves are plotted side by side, and the table is returned.
    """
    E_km, E_mi = Ed_number(rides.kms), Ed_number(rides.miles)
    data = [(E_km + d, Ed_gap(rides.kms, E_km + d), E_mi + d, Ed_gap(rides.miles, E_mi + d))
            for d in range(N)]
    frame = pd.DataFrame(data, columns=['kms', 'Ed gap (kms)', 'miles', 'Ed gap (miles)'])
    _, (ax1, ax2) = plt.subplots(1, 2)
    frame.plot(ax=ax1, kind='line', x='kms', y='Ed gap (kms)', style='o:', grid=True,
               title='Metric Eddington Gaps')
    frame.plot(ax=ax2, kind='line', x='miles', y='Ed gap (miles)', style='o:', grid=True,
               title='Eddington Gaps')
    return frame

def Ed_progress(years=range(2021, 2012, -1), rides=rides) -> pd.DataFrame:
    """A table of Eddington numbers by year, and a plot.

    For each year in `years` (by default 2021 down to 2013), the table gives
    the Eddington number in kilometers and in miles computed from all rides
    up to and including that year. The plot title reports the Eddington
    numbers over all of `rides`.
    """
    # The default is a range rather than reversed(range(...)): a reversed
    # iterator is created once at definition time and is used up by the first
    # call, leaving later calls with no years at all.
    def Ed(year, d): return Ed_number(rides[rides['year'] <= year][d])
    data  = [(y, Ed(y, 'kms'), Ed(y, 'miles')) for y in years]
    frame = pd.DataFrame(data, columns=['year', 'Ed_km', 'Ed_mi'])
    frame.plot('year', ['Ed_km', 'Ed_mi'], style='o:',
               title=f'My Eddington Numbers: {Ed_number(rides.kms)} in kms, {Ed_number(rides.miles)} in miles')
    grid(axis='y')
    return frame

# Plotting and Curve-Fitting

In [12]:
# Default size, as (width, height) in inches, for figures created from here on.
plt.rcParams["figure.figsize"] = (10, 6)

def show(X, Y, data, title='', degrees=(2, 3)): 
    """Scatter-plot column X against column Y of data, with best-fit curves.

    X and Y are column names in `data` and are also used as the axis labels.
    One best-fit polynomial is drawn for each degree in `degrees`, sampled at
    100 evenly spaced points across the range of X.
    """
    grid()
    plt.ylabel(Y)
    plt.xlabel(X)
    plt.title(title)
    plt.scatter(X, Y, data=data, c='grey', marker='+')
    xs, ys = data[X], data[Y]
    sample = np.linspace(min(xs), max(xs), 100)
    for degree in degrees:
        curve = poly_fit(xs, ys, degree)
        plt.plot(sample, list(map(curve, sample)), '-')
    
def grid(axis='both'): 
    "Show minor ticks and draw grid lines on `axis`: solid for major, dotted and fainter for minor."
    plt.minorticks_on()
    for which, ls, alpha in (('major', '-', 3/4), ('minor', ':', 1/2)):
        plt.grid(which=which, ls=ls, alpha=alpha, axis=axis)
    
def poly_fit(X, Y, degree: int) -> callable:
    """Return a function of x: the least-squares polynomial of the given degree for X, Y."""
    # np.polyfit lists the highest power first; reverse so that index == power.
    ascending = np.polyfit(X, Y, degree)[::-1]

    def polynomial(x):
        return sum(c * x ** power for power, c in enumerate(ascending))

    return polynomial

# Degree-2 fit of speed (miles per hour) as a function of steepness (feet
# climbed per mile), over all rides.
estimator = poly_fit(rides['feet'] / rides['miles'], 
                   rides['miles'] / rides['hours'], 2)

def estimate(miles, feet, estimator=estimator) -> float:
    """Estimate the time, in whole minutes, for a ride of `miles` with `feet` of climbing.

    `estimator` maps steepness (feet per mile) to a predicted speed in miles
    per hour; the result is the distance over that speed, rounded to the
    nearest minute.
    """
    steepness = feet / miles
    mph = estimator(steepness)
    return round(60 * miles / mph)

def top(frame, field, n=20):
    """The n rows of frame with the largest values of field, largest first."""
    ranked = frame.sort_values(field, ascending=False)
    return ranked.head(n)

In [14]:
class Acker:
    """Track acknowledged numbers and their high-water mark (HWM).

    HWM starts at n and advances to k once every number from the start
    through k has been acked. Numbers acked above a gap wait in `_set`
    until the gap below them is filled.
    """
    def __init__(self, n=0):
        self.HWM  = n
        self._set = {n}  # always holds HWM itself, plus acks waiting above a gap

    def ack(self, n):
        """Record an ack of n, advancing HWM over any run that is now contiguous.

        Issues a warning and changes nothing if n was already acked, whether
        it is at or below HWM or still waiting above a gap.
        """
        # Imported here because the file's import cell does not provide `warn`.
        from warnings import warn
        if n <= self.HWM or n in self._set:
            warn(f'duplicate ack of {n}')
        else:
            self._set.add(n)
            while self.HWM + 1 in self._set:
                self._set.remove(self.HWM)
                self.HWM += 1

# Demo: start with everything through 3 acked, then ack out of order.
A = Acker(3)
A.ack(5)
A.ack(6)
A.ack(10)
assert A.HWM == 3   # 4 is still missing, so the mark cannot move past 3
A.ack(4)
assert  A.HWM == 6  # 4 fills the gap, so the mark runs through 5 and 6
A._set              # holds the mark itself plus the ack still waiting above a gap


{6, 10}